Only inspect diffs when checking files for keywords

We previously checked which files were modified and then loaded all
of the file contents of these files and checked them against keywords.

This commit makes it so that we only inspect the Git diff which has
the advantage that we are not rejecting pushes that don't contain
keywords themselves but modify files that do.

We inspect the right side of the diff, so only text that is added by
the commit we inspect.

This improvement is not gated by a config paramter because it seems
universally useful to existing installations.

Future improvements could make the logic only inspect intraline diffs.

This diffing will not add latency to a 'git push' validated by this
plugin because we use the diff cache that is triggered anyway
during a git push in a later stage of processing.

Change-Id: I7892039c8dfe1fc4ca90cf47012753e1fee5ee89
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java
index 8b8b244..ef66c33 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java
@@ -24,13 +24,13 @@
 import com.google.common.collect.ImmutableListMultimap;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
-import com.google.common.flogger.FluentLogger;
 import com.google.gerrit.entities.Patch;
 import com.google.gerrit.entities.Project;
 import com.google.gerrit.entities.Project.NameKey;
 import com.google.gerrit.extensions.annotations.Exports;
 import com.google.gerrit.extensions.annotations.PluginName;
 import com.google.gerrit.extensions.api.projects.ProjectConfigEntryType;
+import com.google.gerrit.extensions.client.DiffPreferencesInfo;
 import com.google.gerrit.extensions.registration.DynamicSet;
 import com.google.gerrit.extensions.validators.CommentForValidation;
 import com.google.gerrit.extensions.validators.CommentValidationContext;
@@ -44,6 +44,10 @@
 import com.google.gerrit.server.git.validators.CommitValidationException;
 import com.google.gerrit.server.git.validators.CommitValidationListener;
 import com.google.gerrit.server.git.validators.CommitValidationMessage;
+import com.google.gerrit.server.patch.PatchList;
+import com.google.gerrit.server.patch.PatchListCache;
+import com.google.gerrit.server.patch.PatchListKey;
+import com.google.gerrit.server.patch.PatchListNotAvailableException;
 import com.google.gerrit.server.project.NoSuchProjectException;
 import com.google.inject.AbstractModule;
 import com.google.inject.Inject;
@@ -66,6 +70,7 @@
 import java.util.concurrent.ExecutionException;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import org.eclipse.jgit.diff.Edit;
 import org.eclipse.jgit.diff.RawText;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
@@ -80,8 +85,6 @@
  * validator classes to run its list of blocked keywords against commit content and comments.
  */
 public class BlockedKeywordValidator implements CommitValidationListener, CommentValidator {
-  private static final FluentLogger logger = FluentLogger.forEnclosingClass();
-
   // These keys are used for turning on specific validation elements.
   // i.e. enableSkipValidation = blockedKeyword will enabled skipRef and skipGroup checks
   // i.e. disabledValidation = blockedKeywordComments will disable the comment blocked keyword check
@@ -116,6 +119,7 @@
   private final GitRepositoryManager repoManager;
   private final LoadingCache<String, Pattern> patternCache;
   private final ContentTypeUtil contentTypeUtil;
+  private final PatchListCache patchListCache;
   private final ValidatorConfig validatorConfig;
 
   @Inject
@@ -125,12 +129,14 @@
       @Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache,
       PluginConfigFactory cfgFactory,
       GitRepositoryManager repoManager,
+      PatchListCache patchListCache,
       ValidatorConfig validatorConfig) {
     this.pluginName = pluginName;
     this.patternCache = patternCache;
     this.cfgFactory = cfgFactory;
     this.repoManager = repoManager;
     this.contentTypeUtil = contentTypeUtil;
+    this.patchListCache = patchListCache;
     this.validatorConfig = validatorConfig;
   }
 
@@ -158,6 +164,7 @@
         try (Repository repo = repoManager.openRepository(receiveEvent.project.getNameKey())) {
           List<CommitValidationMessage> messages =
               performValidation(
+                  receiveEvent.project.getNameKey(),
                   repo,
                   receiveEvent.commit,
                   receiveEvent.revWalk,
@@ -169,7 +176,10 @@
           }
         }
       }
-    } catch (NoSuchProjectException | IOException | ExecutionException e) {
+    } catch (NoSuchProjectException
+        | IOException
+        | ExecutionException
+        | PatchListNotAvailableException e) {
       throw new CommitValidationException("failed to check on blocked keywords", e);
     }
     return Collections.emptyList();
@@ -205,15 +215,19 @@
 
   @VisibleForTesting
   List<CommitValidationMessage> performValidation(
+      Project.NameKey project,
       Repository repo,
       RevCommit c,
       RevWalk revWalk,
       ImmutableCollection<Pattern> blockedKeywordPatterns,
       PluginConfig cfg)
-      throws IOException, ExecutionException {
+      throws IOException, ExecutionException, PatchListNotAvailableException {
     List<CommitValidationMessage> messages = new LinkedList<>();
     checkCommitMessageForBlockedKeywords(blockedKeywordPatterns, messages, c.getFullMessage());
     Map<String, ObjectId> content = CommitUtils.getChangedContent(repo, c, revWalk);
+    PatchList patchList =
+        patchListCache.get(
+            PatchListKey.againstDefaultBase(c, DiffPreferencesInfo.Whitespace.IGNORE_NONE), project);
     for (String path : content.keySet()) {
       ObjectLoader ol = revWalk.getObjectReader().open(content.get(path));
       try (InputStream in = ol.openStream()) {
@@ -221,7 +235,8 @@
           continue;
         }
       }
-      checkFileForBlockedKeywords(blockedKeywordPatterns, messages, path, ol);
+      checkLineDiffForBlockedKeywords(
+          patchList.get(path).getEdits(), blockedKeywordPatterns, messages, path, ol);
     }
     return messages;
   }
@@ -252,18 +267,23 @@
     }
   }
 
-  private static void checkFileForBlockedKeywords(
+  private static void checkLineDiffForBlockedKeywords(
+      List<Edit> edits,
       ImmutableCollection<Pattern> blockedKeywordPatterns,
       List<CommitValidationMessage> messages,
       String path,
       ObjectLoader ol)
       throws IOException {
+    List<String> lines = new ArrayList<>();
     try (BufferedReader br =
         new BufferedReader(new InputStreamReader(ol.openStream(), StandardCharsets.UTF_8))) {
-      int line = 0;
       for (String l = br.readLine(); l != null; l = br.readLine()) {
-        line++;
-        checkLineForBlockedKeywords(blockedKeywordPatterns, messages, path, line, l);
+        lines.add(l);
+      }
+    }
+    for (Edit edit : edits) {
+      for (int i = edit.getBeginB(); i < edit.getEndB(); i++) {
+        checkLineForBlockedKeywords(blockedKeywordPatterns, messages, path, i + 1, lines.get(i));
       }
     }
   }
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index 7c1cc23..aa7b0cf 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -143,7 +143,7 @@
 :    Patterns for blocked keywords.
 
     This check looks for blocked keywords in files. If the check finds an
-    blocked keyword the push will be rejected.
+    blocked keyword in the diff between the pushed commit and it's parent.
 
     To find a keyword it is possible to pass a regular expressions by
     blockedKeywordPattern.
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
index c709622..245f2dc 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
@@ -17,11 +17,19 @@
 import static com.google.common.truth.Truth.assertThat;
 import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.EMPTY_PLUGIN_CONFIG;
 import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.PATTERN_CACHE;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.gerrit.entities.Patch;
+import com.google.gerrit.entities.Project;
 import com.google.gerrit.server.git.validators.CommitValidationMessage;
+import com.google.gerrit.server.patch.PatchList;
+import com.google.gerrit.server.patch.PatchListCache;
+import com.google.gerrit.server.patch.PatchListEntry;
 import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -31,11 +39,26 @@
 import java.util.Set;
 import java.util.regex.Pattern;
 import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.diff.Edit;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevWalk;
 import org.junit.Test;
 
 public class BlockedKeywordValidatorTest extends ValidatorTestCase {
+  /** Maps file names to content. */
+  private static final Map<String, String> FILE_CONTENTS =
+      ImmutableMap.of(
+          "bar.txt",
+          "$Id$\n"
+              + "$Header$\n"
+              + "$Author$\n"
+              + "processXFile($File::Find::name, $Config{$type});\n"
+              + "$Id: foo bar$\n",
+          "foo.txt",
+          "http://foo.bar.tld/?pw=myp4ssw0rdTefoobarstline2\n",
+          "foobar.txt",
+          "Testline1\n" + "Testline2\n" + "Testline3\n" + "Testline4");
+
   private static ImmutableMap<String, Pattern> getPatterns() {
     return ImmutableMap.<String, Pattern>builder()
         .put("myp4ssw0rd", Pattern.compile("myp4ssw0rd"))
@@ -46,39 +69,42 @@
 
   private RevCommit makeCommit(RevWalk rw) throws IOException, GitAPIException {
     Map<File, byte[]> files = new HashMap<>();
-    // invalid files
-    String content = "http://foo.bar.tld/?pw=myp4ssw0rdTefoobarstline2\n";
-    files.put(
-        new File(repo.getDirectory().getParent(), "foo.txt"),
-        content.getBytes(StandardCharsets.UTF_8));
-
-    content =
-        "$Id$\n"
-            + "$Header$\n"
-            + "$Author$\n"
-            + "processXFile($File::Find::name, $Config{$type});\n"
-            + "$Id: foo bar$\n";
-    files.put(
-        new File(repo.getDirectory().getParent(), "bar.txt"),
-        content.getBytes(StandardCharsets.UTF_8));
-
-    // valid file
-    content = "Testline1\n" + "Testline2\n" + "Testline3\n" + "Testline4";
-    files.put(
-        new File(repo.getDirectory().getParent(), "foobar.txt"),
-        content.getBytes(StandardCharsets.UTF_8));
+    for (Map.Entry<String, String> fileContents : FILE_CONTENTS.entrySet()) {
+      files.put(
+          new File(repo.getDirectory().getParent(), fileContents.getKey()),
+          fileContents.getValue().getBytes(StandardCharsets.UTF_8));
+    }
     return TestUtils.makeCommit(rw, repo, "Commit foobar with test files.", files);
   }
 
   @Test
-  public void testKeywords() throws Exception {
+  public void keywords() throws Exception {
+    // Mock the PatchListCache to return a diff for each file in our new commit
+    PatchListCache patchListCacheMock = mock(PatchListCache.class);
+    PatchList mockPatchList = mock(PatchList.class);
+    when(patchListCacheMock.get(any(), any(Project.NameKey.class))).thenReturn(mockPatchList);
+    for (Map.Entry<String, String> fileContent : FILE_CONTENTS.entrySet()) {
+      PatchListEntry file = mock(PatchListEntry.class);
+      when(file.getEdits())
+          .thenReturn(
+              ImmutableList.of(new Edit(0, 0, 0, numberOfLinesInString(fileContent.getValue()))));
+      when(mockPatchList.get(fileContent.getKey())).thenReturn(file);
+    }
+
     try (RevWalk rw = new RevWalk(repo)) {
       RevCommit c = makeCommit(rw);
       BlockedKeywordValidator validator =
           new BlockedKeywordValidator(
-              null, new ContentTypeUtil(PATTERN_CACHE), PATTERN_CACHE, null, null, null);
+              null,
+              new ContentTypeUtil(PATTERN_CACHE),
+              PATTERN_CACHE,
+              null,
+              null,
+              patchListCacheMock,
+              null);
       List<CommitValidationMessage> m =
-          validator.performValidation(repo, c, rw, getPatterns().values(), EMPTY_PLUGIN_CONFIG);
+          validator.performValidation(
+              Project.nameKey("project"), repo, c, rw, getPatterns().values(), EMPTY_PLUGIN_CONFIG);
       Set<String> expected =
           ImmutableSet.of(
               "ERROR: blocked keyword(s) found in: foo.txt (Line: 1)"
@@ -95,4 +121,8 @@
   public void validatorInactiveWhenConfigEmpty() {
     assertThat(BlockedKeywordValidator.isActive(EMPTY_PLUGIN_CONFIG)).isFalse();
   }
+
+  public static int numberOfLinesInString(String str) {
+    return str.length() - str.replace("\n", "").length();
+  }
 }
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java
index cafc18c..bee5418 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java
@@ -244,4 +244,127 @@
     push.setPushOptions(ImmutableList.of("uploadvalidator~skip"));
     push.to("refs/heads/master").assertOkStatus();
   }
+
+  @Test
+  public void keywordExistsInFileButNotInDiff() throws Exception {
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t\n" + "blah" + "")
+        .to("refs/heads/master")
+        .assertOkStatus();
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t\n" + "foo" + "")
+        .to("refs/heads/master")
+        .assertOkStatus();
+  }
+
+  @Test
+  public void keywordExistsInNewFile() throws Exception {
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t\n" + "foo\n" + "secr3t")
+        .to("refs/heads/master")
+        .assertErrorStatus("blocked keywords");
+  }
+
+  @Test
+  public void keywordExistsInFileAndInDiff() throws Exception {
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t\n" + "blah" + "")
+        .to("refs/heads/master")
+        .assertOkStatus();
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t\n" + "blah\n" + "secr3t")
+        .to("refs/heads/master")
+        .assertErrorStatus("blocked keywords");
+  }
+
+  @Test
+  public void keywordExistsInFileAndIsRemoved() throws Exception {
+    pushFactory
+        .create(
+            admin.newIdent(), clone, "Subject", "file.txt", "" + "blah \n" + "secr3t\n" + "blah")
+        .to("refs/heads/master")
+        .assertOkStatus();
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    pushFactory
+        .create(admin.newIdent(), clone, "Subject", "file.txt", "" + "blah \n" + "blah\n")
+        .to("refs/heads/master")
+        .assertOkStatus();
+  }
+
+  @Test
+  public void keywordExistsInFileAndSameLineIsModified() throws Exception {
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t\n" + "blah" + "")
+        .to("refs/heads/master")
+        .assertOkStatus();
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    // This could be further improved using intra-line diffs
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t foobar\n" + "blah" + "")
+        .to("refs/heads/master")
+        .assertErrorStatus("blocked keywords");
+  }
+
+  @Test
+  public void keywordExistsInOldAndFileIsDeleted() throws Exception {
+    pushFactory
+        .create(
+            admin.newIdent(),
+            clone,
+            "Subject",
+            "file.txt",
+            "" + "blah \n" + "secr3t\n" + "blah" + "")
+        .to("refs/heads/master")
+        .assertOkStatus();
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    pushFactory
+        .create(admin.newIdent(), clone, "Subject", "foo.txt", "blah")
+        .rmFile("file.txt")
+        .to("refs/heads/master")
+        .assertOkStatus();
+  }
 }