Default to full-file evaluation if diffs are not available

Gerrit's diff cache reads objects using a self-created
ObjectReader without an option to pass in a RevWalk or an
ObjectReader instance.

This works for 'git push' where the commit we want to validate
has already been written to disk, but fails for REST endpoints
where the ObjectInserter has not been flushed yet when the
CommitValidators are called. Hence, the diff cache can't see
the objects it should diff.

There are multiple options to fix this, but no golden way. To
unblock users of the plugin, we default back to full-file
evaluation if diffs aren't available. We'll adapt Gerrit
core to be able to use diffs in the currently broken cases
as well. Options we are eyeballing include:

1) Pass an ObjectInserter/Reader to PatchListCache
2) Make Repository aware of open ObjectInserters
3) Adapt CreateChange to flush twice

Change-Id: Ic9ac19e8913243c7d78e971a300f52dffe983743
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java
index ef66c33..64ce6ab 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidator.java
@@ -24,6 +24,7 @@
 import com.google.common.collect.ImmutableListMultimap;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
+import com.google.common.flogger.FluentLogger;
 import com.google.gerrit.entities.Patch;
 import com.google.gerrit.entities.Project;
 import com.google.gerrit.entities.Project.NameKey;
@@ -85,6 +86,8 @@
  * validator classes to run its list of blocked keywords against commit content and comments.
  */
 public class BlockedKeywordValidator implements CommitValidationListener, CommentValidator {
+  private static final FluentLogger logger = FluentLogger.forEnclosingClass();
+
   // These keys are used for turning on specific validation elements.
   // i.e. enableSkipValidation = blockedKeyword will enabled skipRef and skipGroup checks
   // i.e. disabledValidation = blockedKeywordComments will disable the comment blocked keyword check
@@ -176,10 +179,7 @@
           }
         }
       }
-    } catch (NoSuchProjectException
-        | IOException
-        | ExecutionException
-        | PatchListNotAvailableException e) {
+    } catch (NoSuchProjectException | IOException | ExecutionException e) {
       throw new CommitValidationException("failed to check on blocked keywords", e);
     }
     return Collections.emptyList();
@@ -221,13 +221,25 @@
       RevWalk revWalk,
       ImmutableCollection<Pattern> blockedKeywordPatterns,
       PluginConfig cfg)
-      throws IOException, ExecutionException, PatchListNotAvailableException {
+      throws IOException, ExecutionException {
     List<CommitValidationMessage> messages = new LinkedList<>();
     checkCommitMessageForBlockedKeywords(blockedKeywordPatterns, messages, c.getFullMessage());
     Map<String, ObjectId> content = CommitUtils.getChangedContent(repo, c, revWalk);
-    PatchList patchList =
-        patchListCache.get(
-            PatchListKey.againstDefaultBase(c, DiffPreferencesInfo.Whitespace.IGNORE_NONE), project);
+    Optional<PatchList> patchList;
+    try {
+      patchList =
+          Optional.of(
+              patchListCache.get(
+                  PatchListKey.againstDefaultBase(c, DiffPreferencesInfo.Whitespace.IGNORE_NONE),
+                  project));
+    } catch (PatchListNotAvailableException e) {
+      // TODO(hiesel): Make changes to Gerrit core to always get a diff here. This entails figuring
+      // out Git object visibility between unflushed ObjectInserters and ObjectReaders created from
+      // the repo.
+      logger.atInfo().withCause(e).log("unable to load patch set");
+      patchList = Optional.empty();
+    }
+
     for (String path : content.keySet()) {
       ObjectLoader ol = revWalk.getObjectReader().open(content.get(path));
       try (InputStream in = ol.openStream()) {
@@ -235,8 +247,12 @@
           continue;
         }
       }
-      checkLineDiffForBlockedKeywords(
-          patchList.get(path).getEdits(), blockedKeywordPatterns, messages, path, ol);
+      if (patchList.isPresent()) {
+        checkLineDiffForBlockedKeywords(
+            patchList.get().get(path).getEdits(), blockedKeywordPatterns, messages, path, ol);
+      } else {
+        checkEntireFileForBlockedKeywords(blockedKeywordPatterns, messages, path, ol);
+      }
     }
     return messages;
   }
@@ -288,6 +304,22 @@
     }
   }
 
+  private static void checkEntireFileForBlockedKeywords(
+      ImmutableCollection<Pattern> blockedKeywordPatterns,
+      List<CommitValidationMessage> messages,
+      String path,
+      ObjectLoader ol)
+      throws IOException {
+    try (BufferedReader br =
+        new BufferedReader(new InputStreamReader(ol.openStream(), StandardCharsets.UTF_8))) {
+      int line = 1;
+      for (String l = br.readLine(); l != null; l = br.readLine()) {
+        checkLineForBlockedKeywords(blockedKeywordPatterns, messages, path, line, l);
+        line++;
+      }
+    }
+  }
+
   private static List<String> findBlockedKeywordsInString(
       ImmutableCollection<Pattern> blockedKeywordPatterns, String text) {
     List<String> found = new ArrayList<>();
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java
index bee5418..bd5f8f3 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/UploadValidatorIT.java
@@ -31,7 +31,11 @@
 import com.google.gerrit.extensions.api.changes.DraftInput;
 import com.google.gerrit.extensions.api.changes.ReviewInput;
 import com.google.gerrit.extensions.api.changes.ReviewInput.DraftHandling;
+import com.google.gerrit.extensions.client.ChangeStatus;
+import com.google.gerrit.extensions.common.ChangeInput;
+import com.google.gerrit.extensions.common.MergeInput;
 import com.google.gerrit.extensions.restapi.BadRequestException;
+import com.google.gerrit.extensions.restapi.ResourceConflictException;
 import com.google.gerrit.server.group.SystemGroupBackend;
 import com.google.inject.Inject;
 import org.eclipse.jgit.internal.storage.dfs.InMemoryRepository;
@@ -367,4 +371,62 @@
         .to("refs/heads/master")
         .assertOkStatus();
   }
+
+  @Test
+  public void createChangeSucceedsWhenKeywordDoesNotExistInFileAndDiff() throws Exception {
+    RevCommit head = getHead(testRepo.getRepository(), "HEAD");
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    pushFactory
+        .create(admin.newIdent(), clone, "Subject", "file.txt", "" + "blah \n")
+        .to("refs/heads/master")
+        .assertOkStatus();
+    clone.reset(head);
+    pushFactory
+        .create(admin.newIdent(), clone, "Subject", "foo.txt", "" + "blah \n")
+        .to("refs/heads/stable")
+        .assertOkStatus();
+    // Create a change that merges the other branch into master. This defaults back to
+    // full-file validation. If it doesn't, the create change call below would fail with
+    // a MissingObjectException.
+    ChangeInput changeInput = new ChangeInput();
+    changeInput.project = project.get();
+    changeInput.branch = "master";
+    changeInput.subject = "A change";
+    changeInput.status = ChangeStatus.NEW;
+    MergeInput mergeInput = new MergeInput();
+    mergeInput.source = gApi.projects().name(project.get()).branch("stable").get().revision;
+    changeInput.merge = mergeInput;
+    gApi.changes().create(changeInput);
+  }
+
+  @Test
+  public void createChangeWithKeywordInMessageFails() throws Exception {
+    RevCommit head = getHead(testRepo.getRepository(), "HEAD");
+    pushConfig(
+        Joiner.on("\n").join("[plugin \"uploadvalidator\"]", "    blockedKeywordPattern = secr3t"));
+    pushFactory
+        .create(admin.newIdent(), clone, "Subject", "file.txt", "" + "blah \n")
+        .to("refs/heads/master")
+        .assertOkStatus();
+    clone.reset(head);
+    pushFactory
+        .create(admin.newIdent(), clone, "Subject", "foo.txt", "" + "blah \n")
+        .to("refs/heads/stable")
+        .assertOkStatus();
+    // Create a change that merges the other branch into master. This defaults back to
+    // full-file validation. If it doesn't, the create change call below would fail with
+    // a MissingObjectException.
+    ChangeInput changeInput = new ChangeInput();
+    changeInput.project = project.get();
+    changeInput.branch = "master";
+    changeInput.subject = "A secr3t change";
+    changeInput.status = ChangeStatus.NEW;
+    MergeInput mergeInput = new MergeInput();
+    mergeInput.source = gApi.projects().name(project.get()).branch("stable").get().revision;
+    changeInput.merge = mergeInput;
+    ResourceConflictException e =
+        assertThrows(ResourceConflictException.class, () -> gApi.changes().create(changeInput));
+    assertThat(e).hasMessageThat().contains("blocked keyword(s) found");
+  }
 }