Better handle error conditions

When ScanTask encounters an error, log it instead of silently dying
and not processing any more refs.  This also exposed a few bugs in
JGit: there's an erroneous assumption that the submodule name will
be the same as the path, and the UTF-8 BOM is not handled cleanly by
BlobBasedConfig, which results in an error from SubmoduleWalk.

Change-Id: I280204390ab5fcf60dd6d0432c8acb17d85e3cb8
diff --git a/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/RefUpdateHandlerImpl.java b/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/RefUpdateHandlerImpl.java
index 638c267..6e184a8 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/RefUpdateHandlerImpl.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/RefUpdateHandlerImpl.java
@@ -25,6 +25,7 @@
 import org.eclipse.jgit.diff.RawTextComparator;
 import org.eclipse.jgit.errors.ConfigInvalidException;
 import org.eclipse.jgit.errors.RepositoryNotFoundException;
+import org.eclipse.jgit.lib.ConfigConstants;
 import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectLoader;
@@ -35,6 +36,8 @@
 import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.submodule.SubmoduleWalk;
 import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.treewalk.filter.PathFilter;
+import org.eclipse.jgit.util.RawParseUtils;
 import org.eclipse.jgit.util.io.DisabledOutputStream;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -78,7 +81,7 @@
     if (event.isDelete() && event.getRefName().startsWith(Constants.R_HEADS)
         || event.getRefName().startsWith(Constants.R_TAGS)) {
       // Ref was deleted... clean up any references
-      Ref ref = Ref.fetchByRef(event.getProjectName(), event.getRefName());
+      Ref ref = Ref.fetchByRef(getCanonicalProject(event.getProjectName()), event.getRefName());
       if (ref != null) {
         ref.delete();
       }
@@ -88,11 +91,11 @@
             event.getRefName());
       }
     } else if (event.getRefName().startsWith(Constants.R_TAGS)) {
-      Ref updatedRef = new Ref(event.getProjectName(), event.getRefName(),
+      Ref updatedRef = new Ref(getCanonicalProject(event.getProjectName()), event.getRefName(),
           event.getNewObjectId());
       updatedRef.save();
     } else if (event.getRefName().startsWith(Constants.R_HEADS)) {
-      Ref updatedRef = new Ref(event.getProjectName(), event.getRefName(),
+      Ref updatedRef = new Ref(getCanonicalProject(event.getProjectName()), event.getRefName(),
           event.getNewObjectId());
       updatedRef.save();
       Project.NameKey nameKey = new Project.NameKey(event.getProjectName());
@@ -144,7 +147,10 @@
                   String.format("%s:%s", event.getProjectName(), path),
                   event.getRefName(), projects);
             } else {
-              log.warn(String.format("%s is too large, skipping manifest parse",
+              log.warn(String.format(
+                  "project: %s, branch: %s, file: %s is too large, "
+                      + "skipping manifest parse",
+                  event.getProjectName(), event.getRefName(),
                   tw.getPathString()));
             }
           }
@@ -202,19 +208,77 @@
     HashMap<String, String> submodules = new HashMap<>();
     try (Repository repo = repoManager.openRepository(project)) {
       try (RevWalk walk = new RevWalk(repo);
-          SubmoduleWalk sw = new SubmoduleWalk(repo)) {
+          SubmoduleWalk sw = new SubmoduleWalk(repo);
+          TreeWalk cw = new TreeWalk(repo)) {
+        org.eclipse.jgit.lib.Config modulesConfig = null;
+
+        // TODO: Nasty hack! Work around JGit bug where modules aren't
+        // found if path is not the same as the name in the config!
+        // Also, BlobBasedConfig (which is used by SubmoduleWalk) doesn't
+        // handle UTF-8 BOMs, so we need to do some massaging.
         RevCommit commit =
             walk.parseCommit(repo.resolve(event.getNewObjectId()));
+        cw.addTree(commit.getTree());
+        cw.setRecursive(false);
+        PathFilter filter = PathFilter.create(Constants.DOT_GIT_MODULES);
+        cw.setFilter(filter);
+        while (cw.next()) {
+          if (filter.isDone(cw)) {
+            ObjectReader reader = repo.newObjectReader();
+            String decoded = "";
+            try {
+              ObjectLoader loader =
+                  reader.open(cw.getObjectId(0), Constants.OBJ_BLOB);
+              byte[] configBytes = loader.getCachedBytes(Integer.MAX_VALUE);
+              if (configBytes.length >= 3 && configBytes[0] == (byte) 0xEF
+                  && configBytes[1] == (byte) 0xBB
+                  && configBytes[2] == (byte) 0xBF) {
+                decoded = RawParseUtils.decode(RawParseUtils.UTF8_CHARSET,
+                    configBytes, 3, configBytes.length);
+              } else {
+                decoded = RawParseUtils.decode(configBytes);
+              }
+            } catch (IOException e) {
+              log.error(
+                  String.format("Unable to load .gitmodules in %s branch %s",
+                      event.getProjectName(), event.getRefName()),
+                  e);
+            }
+            modulesConfig = new org.eclipse.jgit.lib.Config();
+            modulesConfig.fromText(decoded);
+          }
+        }
         sw.setTree(commit.getTree());
         sw.setRootTree(commit.getTree());
+        sw.setModulesConfig(modulesConfig);
         while (sw.next()) {
-          submodules.put(
-              normalizePath(event.getProjectName(), sw.getModulesUrl(), false),
-              sw.getObjectId().name());
+          String modulesUrl = sw.getModulesUrl();
+          if (modulesUrl == null && modulesConfig != null) {
+            for (String key : modulesConfig
+                .getSubsections(ConfigConstants.CONFIG_SUBMODULE_SECTION)) {
+              if (sw.getPath().equals(modulesConfig.getString(
+                  ConfigConstants.CONFIG_SUBMODULE_SECTION, key,
+                  ConfigConstants.CONFIG_KEY_PATH))) {
+                modulesUrl = modulesConfig.getString(
+                    ConfigConstants.CONFIG_SUBMODULE_SECTION, key,
+                    ConfigConstants.CONFIG_KEY_URL);
+                break;
+              }
+            }
+          }
+          if (modulesUrl != null) {
+            submodules.put(normalizePath(event.getProjectName(), modulesUrl, false),
+                sw.getObjectId().name());
+          } else {
+            log.warn(String.format(
+                "invalid .gitmodules in %s %s configuration: missing url for %s",
+                event.getProjectName(), event.getRefName(), sw.getPath()));
+          }
         }
       } catch (ConfigInvalidException e) {
-        log.warn("Invalid .gitmodules configuration while parsing "
-            + event.getProjectName());
+        log.warn(String.format(
+            "Invalid .gitmodules configuration while parsing %s branch %s",
+            event.getProjectName(), event.getRefName()), e);
       }
     }
     return submodules;
diff --git a/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/ScanTaskImpl.java b/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/ScanTaskImpl.java
index 8f7d251..642c9a0 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/ScanTaskImpl.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/repositoryuse/ScanTaskImpl.java
@@ -87,7 +87,12 @@
         RefUpdate rescan = new RefUpdate(project,
             branches.get(currentBranch).getName(), ObjectId.zeroId().getName(),
             branches.get(currentBranch).getObjectId().name());
-        refUpdateHandlerFactory.create(rescan).run();
+        try {
+          refUpdateHandlerFactory.create(rescan).run();
+        } catch (Exception e) {
+          log.error(String.format("Error updating %s branch %s: %s", project,
+              branch, e.getMessage()), e);
+        }
       }
     } else {
       if (branch.startsWith(Constants.R_HEADS)) {
@@ -98,7 +103,12 @@
         RefUpdate rescan = new RefUpdate(project,
             branches.get(branch).getName(), ObjectId.zeroId().getName(),
             branches.get(branch).getObjectId().name());
-        refUpdateHandlerFactory.create(rescan).run();
+        try {
+          refUpdateHandlerFactory.create(rescan).run();
+        } catch (Exception e) {
+          log.error(String.format("Error updating %s branch %s: %s", project,
+              branch, e.getMessage()), e);
+        }
       } else {
         log.warn(String.format("Branch %s does not exist; skipping", branch));
       }