Add ability to ignore specific files suffixes

Introduce a new 'ignore-file-suffix' configuration in the
analytics.config file to ignore matching files from the statistics.

A typical case for this would be to ignore binary files and avoid
accounting for them when extracting contributor statistics.

This conveniently speeds up performances since skips completely the
loading and the diffing of binary files, which is a quite an expensive
computation.

Feature: Issue 10756
Change-Id: I60deb05586b7fd984f3067e3b3df465cd46597a7
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index 6c08a06..e9d9718 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -34,4 +34,24 @@
   ```ini
   [contributors]
     extract-issues = true
+  ```
+
+- `contributors.ignore-file-suffix`
+
+  List of file suffixes to be ignored from the analytics.
+  Files matching any of the specified suffixes will not be accounted for in
+  `num_files`, `num_distinct_files`, `added_lines` and `deleted_lines` fields
+  nor will they be listed in the `commits.files` array field.
+  This can be used to explicitly ignore binary files for which, file-based
+  statistics makes little or no sense.
+
+  Default: empty
+
+  Example:
+  ```ini
+  [contributors]
+    ignore-file-suffix = .dmg
+    ignore-file-suffix = .ko
+    ignore-file-suffix = .png
+    ignore-file-suffix = .exe
   ```
\ No newline at end of file
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/AnalyticsConfig.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/AnalyticsConfig.scala
index fd31626..10eb8ed 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/AnalyticsConfig.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/AnalyticsConfig.scala
@@ -23,15 +23,18 @@
 trait AnalyticsConfig {
   def botlikeFilenameRegexps: List[String]
   def isExtractIssues: Boolean
+  def ignoreFileSuffixes: List[String]
 }
 
 class AnalyticsConfigImpl @Inject() (val pluginConfigFactory: PluginConfigFactory, @PluginName val pluginName: String) extends AnalyticsConfig{
   lazy val botlikeFilenameRegexps: List[String] = pluginConfigBotLikeFilenameRegexp
   lazy val isExtractIssues: Boolean = pluginConfig.getBoolean(Contributors, null, ExtractIssues, false)
+  lazy val ignoreFileSuffixes: List[String] = pluginConfig.getStringList(Contributors, null, IgnoreFileSuffix).toList
 
   private lazy val pluginConfig: Config = pluginConfigFactory.getGlobalPluginConfig(pluginName)
   private val Contributors = "contributors"
   private val BotlikeFilenameRegexp = "botlike-filename-regexp"
   private val ExtractIssues = "extract-issues"
+  private val IgnoreFileSuffix = "ignore-file-suffix"
   private lazy val pluginConfigBotLikeFilenameRegexp = pluginConfig.getStringList(Contributors, null, BotlikeFilenameRegexp).toList
 }
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCache.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCache.scala
index 8d573f0..c684c57 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCache.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCache.scala
@@ -20,7 +20,6 @@
 import com.googlesource.gerrit.plugins.analytics.common.CommitsStatisticsCache.COMMITS_STATISTICS_CACHE
 import org.eclipse.jgit.lib.ObjectId
 
-@ImplementedBy(classOf[CommitsStatisticsCacheImpl])
 trait CommitsStatisticsCache {
   def get(project: String, objectId: ObjectId): CommitsStatistics
 }
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCacheModule.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCacheModule.scala
index 9b2e358..5e4fe2f 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCacheModule.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsCacheModule.scala
@@ -21,6 +21,7 @@
 class CommitsStatisticsCacheModule extends CacheModule() {
 
   override protected def configure(): Unit = {
+    bind(classOf[CommitsStatisticsCache]).to(classOf[CommitsStatisticsCacheImpl])
     persist(CommitsStatisticsCache.COMMITS_STATISTICS_CACHE, classOf[CommitsStatisticsCacheKey], classOf[CommitsStatistics])
       .version(1)
       .diskLimit(-1)
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsLoader.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsLoader.scala
index 1144901..8025922 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsLoader.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatisticsLoader.scala
@@ -34,7 +34,8 @@
   gitRepositoryManager: GitRepositoryManager,
   projectCache: ProjectCache,
   botLikeExtractor: BotLikeExtractor,
-  config: AnalyticsConfig
+  config: AnalyticsConfig,
+  ignoreFileSuffixFilter: IgnoreFileSuffixFilter
 ) extends CacheLoader[CommitsStatisticsCacheKey, CommitsStatistics] {
 
   override def load(cacheKey: CommitsStatisticsCacheKey): CommitsStatistics = {
@@ -70,6 +71,7 @@
 
           val df = new DiffFormatter(DisabledOutputStream.INSTANCE)
           df.setRepository(repo)
+          df.setPathFilter(ignoreFileSuffixFilter)
           df.setDiffComparator(RawTextComparator.DEFAULT)
           df.setDetectRenames(true)
           val diffs = df.scan(oldTree, newTree).asScala
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/IgnoreFileSuffixFilter.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/IgnoreFileSuffixFilter.scala
new file mode 100644
index 0000000..d875eb0
--- /dev/null
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/IgnoreFileSuffixFilter.scala
@@ -0,0 +1,35 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.common
+
+import com.google.inject.{Inject, Singleton}
+import com.googlesource.gerrit.plugins.analytics.AnalyticsConfig
+import org.eclipse.jgit.treewalk.TreeWalk
+import org.eclipse.jgit.treewalk.filter.TreeFilter
+import org.gitective.core.PathFilterUtils
+
+@Singleton
+case class IgnoreFileSuffixFilter @Inject() (config: AnalyticsConfig) extends TreeFilter {
+
+  private lazy val suffixFilter =
+    if (config.ignoreFileSuffixes.nonEmpty)
+      PathFilterUtils.orSuffix(config.ignoreFileSuffixes:_*).negate()
+    else
+      TreeFilter.ALL
+
+  override def include(treeWalk: TreeWalk): Boolean = treeWalk.isSubtree || suffixFilter.include(treeWalk)
+  override def shouldBeRecursive(): Boolean = suffixFilter.shouldBeRecursive()
+  override def clone(): TreeFilter = this
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/BotLikeExtractorImplSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/BotLikeExtractorImplSpec.scala
index 5bb73c5..42b0d1c 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/BotLikeExtractorImplSpec.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/BotLikeExtractorImplSpec.scala
@@ -49,5 +49,6 @@
   private def newBotLikeExtractorImpl(botLikeRegexps: List[String]) = new BotLikeExtractorImpl(new AnalyticsConfig {
     override lazy val botlikeFilenameRegexps = botLikeRegexps
     override lazy val isExtractIssues: Boolean = false
+    override def ignoreFileSuffixes: List[String] = List.empty
   })
 }
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/IgnoreFileSuffixFilterSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/IgnoreFileSuffixFilterSpec.scala
new file mode 100644
index 0000000..4ed373c
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/IgnoreFileSuffixFilterSpec.scala
@@ -0,0 +1,54 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.common
+
+import com.google.gerrit.acceptance.UseLocalDisk
+import com.googlesource.gerrit.plugins.analytics.AnalyticsConfig
+import com.googlesource.gerrit.plugins.analytics.test.GerritTestDaemon
+import org.eclipse.jgit.treewalk.TreeWalk
+import org.scalatest.{FlatSpec, Matchers}
+
+@UseLocalDisk
+class IgnoreFileSuffixFilterSpec extends FlatSpec with Matchers with GerritTestDaemon {
+
+  behavior of "IgnoreFileSuffixFilter"
+
+  it should "include a file with suffix not listed in configuration" in {
+    val ignoreSuffix = ".dmg"
+    val fileSuffix = ".txt"
+    val aFile = s"aFile$fileSuffix"
+    val commit = testFileRepository.commitFile(aFile, "some content")
+
+    val walk = TreeWalk.forPath(testFileRepository.getRepository, aFile, commit.getTree)
+
+    newIgnoreFileSuffix(ignoreSuffix).include(walk) shouldBe true
+  }
+
+  it should "not include a file with suffix listed in configuration" in {
+    val ignoreSuffix = ".dmg"
+    val aFile = s"aFile$ignoreSuffix"
+    val commit = testFileRepository.commitFile(aFile, "some content")
+
+    val walk = TreeWalk.forPath(testFileRepository.getRepository, aFile, commit.getTree)
+
+    newIgnoreFileSuffix(ignoreSuffix).include(walk) shouldBe false
+  }
+
+  private def newIgnoreFileSuffix(suffixes: String*) = IgnoreFileSuffixFilter(new AnalyticsConfig {
+    override lazy val botlikeFilenameRegexps = List.empty
+    override lazy val isExtractIssues: Boolean = false
+    override def ignoreFileSuffixes: List[String] = suffixes.toList
+  })
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
index b2ae9e7..8c5eb60 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
@@ -16,7 +16,7 @@
 
 import com.google.gerrit.acceptance.UseLocalDisk
 import com.googlesource.gerrit.plugins.analytics.CommitInfo
-import com.googlesource.gerrit.plugins.analytics.common.{CommitsStatistics, CommitsStatisticsLoader, Statistics}
+import com.googlesource.gerrit.plugins.analytics.common.{CommitsStatistics, Statistics}
 import org.scalatest.{FlatSpec, Inside, Matchers}
 
 @UseLocalDisk
@@ -131,5 +131,4 @@
       case wrongContent => fail(s"Expected two results instead got $wrongContent")
     }
   }
-
 }
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/ContributorsServiceSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/ContributorsServiceSpec.scala
new file mode 100644
index 0000000..39aefb4
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/ContributorsServiceSpec.scala
@@ -0,0 +1,76 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.test
+
+import java.lang.reflect.Type
+
+import com.google.gerrit.acceptance.UseLocalDisk
+import com.google.gson.{Gson, JsonDeserializationContext, JsonDeserializer, JsonElement}
+import com.googlesource.gerrit.plugins.analytics.UserActivitySummary
+import com.googlesource.gerrit.plugins.analytics.common.AggregationStrategy.EMAIL_HOUR
+import com.googlesource.gerrit.plugins.analytics.common.GsonFormatter
+import com.googlesource.gerrit.plugins.analytics.test.TestAnalyticsConfig.IGNORED_FILE_SUFFIX
+import org.scalatest.{FlatSpec, Inside, Matchers}
+
+import scala.collection.JavaConverters._
+
+@UseLocalDisk
+class ContributorsServiceSpec extends FlatSpec with Matchers with GerritTestDaemon with Inside {
+
+  "ContributorsService" should "get commit statistics" in {
+    val aContributorName = "Contributor Name"
+    val aContributorEmail = "contributor@test.com"
+    val aFileName = "file.txt"
+    val anIgnoredFileName = s"file$IGNORED_FILE_SUFFIX"
+
+    val commit = testFileRepository.commitFiles(
+      List(anIgnoredFileName -> "1\n2\n", aFileName -> "1\n2\n"),
+      newPersonIdent(aContributorName, aContributorEmail)
+    )
+
+    val statsJson = daemonTest.restSession.get(s"/projects/${fileRepositoryName.get()}/analytics~contributors?aggregate=${EMAIL_HOUR.name}")
+
+    statsJson.assertOK()
+
+    val stats = TestGson().fromJson(statsJson.getEntityContent, classOf[UserActivitySummary])
+
+    inside(stats) {
+      case UserActivitySummary(_,_,_,_,theAuthorName,theAuthorEmail,numCommits,numFiles, numDistinctFiles, addedLines, deletedLines, commits, _, _, _, _, _, _) =>
+        theAuthorName shouldBe aContributorName
+        theAuthorEmail shouldBe aContributorEmail
+        numCommits shouldBe 1
+        numFiles shouldBe 1
+        numDistinctFiles shouldBe 1
+        addedLines shouldBe 2
+        deletedLines shouldBe 0
+        commits.head.files should contain only aFileName
+        commits.head.sha1 shouldBe commit.name
+    }
+  }
+}
+
+object TestGson {
+
+  class SetStringDeserializer extends JsonDeserializer[Set[String]] {
+    override def deserialize(json: JsonElement, typeOfT: Type, context: JsonDeserializationContext): Set[String] =
+      json.getAsJsonArray.asScala.map(_.getAsString).toSet
+  }
+
+  def apply(): Gson =
+    new GsonFormatter()
+    .gsonBuilder
+    .registerTypeHierarchyAdapter(classOf[Iterable[String]], new SetStringDeserializer)
+    .create()
+}
\ No newline at end of file
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GerritTestDaemon.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GerritTestDaemon.scala
index 59a6d90..db82d13 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GerritTestDaemon.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GerritTestDaemon.scala
@@ -17,11 +17,13 @@
 import java.io.File
 import java.util.{Date, UUID}
 
-import com.google.gerrit.acceptance.{AbstractDaemonTest, GitUtil}
-import com.google.gerrit.extensions.annotations.PluginName
+import com.google.gerrit.acceptance._
+import com.google.gerrit.extensions.restapi.RestApiModule
 import com.google.gerrit.reviewdb.client.Project
-import com.google.inject.{AbstractModule, Module}
-import com.googlesource.gerrit.plugins.analytics.AnalyticsConfig
+import com.google.gerrit.server.project.ProjectResource.PROJECT_KIND
+import com.google.inject.AbstractModule
+import com.googlesource.gerrit.plugins.analytics.{AnalyticsConfig, ContributorsResource}
+import com.googlesource.gerrit.plugins.analytics.common.CommitsStatisticsCache
 import org.eclipse.jgit.api.MergeCommand.FastForwardMode
 import org.eclipse.jgit.api.{Git, MergeResult}
 import org.eclipse.jgit.internal.storage.file.FileRepository
@@ -68,6 +70,7 @@
     new PersonIdent(new PersonIdent(name, email), ts)
 
   override def beforeEach {
+    daemonTest.setUpTestPlugin()
     fileRepositoryName = daemonTest.newProject(testSpecificRepositoryName)
     fileRepository = daemonTest.getRepository(fileRepositoryName)
     testFileRepository = GitUtil.newTestRepository(fileRepository)
@@ -135,8 +138,13 @@
   }
 }
 
-object GerritTestDaemon extends AbstractDaemonTest {
+@TestPlugin(
+  name = "analytics",
+  sysModule = "com.googlesource.gerrit.plugins.analytics.test.GerritTestDaemon$TestModule"
+)
+object GerritTestDaemon extends LightweightPluginDaemonTest {
   baseConfig = new Config()
+  tempDataDir.create()
 
   def newProject(nameSuffix: String) = {
     resourcePrefix = ""
@@ -149,15 +157,21 @@
   def adminAuthor = admin.getIdent
 
   def getInstance[T](clazz: Class[T]): T =
-    server.getTestInjector.getInstance(clazz)
+    plugin.getSysInjector.getInstance(clazz)
 
-  override def createModule(): Module = new AbstractModule {
+  def getCanonicalWebUrl: String = canonicalWebUrl.get()
+
+  def restSession: RestSession = adminRestSession
+
+  class TestModule extends AbstractModule {
     override def configure(): Unit = {
-      bind(classOf[AnalyticsConfig]).toInstance(new AnalyticsConfig {
-        override def botlikeFilenameRegexps: List[String] = List.empty
-        override def isExtractIssues: Boolean = true
+      bind(classOf[CommitsStatisticsCache]).to(classOf[CommitsStatisticsNoCache])
+      bind(classOf[AnalyticsConfig]).toInstance(TestAnalyticsConfig)
+      install(new RestApiModule() {
+        override protected def configure() = {
+          get(PROJECT_KIND, "contributors").to(classOf[ContributorsResource])
+        }
       })
-      bind(classOf[String]).annotatedWith(classOf[PluginName]).toInstance("analytics")
     }
   }
 }
\ No newline at end of file
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/TestAnalyticsConfig.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/TestAnalyticsConfig.scala
new file mode 100644
index 0000000..598df31
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/TestAnalyticsConfig.scala
@@ -0,0 +1,24 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.test
+
+import com.googlesource.gerrit.plugins.analytics.AnalyticsConfig
+
+object TestAnalyticsConfig extends AnalyticsConfig {
+  val IGNORED_FILE_SUFFIX = ".bin"
+  val botlikeFilenameRegexps: List[String] = List.empty
+  val isExtractIssues: Boolean = true
+  val ignoreFileSuffixes: List[String] = List(IGNORED_FILE_SUFFIX)
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/TestCommitStatisticsNoCache.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/TestCommitStatisticsNoCache.scala
index 8834418..b0b962f 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/TestCommitStatisticsNoCache.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/TestCommitStatisticsNoCache.scala
@@ -14,6 +14,7 @@
 
 package com.googlesource.gerrit.plugins.analytics.test
 
+import com.google.inject.Inject
 import com.googlesource.gerrit.plugins.analytics.common._
 import org.eclipse.jgit.lib.ObjectId
 
@@ -23,7 +24,7 @@
   lazy val commitsStatisticsNoCache  = CommitsStatisticsNoCache(daemonTest.getInstance(classOf[CommitsStatisticsLoader]))
 }
 
-case class CommitsStatisticsNoCache(commitsStatisticsLoader: CommitsStatisticsLoader) extends CommitsStatisticsCache {
+case class CommitsStatisticsNoCache @Inject() (commitsStatisticsLoader: CommitsStatisticsLoader) extends CommitsStatisticsCache {
   override def get(project: String, objectId: ObjectId): CommitsStatistics =
     commitsStatisticsLoader.load(CommitsStatisticsCacheKey(project, objectId))
 }