Adding extra field num_distinct_files field to track number of different files aggregated.
Adding also list of files included in every commit

Change-Id: I91ee6397bc44374deaf6ab80484e1391a118346b
Jira-Id: GERICS-619
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
index faefac8..f24d996 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
@@ -18,6 +18,8 @@
 import com.google.gerrit.server.git.GitRepositoryManager
 import com.google.gerrit.server.project.{ProjectResource, ProjectsCollection}
 import com.google.gerrit.sshd.{CommandMetaData, SshCommand}
+import com.google.gson.TypeAdapter
+import com.google.gson.stream.{JsonReader, JsonWriter}
 import com.google.inject.Inject
 import com.googlesource.gerrit.plugins.analytics.common.DateConversions._
 import com.googlesource.gerrit.plugins.analytics.common._
@@ -144,7 +146,7 @@
   }
 }
 
-case class CommitInfo(sha1: String, date: Long, merge: Boolean)
+case class CommitInfo(sha1: String, date: Long, merge: Boolean, files: java.util.Set[String])
 
 case class UserActivitySummary(year: Integer,
                                month: Integer,
@@ -154,6 +156,7 @@
                                email: String,
                                numCommits: Integer,
                                numFiles: Integer,
+                               numDistinctFiles: Integer,
                                addedLines: Integer,
                                deletedLines: Integer,
                                commits: Array[CommitInfo],
@@ -179,7 +182,7 @@
         statisticsHandler.forCommits(uca.getIds: _*).map { stat =>
           UserActivitySummary(
             year, month, day, hour, uca.getName, uca.getEmail, uca.getCount,
-            stat.numFiles, stat.addedLines, stat.deletedLines,
+            stat.numFiles, stat.numDistinctFiles, stat.addedLines, stat.deletedLines,
             stat.commits.toArray, branches.toArray, uca.getLatest, stat.isForMergeCommits
           )
         }
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala
index cf90d4a..9f92e3c 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala
@@ -14,7 +14,6 @@
 
 package com.googlesource.gerrit.plugins.analytics.common
 
-import com.googlesource.gerrit.plugins.analytics.common.AggregatedCommitHistogram.AggregationStrategyMapping
 import org.eclipse.jgit.revwalk.{RevCommit, RevWalk}
 
 /**
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
index 7d434b1..e5aae6c 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
@@ -24,16 +24,42 @@
 
 import scala.collection.JavaConversions._
 
-case class CommitsStatistics(numFiles: Int, addedLines: Int, deletedLines: Int, isForMergeCommits: Boolean, commits: List[CommitInfo]) {
+/**
+  * Collects overall stats on a series of commits and provides some basic info on the included commits
+  *
+  * @param addedLines           sum of the number of line additions in the included commits
+  * @param deletedLines         sum of the number of line deletions in the included commits
+  * @param isForMergeCommits    true if the current instance is including stats for merge commits and false if
+  *                             calculated for NON merge commits. The current code is not generating stats objects for
+  *                             a mixture of merge and non-merge commits
+  * @param commits              list of commits the stats are calculated for
+  */
+case class CommitsStatistics(
+                              addedLines: Int,
+                              deletedLines: Int,
+                              isForMergeCommits: Boolean,
+                              commits: List[CommitInfo]
+                            ) {
   require(commits.forall(_.merge == isForMergeCommits), s"Creating a stats object with isMergeCommit = $isForMergeCommits but containing commits of different type")
 
-  def isEmpty = commits.isEmpty
+  /**
+    * sum of the number of files in each of the included commits
+    */
+  val numFiles: Int = commits.map(_.files.size).sum
+
+  /**
+    * number of distinct files the included commits have been touching
+    */
+  val numDistinctFiles: Int = changedFiles.size
+
+  def isEmpty: Boolean = commits.isEmpty
+
+  def changedFiles: Set[String] = commits.map(_.files.toSet).fold(Set.empty)(_ union _)
 
   // Is not a proper monoid since we cannot sum a MergeCommit with a non merge one but it would overkill to define two classes
-  protected [common] def + (that: CommitsStatistics) = {
+  def + (that: CommitsStatistics) = {
     require(this.isForMergeCommits == that.isForMergeCommits, "Cannot sum a merge commit stats with a non merge commit stats")
     this.copy(
-      numFiles = this.numFiles + that.numFiles,
       addedLines = this.addedLines + that.addedLines,
       deletedLines = this.deletedLines + that.deletedLines,
       commits = this.commits ++ that.commits
@@ -42,7 +68,7 @@
 }
 
 object CommitsStatistics {
-  val Empty = CommitsStatistics(0, 0, 0, false, List.empty)
+  val Empty = CommitsStatistics(0, 0, false, List.empty)
   val EmptyMerge = Empty.copy(isForMergeCommits = true)
 }
 
@@ -96,9 +122,11 @@
         edit <- df.toFileHeader(diff).toEditList
       } yield Lines(edit.getEndA - edit.getBeginA, edit.getEndB - edit.getBeginB)).fold(Lines(0, 0))(_ + _)
 
-      val commitInfo = CommitInfo(objectId.getName, commit.getAuthorIdent.getWhen.getTime, commit.isMerge)
+      val files: Set[String] = diffs.map(df.toFileHeader(_).getNewPath).toSet
 
-      CommitsStatistics(diffs.size, lines.added, lines.deleted, commitInfo.merge, List(commitInfo))
+      val commitInfo = CommitInfo(objectId.getName, commit.getAuthorIdent.getWhen.getTime, commit.isMerge, files)
+
+      CommitsStatistics(lines.added, lines.deleted, commitInfo.merge, List(commitInfo))
     }
   }
 
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala
index ba80bf2..58b5dc5 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala
@@ -19,16 +19,18 @@
 import com.google.gerrit.server.OutputFormat
 import com.google.gson.{Gson, GsonBuilder}
 import com.google.inject.Singleton
+import com.googlesource.gerrit.plugins.analytics.CommitInfo
 
 @Singleton
 class GsonFormatter {
-  val gsonBuilder: GsonBuilder = OutputFormat.JSON_COMPACT.newGsonBuilder
+  val gsonBuilder: GsonBuilder =
+    OutputFormat.JSON_COMPACT.newGsonBuilder
 
   def format[T](values: TraversableOnce[T], out: PrintWriter) {
     val gson: Gson = gsonBuilder.create
     for (value <- values) {
       gson.toJson(value, out)
-      out.println
+      out.println()
     }
   }
 }
\ No newline at end of file
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala
new file mode 100644
index 0000000..7de2c03
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala
@@ -0,0 +1,22 @@
+package com.googlesource.gerrit.plugins.analytics.test
+
+import com.google.common.collect.Sets.newHashSet
+import com.google.gerrit.server.OutputFormat
+import com.googlesource.gerrit.plugins.analytics.CommitInfo
+import org.scalatest.{FlatSpec, Matchers}
+
+class CommitInfoSpec extends FlatSpec with Matchers {
+
+  "CommitInfo" should "be serialised as JSON correctly" in {
+    val commitInfo = CommitInfo(sha1 = "sha", date = 1000l, merge = false, files = newHashSet("file1", "file2"))
+
+    val gsonBuilder = OutputFormat.JSON_COMPACT.newGsonBuilder
+
+    val actual = gsonBuilder.create().toJson(commitInfo)
+    List(actual) should contain oneOf(
+      "{\"sha1\":\"sha\",\"date\":1000,\"merge\":false,\"files\":[\"file1\",\"file2\"]}",
+      "{\"sha1\":\"sha\",\"date\":1000,\"merge\":false,\"files\":[\"file2\",\"file1\"]}"
+    )
+  }
+
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
index 6912e7b..0f9ba34 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
@@ -16,14 +16,16 @@
 
 import java.util.Date
 
+import com.google.common.collect.Sets
+import com.google.common.collect.Sets.newHashSet
 import com.googlesource.gerrit.plugins.analytics.CommitInfo
 import com.googlesource.gerrit.plugins.analytics.common.{CommitsStatistics, Statistics}
 import org.eclipse.jgit.api.{Git, MergeResult}
 import org.eclipse.jgit.internal.storage.file.FileRepository
-import org.eclipse.jgit.lib.ObjectId
 import org.eclipse.jgit.revwalk.RevCommit
 import org.scalatest.{FlatSpec, Inside, Matchers}
 
+
 class CommitStatisticsSpec extends FlatSpec with GitTestCase with Matchers with Inside {
 
 
@@ -50,10 +52,6 @@
     }
   }
 
-  def idWithInfo(revCommit: RevCommit): (ObjectId, CommitInfo) = {
-    revCommit.getId -> CommitInfo(revCommit.getId.getName, revCommit.getCommitterIdent.getWhen.getTime, revCommit.getParentCount > 1)
-  }
-
   "CommitStatistics" should "stats a single file added" in new TestEnvironment {
     val change = commit("user", "file1.txt", "line1\nline2")
 
@@ -64,6 +62,22 @@
     }
   }
 
+  it should "sum to another compatible CommitStatistics generating an aggregated stat" in {
+    val commit1 = CommitInfo("sha_1", 1000l, false, newHashSet("file1"))
+    val commit2 = CommitInfo("sha_2", 2000l, false, newHashSet("file1"))
+    val commit3 = CommitInfo("sha_3", 3000l, false, newHashSet("file2"))
+    val commit4 = CommitInfo("sha_4", 1000l, false, newHashSet("file1"))
+
+    val stat1 = CommitsStatistics(3, 4, false, List(commit1, commit2))
+    val stat2 = CommitsStatistics(5, 7, false, List(commit3, commit4))
+
+    (stat1 + stat2) shouldBe CommitsStatistics(8, 11, false, List(commit1, commit2, commit3, commit4))
+  }
+
+  it should "fail if trying to be added to a CommitStatistics object for a different isMerge value" in {
+    an [IllegalArgumentException] should be thrownBy  (CommitsStatistics.EmptyMerge + CommitsStatistics.Empty)
+  }
+
   it should "stats multiple files added" in new TestEnvironment {
     val initial = commit("user", "file1.txt", "line1\nline2\n")
     val second = add(testRepo,
@@ -103,7 +117,8 @@
       "second commit")
 
     inside(stats.forCommits(initial, second)) { case List(nonMergeStats: CommitsStatistics) =>
-      nonMergeStats.numFiles should be(4)     //this is wrong? shouldn't we consider just three files?
+      nonMergeStats.numFiles should be(4)
+      nonMergeStats.numDistinctFiles should be(3)
       nonMergeStats.addedLines should be(6)
       nonMergeStats.deletedLines should be(1)
     }
@@ -140,4 +155,5 @@
       case wrongContent => fail(s"Expected two results instad got $wrongContent")
     }
   }
+
 }
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala
index 423f92e..3bf36ae 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala
@@ -35,21 +35,15 @@
 
 package com.googlesource.gerrit.plugins.analytics.test
 
-import java.io.File
-import java.io.PrintWriter
+import java.io.{File, PrintWriter}
 import java.text.MessageFormat
 import java.util.Date
-import java.util
 
-import com.googlesource.gerrit.plugins.analytics.common.ManagedResource
 import com.googlesource.gerrit.plugins.analytics.common.ManagedResource.use
-import org.eclipse.jgit.api.Git
 import org.eclipse.jgit.api.MergeCommand.FastForwardMode
-import org.eclipse.jgit.api.MergeResult
+import org.eclipse.jgit.api.{Git, MergeResult}
 import org.eclipse.jgit.api.errors.GitAPIException
-import org.eclipse.jgit.lib.Constants
-import org.eclipse.jgit.lib.PersonIdent
-import org.eclipse.jgit.lib.Ref
+import org.eclipse.jgit.lib.{Constants, PersonIdent, Ref}
 import org.eclipse.jgit.merge.MergeStrategy
 import org.eclipse.jgit.notes.Note
 import org.eclipse.jgit.revwalk.RevCommit