Adding extra field num_distinct_files field to track number of different files aggregated.
Adding also list of files included in every commit
Change-Id: I91ee6397bc44374deaf6ab80484e1391a118346b
Jira-Id: GERICS-619
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
index faefac8..f24d996 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
@@ -18,6 +18,8 @@
import com.google.gerrit.server.git.GitRepositoryManager
import com.google.gerrit.server.project.{ProjectResource, ProjectsCollection}
import com.google.gerrit.sshd.{CommandMetaData, SshCommand}
+import com.google.gson.TypeAdapter
+import com.google.gson.stream.{JsonReader, JsonWriter}
import com.google.inject.Inject
import com.googlesource.gerrit.plugins.analytics.common.DateConversions._
import com.googlesource.gerrit.plugins.analytics.common._
@@ -144,7 +146,7 @@
}
}
-case class CommitInfo(sha1: String, date: Long, merge: Boolean)
+case class CommitInfo(sha1: String, date: Long, merge: Boolean, files: java.util.Set[String])
case class UserActivitySummary(year: Integer,
month: Integer,
@@ -154,6 +156,7 @@
email: String,
numCommits: Integer,
numFiles: Integer,
+ numDistinctFiles: Integer,
addedLines: Integer,
deletedLines: Integer,
commits: Array[CommitInfo],
@@ -179,7 +182,7 @@
statisticsHandler.forCommits(uca.getIds: _*).map { stat =>
UserActivitySummary(
year, month, day, hour, uca.getName, uca.getEmail, uca.getCount,
- stat.numFiles, stat.addedLines, stat.deletedLines,
+ stat.numFiles, stat.numDistinctFiles, stat.addedLines, stat.deletedLines,
stat.commits.toArray, branches.toArray, uca.getLatest, stat.isForMergeCommits
)
}
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala
index cf90d4a..9f92e3c 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala
@@ -14,7 +14,6 @@
package com.googlesource.gerrit.plugins.analytics.common
-import com.googlesource.gerrit.plugins.analytics.common.AggregatedCommitHistogram.AggregationStrategyMapping
import org.eclipse.jgit.revwalk.{RevCommit, RevWalk}
/**
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
index 7d434b1..e5aae6c 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
@@ -24,16 +24,42 @@
import scala.collection.JavaConversions._
-case class CommitsStatistics(numFiles: Int, addedLines: Int, deletedLines: Int, isForMergeCommits: Boolean, commits: List[CommitInfo]) {
+/**
+ * Collects overall stats on a series of commits and provides some basic info on the included commits
+ *
+ * @param addedLines sum of the number of line additions in the included commits
+ * @param deletedLines sum of the number of line deletions in the included commits
+ * @param isForMergeCommits true if the current instance is including stats for merge commits and false if
+ * calculated for NON merge commits. The current code is not generating stats objects for
+ * a mixture of merge and non-merge commits
+ * @param commits list of commits the stats are calculated for
+ */
+case class CommitsStatistics(
+ addedLines: Int,
+ deletedLines: Int,
+ isForMergeCommits: Boolean,
+ commits: List[CommitInfo]
+ ) {
require(commits.forall(_.merge == isForMergeCommits), s"Creating a stats object with isMergeCommit = $isForMergeCommits but containing commits of different type")
- def isEmpty = commits.isEmpty
+ /**
+ * sum of the number of files in each of the included commits
+ */
+ val numFiles: Int = commits.map(_.files.size).sum
+
+ /**
+ * number of distinct files the included commits have been touching
+ */
+ val numDistinctFiles: Int = changedFiles.size
+
+ def isEmpty: Boolean = commits.isEmpty
+
+ def changedFiles: Set[String] = commits.map(_.files.toSet).fold(Set.empty)(_ union _)
// Is not a proper monoid since we cannot sum a MergeCommit with a non merge one but it would overkill to define two classes
- protected [common] def + (that: CommitsStatistics) = {
+ def + (that: CommitsStatistics) = {
require(this.isForMergeCommits == that.isForMergeCommits, "Cannot sum a merge commit stats with a non merge commit stats")
this.copy(
- numFiles = this.numFiles + that.numFiles,
addedLines = this.addedLines + that.addedLines,
deletedLines = this.deletedLines + that.deletedLines,
commits = this.commits ++ that.commits
@@ -42,7 +68,7 @@
}
object CommitsStatistics {
- val Empty = CommitsStatistics(0, 0, 0, false, List.empty)
+ val Empty = CommitsStatistics(0, 0, false, List.empty)
val EmptyMerge = Empty.copy(isForMergeCommits = true)
}
@@ -96,9 +122,11 @@
edit <- df.toFileHeader(diff).toEditList
} yield Lines(edit.getEndA - edit.getBeginA, edit.getEndB - edit.getBeginB)).fold(Lines(0, 0))(_ + _)
- val commitInfo = CommitInfo(objectId.getName, commit.getAuthorIdent.getWhen.getTime, commit.isMerge)
+ val files: Set[String] = diffs.map(df.toFileHeader(_).getNewPath).toSet
- CommitsStatistics(diffs.size, lines.added, lines.deleted, commitInfo.merge, List(commitInfo))
+ val commitInfo = CommitInfo(objectId.getName, commit.getAuthorIdent.getWhen.getTime, commit.isMerge, files)
+
+ CommitsStatistics(lines.added, lines.deleted, commitInfo.merge, List(commitInfo))
}
}
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala
index ba80bf2..58b5dc5 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala
@@ -19,16 +19,18 @@
import com.google.gerrit.server.OutputFormat
import com.google.gson.{Gson, GsonBuilder}
import com.google.inject.Singleton
+import com.googlesource.gerrit.plugins.analytics.CommitInfo
@Singleton
class GsonFormatter {
- val gsonBuilder: GsonBuilder = OutputFormat.JSON_COMPACT.newGsonBuilder
+ val gsonBuilder: GsonBuilder =
+ OutputFormat.JSON_COMPACT.newGsonBuilder
def format[T](values: TraversableOnce[T], out: PrintWriter) {
val gson: Gson = gsonBuilder.create
for (value <- values) {
gson.toJson(value, out)
- out.println
+ out.println()
}
}
}
\ No newline at end of file
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala
new file mode 100644
index 0000000..7de2c03
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala
@@ -0,0 +1,22 @@
+package com.googlesource.gerrit.plugins.analytics.test
+
+import com.google.common.collect.Sets.newHashSet
+import com.google.gerrit.server.OutputFormat
+import com.googlesource.gerrit.plugins.analytics.CommitInfo
+import org.scalatest.{FlatSpec, Matchers}
+
+class CommitInfoSpec extends FlatSpec with Matchers {
+
+ "CommitInfo" should "be serialised as JSON correctly" in {
+ val commitInfo = CommitInfo(sha1 = "sha", date = 1000l, merge = false, files = newHashSet("file1", "file2"))
+
+ val gsonBuilder = OutputFormat.JSON_COMPACT.newGsonBuilder
+
+ val actual = gsonBuilder.create().toJson(commitInfo)
+ List(actual) should contain oneOf(
+ "{\"sha1\":\"sha\",\"date\":1000,\"merge\":false,\"files\":[\"file1\",\"file2\"]}",
+ "{\"sha1\":\"sha\",\"date\":1000,\"merge\":false,\"files\":[\"file2\",\"file1\"]}"
+ )
+ }
+
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
index 6912e7b..0f9ba34 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
@@ -16,14 +16,16 @@
import java.util.Date
+import com.google.common.collect.Sets
+import com.google.common.collect.Sets.newHashSet
import com.googlesource.gerrit.plugins.analytics.CommitInfo
import com.googlesource.gerrit.plugins.analytics.common.{CommitsStatistics, Statistics}
import org.eclipse.jgit.api.{Git, MergeResult}
import org.eclipse.jgit.internal.storage.file.FileRepository
-import org.eclipse.jgit.lib.ObjectId
import org.eclipse.jgit.revwalk.RevCommit
import org.scalatest.{FlatSpec, Inside, Matchers}
+
class CommitStatisticsSpec extends FlatSpec with GitTestCase with Matchers with Inside {
@@ -50,10 +52,6 @@
}
}
- def idWithInfo(revCommit: RevCommit): (ObjectId, CommitInfo) = {
- revCommit.getId -> CommitInfo(revCommit.getId.getName, revCommit.getCommitterIdent.getWhen.getTime, revCommit.getParentCount > 1)
- }
-
"CommitStatistics" should "stats a single file added" in new TestEnvironment {
val change = commit("user", "file1.txt", "line1\nline2")
@@ -64,6 +62,22 @@
}
}
+ it should "sum to another compatible CommitStatistics generating an aggregated stat" in {
+ val commit1 = CommitInfo("sha_1", 1000l, false, newHashSet("file1"))
+ val commit2 = CommitInfo("sha_2", 2000l, false, newHashSet("file1"))
+ val commit3 = CommitInfo("sha_3", 3000l, false, newHashSet("file2"))
+ val commit4 = CommitInfo("sha_4", 1000l, false, newHashSet("file1"))
+
+ val stat1 = CommitsStatistics(3, 4, false, List(commit1, commit2))
+ val stat2 = CommitsStatistics(5, 7, false, List(commit3, commit4))
+
+ (stat1 + stat2) shouldBe CommitsStatistics(8, 11, false, List(commit1, commit2, commit3, commit4))
+ }
+
+ it should "fail if trying to be added to a CommitStatistics object for a different isMerge value" in {
+ an [IllegalArgumentException] should be thrownBy (CommitsStatistics.EmptyMerge + CommitsStatistics.Empty)
+ }
+
it should "stats multiple files added" in new TestEnvironment {
val initial = commit("user", "file1.txt", "line1\nline2\n")
val second = add(testRepo,
@@ -103,7 +117,8 @@
"second commit")
inside(stats.forCommits(initial, second)) { case List(nonMergeStats: CommitsStatistics) =>
- nonMergeStats.numFiles should be(4) //this is wrong? shouldn't we consider just three files?
+ nonMergeStats.numFiles should be(4)
+ nonMergeStats.numDistinctFiles should be(3)
nonMergeStats.addedLines should be(6)
nonMergeStats.deletedLines should be(1)
}
@@ -140,4 +155,5 @@
case wrongContent => fail(s"Expected two results instad got $wrongContent")
}
}
+
}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala
index 423f92e..3bf36ae 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala
@@ -35,21 +35,15 @@
package com.googlesource.gerrit.plugins.analytics.test
-import java.io.File
-import java.io.PrintWriter
+import java.io.{File, PrintWriter}
import java.text.MessageFormat
import java.util.Date
-import java.util
-import com.googlesource.gerrit.plugins.analytics.common.ManagedResource
import com.googlesource.gerrit.plugins.analytics.common.ManagedResource.use
-import org.eclipse.jgit.api.Git
import org.eclipse.jgit.api.MergeCommand.FastForwardMode
-import org.eclipse.jgit.api.MergeResult
+import org.eclipse.jgit.api.{Git, MergeResult}
import org.eclipse.jgit.api.errors.GitAPIException
-import org.eclipse.jgit.lib.Constants
-import org.eclipse.jgit.lib.PersonIdent
-import org.eclipse.jgit.lib.Ref
+import org.eclipse.jgit.lib.{Constants, PersonIdent, Ref}
import org.eclipse.jgit.merge.MergeStrategy
import org.eclipse.jgit.notes.Note
import org.eclipse.jgit.revwalk.RevCommit