Adding extra field num_distinct_files field to track number of different files aggregated. Adding also list of files included in every commit Change-Id: I91ee6397bc44374deaf6ab80484e1391a118346b Jira-Id: GERICS-619
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala index faefac8..f24d996 100644 --- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala +++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
@@ -18,6 +18,8 @@ import com.google.gerrit.server.git.GitRepositoryManager import com.google.gerrit.server.project.{ProjectResource, ProjectsCollection} import com.google.gerrit.sshd.{CommandMetaData, SshCommand} +import com.google.gson.TypeAdapter +import com.google.gson.stream.{JsonReader, JsonWriter} import com.google.inject.Inject import com.googlesource.gerrit.plugins.analytics.common.DateConversions._ import com.googlesource.gerrit.plugins.analytics.common._ @@ -144,7 +146,7 @@ } } -case class CommitInfo(sha1: String, date: Long, merge: Boolean) +case class CommitInfo(sha1: String, date: Long, merge: Boolean, files: java.util.Set[String]) case class UserActivitySummary(year: Integer, month: Integer, @@ -154,6 +156,7 @@ email: String, numCommits: Integer, numFiles: Integer, + numDistinctFiles: Integer, addedLines: Integer, deletedLines: Integer, commits: Array[CommitInfo], @@ -179,7 +182,7 @@ statisticsHandler.forCommits(uca.getIds: _*).map { stat => UserActivitySummary( year, month, day, hour, uca.getName, uca.getEmail, uca.getCount, - stat.numFiles, stat.addedLines, stat.deletedLines, + stat.numFiles, stat.numDistinctFiles, stat.addedLines, stat.deletedLines, stat.commits.toArray, branches.toArray, uca.getLatest, stat.isForMergeCommits ) }
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala index cf90d4a..9f92e3c 100644 --- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala +++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/AggregatedHistogramFilterByDates.scala
@@ -14,7 +14,6 @@ package com.googlesource.gerrit.plugins.analytics.common -import com.googlesource.gerrit.plugins.analytics.common.AggregatedCommitHistogram.AggregationStrategyMapping import org.eclipse.jgit.revwalk.{RevCommit, RevWalk} /**
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala index 7d434b1..e5aae6c 100644 --- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala +++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
@@ -24,16 +24,42 @@ import scala.collection.JavaConversions._ -case class CommitsStatistics(numFiles: Int, addedLines: Int, deletedLines: Int, isForMergeCommits: Boolean, commits: List[CommitInfo]) { +/** + * Collects overall stats on a series of commits and provides some basic info on the included commits + * + * @param addedLines sum of the number of line additions in the included commits + * @param deletedLines sum of the number of line deletions in the included commits + * @param isForMergeCommits true if the current instance is including stats for merge commits and false if + * calculated for NON merge commits. The current code is not generating stats objects for + * a mixture of merge and non-merge commits + * @param commits list of commits the stats are calculated for + */ +case class CommitsStatistics( + addedLines: Int, + deletedLines: Int, + isForMergeCommits: Boolean, + commits: List[CommitInfo] + ) { require(commits.forall(_.merge == isForMergeCommits), s"Creating a stats object with isMergeCommit = $isForMergeCommits but containing commits of different type") - def isEmpty = commits.isEmpty + /** + * sum of the number of files in each of the included commits + */ + val numFiles: Int = commits.map(_.files.size).sum + + /** + * number of distinct files the included commits have been touching + */ + val numDistinctFiles: Int = changedFiles.size + + def isEmpty: Boolean = commits.isEmpty + + def changedFiles: Set[String] = commits.map(_.files.toSet).fold(Set.empty)(_ union _) // Is not a proper monoid since we cannot sum a MergeCommit with a non merge one but it would overkill to define two classes - protected [common] def + (that: CommitsStatistics) = { + def + (that: CommitsStatistics) = { require(this.isForMergeCommits == that.isForMergeCommits, "Cannot sum a merge commit stats with a non merge commit stats") this.copy( - numFiles = this.numFiles + that.numFiles, addedLines = this.addedLines + that.addedLines, deletedLines = this.deletedLines + that.deletedLines, commits = this.commits ++ that.commits @@ -42,7 +68,7 @@ } object CommitsStatistics { - val Empty = CommitsStatistics(0, 0, 0, false, List.empty) + val Empty = CommitsStatistics(0, 0, false, List.empty) val EmptyMerge = Empty.copy(isForMergeCommits = true) } @@ -96,9 +122,11 @@ edit <- df.toFileHeader(diff).toEditList } yield Lines(edit.getEndA - edit.getBeginA, edit.getEndB - edit.getBeginB)).fold(Lines(0, 0))(_ + _) - val commitInfo = CommitInfo(objectId.getName, commit.getAuthorIdent.getWhen.getTime, commit.isMerge) + val files: Set[String] = diffs.map(df.toFileHeader(_).getNewPath).toSet - CommitsStatistics(diffs.size, lines.added, lines.deleted, commitInfo.merge, List(commitInfo)) + val commitInfo = CommitInfo(objectId.getName, commit.getAuthorIdent.getWhen.getTime, commit.isMerge, files) + + CommitsStatistics(lines.added, lines.deleted, commitInfo.merge, List(commitInfo)) } }
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala index ba80bf2..58b5dc5 100644 --- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala +++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/GsonFormatter.scala
@@ -19,16 +19,18 @@ import com.google.gerrit.server.OutputFormat import com.google.gson.{Gson, GsonBuilder} import com.google.inject.Singleton +import com.googlesource.gerrit.plugins.analytics.CommitInfo @Singleton class GsonFormatter { - val gsonBuilder: GsonBuilder = OutputFormat.JSON_COMPACT.newGsonBuilder + val gsonBuilder: GsonBuilder = + OutputFormat.JSON_COMPACT.newGsonBuilder def format[T](values: TraversableOnce[T], out: PrintWriter) { val gson: Gson = gsonBuilder.create for (value <- values) { gson.toJson(value, out) - out.println + out.println() } } } \ No newline at end of file
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala new file mode 100644 index 0000000..7de2c03 --- /dev/null +++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitInfoSpec.scala
@@ -0,0 +1,22 @@ +package com.googlesource.gerrit.plugins.analytics.test + +import com.google.common.collect.Sets.newHashSet +import com.google.gerrit.server.OutputFormat +import com.googlesource.gerrit.plugins.analytics.CommitInfo +import org.scalatest.{FlatSpec, Matchers} + +class CommitInfoSpec extends FlatSpec with Matchers { + + "CommitInfo" should "be serialised as JSON correctly" in { + val commitInfo = CommitInfo(sha1 = "sha", date = 1000l, merge = false, files = newHashSet("file1", "file2")) + + val gsonBuilder = OutputFormat.JSON_COMPACT.newGsonBuilder + + val actual = gsonBuilder.create().toJson(commitInfo) + List(actual) should contain oneOf( + "{\"sha1\":\"sha\",\"date\":1000,\"merge\":false,\"files\":[\"file1\",\"file2\"]}", + "{\"sha1\":\"sha\",\"date\":1000,\"merge\":false,\"files\":[\"file2\",\"file1\"]}" + ) + } + +}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala index 6912e7b..0f9ba34 100644 --- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala +++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
@@ -16,14 +16,16 @@ import java.util.Date +import com.google.common.collect.Sets +import com.google.common.collect.Sets.newHashSet import com.googlesource.gerrit.plugins.analytics.CommitInfo import com.googlesource.gerrit.plugins.analytics.common.{CommitsStatistics, Statistics} import org.eclipse.jgit.api.{Git, MergeResult} import org.eclipse.jgit.internal.storage.file.FileRepository -import org.eclipse.jgit.lib.ObjectId import org.eclipse.jgit.revwalk.RevCommit import org.scalatest.{FlatSpec, Inside, Matchers} + class CommitStatisticsSpec extends FlatSpec with GitTestCase with Matchers with Inside { @@ -50,10 +52,6 @@ } } - def idWithInfo(revCommit: RevCommit): (ObjectId, CommitInfo) = { - revCommit.getId -> CommitInfo(revCommit.getId.getName, revCommit.getCommitterIdent.getWhen.getTime, revCommit.getParentCount > 1) - } - "CommitStatistics" should "stats a single file added" in new TestEnvironment { val change = commit("user", "file1.txt", "line1\nline2") @@ -64,6 +62,22 @@ } } + it should "sum to another compatible CommitStatistics generating an aggregated stat" in { + val commit1 = CommitInfo("sha_1", 1000l, false, newHashSet("file1")) + val commit2 = CommitInfo("sha_2", 2000l, false, newHashSet("file1")) + val commit3 = CommitInfo("sha_3", 3000l, false, newHashSet("file2")) + val commit4 = CommitInfo("sha_4", 1000l, false, newHashSet("file1")) + + val stat1 = CommitsStatistics(3, 4, false, List(commit1, commit2)) + val stat2 = CommitsStatistics(5, 7, false, List(commit3, commit4)) + + (stat1 + stat2) shouldBe CommitsStatistics(8, 11, false, List(commit1, commit2, commit3, commit4)) + } + + it should "fail if trying to be added to a CommitStatistics object for a different isMerge value" in { + an [IllegalArgumentException] should be thrownBy (CommitsStatistics.EmptyMerge + CommitsStatistics.Empty) + } + it should "stats multiple files added" in new TestEnvironment { val initial = commit("user", "file1.txt", "line1\nline2\n") val second = add(testRepo, @@ -103,7 +117,8 @@ "second commit") inside(stats.forCommits(initial, second)) { case List(nonMergeStats: CommitsStatistics) => - nonMergeStats.numFiles should be(4) //this is wrong? shouldn't we consider just three files? + nonMergeStats.numFiles should be(4) + nonMergeStats.numDistinctFiles should be(3) nonMergeStats.addedLines should be(6) nonMergeStats.deletedLines should be(1) } @@ -140,4 +155,5 @@ case wrongContent => fail(s"Expected two results instad got $wrongContent") } } + }
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala index 423f92e..3bf36ae 100644 --- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala +++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/GitTestCase.scala
@@ -35,21 +35,15 @@ package com.googlesource.gerrit.plugins.analytics.test -import java.io.File -import java.io.PrintWriter +import java.io.{File, PrintWriter} import java.text.MessageFormat import java.util.Date -import java.util -import com.googlesource.gerrit.plugins.analytics.common.ManagedResource import com.googlesource.gerrit.plugins.analytics.common.ManagedResource.use -import org.eclipse.jgit.api.Git import org.eclipse.jgit.api.MergeCommand.FastForwardMode -import org.eclipse.jgit.api.MergeResult +import org.eclipse.jgit.api.{Git, MergeResult} import org.eclipse.jgit.api.errors.GitAPIException -import org.eclipse.jgit.lib.Constants -import org.eclipse.jgit.lib.PersonIdent -import org.eclipse.jgit.lib.Ref +import org.eclipse.jgit.lib.{Constants, PersonIdent, Ref} import org.eclipse.jgit.merge.MergeStrategy import org.eclipse.jgit.notes.Note import org.eclipse.jgit.revwalk.RevCommit