Extract statistics for each commit

Return additional aggregated statistics such as
num_files, added_lines, deleted_lines for each row returned.

Change-Id: Ib2ba0a905cf905a53dc102a867272c0d093647bb
diff --git a/README.md b/README.md
index ab9945c..7042a17 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,9 @@
 ### Contributors
 
 Extract a unordered list of project contributors statistics, including the
-commits data relevant for statistics purposes, such as timestamp and merge flag.
+commits data relevant for statistics purposes, such as number of involved files,
+number of added/deleted lines, timestamp and merge flag.
+
 
 *REST*
 
@@ -69,17 +71,15 @@
 
 ```
    $ curl http://gerrit.mycompany.com/projects/myproject/analytics~contributors
-
-   {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1,"commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}]}
-   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}]}
+   {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1,"num_files":4,"added_lines":9,"deleted_lines":1,"commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}]}
+   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1,"num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}]}
 ```
 
 SSH Example:
 
 ```
    $ ssh -p 29418 admin@gerrit.mycompany.com analytics contributors myproject --since 2017-08-01 --until 2017-12-31
-
-   {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1,"commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}]}
-   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}]}
+   {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1,"num_files":4,"added_lines":9,"deleted_lines":1,"commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}]}
+   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1,"num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}]}
 ```
 
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
index 5b267f0..4711ee3 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
@@ -21,7 +21,7 @@
 import com.google.inject.Inject
 import com.googlesource.gerrit.plugins.analytics.common.DateConversions._
 import com.googlesource.gerrit.plugins.analytics.common._
-import org.eclipse.jgit.lib.ObjectId
+import org.eclipse.jgit.lib.{ObjectId, Repository}
 import org.kohsuke.args4j.{Option => ArgOption}
 
 
@@ -127,11 +127,12 @@
 
   def get(projectRes: ProjectResource, startDate: Option[Long], stopDate: Option[Long],
           aggregationStrategy: AggregationStrategy): TraversableOnce[UserActivitySummary] = {
-    ManagedResource.use(repoManager.openRepository(projectRes.getNameKey)) {
-      histogram.get(_, new AggregatedHistogramFilterByDates(startDate, stopDate,
+    ManagedResource.use(repoManager.openRepository(projectRes.getNameKey)) { repo =>
+      val stats = new Statistics(repo)
+      histogram.get(repo, new AggregatedHistogramFilterByDates(startDate, stopDate,
         aggregationStrategy))
         .par
-        .map(UserActivitySummary.apply).toStream
+        .map(UserActivitySummary.apply(stats)).toStream
     }
   }
 }
@@ -145,19 +146,24 @@
                                name: String,
                                email: String,
                                numCommits: Integer,
+                               numFiles: Integer,
+                               addedLines: Integer,
+                               deletedLines: Integer,
                                commits: Array[CommitInfo],
                                lastCommitDate: Long)
 
 object UserActivitySummary {
-  def apply(uca: AggregatedUserCommitActivity): UserActivitySummary = {
+  def apply(statisticsHandler: Statistics)(uca: AggregatedUserCommitActivity): UserActivitySummary = {
     val INCLUDESEMPTY = -1
 
     implicit def stringToIntOrNull(x: String): Integer = if (x.isEmpty) null else new Integer(x)
 
     uca.key.split("/", INCLUDESEMPTY) match {
       case a@Array(email, year, month, day, hour) =>
-        UserActivitySummary(year, month, day, hour, uca.getName, uca.getEmail, uca.getCount,
-          getCommits(uca.getIds, uca.getTimes, uca.getMerges), uca.getLatest)
+        val commits = getCommits(uca.getIds, uca.getTimes, uca.getMerges)
+        val stats = statisticsHandler.find(uca.getIds.toSeq)
+        UserActivitySummary(year, month, day, hour, uca.getName, uca.getEmail, uca.getCount, stats.numFiles,
+          stats.addedLines, stats.deletedLines, commits, uca.getLatest)
       case _ => throw new Exception(s"invalid key format found ${uca.key}")
     }
   }
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
new file mode 100644
index 0000000..baa60ed
--- /dev/null
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
@@ -0,0 +1,71 @@
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.common
+
+import org.eclipse.jgit.diff.{DiffFormatter, RawTextComparator}
+import org.eclipse.jgit.lib.{ObjectId, Repository}
+import org.eclipse.jgit.revwalk.RevWalk
+import org.eclipse.jgit.treewalk.{CanonicalTreeParser, EmptyTreeIterator}
+import org.eclipse.jgit.util.io.DisabledOutputStream
+import ManagedResource.use
+
+import scala.collection.JavaConversions._
+
+case class CommitsStatistics(numFiles: Int, addedLines: Int, deletedLines: Int)
+
+/** Computes change statistics (number of files, added and deleted lines) for the commits
+  * of the given repository. */
+class Statistics(repo: Repository) {
+
+  /** Sums the statistics of every commit in `objectIds`; an empty sequence yields all zeros. */
+  def find(objectIds: Seq[ObjectId]): CommitsStatistics =
+    objectIds.foldLeft(CommitsStatistics(0, 0, 0)) { (acc, objectId) =>
+      val stats = find(objectId)
+      CommitsStatistics(acc.numFiles + stats.numFiles, acc.addedLines + stats.addedLines, acc.deletedLines + stats.deletedLines)
+    }
+
+  /** Diffs one commit against its first parent (or the empty tree for a root commit).
+    * TODO: Gerrit may already expose these statistics; check whether they can be injected instead. */
+  def find(objectId: ObjectId): CommitsStatistics =
+    use(new RevWalk(repo)) { rw =>
+      val reader = rw.getObjectReader // the walk's own reader, released when the walk is closed
+      val commit = rw.parseCommit(objectId)
+
+      val oldTree =
+        if (commit.getParentCount == 0) new EmptyTreeIterator // protects against initial commit
+        else new CanonicalTreeParser(null, reader, rw.parseCommit(commit.getParent(0).getId).getTree)
+      val newTree = new CanonicalTreeParser(null, reader, commit.getTree)
+
+      val df = new DiffFormatter(DisabledOutputStream.INSTANCE)
+      try {
+        df.setRepository(repo)
+        df.setDiffComparator(RawTextComparator.DEFAULT)
+        df.setDetectRenames(true)
+        val diffs = df.scan(oldTree, newTree)
+        case class Lines(deleted: Int, added: Int) {
+          def +(other: Lines) = Lines(deleted + other.deleted, added + other.added)
+        }
+        // fold instead of reduce: a commit with no textual edits yields zeros rather than throwing
+        val lines = (for {
+          diff <- diffs
+          edit <- df.toFileHeader(diff).toEditList
+        } yield Lines(edit.getEndA - edit.getBeginA, edit.getEndB - edit.getBeginB)).fold(Lines(0, 0))(_ + _)
+
+        CommitsStatistics(diffs.size, lines.added, lines.deleted)
+      } finally {
+        df.close() // release the formatter's resources even if the diff fails
+      }
+    }
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
new file mode 100644
index 0000000..d37a4a1
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
@@ -0,0 +1,85 @@
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.test
+
+import java.util.Date
+
+import com.googlesource.gerrit.plugins.analytics.common.{CommitsStatistics, Statistics}
+import org.eclipse.jgit.internal.storage.file.FileRepository
+import org.eclipse.jgit.revwalk.RevCommit
+import org.scalatest.{FlatSpec, Inside, Matchers}
+
+import scala.collection.JavaConverters._
+
+class CommitStatisticsSpec extends FlatSpec with GitTestCase with Matchers with Inside {
+
+  // Fixture instantiated by each test: opens testRepo and builds the Statistics under test.
+  class TestEnvironment {
+    val repo = new FileRepository(testRepo)
+    val stats = new Statistics(repo)
+  }
+  // Commits `content` authored by an identity built from `committer`. NOTE(review): `fname` is never used.
+  def commit(committer: String, fname: String, content: String): RevCommit = {
+    val date = new Date()
+    val person = newPersonIdent(committer, committer, date)
+    add(testRepo, "afile.txt", content, author = person, committer = author) // NOTE(review): path is hard-coded; `author` is presumably a GitTestCase member, not `person` - confirm
+  }
+
+  "CommitStatistics" should "stats a single file added" in new TestEnvironment {
+    val change = commit("user", "file1.txt", "line1\nline2")
+
+    inside(stats.find(change)) { case s: CommitsStatistics =>
+      s.numFiles should be(1)
+      s.addedLines should be(2)
+      s.deletedLines should be(0)
+    }
+  }
+
+  it should "stats multiple files added" in new TestEnvironment {
+    val initial = commit("user", "file1.txt", "line1\nline2\n")
+    val second = add(testRepo,
+      List("file1.txt", "file2.txt").asJava,
+      List("line1\n", "line1\nline2\n").asJava, "second commit")
+    inside(stats.find(second)) { case s: CommitsStatistics =>
+      s.numFiles should be(2)
+      s.addedLines should be(3) // presumably 1 + 2 new lines: the initial commit wrote "afile.txt", not "file1.txt"
+      s.deletedLines should be(0)
+    }
+  }
+
+  it should "stats lines eliminated" in new TestEnvironment {
+    val initial = commit("user", "file1.txt", "line1\nline2\nline3")
+    val second = commit("user", "file1.txt", "line1\n")
+    inside(stats.find(second)) { case s: CommitsStatistics =>
+      s.numFiles should be(1)
+      s.addedLines should be(0)
+      s.deletedLines should be(2)
+    }
+  }
+
+  it should "stats a Seq[RevCommit]" in new TestEnvironment {
+    val initial = add(testRepo,
+      List("file1.txt", "file3.txt").asJava,
+      List("line1\n", "line1\nline2\n").asJava, "first commit")
+    val second = add(testRepo,
+      List("file1.txt", "file2.txt").asJava,
+      List("line1a\n", "line1\nline2\n").asJava, "second commit")
+    inside(stats.find(List(initial, second))) { case s: CommitsStatistics =>
+      s.numFiles should be(4) // two files in each of the two commits
+      s.addedLines should be(6) // three added lines per commit
+      s.deletedLines should be(1) // "line1" replaced by "line1a" in file1.txt
+    }
+  }
+}