Aggregate contributors by branch

When the --extract-branches (-r) flag is given, each output line additionally
carries the set of branches to which its commits belong.

Jira-Id: GERICS-513
Change-Id: I434497b9b6610ce68891465be5e7aac6147834b6
diff --git a/README.md b/README.md
index 647bd92..1146e93 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@
 ### Contributors
 
 Extract a unordered list of project contributors statistics, including the
-commits data relevant for statistics purposes, such as number of involved files,
+commits data relevant for statistics purposes, such as number of involved files, optionally the list of branches the commits belong to,
 number of added/deleted lines, timestamp and merge flag.
 
 
@@ -65,6 +65,7 @@
 - --since -b Starting timestamp to consider
 - --until -e Ending timestamp (excluded) to consider
 - --aggregate -granularity -g one of email, email_year, email_month, email_day, email_hour defaulting to aggregation by email
+- --extract-branches -r enables extraction of the branches the commits of each output line belong to
 
 NOTE: Timestamp format is consistent with Gerrit's query syntax, see /Documentation/user-search.html for details.
 
@@ -73,7 +74,7 @@
 ```
    $ curl http://gerrit.mycompany.com/projects/myproject/analytics~contributors
    {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1, "num_files":4,"added_lines":9,"deleted_lines":1, "commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}]}
-   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1, "num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}]}
+   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1, "num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}],"branches":["master","branch1"]}
 ```
 
 SSH Example:
@@ -81,6 +82,6 @@
 ```
    $ ssh -p 29418 admin@gerrit.mycompany.com analytics contributors myproject --since 2017-08-01 --until 2017-12-31
    {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1, "num_files":4,"added_lines":9,"deleted_lines":1, "commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}]}
-   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1, "num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}]}
+   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1, "num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}],"branches":["master","branch1"]}
 ```
 
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
index 6e16763..faefac8 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
@@ -21,7 +21,6 @@
 import com.google.inject.Inject
 import com.googlesource.gerrit.plugins.analytics.common.DateConversions._
 import com.googlesource.gerrit.plugins.analytics.common._
-import org.eclipse.jgit.lib.{ObjectId, Repository}
 import org.kohsuke.args4j.{Option => ArgOption}
 
 
@@ -32,6 +31,12 @@
   extends SshCommand with ProjectResourceParser {
 
   private var beginDate: Option[Long] = None
+  private var endDate: Option[Long] = None
+  private var granularity: Option[AggregationStrategy] = None
+
+  @ArgOption(name = "--extract-branches", aliases = Array("-r"),
+    usage = "Do extra parsing to extract a list of all branches for each line")
+  private var extractBranches: Boolean = false
 
   @ArgOption(name = "--since", aliases = Array("--after", "-b"),
     usage = "(included) begin timestamp. Must be in the format 2006-01-02[ 15:04:05[.890][ -0700]]")
@@ -43,8 +48,6 @@
     }
   }
 
-  private var endDate: Option[Long] = None
-
   @ArgOption(name = "--until", aliases = Array("--before", "-e"),
     usage = "(excluded) end timestamp. Must be in the format 2006-01-02[ 15:04:05[.890][ -0700]]")
   def setEndDate(date: String) {
@@ -55,8 +58,6 @@
     }
   }
 
-  private var granularity: Option[AggregationStrategy] = None
-
   @ArgOption(name = "--aggregate", aliases = Array("-g"),
     usage = "Type of aggregation requested. ")
   def setGranularity(value: String) {
@@ -67,10 +68,9 @@
     }
   }
 
-
   override protected def run =
     gsonFmt.format(executor.get(projectRes, beginDate, endDate,
-      granularity.getOrElse(AggregationStrategy.EMAIL)), stdout)
+      granularity.getOrElse(AggregationStrategy.EMAIL), extractBranches), stdout)
 
 }
 
@@ -79,6 +79,8 @@
   extends RestReadView[ProjectResource] {
 
   private var beginDate: Option[Long] = None
+  private var endDate: Option[Long] = None
+  private var granularity: Option[AggregationStrategy] = None
 
   @ArgOption(name = "--since", aliases = Array("--after", "-b"), metaVar = "QUERY",
     usage = "(included) begin timestamp. Must be in the format 2006-01-02[ 15:04:05[.890][ -0700]]")
@@ -90,8 +92,6 @@
     }
   }
 
-  private var endDate: Option[Long] = None
-
   @ArgOption(name = "--until", aliases = Array("--before", "-e"), metaVar = "QUERY",
     usage = "(excluded) end timestamp. Must be in the format 2006-01-02[ 15:04:05[.890][ -0700]]")
   def setEndDate(date: String) {
@@ -102,8 +102,6 @@
     }
   }
 
-  private var granularity: Option[AggregationStrategy] = None
-
   @ArgOption(name = "--granularity", aliases = Array("--aggregate", "-g"), metaVar = "QUERY",
     usage = "can be one of EMAIL, EMAIL_HOUR, EMAIL_DAY, EMAIL_MONTH, EMAIL_YEAR, defaulting to EMAIL")
   def setGranularity(value: String) {
@@ -114,25 +112,33 @@
     }
   }
 
+  @ArgOption(name = "--extract-branches", aliases = Array("-r"),
+    usage = "Do extra parsing to extract a list of all branches for each line")
+  private var extractBranches: Boolean = false
+
   override def apply(projectRes: ProjectResource) =
     Response.ok(
       new GsonStreamedResult[UserActivitySummary](gson,
         executor.get(projectRes, beginDate, endDate,
-          granularity.getOrElse(AggregationStrategy.EMAIL))))
+          granularity.getOrElse(AggregationStrategy.EMAIL), extractBranches)))
 }
 
 class ContributorsService @Inject()(repoManager: GitRepositoryManager,
                                     histogram: UserActivityHistogram,
                                     gsonFmt: GsonFormatter) {
-
   def get(projectRes: ProjectResource, startDate: Option[Long], stopDate: Option[Long],
-          aggregationStrategy: AggregationStrategy): TraversableOnce[UserActivitySummary] = {
+          aggregationStrategy: AggregationStrategy, extractBranches: Boolean)
+  : TraversableOnce[UserActivitySummary] = {
     ManagedResource.use(repoManager.openRepository(projectRes.getNameKey)) { repo =>
       val stats = new Statistics(repo)
+      import RichBoolean._
+      val commitsBranchesOptionalEnricher = extractBranches.option(
+        new CommitsBranches(repo, startDate, stopDate)
+      )
       histogram.get(repo, new AggregatedHistogramFilterByDates(startDate, stopDate,
         aggregationStrategy))
         .par
-        .flatMap(UserActivitySummary.apply(stats))
+        .flatMap(UserActivitySummary.apply(stats, commitsBranchesOptionalEnricher))
         .toStream
     }
   }
@@ -151,25 +157,32 @@
                                addedLines: Integer,
                                deletedLines: Integer,
                                commits: Array[CommitInfo],
+                               branches: Array[String],
                                lastCommitDate: Long,
                                isMerge: Boolean
                               )
 
 object UserActivitySummary {
-  def apply(statisticsHandler: Statistics)(uca: AggregatedUserCommitActivity): Iterable[UserActivitySummary] = {
+  def apply(statisticsHandler: Statistics,
+            branchesLabeler: Option[CommitsBranches])
+           (uca: AggregatedUserCommitActivity)
+  : Iterable[UserActivitySummary] = {
     val INCLUDESEMPTY = -1
 
     implicit def stringToIntOrNull(x: String): Integer = if (x.isEmpty) null else new Integer(x)
 
     uca.key.split("/", INCLUDESEMPTY) match {
       case Array(email, year, month, day, hour) =>
+        val branches = branchesLabeler.fold(Set.empty[String]) {
+          labeler => labeler.forCommits(uca.getIds)
+        }
         statisticsHandler.forCommits(uca.getIds: _*).map { stat =>
           UserActivitySummary(
             year, month, day, hour, uca.getName, uca.getEmail, uca.getCount,
-            stat.numFiles, stat.addedLines, stat.deletedLines, stat.commits.toArray, uca.getLatest, stat.isForMergeCommits
+            stat.numFiles, stat.addedLines, stat.deletedLines,
+            stat.commits.toArray, branches.toArray, uca.getLatest, stat.isForMergeCommits
           )
         }
-
       case _ => throw new Exception(s"invalid key format found ${uca.key}")
     }
   }
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsBranches.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsBranches.scala
new file mode 100644
index 0000000..46cca17
--- /dev/null
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsBranches.scala
@@ -0,0 +1,53 @@
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.common
+
+import com.googlesource.gerrit.plugins.analytics.common.ManagedResource.use
+import org.eclipse.jgit.api.Git
+import org.eclipse.jgit.lib.{Constants, ObjectId, Repository}
+import org.eclipse.jgit.revwalk.RevWalk
+import org.eclipse.jgit.revwalk.filter.CommitTimeRevFilter
+
+import scala.collection.JavaConversions._
+import scala.collection.mutable
+
+/** Maps commits to the local branches containing them, within an optional time window. */
+class CommitsBranches(repo: Repository, from: Option[Long] = None, to: Option[Long] = None) {
+
+  /** Names of all branches reaching any of `objectIds`; commits found on no branch add nothing. */
+  def forCommits(objectIds: TraversableOnce[ObjectId]): Set[String] = {
+    val commitToBranchesMap = new mutable.HashMap[String, mutable.Set[String]]
+      with mutable.MultiMap[String, String]
+    // RevWalk keeps a single filter: setting `after` then `before` would drop the lower bound.
+    val timeFilter = (from, to) match {
+      case (Some(d1), Some(d2)) => Some(CommitTimeRevFilter.between(d1, d2))
+      case _ => from.map(CommitTimeRevFilter.after(_)).orElse(to.map(CommitTimeRevFilter.before(_)))
+    }
+    use(new Git(repo)) { git =>
+      for (ref <- git.branchList.call) {
+        val branchName = ref.getName.drop(Constants.R_HEADS.length)
+        use(new RevWalk(repo)) { rw: RevWalk =>
+          timeFilter.foreach(filter => rw.setRevFilter(filter))
+          rw.markStart(rw.parseCommit(ref.getObjectId))
+          rw.foreach(rev => commitToBranchesMap.addBinding(rev.getName, branchName))
+        }
+      }
+      objectIds.foldLeft(Set.empty[String]) { (found, id) =>
+        // getOrElse: a commit outside every walked branch must not raise NoSuchElementException
+        found ++ commitToBranchesMap.getOrElse(id.getName, mutable.Set.empty[String])
+      }.filter(_.nonEmpty)
+    }
+  }
+}
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/RichBoolean.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/RichBoolean.scala
new file mode 100644
index 0000000..ff4cdcf
--- /dev/null
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/RichBoolean.scala
@@ -0,0 +1,22 @@
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.common
+
+/** Adds `option` to `Boolean`: `Some(a)` (evaluating `a`) when true, `None` otherwise. */
+object RichBoolean {
+  implicit class RichBoolean(val b: Boolean) extends AnyVal {
+    final def option[A](a: => A): Option[A] = b match { case true => Some(a); case false => None }
+  }
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsBranchesTest.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsBranchesTest.scala
new file mode 100644
index 0000000..ad9f712
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsBranchesTest.scala
@@ -0,0 +1,27 @@
+package com.googlesource.gerrit.plugins.analytics.common
+
+import com.googlesource.gerrit.plugins.analytics.test.GitTestCase
+import org.eclipse.jgit.internal.storage.file.FileRepository
+import org.scalatest.{FlatSpec, Matchers}
+
+/** Checks that [[CommitsBranches]] reports the branches reaching the given commits. */
+class CommitsBranchesTest extends FlatSpec with Matchers with GitTestCase {
+  // A `def`, so every access builds a new CommitsBranches over a new FileRepository handle.
+  def commitsBranches = new CommitsBranches(new FileRepository(testRepo))
+
+  "getAllCommitsLabeledWithBranches" should "label correctly a set of " +
+    "commits" in {
+    // Commits created before `branch(...)`: asserted below to be on both branches.
+    val sharedCommits = Seq(
+      add("file", "content"), add("file2", "content"),
+      add("file3", "content"), add("file4", "content"))
+    branch("feature/branch")
+    // Commits created after `branch(...)`; only the last two are queried.
+    add("fileOnBranch", "content2")
+    add("fileOnBranch2", "content2")
+    val branchOnlyCommits = Seq(add("fileOnBranch3", "content2"), add("fileOnBranch4", "content2"))
+
+    commitsBranches.forCommits(sharedCommits) should be(Set("master", "feature/branch"))
+    commitsBranches.forCommits(branchOnlyCommits) should be(Set("feature/branch"))
+  }
+}