Add Issues info for enabling ETL aggregation

Extract the issues_codes and issues_links arrays from
the commit message and add them to the Statistics.

See:
https://gerrit-review.googlesource.com/Documentation/config-gerrit.html#commentlink

Change-Id: Ibc0d3a4b26297281f56ccecd81c9c953dc335472
Jira-Id: GERICS-628
diff --git a/README.md b/README.md
index 1146e93..29bc565 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,10 @@
 commits data relevant for statistics purposes, such as number of involved files, and optionally also the list of belonging branches,
 number of added/deleted lines, timestamp and merge flag.
 
+Optionally, issue references can be extracted from each commit message using Gerrit's
+[commentLink](https://gerrit-review.googlesource.com/Documentation/config-gerrit.html#commentlink)
+configuration; the matched issue codes and their links are then added to the statistics.
+
 
 *REST*
 
@@ -66,6 +70,7 @@
 - --until -e Ending timestamp (excluded) to consider
 - --aggregate -granularity -g one of email, email_year, email_month, email_day, email_hour defaulting to aggregation by email
 - --extract-branches -r enables branches extraction for each commit
+- --extract-issues -i enables the extraction of issue codes and links from commit messages, using Gerrit's commentLink configuration
 
 NOTE: Timestamp format is consistent with Gerrit's query syntax, see /Documentation/user-search.html for details.
 
@@ -80,8 +85,8 @@
 SSH Example:
 
 ```
-   $ ssh -p 29418 admin@gerrit.mycompany.com analytics contributors myproject --since 2017-08-01 --until 2017-12-31
-   {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1, "num_files":4,"added_lines":9,"deleted_lines":1, "commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}]}
-   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1, "num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}],"branches":["master","branch1"]}
+   $ ssh -p 29418 admin@gerrit.mycompany.com analytics contributors myproject --since 2017-08-01 --until 2017-12-31 --extract-issues
+   {"name":"John Doe","email":"john.doe@mycompany.com","num_commits":1, "num_files":4,"added_lines":9,"deleted_lines":1, "commits":[{"sha1":"6a1f73738071e299f600017d99f7252d41b96b4b","date":"Apr 28, 2011 5:13:14 AM","merge":false}], "issues_codes":["PRJ-001"],"issues_links":["https://jira.company.org/PRJ-001"]}
+   {"name":"Matt Smith","email":"matt.smith@mycompany.com","num_commits":1, "num_files":1,"added_lines":90,"deleted_lines":10,"commits":[{"sha1":"54527e7e3086758a23e3b069f183db6415aca304","date":"Sep 8, 2015 3:11:23 AM","merge":true}],"branches":["master","branch1"],"issues_codes":["PRJ-002","PRJ-003"],"issues_links":["https://jira.company.org/PRJ-002","https://jira.company.org/PRJ-003"]}
 ```
 
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
index f24d996..3c1253a 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/Contributors.scala
@@ -14,12 +14,11 @@
 
 package com.googlesource.gerrit.plugins.analytics
 
+import com.google.gerrit.extensions.api.projects.CommentLinkInfo
 import com.google.gerrit.extensions.restapi.{BadRequestException, Response, RestReadView}
 import com.google.gerrit.server.git.GitRepositoryManager
-import com.google.gerrit.server.project.{ProjectResource, ProjectsCollection}
+import com.google.gerrit.server.project.{ProjectCache, ProjectResource, ProjectsCollection}
 import com.google.gerrit.sshd.{CommandMetaData, SshCommand}
-import com.google.gson.TypeAdapter
-import com.google.gson.stream.{JsonReader, JsonWriter}
 import com.google.inject.Inject
 import com.googlesource.gerrit.plugins.analytics.common.DateConversions._
 import com.googlesource.gerrit.plugins.analytics.common._
@@ -70,9 +69,13 @@
     }
   }
 
+  @ArgOption(name = "--extract-issues", aliases = Array("-i"),
+    usage = "Extract a list of issues and links using the Gerrit's commentLink configuration")
+  private var extractIssues: Boolean = false
+
   override protected def run =
     gsonFmt.format(executor.get(projectRes, beginDate, endDate,
-      granularity.getOrElse(AggregationStrategy.EMAIL), extractBranches), stdout)
+      granularity.getOrElse(AggregationStrategy.EMAIL), extractBranches, extractIssues), stdout)
 
 }
 
@@ -118,22 +121,34 @@
     usage = "Do extra parsing to extract a list of all branches for each line")
   private var extractBranches: Boolean = false
 
+  @ArgOption(name = "--extract-issues", aliases = Array("-i"),
+    usage = "Extract a list of issues and links using the Gerrit's commentLink configuration")
+  private var extractIssues: Boolean = false
+
   override def apply(projectRes: ProjectResource) =
     Response.ok(
       new GsonStreamedResult[UserActivitySummary](gson,
         executor.get(projectRes, beginDate, endDate,
-          granularity.getOrElse(AggregationStrategy.EMAIL), extractBranches)))
+          granularity.getOrElse(AggregationStrategy.EMAIL), extractBranches, extractIssues)))
 }
 
 class ContributorsService @Inject()(repoManager: GitRepositoryManager,
+                                    projectCache:ProjectCache,
                                     histogram: UserActivityHistogram,
                                     gsonFmt: GsonFormatter) {
+  import RichBoolean._
+  import scala.collection.JavaConverters._
+
   def get(projectRes: ProjectResource, startDate: Option[Long], stopDate: Option[Long],
-          aggregationStrategy: AggregationStrategy, extractBranches: Boolean)
+          aggregationStrategy: AggregationStrategy, extractBranches: Boolean, extractIssues: Boolean)
   : TraversableOnce[UserActivitySummary] = {
+    val nameKey = projectRes.getNameKey
+    val commentLinks: List[CommentLinkInfo] = extractIssues.option {
+      projectCache.get(nameKey).getCommentLinks.asScala
+    }.toList.flatten
+
     ManagedResource.use(repoManager.openRepository(projectRes.getNameKey)) { repo =>
-      val stats = new Statistics(repo)
-      import RichBoolean._
+      val stats = new Statistics(repo, commentLinks.asJava)
       val commitsBranchesOptionalEnricher = extractBranches.option(
         new CommitsBranches(repo, startDate, stopDate)
       )
@@ -148,6 +163,8 @@
 
 case class CommitInfo(sha1: String, date: Long, merge: Boolean, files: java.util.Set[String])
 
+case class IssueInfo(code: String, link: String)
+
 case class UserActivitySummary(year: Integer,
                                month: Integer,
                                day: Integer,
@@ -161,6 +178,8 @@
                                deletedLines: Integer,
                                commits: Array[CommitInfo],
                                branches: Array[String],
+                               issuesCodes: Array[String],
+                               issuesLinks: Array[String],
                                lastCommitDate: Long,
                                isMerge: Boolean
                               )
@@ -183,7 +202,9 @@
           UserActivitySummary(
             year, month, day, hour, uca.getName, uca.getEmail, uca.getCount,
             stat.numFiles, stat.numDistinctFiles, stat.addedLines, stat.deletedLines,
-            stat.commits.toArray, branches.toArray, uca.getLatest, stat.isForMergeCommits
+            stat.commits.toArray, branches.toArray, stat.issues.map(_.code)
+              .toArray, stat.issues.map(_.link).toArray, uca.getLatest, stat
+              .isForMergeCommits
           )
         }
       case _ => throw new Exception(s"invalid key format found ${uca.key}")
diff --git a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
index e5aae6c..2dc7515 100644
--- a/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
+++ b/src/main/scala/com/googlesource/gerrit/plugins/analytics/common/CommitsStatistics.scala
@@ -14,15 +14,18 @@
 
 package com.googlesource.gerrit.plugins.analytics.common
 
-import com.googlesource.gerrit.plugins.analytics.CommitInfo
+import com.google.gerrit.extensions.api.projects.CommentLinkInfo
+import com.googlesource.gerrit.plugins.analytics.{CommitInfo, IssueInfo}
 import com.googlesource.gerrit.plugins.analytics.common.ManagedResource.use
 import org.eclipse.jgit.diff.{DiffFormatter, RawTextComparator}
 import org.eclipse.jgit.lib.{ObjectId, Repository}
-import org.eclipse.jgit.revwalk.{RevCommit, RevWalk}
+import org.eclipse.jgit.revwalk.RevWalk
 import org.eclipse.jgit.treewalk.{CanonicalTreeParser, EmptyTreeIterator}
 import org.eclipse.jgit.util.io.DisabledOutputStream
+import org.slf4j.LoggerFactory
 
 import scala.collection.JavaConversions._
+import scala.util.matching.Regex
 
 /**
   * Collects overall stats on a series of commits and provides some basic info on the included commits
@@ -38,7 +41,8 @@
                               addedLines: Int,
                               deletedLines: Int,
                               isForMergeCommits: Boolean,
-                              commits: List[CommitInfo]
+                              commits: List[CommitInfo],
+                              issues: List[IssueInfo] = Nil
                             ) {
   require(commits.forall(_.merge == isForMergeCommits), s"Creating a stats object with isMergeCommit = $isForMergeCommits but containing commits of different type")
 
@@ -62,17 +66,24 @@
     this.copy(
       addedLines = this.addedLines + that.addedLines,
       deletedLines = this.deletedLines + that.deletedLines,
-      commits = this.commits ++ that.commits
+      commits = this.commits ++ that.commits,
+      issues = this.issues ++ that.issues
     )
   }
 }
 
 object CommitsStatistics {
-  val Empty = CommitsStatistics(0, 0, false, List.empty)
+  val Empty = CommitsStatistics(0, 0, false, List[CommitInfo](), List[IssueInfo]())
   val EmptyMerge = Empty.copy(isForMergeCommits = true)
 }
 
-class Statistics(repo: Repository) {
+class Statistics(repo: Repository, commentInfoList: java.util.List[CommentLinkInfo] = Nil) {
+
+  val log = LoggerFactory.getLogger(classOf[Statistics])
+  val replacers = commentInfoList.map(info =>
+    Replacer(
+      info.`match`.r,
+      Option(info.link).getOrElse(info.html)))
 
   /**
     * Returns up to two different CommitsStatistics object grouping the stats into merge and non-merge commits
@@ -98,6 +109,7 @@
     use(new RevWalk(repo)) { rw =>
       val reader = repo.newObjectReader()
       val commit = rw.parseCommit(objectId)
+      val commitMessage = commit.getFullMessage
 
       val oldTree = {
         // protects against initial commit
@@ -126,8 +138,21 @@
 
       val commitInfo = CommitInfo(objectId.getName, commit.getAuthorIdent.getWhen.getTime, commit.isMerge, files)
 
-      CommitsStatistics(lines.added, lines.deleted, commitInfo.merge, List(commitInfo))
+      CommitsStatistics(lines.added, lines.deleted, commitInfo.merge, List(commitInfo), extractIssues(commitMessage))
     }
   }
 
+  /** Runs every configured commentLink pattern over the commit message; each
+    * matched text becomes the issue code, and the link/html template expanded
+    * on that same text becomes the issue link. */
+  def extractIssues(commitMessage: String): List[IssueInfo] = {
+    val issues = for {
+      replacer <- replacers
+      issueCode <- replacer.pattern.findAllIn(commitMessage)
+    } yield IssueInfo(issueCode, replacer.pattern.replaceAllIn(issueCode, replacer.replaced))
+    issues.toList
+  }
+
+  case class Replacer(pattern: Regex, replaced: String)
+
 }
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsCommentLinkSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsCommentLinkSpec.scala
new file mode 100644
index 0000000..b5d7482
--- /dev/null
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsCommentLinkSpec.scala
@@ -0,0 +1,115 @@
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.analytics.test
+
+import java.util.{Arrays, Date}
+
+import com.google.gerrit.extensions.api.projects.CommentLinkInfo
+import com.googlesource.gerrit.plugins.analytics.IssueInfo
+import com.googlesource.gerrit.plugins.analytics.common.{CommitsStatistics, Statistics}
+import org.eclipse.jgit.internal.storage.file.FileRepository
+import org.eclipse.jgit.revwalk.RevCommit
+import org.scalatest.{FlatSpec, Inside, Matchers}
+import scala.collection.JavaConverters._
+
+/**
+  * Verifies that [[Statistics]] extracts issues from commit messages using
+  * commentLink definitions: each match yields an [[IssueInfo]] holding the matched
+  * text and the link (or html) template expanded on that text.
+  */
+class CommitStatisticsCommentLinkSpec extends FlatSpec with GitTestCase with Matchers with Inside {
+
+  /** Builds a CommentLinkInfo for `pattern`; a missing link or html is stored as null. */
+  def createCommentLinkInfo(pattern: String, link: Option[String] = None, html: Option[String] = None) = {
+    val info = new CommentLinkInfo
+    info.`match` = pattern
+    info.link = link.orNull
+    info.html = html.orNull
+    info
+  }
+
+  /**
+    * Adds `fileName` with `content` to the test repository, using `message` when given.
+    * NOTE(review): `committer` names the commit *author*, while the committer identity is
+    * whatever `author` resolves to in scope (presumably a GitTestCase default) - confirm.
+    */
+  def commit(committer: String, fileName: String, content: String, message: Option[String] = None): RevCommit = {
+    val date = new Date()
+    val person = newPersonIdent(committer, committer, date)
+    add(testRepo, fileName, content, author = person, committer = author, message = message.getOrElse("** no message **"))
+  }
+
+  /**
+    * Per-test fixture: a repository handle plus two commentLink definitions, one expanding
+    * to a plain link ("bug #N") and one expanding to an html anchor ("Bug: N").
+    */
+  class TestEnvironment(val repo: FileRepository = new FileRepository(testRepo),
+                        val commentLinks: java.util.List[CommentLinkInfo] = Seq(
+                          createCommentLinkInfo(pattern = "(bug\\s+#?)(\\d+)",
+                            link = Some("http://bugs.example.com/show_bug.cgi?id=$2")),
+                          createCommentLinkInfo(pattern = "([Bb]ug:\\s+)(\\d+)",
+                            html = Some("$1<a href=\"http://trak.example.com/$2\">$2</a>"))).asJava) {
+
+    lazy val stats = new Statistics(repo, commentLinks)
+  }
+
+  it should "collect no commentslink if no matching" in new TestEnvironment {
+    val nocomments = commit("user", "file1.txt", "content1")
+
+    inside(stats.forCommits(nocomments)) {
+      case List(s: CommitsStatistics) =>
+        s.issues should have size 0
+    }
+
+  }
+  it should "collect simple bugzilla comments" in new TestEnvironment {
+    val simpleComment = commit("user", "file1.txt", "content2", message =
+      Some("this solves bug #23"))
+
+    inside(stats.forCommits(simpleComment)) {
+      case List(s: CommitsStatistics) =>
+        s.issues should have size 1
+        s.issues should contain(IssueInfo("bug #23", "http://bugs.example.com/show_bug.cgi?id=23"))
+    }
+
+  }
+  it should "collect simple track link" in new TestEnvironment {
+    val simpleTrackComment = commit("user", "file1.txt", "content3", message
+      = Some("this solves Bug: 1234"))
+
+    inside(stats.forCommits(simpleTrackComment)) {
+      case List(s: CommitsStatistics) =>
+        s.issues should have size 1
+        s.issues should contain(IssueInfo("Bug: 1234", "Bug: <a href=\"http://trak.example.com/1234\">1234</a>"))
+    }
+
+  }
+  it should "collect multiple links" in new TestEnvironment {
+    val multipleComments = commit("user", "file1.txt", "content4", message =
+      Some("this solves bug 12 and Bug: 23"))
+
+    inside(stats.forCommits(multipleComments)) {
+      case List(s: CommitsStatistics) =>
+        // allOf alone would still pass if extra, unexpected issues were extracted
+        s.issues should have size 2
+        s.issues should contain allOf(
+          IssueInfo("bug 12", "http://bugs.example.com/show_bug.cgi?id=12"),
+          IssueInfo("Bug: 23", "Bug: <a href=\"http://trak.example.com/23\">23</a>")
+        )
+    }
+
+  }
+
+}
diff --git a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
index 0f9ba34..df4260a 100644
--- a/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
+++ b/src/test/scala/com/googlesource/gerrit/plugins/analytics/test/CommitStatisticsSpec.scala
@@ -152,7 +152,7 @@
         nonMergeStats.addedLines should be(4)
         nonMergeStats.deletedLines should be(2)
 
-      case wrongContent => fail(s"Expected two results instad got $wrongContent")
+      case wrongContent => fail(s"Expected two results instead got $wrongContent")
     }
   }