Filter by branch when importing data from manifest

Manifest files specify the branch (aka "version")
that each project refers to.

Make sure to extract data from the correct branch
when importing projects from a manifest file.

The branch is defined at project level. If not
present, the global default branch will be assigned
to the project.

Here is a simple manifest example:

<manifest>
  <remote  name="aosp" fetch=".."
           review="https://android-review.googlesource.com/" />
  <default revision="defaultbranch"
           remote="aosp"
           sync-j="4" />
  <project name="repo1" />
  <project name="repo2" revision="branch1" />
</manifest>

In the above case, "repo2" data comes from "branch1", while
"repo1" data comes from "defaultbranch".

Change-Id: Ic4861977f44d4b45cbf9896f563f6b7224aaa200
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
index cfe7960..c8cdaed 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
@@ -28,15 +28,15 @@
 
 object GerritAnalyticsTransformations {
 
-  implicit class PimpedGerritProjectRDD(val rdd: RDD[GerritProject]) extends AnyVal {
+  implicit class PimpedGerritProjectRDD(val rdd: RDD[GerritProjectWithRef]) extends AnyVal {
 
     def enrichWithSource(
-        projectToContributorsAnalyticsUrlFactory: String => Option[String]
+        projectToContributorsAnalyticsUrlFactory: GerritProjectWithRef => Option[String]
     ): RDD[ProjectContributionSource] = {
       rdd.map { project =>
         ProjectContributionSource(
           project.name,
-          projectToContributorsAnalyticsUrlFactory(project.id)
+          projectToContributorsAnalyticsUrlFactory(project)
         )
       }
     }
@@ -223,9 +223,9 @@
     ) format DateTimeFormatter.ISO_OFFSET_DATE_TIME
 
   def getContributorStats(
-      projects: RDD[GerritProject],
-      projectToContributorsAnalyticsUrlFactory: String => Option[String],
-      gerritApiConnection: GerritConnectivity
+                           projects: RDD[GerritProjectWithRef],
+                           projectToContributorsAnalyticsUrlFactory: GerritProjectWithRef => Option[String],
+                           gerritApiConnection: GerritConnectivity
   )(implicit spark: SparkSession) = {
     import spark.sqlContext.implicits._ // toDF
 
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
index b012a0d..463dd28 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
@@ -18,7 +18,7 @@
 
 import com.gerritforge.analytics.gitcommits.model.{
   GerritEndpointConfig,
-  GerritProject,
+  GerritProjectWithRef,
   GerritProjectsSupport
 }
 import com.gerritforge.analytics.spark.SparkApp
@@ -147,11 +147,11 @@
 }
 
 trait FetchProjects {
-  def fetchProjects(config: GerritEndpointConfig): Seq[GerritProject]
+  def fetchProjects(config: GerritEndpointConfig): Seq[GerritProjectWithRef]
 }
 
 trait FetchRemoteProjects extends FetchProjects {
-  def fetchProjects(config: GerritEndpointConfig): Seq[GerritProject] =
+  def fetchProjects(config: GerritEndpointConfig): Seq[GerritProjectWithRef] =
   config.projectsFromManifest.map(_.toSeq).getOrElse(
     config.gerritProjectsUrl.toSeq.flatMap { url =>
       GerritProjectsSupport.parseJsonProjectListResponse(
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
index cc42499..6b0c678 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
@@ -40,16 +40,17 @@
     manifest: Option[String] = None
 ) {
 
-  lazy val projectsFromManifest: Option[Set[GerritProject]] = manifest.map { mf =>
+  lazy val projectsFromManifest: Option[Set[GerritProjectWithRef]] = manifest.map { mf =>
       val mfDoc = XML.loadFile(mf)
+      val defaultRef = (mfDoc \ "default" \@ "revision").trim
       val mfProjects = mfDoc \ "project"
-    mfProjects.theSeq
-      .flatMap(_.attribute("name").toSeq)
-      .flatten
-      .map(_.text)
-      .map(_.stripSuffix(".git"))
-      .map(p => GerritProject(URLEncoder.encode(p, "UTF-8"),p))
-      .toSet
+
+      mfProjects.flatMap { projectNode =>
+        for {
+          name <- projectNode.attribute("name").map(_.text.stripSuffix(".git"))
+          revision = projectNode.attribute("revision").map(_.text).orElse(Some(defaultRef))
+        } yield GerritProjectWithRef(URLEncoder.encode(name, "UTF-8"), name, revision)
+      }.toSet
   }
 
   val gerritApiConnection: GerritConnectivity =
@@ -75,8 +76,9 @@
     "extract-branches" -> extractBranches.map(_.toString)
   ).flatMap(queryOpt).mkString("?", "&", "")
 
-  def contributorsUrl(projectName: String): Option[String] =
+  def contributorsUrl(project: GerritProjectWithRef): Option[String] =
     baseUrl.map { url =>
-      s"$url/projects/$projectName/analytics~contributors$queryString"
+      val refFilter = project.refName.fold("")(ref => s"&branch=$ref")
+      s"$url/projects/${project.id}/analytics~contributors$queryString$refFilter"
     }
 }
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritProject.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritProjectWithRef.scala
similarity index 83%
rename from gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritProject.scala
rename to gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritProjectWithRef.scala
index 3259f4d..16f4ef0 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritProject.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritProjectWithRef.scala
@@ -21,28 +21,28 @@
 
 import scala.io.Source
 import scala.util.Try
-case class GerritProject(id: String, name: String)
+case class GerritProjectWithRef(id: String, name: String, refName: Option[String] = None)
 
 class GerritProjectsSupport @Inject()(gerritApi: GerritApi) {
 
-  def getProject(projectName: String): Try[GerritProject] = {
+  def getProject(projectName: String): Try[GerritProjectWithRef] = {
     val projectApi = gerritApi.projects().name(projectName)
     Try {
       val project = projectApi.get()
-      GerritProject(project.id, project.name)
+      GerritProjectWithRef(project.id, project.name)
     }
   }
 }
 
 object GerritProjectsSupport {
 
-  def parseJsonProjectListResponse(jsonSource: Source): Seq[GerritProject] = {
+  def parseJsonProjectListResponse(jsonSource: Source): Seq[GerritProjectWithRef] = {
     parse(jsonSource.dropGerritPrefix.mkString).values
       .asInstanceOf[Map[String, Map[String, String]]]
       .mapValues(projectAttributes => projectAttributes("id"))
       .toSeq
       .map {
-        case (name, id) => GerritProject(id, name)
+        case (name, id) => GerritProjectWithRef(id, name)
       }
   }
 }
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
index 49c7ede..a501d39 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
@@ -6,7 +6,7 @@
 import com.gerritforge.analytics.gitcommits.job.{FetchProjects, Job}
 import com.gerritforge.analytics.gitcommits.model.{
   GerritEndpointConfig,
-  GerritProject,
+  GerritProjectWithRef,
   GerritProjectsSupport
 }
 import com.google.gerrit.server.project.ProjectControl
@@ -125,7 +125,7 @@
     }
   }
 
-  def fetchProjects(config: GerritEndpointConfig): Seq[GerritProject] = {
+  def fetchProjects(config: GerritEndpointConfig): Seq[GerritProjectWithRef] = {
     config.prefix.toSeq.flatMap(
       projectName =>
         gerritProjects.getProject(projectName) match {
diff --git a/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala b/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
index cf7ef08..94336c8 100644
--- a/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
+++ b/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
@@ -20,7 +20,7 @@
 import com.gerritforge.analytics.common.api.GerritConnectivity
 import com.gerritforge.analytics.gitcommits.engine.GerritAnalyticsTransformations._
 import com.gerritforge.analytics.gitcommits.model.{
-  GerritProject,
+  GerritProjectWithRef,
   GerritProjectsSupport,
   ProjectContributionSource
 }
@@ -54,7 +54,7 @@
         |""".stripMargin)
     )
 
-    projects should contain only (GerritProject("All-Projects-id", "All-Projects-name"), GerritProject(
+    projects should contain only (GerritProjectWithRef("All-Projects-id", "All-Projects-name"), GerritProjectWithRef(
       "Test-id",
       "Test-name"
     ))
@@ -62,10 +62,10 @@
 
   "enrichWithSource" should "enrich project RDD object with its source" in {
 
-    val projectRdd = sc.parallelize(Seq(GerritProject("project-id", "project-name")))
+    val projectRdd = sc.parallelize(Seq(GerritProjectWithRef("project-id", "project-name")))
 
     val projectWithSource = projectRdd
-      .enrichWithSource(projectId => Some(s"http://somewhere.com/$projectId"))
+      .enrichWithSource(project => Some(s"http://somewhere.com/${project.id}"))
       .collect
 
     projectWithSource should have size 1
@@ -77,6 +77,24 @@
     }
   }
 
+  it should "enrich project RDD object with ref" in {
+
+    val ref = "aRef"
+    val projectRdd = sc.parallelize(Seq(GerritProjectWithRef("project-id", "project-name", Some(ref))))
+
+    val projectWithSource = projectRdd
+      .enrichWithSource(project => Some(s"http://somewhere.com/${project.id}?ref=${project.refName.get}"))
+      .collect
+
+    projectWithSource should have size 1
+    inside(projectWithSource.head) {
+      case ProjectContributionSource(projectName, url) => {
+        projectName should be("project-name")
+        url should contain(s"http://somewhere.com/project-id?ref=$ref")
+      }
+    }
+  }
+
   "filterEmptyStrings" should "Filter empty strings from BufferedSource" in {
     val contentWithEmptyLines =
       """LineOne
diff --git a/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala b/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala
index a58d584..eb2563d 100644
--- a/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala
+++ b/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala
@@ -34,7 +34,13 @@
     conf.gerritProjectsUrl should contain(s"testBaseUrl/projects/")
   }
 
-  it should "return projects contained in a repo manifest XML" in {
+  "contributorsUrl" should "" in {
+    val conf = GerritEndpointConfig(baseUrl = Some("testBaseUrl"), prefix = None)
+    conf.contributorsUrl(GerritProjectWithRef("opensbi","opensbi",Some("refs/tags/v0.8"))) should be
+      (Some(s"testBaseUrl/projects/opensbi/analytics~contributors?&branch=refs/tags/v0.8"))
+  }
+
+  "projectsFromManifest" should "return projects contained in a repo manifest XML" in {
     val conf = GerritEndpointConfig(baseUrl = Some("testBaseUrl"), manifest = Option(manifestFile.getAbsolutePath))
     val projectNamesFromManifest = conf.projectsFromManifest.toSeq.flatten.map(_.name)
 
@@ -48,6 +54,23 @@
 
     projectIdsFromManifest should contain only ("sel4_projects_libs", "seL4_tools", "sel4runtime", "musllibc", "seL4_libs", "prefix%2Futil_libs", "sel4test", "nanopb", "opensbi")
   }
+
+  it should "return projects with the version contained in a repo manifest XML with" in {
+    val conf = GerritEndpointConfig(baseUrl = Some("testBaseUrl"), manifest = Option(manifestFile.getAbsolutePath))
+    val projectNamesFromManifest = conf.projectsFromManifest.toSeq.flatten
+
+    projectNamesFromManifest should contain only (
+      GerritProjectWithRef("musllibc","musllibc",Some("sel4")),
+      GerritProjectWithRef("nanopb","nanopb",Some("refs/tags/0.4.3")),
+      GerritProjectWithRef("opensbi","opensbi",Some("refs/tags/v0.8")),
+      GerritProjectWithRef("prefix%2Futil_libs","prefix/util_libs",Some("master")),
+      GerritProjectWithRef("sel4_projects_libs","sel4_projects_libs",Some("master")),
+      GerritProjectWithRef("sel4runtime","sel4runtime",Some("master")),
+      GerritProjectWithRef("sel4test","sel4test",Some("master")),
+      GerritProjectWithRef("seL4_libs","seL4_libs",Some("master")),
+      GerritProjectWithRef("seL4_tools","seL4_tools",Some("master"))
+    )
+  }
 }
 
 trait ManifestXML {