Label results and add manifest branch information

Labelling the results makes it possible to aggregate the
projects imported from the same manifest file.
Add "manifest_label" and "manifest_branch" fields to allow
aggregation and filtering by information associated with
the manifest.

Change-Id: I9d868357852aa0386dfa10f012aebd539728a1e1
diff --git a/README.md b/README.md
index a0a869f..7abdcfd 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,9 @@
 - -u --url Gerrit server URL with the analytics plugins installed
 - -m --manifest Repo manifest XML path. Absolute path of the Repo manifest XML to import project
 from. Each project will be imported with the branch specified in the `revision` attribute.
+- -n --manifest-branch (*optional*) Manifest branch. Git branch of the manifest file. It is stored in the `manifest_branch` field, and only when `--manifest-label` is also set.
+- -l --manifest-label (*optional*) Manifest label. A label shared by all the projects imported from the same manifest, stored in the `manifest_label` field.
+Set it to allow aggregating and filtering the results by `manifest_label`.
 - -p --prefix (*optional*) Projects prefix. Limit the results to those projects that start with the specified prefix.
 - -e --elasticIndex Elastic Search index name. If not provided no ES export will be performed. _Note: ElastiSearch 6.x
 requires this index format `name/type`, while from ElasticSearch 7.x just `name`_
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
index c8cdaed..fab54a4 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
@@ -189,6 +189,11 @@
         .handleAliases(aliasesDFMaybe)
         .dropCommits
     }
+
+    /** Appends a constant "manifest_label" column and, only when a branch is given, a constant "manifest_branch" column. */
+    def addManifestInfo(manifestLabel: String, manifestBranch: Option[String])(implicit spark: SparkSession): DataFrame =
+      (Seq("manifest_label" -> manifestLabel) ++ manifestBranch.map("manifest_branch" -> _))
+        .foldLeft(df) { case (enriched, (column, value)) => enriched.withColumn(column, lit(value)) }
   }
 
   private def emailToDomain(email: String): String = email match {
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
index 463dd28..b3a737e 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
@@ -90,6 +90,14 @@
         c.copy(manifest = Some(x))
       } text "repo manifest XML with the list of projects to process"
 
+      opt[String]('n', "manifest-branch") optional () action { (input, c) =>
+        c.copy(manifestBranch = Some(input))
+      } text "manifest file git branch"
+
+      opt[String]('l', "manifest-label") optional () action { (input, c) =>
+        c.copy(manifestLabel = Some(input))
+      } text "a 'manifest-label' is an aggregation of projects imported from the same manifest. Add to allow query by 'manifest-label'."
+
     }
 
   cliOptionParser.parse(args, GerritEndpointConfig()) match {
@@ -132,7 +140,10 @@
       config.contributorsUrl,
       config.gerritApiConnection
     )
-    contributorsStats.dashboardStats(aliasesDF)
+    val df = contributorsStats.dashboardStats(aliasesDF)
+    config.manifest.flatMap(_ =>
+      config.manifestLabel.map(p => df.addManifestInfo(p, config.manifestBranch)))
+        .getOrElse(df)
   }
 
   def saveES(df: DataFrame)(implicit config: GerritEndpointConfig) {
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
index 6b0c678..53e26c7 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
@@ -37,7 +37,9 @@
     password: Option[String] = None,
     ignoreSSLCert: Option[Boolean] = None,
     extractBranches: Option[Boolean] = None,
-    manifest: Option[String] = None
+    manifest: Option[String] = None,
+    manifestBranch: Option[String] = None,
+    manifestLabel: Option[String] = None
 ) {
 
   lazy val projectsFromManifest: Option[Set[GerritProjectWithRef]] = manifest.map { mf =>
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
index 28e4010..d49063b 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
@@ -79,6 +79,20 @@
   )
   var manifest: String = null
 
+  @ArgOption(
+    name = "--manifest-branch",
+    aliases = Array("-n"),
+    usage = "manifest file git branch"
+  )
+  var manifestBranch: String = null // defaults to null. NOTE(review): no change forwarding this value to GerritEndpointConfig in run() is visible here — confirm it is wired.
+
+  @ArgOption(
+    name = "--manifest-label",
+    aliases = Array("-l"),
+    usage = "A 'manifest_label' is an aggregation of projects imported from the same manifest. Add it to allow filtering by 'manifest_label'"
+  )
+  var manifestLabel: String = null // defaults to null. NOTE(review): no change forwarding this value to GerritEndpointConfig in run() is visible here — confirm it is wired.
+
   override def run() {
     implicit val config = GerritEndpointConfig(
       gerritConfig.getListenUrl(),
diff --git a/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala b/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
index 94336c8..c8ff288 100644
--- a/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
+++ b/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
@@ -316,6 +316,65 @@
     df.collect should contain theSameElementsAs expectedDF.collect
   }
 
+  "addManifestInfo" should "enrich the data with manifest-label from the configuration" in {
+    // Label only: the output must carry the input column plus a constant "manifest_label" column.
+    // NOTE(review): this line opens a new FlatSpec subject, so any later `it should` clause is
+    // reported under "addManifestInfo" until another subject is declared — confirm that the
+    // clauses placed after these two tests are meant to belong to it.
+    import spark.implicits._
+
+    val manifestLabel = "testManifestLabel"
+    val inputValues = Seq("input_value1", "input_value2")
+
+    // Single-column frame the label is added to.
+    val inputDF = sc.parallelize(inputValues).toDF("input")
+
+    // Same rows, each paired with the label in a second column.
+    val expectedDF = sc
+      .parallelize(inputValues.map(value => (value, manifestLabel)))
+      .toDF("input", "manifest_label")
+
+    // No branch is passed, so only "manifest_label" is expected on top of the input column.
+    val df = inputDF.addManifestInfo(manifestLabel, None)
+
+    // Column names first, then the rows themselves.
+    val columnNames = df.schema.fields.map(_.name)
+    columnNames should contain allOf ("input", "manifest_label")
+
+    // Compared as an unordered collection of rows.
+    df.collect should contain theSameElementsAs expectedDF.collect
+  }
+
+  it should "enrich the data with manifest-label and manifest branch from the configuration" in {
+    // Label and branch: the output must carry the input column plus constant "manifest_label"
+    // and "manifest_branch" columns.
+    import spark.implicits._
+
+    val manifestLabel = "testManifestLabel"
+    val manifestBranch = "manifestBranch"
+    val inputValues = Seq("input_value1", "input_value2")
+
+    // Single-column frame the manifest information is added to.
+    val inputDF = sc.parallelize(inputValues).toDF("input")
+
+    // Same rows, each followed by the label and then the branch.
+    val expectedRows = inputValues.map(value => (value, manifestLabel, manifestBranch))
+    val expectedDF = sc
+      .parallelize(expectedRows)
+      .toDF("input", "manifest_label", "manifest_branch")
+
+    // Passing Some(branch) is what triggers the extra "manifest_branch" column; the label
+    // column is added regardless.
+    val df = inputDF.addManifestInfo(manifestLabel, Some(manifestBranch))
+
+    // Column names first, then the rows themselves.
+    val columnNames = df.schema.fields.map(_.name)
+    columnNames should contain allOf ("input", "manifest_label", "manifest_branch")
+
+    // Compared as an unordered collection of rows.
+    df.collect should contain theSameElementsAs expectedDF.collect
+  }
+
   it should "enrich the data with organization from the alias DF when available" in {
     import spark.implicits._