Label results and add manifest branch information
Labelling the results makes it possible to aggregate
projects imported from the same manifest file.
Add "manifest_label" and "manifest_branch" fields to allow
aggregation and filtering by information associated with
the manifest.
Change-Id: I9d868357852aa0386dfa10f012aebd539728a1e1
diff --git a/README.md b/README.md
index a0a869f..7abdcfd 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,9 @@
- -u --url Gerrit server URL with the analytics plugins installed
- -m --manifest Repo manifest XML path. Absolute path of the Repo manifest XML to import project
from. Each project will be imported with the branch specified in the `revision` attribute.
+- -n --manifest-branch (*optional*) Manifest branch. Git branch of the manifest file.
+- -l --manifest-label (*optional*) Manifest label. A label attached to the results of all projects imported from the same manifest.
+Add it to allow aggregating and filtering by `manifest_label`.
- -p --prefix (*optional*) Projects prefix. Limit the results to those projects that start with the specified prefix.
- -e --elasticIndex Elastic Search index name. If not provided no ES export will be performed. _Note: ElastiSearch 6.x
requires this index format `name/type`, while from ElasticSearch 7.x just `name`_
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
index c8cdaed..fab54a4 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/engine/GerritAnalyticsTransformations.scala
@@ -189,6 +189,11 @@
.handleAliases(aliasesDFMaybe)
.dropCommits
}
+
+ def addManifestInfo(manifestLabel: String, manifestBranch: Option[String])(implicit spark: SparkSession): DataFrame = {
+ val dfWithManifestInfo = df.withColumn("manifest_label", lit(manifestLabel))
+ manifestBranch.fold(dfWithManifestInfo)(mb => dfWithManifestInfo.withColumn("manifest_branch", lit(mb)))
+ }
}
private def emailToDomain(email: String): String = email match {
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
index 463dd28..b3a737e 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
@@ -90,6 +90,14 @@
c.copy(manifest = Some(x))
} text "repo manifest XML with the list of projects to process"
+ opt[String]('n', "manifest-branch") optional () action { (input, c) =>
+ c.copy(manifestBranch = Some(input))
+ } text "manifest file git branch"
+
+ opt[String]('l', "manifest-label") optional () action { (input, c) =>
+ c.copy(manifestLabel = Some(input))
+ } text "a 'manifest-label' is an aggregation of projects imported from the same manifest. Add to allow query by 'manifest-label'."
+
}
cliOptionParser.parse(args, GerritEndpointConfig()) match {
@@ -132,7 +140,10 @@
config.contributorsUrl,
config.gerritApiConnection
)
- contributorsStats.dashboardStats(aliasesDF)
+ val df = contributorsStats.dashboardStats(aliasesDF)
+ config.manifest.flatMap(_ =>
+ config.manifestLabel.map(p => df.addManifestInfo(p, config.manifestBranch)))
+ .getOrElse(df)
}
def saveES(df: DataFrame)(implicit config: GerritEndpointConfig) {
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
index 6b0c678..53e26c7 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
@@ -37,7 +37,9 @@
password: Option[String] = None,
ignoreSSLCert: Option[Boolean] = None,
extractBranches: Option[Boolean] = None,
- manifest: Option[String] = None
+ manifest: Option[String] = None,
+ manifestBranch: Option[String] = None,
+ manifestLabel: Option[String] = None
) {
lazy val projectsFromManifest: Option[Set[GerritProjectWithRef]] = manifest.map { mf =>
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
index 28e4010..d49063b 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/plugin/ProcessGitCommitsCommand.scala
@@ -79,6 +79,20 @@
)
var manifest: String = null
+ @ArgOption(
+ name = "--manifest-branch",
+ aliases = Array("-n"),
+ usage = "manifest file git branch"
+ )
+ var manifestBranch: String = null
+
+ @ArgOption(
+ name = "--manifest-label",
+ aliases = Array("-l"),
+ usage = "A 'manifest_label' is an aggregation of projects imported from the same manifest. Add it to allow filtering by 'manifest_label'"
+ )
+ var manifestLabel: String = null
+
override def run() {
implicit val config = GerritEndpointConfig(
gerritConfig.getListenUrl(),
diff --git a/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala b/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
index 94336c8..c8ff288 100644
--- a/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
+++ b/gitcommits/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
@@ -316,6 +316,65 @@
df.collect should contain theSameElementsAs expectedDF.collect
}
+ "addManifestInfo" should "enrich the data with manifest-label from the configuration" in {
+ import spark.implicits._
+
+ val manifestLabel = "testManifestLabel"
+ val inputDF = sc
+ .parallelize(
+ Seq(
+ ("input_value1"),
+ ("input_value2")
+ )
+ )
+ .toDF("input")
+
+ val expectedDF = sc
+ .parallelize(
+ Seq(
+ ("input_value1", manifestLabel),
+ ("input_value2", manifestLabel)
+ )
+ )
+ .toDF("input", "manifest_label")
+
+ val df = inputDF.addManifestInfo(manifestLabel, None)
+
+ df.schema.fields.map(_.name) should contain allOf ("input", "manifest_label")
+
+ df.collect should contain theSameElementsAs expectedDF.collect
+ }
+
+ it should "enrich the data with manifest-label and manifest branch from the configuration" in {
+ import spark.implicits._
+
+ val manifestLabel = "testManifestLabel"
+ val manifestBranch = "manifestBranch"
+ val inputDF = sc
+ .parallelize(
+ Seq(
+ ("input_value1"),
+ ("input_value2")
+ )
+ )
+ .toDF("input")
+
+ val expectedDF = sc
+ .parallelize(
+ Seq(
+ ("input_value1", manifestLabel, manifestBranch),
+ ("input_value2", manifestLabel, manifestBranch)
+ )
+ )
+ .toDF("input", "manifest_label", "manifest_branch")
+
+ val df = inputDF.addManifestInfo(manifestLabel, Some(manifestBranch))
+
+ df.schema.fields.map(_.name) should contain allOf ("input", "manifest_label", "manifest_branch")
+
+ df.collect should contain theSameElementsAs expectedDF.collect
+ }
+
it should "enrich the data with organization from the alias DF when available" in {
import spark.implicits._