Support project filtering using a repo manifest
When dealing with multiple projects, using a tool like
repo and an XML manifest file is very common.
Allow the ETL to receive a repo manifest XML file through the new
-m/--manifest option and automatically process all the projects
listed in that manifest, instead of fetching the project list from
the Gerrit projects API.
Change-Id: I839f9eea690edb53053396e3e7fc83eb6c53d0a1
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
index b80e99d..b012a0d 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/job/Main.scala
@@ -86,6 +86,10 @@
c.copy(extractBranches = Some(input))
} text "enables branches extraction for each commit"
+ opt[String]('m', "manifest") optional () action { (x, c) =>
+ c.copy(manifest = Some(x))
+ } text "repo manifest XML with the list of projects to process"
+
}
cliOptionParser.parse(args, GerritEndpointConfig()) match {
@@ -147,11 +151,11 @@
}
trait FetchRemoteProjects extends FetchProjects {
+ // Resolves the projects to process. When `config.projectsFromManifest` is defined its
+ // projects are returned as-is; otherwise the content fetched from `config.gerritProjectsUrl`
+ // is parsed as a JSON project list (an undefined URL yields an empty Seq).
- def fetchProjects(config: GerritEndpointConfig): Seq[GerritProject] = {
+ def fetchProjects(config: GerritEndpointConfig): Seq[GerritProject] =
+ config.projectsFromManifest.map(_.toSeq).getOrElse(
config.gerritProjectsUrl.toSeq.flatMap { url =>
GerritProjectsSupport.parseJsonProjectListResponse(
config.gerritApiConnection.getContentFromApi(url)
)
- }
- }
+ })
}
diff --git a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
index a27611f..cc42499 100644
--- a/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
+++ b/gitcommits/src/main/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfig.scala
@@ -15,11 +15,14 @@
package com.gerritforge.analytics.gitcommits.model
import java.net.URLEncoder
+import scala.xml._
import java.time.format.DateTimeFormatter
import java.time.{LocalDate, ZoneOffset}
-
import com.gerritforge.analytics.common.api.GerritConnectivity
import com.gerritforge.analytics.support.ops.AnalyticsDateTimeFormatter
+
+import java.nio.charset.StandardCharsets
+
case class GerritEndpointConfig(
baseUrl: Option[String] = None,
prefix: Option[String] = None,
@@ -33,9 +36,22 @@
username: Option[String] = None,
password: Option[String] = None,
ignoreSSLCert: Option[Boolean] = None,
- extractBranches: Option[Boolean] = None
+ extractBranches: Option[Boolean] = None,
+ manifest: Option[String] = None
) {
+  /** Projects listed in the repo manifest XML file at path `manifest`, or `None` when no
+    * manifest was configured.
+    *
+    * Every `<project>` element directly under the document root contributes one entry: a
+    * trailing ".git" is stripped from its `name` attribute, the result becomes the project
+    * name and its URL-encoded form becomes the project id. Elements whose name is missing or
+    * empty (also after stripping ".git") are skipped rather than producing a project with an
+    * empty name and id.
+    *
+    * The file is read on first access only.
+    * NOTE(review): `XML.loadFile` presumably throws on a missing or malformed file — confirm
+    * that callers are fine with that propagating.
+    */
+  lazy val projectsFromManifest: Option[Set[GerritProject]] = manifest.map { manifestPath =>
+    (XML.loadFile(manifestPath) \ "project").theSeq
+      // `\@` yields the attribute text, or "" when the attribute is absent.
+      .map(project => (project \@ "name").stripSuffix(".git"))
+      .filter(_.nonEmpty)
+      .map(name => GerritProject(URLEncoder.encode(name, StandardCharsets.UTF_8.name()), name))
+      .toSet
+  }
+
val gerritApiConnection: GerritConnectivity =
new GerritConnectivity(username, password, ignoreSSLCert.getOrElse(false))
diff --git a/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala b/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala
index 6830a6e..a58d584 100644
--- a/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala
+++ b/gitcommits/src/test/scala/com/gerritforge/analytics/gitcommits/model/GerritEndpointConfigTest.scala
@@ -16,7 +16,12 @@
import org.scalatest.{FlatSpec, Matchers}
-class GerritEndpointConfigTest extends FlatSpec with Matchers {
+import java.io.File
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths}
+import scala.xml.XML
+
+class GerritEndpointConfigTest extends FlatSpec with Matchers with ManifestXML {
"gerritProjectsUrl" should "contain prefix when available" in {
val prefix = "prefixMustBeThere"
@@ -29,4 +34,50 @@
conf.gerritProjectsUrl should contain(s"testBaseUrl/projects/")
}
+  // The subject is named explicitly: `it` would attach this test to the preceding
+  // "gerritProjectsUrl" subject and report it under the wrong behaviour.
+  "projectsFromManifest" should "return projects contained in a repo manifest XML" in {
+    val conf = GerritEndpointConfig(
+      baseUrl = Some("testBaseUrl"),
+      manifest = Option(manifestFile.getAbsolutePath)
+    )
+    val projectNamesFromManifest = conf.projectsFromManifest.toSeq.flatten.map(_.name)
+
+    // Expected names carry no ".git" suffix, whether or not the manifest entry had one.
+    projectNamesFromManifest should contain only (
+      "sel4_projects_libs",
+      "seL4_tools",
+      "sel4runtime",
+      "musllibc",
+      "seL4_libs",
+      "prefix/util_libs",
+      "sel4test",
+      "nanopb",
+      "opensbi"
+    )
+    projectNamesFromManifest should not contain ("non-existent-project")
+  }
+
+  // The subject is named explicitly: `it` would otherwise refer to whichever subject was
+  // declared last in this spec, which need not be the manifest parsing under test here.
+  "projectsFromManifest" should "return projects ids with URL encoded names mentioned in a repo manifest XML" in {
+    val conf = GerritEndpointConfig(
+      baseUrl = Some("testBaseUrl"),
+      manifest = Option(manifestFile.getAbsolutePath)
+    )
+    val projectIdsFromManifest = conf.projectsFromManifest.toSeq.flatten.map(_.id)
+
+    // Only "prefix/util_libs" contains a character that URL encoding changes ("/" -> "%2F").
+    projectIdsFromManifest should contain only (
+      "sel4_projects_libs",
+      "seL4_tools",
+      "sel4runtime",
+      "musllibc",
+      "seL4_libs",
+      "prefix%2Futil_libs",
+      "sel4test",
+      "nanopb",
+      "opensbi"
+    )
+  }
+}
+
+/** Test fixture exposing a sample repo manifest written to a temporary file. */
+trait ManifestXML {
+  // Sample from https://docs.sel4.systems/projects/buildsystem/repo-cheatsheet.html
+  /** Temporary XML file holding the sample manifest; created on first access and reused
+    * afterwards.
+    */
+  lazy val manifestFile: File = {
+    val tmpFile = File.createTempFile("analytics-etl", s"-${System.nanoTime()}.xml")
+    // Ask the JVM to remove the fixture on exit so repeated test runs do not pile up temp files.
+    tmpFile.deleteOnExit()
+    Files.write(
+      tmpFile.toPath,
+      """<manifest>
+        |<remote name="seL4" fetch="." />
+        |<remote fetch="../sel4proj" name="sel4proj"/>
+        |<remote fetch="https://github.com/nanopb" name="nanopb" />
+        |<remote fetch="https://github.com/riscv" name="opensbi"/>
+        |
+        |<default revision="master" remote="seL4" />
+        |
+        |<project name="seL4_tools.git" path="tools/seL4">
+        | <linkfile src="cmake-tool/init-build.sh" dest="init-build.sh"/>
+        | <linkfile src="cmake-tool/griddle" dest="griddle"/>
+        |</project>
+        |<project name="sel4runtime.git" path="projects/sel4runtime"/>
+        |<project name="musllibc.git" path="projects/musllibc" revision="sel4"/>
+        |<project name="seL4_libs.git" path="projects/seL4_libs"/>
+        |<project name="prefix/util_libs.git" path="projects/util_libs"/>
+        |<project name="sel4test.git" path="projects/sel4test">
+        | <linkfile src="easy-settings.cmake" dest="easy-settings.cmake"/>
+        |</project>
+        |<project name="sel4_projects_libs" path="projects/sel4_projects_libs" />
+        |<project name="opensbi" remote="opensbi" revision="refs/tags/v0.8" path="tools/opensbi"/>
+        |<project name="nanopb" path="tools/nanopb" revision="refs/tags/0.4.3" upstream="master" remote="nanopb"/>
+        |</manifest>""".stripMargin.getBytes(StandardCharsets.UTF_8)
+    )
+    tmpFile
+  }
}
diff --git a/project/SharedSettings.scala b/project/SharedSettings.scala
index 136fef6..0d16824 100644
--- a/project/SharedSettings.scala
+++ b/project/SharedSettings.scala
@@ -43,7 +43,8 @@
"com.github.scopt" %% "scopt" % scopt,
"org.scalactic" %% "scalactic" % scalactic % "test",
"org.scalatest" %% "scalatest" % scalaTest % "test",
- "com.dimafeng" %% "testcontainers-scala" % TestContainersScala % Test
+ "com.dimafeng" %% "testcontainers-scala" % TestContainersScala % Test,
+ "org.scala-lang.modules" %% "scala-xml" % "1.3.1",
)
)