Extract project from audit logs
Attempt, where possible, to extract project names from audit logs. This
uses a list of public projects whith is retrieved via the /project/
endpoint via the `GerritProjects` class.
Feature: Issue 10225
Change-Id: Ifb41bd9b7c4a2db3d081dbddb0e1cf377f41338c
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/broadcast/GerritProjects.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/broadcast/GerritProjects.scala
new file mode 100644
index 0000000..cbb72ac
--- /dev/null
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/broadcast/GerritProjects.scala
@@ -0,0 +1,123 @@
+// Copyright (C) 2019 GerritForge Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.gerritforge.analytics.auditlog.broadcast
+import java.net.URLDecoder
+
+import com.gerritforge.analytics.auditlog.util.RegexUtil
+import com.gerritforge.analytics.common.api.GerritConnectivity
+import com.gerritforge.analytics.support.ops.GerritSourceOps._
+import com.typesafe.scalalogging.LazyLogging
+import org.json4s.native.JsonMethods._
+import org.json4s.{DefaultFormats, _}
+
+import scala.annotation.tailrec
+import scala.util.{Failure, Success, Try}
+
+case class GerritProject(name: GerritProjectName)
+case class GerritProjects(private val projects: Map[GerritProjectName, GerritProject]) extends LazyLogging with RegexUtil {
+
+ private val PROJECT_SSH_WITH_SPACES = capture(r = """project:(.+?)\)?\s""")
+ private val PROJECT_SSH_WITH_BRACKETS = capture(r = """project:\{\^?(.*)\}""")
+ private val PROJECT_SSH_NO_SPACES = capture(r = """project(?::|.)(.*)""")
+ private val PROJECT_SSH_PACK = capture(r = """(?:git-receive-pack|git-upload-pack)\.\/(.+)""")
+ private val PROJECT_SSH_REPLICATION_START = capture(r = """replication\.start\.([\.\/a-zA-Z0-9]+(?:-[a-zA-Z0-9\.]+)*)(?:\.|\s|$)""")
+ private val PROJECT_REST_API_CHANGES_SEGMENT = capture(r = """changes\/([^\/]+)~""")
+ private val PROJECT_REST_API_PROJECTS_SEGMENT = capture(r = """projects\/([^\/]+)""")
+ private val PROJECT_HTTP_PACK_INFO_REF = capture(r = """https?:\/\/(.+)\/info\/refs\?service=(?:git-upload-pack|git-receive-pack)""")
+ private val PROJECT_HTTP_PACK = capture(r = """https?:\/\/(.+)\/(?:git-upload-pack|git-receive-pack)""")
+
+ private val NO_PROJECT_RELATED_COMMANDS = capture(r = """(LOGIN|LOGOUT|AUTH)""")
+
+ private def existProject(id: GerritProjectName): Boolean = projects.get(id).isDefined
+
+ // Helper method to find a project at the start of a string.
+ // For example, the string `redhat-performance/quads.github.com.status:open.foo:bar` will match the project
+ // redhat-performance/quads.github.com, if that project exists in `gerritProject`
+ private def findProjectStringAtStart(rawProject: String, sep: Char = '.'): Option[String] =
+ rawProject.split(sep).foldLeft(List.empty[String]) { case (acc, token) =>
+ acc.headOption.map { t => (t + sep + token) +: acc }.getOrElse(List(token))
+ }.find(existProject)
+
+ // Helper method to find a project at the end of a string.
+ // For example, the string `gerrit.foo.bar.baz.redhat-performance/quads.github.com` will match the project
+ // redhat-performance/quads.github.com, if that project exists in `gerritProject`
+ private def findProjectStringAtEnd(rawProject: String, sep: Char = '.'): Option[String] =
+ rawProject.split(sep).foldRight(List.empty[String]) { case (token, acc) =>
+ acc.headOption.map { t => (token + sep + t) +: acc }.getOrElse(List(token))
+ }.find(existProject)
+
+ def extractProject(what: String, accessPath: String): Option[String] = accessPath match {
+ case _ if matches(NO_PROJECT_RELATED_COMMANDS, what) =>
+ None
+ case "SSH_COMMAND" =>
+ extractGroup(PROJECT_SSH_WITH_SPACES, what)
+ .orElse(extractGroup(PROJECT_SSH_WITH_BRACKETS, what))
+ .orElse(extractGroup(PROJECT_SSH_PACK, what))
+ .orElse(extractGroup(PROJECT_SSH_REPLICATION_START, what))
+ .orElse(extractGroup(PROJECT_SSH_NO_SPACES, what).flatMap(findProjectStringAtStart(_)))
+ .orElse(findProjectStringAtStart(what))
+ .orElse(findProjectStringAtEnd(what))
+ case "REST_API" | "UNKNOWN" =>
+ extractGroup(PROJECT_REST_API_CHANGES_SEGMENT, what)
+ .orElse(extractGroup(PROJECT_REST_API_PROJECTS_SEGMENT, what))
+ .map(URLDecoder.decode(_, "UTF-8"))
+ case "GIT" =>
+ extractGroup(PROJECT_HTTP_PACK_INFO_REF, what)
+ .orElse(extractGroup(PROJECT_HTTP_PACK, what))
+ .flatMap(findProjectStringAtEnd(_, '/'))
+ case unexpected =>
+ logger.warn(s"Unexpected access path '$unexpected' encountered when extracting project from '$what'")
+ None
+ }
+}
+
+object GerritProjects extends LazyLogging {
+
+ val empty = GerritProjects(Map.empty[GerritProjectName, GerritProject])
+
+ implicit private val formats = DefaultFormats
+
+ def loadProjects(gerritConnectivity: GerritConnectivity, gerritUrl: String): Try[GerritProjects] = {
+ val PAGE_SIZE = 500
+ logger.debug(s"Loading gerrit projects...")
+
+ val baseUrl = s"""$gerritUrl/projects/?n=$PAGE_SIZE&query=state%3Aactive%20OR%20state%3Aread-only"""
+
+ @tailrec
+ def loopThroughPages(more: Boolean, triedAcc: Try[GerritProjects] = Success(empty)): Try[GerritProjects] = {
+ if (!more)
+ triedAcc
+ else {
+ val acc = triedAcc.get
+
+ val url = baseUrl + s"&S=${acc.projects.size}"
+ val accountsJsonPage = gerritConnectivity.getContentFromApi(url).dropGerritPrefix.mkString
+
+ logger.debug(s"Getting gerrit projects - start: ${acc.projects.size}")
+
+ val pageInfo = Try(parse(accountsJsonPage)).map { jsMapProjects =>
+ val thisPageProjects = jsMapProjects.extract[List[GerritProject]].map(prj => prj.name -> prj)
+ (thisPageProjects.size == PAGE_SIZE, acc.copy(projects = acc.projects ++ thisPageProjects))
+ }
+
+ pageInfo match {
+ case Success((newMore, newGerritProjects)) => loopThroughPages(newMore, Success(newGerritProjects))
+ case Failure(exception) => loopThroughPages(more=false, Failure(exception))
+ }
+ }
+ }
+ loopThroughPages(more=true)
+ }
+}
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/broadcast/package.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/broadcast/package.scala
index 0624eae..e06aa27 100644
--- a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/broadcast/package.scala
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/broadcast/package.scala
@@ -16,5 +16,6 @@
package object broadcast {
type GerritAccountId = Int
+ type GerritProjectName = String
type GerritUserIdentifier = String
}
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/job/Main.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/job/Main.scala
index 8690c58..0557983 100644
--- a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/job/Main.scala
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/job/Main.scala
@@ -14,7 +14,7 @@
package com.gerritforge.analytics.auditlog.job
-import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUserInfo, GerritUserIdentifiers}
+import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUserInfo, GerritProjects, GerritUserIdentifiers}
import com.gerritforge.analytics.auditlog.model.ElasticSearchFields._
import com.gerritforge.analytics.auditlog.model._
import com.gerritforge.analytics.auditlog.range.TimeRange
@@ -30,6 +30,17 @@
CommandLineArguments(args) match {
case Some(config) =>
+
+ val tryProjects = GerritProjects.loadProjects(
+ new GerritConnectivity(config.gerritUsername, config.gerritPassword, config.ignoreSSLCert.getOrElse(false)),
+ config.gerritUrl.get
+ )
+
+ if (tryProjects.isFailure) {
+ logger.error("Error loading public projects", tryProjects.failed.get)
+ sys.exit(1)
+ }
+
val tryUserIdentifiers = GerritUserIdentifiers.loadAccounts(
new GerritConnectivity(config.gerritUsername, config.gerritPassword, config.ignoreSSLCert.getOrElse(false)),
config.gerritUrl.get
@@ -48,7 +59,13 @@
spark
.getEventsFromPath(config.eventsPath.get)
- .transformEvents(tryUserIdentifiers.get, triedAdditionalUserInfo.get,config.eventsTimeAggregation.get, TimeRange(config.since, config.until))
+ .transformEvents(
+ tryUserIdentifiers.get,
+ triedAdditionalUserInfo.get,
+ tryProjects.get,
+ config.eventsTimeAggregation.get,
+ TimeRange(config.since, config.until)
+ )
.saveToEs(s"${config.elasticSearchIndex.get}/$DOCUMENT_TYPE")
case None =>
@@ -56,5 +73,4 @@
sys.exit(1)
}
-}
-
+}
\ No newline at end of file
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/model/ElasticSearchFields.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/model/ElasticSearchFields.scala
index 1980385..59114c2 100644
--- a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/model/ElasticSearchFields.scala
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/model/ElasticSearchFields.scala
@@ -23,6 +23,7 @@
val ACCESS_PATH_FIELD = "access_path"
val RESULT_FIELD = "result"
val USER_TYPE_FIELD = "user_type"
+ val PROJECT_FIELD = "project"
val FACETING_FIELDS = List(
TIME_BUCKET_FIELD,
@@ -32,6 +33,7 @@
ACCESS_PATH_FIELD,
COMMAND_FIELD,
COMMAND_ARGS_FIELD,
+ PROJECT_FIELD,
RESULT_FIELD
)
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/AuditLogsTransformer.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/AuditLogsTransformer.scala
index d9138bc..3fef687 100644
--- a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/AuditLogsTransformer.scala
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/AuditLogsTransformer.scala
@@ -14,7 +14,7 @@
package com.gerritforge.analytics.auditlog.spark
-import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUserInfo, AdditionalUsersInfo, GerritUserIdentifiers}
+import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUsersInfo, GerritProjects, GerritUserIdentifiers}
import com.gerritforge.analytics.auditlog.model.AuditEvent
import com.gerritforge.analytics.auditlog.model.ElasticSearchFields._
import com.gerritforge.analytics.auditlog.range.TimeRange
@@ -23,10 +23,15 @@
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
-case class AuditLogsTransformer(gerritIdentifiers: GerritUserIdentifiers = GerritUserIdentifiers.empty, additionalUsersInfo: AdditionalUsersInfo = AdditionalUsersInfo.empty)(implicit spark: SparkSession) {
+case class AuditLogsTransformer(
+ gerritIdentifiers: GerritUserIdentifiers = GerritUserIdentifiers.empty,
+ additionalUsersInfo: AdditionalUsersInfo = AdditionalUsersInfo.empty,
+ gerritProjects: GerritProjects = GerritProjects.empty
+)(implicit spark: SparkSession) {
private val broadcastUserIdentifiers = spark.sparkContext.broadcast(gerritIdentifiers)
private val broadcastAdditionalUsersInfo = spark.sparkContext.broadcast(additionalUsersInfo)
+ private val broadcastGerritProjects = spark.sparkContext.broadcast(gerritProjects)
def transform(auditEventsRDD: RDD[AuditEvent], timeAggregation: String, timeRange: TimeRange = TimeRange.always): DataFrame =
auditEventsRDD
@@ -37,5 +42,6 @@
.withTimeBucketColum(TIME_BUCKET_FIELD, timeAggregation)
.withCommandColumns(COMMAND_FIELD, COMMAND_ARGS_FIELD)
.withUserTypeColumn(USER_TYPE_FIELD, broadcastAdditionalUsersInfo.value)
+ .withProjectColumn(PROJECT_FIELD, broadcastGerritProjects.value)
.aggregateNumEventsColumn(NUM_EVENTS_FIELD, FACETING_FIELDS)
}
\ No newline at end of file
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/dataframe/ops/DataFrameOps.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/dataframe/ops/DataFrameOps.scala
index 80f2e24..927556a 100644
--- a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/dataframe/ops/DataFrameOps.scala
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/dataframe/ops/DataFrameOps.scala
@@ -14,11 +14,11 @@
package com.gerritforge.analytics.auditlog.spark.dataframe.ops
-import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUsersInfo, GerritUserIdentifiers}
+import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUsersInfo, GerritProjects, GerritUserIdentifiers}
import com.gerritforge.analytics.auditlog.spark.sql.udf.SparkExtractors.{extractCommandArgumentsUDF, extractCommandUDF}
-import org.apache.spark.sql.{Column, DataFrame}
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.{udf, _}
+import org.apache.spark.sql.{Column, DataFrame}
import scala.util.Try
@@ -53,6 +53,13 @@
.withColumn(commandArgsCol, extractCommandArgumentsUDF(col("what"), col("access_path")))
}
+ def withProjectColumn(projectCol: String, gerritProjects: GerritProjects): DataFrame = {
+ def extractProjectUDF: UserDefinedFunction = udf((what: String, accessPath: String) => gerritProjects.extractProject(what, accessPath))
+
+ dataFrame
+ .withColumn(projectCol, extractProjectUDF(col("what"), col("access_path")))
+ }
+
def withUserTypeColumn(commandCol: String, additionalUsersInfo: AdditionalUsersInfo): DataFrame = {
def extractUserType: UserDefinedFunction = udf((who: Int) => additionalUsersInfo.getUserType(who))
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/rdd/ops/SparkRDDOps.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/rdd/ops/SparkRDDOps.scala
index 22efd88..bfc466e 100644
--- a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/rdd/ops/SparkRDDOps.scala
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/rdd/ops/SparkRDDOps.scala
@@ -14,7 +14,7 @@
package com.gerritforge.analytics.auditlog.spark.rdd.ops
-import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUsersInfo, GerritUserIdentifiers}
+import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUsersInfo, GerritProjects, GerritUserIdentifiers}
import com.gerritforge.analytics.auditlog.model.AuditEvent
import com.gerritforge.analytics.auditlog.range.TimeRange
import com.gerritforge.analytics.auditlog.spark.AuditLogsTransformer
@@ -29,10 +29,15 @@
def toJsonString: RDD[String] = rdd.map(_.toJsonString)
- def transformEvents(gerritUserIdentifiers: GerritUserIdentifiers, additionalUsersInfo: AdditionalUsersInfo, timeAggregation: String, timeRange: TimeRange)
- (implicit spark: SparkSession): DataFrame = {
+ def transformEvents(
+ gerritUserIdentifiers: GerritUserIdentifiers,
+ additionalUsersInfo: AdditionalUsersInfo,
+ gerritProjects: GerritProjects,
+ timeAggregation: String,
+ timeRange: TimeRange
+ )(implicit spark: SparkSession): DataFrame = {
- AuditLogsTransformer(gerritUserIdentifiers, additionalUsersInfo)
+ AuditLogsTransformer(gerritUserIdentifiers, additionalUsersInfo, gerritProjects)
.transform(rdd, timeAggregation, timeRange)
}
}
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractors.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractors.scala
index 013213f..6abe234 100644
--- a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractors.scala
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractors.scala
@@ -14,26 +14,24 @@
package com.gerritforge.analytics.auditlog.spark.sql.udf
+import com.gerritforge.analytics.auditlog.util.RegexUtil
import com.typesafe.scalalogging.LazyLogging
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.udf
-import scala.util.matching.Regex
+case object SparkExtractors extends LazyLogging with RegexUtil {
-case object SparkExtractors extends LazyLogging {
- private val GERRIT_SSH_COMMAND = new Regex("""^(.+?)\.""", "capture")
- private val GERRIT_SSH_COMMAND_ARGUMENTS = new Regex("""^.+?\.(.+)""", "capture")
+ // regular expressions to extract commands
+ private val GERRIT_SSH_COMMAND = capture(r = """^(.+?)\.""")
+ private val GIT_COMMAND = capture(r = """.*(git-upload-pack|git-receive-pack)""")
- private val GIT_COMMAND = new Regex(""".*(git-upload-pack|git-receive-pack)""", "capture")
- private val GIT_SSH_COMMAND_ARGUMENTS = new Regex("""git-(?:upload|receive)-pack\.(.+)""", "capture")
- private val GIT_HTTP_COMMAND_ARGUMENTS = new Regex("""(^http.*)""", "capture")
+ // regular expressions to extract command arguments
+ private val GERRIT_SSH_COMMAND_ARGUMENTS = capture(r = """^.+?\.(.+)""")
+ private val GIT_SSH_COMMAND_ARGUMENTS = capture(r = """git-(?:upload|receive)-pack\.(.+)""")
+ private val GIT_HTTP_COMMAND_ARGUMENTS = capture(r = """(^http.*)""")
val FAILED_SSH_AUTH = "FAILED_SSH_AUTH"
- private def extractOrElse(rx: Regex, target: String, default: String): String = extractGroup(rx, target).getOrElse(default)
-
- private def extractGroup(rx: Regex, target: String): Option[String] = rx.findAllMatchIn(target).toList.headOption.map(_.group("capture"))
-
def extractCommand(what: String, accessPath: String, httpMethod: String = null): String = accessPath match {
case "SSH_COMMAND" => extractOrElse(GERRIT_SSH_COMMAND, what, what)
case "GIT" => extractOrElse(GIT_COMMAND, what, what)
diff --git a/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/util/RegexUtil.scala b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/util/RegexUtil.scala
new file mode 100644
index 0000000..f2cbce3
--- /dev/null
+++ b/auditlog/src/main/scala/com/gerritforge/analytics/auditlog/util/RegexUtil.scala
@@ -0,0 +1,27 @@
+// Copyright (C) 2019 GerritForge Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.gerritforge.analytics.auditlog.util
+import scala.util.matching.Regex
+
+trait RegexUtil {
+ def capture(r: String) = new Regex(r, "capture")
+
+ def matches(regex: Regex, string: String): Boolean = regex.findFirstIn(string).isDefined
+
+ def extractOrElse(rx: Regex, target: String, default: String): String = extractGroup(rx, target).getOrElse(default)
+
+ def extractGroup(rx: Regex, target: String): Option[String] = rx.findAllMatchIn(target).toList.headOption.map(_.group("capture"))
+
+}
diff --git a/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/AuditLogsTransformerSpec.scala b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/AuditLogsTransformerSpec.scala
index b0a09a2..3b5fefc 100644
--- a/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/AuditLogsTransformerSpec.scala
+++ b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/AuditLogsTransformerSpec.scala
@@ -16,7 +16,7 @@
import java.sql
import com.gerritforge.analytics.SparkTestSupport
-import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUserInfo, AdditionalUsersInfo, GerritUserIdentifiers}
+import com.gerritforge.analytics.auditlog.broadcast._
import com.gerritforge.analytics.auditlog.model.{ElasticSearchFields, HttpAuditEvent, SshAuditEvent}
import com.gerritforge.analytics.auditlog.spark.AuditLogsTransformer
import com.gerritforge.analytics.support.ops.CommonTimeOperations._
@@ -42,6 +42,7 @@
anonymousHttpAuditEvent.accessPath.get,
GIT_UPLOAD_PACK,
anonymousHttpAuditEvent.what,
+ null, // no project
anonymousHttpAuditEvent.result,
expectedAggregatedCount
)
@@ -63,6 +64,7 @@
authenticatedHttpAuditEvent.accessPath.get,
GIT_UPLOAD_PACK,
authenticatedHttpAuditEvent.what,
+ null, // no project
authenticatedHttpAuditEvent.result,
expectedAggregatedCount
)
@@ -87,6 +89,7 @@
authenticatedHttpAuditEvent.accessPath.get,
GIT_UPLOAD_PACK,
authenticatedHttpAuditEvent.what,
+ null, // no project
authenticatedHttpAuditEvent.result,
expectedAggregatedCount
)
@@ -108,6 +111,7 @@
sshAuditEvent.accessPath.get,
SSH_GERRIT_COMMAND,
SSH_GERRIT_COMMAND_ARGUMENTS,
+ null, // no project
sshAuditEvent.result,
expectedAggregatedCount
)
@@ -129,6 +133,7 @@
sshAuditEvent.accessPath.get,
SSH_GERRIT_COMMAND,
SSH_GERRIT_COMMAND_ARGUMENTS,
+ null, // no project
sshAuditEvent.result,
expectedAggregatedCount
)
@@ -152,6 +157,7 @@
sshAuditEvent.accessPath.get,
SSH_GERRIT_COMMAND,
SSH_GERRIT_COMMAND_ARGUMENTS,
+ null, // no project
sshAuditEvent.result,
expectedAggregatedCount
),
@@ -163,6 +169,7 @@
sshAuditEvent.accessPath.get,
SSH_GERRIT_COMMAND,
SSH_GERRIT_COMMAND_ARGUMENTS,
+ null, // no project
sshAuditEvent.result,
expectedAggregatedCount
)
@@ -187,6 +194,7 @@
sshAuditEvent.accessPath.get,
SSH_GERRIT_COMMAND,
SSH_GERRIT_COMMAND_ARGUMENTS,
+ null, // no project
sshAuditEvent.result,
expectedSshAggregatedCount
),
@@ -198,6 +206,7 @@
authenticatedHttpAuditEvent.accessPath.get,
GIT_UPLOAD_PACK,
authenticatedHttpAuditEvent.what,
+ null, // no project
authenticatedHttpAuditEvent.result,
expectedHttpAggregatedCount
)
@@ -237,6 +246,26 @@
dataFrame.collect.length shouldBe 1
dataFrame.collect.head.getAs[String](ElasticSearchFields.USER_TYPE_FIELD) shouldBe userType
}
+
+ it should "extract gerrit project from an http event" in {
+ val events = Seq(authenticatedHttpAuditEvent)
+
+ val dataFrame = AuditLogsTransformer(gerritProjects = GerritProjects(Map(project -> GerritProject(project))))
+ .transform(sc.parallelize(events), timeAggregation="hour")
+
+ dataFrame.collect.length shouldBe 1
+ dataFrame.collect.head.getAs[String](ElasticSearchFields.PROJECT_FIELD) shouldBe project
+ }
+
+ it should "extract gerrit project from an ssh event" in {
+ val events = Seq(sshAuditEvent)
+
+ val dataFrame = AuditLogsTransformer(gerritProjects = GerritProjects(Map(project -> GerritProject(project))))
+ .transform(sc.parallelize(events), timeAggregation="hour")
+
+ dataFrame.collect.length shouldBe 1
+ dataFrame.collect.head.getAs[String](ElasticSearchFields.PROJECT_FIELD) shouldBe project
+ }
}
object AuditLogsTransformerSpec {
diff --git a/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/TestFixtures.scala b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/TestFixtures.scala
index 7c5b65e..a69ac29 100644
--- a/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/TestFixtures.scala
+++ b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/TestFixtures.scala
@@ -23,13 +23,14 @@
val timeAtStart = 1544802407000L
val elapsed = 12
val uuid = "audit:5f10fea5-35d1-4252-b86f-99db7a9b549b"
+ val project = "Mirantis/tcp-qa"
val GIT_UPLOAD_PACK = "git-upload-pack"
val httpMethod = "GET"
val httpStatus = "200"
- val httpWhat=s"https://review.gerrithub.io/Mirantis/tcp-qa/$GIT_UPLOAD_PACK"
+ val httpWhat=s"https://review.gerrithub.io/$project/$GIT_UPLOAD_PACK"
val anonymousHttpAuditEvent = HttpAuditEvent(Some(gitAccessPath), httpMethod, httpStatus, sessionId, None, timeAtStart, httpWhat, elapsed, uuid)
val authenticatedHttpAuditEvent: HttpAuditEvent = anonymousHttpAuditEvent.copy(who=Some(userId))
@@ -37,7 +38,7 @@
val sshAccessPath = "SSH_COMMAND"
val sshResult = "0"
val SSH_GERRIT_COMMAND = "gerrit"
- val SSH_GERRIT_COMMAND_ARGUMENTS = "stream-events.-s.patchset-created.-s.change-restored.-s.comment-added"
+ val SSH_GERRIT_COMMAND_ARGUMENTS = s"query.--format.json.--current-patch-set.project:$project"
val sshWhat = s"$SSH_GERRIT_COMMAND.$SSH_GERRIT_COMMAND_ARGUMENTS"
diff --git a/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/broadcast/GerritProjectsSpec.scala b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/broadcast/GerritProjectsSpec.scala
new file mode 100644
index 0000000..57598e4
--- /dev/null
+++ b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/broadcast/GerritProjectsSpec.scala
@@ -0,0 +1,238 @@
+// Copyright (C) 2019 GerritForge Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.gerritforge.analytics.auditlog.broadcast
+import java.net.URLEncoder
+
+import org.scalatest.{FlatSpec, Matchers}
+
+class GerritProjectsSpec extends FlatSpec with Matchers {
+ behavior of "extract project from SSH commands"
+
+ it should "extract nothing where project could not be found in existing projects" in {
+ val project = "spdk/spdk.github.io"
+ val what = s"gerrit.query.--format.json.--current-patch-set.project:$project"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) shouldBe empty
+ }
+
+ it should "extract nothing where command does not contain project" in {
+ val what = s"gerrit.command.not.targeting.any.project"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) shouldBe empty
+ }
+
+ it should "extract nothing where command is LOGIN" in {
+ val what = s"LOGIN"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) shouldBe empty
+ }
+
+ it should "extract a project from a gerrit query command, where the project is delimited by a space" in {
+ val project = "spdk/spdk.github.io"
+ val what = s"gerrit.query.--format.json.--current-patch-set.project:$project commit:7f4e8237114e957e727707ab6549d482d40d7c92 NOT is:draft"
+ val accessPath = "SSH_COMMAND"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract a project from a gerrit query command, where the project is wrapped by curly brackets" in {
+ val project = "dalihub/dali-scene-template"
+ val what = s"gerrit.query.project:{^$project}.--format=JSON.--all-approvals.--comments.--all-reviewers"
+ val accessPath = "SSH_COMMAND"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract a project from a gerrit query command, where the project is dot '.' joined with its arguments" in {
+ val project = "redhat-performance/quads"
+ val what = s"gerrit.query.--format=JSON.project:$project.status:open"
+ val accessPath = "SSH_COMMAND"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract a project from a gerrit index command" in {
+ val project = "Accumbo/iOS"
+ val what = s"gerrit.index.project.$project"
+ val accessPath = "SSH_COMMAND"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract a project included in parenthesis" in {
+
+ val specificProject = s"spdk/spdk.gerrithub.io"
+
+ val what = s"""gerrit.query.--format.json.--current-patch-set.(project:$specificProject) commit:"d2da91b1878f7b7ccd3f30f766ffa2f35cc2011d" NOT is:draft"""
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) should contain(specificProject)
+ }
+
+ it should "extract the most specific project when multiple projects names are substrings of each other" in {
+ val genericProject = "redhat-performance/quads"
+
+ val specificProject = s"$genericProject.github.com"
+
+ val what = s"gerrit.query.--format=JSON.project:$specificProject.status:open"
+ val accessPath = "SSH_COMMAND"
+
+ val existingProjects = GerritProjects(Map(
+ genericProject -> GerritProject(genericProject),
+ specificProject -> GerritProject(specificProject)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(specificProject)
+ }
+
+ it should "extract a project from a receive pack command" in {
+ val project = "att-comdev/prometheus-openstack-exporter"
+ val what = s"git-receive-pack./$project"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract a project from an upload pack command" in {
+ val project = "dalihub/dali-toolkit"
+ val what = s"git-upload-pack./$project"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract a project from a replication start command that has some arguments" in {
+ val project = "singh4java/springboot"
+ val what = s"replication.start.$project.--wait.--now"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract a project from a replication start command that has no arguments" in {
+ val project = "singh4java/springboot.github.com"
+ val what = s"replication.start.$project"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract nothing when replication start refers to no project" in {
+ val what = s"replication.start.--help"
+ val accessPath = "SSH_COMMAND"
+
+ GerritProjects.empty.extractProject(what, accessPath) shouldBe empty
+ }
+
+ it should "extract a project when is found in at the end of the command, even though the word 'project' is not part of the command" in {
+ val project = "Sonos-Inc/old97s"
+ val what = s"gerrit.query.--format=JSON.--current-patch-set.$project"
+ val accessPath = "SSH_COMMAND"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ behavior of "REST API calls"
+
+ it should "extract the decoded project name from a /changes/ url" in {
+ val project = "true-wealth/b2b"
+ val what = s"/changes/${URLEncoder.encode(project, "UTF-8")}~425967/revisions/5/actions"
+ val accessPath = "REST_API"
+
+ GerritProjects.empty.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract the decoded project name from a /projects/ url" in {
+ val project = "VidScale/UDNTests"
+ val what = s"/projects/${URLEncoder.encode(project, "UTF-8")}"
+ val accessPath = "REST_API"
+
+ GerritProjects.empty.extractProject(what, accessPath) should contain(project)
+ }
+
+ behavior of "GIT HTTP calls"
+
+ it should "extract the project from an info ref upload pack target" in {
+ val project = "dushyantRathore/Jenkins_Test"
+ val what = s"https://review.gerrithub.io/$project/info/refs?service=git-upload-pack"
+ val accessPath = "GIT"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract the project from an info ref receive pack target" in {
+ val project = "dushyantRathore/Jenkins_Test"
+ val what = s"https://review.gerrithub.io/$project/info/refs?service=git-receive-pack"
+ val accessPath = "GIT"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract the project from an upload pack target" in {
+ val project = "att-comdev/cicd"
+ val what = s"https://review.gerrithub.io/$project/git-upload-pack"
+ val accessPath = "GIT"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+ it should "extract the project from a receive pack target" in {
+ val project = "att-comdev/cicd"
+ val what = s"https://review.gerrithub.io/$project/git-receive-pack"
+ val accessPath = "GIT"
+
+ val existingProjects = GerritProjects(Map(
+ project -> GerritProject(project)
+ ))
+
+ existingProjects.extractProject(what, accessPath) should contain(project)
+ }
+
+}
diff --git a/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractorsSpec.scala b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractorsSpec.scala
index 4ec9210..cbf6762 100644
--- a/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractorsSpec.scala
+++ b/auditlog/src/test/scala/com/gerritforge/analytics/auditlog/spark/sql/udf/SparkExtractorsSpec.scala
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 GerritForge Ltd
+// Copyright (C) 2019 GerritForge Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -186,5 +186,4 @@
SparkExtractors.extractCommandArguments(what, accessPath) shouldBe empty
}
-
}
\ No newline at end of file