// blob: 3cc989982bae0aa12c9bb4323df792cc3fb06208 [file] [log] [blame]
// Copyright (C) 2018 GerritForge Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.gerritforge.analytics.auditlog.spark.sql.udf
import com.gerritforge.analytics.auditlog.util.RegexUtil
import com.typesafe.scalalogging.LazyLogging
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.udf
/** Extractors that derive the command, the command arguments and the sub-command of an
  * audit-log record from its raw `what` string, dispatching on the record's access path.
  *
  * Every extractor is also exposed as a Spark [[UserDefinedFunction]] (the `*UDF` members).
  *
  * `capture`, `extractGroup` and `extractOrElse` are inherited from `RegexUtil`, which is not
  * visible in this file. From the way they are used here, `extractGroup` yields an
  * `Option[String]` and `extractOrElse` yields a `String`, falling back to its third
  * argument - presumably when the pattern does not match (confirm against `RegexUtil`).
  */
case object SparkExtractors extends LazyLogging with RegexUtil {

  // regular expressions to extract commands
  private val GERRIT_SSH_COMMAND = capture(r = """^(.+?)\.""")
  private val GIT_COMMAND        = capture(r = """.*(git-upload-pack|git-receive-pack)""")

  // regular expressions to extract command arguments
  private val GERRIT_SSH_COMMAND_ARGUMENTS = capture(r = """^.+?\.(.+)""")
  private val GIT_SSH_COMMAND_ARGUMENTS    = capture(r = """git-(?:upload|receive)-pack\.(.+)""")
  private val GIT_HTTP_COMMAND_ARGUMENTS   = capture(r = """(^http.*)""")

  // regular expressions to extract sub-commands
  // Rest API sub-command example: what = /config/server/version -> sub-command: config
  private val REST_API_SUB_COMMAND = capture("""^\/(?:a\/)?(.*?)(?:\/|\s|$)""")
  // SSH sub-command example: what = gerrit.plugin.reload.analytics -> sub-command: plugin
  private val SSH_SUB_COMMAND = capture("""^.*?\.(.*?)(?:\.|\s|$)""")

  /** Command name reported for a record whose access path is `null` and whose `what` is "AUTH". */
  val FAILED_SSH_AUTH = "FAILED_SSH_AUTH"

  /** Extracts the command of an audit record.
    *
    * @param what       raw `what` string of the record
    * @param accessPath access path of the record; may be `null`
    * @param httpMethod HTTP method of the record; may be `null` (the default)
    * @return by access path:
    *         - "SSH_COMMAND": the group captured by the SSH command pattern, else `what`
    *         - "GIT": the group captured by the git command pattern, else `what`
    *         - "REST_API" / "UNKNOWN": `httpMethod` when it is not `null`, else `what`
    *         - "JSON_RPC": `what`
    *         - `null` with `what == "AUTH"`: [[FAILED_SSH_AUTH]]
    *         - anything else: `what`, after logging a warning
    */
  def extractCommand(what: String, accessPath: String, httpMethod: String = null): String =
    accessPath match {
      case "SSH_COMMAND"          => extractOrElse(GERRIT_SSH_COMMAND, what, what)
      case "GIT"                  => extractOrElse(GIT_COMMAND, what, what)
      case "REST_API" | "UNKNOWN" => Option(httpMethod).getOrElse(what)
      case "JSON_RPC"             => what
      case null if what == "AUTH" => FAILED_SSH_AUTH
      case unexpected =>
        logger.warn(s"Unexpected access path '$unexpected' encountered when extracting command from '$what'")
        what
    }

  /** Spark UDF wrapper around [[extractCommand]]; takes (what, accessPath, httpMethod). */
  def extractCommandUDF: UserDefinedFunction =
    udf((rawCommand: String, accessPath: String, httpMethod: String) =>
      extractCommand(rawCommand, accessPath, httpMethod))

  /** Extracts the arguments of the command of an audit record.
    *
    * @param what       raw `what` string of the record
    * @param accessPath access path of the record; may be `null`
    * @return by access path:
    *         - "SSH_COMMAND": the group captured by the SSH arguments pattern, if any
    *         - "GIT": the group captured by the git-over-SSH arguments pattern, otherwise the
    *           one captured by the git-over-HTTP pattern, otherwise `None`
    *         - "REST_API" / "UNKNOWN": `Some(what)`
    *         - "JSON_RPC", or `null` with `what == "AUTH"`: `None`
    *         - anything else: `None`, after logging a warning
    */
  def extractCommandArguments(what: String, accessPath: String): Option[String] =
    accessPath match {
      case "SSH_COMMAND" => extractGroup(GERRIT_SSH_COMMAND_ARGUMENTS, what)
      case "GIT" =>
        // The HTTP fallback uses a null default, so wrapping the result in Option(...)
        // turns "neither pattern produced a value" into None.
        Option(
          extractGroup(GIT_SSH_COMMAND_ARGUMENTS, what)
            .getOrElse(extractOrElse(GIT_HTTP_COMMAND_ARGUMENTS, what, null))
        )
      case "REST_API" | "UNKNOWN" => Some(what)
      case "JSON_RPC"             => None
      case null if what == "AUTH" => None
      case unexpected =>
        logger.warn(s"Unexpected access path '$unexpected' encountered when extracting command arguments from '$what'")
        None
    }

  /** Spark UDF wrapper around [[extractCommandArguments]]; takes (what, accessPath). */
  def extractCommandArgumentsUDF: UserDefinedFunction =
    udf((rawCommand: String, accessPath: String) => extractCommandArguments(rawCommand, accessPath))

  /** Extracts the sub-command of an audit record.
    *
    * @param what       raw `what` string of the record
    * @param accessPath access path of the record; may be `null`
    * @return by access path:
    *         - "REST_API" / "UNKNOWN": the group captured by the REST sub-command pattern,
    *           else `what`, wrapped in `Some`
    *         - "SSH_COMMAND": the group captured by the SSH sub-command pattern, else `what`,
    *           wrapped in `Some`
    *         - "GIT", "JSON_RPC", or `null` with `what == "AUTH"`: `None`
    *         - anything else: `None`, after logging a warning
    */
  def extractSubCommand(what: String, accessPath: String): Option[String] =
    accessPath match {
      case "REST_API" | "UNKNOWN" => Some(extractOrElse(REST_API_SUB_COMMAND, what, what))
      case "SSH_COMMAND"          => Some(extractOrElse(SSH_SUB_COMMAND, what, what))
      case "GIT"                  => None
      case "JSON_RPC"             => None
      // A null access path with what == "AUTH" is the record extractCommand maps to
      // FAILED_SSH_AUTH. It is an expected input with no sub-command, so handle it like the
      // sibling extractors do instead of logging an "unexpected access path" warning.
      case null if what == "AUTH" => None
      case unexpected =>
        logger.warn(s"Unexpected access path '$unexpected' encountered when extracting sub-command from '$what'")
        None
    }

  /** Spark UDF wrapper around [[extractSubCommand]]; takes (what, accessPath). */
  def extractSubCommandUDF: UserDefinedFunction =
    udf((rawCommand: String, accessPath: String) => extractSubCommand(rawCommand, accessPath))
}