blob: 14ca58bfcc0e88178ee47d8bfbc7d462dff8e9cc [file] [log] [blame]
// Copyright (C) 2018 GerritForge Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.gerritforge.analytics.auditlog.spark
import com.gerritforge.analytics.auditlog.broadcast.{AdditionalUsersInfo, GerritProjects, GerritUserIdentifiers}
import com.gerritforge.analytics.auditlog.model.{AggregatedAuditEvent, AuditEvent}
import com.gerritforge.analytics.auditlog.model.ElasticSearchFields._
import com.gerritforge.analytics.auditlog.range.TimeRange
import com.gerritforge.analytics.auditlog.spark.dataframe.ops.DataFrameOps._
import com.gerritforge.analytics.auditlog.spark.rdd.ops.SparkRDDOps._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, SparkSession}
case class AuditLogsTransformer(
gerritIdentifiers: GerritUserIdentifiers = GerritUserIdentifiers.empty,
additionalUsersInfo: AdditionalUsersInfo = AdditionalUsersInfo.empty,
gerritProjects: GerritProjects = GerritProjects.empty
)(implicit spark: SparkSession) {
private val broadcastUserIdentifiers = spark.sparkContext.broadcast(gerritIdentifiers)
private val broadcastAdditionalUsersInfo = spark.sparkContext.broadcast(additionalUsersInfo)
private val broadcastGerritProjects = spark.sparkContext.broadcast(gerritProjects)
def transform(auditEventsRDD: RDD[AuditEvent], timeAggregation: String, timeRange: TimeRange = TimeRange.always): Dataset[AggregatedAuditEvent] = {
import spark.implicits._
auditEventsRDD
.filterWithinRange(TimeRange(timeRange.since, timeRange.until))
.toJsonString
.toJsonTableDataFrame
.hydrateWithUserIdentifierColumn(USER_IDENTIFIER_FIELD, broadcastUserIdentifiers.value)
.withTimeBucketColumn(TIME_BUCKET_FIELD, timeAggregation)
.withCommandColumns(COMMAND_FIELD, COMMAND_ARGS_FIELD)
.withSubCommandColumns(SUB_COMMAND_FIELD)
.withUserTypeColumn(USER_TYPE_FIELD, broadcastAdditionalUsersInfo.value)
.withProjectColumn(PROJECT_FIELD, broadcastGerritProjects.value)
.aggregateNumEventsColumn(NUM_EVENTS_FIELD, FACETING_FIELDS)
.as[AggregatedAuditEvent]
}
}