Add support for "is_merge" field to the ETL and Kibana configuration
Support the "is_merge" flag to differentiate between merge and non-merge
commits.
Add extra logging and resolve the problem of HDFS libraries being required
when saving output to JSON files.
Replace System.out prints with logger.info calls.
Jira-Id: GERICS-610
Change-Id: I57c2f4c6eeea7424767427fcb5e1b17c05212a59
diff --git a/build.sbt b/build.sbt
index cb38e0e..f59493c 100644
--- a/build.sbt
+++ b/build.sbt
@@ -15,6 +15,8 @@
// json4s still needed by GerritProjects
"org.json4s" %% "json4s-native" % "3.2.11",
+ "com.typesafe.scala-logging" %% "scala-logging" % "3.7.2",
+
"com.github.scopt" %% "scopt" % "3.6.0",
"org.scalactic" %% "scalactic" % "3.0.1" % "test",
"org.scalatest" %% "scalatest" % "3.0.1" % "test"
diff --git a/dashboard-importer/kibana-config/BB_index-pattern.json b/dashboard-importer/kibana-config/BB_index-pattern.json
index aa5ca6e..0e5df8b 100644
--- a/dashboard-importer/kibana-config/BB_index-pattern.json
+++ b/dashboard-importer/kibana-config/BB_index-pattern.json
@@ -7,6 +7,6 @@
"title": "gerrit",
"timeFieldName": "last_commit_date",
"notExpandable": true,
- "fields": "[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"added_lines\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"author\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"author.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"day\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"deleted_lines\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"email\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"email.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"email_alias\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"email_alias.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\
":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"hour\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"last_commit_date\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"month\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"name.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"num_commits\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"num_files\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"organization\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"organization.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"project\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"project.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"year\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"changes_per_commit\",\"type\":\"number\",\"count\":0,\"scripted\":true,\"script\":\"if (doc['num_commits'].value > 0) { \\n return ((float) (doc['added_lines'].value + 
doc['deleted_lines'].value) / (float) doc['num_commits'].value);\\n} else {\\n return 0;\\n}\",\"lang\":\"painless\",\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false}]"
+ "fields": "[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"added_lines\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"author\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"author.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"day\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"deleted_lines\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"email\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"email.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"email_alias\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"email_alias.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\
":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"hour\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"last_commit_date\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"month\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"name\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"name.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"num_commits\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"is_merge\",\"type\":\"boolean\",\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"num_files\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"organization\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"organization.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"project\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"project.keyword\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"year\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"changes_per_commit\",\"type\":\"number\",\"
count\":0,\"scripted\":true,\"script\":\"if (doc['num_commits'].value > 0) { \\n return ((float) (doc['added_lines'].value + doc['deleted_lines'].value) / (float) doc['num_commits'].value);\\n} else {\\n return 0;\\n}\",\"lang\":\"painless\",\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false}]"
}
}
diff --git a/dashboard-importer/kibana-config/KK_dashboard_projects.json b/dashboard-importer/kibana-config/KK_dashboard_projects.json
index 43c3f13..99c538d 100644
--- a/dashboard-importer/kibana-config/KK_dashboard_projects.json
+++ b/dashboard-importer/kibana-config/KK_dashboard_projects.json
@@ -1,17 +1,17 @@
{
- "_id": "11bacbf0-e4a9-11e7-b91f-5fb563b02e79",
- "_type": "dashboard",
- "_source": {
- "title": "Projects dashboard",
- "hits": 0,
- "description": "",
- "panelsJSON": "[{\"size_x\":6,\"size_y\":3,\"panelIndex\":1,\"type\":\"visualization\",\"id\":\"0d0d5ae0-d9b0-11e7-bc06-5dd1310bbecc\",\"col\":1,\"row\":1},{\"size_x\":6,\"size_y\":3,\"panelIndex\":2,\"type\":\"visualization\",\"id\":\"63c8f590-d9b2-11e7-bc06-5dd1310bbecc\",\"col\":7,\"row\":1},{\"size_x\":6,\"size_y\":3,\"panelIndex\":3,\"type\":\"visualization\",\"id\":\"cc5678a0-d9b0-11e7-bc06-5dd1310bbecc\",\"col\":1,\"row\":4},{\"size_x\":6,\"size_y\":3,\"panelIndex\":4,\"type\":\"visualization\",\"id\":\"0f78a3a0-d9b2-11e7-bc06-5dd1310bbecc\",\"col\":7,\"row\":4},{\"size_x\":6,\"size_y\":3,\"panelIndex\":5,\"type\":\"visualization\",\"id\":\"176b0730-d9ce-11e7-bc47-b39f13297094\",\"col\":1,\"row\":7},{\"size_x\":6,\"size_y\":3,\"panelIndex\":6,\"type\":\"visualization\",\"id\":\"1b513980-d9b7-11e7-bc47-b39f13297094\",\"col\":7,\"row\":7},{\"size_x\":6,\"size_y\":3,\"panelIndex\":7,\"type\":\"visualization\",\"id\":\"67c28b30-d9cf-11e7-bc47-b39f13297094\",\"col\":1,\"row\":10},{\"size_x\":6,\"size_y\":3,\"panelIndex\":8,\"type\":\"visualization\",\"id\":\"208c4da0-d9c5-11e7-bc47-b39f13297094\",\"col\":7,\"row\":10}]",
- "optionsJSON": "{\"darkTheme\":false}",
- "uiStateJSON": "{\"P-1\":{\"vis\":{\"defaultColors\":{\"0 - 100\":\"rgb(0,104,55)\"}}},\"P-5\":{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}},\"P-6\":{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":2,\"direction\":\"desc\"}}}},\"P-7\":{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}},\"P-8\":{\"vis\":{\"defaultColors\":{\"0% - 10%\":\"rgb(255,255,204)\",\"10% - 20%\":\"rgb(255,241,170)\",\"20% - 30%\":\"rgb(254,225,135)\",\"30% - 40%\":\"rgb(254,201,101)\",\"40% - 50%\":\"rgb(254,171,73)\",\"50% - 60%\":\"rgb(253,141,60)\",\"60% - 70%\":\"rgb(252,91,46)\",\"70% - 80%\":\"rgb(237,47,34)\",\"80% - 90%\":\"rgb(212,16,32)\",\"90% - 100%\":\"rgb(176,0,38)\"}}}}",
- "version": 1,
- "timeRestore": false,
- "kibanaSavedObjectMeta": {
- "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}"
- }
+ "_id": "11bacbf0-e4a9-11e7-b91f-5fb563b02e79",
+ "_type": "dashboard",
+ "_source": {
+ "title": "Projects dashboard",
+ "hits": 0,
+ "description": "",
+ "panelsJSON": "[{\"size_x\":6,\"size_y\":3,\"panelIndex\":1,\"type\":\"visualization\",\"id\":\"6a0b5b60-d1e8-42cc-9934-16673b657934\",\"col\":1,\"row\":1},{\"size_x\":6,\"size_y\":3,\"panelIndex\":2,\"type\":\"visualization\",\"id\":\"63c8f590-d9b2-11e7-bc06-5dd1310bbecc\",\"col\":7,\"row\":1},{\"size_x\":6,\"size_y\":3,\"panelIndex\":3,\"type\":\"visualization\",\"id\":\"cc5678a0-d9b0-11e7-bc06-5dd1310bbecc\",\"col\":1,\"row\":4},{\"size_x\":6,\"size_y\":3,\"panelIndex\":4,\"type\":\"visualization\",\"id\":\"0f78a3a0-d9b2-11e7-bc06-5dd1310bbecc\",\"col\":7,\"row\":4},{\"size_x\":6,\"size_y\":3,\"panelIndex\":5,\"type\":\"visualization\",\"id\":\"176b0730-d9ce-11e7-bc47-b39f13297094\",\"col\":1,\"row\":7},{\"size_x\":6,\"size_y\":3,\"panelIndex\":6,\"type\":\"visualization\",\"id\":\"1b513980-d9b7-11e7-bc47-b39f13297094\",\"col\":7,\"row\":7},{\"size_x\":6,\"size_y\":3,\"panelIndex\":7,\"type\":\"visualization\",\"id\":\"67c28b30-d9cf-11e7-bc47-b39f13297094\",\"col\":1,\"row\":10},{\"size_x\":6,\"size_y\":3,\"panelIndex\":8,\"type\":\"visualization\",\"id\":\"208c4da0-d9c5-11e7-bc47-b39f13297094\",\"col\":7,\"row\":10}]",
+ "optionsJSON": "{\"darkTheme\":false}",
+ "uiStateJSON": "{\"P-1\":{\"vis\":{\"defaultColors\":{\"0 - 100\":\"rgb(0,104,55)\"}}},\"P-5\":{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}},\"P-6\":{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":2,\"direction\":\"desc\"}}}},\"P-7\":{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}},\"P-8\":{\"vis\":{\"defaultColors\":{\"0% - 10%\":\"rgb(255,255,204)\",\"10% - 20%\":\"rgb(255,241,170)\",\"20% - 30%\":\"rgb(254,225,135)\",\"30% - 40%\":\"rgb(254,201,101)\",\"40% - 50%\":\"rgb(254,171,73)\",\"50% - 60%\":\"rgb(253,141,60)\",\"60% - 70%\":\"rgb(252,91,46)\",\"70% - 80%\":\"rgb(237,47,34)\",\"80% - 90%\":\"rgb(212,16,32)\",\"90% - 100%\":\"rgb(176,0,38)\"}}}}",
+ "version": 1,
+ "timeRestore": false,
+ "kibanaSavedObjectMeta": {
+ "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}"
}
+ }
}
diff --git a/dashboard-importer/kibana-config/LL_visualization_merge-vs-non-merge-commits.json b/dashboard-importer/kibana-config/LL_visualization_merge-vs-non-merge-commits.json
new file mode 100644
index 0000000..a5ef04b
--- /dev/null
+++ b/dashboard-importer/kibana-config/LL_visualization_merge-vs-non-merge-commits.json
@@ -0,0 +1,14 @@
+{
+"_id": "6a0b5b60-d1e8-42cc-9934-16673b657934",
+"_type": "visualization",
+"_source": {
+"title": "Merge vs Non-Merge commits",
+"visState": "{\"title\":\"Commits per type\",\"type\":\"pie\",\"params\":{\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"num_commits\",\"customLabel\":\"#Commits\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"is_merge\",\"size\":50,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}",
+"uiStateJSON": "{}",
+"description": "",
+"version": 1,
+"kibanaSavedObjectMeta": {
+"searchSourceJSON": "{\"index\":\"gerrit\",\"query\":{\"match_all\":{}},\"filter\":[]}"
+}
+}
+}
diff --git a/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala b/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala
index 5baafbb..81ea883 100644
--- a/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala
+++ b/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala
@@ -81,7 +81,8 @@
added_lines: Integer,
deleted_lines: Integer,
commits: Array[CommitInfo],
- last_commit_date: Long)
+ last_commit_date: Long,
+ is_merge: Boolean)
import org.apache.spark.sql.Encoders
@@ -96,7 +97,9 @@
"project", "json.name as author", "json.email as email",
"json.year as year", "json.month as month", "json.day as day", "json.hour as hour",
"json.num_files as num_files", "json.added_lines as added_lines", "json.deleted_lines as deleted_lines",
- "json.num_commits as num_commits", "json.last_commit_date as last_commit_date")
+ "json.num_commits as num_commits", "json.last_commit_date as last_commit_date",
+ "json.is_merge as is_merge"
+ )
}
def handleAliases(aliasesDF: Option[DataFrame])(implicit spark: SparkSession): DataFrame = {
diff --git a/src/main/scala/com/gerritforge/analytics/job/Main.scala b/src/main/scala/com/gerritforge/analytics/job/Main.scala
index 87468e5..9f65497 100644
--- a/src/main/scala/com/gerritforge/analytics/job/Main.scala
+++ b/src/main/scala/com/gerritforge/analytics/job/Main.scala
@@ -16,11 +16,13 @@
import com.gerritforge.analytics.engine.GerritAnalyticsTransformations._
import com.gerritforge.analytics.model.{GerritEndpointConfig, GerritProjectsRDD}
+import com.typesafe.scalalogging.LazyLogging
+import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SparkSession}
import scala.io.{Codec, Source}
-object Main extends App with Job {
+object Main extends App with Job with LazyLogging {
new scopt.OptionParser[GerritEndpointConfig]("scopt") {
head("scopt", "3.x")
@@ -54,24 +56,34 @@
} text "\"emails to author alias\" input data path"
}.parse(args, GerritEndpointConfig()) match {
case Some(config) =>
- implicit val spark = SparkSession.builder()
+ implicit val spark: SparkSession = SparkSession.builder()
.appName("Gerrit Analytics ETL")
.getOrCreate()
- implicit val implicitConfig = config;
+
+ implicit val _: GerritEndpointConfig = config
+
+ logger.info(s"Starting analytics app with config $config")
+
val dataFrame = run()
+
+ logger.info(s"ES content created, saving it to '${config.outputDir}'")
dataFrame.write.json(config.outputDir)
+
saveES(dataFrame)
+
case None => // invalid configuration usage has been displayed
}
}
-trait Job {
+trait Job { self: LazyLogging =>
implicit val codec = Codec.ISO8859
def run()(implicit config: GerritEndpointConfig, spark: SparkSession): DataFrame = {
import spark.sqlContext.implicits._ // toDF
- implicit val sc = spark.sparkContext
+ implicit val sc: SparkContext = spark.sparkContext
+
val projects = GerritProjectsRDD(Source.fromURL(config.gerritProjectsUrl))
+
val aliasesDF = getAliasDF(config.emailAlias)
projects
@@ -87,7 +99,12 @@
def saveES(df: DataFrame)(implicit config: GerritEndpointConfig) {
import org.elasticsearch.spark.sql._
- config.elasticIndex.map(df.saveToEs(_))
+ config.elasticIndex.foreach { esIndex => // side effect only: runs when an ES index is configured, no-op for None
+ logger.info(s"ES content created, saving it to elastic search instance at '$esIndex'") // log the unwrapped index, not the Option (would print Some(...))
+
+ df.saveToEs(esIndex)
+ }
+
}
}
diff --git a/src/main/scala/com/gerritforge/analytics/model/GerritEndpointConfig.scala b/src/main/scala/com/gerritforge/analytics/model/GerritEndpointConfig.scala
index b7bc647..8134544 100644
--- a/src/main/scala/com/gerritforge/analytics/model/GerritEndpointConfig.scala
+++ b/src/main/scala/com/gerritforge/analytics/model/GerritEndpointConfig.scala
@@ -16,7 +16,7 @@
case class GerritEndpointConfig(baseUrl: String = "",
prefix: Option[String] = None,
- outputDir: String = s"${System.getProperty("java.io.tmp")}/analytics-${System.nanoTime()}",
+ outputDir: String = s"file://${System.getProperty("java.io.tmpdir")}/analytics-${System.nanoTime()}",
elasticIndex: Option[String] = None,
since: Option[String] = None,
until: Option[String] = None,
@@ -35,5 +35,5 @@
.flatMap(queryOpt).mkString("?", "&", "")
def contributorsUrl(projectName: String) =
- s"${baseUrl}/projects/$projectName/analytics~contributors${queryString}"
+ s"$baseUrl/projects/$projectName/analytics~contributors$queryString"
}
\ No newline at end of file
diff --git a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
index b00d36c..7c9774e 100644
--- a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
+++ b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
@@ -94,10 +94,10 @@
import sql.implicits._
val rdd = sc.parallelize(Seq(
- ("p1","""{"name":"a","email":"a@mail.com","year":2017,"month":9, "day":11, "hour":23, "num_commits":1, "num_files": 2, "added_lines":1, "deleted_lines":1, "last_commit_date":0, "commits":[{ "sha1": "e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":0,"merge":false}]}"""),
- ("p2","""{"name":"b","email":"b@mail.com","year":2017,"month":9, "day":11, "hour":23, "num_commits":428, "num_files": 2, "added_lines":1, "deleted_lines":1, "last_commit_date":1500000000000,"commits":[{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":0,"merge":true },{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":1500000000000,"merge":true}]}"""),
+ ("p1","""{"name":"a","email":"a@mail.com","year":2017,"month":9, "day":11, "hour":23, "num_commits":1, "num_files": 2, "added_lines":1, "deleted_lines":1, "last_commit_date":0, "is_merge": false, "commits":[{ "sha1": "e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":0,"merge":false}]}"""),
+ ("p2","""{"name":"b","email":"b@mail.com","year":2017,"month":9, "day":11, "hour":23, "num_commits":428, "num_files": 2, "added_lines":1, "deleted_lines":1, "last_commit_date":1500000000000, "is_merge": true, "commits":[{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":0,"merge":true },{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":1500000000000,"merge":true}]}"""),
// last commit is missing hour,day,month,year to check optionality
- ("p3","""{"name":"c","email":"c@mail.com","num_commits":12,"num_files": 2, "added_lines":1, "deleted_lines":1, "last_commit_date":1600000000000,"commits":[{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":0,"merge":true },{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":1600000000000,"merge":true}]}""")
+ ("p3","""{"name":"c","email":"c@mail.com","num_commits":12,"num_files": 2, "added_lines":1, "deleted_lines":1, "last_commit_date":1600000000000,"is_merge": true,"commits":[{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":0,"merge":true },{"sha1":"e063a806c33bd524e89a87732bd3f1ad9a77a41e", "date":1600000000000,"merge":true}]}""")
))
val df = rdd.toDF("project", "json")
@@ -106,16 +106,17 @@
df.count should be(3)
val collected = df.collect
- df.schema.fields.map(_.name) should contain allOf(
+ df.schema.fields.map(_.name) should contain inOrder (
"project", "author", "email",
"year", "month", "day", "hour",
"num_files", "added_lines", "deleted_lines",
- "num_commits", "last_commit_date")
+ "num_commits", "last_commit_date",
+ "is_merge")
collected should contain allOf(
- Row("p1", "a", "a@mail.com", 2017, 9, 11, 23, 2, 1, 1, 1, 0),
- Row("p2", "b", "b@mail.com", 2017, 9, 11, 23, 2, 1, 1, 428, 1500000000000L),
- Row("p3", "c", "c@mail.com", null, null, null, null, 2, 1, 1, 12, 1600000000000L)
+ Row("p1", "a", "a@mail.com", 2017, 9, 11, 23, 2, 1, 1, 1, 0, false),
+ Row("p2", "b", "b@mail.com", 2017, 9, 11, 23, 2, 1, 1, 428, 1500000000000L, true),
+ Row("p3", "c", "c@mail.com", null, null, null, null, 2, 1, 1, 12, 1600000000000L, true)
)
}