Merge "Added Scalafmt configuration"
diff --git a/build.sbt b/build.sbt
index 75aea54..2a2c22e 100644
--- a/build.sbt
+++ b/build.sbt
@@ -50,7 +50,7 @@
dockerfile in docker := {
val artifact: File = assembly.value
val artifactTargetPath = s"/app/${name.value}-assembly.jar"
- val entryPointPath = s"/app/gerrit-analytics-etl.sh"
+ val entryPointBase = s"/app"
new Dockerfile {
from("openjdk:8-alpine")
@@ -62,10 +62,11 @@
env("SPARK_JAR_PATH", artifactTargetPath)
env("SPARK_JAR_CLASS",mainClassPackage)
runRaw("curl -sL \"http://www-eu.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz\" | tar -xz -C /usr/local")
- copy(baseDirectory(_ / "scripts" / "gerrit-analytics-etl.sh").value, file(entryPointPath))
+ copy(baseDirectory(_ / "scripts" / "gerrit-analytics-etl.sh").value, file(s"$entryPointBase/gerrit-analytics-etl.sh"))
+ copy(baseDirectory(_ / "scripts" / "wait-for-elasticsearch.sh").value, file(s"$entryPointBase/wait-for-elasticsearch.sh"))
add(artifact, artifactTargetPath)
runRaw(s"chmod +x $artifactTargetPath")
- cmd(s"/bin/sh", entryPointPath)
+ cmd(s"/bin/sh", s"$entryPointBase/gerrit-analytics-etl.sh")
}
}
imageNames in docker := Seq(
diff --git a/scripts/gerrit-analytics-etl.sh b/scripts/gerrit-analytics-etl.sh
index b4c1aca..30cf2cf 100755
--- a/scripts/gerrit-analytics-etl.sh
+++ b/scripts/gerrit-analytics-etl.sh
@@ -2,18 +2,31 @@
set -o errexit
+# Required: each check's subshell exits 1 when the variable is empty, and errexit then stops the script
test -z "$ES_HOST" && ( echo "ES_HOST is not set; exiting" ; exit 1 )
test -z "$ANALYTICS_ARGS" && ( echo "ANALYTICS_ARGS is not set; exiting" ; exit 1 )
test -z "$GERRIT_URL" && ( echo "GERRIT_URL is not set; exiting" ; exit 1 )
-echo "Elastic Search Host: $ES_HOST"
-echo "Gerrit URL: $GERRIT_URL"
-echo "Analytics arguments: $ANALYTICS_ARGS"
-echo "Spark jar class: $SPARK_JAR_CLASS"
-echo "Spark jar path: $SPARK_JAR_PATH"
+# Optional: defaults apply when the variable is unset or empty
+ES_PORT="${ES_PORT:-9200}"
+SPARK_JAR_PATH="${SPARK_JAR_PATH:-/app/analytics-etl-assembly.jar}"
+SPARK_JAR_CLASS="${SPARK_JAR_CLASS:-com.gerritforge.analytics.job.Main}"
+
+echo "* Elastic Search Host: $ES_HOST:$ES_PORT"
+echo "* Gerrit URL: $GERRIT_URL"
+echo "* Analytics arguments: $ANALYTICS_ARGS"
+echo "* Spark jar class: $SPARK_JAR_CLASS"
+echo "* Spark jar path: $SPARK_JAR_PATH"
+
+# Block until Elasticsearch answers; a non-zero exit from the helper aborts this script via errexit.
+"$(dirname "$0")/wait-for-elasticsearch.sh" "${ES_HOST}" "${ES_PORT}"
+
+echo "Elasticsearch is up, now running spark job..."
+# Hand the connector the same host and port that were just probed.
spark-submit \
-    --conf spark.es.nodes="$ES_HOST" \
-    --class $SPARK_JAR_CLASS $SPARK_JAR_PATH \
-    --url $GERRIT_URL \
-    $ANALYTICS_ARGS
\ No newline at end of file
+    --conf spark.es.nodes="$ES_HOST" \
+    --conf spark.es.port="$ES_PORT" \
+    --class ${SPARK_JAR_CLASS} ${SPARK_JAR_PATH} \
+    --url ${GERRIT_URL} \
+    ${ANALYTICS_ARGS}
diff --git a/scripts/wait-for-elasticsearch.sh b/scripts/wait-for-elasticsearch.sh
new file mode 100755
index 0000000..538498d
--- /dev/null
+++ b/scripts/wait-for-elasticsearch.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+#
+# Poll the Elasticsearch cluster-health endpoint until it responds.
+#
+# Usage: wait-for-elasticsearch.sh <host> <port>
+#
+# Exits 0 as soon as a request succeeds; exits 1 when host or port is
+# missing, or after 30 failed attempts made 2 seconds apart.
+
+wait_for() {
+  if [ -z "$1" ] || [ -z "$2" ] ; then
+    echo "Usage: $0 <elasticsearch-host> <elasticsearch-port>" >&2
+    return 1
+  fi
+
+  ELASTIC_SEARCH_HOST=$1
+  ELASTIC_SEARCH_PORT=$2
+  ELASTIC_SEARCH_URL="http://$ELASTIC_SEARCH_HOST:$ELASTIC_SEARCH_PORT"
+  MAX_ATTEMPTS=30
+
+  for i in $(seq "$MAX_ATTEMPTS") ; do
+    # -f turns HTTP error responses into a non-zero exit; --max-time keeps
+    # one stalled connection from blocking far longer than the retry interval.
+    if curl -f --max-time 5 "${ELASTIC_SEARCH_URL}/_cluster/health" > /dev/null 2>&1 ; then
+      return 0
+    fi
+    echo "* Waiting for Elasticsearch at $ELASTIC_SEARCH_URL ($i/$MAX_ATTEMPTS)"
+    sleep 2
+  done
+
+  echo "Operation timed out" >&2
+  return 1
+}
+
+wait_for "$@"