Lowercase aliased organizations

This avoids duplicated organizations, in particular when the same organization
comes both from the aliases file and from the email but with different letter case.

Change-Id: I163ee9ecac7ff2a01774408e9585032ea2ee31f9
diff --git a/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala b/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala
index 195cc02..5baafbb 100644
--- a/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala
+++ b/src/main/scala/com/gerritforge/analytics/engine/GerritAnalyticsTransformations.scala
@@ -109,8 +109,8 @@
 
                 df.join(renamedAliasesDF, df("email") === renamedAliasesDF("email_alias"), "left_outer" )
                   .withColumn("organization",
-                    when(renamedAliasesDF("organization_alias").notEqual(""), renamedAliasesDF("organization_alias"))
-                      .otherwise(df("organization")) )
+                    when(renamedAliasesDF("organization_alias").notEqual(""), lower(renamedAliasesDF("organization_alias")))
+                      .otherwise(df("organization")))
                   .withColumn("author", coalesce(renamedAliasesDF("author_alias"), df("author")))
                   .drop("email_alias","author_alias", "organization_alias")
             }
diff --git a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
index 0e7702f..b00d36c 100644
--- a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
+++ b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
@@ -200,6 +200,25 @@
     df.schema.fields.map(_.name) should contain allOf("author", "email", "organization")
   }
 
+  it should "lowercase aliased organizations" in {
+    import spark.implicits._
+    val inputSampleDF = sc.parallelize(Seq(
+      ("author_name", "email@mail.com", "an_organization")
+    )).toDF("author", "email", "organization")
+
+    val aliasDF = sc.parallelize(Seq(
+      ("author_name", "email@mail.com", "OrGaNiZaTiOnToBeLoWeRcAsEd")
+    )).toDF("author", "email", "organization")
+
+    val df = inputSampleDF.handleAliases(Some(aliasDF))
+
+    val expectedDF = sc.parallelize(Seq(
+      ("author_name", "email@mail.com", "organizationtobelowercased")
+    )).toDF("author", "email", "organization")
+
+    df.collect should contain theSameElementsAs expectedDF.collect
+  }
+
   "addOrganization" should "compute organization column from the email" in {
     import sql.implicits._