Separate UTF-8 test from regular contributors spec

Split the test for non-latin chars from the test of the standard
functionality of fetchRawContributors. This makes it simpler to tell
encoding regressions apart from functionality regressions.

Change-Id: I1ff59ae655ce1a65d4b2861efbbb4169c45e77ea
diff --git a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
index 95b2fe7..a52b89c 100644
--- a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
+++ b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
@@ -68,8 +68,8 @@
 
     val line1 = "foo" -> "bar"
     val line2 = "foo1" -> "bar1"
-    val line3 = "foo2" -> "bar2\u0100" // checks UTF-8 as well
-    val line3b = "foo3" -> "bar3\u0101"
+    val line3 = "foo2" -> "bar2"
+    val line3b = "foo3" -> "bar3"
 
     val projectSource1 = ProjectContributionSource("p1", newSource(line1, line2, line3))
     val projectSource2 = ProjectContributionSource("p2", newSource(line3b))
@@ -82,11 +82,20 @@
     rawContributors should contain allOf(
       ("p1","""{"foo":"bar"}"""),
       ("p1","""{"foo1":"bar1"}"""),
-      ("p1", "{\"foo2\":\"bar2\u0100\"}"),
-      ("p2", "{\"foo3\":\"bar3\u0101\"}")
+      ("p1", """{"foo2":"bar2"}"""),
+      ("p2", """{"foo3":"bar3"}""")
     )
   }
 
+  it should "fetch file content from the initial list of project names and file names with non-latin chars" in {
+    val rawContributors = sc.parallelize(Seq(ProjectContributionSource("p1", newSource("foo2" -> "bar2\u0100"))))
+      .fetchRawContributors
+      .collect
+
+    rawContributors should have size (1)
+    rawContributors.head._2 should be ("{\"foo2\":\"bar2\u0100\"}") // regular literal: unicode escapes in """ strings are deprecated (2.13.2+)
+  }
+
   "transformCommitterInfo" should "transform a DataFrame with project and json to a workable DF with separated columns" in {
     import sql.implicits._