Separate UTF-8 test from regular contributors spec

Split the test for non-Latin chars from the standard functionality of fetchRawContributors. This allows simpler troubleshooting of encoding vs. functionality regressions.

Change-Id: I1ff59ae655ce1a65d4b2861efbbb4169c45e77ea

commit: 25e61a267b7f9032cd9cd19072ca1bb5f6f9bb92 [log] [tgz]
author: Luca Milanesio <luca.milanesio@gmail.com> Wed Feb 28 09:56:22 2018 +0000
committer: Luca Milanesio <luca.milanesio@gmail.com> Wed Feb 28 09:56:22 2018 +0000
tree: 266009c294bcf380de248b32ce0d9d98d933696d
parent: f2ae7e65d78db4312d3cfddb4edff1b255e826c1 [diff]
diff --git a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
index 95b2fe7..a52b89c 100644
--- a/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala
+++ b/src/test/scala/com/gerritforge/analytics/GerritAnalyticsTransformationsSpec.scala

@@ -68,8 +68,8 @@
 
     val line1 = "foo" -> "bar"
     val line2 = "foo1" -> "bar1"
-    val line3 = "foo2" -> "bar2\u0100" // checks UTF-8 as well
-    val line3b = "foo3" -> "bar3\u0101"
+    val line3 = "foo2" -> "bar2"
+    val line3b = "foo3" -> "bar3"
 
     val projectSource1 = ProjectContributionSource("p1", newSource(line1, line2, line3))
     val projectSource2 = ProjectContributionSource("p2", newSource(line3b))
@@ -82,11 +82,20 @@
     rawContributors should contain allOf(
       ("p1","""{"foo":"bar"}"""),
       ("p1","""{"foo1":"bar1"}"""),
-      ("p1", "{\"foo2\":\"bar2\u0100\"}"),
-      ("p2", "{\"foo3\":\"bar3\u0101\"}")
+      ("p1", """{"foo2":"bar2"}"""),
+      ("p2", """{"foo3":"bar3"}""")
     )
   }
 
+  it should "fetch file content from the initial list of project names and file names with non-latin chars" in {
+    val rawContributors = sc.parallelize(Seq(ProjectContributionSource("p1", newSource("foo2" -> "bar2\u0100"))))
+      .fetchRawContributors
+      .collect
+
+    rawContributors should have size (1)
+    rawContributors.head._2 should be ("""{"foo2":"bar2\u0100"}""")
+  }
+
   "transformCommitterInfo" should "transform a DataFrame with project and json to a workable DF with separated columns" in {
     import sql.implicits._
commit	25e61a267b7f9032cd9cd19072ca1bb5f6f9bb92	[log] [tgz]
author	Luca Milanesio <luca.milanesio@gmail.com>	Wed Feb 28 09:56:22 2018 +0000
committer	Luca Milanesio <luca.milanesio@gmail.com>	Wed Feb 28 09:56:22 2018 +0000
tree	266009c294bcf380de248b32ce0d9d98d933696d
parent	f2ae7e65d78db4312d3cfddb4edff1b255e826c1 [diff]