Use filename content for determining byte offsets in filename matches.

Change-Id: I1339ff375e275f13982161fb24db10bb439ce5ad
diff --git a/index_test.go b/index_test.go
index 4819d5a..fc0112e 100644
--- a/index_test.go
+++ b/index_test.go
@@ -1347,3 +1347,25 @@
 		t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
 	}
 }
+
+func TestUTF8CorrectCorpus(t *testing.T) {
+	needle := "neeedle"
+
+	// Two 3-byte runes (6 bytes), so content rune and byte offsets diverge.
+	unicode := "世界"
+	b := testIndexBuilder(t, nil,
+		Document{
+			Name:    "f1",
+			Content: []byte(strings.Repeat(unicode, 100)),
+		},
+		Document{
+			Name:    "xxxxxneeedle",
+			Content: []byte("hello"),
+		})
+
+	q := &query.Substring{Pattern: needle, FileName: true}
+	res := searchForTest(t, b, q)
+	if len(res.Files) != 1 {
+		t.Errorf("got %v, want 1 result", res)
+	}
+}
diff --git a/search.go b/search.go
index 406d964..bc388b3 100644
--- a/search.go
+++ b/search.go
@@ -82,8 +82,9 @@
 	return p._data
 }
 
-// Find offset in bytes (relative to file start) for an offset in
-// runes (relative to file start).
+// findOffset returns the byte offset (relative to corpus start) for rune
+// offset r (relative to document start). If filename is set, the corpus
+// is the set of filenames, with the document being the name itself.
 func (p *contentProvider) findOffset(filename bool, r uint32) uint32 {
 	sample := p.id.runeOffsets
 	runeEnds := p.id.fileEndRunes
@@ -103,11 +104,15 @@
 	left := absR % runeOffsetFrequency
 
 	var data []byte
-	data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency)
-	if p.err != nil {
-		return 0
-	}
 
+	if filename {
+		data = p.id.fileNameContent[byteOff:]
+	} else {
+		data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency)
+		if p.err != nil {
+			return 0
+		}
+	}
 	for left > 0 {
 		_, sz := utf8.DecodeRune(data)
 		byteOff += uint32(sz)