Allow U+FFFD (replacement char) in files.
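utf8.DecodeRune returns utf8.RuneError (U+FFFD) both for invalid input and
for a correctly encoded U+FFFD, so the rune alone is ambiguous. To detect
real errors, the returned size must be checked: it is 0 or 1 on a decoding
error and 3 for a valid U+FFFD.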
Change-Id: I9832b4dc79a9d505189d147473b9f64b1b01f391
diff --git a/index_test.go b/index_test.go
index af9cff9..576e863 100644
--- a/index_test.go
+++ b/index_test.go
@@ -1790,3 +1790,16 @@
t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
}
}
+
+func TestIsText(t *testing.T) { // checks IsText on inputs that must count as text and on inputs that must not
+ for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} { // must be accepted: empty, ASCII, accented, BOM-prefixed, literal U+FFFD
+ if !IsText([]byte(text)) {
+ t.Errorf("IsText(%q) is false", text)
+ }
+ }
+ for _, text := range []string{"zero\x00byte", "high\xEFbyte"} { // must be rejected: NUL byte, 0xEF not followed by continuation bytes
+ if IsText([]byte(text)) {
+ t.Errorf("IsText(%q) is true", text)
+ }
+ }
+}
diff --git a/indexbuilder.go b/indexbuilder.go
index 9475d58..7895235 100644
--- a/indexbuilder.go
+++ b/indexbuilder.go
@@ -278,7 +278,7 @@
}
r, sz := utf8.DecodeRune(content)
- if r == utf8.RuneError {
+ if r == utf8.RuneError && sz < 2 {
return false
}
content = content[sz:]