Check for binary content using bytes.IndexByte before building postings

In commit 8029ba ("Include files without text on index"), I bailed
halfway in newSearchableString on finding problems. Unfortunately, the
postingsBuilder was not meant to process a file only halfway. This led
to OOB access during the rune/byte translation for shards that had
half-processed files.

Change-Id: I49d6a82bb1e6d4129f03de9a4293fc1cc5dcb4b3
diff --git a/indexbuilder.go b/indexbuilder.go
index e4678d7..40182ce 100644
--- a/indexbuilder.go
+++ b/indexbuilder.go
@@ -15,6 +15,7 @@
 package zoekt
 
 import (
+	"bytes"
 	"encoding/binary"
 	"fmt"
 	"hash/crc64"
@@ -64,6 +65,9 @@
 	}
 }
 
+// Store trigram offsets for the given UTF-8 data. The
+// DocumentSections must correspond to rune boundaries in the UTF-8
+// data.
 func (s *postingsBuilder) newSearchableString(data []byte, byteSections []DocumentSection) (*searchableString, []DocumentSection, error) {
 	dest := searchableString{
 		data: data,
@@ -87,9 +91,6 @@
 		if sz > 1 {
 			s.isPlainASCII = false
 		}
-		if c == 0 {
-			return nil, nil, &skipError{fmt.Sprintf("binary content at byte offset %d", byteCount)}
-		}
 		data = data[sz:]
 
 		runeGram[0], runeGram[1], runeGram[2] = runeGram[1], runeGram[2], c
@@ -328,19 +329,15 @@
 
 const notIndexedMarker = "NOT-INDEXED: "
 
-// skipError is an error for conditions that we can record in the index.
-type skipError struct {
-	reason string
-}
-
-func (e *skipError) Error() string {
-	return e.reason
-}
-
 // Add a file which only occurs in certain branches.
 func (b *IndexBuilder) Add(doc Document) error {
 	hasher := crc64.New(crc64.MakeTable(crc64.ISO))
 
+	if idx := bytes.IndexByte(doc.Content, 0); idx >= 0 {
+		doc.SkipReason = fmt.Sprintf("binary content at byte offset %d", idx)
+		doc.Language = "binary"
+	}
+
 	if doc.SkipReason != "" {
 		doc.Content = []byte(notIndexedMarker + doc.SkipReason)
 		doc.Symbols = nil
@@ -367,14 +364,7 @@
 		}
 	}
 	docStr, runeSecs, err := b.contentPostings.newSearchableString(doc.Content, doc.Symbols)
-	if t, ok := err.(*skipError); err != nil && ok {
-		doc.SkipReason = t.reason
-		doc.Content = []byte(notIndexedMarker + doc.SkipReason)
-		doc.Symbols = nil
-		doc.Language = "binary"
-
-		docStr, runeSecs, _ = b.contentPostings.newSearchableString(doc.Content, doc.Symbols)
-	} else if err != nil {
+	if err != nil {
 		return err
 	}
 	nameStr, _, err := b.namePostings.newSearchableString([]byte(doc.Name), nil)