Introduce bruteForceMatchTree This moves more doc iterator logic into the match tree, and prepares for accepting short content queries. Change-Id: I32584008f2ecbbba14e3c7bdb76a80a613c140ff

commit: 35c76e391e93def4a2aeff4aed508fdfe49f595e [log] [tgz]
author: Han-Wen Nienhuys <hanwen@google.com> Wed May 17 10:29:40 2017 +0200
committer: Han-Wen Nienhuys <hanwen@google.com> Wed May 17 21:08:13 2017 +0200
tree: 80b8e74a05ef3b763f5cd037665d0db84262e40c
parent: 904e38bc26ee39bfa2d723a0b7b4eaa3fdabf9dd [diff]
diff --git a/eval.go b/eval.go
index 1dd2b07..b4dc688 100644
--- a/eval.go
+++ b/eval.go

@@ -43,6 +43,12 @@
 	String() string
 }
 
+type bruteForceMatchTree struct {
+	// mutable
+	firstDone bool
+	docID     uint32
+}
+
 type andMatchTree struct {
 	children []matchTree
 }
@@ -89,6 +95,11 @@
 
 // all prepare methods
 
+func (t *bruteForceMatchTree) prepare(doc uint32) {
+	t.docID = doc
+	t.firstDone = true
+}
+
 func (t *andMatchTree) prepare(doc uint32) {
 	for _, c := range t.children {
 		c.prepare(doc)
@@ -131,6 +142,13 @@
 
 // nextDoc
 
+func (t *bruteForceMatchTree) nextDoc() uint32 {
+	if !t.firstDone {
+		return 0
+	}
+	return t.docID + 1
+}
+
 func (t *andMatchTree) nextDoc() uint32 {
 	var max uint32
 	for _, c := range t.children {
@@ -184,6 +202,10 @@
 
 // all String methods
 
+func (t *bruteForceMatchTree) String() string {
+	return "all"
+}
+
 func (t *andMatchTree) String() string {
 	return fmt.Sprintf("and%v", t.children)
 }
@@ -335,6 +357,10 @@
 
 // all matches() methods.
 
+func (t *bruteForceMatchTree) matches(known map[matchTree]bool) (bool, bool) {
+	return true, true
+}
+
 func (t *andMatchTree) matches(known map[matchTree]bool) (bool, bool) {
 	sure := true
 
@@ -497,14 +523,7 @@
 		}, nil
 	case *query.Const:
 		if s.Value {
-			iter := d.matchAllDocIterator()
-			return &substrMatchTree{
-				query:         &query.Substring{Pattern: "TRUE"},
-				coversContent: true,
-				caseSensitive: false,
-				fileName:      true,
-				cands:         iter.next(),
-			}, nil
+			return &bruteForceMatchTree{}, nil
 		} else {
 			return &substrMatchTree{
 				query: &query.Substring{Pattern: "FALSE"},
@@ -595,8 +614,9 @@
 	totalAtomCount := 0
 	collectAtoms(mt, func(t matchTree) { totalAtomCount++ })
 
+	docCount := uint32(len(d.fileBranchMasks))
 	canceled := false
-
+	lastDoc := int(-1)
 nextFileMatch:
 	for {
 		if !canceled {
@@ -608,9 +628,14 @@
 		}
 
 		nextDoc := mt.nextDoc()
-		if nextDoc == maxUInt32 {
+		if nextDoc >= docCount {
 			break
 		}
+		if int(nextDoc) <= lastDoc {
+			log.Panicf("lastDoc %d, nextDoc %d", lastDoc, nextDoc)
+		}
+		lastDoc = int(nextDoc)
+
 		res.Stats.FilesConsidered++
 		mt.prepare(nextDoc)
 		if canceled || res.Stats.MatchCount >= opts.ShardMaxMatchCount ||
@@ -692,6 +717,20 @@
 		fileMatch.Score += float64(atomMatchCount) / float64(totalAtomCount) * scoreFactorAtomMatch
 		finalCands := gatherMatches(mt, known)
 
+		if len(finalCands) == 0 {
+			nm := d.fileName(nextDoc)
+			finalCands = append(finalCands,
+				&candidateMatch{
+					caseSensitive: false,
+					fileName:      true,
+					substrBytes:   nm,
+					substrLowered: nm,
+					file:          nextDoc,
+					runeOffset:    0,
+					byteOffset:    0,
+					byteMatchSz:   uint32(len(nm)),
+				})
+		}
 		fileMatch.LineMatches = cp.fillMatches(finalCands)
 
 		maxFileScore := 0.0

diff --git a/index_portable.go b/index_portable.go
index a069be5..68f21c9 100644
--- a/index_portable.go
+++ b/index_portable.go

@@ -17,7 +17,6 @@
 import (
 	"fmt"
 	"hash/crc64"
-	"log"
 	"regexp"
 	"unicode/utf8"
 
@@ -128,28 +127,16 @@
 	return sz
 }
 
-func (data *indexData) getDocIterator(q query.Q) (docIterator, error) {
-	switch s := q.(type) {
-	case *query.Substring:
-		if utf8.RuneCountInString(s.Pattern) < ngramSize {
-			if !s.FileName {
-				return nil, fmt.Errorf("pattern %q less than %d characters", s.Pattern, ngramSize)
-			}
-
-			return data.getBruteForceFileNameDocIterator(s), nil
+func (data *indexData) getDocIterator(s *query.Substring) (docIterator, error) {
+	if utf8.RuneCountInString(s.Pattern) < ngramSize {
+		if !s.FileName {
+			return nil, fmt.Errorf("pattern %q less than %d characters", s.Pattern, ngramSize)
 		}
 
-		return data.getNgramDocIterator(s)
-
-	case *query.Const:
-		if s.Value {
-			return data.matchAllDocIterator(), nil
-		}
-		return &bruteForceIter{}, nil
+		return data.getBruteForceFileNameDocIterator(s), nil
 	}
 
-	log.Panicf("type %T", q)
-	return nil, nil
+	return data.getNgramDocIterator(s)
 }
 
 type bruteForceIter struct {
@@ -166,32 +153,6 @@
 	return true
 }
 
-func (data *indexData) matchAllDocIterator() docIterator {
-	var cands []*candidateMatch
-
-	if len(data.fileNameIndex) == 0 {
-		return &bruteForceIter{cands}
-	}
-
-	cands = make([]*candidateMatch, 0, len(data.fileNameIndex[1:]))
-	var last uint32
-	for i, off := range data.fileNameIndex[1:] {
-		name := data.fileNameContent[last:off]
-		last = off
-		cands = append(cands, &candidateMatch{
-			caseSensitive: false,
-			fileName:      true,
-			substrBytes:   name,
-			substrLowered: name,
-			file:          uint32(i),
-			runeOffset:    0,
-			byteOffset:    0,
-			byteMatchSz:   uint32(len(name)),
-		})
-	}
-	return &bruteForceIter{cands}
-}
-
 func (data *indexData) getBruteForceFileNameDocIterator(query *query.Substring) docIterator {
 	quoted := regexp.QuoteMeta(query.Pattern)
 	if !query.CaseSensitive {

diff --git a/index_test.go b/index_test.go
index e3e4b0e..2c16149 100644
--- a/index_test.go
+++ b/index_test.go

@@ -306,7 +306,6 @@
 	}
 
 	b.AddFile("f1", []byte("x banana y"))
-
 	b.AddFile("f2", []byte("x appelmoes y"))
 	b.AddFile("f3", []byte("x appelmoes y"))
 	b.AddFile("f3", []byte("x appelmoes y"))
commit	35c76e391e93def4a2aeff4aed508fdfe49f595e	[log] [tgz]
author	Han-Wen Nienhuys <hanwen@google.com>	Wed May 17 10:29:40 2017 +0200
committer	Han-Wen Nienhuys <hanwen@google.com>	Wed May 17 21:08:13 2017 +0200
tree	80b8e74a05ef3b763f5cd037665d0db84262e40c
parent	904e38bc26ee39bfa2d723a0b7b4eaa3fdabf9dd [diff]