Introduce bruteForceMatchTree
This moves more doc iterator logic into the match tree, and prepares
for accepting short content queries.
Change-Id: I32584008f2ecbbba14e3c7bdb76a80a613c140ff
diff --git a/eval.go b/eval.go
index 1dd2b07..b4dc688 100644
--- a/eval.go
+++ b/eval.go
@@ -43,6 +43,12 @@
String() string
}
+type bruteForceMatchTree struct {
+ // mutable
+ firstDone bool
+ docID uint32
+}
+
type andMatchTree struct {
children []matchTree
}
@@ -89,6 +95,11 @@
// all prepare methods
+func (t *bruteForceMatchTree) prepare(doc uint32) {
+ t.docID = doc
+ t.firstDone = true
+}
+
func (t *andMatchTree) prepare(doc uint32) {
for _, c := range t.children {
c.prepare(doc)
@@ -131,6 +142,13 @@
// nextDoc
+func (t *bruteForceMatchTree) nextDoc() uint32 {
+ if !t.firstDone {
+ return 0
+ }
+ return t.docID + 1
+}
+
func (t *andMatchTree) nextDoc() uint32 {
var max uint32
for _, c := range t.children {
@@ -184,6 +202,10 @@
// all String methods
+func (t *bruteForceMatchTree) String() string {
+ return "all"
+}
+
func (t *andMatchTree) String() string {
return fmt.Sprintf("and%v", t.children)
}
@@ -335,6 +357,10 @@
// all matches() methods.
+func (t *bruteForceMatchTree) matches(known map[matchTree]bool) (bool, bool) {
+ return true, true
+}
+
func (t *andMatchTree) matches(known map[matchTree]bool) (bool, bool) {
sure := true
@@ -497,14 +523,7 @@
}, nil
case *query.Const:
if s.Value {
- iter := d.matchAllDocIterator()
- return &substrMatchTree{
- query: &query.Substring{Pattern: "TRUE"},
- coversContent: true,
- caseSensitive: false,
- fileName: true,
- cands: iter.next(),
- }, nil
+ return &bruteForceMatchTree{}, nil
} else {
return &substrMatchTree{
query: &query.Substring{Pattern: "FALSE"},
@@ -595,8 +614,9 @@
totalAtomCount := 0
collectAtoms(mt, func(t matchTree) { totalAtomCount++ })
+ docCount := uint32(len(d.fileBranchMasks))
canceled := false
-
+ lastDoc := int(-1)
nextFileMatch:
for {
if !canceled {
@@ -608,9 +628,14 @@
}
nextDoc := mt.nextDoc()
- if nextDoc == maxUInt32 {
+ if nextDoc >= docCount {
break
}
+ if int(nextDoc) <= lastDoc {
+ log.Panicf("lastDoc %d, nextDoc %d", lastDoc, nextDoc)
+ }
+ lastDoc = int(nextDoc)
+
res.Stats.FilesConsidered++
mt.prepare(nextDoc)
if canceled || res.Stats.MatchCount >= opts.ShardMaxMatchCount ||
@@ -692,6 +717,20 @@
fileMatch.Score += float64(atomMatchCount) / float64(totalAtomCount) * scoreFactorAtomMatch
finalCands := gatherMatches(mt, known)
+ if len(finalCands) == 0 {
+ nm := d.fileName(nextDoc)
+ finalCands = append(finalCands,
+ &candidateMatch{
+ caseSensitive: false,
+ fileName: true,
+ substrBytes: nm,
+ substrLowered: nm,
+ file: nextDoc,
+ runeOffset: 0,
+ byteOffset: 0,
+ byteMatchSz: uint32(len(nm)),
+ })
+ }
fileMatch.LineMatches = cp.fillMatches(finalCands)
maxFileScore := 0.0
diff --git a/index_portable.go b/index_portable.go
index a069be5..68f21c9 100644
--- a/index_portable.go
+++ b/index_portable.go
@@ -17,7 +17,6 @@
import (
"fmt"
"hash/crc64"
- "log"
"regexp"
"unicode/utf8"
@@ -128,28 +127,16 @@
return sz
}
-func (data *indexData) getDocIterator(q query.Q) (docIterator, error) {
- switch s := q.(type) {
- case *query.Substring:
- if utf8.RuneCountInString(s.Pattern) < ngramSize {
- if !s.FileName {
- return nil, fmt.Errorf("pattern %q less than %d characters", s.Pattern, ngramSize)
- }
-
- return data.getBruteForceFileNameDocIterator(s), nil
+func (data *indexData) getDocIterator(s *query.Substring) (docIterator, error) {
+ if utf8.RuneCountInString(s.Pattern) < ngramSize {
+ if !s.FileName {
+ return nil, fmt.Errorf("pattern %q less than %d characters", s.Pattern, ngramSize)
}
- return data.getNgramDocIterator(s)
-
- case *query.Const:
- if s.Value {
- return data.matchAllDocIterator(), nil
- }
- return &bruteForceIter{}, nil
+ return data.getBruteForceFileNameDocIterator(s), nil
}
- log.Panicf("type %T", q)
- return nil, nil
+ return data.getNgramDocIterator(s)
}
type bruteForceIter struct {
@@ -166,32 +153,6 @@
return true
}
-func (data *indexData) matchAllDocIterator() docIterator {
- var cands []*candidateMatch
-
- if len(data.fileNameIndex) == 0 {
- return &bruteForceIter{cands}
- }
-
- cands = make([]*candidateMatch, 0, len(data.fileNameIndex[1:]))
- var last uint32
- for i, off := range data.fileNameIndex[1:] {
- name := data.fileNameContent[last:off]
- last = off
- cands = append(cands, &candidateMatch{
- caseSensitive: false,
- fileName: true,
- substrBytes: name,
- substrLowered: name,
- file: uint32(i),
- runeOffset: 0,
- byteOffset: 0,
- byteMatchSz: uint32(len(name)),
- })
- }
- return &bruteForceIter{cands}
-}
-
func (data *indexData) getBruteForceFileNameDocIterator(query *query.Substring) docIterator {
quoted := regexp.QuoteMeta(query.Pattern)
if !query.CaseSensitive {
diff --git a/index_test.go b/index_test.go
index e3e4b0e..2c16149 100644
--- a/index_test.go
+++ b/index_test.go
@@ -306,7 +306,6 @@
}
b.AddFile("f1", []byte("x banana y"))
-
b.AddFile("f2", []byte("x appelmoes y"))
b.AddFile("f3", []byte("x appelmoes y"))
b.AddFile("f3", []byte("x appelmoes y"))