Add nextDoc to the matchTree interface
This simplifies the main eval loop, and opens the way to primitive
positive atoms that are not plain substrings.
Change-Id: I63958ca9c41daa1518b128fcfe90285b274f1d54
diff --git a/eval.go b/eval.go
index d7433e3..1dd2b07 100644
--- a/eval.go
+++ b/eval.go
@@ -26,17 +26,20 @@
"github.com/google/zoekt/query"
)
-var _ = log.Println
-
// An expression tree coupled with matches
type matchTree interface {
- // returns whether this matches, and if we are sure.
- matches(known map[matchTree]bool) (match bool, sure bool)
+ // provide the next document where we can may find something
+ // interesting.
+ nextDoc() uint32
// clears any per-document state of the matchTree, and
// prepares for evaluating the given doc. The argument is
// strictly increasing over time.
prepare(nextDoc uint32)
+
+ // returns whether this matches, and if we are sure.
+ matches(known map[matchTree]bool) (match bool, sure bool)
+
String() string
}
@@ -80,7 +83,8 @@
mask uint64
// mutable
- docID uint32
+ firstDone bool
+ docID uint32
}
// all prepare methods
@@ -121,9 +125,63 @@
}
func (t *branchQueryMatchTree) prepare(doc uint32) {
+ t.firstDone = true
t.docID = doc
}
+// nextDoc
+
+func (t *andMatchTree) nextDoc() uint32 {
+ var max uint32
+ for _, c := range t.children {
+ m := c.nextDoc()
+ if m > max {
+ max = m
+ }
+ }
+ return max
+}
+
+func (t *regexpMatchTree) nextDoc() uint32 {
+ return t.child.nextDoc()
+}
+
+func (t *orMatchTree) nextDoc() uint32 {
+ min := uint32(maxUInt32)
+ for _, c := range t.children {
+ m := c.nextDoc()
+ if m < min {
+ min = m
+ }
+ }
+ return min
+}
+
+func (t *notMatchTree) nextDoc() uint32 {
+ return 0
+}
+
+func (t *substrMatchTree) nextDoc() uint32 {
+ if len(t.cands) > 0 {
+ return t.cands[0].file
+ }
+ return maxUInt32
+}
+
+func (t *branchQueryMatchTree) nextDoc() uint32 {
+ var start uint32
+ if t.firstDone {
+ start = t.docID + 1
+ }
+
+ for i := start; i < uint32(len(t.fileMasks)); i++ {
+ if (t.mask & t.fileMasks[i]) != 0 {
+ return i
+ }
+ }
+ return maxUInt32
+}
+
// all String methods
func (t *andMatchTree) String() string {
@@ -517,16 +575,12 @@
res.Stats.NgramMatches += len(st.cands)
}
- var positiveAtoms, fileAtoms []*substrMatchTree
- collectPositiveSubstrings(mt, func(sq *substrMatchTree) {
- positiveAtoms = append(positiveAtoms, sq)
- })
-
var regexpAtoms []*regexpMatchTree
collectRegexps(mt, func(re *regexpMatchTree) {
regexpAtoms = append(regexpAtoms, re)
})
+ var fileAtoms []*substrMatchTree
for st := range atoms {
if st.fileName {
fileAtoms = append(fileAtoms, st)
@@ -553,14 +607,7 @@
}
}
- var nextDoc uint32
- nextDoc = maxUInt32
- for _, st := range positiveAtoms {
- if len(st.cands) > 0 && st.cands[0].file < nextDoc {
- nextDoc = st.cands[0].file
- }
- }
-
+ nextDoc := mt.nextDoc()
if nextDoc == maxUInt32 {
break
}
diff --git a/index_test.go b/index_test.go
index c58ee09..e3e4b0e 100644
--- a/index_test.go
+++ b/index_test.go
@@ -25,6 +25,7 @@
"unicode"
"unicode/utf8"
+ "github.com/kylelemons/godebug/pretty"
"golang.org/x/net/context"
"github.com/google/zoekt/query"
@@ -261,10 +262,10 @@
NgramMatches: 4,
MatchCount: 1,
FileCount: 1,
- FilesConsidered: 3,
+ FilesConsidered: 2,
}
- if !reflect.DeepEqual(sres.Stats, wantStats) {
- t.Errorf("got stats %#v, want %#v", sres.Stats, wantStats)
+ if diff := pretty.Compare(sres.Stats, wantStats); diff != "" {
+ t.Errorf("got stats diff %s", diff)
}
}
@@ -1453,3 +1454,28 @@
t.Errorf("got %v, want 0 files", res.Files)
}
}
+
+func TestAndOrUnicode(t *testing.T) {
+ q, err := query.Parse("orange.*apple")
+ if err != nil {
+ t.Errorf("parse: %v", err)
+ }
+ finalQ := query.NewAnd(q,
+ query.NewOr(query.NewAnd(&query.Repo{"name"},
+ query.NewOr(&query.Branch{"master"}))))
+
+ b := testIndexBuilder(t, &Repository{
+ Name: "name",
+ Branches: []RepositoryBranch{{"master", "master-version"}},
+ }, Document{
+ Name: "f2",
+ Content: []byte("orange\u2318apple"),
+ // --------------0123456 78901
+ Branches: []string{"master"},
+ })
+
+ res := searchForTest(t, b, finalQ)
+ if len(res.Files) != 1 {
+ t.Errorf("got %v, want 1 result", res.Files)
+ }
+}