Add nextDoc to the matchTree interface

This simplifies the main eval loop, and opens the way to primitive
positive atoms that are not plain substrings.

Change-Id: I63958ca9c41daa1518b128fcfe90285b274f1d54
diff --git a/eval.go b/eval.go
index d7433e3..1dd2b07 100644
--- a/eval.go
+++ b/eval.go
@@ -26,17 +26,20 @@
 	"github.com/google/zoekt/query"
 )
 
-var _ = log.Println
-
 // An expression tree coupled with matches
 type matchTree interface {
-	// returns whether this matches, and if we are sure.
-	matches(known map[matchTree]bool) (match bool, sure bool)
+	// provide the next document where we can may find something
+	// interesting.
+	nextDoc() uint32
 
 	// clears any per-document state of the matchTree, and
 	// prepares for evaluating the given doc. The argument is
 	// strictly increasing over time.
 	prepare(nextDoc uint32)
+
+	// returns whether this matches, and if we are sure.
+	matches(known map[matchTree]bool) (match bool, sure bool)
+
 	String() string
 }
 
@@ -80,7 +83,8 @@
 	mask      uint64
 
 	// mutable
-	docID uint32
+	firstDone bool
+	docID     uint32
 }
 
 // all prepare methods
@@ -121,9 +125,63 @@
 }
 
 func (t *branchQueryMatchTree) prepare(doc uint32) {
+	t.firstDone = true
 	t.docID = doc
 }
 
+// nextDoc
+
+func (t *andMatchTree) nextDoc() uint32 {
+	var max uint32
+	for _, c := range t.children {
+		m := c.nextDoc()
+		if m > max {
+			max = m
+		}
+	}
+	return max
+}
+
+func (t *regexpMatchTree) nextDoc() uint32 {
+	return t.child.nextDoc()
+}
+
+func (t *orMatchTree) nextDoc() uint32 {
+	min := uint32(maxUInt32)
+	for _, c := range t.children {
+		m := c.nextDoc()
+		if m < min {
+			min = m
+		}
+	}
+	return min
+}
+
+func (t *notMatchTree) nextDoc() uint32 {
+	return 0
+}
+
+func (t *substrMatchTree) nextDoc() uint32 {
+	if len(t.cands) > 0 {
+		return t.cands[0].file
+	}
+	return maxUInt32
+}
+
+func (t *branchQueryMatchTree) nextDoc() uint32 {
+	var start uint32
+	if t.firstDone {
+		start = t.docID + 1
+	}
+
+	for i := start; i < uint32(len(t.fileMasks)); i++ {
+		if (t.mask & t.fileMasks[i]) != 0 {
+			return i
+		}
+	}
+	return maxUInt32
+}
+
 // all String methods
 
 func (t *andMatchTree) String() string {
@@ -517,16 +575,12 @@
 		res.Stats.NgramMatches += len(st.cands)
 	}
 
-	var positiveAtoms, fileAtoms []*substrMatchTree
-	collectPositiveSubstrings(mt, func(sq *substrMatchTree) {
-		positiveAtoms = append(positiveAtoms, sq)
-	})
-
 	var regexpAtoms []*regexpMatchTree
 	collectRegexps(mt, func(re *regexpMatchTree) {
 		regexpAtoms = append(regexpAtoms, re)
 	})
 
+	var fileAtoms []*substrMatchTree
 	for st := range atoms {
 		if st.fileName {
 			fileAtoms = append(fileAtoms, st)
@@ -553,14 +607,7 @@
 			}
 		}
 
-		var nextDoc uint32
-		nextDoc = maxUInt32
-		for _, st := range positiveAtoms {
-			if len(st.cands) > 0 && st.cands[0].file < nextDoc {
-				nextDoc = st.cands[0].file
-			}
-		}
-
+		nextDoc := mt.nextDoc()
 		if nextDoc == maxUInt32 {
 			break
 		}
diff --git a/index_test.go b/index_test.go
index c58ee09..e3e4b0e 100644
--- a/index_test.go
+++ b/index_test.go
@@ -25,6 +25,7 @@
 	"unicode"
 	"unicode/utf8"
 
+	"github.com/kylelemons/godebug/pretty"
 	"golang.org/x/net/context"
 
 	"github.com/google/zoekt/query"
@@ -261,10 +262,10 @@
 		NgramMatches:       4,
 		MatchCount:         1,
 		FileCount:          1,
-		FilesConsidered:    3,
+		FilesConsidered:    2,
 	}
-	if !reflect.DeepEqual(sres.Stats, wantStats) {
-		t.Errorf("got stats %#v, want %#v", sres.Stats, wantStats)
+	if diff := pretty.Compare(sres.Stats, wantStats); diff != "" {
+		t.Errorf("got stats diff %s", diff)
 	}
 }
 
@@ -1453,3 +1454,28 @@
 		t.Errorf("got %v, want 0 files", res.Files)
 	}
 }
+
+func TestAndOrUnicode(t *testing.T) {
+	q, err := query.Parse("orange.*apple")
+	if err != nil {
+		t.Errorf("parse: %v", err)
+	}
+	finalQ := query.NewAnd(q,
+		query.NewOr(query.NewAnd(&query.Repo{"name"},
+			query.NewOr(&query.Branch{"master"}))))
+
+	b := testIndexBuilder(t, &Repository{
+		Name:     "name",
+		Branches: []RepositoryBranch{{"master", "master-version"}},
+	}, Document{
+		Name:    "f2",
+		Content: []byte("orange\u2318apple"),
+		// --------------0123456     78901
+		Branches: []string{"master"},
+	})
+
+	res := searchForTest(t, b, finalQ)
+	if len(res.Files) != 1 {
+		t.Errorf("got %v, want 1 result", res.Files)
+	}
+}