blob: 0f84197b39f648d63670b1ecc5d9084df5c1eda7 [file] [log] [blame]
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zoekt
import (
"bytes"
"context"
"fmt"
"reflect"
"regexp/syntax"
"strings"
"testing"
"github.com/kylelemons/godebug/pretty"
"github.com/google/zoekt/query"
)
// clearScores resets the score of every file and line match in r, and clears
// each file's Checksum and Debug fields, so that tests can compare results
// without having to spell those values out.
func clearScores(r *SearchResult) {
	for i := range r.Files {
		file := &r.Files[i]
		file.Score = 0.0
		file.Checksum = nil
		file.Debug = ""
		for j := range file.LineMatches {
			file.LineMatches[j].Score = 0.0
		}
	}
}
// testIndexBuilder creates an IndexBuilder for repo and adds docs to it in
// order. Any failure to create the builder or to add a document aborts the
// test through t.Fatalf.
func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
	builder, err := NewIndexBuilder(repo)
	if err != nil {
		t.Fatalf("NewIndexBuilder: %v", err)
	}

	for idx := range docs {
		if addErr := builder.Add(docs[idx]); addErr != nil {
			t.Fatalf("Add %d: %v", idx, addErr)
		}
	}
	return builder
}
func TestBoundary(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: []byte("x the")},
Document{Name: "f1", Content: []byte("reader")})
res := searchForTest(t, b, &query.Substring{Pattern: "there"})
if len(res.Files) > 0 {
t.Fatalf("got %v, want no matches", res.Files)
}
}
// TestDocSectionInvalid checks that Add rejects documents whose symbol
// sections overlap or extend past the end of the 14-byte content.
func TestDocSectionInvalid(t *testing.T) {
	b, err := NewIndexBuilder(nil)
	if err != nil {
		t.Fatalf("NewIndexBuilder: %v", err)
	}

	// addWith adds a fixed document that carries the given symbol sections.
	addWith := func(sections ...DocumentSection) error {
		return b.Add(Document{
			Name:    "f1",
			Content: []byte("01234567890123"),
			Symbols: sections,
		})
	}

	if addWith(DocumentSection{5, 8}, DocumentSection{7, 9}) == nil {
		t.Errorf("overlapping doc sections should fail")
	}
	if addWith(DocumentSection{0, 20}) == nil {
		t.Errorf("doc sections beyond EOF should fail")
	}
}
func TestBasic(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{
Name: "f2",
Content: []byte("to carry water in the no later bla"),
// ------------- 0123456789012345678901234567890123456789
})
res := searchForTest(t, b, &query.Substring{
Pattern: "water",
CaseSensitive: true,
})
fmatches := res.Files
if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 matches", fmatches)
}
got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
want := "f2:9"
if got != want {
t.Errorf("1: got %s, want %s", got, want)
}
}
func TestEmptyIndex(t *testing.T) {
b := testIndexBuilder(t, nil)
searcher := searcherForTest(t, b)
var opts SearchOptions
if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
t.Fatalf("Search: %v", err)
}
if _, err := searcher.List(context.Background(), &query.Repo{}); err != nil {
t.Fatalf("List: %v", err)
}
if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
t.Fatalf("Search: %v", err)
}
}
// memSeeker serves reads out of an in-memory byte slice. The tests in this
// file pass it to NewSearcher and ReadMetadata in place of an on-disk index.
type memSeeker struct {
	data []byte
}

// Name returns a fixed name for the in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op: there is no underlying resource to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data starting at offset off. A request that
// reaches past the end of the data yields an error rather than a panic.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Compute the end in 64 bits so off+sz cannot wrap around, and compare
	// against len (not cap) so bytes past the logical end are never exposed.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read [%d,%d) out of range, size %d", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the number of bytes held.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
// TestNewlines checks the complete match structure reported for a hit on the
// second line of a three-line document.
func TestNewlines(t *testing.T) {
	content := []byte("line1\nline2\nbla")
	b := testIndexBuilder(t, nil, Document{Name: "filename", Content: content})

	got := searchForTest(t, b, &query.Substring{Pattern: "ne2"}).Files

	// "ne2" is at byte 8 of the file, i.e. byte 2 of the line starting at 6.
	fragment := LineFragmentMatch{
		Offset:      8,
		LineOffset:  2,
		MatchLength: 3,
	}
	line := LineMatch{
		LineFragments: []LineFragmentMatch{fragment},
		Line:          []byte("line2"),
		LineStart:     6,
		LineEnd:       11,
		LineNumber:    2,
	}
	want := []FileMatch{{
		FileName:    "filename",
		LineMatches: []LineMatch{line},
	}}

	if !reflect.DeepEqual(got, want) {
		t.Errorf("got %v, want %v", got, want)
	}
}
// TestQueryNewlines checks that a match spanning two lines is reported as two
// LineMatches, each of which covers a single line.
func TestQueryNewlines(t *testing.T) {
	const text = "line1\nline2\nbla"
	b := testIndexBuilder(t, nil,
		Document{Name: "filename", Content: []byte(text)})

	files := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}).Files
	if len(files) != 1 {
		t.Fatalf("got %d file matches, want exactly one", len(files))
	}

	if n := len(files[0].LineMatches); n != 2 {
		t.Fatalf("got %d line matches, want exactly two", n)
	}
}
// searchForTest serializes b, opens a searcher on the result and runs q
// against it. The first element of o, if any, supplies the search options;
// otherwise the zero SearchOptions is used. A search error aborts the test.
// Scores are cleared from the result before it is returned.
func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
	opts := SearchOptions{}
	if len(o) != 0 {
		opts = o[0]
	}

	result, err := searcherForTest(t, b).Search(context.Background(), q, &opts)
	if err != nil {
		t.Fatalf("Search(%s): %v", q, err)
	}

	clearScores(result)
	return result
}
// searcherForTest serializes b into memory and returns a Searcher that reads
// the serialized index back. A failure to write the index or to open the
// searcher aborts the test.
func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
	var buf bytes.Buffer
	// Check the write error: a failed write would otherwise surface later
	// as a confusing NewSearcher or search failure.
	if err := b.Write(&buf); err != nil {
		t.Fatalf("Write: %v", err)
	}

	f := &memSeeker{buf.Bytes()}
	searcher, err := NewSearcher(f)
	if err != nil {
		t.Fatalf("NewSearcher: %v", err)
	}
	return searcher
}
// TestFileBasedSearch checks that a case-insensitive substring search finds
// the pattern in both documents, in the expected order and at the expected
// byte offsets.
func TestFileBasedSearch(t *testing.T) {
	b := testIndexBuilder(t, nil,
		// "ananas" starts at byte offset 8.
		Document{Name: "f1", Content: []byte("I love bananas without skin")},
		// "ananas" starts at byte offset 10.
		Document{Name: "f2", Content: []byte("In Dutch, ananas means pineapple")},
	)

	q := &query.Substring{
		Pattern:       "ananas",
		CaseSensitive: false,
	}
	files := searchForTest(t, b, q).Files
	if len(files) != 2 {
		t.Fatalf("got %v, want 2 matches", files)
	}

	first, second := files[0], files[1]
	if first.FileName != "f2" || second.FileName != "f1" {
		t.Fatalf("got %v, want matches {f1,f2}", files)
	}
	if first.LineMatches[0].LineFragments[0].Offset != 10 || second.LineMatches[0].LineFragments[0].Offset != 8 {
		t.Fatalf("got %#v, want offsets 10,8", files)
	}
}
// TestCaseFold checks case-sensitive substring search: a lower-case pattern
// must not match mixed-case content, while the exact-case pattern must match
// at byte offset 7.
func TestCaseFold(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("I love BaNaNAS.")})

	lower := &query.Substring{Pattern: "bananas", CaseSensitive: true}
	if files := searchForTest(t, b, lower).Files; len(files) != 0 {
		t.Errorf("foldcase: got %#v, want 0 matches", files)
	}

	exact := &query.Substring{Pattern: "BaNaNAS", CaseSensitive: true}
	files := searchForTest(t, b, exact).Files
	switch {
	case len(files) != 1:
		t.Errorf("no foldcase: got %v, want 1 matches", files)
	case files[0].LineMatches[0].LineFragments[0].Offset != 7:
		t.Errorf("foldcase: got %v, want offsets 7", files)
	}
}
func TestAndSearch(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("f1", []byte("x banana y"))
b.AddFile("f2", []byte("x apple y"))
b.AddFile("f3", []byte("x banana apple y"))
// ---------------------0123456789012345
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
},
&query.Substring{
Pattern: "apple",
},
))
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
}
if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
t.Fatalf("got %#v, want offsets 2,9", matches)
}
wantStats := Stats{
FilesLoaded: 1,
ContentBytesLoaded: 18,
IndexBytesLoaded: 8,
NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
MatchCount: 1,
FileCount: 1,
FilesConsidered: 2,
}
if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
t.Errorf("got stats diff %s", diff)
}
}
func TestAndNegateSearch(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("f1", []byte("x banana y"))
b.AddFile("f4", []byte("x banana apple y"))
// ---------------------0123456789012345
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
},
&query.Not{Child: &query.Substring{
Pattern: "apple",
}}))
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", matches)
}
if matches[0].FileName != "f1" {
t.Fatalf("got match %#v, want FileName: f1", matches[0])
}
if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
t.Fatalf("got %v, want offsets 2,9", matches)
}
}
func TestNegativeMatchesOnlyShortcut(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("f1", []byte("x banana y"))
b.AddFile("f2", []byte("x appelmoes y"))
b.AddFile("f3", []byte("x appelmoes y"))
b.AddFile("f3", []byte("x appelmoes y"))
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
},
&query.Not{Child: &query.Substring{
Pattern: "appel",
}}))
if sres.Stats.FilesConsidered != 1 {
t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
}
}
func TestFileSearch(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("x orange y"))
// --------0123456
b.AddFile("banana", []byte("x apple y"))
// --------789012
sres := searchForTest(t, b, &query.Substring{
Pattern: "anan",
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", matches)
}
got := matches[0].LineMatches[0]
want := LineMatch{
Line: []byte("banana"),
LineFragments: []LineFragmentMatch{{
Offset: 1,
LineOffset: 1,
MatchLength: 4,
}},
FileName: true,
}
if !reflect.DeepEqual(got, want) {
t.Errorf("got %#v, want %#v", got, want)
}
}
func TestFileCase(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("BANANA", []byte("x orange y"))
sres := searchForTest(t, b, &query.Substring{
Pattern: "banana",
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || matches[0].FileName != "BANANA" {
t.Fatalf("got %v, want 1 match 'BANANA'", matches)
}
}
func TestFileRegexpSearchBruteForce(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("x orange y"))
// --------------------------0123456879
b.AddFile("banana", []byte("x apple y"))
sres := searchForTest(t, b, &query.Regexp{
Regexp: mustParseRE("[qn][zx]"),
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || matches[0].FileName != "banzana" {
t.Fatalf("got %v, want 1 match on 'banzana'", matches)
}
}
func TestFileRegexpSearchShortString(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banana.py", []byte("x orange y"))
sres := searchForTest(t, b, &query.Regexp{
Regexp: mustParseRE("ana.py"),
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || matches[0].FileName != "banana.py" {
t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
}
}
func TestFileSubstringSearchBruteForce(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("BANZANA", []byte("x orange y"))
b.AddFile("banana", []byte("x apple y"))
q := &query.Substring{
Pattern: "z",
FileName: true,
}
res := searchForTest(t, b, q)
if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
}
}
func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("BANZANA", []byte("x orange y"))
b.AddFile("bananaq", []byte("x apple y"))
q := &query.Substring{
Pattern: "q",
FileName: true,
}
res := searchForTest(t, b, q)
if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
t.Fatalf("got %v, want 1 match in %q", res.Files, want)
}
}
func TestSearchMatchAll(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("x orange y"))
// --------------------------0123456879
b.AddFile("banana", []byte("x apple y"))
sres := searchForTest(t, b, &query.Const{Value: true})
matches := sres.Files
if len(matches) != 2 {
t.Fatalf("got %v, want 2 matches", matches)
}
}
func TestSearchNewline(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("abcd\ndefg"))
sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
// Just check that we don't crash.
matches := sres.Files
if len(matches) != 1 {
t.Fatalf("got %v, want 1 matches", matches)
}
}
func TestSearchMatchAllRegexp(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("abcd"))
// --------------------------0123456879
b.AddFile("banana", []byte("pqrs"))
sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
matches := sres.Files
if len(matches) != 2 || sres.Stats.MatchCount != 2 {
t.Fatalf("got %v, want 2 matches", matches)
}
if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
t.Fatalf("want 4 chars in every file, got %#v", matches)
}
}
func TestFileRestriction(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banana1", []byte("x orange y"))
// --------------------------0123456879
b.AddFile("banana2", []byte("x apple y"))
b.AddFile("orange", []byte("x apple y"))
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
FileName: true,
},
&query.Substring{
Pattern: "apple",
}))
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", matches)
}
match := matches[0].LineMatches[0]
got := string(match.Line)
want := "x apple y"
if got != want {
t.Errorf("got match %#v, want line %q", match, want)
}
}
func TestFileNameBoundary(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{Name: "banana2", Content: []byte("x apple y")},
Document{Name: "helpers.go", Content: []byte("x apple y")},
Document{Name: "foo", Content: []byte("x apple y")})
sres := searchForTest(t, b, &query.Substring{
Pattern: "helpers.go",
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", matches)
}
}
// TestWordBoundaryRanking checks that the file containing a whole-word match
// is returned first, and that within it the whole-word match (offset 13)
// precedes the partial-word match (offset 7).
func TestWordBoundaryRanking(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("xbytex xbytex")},
		// "byte" occurs at byte offsets 1, 7 and 13; only the last one
		// has a word boundary on both sides.
		Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
		Document{Name: "f3", Content: []byte("xbytex ybytex")})

	res := searchForTest(t, b, &query.Substring{Pattern: "byte"})
	if len(res.Files) != 3 {
		t.Fatalf("got %#v, want 3 files", res.Files)
	}

	top := res.Files[0]
	if top.FileName != "f2" || len(top.LineMatches) != 3 {
		t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", top.FileName, len(top.LineMatches), top)
	}

	if top.LineMatches[0].LineFragments[0].Offset != 13 {
		t.Fatalf("got first match %#v, want full word match", res.Files[0].LineMatches[0])
	}
	if top.LineMatches[1].LineFragments[0].Offset != 7 {
		t.Fatalf("got second match %#v, want partial word match", res.Files[0].LineMatches[1])
	}
}
// TestDocumentOrder checks that documents with identical content are returned
// in the order in which they were added to the index.
func TestDocumentOrder(t *testing.T) {
	docs := make([]Document, 3)
	for i := range docs {
		docs[i] = Document{
			Name:    fmt.Sprintf("f%d", i),
			Content: []byte("needle"),
		}
	}
	b := testIndexBuilder(t, nil, docs...)

	res := searchForTest(t, b, query.NewAnd(&query.Substring{Pattern: "needle"}))

	var got []string
	for i := range res.Files {
		got = append(got, res.Files[i].FileName)
	}

	want := []string{"f0", "f1", "f2"}
	if !reflect.DeepEqual(got, want) {
		t.Fatalf("got %v, want %v", got, want)
	}
}
func TestBranchMask(t *testing.T) {
b := testIndexBuilder(t, &Repository{
Branches: []RepositoryBranch{
{"master", "v-master"},
{"stable", "v-stable"},
{"bonzai", "v-bonzai"},
},
}, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
)
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "needle",
},
&query.Branch{
Pattern: "table",
}))
if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
}
if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
}
}
// TestBranchLimit checks that NewIndexBuilder accepts a repository with 64
// branches and rejects one with 65.
func TestBranchLimit(t *testing.T) {
	for _, count := range []int{64, 65} {
		repo := &Repository{}
		for i := 0; i < count; i++ {
			name := fmt.Sprintf("b%d", i)
			repo.Branches = append(repo.Branches, RepositoryBranch{name, "v-" + name})
		}

		_, err := NewIndexBuilder(repo)
		switch {
		case count == 64 && err != nil:
			t.Fatalf("NewIndexBuilder: %v", err)
		case count == 65 && err == nil:
			t.Fatalf("NewIndexBuilder succeeded")
		}
	}
}
func TestBranchReport(t *testing.T) {
branches := []string{"stable", "master"}
b := testIndexBuilder(t, &Repository{
Branches: []RepositoryBranch{
{"stable", "vs"},
{"master", "vm"},
},
},
Document{Name: "f2", Content: []byte("needle"), Branches: branches})
sres := searchForTest(t, b, &query.Substring{
Pattern: "needle",
})
if len(sres.Files) != 1 {
t.Fatalf("got %v, want 1 result from f2", sres.Files)
}
f := sres.Files[0]
if !reflect.DeepEqual(f.Branches, branches) {
t.Fatalf("got branches %q, want %q", f.Branches, branches)
}
}
func TestBranchVersions(t *testing.T) {
b := testIndexBuilder(t, &Repository{
Branches: []RepositoryBranch{
{"stable", "v-stable"},
{"master", "v-master"},
},
}, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
sres := searchForTest(t, b, &query.Substring{
Pattern: "needle",
})
if len(sres.Files) != 1 {
t.Fatalf("got %v, want 1 result from f2", sres.Files)
}
f := sres.Files[0]
if f.Version != "v-master" {
t.Fatalf("got file %#v, want version 'v-master'", f)
}
}
// mustParseRE parses s as a regular expression with no syntax flags set and
// returns the parsed tree. It panics with the parse error if s is invalid.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, 0)
	if err == nil {
		return parsed
	}
	panic(err)
}
// TestRegexp checks the complete line match reported for a content regexp
// that matches within a single-line document.
func TestRegexp(t *testing.T) {
	// "dle" starts at byte 3 and "bla" ends at byte 14, so the match is 11
	// bytes long.
	content := []byte("needle the bla")
	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: content})

	q := &query.Regexp{Regexp: mustParseRE("dle.*bla")}
	files := searchForTest(t, b, q).Files
	if len(files) != 1 || len(files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 match in 1 file", files)
	}

	// FileName and LineStart are left at their zero values (false and 0).
	want := LineMatch{
		LineFragments: []LineFragmentMatch{{
			LineOffset:  3,
			Offset:      3,
			MatchLength: 11,
		}},
		Line:       content,
		LineNumber: 1,
		LineEnd:    14,
	}
	if got := files[0].LineMatches[0]; !reflect.DeepEqual(got, want) {
		t.Errorf("got %#v, want %#v", got, want)
	}
}
// TestRegexpFile checks that a file-name regexp matches only the file whose
// name satisfies the whole expression, not one that contains just a part.
func TestRegexpFile(t *testing.T) {
	const name = "let's play: find the mussel"
	content := []byte("needle the bla")
	b := testIndexBuilder(t, nil,
		Document{Name: name, Content: content},
		Document{Name: "play.txt", Content: content})

	q := &query.Regexp{
		Regexp:   mustParseRE("play.*mussel"),
		FileName: true,
	}
	files := searchForTest(t, b, q).Files
	if len(files) != 1 || len(files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 match in 1 file", files)
	}

	if files[0].FileName != name {
		t.Errorf("got match %#v, want name %q", files[0], name)
	}
}
func TestRegexpOrder(t *testing.T) {
content := []byte("bla the needle")
// ----------------01234567890123
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: content})
sres := searchForTest(t, b,
&query.Regexp{
Regexp: mustParseRE("dle.*bla"),
})
if len(sres.Files) != 0 {
t.Fatalf("got %v, want 0 matches", sres.Files)
}
}
// TestRepoName checks repository-name filtering: a non-matching repo pattern
// yields no results and considers no files, while a matching one lets the
// content match through.
func TestRepoName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "bla"},
		Document{Name: "f1", Content: []byte("bla the needle")})

	// withRepo combines the content query with a repository pattern.
	withRepo := func(pattern string) query.Q {
		return query.NewAnd(
			&query.Substring{Pattern: "needle"},
			&query.Repo{Pattern: pattern},
		)
	}

	miss := searchForTest(t, b, withRepo("foo"))
	if len(miss.Files) != 0 {
		t.Fatalf("got %v, want 0 matches", miss.Files)
	}
	if miss.Stats.FilesConsidered > 0 {
		t.Fatalf("got FilesConsidered %d, should have short circuited", miss.Stats.FilesConsidered)
	}

	hit := searchForTest(t, b, withRepo("bla"))
	if len(hit.Files) != 1 {
		t.Fatalf("got %v, want 1 match", hit.Files)
	}
}
func TestMergeMatches(t *testing.T) {
content := []byte("blablabla")
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: content})
sres := searchForTest(t, b,
&query.Substring{Pattern: "bla"})
if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", sres.Files)
}
}
// TestRepoURL checks that a search result exposes the repository's file URL
// template in RepoURLs and its line fragment template in LineFragments, both
// keyed by repository name.
func TestRepoURL(t *testing.T) {
	content := []byte("blablabla")
	b := testIndexBuilder(t, &Repository{
		Name:                 "name",
		URL:                  "URL",
		CommitURLTemplate:    "commit",
		FileURLTemplate:      "file-url",
		LineFragmentTemplate: "fragment",
	}, Document{Name: "f1", Content: content})

	sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})

	// The failure messages state the values that are actually compared.
	if sres.RepoURLs["name"] != "file-url" {
		t.Errorf("got RepoURLs %v, want {name: file-url}", sres.RepoURLs)
	}
	if sres.LineFragments["name"] != "fragment" {
		t.Errorf("got LineFragments %v, want {name: fragment}", sres.LineFragments)
	}
}
func TestRegexpCaseSensitive(t *testing.T) {
content := []byte("bla\nfunc unmarshalGitiles\n")
b := testIndexBuilder(t, nil, Document{
Name: "f1",
Content: content,
})
res := searchForTest(t, b,
&query.Regexp{
Regexp: mustParseRE("func.*Gitiles"),
CaseSensitive: true,
})
if len(res.Files) != 1 {
t.Fatalf("got %v, want one match", res.Files)
}
}
func TestRegexpCaseFolding(t *testing.T) {
content := []byte("bla\nfunc unmarshalGitiles\n")
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: content})
res := searchForTest(t, b,
&query.Regexp{
Regexp: mustParseRE("func.*GITILES"),
CaseSensitive: false,
})
if len(res.Files) != 1 {
t.Fatalf("got %v, want one match", res.Files)
}
}
func TestCaseRegexp(t *testing.T) {
content := []byte("BLABLABLA")
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: content})
res := searchForTest(t, b,
&query.Regexp{
Regexp: mustParseRE("[xb][xl][xa]"),
CaseSensitive: true,
})
if len(res.Files) > 0 {
t.Fatalf("got %v, want no matches", res.Files)
}
}
func TestNegativeRegexp(t *testing.T) {
content := []byte("BLABLABLA needle bla")
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: content})
res := searchForTest(t, b,
query.NewAnd(
&query.Substring{
Pattern: "needle",
},
&query.Not{
Child: &query.Regexp{
Regexp: mustParseRE(".cs"),
},
}))
if len(res.Files) != 1 {
t.Fatalf("got %v, want 1 match", res.Files)
}
}
func TestSymbolRank(t *testing.T) {
content := []byte("func bla() blubxxxxx")
// ----------------01234567890123456789
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
}, Document{
Name: "f2",
Content: content,
Symbols: []DocumentSection{{5, 8}},
}, Document{
Name: "f3",
Content: content,
})
res := searchForTest(t, b,
&query.Substring{
CaseSensitive: false,
Pattern: "bla",
})
if len(res.Files) != 3 {
t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
}
if res.Files[0].FileName != "f2" {
t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
}
}
// TestSymbolRankRegexpUTF8 checks symbol ranking for a regexp match that is
// preceded by a line of 100 multi-byte runes: the document whose symbol
// section covers the match must still be ranked first.
func TestSymbolRankRegexpUTF8(t *testing.T) {
	prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
	content := []byte(prefix + "func bla() blub")

	// Symbol offsets are byte offsets; "bla" sits at bytes [5, 8) of the
	// text that follows the prefix.
	base := uint32(len(prefix))
	plain1 := Document{Name: "f1", Content: content}
	withSymbol := Document{
		Name:    "f2",
		Content: content,
		Symbols: []DocumentSection{{base + 5, base + 8}},
	}
	plain2 := Document{Name: "f3", Content: content}
	b := testIndexBuilder(t, nil, plain1, withSymbol, plain2)

	q := &query.Regexp{Regexp: mustParseRE("b.a")}
	files := searchForTest(t, b, q).Files
	if len(files) != 3 {
		t.Fatalf("got %#v, want 3 files", files)
	}
	if files[0].FileName != "f2" {
		t.Errorf("got %#v, want 'f2' as top match", files[0])
	}
}
func TestPartialSymbolRank(t *testing.T) {
content := []byte("func bla() blub")
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
Symbols: []DocumentSection{{4, 9}},
}, Document{
Name: "f2",
Content: content,
Symbols: []DocumentSection{{4, 8}},
}, Document{
Name: "f3",
Content: content,
Symbols: []DocumentSection{{4, 9}},
})
res := searchForTest(t, b,
&query.Substring{
Pattern: "bla",
})
if len(res.Files) != 3 {
t.Fatalf("got %#v, want 3 files", res.Files)
}
if res.Files[0].FileName != "f2" {
t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
}
}
func TestNegativeRepo(t *testing.T) {
content := []byte("bla the needle")
// ----------------01234567890123
b := testIndexBuilder(t, &Repository{
Name: "bla",
}, Document{Name: "f1", Content: content})
sres := searchForTest(t, b,
query.NewAnd(
&query.Substring{Pattern: "needle"},
&query.Not{Child: &query.Repo{Pattern: "bla"}},
))
if len(sres.Files) != 0 {
t.Fatalf("got %v, want 0 matches", sres.Files)
}
}
// TestListRepos checks that List returns the repository, with a shard count
// of one, when the repo pattern matches its name, and returns nothing when
// the pattern only matches file content.
func TestListRepos(t *testing.T) {
	content := []byte("bla the needle")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content})

	searcher := searcherForTest(t, b)
	ctx := context.Background()

	matching := &query.Repo{Pattern: "epo"}
	res, err := searcher.List(ctx, matching)
	if err != nil {
		t.Fatalf("List(%v): %v", matching, err)
	}
	if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
		t.Fatalf("got %v, want 1 matches", res)
	}
	if shards := res.Repos[0].Stats.Shards; shards != 1 {
		t.Fatalf("got %d, want 1 shard", shards)
	}

	// "bla" occurs in the documents but not in the repository name.
	nonMatching := &query.Repo{Pattern: "bla"}
	res, err = searcher.List(ctx, nonMatching)
	if err != nil {
		t.Fatalf("List(%v): %v", nonMatching, err)
	}
	if len(res.Repos) != 0 {
		t.Fatalf("got %v, want 0 matches", res)
	}
}
// TestMetadata checks that ReadMetadata recovers the repository name from a
// serialized index.
func TestMetadata(t *testing.T) {
	content := []byte("bla the needle")
	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	}, Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content})

	var buf bytes.Buffer
	// Check the write error: a failed write would otherwise show up as a
	// confusing ReadMetadata failure.
	if err := b.Write(&buf); err != nil {
		t.Fatalf("Write: %v", err)
	}
	f := &memSeeker{buf.Bytes()}

	rd, _, err := ReadMetadata(f)
	if err != nil {
		t.Fatalf("ReadMetadata: %v", err)
	}

	if got, want := rd.Name, "reponame"; got != want {
		t.Fatalf("got %q want %q", got, want)
	}
}
func TestOr(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: []byte("needle")},
Document{Name: "f2", Content: []byte("banana")})
sres := searchForTest(t, b, query.NewOr(
&query.Substring{Pattern: "needle"},
&query.Substring{Pattern: "banana"}))
if len(sres.Files) != 2 {
t.Fatalf("got %v, want 2 files", sres.Files)
}
}
func TestAtomCountScore(t *testing.T) {
b := testIndexBuilder(t,
&Repository{
Branches: []RepositoryBranch{
{"branches", "v1"},
{"needle", "v2"},
},
},
Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
sres := searchForTest(t, b,
query.NewOr(
&query.Substring{Pattern: "needle"},
&query.Substring{Pattern: "needle", FileName: true},
&query.Branch{Pattern: "needle"},
))
var got []string
for _, f := range sres.Files {
got = append(got, f.FileName)
}
want := []string{"needle-file-branch", "needle-file", "f1"}
if !reflect.DeepEqual(got, want) {
t.Errorf("got %v, want %v", got, want)
}
}
func TestImportantCutoff(t *testing.T) {
content := []byte("func bla() blub")
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
Symbols: []DocumentSection{{5, 8}},
}, Document{
Name: "f2",
Content: content,
})
opts := SearchOptions{
ShardMaxImportantMatch: 1,
}
sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}, opts)
if len(sres.Files) != 1 || sres.Files[0].FileName != "f1" {
t.Errorf("got %v, wanted 1 match 'f1'", sres.Files)
}
}
func TestFrequency(t *testing.T) {
content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
})
sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
if len(sres.Files) != 0 {
t.Errorf("got %v, wanted 0 matches", sres.Files)
}
}
// TestMatchNewline runs a regexp, parsed with syntax.ClassNL so that the
// negated class [^a] may match a newline, over two-line content and checks
// that the reported line is the second line ("alex").
func TestMatchNewline(t *testing.T) {
	re, err := syntax.Parse("[^a]a", syntax.ClassNL)
	if err != nil {
		t.Fatalf("syntax.Parse: %v", err)
	}

	content := []byte("pqr\nalex")
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})

	// The expected line is everything after the first newline; report that
	// same value on failure rather than the whole content.
	wantLine := content[len("pqr\n"):]
	if len(sres.Files) != 1 {
		t.Errorf("got %v, wanted 1 matches", sres.Files)
	} else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, wantLine) {
		t.Errorf("got match line %q, want %q", l, wantLine)
	}
}
// TestSubRepo checks that a match in a document belonging to a sub-repository
// reports the sub-repository's path and name, and that the result's
// LineFragments map holds the sub-repository's line fragment template under
// the sub-repository's name.
func TestSubRepo(t *testing.T) {
	subRepos := map[string]*Repository{
		"sub": {
			Name:                 "sub-name",
			LineFragmentTemplate: "sub-line",
		},
	}

	content := []byte("pqr\nalex")
	b := testIndexBuilder(t, &Repository{
		SubRepoMap: subRepos,
	}, Document{
		Name:              "sub/f1",
		Content:           content,
		SubRepositoryPath: "sub",
	})

	sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
	if len(sres.Files) != 1 {
		t.Fatalf("got %v, wanted 1 matches", sres.Files)
	}

	f := sres.Files[0]
	if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
		t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
	}

	// The map is keyed by the sub-repository name, so the message names
	// that key.
	if sres.LineFragments["sub-name"] != "sub-line" {
		t.Errorf("got LineFragmentTemplate %v, want {'sub-name':'sub-line'}", sres.LineFragments)
	}
}
// TestSearchEither checks that a substring query with neither Content nor
// FileName set matches both file content and file names, while setting
// Content restricts it to the document whose content matches.
func TestSearchEither(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("bla needle bla")},
		Document{Name: "needle-file-branch", Content: []byte("bla content")})

	either := searchForTest(t, b, &query.Substring{Pattern: "needle"}).Files
	if len(either) != 2 {
		t.Fatalf("got %v, wanted 2 matches", either)
	}

	contentOnly := searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}).Files
	if len(contentOnly) != 1 {
		t.Fatalf("got %v, wanted 1 match", contentOnly)
	}
	if got, want := contentOnly[0].FileName, "f1"; got != want {
		t.Errorf("got %q, want %q", got, want)
	}
}
func TestUnicodeExactMatch(t *testing.T) {
needle := "néédlÉ"
content := []byte("blá blá " + needle + " blâ")
// ----------------01234567 8
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: content})
if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
}
}
// TestUnicodeCoverContent searches for the upper-cased form of a non-ASCII
// needle: the case-sensitive search must find nothing, and the
// case-insensitive search must find the needle at its byte offset in the
// content.
func TestUnicodeCoverContent(t *testing.T) {
	const needle = "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")
	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: content})

	sensitive := &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}
	if res := searchForTest(t, b, sensitive); len(res.Files) != 0 {
		t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
	}

	insensitive := &query.Substring{Pattern: "NÉÉDLÉ"}
	res := searchForTest(t, b, insensitive)
	if len(res.Files) != 1 {
		t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
	}

	got := res.Files[0].LineMatches[0].LineFragments[0].Offset
	want := uint32(strings.Index(string(content), needle))
	if got != want {
		t.Errorf("got %d want %d", got, want)
	}
}
// TestUnicodeNonCoverContent checks that a content-only, case-insensitive
// search for the upper-cased form of a longer non-ASCII needle finds the
// needle at its byte offset in the content.
func TestUnicodeNonCoverContent(t *testing.T) {
	const needle = "nééáádlÉ"
	content := []byte("blá blá " + needle + " blâ")
	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: content})

	q := &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", res.Files)
	}

	got := res.Files[0].LineMatches[0].LineFragments[0].Offset
	want := uint32(strings.Index(string(content), needle))
	if got != want {
		t.Errorf("got %d want %d", got, want)
	}
}
// kelvinCodePoint is U+212A (KELVIN SIGN, decimal 8490). It takes three bytes
// in UTF-8 and case-folds to the single-byte ASCII letter 'k', which makes it
// handy for tests where case variants have different encoded lengths.
const kelvinCodePoint = 0x212A
// TestUnicodeVariableLength searches (case-insensitively, content only) for a
// needle spelled with ASCII 'k' in a corpus where the corresponding position
// holds the Kelvin sign, i.e. a case variant with a different UTF-8 length.
func TestUnicodeVariableLength(t *testing.T) {
	const ascii = "k"
	kelvin := string(rune(kelvinCodePoint))

	needle := "nee" + ascii + "eed"
	// The corpus has three words: the needle spelled with the Kelvin sign,
	// then two shorter words using each variant.
	corpus := "nee" + kelvin + "eed" +
		" ee" + ascii + "ee" +
		" ee" + kelvin + "ee"

	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: []byte(corpus)})

	q := &query.Substring{Pattern: needle, Content: true}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", res.Files)
	}
}
func TestUnicodeFileStartOffsets(t *testing.T) {
unicode := "世界"
wat := "waaaaaat"
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: []byte(unicode),
},
Document{
Name: "f2",
Content: []byte(wat),
},
)
q := &query.Substring{Pattern: wat, Content: true}
res := searchForTest(t, b, q)
if len(res.Files) != 1 {
t.Fatalf("got %v, wanted 1 match", res.Files)
}
}
func TestLongFileUTF8(t *testing.T) {
needle := "neeedle"
// 6 bytes.
unicode := "世界"
content := []byte(strings.Repeat(unicode, 100) + needle)
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: []byte(strings.Repeat("a", 50)),
},
Document{
Name: "f2",
Content: content,
})
q := &query.Substring{Pattern: needle, Content: true}
res := searchForTest(t, b, q)
if len(res.Files) != 1 {
t.Errorf("got %v, want 1 result", res)
}
}
// TestEstimateDocCount runs searches with SearchOptions.EstimateDocCount set
// and checks Stats.ShardFilesConsidered: 2 when the repo atom matches the
// indexed repository, 0 when it does not.
func TestEstimateDocCount(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)
	opts := SearchOptions{EstimateDocCount: true}

	matching := query.NewAnd(
		&query.Substring{Pattern: "needle"},
		&query.Repo{Pattern: "reponame"},
	)
	// Report the same counter that is being checked; the message previously
	// printed Stats.FilesConsidered, which is a different field.
	if sres := searchForTest(t, b, matching, opts); sres.Stats.ShardFilesConsidered != 2 {
		t.Errorf("got ShardFilesConsidered = %d, want 2", sres.Stats.ShardFilesConsidered)
	}

	nonMatching := query.NewAnd(
		&query.Substring{Pattern: "needle"},
		&query.Repo{Pattern: "nomatch"},
	)
	if sres := searchForTest(t, b, nonMatching, opts); sres.Stats.ShardFilesConsidered != 0 {
		t.Errorf("got ShardFilesConsidered = %d, want 0", sres.Stats.ShardFilesConsidered)
	}
}
func TestUTF8CorrectCorpus(t *testing.T) {
needle := "neeedle"
// 6 bytes.
unicode := "世界"
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: []byte(strings.Repeat(unicode, 100)),
},
Document{
Name: "xxxxxneeedle",
Content: []byte("hello"),
})
q := &query.Substring{Pattern: needle, FileName: true}
res := searchForTest(t, b, q)
if len(res.Files) != 1 {
t.Errorf("got %v, want 1 result", res)
}
}
// TestBuilderStats writes an index holding one 4096-byte document named "f1"
// and checks that ContentSize reports 4096+2 bytes.
// NOTE(review): the extra 2 bytes are presumably the file name "f1" — confirm
// against ContentSize.
func TestBuilderStats(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("abcd", 1024)),
		})

	var buf bytes.Buffer
	// A failed serialization should fail the test here rather than surface
	// as a confusing size mismatch (or not at all).
	if err := b.Write(&buf); err != nil {
		t.Fatalf("Write: %v", err)
	}

	if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
		t.Errorf("got %d, want %d", got, want)
	}
}
func TestIOStats(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: []byte(strings.Repeat("abcd", 1024)),
})
q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
res := searchForTest(t, b, q)
// 4096 (content) + 2 (overhead: newlines or doc sections)
if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
t.Errorf("got content I/O %d, want %d", got, want)
}
// 1024 entries, each 4 bytes apart. 4 fits into single byte
// delta encoded.
if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
t.Errorf("got index I/O %d, want %d", got, want)
}
}
// TestStartLineAnchor checks that a "^"-anchored query matches text at the
// start of a line ("^start") and does not match the same kind of text in the
// middle of a line ("^middle").
func TestStartLineAnchor(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name: "f1",
			Content: []byte(
				`hello
start of middle of line
`),
		})

	q, err := query.Parse("^start")
	if err != nil {
		// Fatal: searching with the query left over from a failed parse
		// would be meaningless.
		t.Fatalf("parse: %v", err)
	}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Errorf("got %v, want 1 file", res.Files)
	}

	q, err = query.Parse("^middle")
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	res = searchForTest(t, b, q)
	if len(res.Files) != 0 {
		t.Errorf("got %v, want 0 files", res.Files)
	}
}
// TestAndOrUnicode combines a parsed regexp query with nested And/Or
// repo and branch atoms, and runs it against content in which the two
// regexp literals are separated by a multi-byte rune.
func TestAndOrUnicode(t *testing.T) {
	q, err := query.Parse("orange.*apple")
	if err != nil {
		// Fatal: the composite query below cannot be built meaningfully
		// from a failed parse.
		t.Fatalf("parse: %v", err)
	}
	finalQ := query.NewAnd(q,
		query.NewOr(query.NewAnd(&query.Repo{Pattern: "name"},
			query.NewOr(&query.Branch{Pattern: "master"}))))

	b := testIndexBuilder(t, &Repository{
		Name:     "name",
		Branches: []RepositoryBranch{{"master", "master-version"}},
	}, Document{
		Name: "f2",
		// U+2318 takes three bytes in UTF-8, so "apple" starts at byte 9.
		Content:  []byte("orange\u2318apple"),
		Branches: []string{"master"},
	})

	res := searchForTest(t, b, finalQ)
	if len(res.Files) != 1 {
		t.Errorf("got %v, want 1 result", res.Files)
	}
}
func TestAndShort(t *testing.T) {
content := []byte("bla needle at orange bla")
b := testIndexBuilder(t, &Repository{Name: "reponame"},
Document{Name: "f1", Content: content},
Document{Name: "f2", Content: []byte("xx at xx")},
Document{Name: "f3", Content: []byte("yy orange xx")},
)
q := query.NewAnd(&query.Substring{Pattern: "at"},
&query.Substring{Pattern: "orange"})
res := searchForTest(t, b, q)
if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
t.Errorf("got %v, want 1 result", res.Files)
}
}
func TestNoCollectRegexpSubstring(t *testing.T) {
content := []byte("bla final bla\nfoo final, foo")
b := testIndexBuilder(t, &Repository{Name: "reponame"},
Document{Name: "f1", Content: content},
)
q := &query.Regexp{
Regexp: mustParseRE("final[,.]"),
}
res := searchForTest(t, b, q)
if len(res.Files) != 1 {
t.Fatalf("got %v, want 1 result", res.Files)
}
if f := res.Files[0]; len(f.LineMatches) != 1 {
t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
}
}
// printLineMatches renders each line match as `<line number>:<quoted line>
// <fragments>` and joins the entries with ", " for use in failure messages.
func printLineMatches(ms []LineMatch) string {
	parts := make([]string, len(ms))
	for i := range ms {
		m := &ms[i]
		parts[i] = fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)
	}
	return strings.Join(parts, ", ")
}
func TestLang(t *testing.T) {
content := []byte("bla needle bla")
b := testIndexBuilder(t, &Repository{Name: "reponame"},
Document{Name: "f1", Content: content},
Document{Name: "f2", Language: "java", Content: content},
Document{Name: "f3", Language: "cpp", Content: content},
)
q := query.NewAnd(&query.Substring{Pattern: "needle"},
&query.Language{Language: "cpp"})
res := searchForTest(t, b, q)
if len(res.Files) != 1 {
t.Fatalf("got %v, want 1 result in f3", res.Files)
}
f := res.Files[0]
if f.FileName != "f3" || f.Language != "cpp" {
t.Fatalf("got %v, want 1 match with language cpp", f)
}
}
func TestLangShortcut(t *testing.T) {
content := []byte("bla needle bla")
b := testIndexBuilder(t, &Repository{Name: "reponame"},
Document{Name: "f2", Language: "java", Content: content},
Document{Name: "f3", Language: "cpp", Content: content},
)
q := query.NewAnd(&query.Substring{Pattern: "needle"},
&query.Language{Language: "fortran"})
res := searchForTest(t, b, q)
if len(res.Files) != 0 {
t.Fatalf("got %v, want 0 results", res.Files)
}
if res.Stats.IndexBytesLoaded > 0 {
t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
}
}
// TestNoTextMatchAtoms runs a query consisting solely of a language atom (no
// text-matching atom) and expects the single document tagged "java", f2.
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Language{Language: "java"})
	res := searchForTest(t, b, q)
	// The java document is f2; the message previously named f3.
	if len(res.Files) != 1 {
		t.Fatalf("got %v, want 1 result in f2", res.Files)
	}
	if got, want := res.Files[0].FileName, "f2"; got != want {
		t.Errorf("got file %q, want %q", got, want)
	}
}
// TestNoPositiveAtoms combines a negated substring (absent from every
// document) with a repo atom, so the query has no positive text atom, and
// expects both indexed documents to be returned.
func TestNoPositiveAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	q := query.NewAnd(
		&query.Not{Child: &query.Substring{Pattern: "xyz"}},
		&query.Repo{Pattern: "reponame"})
	res := searchForTest(t, b, q)
	// Only f1 and f2 exist; the message previously referred to a
	// nonexistent f3.
	if len(res.Files) != 2 {
		t.Fatalf("got %v, want 2 results", res.Files)
	}
}
func TestSymbolBoundaryStart(t *testing.T) {
content := []byte("start\nbla bla\nend")
// ----------------012345 67890123 456
b := testIndexBuilder(t, &Repository{Name: "reponame"},
Document{
Name: "f1",
Content: content,
Symbols: []DocumentSection{{0, 5}, {14, 17}},
},
)
q := &query.Symbol{
Atom: &query.Substring{Pattern: "start"},
}
res := searchForTest(t, b, q)
if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 line in 1 file", res.Files)
}
m := res.Files[0].LineMatches[0].LineFragments[0]
if m.Offset != 0 {
t.Fatalf("got offset %d want 0", m.Offset)
}
}
// TestSymbolBoundaryEnd runs a symbol query for text whose symbol section
// ends exactly at the end of the document and checks that the match is
// reported at byte offset 14.
func TestSymbolBoundaryEnd(t *testing.T) {
	// Byte layout: "start\n" = [0,6), "bla bla\n" = [6,14), "end" = [14,17).
	content := []byte("start\nbla bla\nend")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{14, 17}},
		},
	)

	q := &query.Symbol{
		Atom: &query.Substring{Pattern: "end"},
	}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 line in 1 file", res.Files)
	}

	m := res.Files[0].LineMatches[0].LineFragments[0]
	// The message previously claimed "want 0" while the check expects 14.
	if m.Offset != 14 {
		t.Fatalf("got offset %d want 14", m.Offset)
	}
}
func TestSymbolAtom(t *testing.T) {
content := []byte("bla\nsymblabla\nbla")
// ----------------0123 456789012
b := testIndexBuilder(t, &Repository{Name: "reponame"},
Document{
Name: "f1",
Content: content,
Symbols: []DocumentSection{{4, 12}},
},
)
q := &query.Symbol{
Atom: &query.Substring{Pattern: "bla"},
}
res := searchForTest(t, b, q)
if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 line in 1 file", res.Files)
}
m := res.Files[0].LineMatches[0].LineFragments[0]
if m.Offset != 7 || m.MatchLength != 3 {
t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
}
}
// TestSymbolAtomExact checks that a symbol query whose pattern equals the
// whole symbol matches only inside the declared symbol section [4,7), even
// though the same text appears elsewhere in the document.
func TestSymbolAtomExact(t *testing.T) {
	// Byte layout: "bla\n" = [0,4), first "sym" = [4,7).
	content := []byte("bla\nsym\nbla\nsym\nasymb")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 7}},
		},
	)

	q := &query.Symbol{
		Atom: &query.Substring{Pattern: "sym"},
	}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 line in 1 file", res.Files)
	}

	m := res.Files[0].LineMatches[0].LineFragments[0]
	// The message previously claimed "want 7" while the check expects 4.
	if m.Offset != 4 {
		t.Fatalf("got offset %d, want 4", m.Offset)
	}
}
func TestHitIterTerminate(t *testing.T) {
// contrived input: trigram frequencies forces selecting abc +
// def for the distance iteration. There is no match, so this
// will advance the compressedPostingIterator to beyond the
// end.
content := []byte("abc bcdbcd cdecde abcabc def efg")
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
},
)
searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
}
func TestDistanceHitIterBailLast(t *testing.T) {
content := []byte("AST AST AST UASH")
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
},
)
res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
if len(res.Files) != 0 {
t.Fatalf("got %v, want no results", res.Files)
}
}
// TestDocumentSectionRuneBoundary checks that Add rejects symbol sections
// whose start or end falls inside a multi-byte rune. The content is three
// Kelvin signs of three bytes each, so rune boundaries are at 0, 3, 6 and 9.
func TestDocumentSectionRuneBoundary(t *testing.T) {
	kelvin := string(rune(kelvinCodePoint))
	content := kelvin + kelvin + kelvin

	b, err := NewIndexBuilder(nil)
	if err != nil {
		t.Fatalf("NewIndexBuilder: %v", err)
	}

	badSections := []DocumentSection{
		{2, 6}, // start splits a rune
		{3, 7}, // end splits a rune
	}
	for i := range badSections {
		doc := Document{
			Name:    "f1",
			Content: []byte(content),
			Symbols: []DocumentSection{badSections[i]},
		}
		if b.Add(doc) == nil {
			t.Errorf("%d: Add succeeded", i)
		}
	}
}
// TestUnicodeQuery searches for a pattern made entirely of multi-byte runes
// and checks that the single reported fragment has a MatchLength equal to
// the byte length of the pattern.
func TestUnicodeQuery(t *testing.T) {
	kelvin := string(rune(kelvinCodePoint))
	content := kelvin + kelvin + kelvin

	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: []byte(content)})
	res := searchForTest(t, b, &query.Substring{Pattern: content})

	if len(res.Files) != 1 {
		t.Fatalf("want 1 match, got %v", res.Files)
	}
	lines := res.Files[0].LineMatches
	if len(lines) != 1 {
		t.Fatalf("want 1 line, got %v", lines)
	}
	frags := lines[0].LineFragments
	if len(frags) != 1 {
		t.Fatalf("want 1 line fragment, got %v", frags)
	}

	// len(content) counts bytes, not runes.
	if got, want := frags[0].MatchLength, len(content); got != want {
		t.Fatalf("got MatchLength %d want %d", got, want)
	}
}
// TestSkipInvalidContent adds a document whose content contains a NUL byte.
// Add must succeed, the original text must not be searchable, and a search
// for "NOT-INDEXED" must return one file.
// NOTE(review): presumably the builder substitutes a placeholder containing
// "NOT-INDEXED" for rejected content — confirm against IndexBuilder.Add.
func TestSkipInvalidContent(t *testing.T) {
	invalid := []string{
		// Binary
		"abc def \x00 abc",
	}
	for _, raw := range invalid {
		b, err := NewIndexBuilder(nil)
		if err != nil {
			t.Fatalf("NewIndexBuilder: %v", err)
		}

		doc := Document{Name: "f1", Content: []byte(raw)}
		if err := b.Add(doc); err != nil {
			t.Fatal(err)
		}

		original := searchForTest(t, b, &query.Substring{Pattern: "abc def"})
		if len(original.Files) != 0 {
			t.Fatalf("got %v, want no results", original.Files)
		}

		placeholder := searchForTest(t, b, &query.Substring{Pattern: "NOT-INDEXED"})
		if len(placeholder.Files) != 1 {
			t.Fatalf("got %v, want 1 result", placeholder.Files)
		}
	}
}
// TestCheckText feeds CheckText inputs that must be accepted (with a limit of
// 20000) and inputs that must be rejected (with a limit of 15).
// NOTE(review): the rejected inputs presumably fail for a NUL byte, for being
// too short, and for exceeding the limit respectively — confirm against
// CheckText.
func TestCheckText(t *testing.T) {
	accepted := []string{
		"",
		"simple ascii",
		"símplé unicödé",
		"\uFEFFwith utf8 'bom'",
		"with \uFFFD unicode replacement char",
	}
	for _, text := range accepted {
		err := CheckText([]byte(text), 20000)
		if err != nil {
			t.Errorf("CheckText(%q): %v", text, err)
		}
	}

	rejected := []string{
		"zero\x00byte",
		"xx",
		"0123456789abcdefghi",
	}
	for _, text := range rejected {
		err := CheckText([]byte(text), 15)
		if err == nil {
			t.Errorf("CheckText(%q) succeeded", text)
		}
	}
}
// TestLineAnd runs a content regexp requiring "apple" followed by "banana"
// on the same line. Only f1 has such a line, and the test also pins
// RegexpsConsidered to 1.
// NOTE(review): presumably f2 and f3 are ruled out before the regexp is
// evaluated, hence the count of 1 — confirm against the matcher.
func TestLineAnd(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)

	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// Do not search with a nil regexp if the pattern fails to parse.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}
	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	res := searchForTest(t, b, &q)

	wantRegexpCount := 1
	if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
		t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
	}
	if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
		t.Errorf("got %v, want 1 result", res.Files)
	}
}
// TestLineAndFileName runs the "apple ... banana" regexp restricted to file
// names. Only the document named "apple banana" may match, even though other
// documents contain that text in their content; RegexpsConsidered is pinned
// to 1.
func TestLineAndFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)

	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// Do not search with a nil regexp if the pattern fails to parse.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}
	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	res := searchForTest(t, b, &q)

	wantRegexpCount := 1
	if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
		t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
	}
	if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
		t.Errorf("got %v, want 1 result", res.Files)
	}
}
// TestMultiLineRegex runs a regexp that needs "apple", then whitespace, then
// "grape". In f1 the whitespace is the newline between the two words, so the
// match spans lines; f1 is the only expected result and RegexpsConsidered is
// pinned to 2.
func TestMultiLineRegex(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange")},
		Document{Name: "f3", Content: []byte("grape apple")},
	)

	pattern := "(apple).*?[[:space:]].*?(grape)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// Do not search with a nil regexp if the pattern fails to parse.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}
	q := query.Regexp{
		Regexp: r,
	}
	res := searchForTest(t, b, &q)

	wantRegexpCount := 2
	if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
		t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
	}
	if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
		t.Errorf("got %v, want 1 result", res.Files)
	}
}