blob: d98020b511ba76a741ea259cbe1cc2fa9578743b [file] [log] [blame]
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zoekt
import (
"bytes"
"fmt"
"log"
"reflect"
"regexp/syntax"
"strings"
"testing"
"unicode"
"unicode/utf8"
"golang.org/x/net/context"
"github.com/google/zoekt/query"
)
// clearScores resets the Score of every file match in r, and of every line
// match inside each file, to zero. searchForTest applies it to every result
// it returns.
func clearScores(r *SearchResult) {
	for fi := range r.Files {
		file := &r.Files[fi]
		file.Score = 0.0
		for li := range file.LineMatches {
			file.LineMatches[li].Score = 0.0
		}
	}
}
// testIndexBuilder creates an IndexBuilder for repo and adds docs to it in
// the order given. The test is aborted with t.Fatalf if the builder cannot be
// created or if any document cannot be added.
func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
	builder, err := NewIndexBuilder(repo)
	if err != nil {
		t.Fatalf("NewIndexBuilder: %v", err)
	}

	for idx := range docs {
		err := builder.Add(docs[idx])
		if err != nil {
			t.Fatalf("Add %d: %v", idx, err)
		}
	}
	return builder
}
func TestBoundary(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: []byte("x the")},
Document{Name: "f1", Content: []byte("reader")})
res := searchForTest(t, b, &query.Substring{Pattern: "there"})
if len(res.Files) > 0 {
t.Fatalf("got %v, want no matches", res.Files)
}
}
// Reference log.Println so the "log" import counts as used even when no test
// in this file logs through it.
var _ = log.Println
func TestBasic(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{
Name: "f2",
Content: []byte("to carry water in the no later bla"),
// ------------- 0123456789012345678901234567890123456789
})
res := searchForTest(t, b, &query.Substring{Pattern: "water"})
fmatches := res.Files
if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 matches", fmatches)
}
got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
want := "f2:9"
if got != want {
t.Errorf("1: got %s, want %s", got, want)
}
}
func TestEmptyIndex(t *testing.T) {
b := testIndexBuilder(t, nil)
searcher := searcherForTest(t, b)
var opts SearchOptions
if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
t.Fatalf("Search: %v", err)
}
if _, err := searcher.List(context.Background(), &query.Repo{}); err != nil {
t.Fatalf("List: %v", err)
}
if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
t.Fatalf("Search: %v", err)
}
}
// memSeeker serves reads out of an in-memory byte slice. The tests in this
// file pass it to NewSearcher and ReadMetadata instead of an on-disk file.
type memSeeker struct {
	data []byte
}

// Name returns a fixed placeholder name for the in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close does nothing; there is no underlying resource to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the backing slice that start at offset off.
// The returned slice aliases the backing data. A request that extends past
// the end of the data yields an error instead of a slice-bounds panic; the
// end offset is computed in 64 bits so that off+sz cannot wrap around.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memSeeker: read [%d,%d) out of range for %d bytes", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the backing slice in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
// TestNewlines searches a three-line document for "ne2" and compares the
// complete result against the expected match on the second line: offsets,
// line text, line start/end and line number.
func TestNewlines(t *testing.T) {
	b := testIndexBuilder(t, nil, Document{
		Name:    "filename",
		Content: []byte("line1\nline2\nbla"),
	})

	got := searchForTest(t, b, &query.Substring{Pattern: "ne2"}).Files

	fragment := LineFragmentMatch{
		Offset:      8,
		LineOffset:  2,
		MatchLength: 3,
	}
	line := LineMatch{
		LineFragments: []LineFragmentMatch{fragment},
		Line:          []byte("line2"),
		LineStart:     6,
		LineEnd:       11,
		LineNumber:    2,
	}
	want := []FileMatch{{
		FileName:    "filename",
		LineMatches: []LineMatch{line},
	}}

	if !reflect.DeepEqual(got, want) {
		t.Errorf("got %v, want %v", got, want)
	}
}
// searchForTest serializes b into a searcher, runs q against it and returns
// the result with all scores cleared. If o is non-empty, its first element is
// used as the search options; otherwise zero-valued options are used. A
// search error aborts the test.
func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
	searcher := searcherForTest(t, b)

	opts := SearchOptions{}
	if len(o) != 0 {
		opts = o[0]
	}

	result, err := searcher.Search(context.Background(), q, &opts)
	if err != nil {
		t.Fatalf("Search(%s): %v", q, err)
	}

	clearScores(result)
	return result
}
// searcherForTest writes the index held by b into an in-memory buffer and
// opens a Searcher on top of it. The test is aborted if the index cannot be
// written or if the searcher cannot be created.
func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
	var buf bytes.Buffer
	// A failed write would leave a truncated index behind; report it here
	// rather than letting NewSearcher fail with a less specific error.
	if err := b.Write(&buf); err != nil {
		t.Fatalf("Write: %v", err)
	}

	f := &memSeeker{buf.Bytes()}
	searcher, err := NewSearcher(f)
	if err != nil {
		t.Fatalf("NewSearcher: %v", err)
	}
	return searcher
}
// TestFileBasedSearch searches two documents for "ananas" and expects both to
// match, f2 first and f1 second, with fragment offsets 10 and 8 respectively.
func TestFileBasedSearch(t *testing.T) {
	// "ananas" starts at byte offset 8 in f1 and at byte offset 10 in f2.
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("I love bananas without skin")},
		Document{Name: "f2", Content: []byte("In Dutch, ananas means pineapple")},
	)

	files := searchForTest(t, b, &query.Substring{Pattern: "ananas"}).Files
	if len(files) != 2 {
		t.Fatalf("got %v, want 2 matches", files)
	}

	first, second := files[0], files[1]
	if !(first.FileName == "f2" && second.FileName == "f1") {
		t.Fatalf("got %v, want matches {f1,f2}", files)
	}

	// firstOffset is evaluated lazily so the second file is only inspected
	// when the first one already has the expected offset.
	firstOffset := func(f FileMatch) uint32 {
		return f.LineMatches[0].LineFragments[0].Offset
	}
	if firstOffset(first) != 10 || firstOffset(second) != 8 {
		t.Fatalf("got %#v, want offsets 10,8", files)
	}
}
// TestCaseFold runs two case-sensitive substring searches against a document
// containing "BaNaNAS": the all-lowercase pattern must not match, while the
// pattern with identical casing must match once at byte offset 7.
func TestCaseFold(t *testing.T) {
	b := testIndexBuilder(t, nil,
		// "BaNaNAS" starts at byte offset 7.
		Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
	)

	lowercase := &query.Substring{
		Pattern:       "bananas",
		CaseSensitive: true,
	}
	if files := searchForTest(t, b, lowercase).Files; len(files) != 0 {
		t.Errorf("foldcase: got %#v, want 0 matches", files)
	}

	exact := &query.Substring{
		Pattern:       "BaNaNAS",
		CaseSensitive: true,
	}
	files := searchForTest(t, b, exact).Files
	switch {
	case len(files) != 1:
		t.Errorf("no foldcase: got %v, want 1 matches", files)
	case files[0].LineMatches[0].LineFragments[0].Offset != 7:
		t.Errorf("foldcase: got %v, want offsets 7", files)
	}
}
func TestAndSearch(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("f1", []byte("x banana y"))
b.AddFile("f2", []byte("x apple y"))
b.AddFile("f3", []byte("x banana apple y"))
// ---------------------0123456789012345
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
},
&query.Substring{
Pattern: "apple",
},
))
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
}
if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
t.Fatalf("got %#v, want offsets 2,9", matches)
}
wantStats := Stats{
FilesLoaded: 1,
ContentBytesLoaded: 18,
IndexBytesLoaded: 4,
NgramMatches: 4,
MatchCount: 1,
FileCount: 1,
FilesConsidered: 3,
}
if !reflect.DeepEqual(sres.Stats, wantStats) {
t.Errorf("got stats %#v, want %#v", sres.Stats, wantStats)
}
}
func TestAndNegateSearch(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("f1", []byte("x banana y"))
b.AddFile("f4", []byte("x banana apple y"))
// ---------------------0123456789012345
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
},
&query.Not{&query.Substring{
Pattern: "apple",
}}))
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", matches)
}
if matches[0].FileName != "f1" {
t.Fatalf("got match %#v, want FileName: f1", matches[0])
}
if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
t.Fatalf("got %v, want offsets 2,9", matches)
}
}
func TestNegativeMatchesOnlyShortcut(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("f1", []byte("x banana y"))
b.AddFile("f2", []byte("x appelmoes y"))
b.AddFile("f3", []byte("x appelmoes y"))
b.AddFile("f3", []byte("x appelmoes y"))
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
},
&query.Not{&query.Substring{
Pattern: "appel",
}}))
if sres.Stats.FilesConsidered != 1 {
t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
}
}
func TestFileSearch(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("x orange y"))
// --------------------------0123456879
b.AddFile("banana", []byte("x apple y"))
sres := searchForTest(t, b, &query.Substring{
Pattern: "anan",
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", matches)
}
got := matches[0].LineMatches[0]
want := LineMatch{
Line: []byte("banana"),
LineFragments: []LineFragmentMatch{{
Offset: 1,
LineOffset: 1,
MatchLength: 4,
}},
FileName: true,
}
if !reflect.DeepEqual(got, want) {
t.Errorf("got %#v, want %#v", got, want)
}
}
func TestFileCase(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("BANANA", []byte("x orange y"))
sres := searchForTest(t, b, &query.Substring{
Pattern: "banana",
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || matches[0].FileName != "BANANA" {
t.Fatalf("got %v, want 1 match 'BANANA'", matches)
}
}
func TestFileRegexpSearchBruteForce(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("x orange y"))
// --------------------------0123456879
b.AddFile("banana", []byte("x apple y"))
sres := searchForTest(t, b, &query.Regexp{
Regexp: mustParseRE("[qn][zx]"),
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || matches[0].FileName != "banzana" {
t.Fatalf("got %v, want 1 match on 'banzana'", matches)
}
}
func TestFileRegexpSearchShortString(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banana.py", []byte("x orange y"))
sres := searchForTest(t, b, &query.Regexp{
Regexp: mustParseRE("ana.py"),
FileName: true,
})
matches := sres.Files
if len(matches) != 1 || matches[0].FileName != "banana.py" {
t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
}
}
func TestFileSubstringSearchBruteForce(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("BANZANA", []byte("x orange y"))
b.AddFile("banana", []byte("x apple y"))
q := &query.Substring{
Pattern: "z",
FileName: true,
}
res := searchForTest(t, b, q)
if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
}
}
func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("BANZANA", []byte("x orange y"))
b.AddFile("bananaq", []byte("x apple y"))
q := &query.Substring{
Pattern: "q",
FileName: true,
}
res := searchForTest(t, b, q)
if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
t.Fatalf("got %v, want 1 match in %q", res.Files, want)
}
}
func TestSearchMatchAll(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("x orange y"))
// --------------------------0123456879
b.AddFile("banana", []byte("x apple y"))
sres := searchForTest(t, b, &query.Const{true})
matches := sres.Files
if len(matches) != 2 {
t.Fatalf("got %v, want 2 matches", matches)
}
}
func TestSearchNewline(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("abcd\ndefg"))
sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
// Just check that we don't crash.
matches := sres.Files
if len(matches) != 1 {
t.Fatalf("got %v, want 1 matches", matches)
}
}
func TestSearchMatchAllRegexp(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banzana", []byte("abcd"))
// --------------------------0123456879
b.AddFile("banana", []byte("pqrs"))
sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
matches := sres.Files
if len(matches) != 2 || sres.Stats.MatchCount != 2 {
t.Fatalf("got %v, want 2 matches", matches)
}
if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
t.Fatalf("want 4 chars in every file, got %#v", matches)
}
}
func TestFileRestriction(t *testing.T) {
b, err := NewIndexBuilder(nil)
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
b.AddFile("banana1", []byte("x orange y"))
// --------------------------0123456879
b.AddFile("banana2", []byte("x apple y"))
b.AddFile("orange", []byte("x apple y"))
sres := searchForTest(t, b, query.NewAnd(
&query.Substring{
Pattern: "banana",
FileName: true,
},
&query.Substring{
Pattern: "apple",
}))
matches := sres.Files
if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
t.Fatalf("got %v, want 1 match", matches)
}
match := matches[0].LineMatches[0]
got := string(match.Line)
want := "x apple y"
if got != want {
t.Errorf("got match %#v, want line %q", match, want)
}
}
// TestFileNameBoundary runs a file-name search whose pattern is the complete
// name of the middle of three documents ("helpers.go") and expects exactly
// one file with one line match.
func TestFileNameBoundary(t *testing.T) {
	content := "x apple y"
	b := testIndexBuilder(t, nil,
		Document{Name: "banana2", Content: []byte(content)},
		Document{Name: "helpers.go", Content: []byte(content)},
		Document{Name: "foo", Content: []byte(content)},
	)

	q := &query.Substring{Pattern: "helpers.go", FileName: true}
	files := searchForTest(t, b, q).Files
	if len(files) != 1 || len(files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 match", files)
	}
}
// TestWordBoundaryRanking searches three documents for "byte". It expects f2
// to be listed first with three line matches, the first of them at offset 13
// (where "byte" stands as a word of its own) and the second at offset 7
// (where it is the start of "bytex").
func TestWordBoundaryRanking(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("xbytex xbytex")},
		// In f2, "byte" occurs at byte offsets 1, 7 and 13.
		Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
		Document{Name: "f3", Content: []byte("xbytex ybytex")},
	)

	sres := searchForTest(t, b, &query.Substring{Pattern: "byte"})
	if len(sres.Files) != 3 {
		t.Fatalf("got %#v, want 3 files", sres.Files)
	}

	top := sres.Files[0]
	if top.FileName != "f2" || len(top.LineMatches) != 3 {
		t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", top.FileName, len(top.LineMatches), top)
	}

	if top.LineMatches[0].LineFragments[0].Offset != 13 {
		t.Fatalf("got first match %#v, want full word match", sres.Files[0].LineMatches[0])
	}
	if top.LineMatches[1].LineFragments[0].Offset != 7 {
		t.Fatalf("got second match %#v, want partial word match", sres.Files[0].LineMatches[1])
	}
}
// TestBranchMask indexes four documents spread over the branches master,
// stable and bonzai, then searches for "needle" restricted to branches
// matching "table". It expects the two documents on the stable branch, f3
// then f2, and expects the first result to report only the branch "stable".
func TestBranchMask(t *testing.T) {
	b := testIndexBuilder(t, &Repository{
		Branches: []RepositoryBranch{
			{"master", "v-master"},
			{"stable", "v-stable"},
			{"bonzai", "v-bonzai"},
		},
	}, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
		Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
		Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
		Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
	)

	sres := searchForTest(t, b, query.NewAnd(
		&query.Substring{
			Pattern: "needle",
		},
		&query.Branch{
			Pattern: "table",
		}))

	// The failure message states what the condition actually requires: two
	// files, in the order f3, f2.
	if len(sres.Files) != 2 || sres.Files[0].FileName != "f3" || sres.Files[1].FileName != "f2" {
		t.Fatalf("got %v, want 2 results: f3, f2", sres.Files)
	}

	if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
		t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
	}
}
func TestBranchReport(t *testing.T) {
branches := []string{"stable", "master"}
b := testIndexBuilder(t, &Repository{
Branches: []RepositoryBranch{
{"stable", "vs"},
{"master", "vm"},
},
},
Document{Name: "f2", Content: []byte("needle"), Branches: branches})
sres := searchForTest(t, b, &query.Substring{
Pattern: "needle",
})
if len(sres.Files) != 1 {
t.Fatalf("got %v, want 1 result from f2", sres.Files)
}
f := sres.Files[0]
if !reflect.DeepEqual(f.Branches, branches) {
t.Fatalf("got branches %q, want %q", f.Branches, branches)
}
}
func TestBranchVersions(t *testing.T) {
b := testIndexBuilder(t, &Repository{
Branches: []RepositoryBranch{
{"stable", "v-stable"},
{"master", "v-master"},
},
}, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
sres := searchForTest(t, b, &query.Substring{
Pattern: "needle",
})
if len(sres.Files) != 1 {
t.Fatalf("got %v, want 1 result from f2", sres.Files)
}
f := sres.Files[0]
if f.Version != "v-master" {
t.Fatalf("got file %#v, want version 'v-master'", f)
}
}
// TestCoversContent searches for "needle" AND NOT "the" in a document that
// contains both words. It expects no results and expects the statistics to
// show that no file was loaded.
func TestCoversContent(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("needle the bla")})

	q := query.NewAnd(
		&query.Substring{Pattern: "needle"},
		&query.Not{&query.Substring{Pattern: "the"}},
	)
	sres := searchForTest(t, b, q)

	if len(sres.Files) > 0 {
		t.Fatalf("got %v, want no results", sres.Files)
	}
	if sres.Stats.FilesLoaded > 0 {
		t.Errorf("got %#v, want no FilesLoaded", sres.Stats)
	}
}
// mustParseRE parses s with regexp/syntax using no flags and returns the
// parsed expression. It panics with the parse error if s is not valid.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, 0)
	if err == nil {
		return parsed
	}
	panic(err)
}
// TestRegexp runs the regexp "dle.*bla" against "needle the bla" and compares
// the single resulting line match, field by field, against the expected
// value: a fragment at offset 3 of length 11 on line 1 spanning bytes 0-14.
func TestRegexp(t *testing.T) {
	// "dle" starts at byte offset 3; the content is 14 bytes long.
	content := []byte("needle the bla")
	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: content})

	q := &query.Regexp{Regexp: mustParseRE("dle.*bla")}
	files := searchForTest(t, b, q).Files
	if len(files) != 1 || len(files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 match in 1 file", files)
	}

	fragment := LineFragmentMatch{
		LineOffset:  3,
		Offset:      3,
		MatchLength: 11,
	}
	want := LineMatch{
		LineFragments: []LineFragmentMatch{fragment},
		Line:          content,
		FileName:      false,
		LineNumber:    1,
		LineStart:     0,
		LineEnd:       14,
	}
	got := files[0].LineMatches[0]
	if !reflect.DeepEqual(got, want) {
		t.Errorf("got %#v, want %#v", got, want)
	}
}
// TestRegexpFile runs the file-name regexp "play.*mussel" against two
// documents and expects it to match only the one named
// "let's play: find the mussel".
func TestRegexpFile(t *testing.T) {
	content := []byte("needle the bla")

	name := "let's play: find the mussel"
	b := testIndexBuilder(t, nil,
		Document{Name: name, Content: content},
		Document{Name: "play.txt", Content: content})

	sres := searchForTest(t, b,
		&query.Regexp{
			Regexp:   mustParseRE("play.*mussel"),
			FileName: true,
		})

	if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
	}

	// The format has two verbs; pass the wanted name for the %q so the
	// failure message does not end in "%!q(MISSING)".
	if sres.Files[0].FileName != name {
		t.Errorf("got match %#v, want name %q", sres.Files[0], name)
	}
}
// TestRegexpOrder runs the regexp "dle.*bla" against "bla the needle", where
// "bla" comes before "dle" rather than after it, and expects no matches.
func TestRegexpOrder(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("bla the needle")})

	q := &query.Regexp{Regexp: mustParseRE("dle.*bla")}
	if files := searchForTest(t, b, q).Files; len(files) != 0 {
		t.Fatalf("got %v, want 0 matches", files)
	}
}
// TestRepoName indexes a repository named "bla" and combines a content search
// for "needle" with a repository filter. With the filter "foo" it expects no
// matches and zero files considered; with the filter "bla" it expects one
// match.
func TestRepoName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "bla"},
		Document{Name: "f1", Content: []byte("bla the needle")})

	otherRepo := query.NewAnd(
		&query.Substring{Pattern: "needle"},
		&query.Repo{Pattern: "foo"},
	)
	sres := searchForTest(t, b, otherRepo)
	if len(sres.Files) != 0 {
		t.Fatalf("got %v, want 0 matches", sres.Files)
	}
	if considered := sres.Stats.FilesConsidered; considered > 0 {
		t.Fatalf("got FilesConsidered %d, should have short circuited", considered)
	}

	sameRepo := query.NewAnd(
		&query.Substring{Pattern: "needle"},
		&query.Repo{Pattern: "bla"},
	)
	sres = searchForTest(t, b, sameRepo)
	if len(sres.Files) != 1 {
		t.Fatalf("got %v, want 1 match", sres.Files)
	}
}
// TestMergeMatches searches "blablabla" for "bla" and expects the repeated
// occurrences on the single line to be reported as one line match in one
// file.
func TestMergeMatches(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("blablabla")})

	files := searchForTest(t, b, &query.Substring{Pattern: "bla"}).Files
	if len(files) != 1 || len(files[0].LineMatches) != 1 {
		t.Fatalf("got %v, want 1 match", files)
	}
}
// TestRepoURL indexes a repository with URL templates set and expects the
// search result to map the repository name to its FileURLTemplate in RepoURLs
// and to its LineFragmentTemplate in LineFragments.
func TestRepoURL(t *testing.T) {
	content := []byte("blablabla")
	b := testIndexBuilder(t, &Repository{
		Name:                 "name",
		URL:                  "URL",
		CommitURLTemplate:    "commit",
		FileURLTemplate:      "file-url",
		LineFragmentTemplate: "fragment",
	}, Document{Name: "f1", Content: content})

	sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})

	// The failure messages name the values the conditions actually compare
	// against, rather than the unrelated Repository.URL value.
	if sres.RepoURLs["name"] != "file-url" {
		t.Errorf("got RepoURLs %v, want {name: file-url}", sres.RepoURLs)
	}
	if sres.LineFragments["name"] != "fragment" {
		t.Errorf("got LineFragments %v, want {name: fragment}", sres.LineFragments)
	}
}
func TestRegexpCaseSensitive(t *testing.T) {
content := []byte("bla\nfunc unmarshalGitiles\n")
b := testIndexBuilder(t, nil, Document{
Name: "f1",
Content: content})
res := searchForTest(t, b,
&query.Regexp{
Regexp: mustParseRE("func.*Gitiles"),
CaseSensitive: true,
})
if len(res.Files) != 1 {
t.Fatalf("got %v, want one match", res.Files)
}
}
func TestRegexpCaseFolding(t *testing.T) {
content := []byte("bla\nfunc unmarshalGitiles\n")
b := testIndexBuilder(t, nil,
Document{Name: "f1", Content: content})
res := searchForTest(t, b,
&query.Regexp{
Regexp: mustParseRE("func.*GITILES"),
CaseSensitive: false,
})
if len(res.Files) != 1 {
t.Fatalf("got %v, want one match", res.Files)
}
}
// TestCaseRegexp runs the case-sensitive regexp "[xb][xl][xa]", which is
// written entirely in lowercase, against the all-uppercase content
// "BLABLABLA" and expects no matches.
func TestCaseRegexp(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("BLABLABLA")})

	q := &query.Regexp{
		Regexp:        mustParseRE("[xb][xl][xa]"),
		CaseSensitive: true,
	}
	if files := searchForTest(t, b, q).Files; len(files) > 0 {
		t.Fatalf("got %v, want no matches", files)
	}
}
// TestNegativeRegexp searches for "needle" AND NOT the regexp ".cs" in a
// document that contains "needle" but no "cs", and expects one match.
func TestNegativeRegexp(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("BLABLABLA needle bla")})

	positive := &query.Substring{Pattern: "needle"}
	negative := &query.Not{
		&query.Regexp{
			Regexp: mustParseRE(".cs"),
		},
	}

	files := searchForTest(t, b, query.NewAnd(positive, negative)).Files
	if len(files) != 1 {
		t.Fatalf("got %v, want 1 match", files)
	}
}
func TestSymbolRank(t *testing.T) {
content := []byte("func bla() blub")
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
}, Document{
Name: "f2",
Content: content,
Symbols: []DocumentSection{{5, 8}},
}, Document{
Name: "f3",
Content: content,
})
res := searchForTest(t, b,
&query.Substring{
Pattern: "bla",
})
if len(res.Files) != 3 {
t.Fatalf("got %#v, want 3 files", res.Files)
}
if res.Files[0].FileName != "f2" {
t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
}
}
func TestPartialSymbolRank(t *testing.T) {
content := []byte("func bla() blub")
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
Symbols: []DocumentSection{{4, 9}},
}, Document{
Name: "f2",
Content: content,
Symbols: []DocumentSection{{4, 8}},
}, Document{
Name: "f3",
Content: content,
Symbols: []DocumentSection{{4, 9}},
})
res := searchForTest(t, b,
&query.Substring{
Pattern: "bla",
})
if len(res.Files) != 3 {
t.Fatalf("got %#v, want 3 files", res.Files)
}
if res.Files[0].FileName != "f2" {
t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
}
}
func TestNegativeRepo(t *testing.T) {
content := []byte("bla the needle")
// ----------------01234567890123
b := testIndexBuilder(t, &Repository{
Name: "bla",
}, Document{Name: "f1", Content: content})
sres := searchForTest(t, b,
query.NewAnd(
&query.Substring{Pattern: "needle"},
&query.Not{&query.Repo{Pattern: "bla"}},
))
if len(sres.Files) != 0 {
t.Fatalf("got %v, want 0 matches", sres.Files)
}
}
func TestListRepos(t *testing.T) {
content := []byte("bla the needle")
// ----------------01234567890123
b := testIndexBuilder(t, &Repository{
Name: "reponame",
},
Document{Name: "f1", Content: content},
Document{Name: "f2", Content: content})
searcher := searcherForTest(t, b)
q := &query.Repo{Pattern: "epo"}
res, err := searcher.List(context.Background(), q)
if err != nil {
t.Fatalf("List(%v): %v", q, err)
}
if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
t.Fatalf("got %v, want 1 matches", res)
}
q = &query.Repo{Pattern: "bla"}
res, err = searcher.List(context.Background(), q)
if err != nil {
t.Fatalf("List(%v): %v", q, err)
}
if len(res.Repos) != 0 {
t.Fatalf("got %v, want 0 matches", res)
}
}
// TestMetadata writes an index for a repository named "reponame" into memory
// and expects ReadMetadata to return that repository name.
func TestMetadata(t *testing.T) {
	content := []byte("bla the needle")

	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	}, Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content})

	var buf bytes.Buffer
	// Fail here if the index could not be serialized, instead of handing a
	// partial buffer to ReadMetadata.
	if err := b.Write(&buf); err != nil {
		t.Fatalf("Write: %v", err)
	}
	f := &memSeeker{buf.Bytes()}

	rd, _, err := ReadMetadata(f)
	if err != nil {
		t.Fatalf("ReadMetadata: %v", err)
	}

	if got, want := rd.Name, "reponame"; got != want {
		t.Fatalf("got %q want %q", got, want)
	}
}
// TestOr searches for "needle" OR "banana" over two documents that each
// contain one of the two words, and expects both files to match.
func TestOr(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("needle")},
		Document{Name: "f2", Content: []byte("banana")})

	either := query.NewOr(
		&query.Substring{Pattern: "needle"},
		&query.Substring{Pattern: "banana"},
	)
	if files := searchForTest(t, b, either).Files; len(files) != 2 {
		t.Fatalf("got %v, want 2 files", files)
	}
}
func TestAtomCountScore(t *testing.T) {
b := testIndexBuilder(t,
&Repository{
Branches: []RepositoryBranch{
{"branches", "v1"},
{"needle", "v2"},
},
},
Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
sres := searchForTest(t, b,
query.NewOr(
&query.Substring{Pattern: "needle"},
&query.Substring{Pattern: "needle", FileName: true},
&query.Branch{Pattern: "needle"},
))
var got []string
for _, f := range sres.Files {
got = append(got, f.FileName)
}
want := []string{"needle-file-branch", "needle-file", "f1"}
if !reflect.DeepEqual(got, want) {
t.Errorf("got %v, want %v", got, want)
}
}
func TestImportantCutoff(t *testing.T) {
content := []byte("func bla() blub")
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
Symbols: []DocumentSection{{5, 8}},
}, Document{
Name: "f2",
Content: content,
})
opts := SearchOptions{
ShardMaxImportantMatch: 1,
}
sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}, opts)
if len(sres.Files) != 1 || sres.Files[0].FileName != "f1" {
t.Errorf("got %v, wanted 1 match 'f1'", sres.Files)
}
}
func TestFrequency(t *testing.T) {
content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
})
sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
if len(sres.Files) != 0 {
t.Errorf("got %v, wanted 0 matches", sres.Files)
}
}
// TestMatchNewline runs the regexp "[^a]a", parsed with syntax.ClassNL,
// against "pqr\nalex". It expects one matching file, and expects the Line of
// its first line match to equal the entire two-line content.
func TestMatchNewline(t *testing.T) {
	re, err := syntax.Parse("[^a]a", syntax.ClassNL)
	if err != nil {
		// Report through the test rather than panicking, so the failure is
		// attributed to this test.
		t.Fatalf("syntax.Parse: %v", err)
	}

	content := []byte("pqr\nalex")

	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
	if len(sres.Files) != 1 {
		t.Errorf("got %v, wanted 1 matches", sres.Files)
	} else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content) {
		t.Errorf("got match line %q, want %q", l, content)
	}
}
// TestSubRepo indexes one document that lives in the sub-repository mounted
// at "sub" (named "sub-name"). It expects the file match to report that path
// and name, and expects LineFragments to map "sub-name" to the
// sub-repository's LineFragmentTemplate.
func TestSubRepo(t *testing.T) {
	sub := &Repository{
		Name:                 "sub-name",
		LineFragmentTemplate: "sub-line",
	}
	repo := &Repository{
		SubRepoMap: map[string]*Repository{"sub": sub},
	}
	doc := Document{
		Name:              "sub/f1",
		Content:           []byte("pqr\nalex"),
		SubRepositoryPath: "sub",
	}
	b := testIndexBuilder(t, repo, doc)

	sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
	if len(sres.Files) != 1 {
		t.Fatalf("got %v, wanted 1 matches", sres.Files)
	}

	match := sres.Files[0]
	if !(match.SubRepositoryPath == "sub" && match.SubRepositoryName == "sub-name") {
		t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", match)
	}

	if sres.LineFragments["sub-name"] != "sub-line" {
		t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
	}
}
// TestSearchEither searches for "needle" where one document has it in its
// content and another has it in its name. A plain substring query must return
// both files; with Content set, only f1 (the content match) may be returned.
func TestSearchEither(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("bla needle bla")},
		Document{Name: "needle-file-branch", Content: []byte("bla content")})

	anywhere := &query.Substring{Pattern: "needle"}
	if files := searchForTest(t, b, anywhere).Files; len(files) != 2 {
		t.Fatalf("got %v, wanted 2 matches", files)
	}

	contentOnly := &query.Substring{Pattern: "needle", Content: true}
	files := searchForTest(t, b, contentOnly).Files
	if len(files) != 1 {
		t.Fatalf("got %v, wanted 1 match", files)
	}
	if got, want := files[0].FileName, "f1"; got != want {
		t.Errorf("got %q, want %q", got, want)
	}
}
// TestUnicodeExactMatch runs a case-sensitive search for a needle containing
// non-ASCII characters and expects the one document that embeds it verbatim
// to match.
func TestUnicodeExactMatch(t *testing.T) {
	needle := "néédlÉ"
	b := testIndexBuilder(t, nil, Document{
		Name:    "f1",
		Content: []byte("blá blá " + needle + " blâ"),
	})

	q := &query.Substring{Pattern: needle, CaseSensitive: true}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
	}
}
// TestUnicodeCoverContent searches for the uppercase form "NÉÉDLÉ" of a
// mixed-case non-ASCII needle. The case-sensitive search must find nothing;
// the case-insensitive search must find one file, with the fragment at the
// byte offset where the needle sits in the content.
func TestUnicodeCoverContent(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")
	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: content})

	sensitive := &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}
	if res := searchForTest(t, b, sensitive); len(res.Files) != 0 {
		t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
	}

	res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
	if len(res.Files) != 1 {
		t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
	}

	want := uint32(strings.Index(string(content), needle))
	got := res.Files[0].LineMatches[0].LineFragments[0].Offset
	if got != want {
		t.Errorf("got %d want %d", got, want)
	}
}
// TestUnicodeNonCoverContent runs a content-only search for the uppercase
// form of the non-ASCII needle "nééáádlÉ" and expects one file, with the
// fragment at the byte offset where the needle sits in the content.
func TestUnicodeNonCoverContent(t *testing.T) {
	needle := "nééáádlÉ"
	content := []byte("blá blá " + needle + " blâ")
	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: content})

	q := &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", res.Files)
	}

	want := uint32(strings.Index(string(content), needle))
	got := res.Files[0].LineMatches[0].LineFragments[0].Offset
	if got != want {
		t.Errorf("got %d want %d", got, want)
	}
}
// TestUnicodeVariableLength looks for the first rune whose unicode.SimpleFold
// result has a different UTF-8 encoded length than the rune itself. It then
// searches for a needle built around that rune in content that uses the
// folded rune at the same position, and expects one matching file.
func TestUnicodeVariableLength(t *testing.T) {
	var lower, upper rune

	var scratch [4]byte
	for r := rune(0); r < (1 << 21); r++ {
		folded := unicode.SimpleFold(r)
		rSize := utf8.EncodeRune(scratch[:], r)
		foldedSize := utf8.EncodeRune(scratch[:], folded)
		if rSize == foldedSize {
			continue
		}

		lower, upper = r, folded
		t.Logf("char %c (%d sz %d) %c (%d sz %d)", r, r, rSize, folded, folded, foldedSize)
		break
	}

	if lower == 0 {
		t.Fatal("rune not found")
	}

	lowerStr := string([]rune{lower})
	upperStr := string([]rune{upper})

	needle := "nee" + lowerStr + "eed"
	corpus := []byte("nee" + upperStr + "eed" +
		" ee" + lowerStr + "ee" +
		" ee" + upperStr + "ee")

	b := testIndexBuilder(t, nil, Document{Name: "f1", Content: corpus})

	res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", res.Files)
	}
}
func TestShortUnicode(t *testing.T) {
world := "世界"
content := []byte("world = " + world)
// ----------------012345678901234
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: content,
})
q := &query.Substring{Pattern: world}
searcher := searcherForTest(t, b)
var opts SearchOptions
_, err := searcher.Search(context.Background(), q, &opts)
if err == nil {
t.Error("search should have failed")
}
}
// TestUnicodeFileStartOffsets indexes a document made of multi-byte
// characters followed by an ASCII document, then runs a content search for
// the full text of the second document and expects exactly one matching file.
func TestUnicodeFileStartOffsets(t *testing.T) {
	multiByte := "世界"
	wat := "waaaaaat"
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte(multiByte)},
		Document{Name: "f2", Content: []byte(wat)},
	)

	res := searchForTest(t, b, &query.Substring{Pattern: wat, Content: true})
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", res.Files)
	}
}
// TestLongFileUTF8 indexes a 50-byte ASCII document and a second document in
// which the needle follows 100 repetitions of "世界". A content search for the
// needle must return exactly one file.
func TestLongFileUTF8(t *testing.T) {
	needle := "neeedle"

	// Each repetition of this string is 6 bytes of UTF-8.
	multiByte := "世界"
	padded := []byte(strings.Repeat(multiByte, 100) + needle)

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte(strings.Repeat("a", 50))},
		Document{Name: "f2", Content: padded},
	)

	res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
	if len(res.Files) != 1 {
		t.Errorf("got %v, want 1 result", res)
	}
}
// TestEstimateDocCount runs two searches with EstimateDocCount enabled
// against a repository named "reponame" that holds two documents. With a
// repository filter that matches, Stats.ShardFilesConsidered must be 2; with
// a filter that does not match, it must be 0.
func TestEstimateDocCount(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	opts := SearchOptions{
		EstimateDocCount: true,
	}

	// Report the field that is actually compared (ShardFilesConsidered), so
	// a failure shows the offending value rather than FilesConsidered.
	sres := searchForTest(t, b,
		query.NewAnd(
			&query.Substring{Pattern: "needle"},
			&query.Repo{Pattern: "reponame"},
		), opts)
	if got := sres.Stats.ShardFilesConsidered; got != 2 {
		t.Errorf("got ShardFilesConsidered = %d, want 2", got)
	}

	sres = searchForTest(t, b,
		query.NewAnd(
			&query.Substring{Pattern: "needle"},
			&query.Repo{Pattern: "nomatch"},
		), opts)
	if got := sres.Stats.ShardFilesConsidered; got != 0 {
		t.Errorf("got ShardFilesConsidered = %d, want 0", got)
	}
}
// TestUTF8CorrectCorpus indexes a document whose content is 100 repetitions
// of "世界", followed by a document whose name contains the needle. A
// file-name search for the needle must return exactly one file.
func TestUTF8CorrectCorpus(t *testing.T) {
	needle := "neeedle"

	// Each repetition of this string is 6 bytes of UTF-8.
	multiByte := "世界"

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte(strings.Repeat(multiByte, 100))},
		Document{Name: "xxxxxneeedle", Content: []byte("hello")},
	)

	res := searchForTest(t, b, &query.Substring{Pattern: needle, FileName: true})
	if len(res.Files) != 1 {
		t.Errorf("got %v, want 1 result", res)
	}
}
func TestIOStats(t *testing.T) {
b := testIndexBuilder(t, nil,
Document{
Name: "f1",
Content: []byte(strings.Repeat("abcd", 1024)),
})
q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
res := searchForTest(t, b, q)
// 4096 (content) + 2 (overhead: newlines or doc sections)
if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
t.Errorf("got content I/O %d, want %d", got, want)
}
// 1024 entries, each 4 bytes apart. 4 fits into single byte
// delta encoded.
if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
t.Errorf("got index I/O %d, want %d", got, want)
}
}