blob: 108ba4f64213e6f32ed26027c2610f33497434de [file] [log] [blame]
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package shards
import (
"context"
"fmt"
"log"
"os"
"runtime"
"runtime/debug"
"sort"
"time"
"golang.org/x/net/trace"
"golang.org/x/sync/semaphore"
"github.com/google/zoekt"
"github.com/google/zoekt/query"
)
type rankedShard struct {
zoekt.Searcher
rank uint16
}
type shardedSearcher struct {
// Limit the number of parallel queries. Since searching is
// CPU bound, we can't do better than #CPU queries in
// parallel. If we do so, we just create more memory
// pressure.
throttle *semaphore.Weighted
capacity int64
shards map[string]rankedShard
rankedVersion uint64
ranked []rankedShard
}
func newShardedSearcher(n int64) *shardedSearcher {
ss := &shardedSearcher{
shards: make(map[string]rankedShard),
throttle: semaphore.NewWeighted(n),
capacity: n,
}
return ss
}
// NewDirectorySearcher returns a searcher instance that loads all
// shards corresponding to a glob into memory.
func NewDirectorySearcher(dir string) (zoekt.Searcher, error) {
ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
tl := &throttledLoader{
ss: ss,
throttle: make(chan struct{}, runtime.GOMAXPROCS(0)),
}
_, err := NewDirectoryWatcher(dir, tl)
if err != nil {
return nil, err
}
return ss, nil
}
// throttledLoader tries to load up to throttle shards in parallel.
type throttledLoader struct {
ss *shardedSearcher
throttle chan struct{}
}
func (tl *throttledLoader) load(key string) {
tl.throttle <- struct{}{}
shard, err := loadShard(key)
<-tl.throttle
if err != nil {
log.Printf("reloading: %s, err %v ", key, err)
return
}
tl.ss.replace(key, shard)
}
func (tl *throttledLoader) drop(key string) {
tl.ss.replace(key, nil)
}
func (ss *shardedSearcher) String() string {
return "shardedSearcher"
}
// Close closes references to open files. It may be called only once.
func (ss *shardedSearcher) Close() {
ss.lock()
defer ss.unlock()
for _, s := range ss.shards {
s.Close()
}
}
func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
tr := trace.New("shardedSearcher.Search", "")
tr.LazyLog(q, true)
tr.LazyPrintf("opts: %+v", opts)
defer func() {
if sr != nil {
tr.LazyPrintf("num files: %d", len(sr.Files))
tr.LazyPrintf("stats: %+v", sr.Stats)
}
if err != nil {
tr.LazyPrintf("error: %v", err)
tr.SetError()
}
tr.Finish()
}()
start := time.Now()
aggregate := &zoekt.SearchResult{
RepoURLs: map[string]string{},
LineFragments: map[string]string{},
}
// This critical section is large, but we don't want to deal with
// searches on shards that have just been closed.
if err := ss.rlock(ctx); err != nil {
return aggregate, err
}
defer ss.runlock()
tr.LazyPrintf("acquired lock")
aggregate.Wait = time.Since(start)
start = time.Now()
shards := ss.getShards()
all := make(chan shardResult, len(shards))
var childCtx context.Context
var cancel context.CancelFunc
if opts.MaxWallTime == 0 {
childCtx, cancel = context.WithCancel(ctx)
} else {
childCtx, cancel = context.WithTimeout(ctx, opts.MaxWallTime)
}
defer cancel()
// For each query, throttle the number of parallel
// actions. Since searching is mostly CPU bound, we limit the
// number of parallel searches. This reduces the peak working
// set, which hopefully stops https://cs.bazel.build from crashing
// when looking for the string "com".
feeder := make(chan zoekt.Searcher, len(shards))
for _, s := range shards {
feeder <- s
}
close(feeder)
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
go func() {
for s := range feeder {
searchOneShard(childCtx, s, q, opts, all)
}
}()
}
for range shards {
r := <-all
if r.err != nil {
return nil, r.err
}
aggregate.Files = append(aggregate.Files, r.sr.Files...)
aggregate.Stats.Add(r.sr.Stats)
if len(r.sr.Files) > 0 {
for k, v := range r.sr.RepoURLs {
aggregate.RepoURLs[k] = v
}
for k, v := range r.sr.LineFragments {
aggregate.LineFragments[k] = v
}
}
if cancel != nil && opts.TotalMaxMatchCount > 0 && aggregate.Stats.MatchCount > opts.TotalMaxMatchCount {
cancel()
cancel = nil
}
}
zoekt.SortFilesByScore(aggregate.Files)
if max := opts.MaxDocDisplayCount; max > 0 && len(aggregate.Files) > max {
aggregate.Files = aggregate.Files[:max]
}
for i := range aggregate.Files {
copySlice(&aggregate.Files[i].Content)
copySlice(&aggregate.Files[i].Checksum)
for l := range aggregate.Files[i].LineMatches {
copySlice(&aggregate.Files[i].LineMatches[l].Line)
}
}
aggregate.Duration = time.Since(start)
return aggregate, nil
}
func copySlice(src *[]byte) {
dst := make([]byte, len(*src))
copy(dst, *src)
*src = dst
}
type shardResult struct {
sr *zoekt.SearchResult
err error
}
func searchOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.SearchOptions, sink chan shardResult) {
defer func() {
if r := recover(); r != nil {
log.Printf("crashed shard: %s: %s, %s", s.String(), r, debug.Stack())
var r zoekt.SearchResult
r.Stats.Crashes = 1
sink <- shardResult{&r, nil}
}
}()
ms, err := s.Search(ctx, q, opts)
sink <- shardResult{ms, err}
}
func (ss *shardedSearcher) List(ctx context.Context, r query.Q) (rl *zoekt.RepoList, err error) {
tr := trace.New("shardedSearcher.List", "")
tr.LazyLog(r, true)
defer func() {
if rl != nil {
tr.LazyPrintf("repos size: %d", len(rl.Repos))
tr.LazyPrintf("crashes: %d", rl.Crashes)
}
if err != nil {
tr.LazyPrintf("error: %v", err)
tr.SetError()
}
tr.Finish()
}()
type res struct {
rl *zoekt.RepoList
err error
}
if err := ss.rlock(ctx); err != nil {
return nil, err
}
defer ss.runlock()
tr.LazyPrintf("acquired lock")
shards := ss.getShards()
shardCount := len(shards)
all := make(chan res, shardCount)
tr.LazyPrintf("shardCount: %d", len(shards))
for _, s := range shards {
go func(s zoekt.Searcher) {
defer func() {
if r := recover(); r != nil {
all <- res{
&zoekt.RepoList{Crashes: 1}, nil,
}
}
}()
ms, err := s.List(ctx, r)
all <- res{ms, err}
}(s.Searcher)
}
crashes := 0
uniq := map[string]*zoekt.RepoListEntry{}
for i := 0; i < shardCount; i++ {
r := <-all
if r.err != nil {
return nil, r.err
}
crashes += r.rl.Crashes
for _, r := range r.rl.Repos {
prev, ok := uniq[r.Repository.Name]
if !ok {
cp := *r
uniq[r.Repository.Name] = &cp
} else {
prev.Stats.Add(&r.Stats)
}
}
}
aggregate := make([]*zoekt.RepoListEntry, 0, len(uniq))
for _, v := range uniq {
aggregate = append(aggregate, v)
}
return &zoekt.RepoList{
Repos: aggregate,
Crashes: crashes,
}, nil
}
func (s *shardedSearcher) rlock(ctx context.Context) error {
return s.throttle.Acquire(ctx, 1)
}
// getShards returns the currently loaded shards. The shards must be accessed
// under a rlock call. The shards are sorted by decreasing rank and should not
// be mutated.
func (s *shardedSearcher) getShards() []rankedShard {
if len(s.ranked) > 0 {
return s.ranked
}
var res []rankedShard
for _, sh := range s.shards {
res = append(res, sh)
}
sort.Slice(res, func(i, j int) bool {
return res[i].rank > res[j].rank
})
// Cache ranked. We currently hold a read lock, so start a goroutine which
// acquires a write lock to update. Use requiredVersion to ensure our
// cached slice is still current after acquiring the write lock.
go func(ranked []rankedShard, requiredVersion uint64) {
s.lock()
if s.rankedVersion == requiredVersion {
s.ranked = ranked
}
s.unlock()
}(res, s.rankedVersion)
return res
}
func (s *shardedSearcher) runlock() {
s.throttle.Release(1)
}
func (s *shardedSearcher) lock() {
// won't error since context.Background won't expire
_ = s.throttle.Acquire(context.Background(), s.capacity)
}
func (s *shardedSearcher) unlock() {
s.throttle.Release(s.capacity)
}
func shardRank(s zoekt.Searcher) uint16 {
q := query.Repo{}
result, err := s.List(context.Background(), &q)
if err != nil {
return 0
}
if len(result.Repos) == 0 {
return 0
}
return result.Repos[0].Repository.Rank
}
func (s *shardedSearcher) replace(key string, shard zoekt.Searcher) {
var rank uint16
if shard != nil {
rank = shardRank(shard)
}
s.lock()
defer s.unlock()
old := s.shards[key]
if old.Searcher != nil {
old.Close()
}
if shard == nil {
delete(s.shards, key)
} else {
s.shards[key] = rankedShard{
rank: rank,
Searcher: shard,
}
}
s.rankedVersion++
s.ranked = nil
}
func loadShard(fn string) (zoekt.Searcher, error) {
f, err := os.Open(fn)
if err != nil {
return nil, err
}
iFile, err := zoekt.NewIndexFile(f)
if err != nil {
return nil, err
}
s, err := zoekt.NewSearcher(iFile)
if err != nil {
iFile.Close()
return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
}
return s, nil
}