blob: 54ae5bfa43f63c457b123bed774226237170dc22 [file] [log] [blame]
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"io/ioutil"
"log"
"os"
"path/filepath"
"runtime/pprof"
"strings"
"github.com/google/zoekt/build"
)
// fileAggregator holds the state for a filepath.Walk traversal that selects
// files and forwards their paths over a channel.
type fileAggregator struct {
	// ignoreDirs is the set of directory base names that are not descended into.
	ignoreDirs map[string]struct{}
	// sizeMax is the largest size, as reported by os.FileInfo.Size, that a
	// file may have and still be forwarded.
	sizeMax int64
	// sink receives the path of every accepted file. add blocks on the send
	// when the channel is full.
	sink chan string
}

// add is a filepath.WalkFunc. It returns filepath.SkipDir for directories
// whose base name is in ignoreDirs, sends the path of every regular file that
// is not larger than sizeMax to sink, and silently skips everything else.
//
// A non-nil err handed in by the walker is returned unchanged, which stops
// the walk.
func (a *fileAggregator) add(path string, info os.FileInfo, err error) error {
	if err != nil {
		// The walker reports Lstat/ReadDir failures through err, and info may
		// be nil in that case, so it must not be touched before this check.
		return err
	}

	if info.IsDir() {
		if _, ok := a.ignoreDirs[filepath.Base(path)]; ok {
			return filepath.SkipDir
		}
		// Directories themselves are never forwarded, only their contents.
		return nil
	}

	if !info.Mode().IsRegular() || info.Size() > a.sizeMax {
		return nil
	}

	a.sink <- path
	return nil
}
// main parses the command line flags, assembles the build options and the set
// of ignored directory names, and calls indexArg for every positional
// argument. The first failing argument terminates the program.
func main() {
	cpuProfile := flag.String("cpu_profile", "", "write cpu profile to file")
	sizeMax := flag.Int("file_limit", 128*1024, "maximum file size")
	shardLimit := flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard")
	parallelism := flag.Int("parallelism", 4, "maximum number of parallel indexing processes.")
	ignoreDirs := flag.String("ignore_dirs", ".git,.hg,.svn", "comma separated list of directories to ignore.")
	indexDir := flag.String("index", build.DefaultDir, "directory for search indices")
	flag.Parse()

	opts := build.Options{
		Parallelism: *parallelism,
		SizeMax:     *sizeMax,
		ShardMax:    *shardLimit,
		IndexDir:    *indexDir,
	}
	opts.SetDefaults()

	if *cpuProfile != "" {
		f, err := os.Create(*cpuProfile)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()

		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	// Build the set of directory names to skip. Splitting an empty flag value
	// yields a single empty element, which the emptiness check drops.
	ignoreDirMap := map[string]struct{}{}
	for _, d := range strings.Split(*ignoreDirs, ",") {
		if d = strings.TrimSpace(d); d != "" {
			ignoreDirMap[d] = struct{}{}
		}
	}

	for _, arg := range flag.Args() {
		if err := indexArg(arg, opts, ignoreDirMap); err != nil {
			// log.Fatal exits through os.Exit, which skips deferred calls, so
			// finish the CPU profile explicitly. StopCPUProfile does nothing
			// when no profile is running.
			pprof.StopCPUProfile()
			log.Fatal(err)
		}
	}
}
// indexArg indexes the directory tree rooted at arg.
//
// arg is cleaned and made absolute; the result becomes opts.RepoDir and its
// base name becomes the repository name. A goroutine walks the tree with a
// fileAggregator (honouring ignore and opts.SizeMax) and streams accepted
// paths over a channel. Each file is read in full and handed to the builder
// under its path relative to the root directory.
//
// It returns the first error from resolving the path, creating the builder,
// reading a file, walking the tree, or finishing the build.
func indexArg(arg string, opts build.Options, ignore map[string]struct{}) error {
	dir, err := filepath.Abs(filepath.Clean(arg))
	if err != nil {
		return err
	}

	opts.RepoDir = dir
	opts.RepositoryDescription.Name = filepath.Base(dir)
	builder, err := build.NewBuilder(opts)
	if err != nil {
		return err
	}

	comm := make(chan string, 100)
	agg := fileAggregator{
		ignoreDirs: ignore,
		sink:       comm,
		sizeMax:    int64(opts.SizeMax),
	}

	// The walk result is handed back through walkErr instead of terminating
	// the process from inside the goroutine. The channel is buffered so the
	// goroutine never blocks on the send, and the send happens before comm is
	// closed, so the result is available once the range loop below ends.
	walkErr := make(chan error, 1)
	go func() {
		defer close(comm)
		walkErr <- filepath.Walk(dir, agg.add)
	}()

	for f := range comm {
		content, err := ioutil.ReadFile(f)
		if err != nil {
			// NOTE(review): the walker goroutine may stay blocked on a send to
			// comm after this return; the only caller in this file exits the
			// process right away on error.
			return err
		}

		// Rel uses the platform separator and also handles dir being the
		// filesystem root, where a plain dir+"/" prefix would not match.
		name, err := filepath.Rel(dir, f)
		if err != nil {
			return err
		}
		builder.AddFile(name, content)
	}

	if err := <-walkErr; err != nil {
		return err
	}
	return builder.Finish()
}