blob: bf926ea14dc6c472a493a7839fda1e255120a92b [file] [log] [blame]
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gitindex
import (
"fmt"
"log"
"net/url"
"os"
"path/filepath"
"reflect"
"sort"
"strings"
"time"
"github.com/google/zoekt"
"github.com/google/zoekt/build"
git "github.com/libgit2/git2go"
)
// RepoModTime returns the time of last fetch of a git repository.
func RepoModTime(dir string) (time.Time, error) {
var last time.Time
refDir := filepath.Join(dir, "refs")
if _, err := os.Lstat(refDir); err == nil {
if err := filepath.Walk(refDir,
func(name string, fi os.FileInfo, err error) error {
if !fi.IsDir() && last.Before(fi.ModTime()) {
last = fi.ModTime()
}
return nil
}); err != nil {
return last, err
}
}
// git gc compresses refs into the following file:
for _, fn := range []string{"info/refs", "packed-refs"} {
if fi, err := os.Lstat(filepath.Join(dir, fn)); err == nil && !fi.IsDir() && last.Before(fi.ModTime()) {
last = fi.ModTime()
}
}
return last, nil
}
// FindGitRepos finds directories holding git repositories.
func FindGitRepos(arg string) ([]string, error) {
arg, err := filepath.Abs(arg)
if err != nil {
return nil, err
}
var dirs []string
if err := filepath.Walk(arg, func(name string, fi os.FileInfo, err error) error {
if fi, err := os.Lstat(filepath.Join(name, ".git")); err == nil && fi.IsDir() {
dirs = append(dirs, filepath.Join(name, ".git"))
return filepath.SkipDir
}
if !strings.HasSuffix(name, ".git") || !fi.IsDir() {
return nil
}
fi, err = os.Lstat(filepath.Join(name, "objects"))
if err != nil || !fi.IsDir() {
return nil
}
dirs = append(dirs, name)
return filepath.SkipDir
}); err != nil {
return nil, err
}
return dirs, nil
}
type templates struct {
repo, commit, file, line string
}
// RepoURL returns the canonical URL for a repo, based on its
// configured remotes.
func RepoURL(repoDir string) (*url.URL, error) {
base, err := git.NewConfig()
if err != nil {
return nil, err
}
defer base.Free()
cfg, err := git.OpenOndisk(base, filepath.Join(repoDir, "config"))
if err != nil {
return nil, err
}
defer cfg.Free()
remoteURL, err := cfg.LookupString("remote.origin.url")
if err != nil {
return nil, err
}
parsed, err := url.Parse(remoteURL)
if err != nil {
return nil, err
}
return parsed, nil
}
// Templates fills in URL templates for known git hosting sites.
func Templates(u *url.URL) (*zoekt.Repository, error) {
if strings.HasSuffix(u.Host, "googlesource.com") {
/// eg. https://gerrit.googlesource.com/gitiles/+/master/tools/run_dev.sh#20
return &zoekt.Repository{
Name: filepath.Join(u.Host, u.Path),
URL: u.String(),
CommitURLTemplate: u.String() + "/+/{{.Version}}",
FileURLTemplate: u.String() + "/+/{{.Version}}/{{.Path}}",
LineFragmentTemplate: "{{.LineNumber}}",
}, nil
} else if u.Host == "github.com" {
t := *u
// CloneURL from the JSON API has .git
t.Path = strings.TrimSuffix(t.Path, ".git")
// eg. https://github.com/hanwen/go-fuse/blob/notify/genversion.sh#L10
return &zoekt.Repository{
Name: filepath.Join(t.Host, t.Path),
URL: t.String(),
CommitURLTemplate: t.String() + "/commit/{{.Version}}",
FileURLTemplate: t.String() + "/blob/{{.Version}}/{{.Path}}",
LineFragmentTemplate: "L{{.LineNumber}}",
}, nil
}
return nil, fmt.Errorf("scheme unknown for URL %s", u)
}
// getCommit returns a tree object for the given reference.
func getCommit(repo *git.Repository, ref string) (*git.Commit, error) {
obj, err := repo.RevparseSingle(ref)
if err != nil {
return nil, err
}
defer obj.Free()
commitObj, err := obj.Peel(git.ObjectCommit)
if err != nil {
return nil, err
}
return commitObj.AsCommit()
}
// IndexGitRepo indexes the git repository as specified by the options and arguments.
func IndexGitRepo(opts build.Options, branchPrefix string, branches []string, submodules, incremental bool, repoCacheDir string) error {
repo, err := git.OpenRepository(opts.RepoDir)
if err != nil {
return err
}
if url, err := RepoURL(opts.RepoDir); err != nil {
log.Printf("RepoURL(%s): %s", opts.RepoDir, err)
} else if desc, err := Templates(url); err != nil {
log.Printf("Templates(%s): %s", url, err)
} else {
opts.RepositoryDescription.URL = desc.URL
opts.RepositoryDescription.CommitURLTemplate = desc.CommitURLTemplate
opts.RepositoryDescription.FileURLTemplate = desc.FileURLTemplate
opts.RepositoryDescription.LineFragmentTemplate = desc.LineFragmentTemplate
}
repoCache := NewRepoCache(repoCacheDir)
defer repoCache.Close()
// branch => (path, sha1) => repo.
repos := map[FileKey]BlobLocation{}
// FileKey => branches
branchMap := map[FileKey][]string{}
// Branch => Repo => SHA1
branchVersions := map[string]map[string]git.Oid{}
for _, b := range branches {
fullName := b
if b != "HEAD" {
fullName = filepath.Join(branchPrefix, b)
} else {
_, ref, err := repo.RevparseExt(b)
if err != nil {
return err
}
fullName = ref.Name()
b = strings.TrimPrefix(fullName, branchPrefix)
}
commit, err := getCommit(repo, fullName)
if err != nil {
return err
}
defer commit.Free()
opts.RepositoryDescription.Branches = append(opts.RepositoryDescription.Branches, zoekt.RepositoryBranch{
Name: b,
Version: commit.Id().String(),
})
tree, err := commit.Tree()
if err != nil {
return err
}
defer tree.Free()
files, subVersions, err := TreeToFiles(repo, tree, opts.RepositoryDescription.URL, repoCache)
if err != nil {
return err
}
for k, v := range files {
repos[k] = v
branchMap[k] = append(branchMap[k], b)
}
branchVersions[b] = subVersions
}
if incremental {
versions := opts.IndexVersions()
if reflect.DeepEqual(versions, opts.RepositoryDescription.Branches) {
return nil
}
}
reposByPath := map[string]BlobLocation{}
for key, location := range repos {
reposByPath[key.SubRepoPath] = location
}
opts.SubRepositories = map[string]*zoekt.Repository{}
for path, location := range reposByPath {
tpl, err := Templates(location.URL)
if err != nil {
log.Printf("Templates(%s): %s", location.URL, err)
tpl = &zoekt.Repository{URL: location.URL.String()}
}
opts.SubRepositories[path] = tpl
}
for _, br := range opts.RepositoryDescription.Branches {
for path, repo := range opts.SubRepositories {
id := branchVersions[br.Name][path]
repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{
Name: br.Name,
Version: id.String(),
})
}
}
builder, err := build.NewBuilder(opts)
if err != nil {
return err
}
var names []string
fileKeys := map[string][]FileKey{}
for key := range repos {
n := key.FullPath()
fileKeys[n] = append(fileKeys[n], key)
names = append(names, n)
}
// not strictly necessary, but nice for reproducibility.
sort.Strings(names)
for _, name := range names {
keys := fileKeys[name]
for _, key := range keys {
brs := branchMap[key]
blob, err := repos[key].Repo.LookupBlob(&key.ID)
if err != nil {
return err
}
if blob.Size() > int64(opts.SizeMax) {
continue
}
builder.Add(zoekt.Document{
SubRepositoryPath: key.SubRepoPath,
Name: key.FullPath(),
Content: blob.Contents(),
Branches: brs,
})
}
}
return builder.Finish()
}