| // Copyright 2016 Google Inc. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package gitindex |
| |
| import ( |
| "fmt" |
| "io" |
| "io/ioutil" |
| "log" |
| "net/url" |
| "os" |
| "path/filepath" |
| "reflect" |
| "sort" |
| "strings" |
| "time" |
| |
| "github.com/google/zoekt" |
| "github.com/google/zoekt/build" |
| |
| "gopkg.in/src-d/go-git.v4/config" |
| "gopkg.in/src-d/go-git.v4/plumbing" |
| "gopkg.in/src-d/go-git.v4/plumbing/object" |
| |
| git "gopkg.in/src-d/go-git.v4" |
| ) |
| |
| // RepoModTime returns the time of last fetch of a git repository. |
| func RepoModTime(dir string) (time.Time, error) { |
| var last time.Time |
| refDir := filepath.Join(dir, "refs") |
| if _, err := os.Lstat(refDir); err == nil { |
| if err := filepath.Walk(refDir, |
| func(name string, fi os.FileInfo, err error) error { |
| if !fi.IsDir() && last.Before(fi.ModTime()) { |
| last = fi.ModTime() |
| } |
| return nil |
| }); err != nil { |
| return last, err |
| } |
| } |
| |
| // git gc compresses refs into the following file: |
| for _, fn := range []string{"info/refs", "packed-refs"} { |
| if fi, err := os.Lstat(filepath.Join(dir, fn)); err == nil && !fi.IsDir() && last.Before(fi.ModTime()) { |
| last = fi.ModTime() |
| } |
| } |
| |
| return last, nil |
| } |
| |
| // FindGitRepos finds directories holding git repositories. |
| func FindGitRepos(arg string) ([]string, error) { |
| arg, err := filepath.Abs(arg) |
| if err != nil { |
| return nil, err |
| } |
| var dirs []string |
| if err := filepath.Walk(arg, func(name string, fi os.FileInfo, err error) error { |
| if fi, err := os.Lstat(filepath.Join(name, ".git")); err == nil && fi.IsDir() { |
| dirs = append(dirs, filepath.Join(name, ".git")) |
| return filepath.SkipDir |
| } |
| |
| if !strings.HasSuffix(name, ".git") || !fi.IsDir() { |
| return nil |
| } |
| |
| fi, err = os.Lstat(filepath.Join(name, "objects")) |
| if err != nil || !fi.IsDir() { |
| return nil |
| } |
| |
| dirs = append(dirs, name) |
| return filepath.SkipDir |
| }); err != nil { |
| return nil, err |
| } |
| |
| return dirs, nil |
| } |
| |
| func templatesForOrigin(u *url.URL) (*zoekt.Repository, error) { |
| return nil, fmt.Errorf("unknown URL %s", u) |
| } |
| |
| // setTemplates fills in URL templates for known git hosting |
| // sites. |
| func setTemplates(repo *zoekt.Repository, u *url.URL, typ string) error { |
| repo.URL = u.String() |
| switch typ { |
| case "gitiles": |
| /// eg. https://gerrit.googlesource.com/gitiles/+/master/tools/run_dev.sh#20 |
| repo.CommitURLTemplate = u.String() + "/+/{{.Version}}" |
| repo.FileURLTemplate = u.String() + "/+/{{.Version}}/{{.Path}}" |
| repo.LineFragmentTemplate = "{{.LineNumber}}" |
| case "github": |
| // eg. https://github.com/hanwen/go-fuse/blob/notify/genversion.sh#L10 |
| repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}" |
| repo.FileURLTemplate = u.String() + "/blob/{{.Version}}/{{.Path}}" |
| repo.LineFragmentTemplate = "L{{.LineNumber}}" |
| |
| case "cgit": |
| |
| // http://git.savannah.gnu.org/cgit/lilypond.git/tree/elisp/lilypond-mode.el?h=dev/philh&id=b2ca0fefe3018477aaca23b6f672c7199ba5238e#n100 |
| |
| repo.CommitURLTemplate = u.String() + "/commit/?id={{.Version}}" |
| repo.FileURLTemplate = u.String() + "/tree/{{.Path}}/?id={{.Version}}" |
| repo.LineFragmentTemplate = "n{{.LineNumber}}" |
| |
| case "gitweb": |
| // https://gerrit.libreoffice.org/gitweb?p=online.git;a=blob;f=Makefile.am;h=cfcfd7c36fbae10e269653dc57a9b68c92d4c10b;hb=848145503bf7b98ce4a4aa0a858a0d71dd0dbb26#l10 |
| repo.FileURLTemplate = u.String() + ";a=blob;f={{.Path}};hb={{.Version}}" |
| repo.CommitURLTemplate = u.String() + ";a=commit;h={{.Version}}" |
| repo.LineFragmentTemplate = "l{{.LineNumber}}" |
| |
| default: |
| return fmt.Errorf("URL scheme type %q unknown", typ) |
| } |
| return nil |
| } |
| |
| // getCommit returns a tree object for the given reference. |
| func getCommit(repo *git.Repository, ref string) (*object.Commit, error) { |
| sha1, err := repo.ResolveRevision(plumbing.Revision(ref)) |
| if err != nil { |
| return nil, err |
| } |
| |
| commitObj, err := repo.CommitObject(*sha1) |
| if err != nil { |
| return nil, err |
| } |
| return commitObj, nil |
| } |
| |
| func configLookupRemoteURL(cfg *config.Config, key string) string { |
| rc := cfg.Remotes[key] |
| if rc == nil || len(rc.URLs) == 0 { |
| return "" |
| } |
| return rc.URLs[0] |
| } |
| |
| func configLookupString(cfg *config.Config, key string) string { |
| fields := strings.Split(key, ".") |
| for _, s := range cfg.Raw.Sections { |
| if s.Name != fields[0] { |
| continue |
| } |
| |
| for _, o := range s.Options { |
| if o.Key != fields[1] { |
| continue |
| } |
| return o.Value |
| } |
| } |
| |
| return "" |
| } |
| |
| func isMissingBranchError(err error) bool { |
| return err != nil && err.Error() == "reference not found" |
| } |
| |
| func setTemplatesFromConfig(desc *zoekt.Repository, repoDir string) error { |
| repo, err := git.PlainOpen(repoDir) |
| if err != nil { |
| return err |
| } |
| |
| cfg, err := repo.Config() |
| if err != nil { |
| return err |
| } |
| |
| webURLStr := configLookupString(cfg, "zoekt.web-url") |
| |
| webURLType := configLookupString(cfg, "zoekt.web-url-type") |
| |
| if webURLType != "" && webURLStr != "" { |
| webURL, err := url.Parse(webURLStr) |
| if err != nil { |
| return err |
| } |
| if err := setTemplates(desc, webURL, webURLType); err != nil { |
| return err |
| } |
| } |
| |
| name := configLookupString(cfg, "zoekt.name") |
| if name != "" { |
| desc.Name = name |
| } else { |
| remoteURL := configLookupRemoteURL(cfg, "origin") |
| if remoteURL == "" { |
| return nil |
| } |
| u, err := url.Parse(remoteURL) |
| if err != nil { |
| return err |
| } |
| if err := SetTemplatesFromOrigin(desc, u); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
| // SetTemplates fills in templates based on the origin URL. |
| func SetTemplatesFromOrigin(desc *zoekt.Repository, u *url.URL) error { |
| desc.Name = filepath.Join(u.Host, strings.TrimSuffix(u.Path, ".git")) |
| |
| if strings.HasSuffix(u.Host, ".googlesource.com") { |
| return setTemplates(desc, u, "gitiles") |
| } else if u.Host == "github.com" { |
| u.Path = strings.TrimSuffix(u.Path, ".git") |
| return setTemplates(desc, u, "github") |
| } else { |
| return fmt.Errorf("unknown git hosting site %q", u) |
| } |
| |
| found, err := templatesForOrigin(u) |
| |
| if err != nil { |
| return err |
| |
| } |
| |
| desc.URL = found.URL |
| desc.CommitURLTemplate = found.CommitURLTemplate |
| desc.FileURLTemplate = found.FileURLTemplate |
| desc.LineFragmentTemplate = found.LineFragmentTemplate |
| return nil |
| } |
| |
| type Options struct { |
| Submodules bool |
| Incremental bool |
| AllowMissingBranch bool |
| RepoCacheDir string |
| BuildOptions build.Options |
| |
| BranchPrefix string |
| Branches []string |
| } |
| |
| func expandBranches(repo *git.Repository, bs []string, prefix string) ([]string, error) { |
| var result []string |
| for _, b := range bs { |
| if b == "HEAD" { |
| ref, err := repo.Head() |
| if err != nil { |
| return nil, err |
| } |
| |
| result = append(result, strings.TrimPrefix(ref.Name().String(), prefix)) |
| continue |
| } |
| |
| if strings.Contains(b, "*") { |
| iter, err := repo.Branches() |
| if err != nil { |
| return nil, err |
| } |
| |
| defer iter.Close() |
| for { |
| ref, err := iter.Next() |
| if err == io.EOF { |
| break |
| } |
| if err != nil { |
| return nil, err |
| } |
| |
| name := ref.Name().Short() |
| if matched, err := filepath.Match(b, name); err != nil { |
| return nil, err |
| } else if !matched { |
| continue |
| } |
| |
| result = append(result, strings.TrimPrefix(name, prefix)) |
| } |
| continue |
| } |
| |
| result = append(result, b) |
| } |
| |
| return result, nil |
| } |
| |
| // IndexGitRepo indexes the git repository as specified by the options. |
| func IndexGitRepo(opts Options) error { |
| repo, err := git.PlainOpen(opts.BuildOptions.RepoDir) |
| if err != nil { |
| return err |
| } |
| |
| if err := setTemplatesFromConfig(&opts.BuildOptions.RepositoryDescription, opts.BuildOptions.RepoDir); err != nil { |
| log.Printf("setTemplatesFromConfig(%s): %s", opts.BuildOptions.RepoDir, err) |
| } |
| |
| repoCache := NewRepoCache(opts.RepoCacheDir) |
| defer repoCache.Close() |
| |
| // branch => (path, sha1) => repo. |
| repos := map[FileKey]BlobLocation{} |
| |
| // FileKey => branches |
| branchMap := map[FileKey][]string{} |
| |
| // Branch => Repo => SHA1 |
| branchVersions := map[string]map[string]plumbing.Hash{} |
| |
| branches, err := expandBranches(repo, opts.Branches, opts.BranchPrefix) |
| if err != nil { |
| return err |
| } |
| for _, b := range branches { |
| fullName := filepath.Join(opts.BranchPrefix, b) |
| |
| commit, err := getCommit(repo, fullName) |
| if opts.AllowMissingBranch && isMissingBranchError(err) { |
| continue |
| } |
| |
| if err != nil { |
| return err |
| } |
| opts.BuildOptions.RepositoryDescription.Branches = append(opts.BuildOptions.RepositoryDescription.Branches, zoekt.RepositoryBranch{ |
| Name: b, |
| Version: commit.Hash.String(), |
| }) |
| |
| tree, err := commit.Tree() |
| if err != nil { |
| return err |
| } |
| |
| files, subVersions, err := TreeToFiles(repo, tree, opts.BuildOptions.RepositoryDescription.URL, repoCache) |
| if err != nil { |
| return err |
| } |
| for k, v := range files { |
| repos[k] = v |
| branchMap[k] = append(branchMap[k], b) |
| } |
| |
| branchVersions[b] = subVersions |
| } |
| |
| if opts.Incremental { |
| versions := opts.BuildOptions.IndexVersions() |
| if reflect.DeepEqual(versions, opts.BuildOptions.RepositoryDescription.Branches) { |
| return nil |
| } |
| } |
| |
| reposByPath := map[string]BlobLocation{} |
| for key, location := range repos { |
| reposByPath[key.SubRepoPath] = location |
| } |
| |
| opts.BuildOptions.SubRepositories = map[string]*zoekt.Repository{} |
| for path, location := range reposByPath { |
| tpl := opts.BuildOptions.RepositoryDescription |
| if path != "" { |
| tpl = zoekt.Repository{URL: location.URL.String()} |
| if err := SetTemplatesFromOrigin(&tpl, location.URL); err != nil { |
| log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, location.URL, err) |
| } |
| } |
| opts.BuildOptions.SubRepositories[path] = &tpl |
| } |
| for _, br := range opts.BuildOptions.RepositoryDescription.Branches { |
| for path, repo := range opts.BuildOptions.SubRepositories { |
| id := branchVersions[br.Name][path] |
| repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{ |
| Name: br.Name, |
| Version: id.String(), |
| }) |
| } |
| } |
| |
| builder, err := build.NewBuilder(opts.BuildOptions) |
| if err != nil { |
| return err |
| } |
| |
| var names []string |
| fileKeys := map[string][]FileKey{} |
| for key := range repos { |
| n := key.FullPath() |
| fileKeys[n] = append(fileKeys[n], key) |
| names = append(names, n) |
| } |
| // not strictly necessary, but nice for reproducibility. |
| sort.Strings(names) |
| |
| for _, name := range names { |
| keys := fileKeys[name] |
| for _, key := range keys { |
| brs := branchMap[key] |
| blob, err := repos[key].Repo.BlobObject(key.ID) |
| if err != nil { |
| return err |
| } |
| |
| if blob.Size > int64(opts.BuildOptions.SizeMax) { |
| continue |
| } |
| |
| contents, err := blobContents(blob) |
| if err != nil { |
| return err |
| } |
| builder.Add(zoekt.Document{ |
| SubRepositoryPath: key.SubRepoPath, |
| Name: key.FullPath(), |
| Content: contents, |
| Branches: brs, |
| }) |
| } |
| } |
| return builder.Finish() |
| } |
| |
| func blobContents(blob *object.Blob) ([]byte, error) { |
| r, err := blob.Reader() |
| if err != nil { |
| return nil, err |
| } |
| defer r.Close() |
| |
| c, err := ioutil.ReadAll(r) |
| if err != nil { |
| return nil, err |
| } |
| return c, nil |
| } |