blob: 0818be7c23713f93bfee28f3807f255fb7bee02e [file] [log] [blame]
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package build
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"time"
"github.com/google/zoekt"
"github.com/google/zoekt/ctags"
)
// runCTags writes the non-empty entries of inputs (file name => content)
// into a fresh temporary directory, runs the ctags binary bin over them and
// returns the tag entries parsed from its standard output.
//
// If sandboxBin is non-empty, ctags is launched through that sandbox binary;
// otherwise a warning is logged and ctags is executed directly. The result is
// (nil, nil) when there is nothing to tag. An error is returned when the
// temporary files cannot be set up, when ctags exits unsuccessfully or does
// not finish within 5 seconds, or when an output line fails to parse.
// Entries whose symbol is exactly one byte long are dropped.
func runCTags(bin string, sandboxBin string, inputs map[string][]byte) ([]*ctags.Entry, error) {
	// Flip to true to keep the temporary directories around for inspection.
	const debug = false

	if len(inputs) == 0 {
		return nil, nil
	}

	inputDir, err := ioutil.TempDir("", "ctags-input")
	if err != nil {
		return nil, err
	}
	if !debug {
		defer os.RemoveAll(inputDir)
	}

	// --sort shells out to sort(1).
	cmdline := []string{bin, "-n", "-f", "-", "--sort=no"}
	fixedArgs := len(cmdline)
	for name, content := range inputs {
		if len(content) == 0 {
			continue
		}

		dest := filepath.Join(inputDir, name)
		if err := os.MkdirAll(filepath.Dir(dest), 0700); err != nil {
			return nil, err
		}
		if err := ioutil.WriteFile(dest, content, 0600); err != nil {
			return nil, err
		}
		// File names are passed relative to inputDir, which becomes the
		// working directory of the command below.
		cmdline = append(cmdline, name)
	}
	if len(cmdline) == fixedArgs {
		// Every input was empty, so no file name was appended.
		return nil, nil
	}

	if sandboxBin == "" {
		log.Println("WARNING: running ctags without sandboxing.")
	} else {
		// ctags parses untrusted input and is written in C.
		// Run it in a sandbox as defense in depth. The
		// namespace sandbox only works on Linux,
		// unfortunately. If someone packages a complex
		// exploit (eg. dirty COW) inside the repository, we
		// may still be SOL, but this is better than nothing.
		sandboxDir, err := ioutil.TempDir("", "")
		if err != nil {
			return nil, err
		}
		if !debug {
			defer os.RemoveAll(sandboxDir)
		}

		wrapper := []string{sandboxBin, "-s", sandboxDir}
		wrapper = append(wrapper, "-b", inputDir+"=input", "-d", "/input")
		// Make sure the binary is available in the sandbox.
		wrapper = append(wrapper, "-b", bin+"="+"ctags")
		wrapper = append(wrapper, "-u", "3333", "-g", "3333")

		// Expose whichever of the host library directories exist.
		for _, libDir := range []string{"/lib", "/lib64"} {
			if _, statErr := os.Lstat(libDir); statErr != nil {
				continue
			}
			wrapper = append(wrapper, "-b", libDir+"="+libDir[1:])
		}
		wrapper = append(wrapper, "-t", "tmp", "--")

		// Inside the sandbox the binary is addressed as /ctags.
		cmdline[0] = "/ctags"
		cmdline = append(wrapper, cmdline...)
	}

	var stdout, stderr bytes.Buffer
	cmd := exec.Command(cmdline[0], cmdline[1:]...)
	cmd.Dir = inputDir
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Start(); err != nil {
		return nil, err
	}

	// The channel is buffered so the waiting goroutine can always deliver
	// its result and exit, even after we have given up on a timeout.
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()

	select {
	case waitErr := <-done:
		if waitErr != nil {
			return nil, fmt.Errorf("exec(%s): %v, stderr: %s", cmd.Args, waitErr, stderr.String())
		}
	case <-time.After(5 * time.Second):
		cmd.Process.Kill()
		return nil, fmt.Errorf("timeout executing ctags.")
	}

	var parsed []*ctags.Entry
	for _, raw := range bytes.Split(stdout.Bytes(), []byte("\n")) {
		if len(raw) == 0 {
			continue
		}
		entry, err := ctags.Parse(string(raw))
		if err != nil {
			return nil, err
		}
		if len(entry.Sym) != 1 {
			parsed = append(parsed, entry)
		}
	}
	return parsed, nil
}
// runCTagsChunked runs ctags over in (file name => content) in several
// batches and returns the concatenated entries of all batches. A batch is
// handed to runCTags as soon as the summed length of its file names exceeds
// the limit below; whatever remains after the loop is run as a final batch.
// The first error from runCTags aborts processing and discards all results.
//
// NOTE(review): the limit counts file-name bytes, not content bytes —
// presumably to bound the length of the ctags command line, since runCTags
// passes every file name as an argument. Confirm before changing.
func runCTagsChunked(bin, sandboxBin string, in map[string][]byte) ([]*ctags.Entry, error) {
	// 100k seems reasonable.
	const maxNameBytes = 100 << 10

	var all []*ctags.Entry
	batch := map[string][]byte{}
	nameBytes := 0

	// flush runs ctags on the current batch, collects its entries and
	// starts a fresh, empty batch.
	flush := func() error {
		entries, err := runCTags(bin, sandboxBin, batch)
		if err != nil {
			return err
		}
		all = append(all, entries...)
		batch = map[string][]byte{}
		nameBytes = 0
		return nil
	}

	for name, content := range in {
		batch[name] = content
		nameBytes += len(name)
		if nameBytes <= maxNameBytes {
			continue
		}
		if err := flush(); err != nil {
			return nil, err
		}
	}

	if err := flush(); err != nil {
		return nil, err
	}
	return all, nil
}
// ctagsAddSymbols fills in the Symbols field of the documents in todo by
// running ctags (binary bin, optionally wrapped by sandboxBin) over their
// contents.
//
// Documents that already have non-nil Symbols are left alone. When several
// documents share a Name, only the first one is processed. Documents for
// which ctags reports no entries keep their Symbols unchanged. An error from
// running ctags or from mapping tags to offsets is returned as is.
func ctagsAddSymbols(todo []*zoekt.Document, bin, sandboxBin string) error {
	pathIndices := make(map[string]int, len(todo))
	contents := make(map[string][]byte, len(todo))
	for i, t := range todo {
		if t.Symbols != nil {
			continue
		}
		if _, seen := pathIndices[t.Name]; seen {
			continue
		}

		pathIndices[t.Name] = i
		contents[t.Name] = t.Content
	}

	entries, err := runCTagsChunked(bin, sandboxBin, contents)
	if err != nil {
		return err
	}

	// Group the flat entry list by the file each entry belongs to.
	fileTags := map[string][]*ctags.Entry{}
	for _, e := range entries {
		fileTags[e.Path] = append(fileTags[e.Path], e)
	}

	for k, tags := range fileTags {
		// A path that was never handed to ctags must not be looked up
		// blindly: the zero value of the map would select todo[0] and nil
		// content, clobbering an unrelated document or producing a
		// spurious out-of-range error.
		idx, known := pathIndices[k]
		if !known {
			log.Printf("ctags: ignoring %d tags for unknown path %q", len(tags), k)
			continue
		}

		symOffsets, err := tagsToSections(contents[k], tags)
		if err != nil {
			return err
		}
		todo[idx].Symbols = symOffsets
	}
	return nil
}
// tagsToSections converts ctags entries for a single file into byte ranges
// within content. For every tag, the line named by its 1-based Line field is
// searched for the first occurrence of the tag's symbol; on a match, a
// section spanning exactly that occurrence is emitted.
//
// Tags with a non-positive line number, and tags whose symbol does not occur
// on the reported line, are skipped. A line number beyond the end of content
// yields an error.
func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, error) {
	// lineEnds[i] is the offset at which line i ends: the position of its
	// newline, or len(content) for the final line.
	lineEnds := append(newLinesIndices(content), uint32(len(content)))

	var sections []zoekt.DocumentSection
	for _, tag := range tags {
		if tag.Line <= 0 {
			// Observed this with a .JS file.
			continue
		}

		idx := tag.Line - 1
		if idx >= len(lineEnds) {
			log.Println("nls", lineEnds)
			return nil, fmt.Errorf("linenum for entry out of range %v", tag)
		}

		// A line starts right after the newline that ended the previous
		// one; the first line starts at offset 0.
		var lineStart uint32
		if idx > 0 {
			lineStart = lineEnds[idx-1] + 1
		}

		col := bytes.Index(content[lineStart:lineEnds[idx]], []byte(tag.Sym))
		if col < 0 {
			// for Go code, this is very common, since
			// ctags barfs on multi-line declarations
			// log.Printf("symbol %s not found in line
			// %q", t.Sym, line)
			continue
		}

		symStart := lineStart + uint32(col)
		sections = append(sections, zoekt.DocumentSection{
			Start: symStart,
			End:   symStart + uint32(len(tag.Sym)),
		})
	}
	return sections, nil
}
// newLinesIndices returns the byte offsets of every '\n' in in, in
// ascending order. The result is empty (but non-nil) when in contains no
// newline.
func newLinesIndices(in []byte) []uint32 {
	// Initial capacity guess of one newline per 30 bytes of input.
	offsets := make([]uint32, 0, len(in)/30)
	for pos := 0; pos < len(in); pos++ {
		if in[pos] != '\n' {
			continue
		}
		offsets = append(offsets, uint32(pos))
	}
	return offsets
}