Add cache/, a collection of caches for blobs, trees and repositories.

Change-Id: I5377a6e2c9921681d0d6ecd2006e71d8120ec5c5
diff --git a/cache/cache.go b/cache/cache.go
new file mode 100644
index 0000000..fb68e6c
--- /dev/null
+++ b/cache/cache.go
@@ -0,0 +1,56 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package cache implements a simplistic persistent cache based on the
+// filesystem.
+package cache
+
+import (
+	"os"
+	"path/filepath"
+)
+
+// Cache bundles the git repository cache, the tree cache and the blob CAS.
+type Cache struct {
+	git  *gitCache  // clones of git repositories
+	tree *TreeCache // expanded trees
+	blob *CAS       // blob contents
+}
+
+// NewCache creates the directory d if necessary and sets up the git, blob
+// and tree caches in its "git", "blobs" and "tree" subdirectories.
+func NewCache(d string) (*Cache, error) {
+	root, err := filepath.Abs(d)
+	if err != nil {
+		return nil, err
+	}
+	if err = os.MkdirAll(root, 0700); err != nil {
+		return nil, err
+	}
+
+	repos, err := newGitCache(filepath.Join(root, "git"))
+	if err != nil {
+		return nil, err
+	}
+	blobs, err := NewCAS(filepath.Join(root, "blobs"))
+	if err != nil {
+		return nil, err
+	}
+	trees, err := NewTreeCache(filepath.Join(root, "tree"))
+	if err != nil {
+		return nil, err
+	}
+
+	return &Cache{git: repos, tree: trees, blob: blobs}, nil
+}
diff --git a/cache/cas.go b/cache/cas.go
new file mode 100644
index 0000000..f5100fb
--- /dev/null
+++ b/cache/cas.go
@@ -0,0 +1,80 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	git "github.com/libgit2/git2go"
+)
+
+// CAS is a content addressable store. It is intended to be used
+// with git SHA1 data. It stores blobs as uncompressed files without
+// git headers, so that files from the CAS can be wired up directly
+// with a FUSE file system.
+type CAS struct {
+	dir string // root directory holding the blob files
+}
+
+// NewCAS returns a CAS rooted at dir, creating the directory
+// (with mode 0700) if it does not exist yet.
+func NewCAS(dir string) (*CAS, error) {
+	err := os.MkdirAll(dir, 0700)
+	if err != nil {
+		return nil, err
+	}
+	return &CAS{dir: dir}, nil
+}
+
+// path maps id to <dir>/<first 3 characters of id>/<rest of id>.
+func (c *CAS) path(id git.Oid) string {
+	return fmt.Sprintf("%s/%s/%s", c.dir, id.String()[:3], id.String()[3:])
+}
+
+// Open opens the blob stored under id for reading; the bool is false if that fails.
+func (c *CAS) Open(id git.Oid) (*os.File, bool) {
+	blob, openErr := os.Open(c.path(id))
+	return blob, openErr == nil
+}
+
+// Write atomically stores data under id; on failure the temporary file is closed and removed.
+func (c *CAS) Write(id git.Oid, data []byte) error {
+	// TODO(hanwen): we should run data through the git hash to
+	// verify that it is what it says it is.
+	f, err := ioutil.TempFile(c.dir, "tmp")
+	if err != nil {
+		return err
+	}
+	if err = f.Chmod(0444); err == nil {
+		_, err = f.Write(data)
+	}
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	p := c.path(id)
+	if err == nil {
+		err = os.MkdirAll(filepath.Dir(p), 0700)
+	}
+	if err == nil {
+		err = os.Rename(f.Name(), p)
+	}
+	if err != nil {
+		os.Remove(f.Name()) // best effort: don't leave the temp file behind
+	}
+	return err
+}
diff --git a/cache/gitcache.go b/cache/gitcache.go
new file mode 100644
index 0000000..a24cba4
--- /dev/null
+++ b/cache/gitcache.go
@@ -0,0 +1,144 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+	"bytes"
+	"fmt"
+	"log"
+	"net/url"
+	"os"
+	"os/exec"
+	"path"
+	"path/filepath"
+	"strings"
+	"time"
+
+	git "github.com/libgit2/git2go"
+)
+
+// gitCache manages a set of bare git repositories on disk.
+type gitCache struct {
+	// Directory to hold the repositories.
+	dir string
+
+	// Directory to store log files for fetches and clones.
+	logDir string
+}
+
+// newGitCache creates baseDir and its "gitfs-logs" subdirectory if
+// necessary and returns a gitCache that keeps its repositories there.
+func newGitCache(baseDir string) (*gitCache, error) {
+	c := &gitCache{
+		dir:    filepath.Join(baseDir),
+		logDir: filepath.Join(baseDir, "gitfs-logs"),
+	}
+	for _, d := range []string{c.logDir, c.dir} {
+		if err := os.MkdirAll(d, 0700); err != nil {
+			return nil, err
+		}
+	}
+	return c, nil
+}
+
+// logfile creates a log file named after the current time and returns it open for writing.
+func (c *gitCache) logfile() (*os.File, error) {
+	stamp := time.Now().Format(time.RFC3339Nano)
+	name := fmt.Sprintf("%s/git.%s.log", c.logDir, stamp)
+	return os.Create(strings.Replace(name, ":", "_", -1))
+}
+
+// Fetch updates the local clone of the given repository by running
+// "git fetch origin" against its cached git directory.
+func (c *gitCache) Fetch(url string) error {
+	gitDir, err := c.gitPath(url)
+	if err != nil {
+		return err
+	}
+
+	// Run from the cache directory, pointing git at the repository
+	// through --git-dir.
+	return c.runGit(c.dir, "--git-dir="+gitDir, "fetch", "origin")
+}
+
+// gitPath maps a repository URL to <dir>/<host>/<URL path>.git under the
+// cache directory; a trailing ".git" path element is dropped first.
+func (c *gitCache) gitPath(u string) (string, error) {
+	loc, err := url.Parse(u)
+	if err != nil {
+		return "", err
+	}
+	repoPath := path.Clean(loc.Path)
+	if path.Base(repoPath) == ".git" {
+		repoPath = path.Dir(repoPath)
+	}
+	return filepath.Join(c.dir, loc.Host, repoPath+".git"), nil
+}
+
+// runGit runs git with the given arguments under the given directory and
+// records the arguments, exit status, stdout and stderr in a fresh log file.
+func (c *gitCache) runGit(dir string, args ...string) error {
+	logfile, err := c.logfile()
+	if err != nil {
+		return err
+	}
+	// Covers the early return below; the extra Close after success is harmless.
+	defer logfile.Close()
+
+	cmd := exec.Command("git", args...)
+	log.Printf("running %s (log: %s)", cmd.Args, logfile.Name())
+	cmd.Dir = dir
+
+	var out, errOut bytes.Buffer
+	cmd.Stdout = &out
+	cmd.Stderr = &errOut
+	runErr := cmd.Run()
+
+	if _, err := fmt.Fprintf(logfile, "args: %s\ndir:%s\nEXIT: %v\n\nOUT\n%s\n\nERR\n%s\n\n",
+		cmd.Args, cmd.Dir, runErr, out.String(), errOut.String()); err != nil {
+		return fmt.Errorf("logfile write for %s (%v): %v", args, runErr, err)
+	}
+
+	if err := logfile.Close(); err != nil {
+		return fmt.Errorf("logfile close for %s (%v): %v", args, runErr, err)
+	}
+
+	log.Printf("ran %s exit %v", cmd.Args, runErr)
+	return runErr
+}
+
+// Open returns an opened repository for the given URL. If no local
+// copy exists yet, the repository is first cloned as a bare repository.
+func (c *gitCache) Open(url string) (*git.Repository, error) {
+	// TODO(hanwen): multiple concurrent calls to Open() with the
+	// same URL may race, resulting in a double clone. It's unclear
+	// what will happen in that case.
+	repoDir, err := c.gitPath(url)
+	if err != nil {
+		return nil, err
+	}
+	_, statErr := os.Lstat(repoDir)
+	if os.IsNotExist(statErr) {
+		parent, name := filepath.Split(repoDir)
+		if err := os.MkdirAll(parent, 0755); err != nil {
+			return nil, err
+		}
+		cloneErr := c.runGit(parent, "clone", "--bare", "--progress", "--verbose", url, name)
+		if cloneErr != nil {
+			return nil, cloneErr
+		}
+	}
+	return git.OpenRepository(repoDir)
+}
diff --git a/cache/gitcache_test.go b/cache/gitcache_test.go
new file mode 100644
index 0000000..a49a900
--- /dev/null
+++ b/cache/gitcache_test.go
@@ -0,0 +1,47 @@
+package cache
+
+import (
+	"io/ioutil"
+	"testing"
+	"time"
+)
+
+// TestGitCache checks that a lazyRepo yields no repository until its clone has finished.
+func TestGitCache(t *testing.T) {
+	testRepo, err := initTest()
+	if err != nil {
+		t.Fatalf("init: %v", err)
+	}
+	defer testRepo.Cleanup()
+
+	dir, err := ioutil.TempDir("", "")
+	if err != nil {
+		t.Fatalf("TempDir: %v", err)
+	}
+	cache, err := newGitCache(dir)
+	if err != nil {
+		t.Fatalf("newGitCache(%s): %v", dir, err)
+	}
+
+	url := "file://" + testRepo.dir
+	lazy := newLazyRepo(url, cache, true)
+	if r := lazy.Repository(); r != nil {
+		t.Errorf("got %v for lazy.Repository", r)
+	}
+
+	go lazy.Clone()
+	if r := lazy.Repository(); r != nil {
+		t.Errorf("got %v for lazy.Repository", r)
+	}
+
+	// The API doesn't let us synchronize on a finished clone, so poll
+	// until a deadline instead of relying on one short fixed sleep.
+	timeout := 5 * time.Second
+	deadline := time.Now().Add(timeout)
+	for lazy.Repository() == nil && time.Now().Before(deadline) {
+		time.Sleep(5 * time.Millisecond)
+	}
+	if repo := lazy.Repository(); repo == nil {
+		t.Errorf("lazyRepo still not loaded after %s.", timeout)
+	}
+}
diff --git a/cache/lazyrepo.go b/cache/lazyrepo.go
new file mode 100644
index 0000000..cc4d935
--- /dev/null
+++ b/cache/lazyrepo.go
@@ -0,0 +1,84 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+	"log"
+	"sync"
+
+	git "github.com/libgit2/git2go"
+)
+
+// lazyRepo represents a git repository that might be cloned on
+// demand.
+type lazyRepo struct {
+	allowClone bool      // whether Clone may start a clone; cleared when a clone attempt finishes
+	url        string    // repository URL handed to cache.Open
+	cache      *gitCache // cache used to open (and if needed clone) the repository
+
+	repoMu  sync.Mutex      // held by Repository, Clone and runClone while touching the fields below
+	cloning bool            // set by Clone, cleared by runClone
+	repo    *git.Repository // result of the last runClone; nil before that
+}
+
+// newLazyRepo returns a lazyRepo for url backed by cache; it does not start a clone.
+func newLazyRepo(url string, cache *gitCache, allowClone bool) *lazyRepo {
+	return &lazyRepo{
+		allowClone: allowClone,
+		url:        url,
+		cache:      cache,
+	}
+}
+
+// Repository returns the git.Repository for this repo, or nil if it has not
+// been loaded. It is safe for concurrent use from multiple goroutines.
+func (r *lazyRepo) Repository() *git.Repository {
+	r.repoMu.Lock()
+	repo := r.repo
+	r.repoMu.Unlock()
+	return repo
+}
+
+// runClone opens the repository through the cache, cloning it if
+// necessary, and records the result. Afterwards no further clone is
+// allowed, even if this attempt failed. Clone sets r.cloning before
+// starting runClone so that only one runs at a time.
+func (r *lazyRepo) runClone() {
+	repo, openErr := r.cache.Open(r.url)
+
+	r.repoMu.Lock()
+	defer r.repoMu.Unlock()
+	r.allowClone, r.cloning, r.repo = false, false, repo
+
+	if openErr != nil {
+		log.Printf("runClone: %v", openErr)
+	}
+}
+
+// Clone schedules the repository to be cloned in the background. It
+// does nothing if cloning is not allowed, a repository is already
+// loaded, or a clone is in progress. This method is safe for
+// concurrent use from multiple goroutines.
+func (r *lazyRepo) Clone() {
+	r.repoMu.Lock()
+	defer r.repoMu.Unlock()
+
+	if !r.allowClone || r.repo != nil || r.cloning {
+		return
+	}
+
+	r.cloning = true
+	go r.runClone()
+}
diff --git a/cache/treecache.go b/cache/treecache.go
new file mode 100644
index 0000000..2ea5f8a
--- /dev/null
+++ b/cache/treecache.go
@@ -0,0 +1,186 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"github.com/google/gitfs/gitiles"
+	git "github.com/libgit2/git2go"
+)
+
+// A TreeCache caches recursively expanded trees by their git commit and tree IDs.
+type TreeCache struct {
+	dir string // root directory holding the cached tree files
+}
+
+// NewTreeCache returns a TreeCache that stores its entries under d, creating d if needed.
+func NewTreeCache(d string) (*TreeCache, error) {
+	if mkErr := os.MkdirAll(d, 0700); mkErr != nil {
+		return nil, mkErr
+	}
+	return &TreeCache{dir: d}, nil
+}
+
+// path maps id to <dir>/<first 3 characters of id>/<rest of id>.
+func (c *TreeCache) path(id *git.Oid) string {
+	return fmt.Sprintf("%s/%s/%s", c.dir, id.String()[:3], id.String()[3:])
+}
+
+// Get reads the cached tree stored under id. It returns an error if
+// the entry cannot be read (e.g. it was never added) or decoded.
+func (c *TreeCache) Get(id *git.Oid) (*gitiles.Tree, error) {
+	content, err := ioutil.ReadFile(c.path(id))
+	if err != nil {
+		return nil, err
+	}
+	t := new(gitiles.Tree)
+	if err := json.Unmarshal(content, t); err != nil {
+		return nil, err
+	}
+	return t, nil
+}
+
+// Add adds a Tree to the cache under id. If id differs from the
+// tree's own ID, the tree is stored under that ID as well.
+func (c *TreeCache) Add(id *git.Oid, tree *gitiles.Tree) error {
+	if err := c.add(id, tree); err != nil {
+		return err
+	}
+	if id.String() == tree.ID {
+		return nil
+	}
+	treeID, err := git.NewOid(tree.ID)
+	if err != nil {
+		return err
+	}
+	return c.add(treeID, tree)
+}
+
+// add serializes tree as JSON and stores it under id, writing to a
+// temporary file first and renaming it into place.
+func (c *TreeCache) add(id *git.Oid, tree *gitiles.Tree) error {
+	// Marshal first so that a failure here leaves no temporary file.
+	content, err := json.Marshal(tree)
+	if err != nil {
+		return err
+	}
+	f, err := ioutil.TempFile(c.dir, "tmp")
+	if err != nil {
+		return err
+	}
+	_, err = f.Write(content)
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	p := c.path(id)
+	if err == nil {
+		err = os.MkdirAll(filepath.Dir(p), 0700)
+	}
+	if err == nil {
+		err = os.Rename(f.Name(), p)
+	}
+	if err != nil {
+		os.Remove(f.Name()) // best effort: don't leave the temp file behind
+	}
+	return err
+}
+
+// GetTree loads the Tree from an on-disk Git repository. Trees are expanded
+// recursively; only non-tree entries are listed, named by their full path.
+func GetTree(repo *git.Repository, id *git.Oid) (*gitiles.Tree, error) {
+	obj, err := repo.Lookup(id)
+	if err != nil {
+		return nil, err
+	}
+
+	obj, err = obj.Peel(git.ObjectTree)
+	if err != nil {
+		return nil, err
+	}
+
+	asTree, err := obj.AsTree()
+	if err != nil {
+		return nil, err
+	}
+
+	tree := gitiles.Tree{ID: obj.Id().String()}
+	odb, err := repo.Odb()
+	if err != nil {
+		return nil, err
+	}
+
+	// walkErr carries the precise failure out of the callback.
+	var walkErr error
+	cb := func(n string, e *git.TreeEntry) int {
+		t := ""
+		var size *int
+		switch e.Type {
+		case git.ObjectTree:
+			return 0
+		case git.ObjectCommit:
+			t = "commit"
+		case git.ObjectBlob:
+			t = "blob"
+			sz, _, rhErr := odb.ReadHeader(e.Id)
+			if rhErr != nil {
+				walkErr = rhErr
+				return -1
+			}
+			size = new(int)
+			*size = int(sz)
+		default:
+			walkErr = fmt.Errorf("illegal object %d for %s", e.Type, n)
+			return -1
+		}
+
+		gEntry := gitiles.TreeEntry{
+			Name: filepath.Join(n, e.Name),
+			ID:   e.Id.String(),
+			Mode: int(e.Filemode),
+			Size: size,
+			Type: t,
+		}
+		if e.Filemode == git.FilemodeLink {
+			linkObj, lookErr := repo.Lookup(e.Id)
+			if lookErr != nil {
+				walkErr = lookErr
+				return -1
+			}
+			blob, blobErr := linkObj.AsBlob()
+			if blobErr != nil {
+				walkErr = blobErr
+				return -1
+			}
+			target := string(blob.Contents()) // the blob content is used as the link target
+			gEntry.Target = &target
+		}
+		tree.Entries = append(tree.Entries, gEntry)
+		return 0
+	}
+
+	if err := asTree.Walk(cb); err != nil {
+		if walkErr != nil {
+			return nil, walkErr
+		}
+		return nil, err
+	}
+
+	return &tree, nil
+}
diff --git a/cache/treecache_test.go b/cache/treecache_test.go
new file mode 100644
index 0000000..1de119a
--- /dev/null
+++ b/cache/treecache_test.go
@@ -0,0 +1,197 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+	"io/ioutil"
+	"os"
+	"reflect"
+	"testing"
+
+	"github.com/google/gitfs/gitiles"
+	git "github.com/libgit2/git2go"
+)
+
+func newInt(i int) *int          { return &i } // newInt returns a pointer to a copy of i.
+func newString(s string) *string { return &s } // newString returns a pointer to a copy of s.
+
+// TestGetTree checks the entries GetTree reports for the test repository, ignoring object IDs.
+func TestGetTree(t *testing.T) {
+	testRepo, err := initTest()
+	if err != nil {
+		t.Fatalf("init: %v", err)
+	}
+	defer testRepo.Cleanup()
+
+	treeResp, err := GetTree(testRepo.repo, testRepo.treeID)
+	if err != nil {
+		t.Fatalf("getTree: %v", err)
+	}
+
+	const placeholder = "abcd1234abcd1234abcd1234abcd1234abcd1234"
+	want := []gitiles.TreeEntry{
+		{
+			ID:   placeholder,
+			Name: "dir/f1",
+			Type: "blob",
+			Mode: 0100644,
+			Size: newInt(5),
+		},
+		{
+			ID:   placeholder,
+			Name: "dir/f2",
+			Type: "blob",
+			Mode: 0100755,
+			Size: newInt(11),
+		},
+		{
+			ID:     placeholder,
+			Name:   "link",
+			Type:   "blob",
+			Mode:   0120000,
+			Size:   newInt(5),
+			Target: newString("hello"),
+		},
+	}
+	if n := len(treeResp.Entries); n != 3 {
+		t.Fatalf("got %d entries, want 3 entries", n)
+	}
+	for i := range want {
+		treeResp.Entries[i].ID = placeholder
+		if !reflect.DeepEqual(want[i], treeResp.Entries[i]) {
+			t.Errorf("entry %d: got %v, want %v", i, &treeResp.Entries[i], &want[i])
+		}
+	}
+}
+
+// TestTreeCache checks that an added tree reads back under both the given ID and its own tree ID.
+func TestTreeCache(t *testing.T) {
+	testRepo, err := initTest()
+	if err != nil {
+		t.Fatalf("init: %v", err)
+	}
+	defer testRepo.Cleanup()
+
+	dir, err := ioutil.TempDir("", "")
+	if err != nil {
+		t.Fatalf("TempDir: %v", err)
+	}
+	defer os.RemoveAll(dir)
+	cache := &TreeCache{dir}
+
+	treeResp, err := GetTree(testRepo.repo, testRepo.treeID)
+	if err != nil {
+		t.Fatalf("getTree: %v", err)
+	}
+	randomID, err := git.NewOid("abcd1234abcd1234abcd1234abcd1234abcd1234")
+	if err != nil {
+		t.Fatalf("NewOid: %v", err)
+	}
+
+	if err := cache.Add(randomID, treeResp); err != nil {
+		t.Fatalf("cache.add %v", err)
+	}
+
+	roundtrip, err := cache.Get(randomID)
+	if err != nil {
+		t.Fatalf("cache.get: %v", err)
+	}
+	if !reflect.DeepEqual(roundtrip, treeResp) {
+		t.Fatalf("got %#v, want %#v", roundtrip, treeResp)
+	}
+
+	asTree, err := cache.Get(testRepo.treeID)
+	if err != nil {
+		t.Fatalf("cache.get: %v", err)
+	}
+	if !reflect.DeepEqual(asTree, treeResp) {
+		t.Fatalf("got %#v, want %#v", asTree, treeResp)
+	}
+}
+
+type testRepo struct {
+	dir       string          // temporary directory holding the repository
+	subTreeID *git.Oid        // tree containing f1 and f2
+	treeID    *git.Oid        // root tree containing "dir" and "link"
+	repo      *git.Repository // the repository initialized in dir
+}
+
+func (r *testRepo) Cleanup() {
+	os.RemoveAll(r.dir) // removes the repository directory; the error is ignored
+}
+
+// initTest creates a git repository in a fresh temporary directory holding
+// the tree {dir/f1, dir/f2 (executable), link}, where f1 and link both point
+// at the blob "hello". The caller should call Cleanup on the result.
+func initTest() (*testRepo, error) {
+	d, err := ioutil.TempDir("", "tmpgit")
+	if err != nil {
+		return nil, err
+	}
+
+	repo, err := git.InitRepository(d, true)
+	if err != nil {
+		return nil, err
+	}
+
+	id1, err := repo.CreateBlobFromBuffer([]byte("hello"))
+	if err != nil {
+		return nil, err
+	}
+	id2, err := repo.CreateBlobFromBuffer([]byte("goedemiddag"))
+	if err != nil {
+		return nil, err
+	}
+
+	b, err := repo.TreeBuilder()
+	if err != nil {
+		return nil, err
+	}
+	defer b.Free()
+	if err = b.Insert("f1", id1, git.FilemodeBlob); err != nil {
+		return nil, err
+	}
+	if err = b.Insert("f2", id2, git.FilemodeBlobExecutable); err != nil {
+		return nil, err
+	}
+	subTreeID, err := b.Write()
+	if err != nil {
+		return nil, err
+	}
+
+	// The root tree references the subtree and a symlink.
+	b, err = repo.TreeBuilder()
+	if err != nil {
+		return nil, err
+	}
+	defer b.Free()
+	if err = b.Insert("dir", subTreeID, git.FilemodeTree); err != nil {
+		return nil, err
+	}
+	if err = b.Insert("link", id1, git.FilemodeLink); err != nil {
+		return nil, err
+	}
+	treeID, err := b.Write()
+	if err != nil {
+		return nil, err
+	}
+
+	return &testRepo{
+		dir:       d,
+		repo:      repo,
+		treeID:    treeID,
+		subTreeID: subTreeID,
+	}, nil
+}