Add cache/, a collection of caches for blobs, trees and repositories.
Change-Id: I5377a6e2c9921681d0d6ecd2006e71d8120ec5c5
diff --git a/cache/cache.go b/cache/cache.go
new file mode 100644
index 0000000..fb68e6c
--- /dev/null
+++ b/cache/cache.go
@@ -0,0 +1,56 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package cache implements a simplistic persistent cache based on the
+// filesystem.
+package cache
+
+import (
+ "os"
+ "path/filepath"
+)
+
+// Cache bundles the git repository cache, the tree cache and the blob CAS.
+type Cache struct {
+ git *gitCache // bare git repositories, created by NewCache under <dir>/git
+ tree *TreeCache // expanded trees, created by NewCache under <dir>/tree
+ blob *CAS // blob contents, created by NewCache under <dir>/blobs
+}
+
+// NewCache creates the directory d if needed and returns a Cache whose
+// git, blob and tree caches live in the "git", "blobs" and "tree"
+// subdirectories of d.
+func NewCache(d string) (*Cache, error) {
+	root, err := filepath.Abs(d)
+	if err != nil {
+		return nil, err
+	}
+	if err = os.MkdirAll(root, 0700); err != nil {
+		return nil, err
+	}
+
+	repos, err := newGitCache(filepath.Join(root, "git"))
+	if err != nil {
+		return nil, err
+	}
+	blobs, err := NewCAS(filepath.Join(root, "blobs"))
+	if err != nil {
+		return nil, err
+	}
+	trees, err := NewTreeCache(filepath.Join(root, "tree"))
+	if err != nil {
+		return nil, err
+	}
+	return &Cache{git: repos, tree: trees, blob: blobs}, nil
+}
diff --git a/cache/cas.go b/cache/cas.go
new file mode 100644
index 0000000..f5100fb
--- /dev/null
+++ b/cache/cas.go
@@ -0,0 +1,80 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ git "github.com/libgit2/git2go"
+)
+
+// CAS is a content addressable storage. It is intended to be used
+// with git SHA1 data. It stores blobs as uncompressed files without
+// git headers, so that files from the CAS can be wired up directly
+// with a FUSE file system.
+type CAS struct {
+ dir string // root directory under which the blob files are stored
+}
+
+// NewCAS returns a CAS rooted at dir, creating the directory (and
+// any missing parents) if it does not exist yet.
+func NewCAS(dir string) (*CAS, error) {
+	err := os.MkdirAll(dir, 0700)
+	if err != nil {
+		return nil, err
+	}
+	return &CAS{dir: dir}, nil
+}
+
+// path maps id to <dir>/<first 3 hex digits>/<remaining hex digits>.
+func (c *CAS) path(id git.Oid) string {
+	return fmt.Sprintf("%s/%s/%s", c.dir, id.String()[:3], id.String()[3:])
+}
+
+// Open opens the blob stored for id read-only; the bool is false on any error.
+func (c *CAS) Open(id git.Oid) (*os.File, bool) {
+ f, err := os.Open(c.path(id))
+ return f, err == nil
+}
+
+// Write writes the given data under the given ID atomically: the data
+// goes to a read-only temporary file in c.dir, which is then renamed
+// into place. The temporary file is removed if any step fails.
+func (c *CAS) Write(id git.Oid, data []byte) error {
+	// TODO(hanwen): we should run data through the git hash to
+	// verify that it is what it says it is.
+	f, err := ioutil.TempFile(c.dir, "tmp")
+	if err != nil {
+		return err
+	}
+	if err = f.Chmod(0444); err == nil {
+		_, err = f.Write(data)
+	}
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if p := c.path(id); err == nil {
+		if err = os.MkdirAll(filepath.Dir(p), 0700); err == nil {
+			err = os.Rename(f.Name(), p)
+		}
+	}
+	if err != nil {
+		os.Remove(f.Name()) // best effort: don't leave the temp file behind.
+	}
+	return err
+}
diff --git a/cache/gitcache.go b/cache/gitcache.go
new file mode 100644
index 0000000..a24cba4
--- /dev/null
+++ b/cache/gitcache.go
@@ -0,0 +1,144 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+ "bytes"
+ "fmt"
+ "log"
+ "net/url"
+ "os"
+ "os/exec"
+ "path"
+ "path/filepath"
+ "strings"
+ "time"
+
+ git "github.com/libgit2/git2go"
+)
+
+// gitCache manages a set of bare git repositories.
+type gitCache struct {
+ // Directory holding the repositories; gitPath maps URLs to paths below it.
+ dir string
+
+ // Directory to store log files for fetches and clones.
+ logDir string
+}
+
+// newGitCache returns a gitCache storing repositories under baseDir
+// and command logs under baseDir/gitfs-logs, creating both directories.
+func newGitCache(baseDir string) (*gitCache, error) {
+	c := &gitCache{
+		dir:    filepath.Join(baseDir),
+		logDir: filepath.Join(baseDir, "gitfs-logs"),
+	}
+	for _, d := range []string{c.logDir, c.dir} {
+		if err := os.MkdirAll(d, 0700); err != nil {
+			return nil, err
+		}
+	}
+	return c, nil
+}
+
+// logfile creates a log file in c.logDir named after the current time.
+// Only the colons of the timestamp are replaced; c.logDir is used as is.
+func (c *gitCache) logfile() (*os.File, error) {
+	stamp := strings.Replace(time.Now().Format(time.RFC3339Nano), ":", "_", -1)
+	return os.Create(fmt.Sprintf("%s/git.%s.log", c.logDir, stamp))
+}
+
+// Fetch updates the local clone of the repository at url by running
+// "git fetch origin" against its bare repository in the cache. The
+// repository is not cloned here; Open does that.
+//
+// Note that the parameter shadows the net/url package in this method.
+func (c *gitCache) Fetch(url string) error {
+	gitDir, err := c.gitPath(url)
+	if err != nil {
+		return err
+	}
+	return c.runGit(c.dir, "--git-dir="+gitDir, "fetch", "origin")
+}
+
+// gitPath maps the repository URL u to <c.dir>/<host>/<path>.git. A final
+// ".git" path component in u is dropped before the suffix is appended.
+func (c *gitCache) gitPath(u string) (string, error) {
+	parsed, err := url.Parse(u)
+	if err != nil {
+		return "", err
+	}
+	repoPath := path.Clean(parsed.Path)
+	if path.Base(repoPath) == ".git" {
+		repoPath = path.Dir(repoPath)
+	}
+	return filepath.Join(c.dir, parsed.Host, repoPath+".git"), nil
+}
+
+// runGit runs git with the given arguments under the given directory.
+// The arguments, exit status, stdout and stderr of the command are
+// recorded in a fresh log file. It returns the error from running git,
+// or an error describing why the log file could not be written.
+func (c *gitCache) runGit(dir string, args ...string) error {
+	logfile, err := c.logfile()
+	if err != nil {
+		return err
+	}
+	defer logfile.Close()
+
+	cmd := exec.Command("git", args...)
+	log.Printf("running %s (log: %s)", cmd.Args, logfile.Name())
+	cmd.Dir = dir
+
+	var out, errOut bytes.Buffer
+	cmd.Stdout = &out
+	cmd.Stderr = &errOut
+	runErr := cmd.Run()
+
+	_, err = fmt.Fprintf(logfile, "args: %s\ndir:%s\nEXIT: %v\n\nOUT\n%s\n\nERR\n%s\n\n",
+		cmd.Args, cmd.Dir, runErr, out.String(), errOut.String())
+	if err != nil {
+		return fmt.Errorf("logfile write for %s (%v): %v", args, runErr, err)
+	}
+	if err := logfile.Close(); err != nil {
+		return fmt.Errorf("logfile close for %s (%v): %v", args, runErr, err)
+	}
+	log.Printf("ran %s exit %v", cmd.Args, runErr)
+	return runErr
+}
+
+// Open returns the repository for url from the cache. If the cache
+// has no directory for it yet, the repository is cloned first.
+func (c *gitCache) Open(url string) (*git.Repository, error) {
+	// TODO(hanwen): multiple concurrent calls to Open() with the
+	// same URL may race, resulting in a double clone. It's unclear
+	// what will happen in that case.
+	repoDir, err := c.gitPath(url)
+	if err != nil {
+		return nil, err
+	}
+	_, statErr := os.Lstat(repoDir)
+	if os.IsNotExist(statErr) {
+		parent, name := filepath.Split(repoDir)
+		if err := os.MkdirAll(parent, 0755); err != nil {
+			return nil, err
+		}
+		err := c.runGit(parent, "clone", "--bare", "--progress", "--verbose", url, name)
+		if err != nil {
+			return nil, err
+		}
+	}
+	return git.OpenRepository(repoDir)
+}
diff --git a/cache/gitcache_test.go b/cache/gitcache_test.go
new file mode 100644
index 0000000..a49a900
--- /dev/null
+++ b/cache/gitcache_test.go
@@ -0,0 +1,47 @@
+package cache
+
+import (
+ "io/ioutil"
+ "testing"
+ "time"
+)
+
+func TestGitCache(t *testing.T) {
+ testRepo, err := initTest()
+ if err != nil {
+ t.Fatalf("init: %v", err)
+ }
+ defer testRepo.Cleanup()
+ // NOTE(review): dir below is never removed when the test finishes.
+ dir, err := ioutil.TempDir("", "")
+ if err != nil {
+ t.Fatalf("TempDir: %v", err)
+ }
+
+ cache, err := newGitCache(dir)
+ if err != nil {
+ t.Fatalf("newGitCache(%s): %v", dir, err)
+ }
+
+ url := "file://" + testRepo.dir
+ // Nothing has asked for a clone yet, so no repository may be available.
+ lazy := newLazyRepo(url, cache, true)
+ if r := lazy.Repository(); r != nil {
+ t.Errorf("got %v for lazy.Repository", r)
+ }
+ // NOTE(review): presumably relies on the clone not having finished yet.
+ go lazy.Clone()
+ if r := lazy.Repository(); r != nil {
+ t.Errorf("got %v for lazy.Repository", r)
+ }
+
+ // The API doesn't let us synchronize on finished clone, so we
+ // have no better way to test than sleep. This test may be
+ // flaky on highly loaded machines.
+ dt := 50 * time.Millisecond
+ time.Sleep(dt)
+
+ if repo := lazy.Repository(); repo == nil {
+ t.Errorf("lazyRepo still not loaded after %s.", dt)
+ }
+}
diff --git a/cache/lazyrepo.go b/cache/lazyrepo.go
new file mode 100644
index 0000000..cc4d935
--- /dev/null
+++ b/cache/lazyrepo.go
@@ -0,0 +1,84 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+ "log"
+ "sync"
+
+ git "github.com/libgit2/git2go"
+)
+
+// lazyRepo represents a git repository that might be fetched on
+// demand.
+type lazyRepo struct {
+ allowClone bool // whether Clone may start a clone; cleared by runClone
+ url string // repository URL handed to cache.Open
+ cache *gitCache // cache used to open the repository
+
+ repoMu sync.Mutex // held by Repository, Clone and runClone
+ cloning bool // set by Clone, cleared again by runClone
+ repo *git.Repository // result of cache.Open, stored by runClone
+}
+
+// newLazyRepo returns a lazyRepo for url; it does not start a clone.
+func newLazyRepo(url string, cache *gitCache, allowClone bool) *lazyRepo {
+	return &lazyRepo{
+		allowClone: allowClone,
+		url:        url,
+		cache:      cache,
+	}
+}
+
+// Repository returns the loaded git.Repository, or nil if there is none
+// (yet). It is safe for concurrent use from multiple goroutines.
+func (r *lazyRepo) Repository() *git.Repository {
+	r.repoMu.Lock()
+	repo := r.repo
+	r.repoMu.Unlock()
+	return repo
+}
+
+// runClone opens the repository through the cache (cloning it if
+// needed) and stores the result. It blocks until that is done and then
+// disables further clones, whether or not it succeeded. It is Clone
+// that makes sure only one runClone is in flight at any time.
+func (r *lazyRepo) runClone() {
+	repo, err := r.cache.Open(r.url)
+
+	r.repoMu.Lock()
+	defer r.repoMu.Unlock()
+	r.allowClone, r.cloning, r.repo = false, false, repo
+	// Failures are only logged; the state above is updated either way.
+	if err != nil {
+		log.Printf("runClone: %v", err)
+	}
+}
+
+// Clone schedules the repository to be cloned in the background and
+// returns immediately. It does nothing if cloning is not allowed, the
+// repository is already loaded, or a clone is already in progress.
+// This method is safe for concurrent use from multiple goroutines.
+func (r *lazyRepo) Clone() {
+	r.repoMu.Lock()
+	defer r.repoMu.Unlock()
+
+	switch {
+	case !r.allowClone, r.repo != nil, r.cloning:
+		return
+	}
+	r.cloning = true
+	go r.runClone()
+}
diff --git a/cache/treecache.go b/cache/treecache.go
new file mode 100644
index 0000000..2ea5f8a
--- /dev/null
+++ b/cache/treecache.go
@@ -0,0 +1,186 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "github.com/google/gitfs/gitiles"
+ git "github.com/libgit2/git2go"
+)
+
+// A TreeCache caches recursively expanded trees by their git commit and tree IDs.
+type TreeCache struct {
+ dir string // directory holding one JSON-encoded file per cached ID
+}
+
+// NewTreeCache returns a TreeCache storing entries under d, creating d if needed.
+func NewTreeCache(d string) (*TreeCache, error) {
+	if mkErr := os.MkdirAll(d, 0700); mkErr != nil {
+		return nil, mkErr
+	}
+	return &TreeCache{dir: d}, nil
+}
+
+// path maps id to <dir>/<first 3 hex digits>/<remaining hex digits>.
+func (c *TreeCache) path(id *git.Oid) string {
+	return fmt.Sprintf("%s/%s/%s", c.dir, id.String()[:3], id.String()[3:])
+}
+
+// Get returns a tree, if available.
+func (c *TreeCache) Get(id *git.Oid) (*gitiles.Tree, error) {
+ content, err := ioutil.ReadFile(c.path(id))
+ if err != nil {
+ return nil, err
+ }
+ var t gitiles.Tree
+ if err := json.Unmarshal(content, &t); err != nil {
+ return nil, err
+ }
+
+ return &t, nil
+}
+
+// Add stores tree under id and, if id is not the tree's own ID (for
+// example a commit ID), under the tree's own ID as well.
+func (c *TreeCache) Add(id *git.Oid, tree *gitiles.Tree) error {
+	if err := c.add(id, tree); err != nil {
+		return err
+	}
+	if id.String() == tree.ID {
+		return nil
+	}
+	treeID, err := git.NewOid(tree.ID)
+	if err != nil {
+		return err
+	}
+	return c.add(treeID, tree)
+}
+
+// add writes the JSON encoding of tree to the cache entry for id. The
+// data goes to a temporary file in c.dir first and is then renamed
+// into place; the temporary file is removed if any step fails.
+func (c *TreeCache) add(id *git.Oid, tree *gitiles.Tree) error {
+	// Encode before creating the file, so an encoding failure leaves
+	// nothing behind.
+	content, err := json.Marshal(tree)
+	if err != nil {
+		return err
+	}
+	f, err := ioutil.TempFile(c.dir, "tmp")
+	if err != nil {
+		return err
+	}
+	_, err = f.Write(content)
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if p := c.path(id); err == nil {
+		if err = os.MkdirAll(filepath.Dir(p), 0700); err == nil {
+			err = os.Rename(f.Name(), p)
+		}
+	}
+	if err != nil {
+		os.Remove(f.Name()) // best effort cleanup of the temporary file.
+	}
+	return err
+}
+
+// GetTree loads the Tree for id from an on-disk Git repository. id
+// may name any object that can be peeled to a tree. Entries are
+// listed recursively, with names relative to the root of the tree;
+// subtrees themselves get no entry.
+func GetTree(repo *git.Repository, id *git.Oid) (*gitiles.Tree, error) {
+	obj, err := repo.Lookup(id)
+	if err != nil {
+		return nil, err
+	}
+	obj, err = obj.Peel(git.ObjectTree)
+	if err != nil {
+		return nil, err
+	}
+	asTree, err := obj.AsTree()
+	if err != nil {
+		return nil, err
+	}
+	odb, err := repo.Odb()
+	if err != nil {
+		return nil, err
+	}
+	var tree gitiles.Tree
+	tree.ID = obj.Id().String()
+
+	// cbErr records why the callback aborted the walk, so that the
+	// cause can be returned rather than the error Walk reports.
+	var cbErr error
+	cb := func(n string, e *git.TreeEntry) int {
+		t := ""
+		var size *int
+		switch e.Type {
+		case git.ObjectTree:
+			return 0
+		case git.ObjectCommit:
+			t = "commit"
+		case git.ObjectBlob:
+			t = "blob"
+			sz, _, rhErr := odb.ReadHeader(e.Id)
+			if rhErr != nil {
+				cbErr = rhErr
+				return -1
+			}
+			size = new(int)
+			*size = int(sz)
+		default:
+			cbErr = fmt.Errorf("illegal object %d for %s", e.Type, n)
+			return -1
+		}
+		gEntry := gitiles.TreeEntry{
+			Name: filepath.Join(n, e.Name),
+			ID:   e.Id.String(),
+			Mode: int(e.Filemode),
+			Size: size,
+			Type: t,
+		}
+		if e.Filemode == git.FilemodeLink {
+			// The target of a symlink is the content of its blob.
+			obj, lookErr := repo.Lookup(e.Id)
+			if lookErr != nil {
+				cbErr = lookErr
+				return -1
+			}
+			blob, blobErr := obj.AsBlob()
+			if blobErr != nil {
+				cbErr = blobErr
+				return -1
+			}
+			target := string(blob.Contents())
+			gEntry.Target = &target
+		}
+		tree.Entries = append(tree.Entries, gEntry)
+		return 0
+	}
+	walkErr := asTree.Walk(cb)
+	if cbErr != nil {
+		return nil, cbErr
+	}
+	if walkErr != nil {
+		return nil, walkErr
+	}
+	return &tree, nil
+}
diff --git a/cache/treecache_test.go b/cache/treecache_test.go
new file mode 100644
index 0000000..1de119a
--- /dev/null
+++ b/cache/treecache_test.go
@@ -0,0 +1,197 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cache
+
+import (
+ "io/ioutil"
+ "os"
+ "reflect"
+ "testing"
+
+ "github.com/google/gitfs/gitiles"
+ git "github.com/libgit2/git2go"
+)
+
+func newInt(i int) *int { return &i } // newInt returns a pointer to a copy of i.
+func newString(s string) *string { return &s } // newString returns a pointer to a copy of s.
+
+func TestGetTree(t *testing.T) {
+ testRepo, err := initTest()
+ if err != nil {
+ t.Fatalf("init: %v", err)
+ }
+ defer testRepo.Cleanup()
+
+ treeResp, err := GetTree(testRepo.repo, testRepo.treeID)
+ if err != nil {
+ t.Fatalf("getTree: %v", err)
+ }
+
+ str := "abcd1234abcd1234abcd1234abcd1234abcd1234"
+
+ want := []gitiles.TreeEntry{
+ {
+ ID: str,
+ Name: "dir/f1",
+ Type: "blob",
+ Mode: 0100644,
+ Size: newInt(5),
+ },
+ {
+ Name: "dir/f2",
+ Type: "blob",
+ Mode: 0100755,
+ ID: str,
+ Size: newInt(11),
+ },
+ {
+ ID: str,
+ Name: "link",
+ Type: "blob",
+ Mode: 0120000,
+ Size: newInt(5),
+ Target: newString("hello"),
+ },
+ }
+ if len(treeResp.Entries) != 3 {
+ t.Fatalf("got %d entries, want 3 entries", len(treeResp.Entries))
+ }
+ for i := range treeResp.Entries {
+ treeResp.Entries[i].ID = str
+ if !reflect.DeepEqual(want[i], treeResp.Entries[i]) {
+ t.Errorf("entry %d: got %v, want %v", i, &treeResp.Entries[i], &want[i])
+ }
+ }
+}
+
+// TestTreeCache checks that a tree added under an arbitrary ID can be
+// read back both under that ID and under the tree's own ID.
+func TestTreeCache(t *testing.T) {
+	testRepo, err := initTest()
+	if err != nil {
+		t.Fatalf("init: %v", err)
+	}
+	defer testRepo.Cleanup()
+
+	dir, err := ioutil.TempDir("", "")
+	if err != nil {
+		t.Fatalf("TempDir: %v", err)
+	}
+	defer os.RemoveAll(dir)
+	cache := &TreeCache{dir: dir}
+
+	treeResp, err := GetTree(testRepo.repo, testRepo.treeID)
+	if err != nil {
+		t.Fatalf("getTree: %v", err)
+	}
+	randomID, err := git.NewOid("abcd1234abcd1234abcd1234abcd1234abcd1234")
+	if err != nil {
+		t.Fatalf("NewOid: %v", err)
+	}
+
+	if err := cache.Add(randomID, treeResp); err != nil {
+		t.Fatalf("cache.add %v", err)
+	}
+	roundtrip, err := cache.Get(randomID)
+	if err != nil {
+		t.Fatalf("cache.get: %v", err)
+	}
+	if !reflect.DeepEqual(roundtrip, treeResp) {
+		t.Fatalf("got %#v, want %#v", roundtrip, treeResp)
+	}
+
+	asTree, err := cache.Get(testRepo.treeID)
+	if err != nil {
+		t.Fatalf("cache.get: %v", err)
+	}
+	if !reflect.DeepEqual(asTree, treeResp) {
+		t.Fatalf("got %#v, want %#v", asTree, treeResp)
+	}
+}
+
+type testRepo struct {
+ dir string // temporary directory holding the bare repository
+ subTreeID *git.Oid // tree holding f1 and f2
+ treeID *git.Oid // root tree holding dir and link
+ repo *git.Repository
+}
+
+func (r *testRepo) Cleanup() {
+ os.RemoveAll(r.dir) // removes the repository directory; the error is ignored
+}
+
+// initTest creates a bare repository in a fresh temporary directory,
+// holding a tree with dir/f1 ("hello"), dir/f2 (executable,
+// "goedemiddag") and link (a symlink to "hello"). Call Cleanup on the
+// result to remove the directory again.
+func initTest() (*testRepo, error) {
+	d, err := ioutil.TempDir("", "tmpgit")
+	if err != nil {
+		return nil, err
+	}
+
+	repo, err := git.InitRepository(d, true)
+	if err != nil {
+		return nil, err
+	}
+
+	id1, err := repo.CreateBlobFromBuffer([]byte("hello"))
+	if err != nil {
+		return nil, err
+	}
+	id2, err := repo.CreateBlobFromBuffer([]byte("goedemiddag"))
+	if err != nil {
+		return nil, err
+	}
+
+	b, err := repo.TreeBuilder()
+	if err != nil {
+		return nil, err
+	}
+	defer b.Free()
+	if err = b.Insert("f1", id1, git.FilemodeBlob); err != nil {
+		return nil, err
+	}
+	if err = b.Insert("f2", id2, git.FilemodeBlobExecutable); err != nil {
+		return nil, err
+	}
+	subTreeID, err := b.Write()
+	if err != nil {
+		return nil, err
+	}
+
+	b, err = repo.TreeBuilder()
+	if err != nil {
+		return nil, err
+	}
+	defer b.Free()
+	if err = b.Insert("dir", subTreeID, git.FilemodeTree); err != nil {
+		return nil, err
+	}
+	if err = b.Insert("link", id1, git.FilemodeLink); err != nil {
+		return nil, err
+	}
+	treeID, err := b.Write()
+	if err != nil {
+		return nil, err
+	}
+
+	return &testRepo{
+		dir:       d,
+		repo:      repo,
+		treeID:    treeID,
+		subTreeID: subTreeID,
+	}, nil
+}