Parallelize slothfs-populate.

* Walk R/W, new R/O and old R/O tree in parallel.

* Within a tree walk, do each repository in parallel. This also
  cleans up the tree construction as a side effect.

* Check inode identity to avoid a costly XAttr lookup.

Test data for AOSP:
  Before: 90 seconds.
  After : 15 seconds

Change-Id: If0c8c8b05d7fb9cf81f382153c8c53e10ae1e086
diff --git a/cmd/slothfs-populate/main.go b/cmd/slothfs-populate/main.go
index 80101b4..2c67fa1 100644
--- a/cmd/slothfs-populate/main.go
+++ b/cmd/slothfs-populate/main.go
@@ -15,6 +15,7 @@
 package main
 
 import (
+	"bytes"
 	"flag"
 	"fmt"
 	"io/ioutil"
@@ -25,22 +26,39 @@
 	"strings"
 	"syscall"
 	"time"
+
+	git "github.com/libgit2/git2go"
 )
 
+type fileInfo struct {
+	isRegular bool
+	size      int64
+	inode     uint64
+}
+
 type repoTree struct {
 	// repositories under this repository
 	children map[string]*repoTree
 
 	// files in this repository.
-	entries []string
+	entries map[string]*fileInfo
 }
 
-func newRepoTree(localRoot string) *repoTree {
+func makeRepoTree() *repoTree {
 	return &repoTree{
-		children: make(map[string]*repoTree),
+		children: map[string]*repoTree{},
+		entries:  map[string]*fileInfo{},
 	}
 }
 
+func newRepoTree(dir string) (*repoTree, error) {
+	t := makeRepoTree()
+	if err := t.fill(dir, ""); err != nil {
+		return nil, err
+	}
+	return t, nil
+}
+
 // allChildren returns all the repositories (including the receiver)
 // as a map keyed by relative path.
 func (t *repoTree) allChildren() map[string]*repoTree {
@@ -53,41 +71,78 @@
 	return r
 }
 
+// allFiles returns all the files below this repoTree.
+func (t *repoTree) allFiles() map[string]*fileInfo {
+	r := map[string]*fileInfo{}
+	for nm, info := range t.entries {
+		r[nm] = info
+	}
+	for nm, ch := range t.children {
+		for sub, subCh := range ch.allFiles() {
+			r[filepath.Join(nm, sub)] = subCh
+		}
+	}
+	return r
+}
+
+func isRepoDir(path string) bool {
+	if stat, err := os.Stat(filepath.Join(path, ".git")); err == nil && stat.IsDir() {
+		return true
+	} else if stat, err := os.Stat(filepath.Join(path, ".gitid")); err == nil && !stat.IsDir() {
+		return true
+	}
+	return false
+}
+
 // construct fills `parent` looking through `dir` subdir of `repoRoot`.
-func construct(repoRoot, dir string, parent *repoTree) error {
-	isRepo := false
-	localRoot := filepath.Join(repoRoot, dir)
-	if stat, err := os.Stat(filepath.Join(localRoot, ".git")); err == nil && stat.IsDir() {
-		isRepo = true
-	} else if stat, err := os.Stat(filepath.Join(localRoot, ".gitid")); err == nil && !stat.IsDir() {
-		isRepo = true
-	}
-
-	if isRepo {
-		sub := newRepoTree(localRoot)
-		parent.children[dir] = sub
-		parent = sub
-
-		repoRoot = localRoot
-		dir = ""
-	}
-
-	entries, err := ioutil.ReadDir(localRoot)
+func (parent *repoTree) fill(repoRoot, dir string) error {
+	entries, err := ioutil.ReadDir(filepath.Join(repoRoot, dir))
 	if err != nil {
 		return err
 	}
+
+	todo := map[string]*repoTree{}
 	for _, e := range entries {
 		if (e.IsDir() && e.Name() == ".git") || (!e.IsDir() && e.Name() == ".gitid") {
 			continue
 		}
+		if e.IsDir() && e.Name() == "out" && dir == "" {
+			// Ignore the build output directory.
+			continue
+		}
 
 		subName := filepath.Join(dir, e.Name())
 		if e.IsDir() {
-			construct(repoRoot, subName, parent)
+			if newRoot := filepath.Join(repoRoot, subName); isRepoDir(newRoot) {
+				ch := makeRepoTree()
+				parent.children[subName] = ch
+				todo[newRoot] = ch
+			} else {
+				parent.fill(repoRoot, subName)
+			}
 		} else {
-			parent.entries = append(parent.entries, subName)
+			parent.entries[subName] = &fileInfo{
+				isRegular: e.Mode()&os.ModeType == 0,
+				size:      e.Size(),
+				inode:     e.Sys().(*syscall.Stat_t).Ino,
+			}
 		}
 	}
+
+	errs := make(chan error, len(todo))
+	for newRoot, ch := range todo {
+		go func(r string, t *repoTree) {
+			errs <- t.fill(r, "")
+		}(newRoot, ch)
+	}
+
+	for range todo {
+		err := <-errs
+		if err != nil {
+			return err
+		}
+	}
+
 	return nil
 }
 
@@ -98,7 +153,7 @@
 		return nil
 	}
 
-	for _, e := range child.entries {
+	for e := range child.entries {
 		dest := filepath.Join(rwRoot, name, e)
 
 		if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
@@ -220,61 +275,69 @@
 	return prefix, nil
 }
 
-func getSHA1s(dir string) (map[string]string, error) {
-	attr := "user.gitsha1"
+const attrName = "user.gitsha1"
 
-	shamap := map[string]string{}
-
-	data := make([]byte, 1024)
-
-	if err := filepath.Walk(dir, func(n string, fi os.FileInfo, err error) error {
-		if n == filepath.Join(dir, "manifest.xml") {
-			return nil
-		}
-		if fi.Mode()&os.ModeType != 0 {
-			return nil
-		}
-		if filepath.Base(n) == ".gitid" {
-			return nil
-		}
-
-		sz, err := syscall.Getxattr(n, attr, data)
-		if err != nil {
-			return fmt.Errorf("Getxattr(%s, %s): %v", n, attr, err)
-		}
-		rel, err := filepath.Rel(dir, n)
-		if err != nil {
-			return err
-		}
-		shamap[rel] = string(data[:sz])
-		return nil
-	}); err != nil {
-		return nil, err
+func getSHA1(fn string) (*git.Oid, error) {
+	var data [40]byte
+	sz, err := syscall.Getxattr(fn, attrName, data[:])
+	if err != nil {
+		return nil, fmt.Errorf("Getxattr(%s, %s): %v", fn, attrName, err)
 	}
 
-	return shamap, nil
+	oid, err := git.NewOid(string(data[:sz]))
+	if err != nil {
+		return nil, err
+	}
+	return oid, nil
 }
 
 // Returns the filenames (as relative paths) in newDir that have
 // changed relative to the files in oldDir.
-func changedFiles(oldDir, newDir string) ([]string, error) {
-	// TODO(hanwen): could be parallel.
-	oldSHA1s, err := getSHA1s(oldDir)
-	if err != nil {
-		return nil, err
-	}
-	newSHA1s, err := getSHA1s(newDir)
-	if err != nil {
-		return nil, err
-	}
-
+func changedFiles(oldDir string, oldInfos map[string]*fileInfo,
+	newDir string, newInfos map[string]*fileInfo) ([]string, error) {
 	var changed []string
-	for k, v := range newSHA1s {
-		old, ok := oldSHA1s[k]
-		if !ok || old != v {
-			changed = append(changed, k)
+	for path, info := range newInfos {
+		if path == "manifest.xml" {
+			continue
+		}
+
+		if !info.isRegular {
+			// TODO(hanwen): this is incorrect. If a file
+			// changes from a blob to a symlink, we should
+			// deref the symlink and check if the blob has
+			// changed.
+			continue
+		}
+
+		old, ok := oldInfos[path]
+		if !ok {
+			changed = append(changed, path)
+			continue
+		}
+
+		if old.inode == info.inode {
+			continue
+		}
+
+		if old.size != info.size {
+			changed = append(changed, path)
+			continue
+		}
+
+		oldSHA1, err := getSHA1(filepath.Join(oldDir, path))
+		if err != nil {
+			return nil, err
+		}
+		newSHA1, err := getSHA1(filepath.Join(newDir, path))
+		if err != nil {
+			return nil, err
+		}
+
+		if bytes.Compare(oldSHA1[:], newSHA1[:]) != 0 {
+			changed = append(changed, path)
 		}
 	}
+
 	sort.Strings(changed)
 	return changed, nil
 }
@@ -284,34 +347,67 @@
 	ro = filepath.Clean(ro)
 	wsName, err := clearLinks(filepath.Dir(ro), rw)
 	if err != nil {
-		log.Fatal(err)
-	}
-
-	rwTree := newRepoTree(rw)
-	if err := construct(rw, "", rwTree); err != nil {
 		return err
 	}
+	oldRoot := filepath.Join(filepath.Dir(ro), wsName)
 
-	roTree := newRepoTree(ro)
-	if err := construct(ro, "", roTree); err != nil {
-		return err
+	// Do the file system traversals in parallel.
+	errs := make(chan error, 3)
+	var rwTree, roTree *repoTree
+	var oldInfos map[string]*fileInfo
+
+	if wsName != "" {
+		go func() {
+			t, err := newRepoTree(oldRoot)
+			oldInfos = t.allFiles()
+			errs <- err
+		}()
+	} else {
+		oldInfos = map[string]*fileInfo{}
+		errs <- nil
+	}
+
+	go func() {
+		t, err := newRepoTree(rw)
+		rwTree = t
+		errs <- err
+	}()
+	go func() {
+		t, err := newRepoTree(ro)
+		roTree = t
+		errs <- err
+	}()
+
+	for i := 0; i < cap(errs); i++ {
+		err := <-errs
+		if err != nil {
+			return err
+		}
 	}
 
 	if err := createLinks(roTree, rwTree, ro, rw); err != nil {
 		return err
 	}
 
-	// TODO(hanwen): can be done in parallel to the other processes.
-	oldRoot := filepath.Join(filepath.Dir(ro), wsName)
-	changed, err := changedFiles(oldRoot, ro)
+	changed, err := changedFiles(oldRoot, oldInfos, ro, roTree.allFiles())
 	if err != nil {
 		return fmt.Errorf("changedFiles: %v", err)
 	}
 
-	// TODO(hanwen): parallel?
-	now := time.Now()
-	for _, n := range changed {
-		if err := os.Chtimes(filepath.Join(ro, n), now, now); err != nil {
+	for i, p := range changed {
+		changed[i] = filepath.Join(ro, p)
+	}
+
+	if err := seqTouch(changed, time.Now()); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func seqTouch(fs []string, t time.Time) error {
+	for _, f := range fs {
+		if err := os.Chtimes(f, t, t); err != nil {
 			return err
 		}
 	}
diff --git a/cmd/slothfs-populate/main_test.go b/cmd/slothfs-populate/main_test.go
index 63f494b..91d88a5 100644
--- a/cmd/slothfs-populate/main_test.go
+++ b/cmd/slothfs-populate/main_test.go
@@ -61,42 +61,52 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	parent := newRepoTree(dir)
-	construct(dir, "", parent)
-
 	songT := &repoTree{
 		children: map[string]*repoTree{},
-		entries:  []string{"song.mp3"},
+		entries: map[string]*fileInfo{
+			"song.mp3": &fileInfo{isRegular: true, size: 1},
+		},
 	}
 	coreT := &repoTree{
 		children: map[string]*repoTree{"song": songT},
-		entries: []string{"subdir/core.h",
-			"top",
+		entries: map[string]*fileInfo{
+			"subdir/core.h": &fileInfo{isRegular: true, size: 1},
+			"top":           &fileInfo{isRegular: true, size: 1},
 		},
 	}
 	topT := &repoTree{
 		children: map[string]*repoTree{
 			"build/core": coreT,
 		},
-		entries: []string{
-			"build/subfile",
-			"toplevel",
+		entries: map[string]*fileInfo{
+			"build/subfile": &fileInfo{isRegular: true, size: 1},
+			"toplevel": &fileInfo{
+				isRegular: true, size: 1},
 		},
 	}
 
-	if !reflect.DeepEqual(topT, parent) {
-		t.Errorf("got %#v want %#v", parent, topT)
+	got, err := newRepoTree(dir)
+	if err != nil {
+		t.Fatalf("newRepoTree: %v", err)
 	}
 
-	all := topT.allChildren()
-	want := map[string]*repoTree{
-		"":                topT,
-		"build/core":      coreT,
-		"build/core/song": songT,
+	// Clear unpredictable data.
+	gotCh := got.allChildren()
+	for _, t := range gotCh {
+		for _, e := range t.entries {
+			e.inode = 0
+		}
 	}
 
-	if !reflect.DeepEqual(all, want) {
-		t.Errorf("got %#v want %#v", all, want)
+	wantCh := topT.allChildren()
+	for k, v := range wantCh {
+		if !reflect.DeepEqual(v, gotCh[k]) {
+			t.Fatalf("subrepo %q: got %#v want %#v", v, gotCh[k])
+		}
+	}
+
+	if !reflect.DeepEqual(got, topT) {
+		t.Errorf("got %#v want %#v", got, topT)
 	}
 }
 
@@ -164,29 +174,54 @@
 func TestChangedFiles(t *testing.T) {
 	dir, err := createFSTree([]string{
 		"r1/manifest.xml",
-		"r1/a",
-		"r1/b",
+		"r1/same",
+		"r1/checksum",
+		"r1/size",
 		"r2/manifest.xml",
-		"r2/a",
-		"r2/b",
-		"r2/c",
+		"r2/same",
+		"r2/checksum",
+		"r2/newfile",
+		"r2/size",
 	})
 	if err != nil {
 		t.Fatalf("createFSTree: %v", err)
 	}
 
-	ck2 := "3f75526aa8f01eea5d76cee10722195dc73676df"
-	for _, changed := range []string{"r2/b", "r2/manifest.xml"} {
-		if err := syscall.Setxattr(filepath.Join(dir, changed), attr, []byte(ck2), 0); err != nil {
-			t.Fatalf("Setxattr: %v", err)
-		}
+	if err := os.Symlink("same", filepath.Join(dir, "r2/symlink")); err != nil {
+		t.Fatalf("symlink: %v", err)
 	}
 
-	got, err := changedFiles(filepath.Join(dir, "r1"), filepath.Join(dir, "r2"))
+	// same size, different checksum.
+	ck2 := "3f75526aa8f01eea5d76cee10722195dc73676df"
+	if err := syscall.Setxattr(filepath.Join(dir, "r2/checksum"), attr, []byte(ck2), 0); err != nil {
+		t.Fatalf("Setxattr: %v", err)
+	}
+
+	// different size.
+	if err := ioutil.WriteFile(filepath.Join(dir, "r2/size"), []byte("changed"), 0644); err != nil {
+		t.Fatalf("WriteFile(%s/r2/size): %v", dir, err)
+	}
+
+	// Manifest should be ignored.
+	if err := ioutil.WriteFile(filepath.Join(dir, "r2/manifest.xml"), []byte("changed"), 0644); err != nil {
+		t.Fatalf("WriteFile(%s/r2/manifest): %v", dir, err)
+	}
+
+	r2tree, err := newRepoTree(filepath.Join(dir, "r2"))
+	if err != nil {
+		t.Fatalf("newRepoTree: %v", err)
+	}
+	r1tree, err := newRepoTree(filepath.Join(dir, "r1"))
+	if err != nil {
+		t.Fatalf("newRepoTree: %v", err)
+	}
+
+	oldRoot := filepath.Join(dir, "r1")
+	got, err := changedFiles(oldRoot, r1tree.allFiles(), filepath.Join(dir, "r2"), r2tree.allFiles())
 	if err != nil {
 		t.Fatalf("changedFiles: %v", err)
 	}
-	if want := []string{"b", "c"}; !reflect.DeepEqual(want, got) {
+	if want := []string{"checksum", "newfile", "size"}; !reflect.DeepEqual(want, got) {
 		t.Errorf("got %v, want %v", got, want)
 	}
 }