project: migrate worktree .git/ dirs to symlinks

Historically we created a .git/ subdir in each source checkout and
symlinked individual files to the .repo/projects/ paths.  This layer
of indirection isn't actually needed: each .repo/projects/ path is
guaranteed to map 1-to-1 to a single git checkout, so there is no
need to keep the files in .git/ isolated from it.

To that end, change the .git path in each project checkout from a
dir full of symlinks (and a few files) into a single symlink to the
internal .repo/projects/ dir.  This makes the code simpler & faster.

The directory structure we have today is:
.repo/
  project-objects/chromiumos/third_party/kernel.git/
    <paths omitted as not relevant to this change>
  projects/src/third_party/kernel/
    v3.8.git/
      config
      description   -> …/project-objects/…/description
      FETCH_HEAD
      HEAD
      hooks/        -> …/project-objects/…/hooks/
      info/         -> …/project-objects/…/info/
      logs/
      objects/      -> …/project-objects/…/objects/
      packed-refs
      refs/
      rr-cache/     -> …/project-objects/…/rr-cache/
src/third_party/kernel/
  v3.8/
    .git/
      config        -> …/projects/…/v3.8.git/config
      description   -> …/project-objects/…/kernel.git/description
      HEAD
      hooks/        -> …/project-objects/…/kernel.git/hooks/
      index
      info/         -> …/project-objects/…/kernel.git/info/
      logs/         -> …/projects/…/v3.8.git/logs/
      objects/      -> …/project-objects/…/kernel.git/objects/
      packed-refs   -> …/projects/…/v3.8.git/packed-refs
      refs/         -> …/projects/…/v3.8.git/refs/
      rr-cache/     -> …/project-objects/…/kernel.git/rr-cache/

The directory structure we have after this commit is:
.repo/
  <nothing changes>
src/third_party/kernel/
  v3.8/
    .git            -> …/projects/…/v3.8.git

Bug: https://crbug.com/gerrit/15273
Change-Id: I9dd8def23fbfb2f4cb209a93f8b1b2b24002a444
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/323695
Reviewed-by: Mike Nichols <mikenichols@google.com>
Reviewed-by: Xin Li <delphij@google.com>
Tested-by: Mike Frysinger <vapier@google.com>
diff --git a/project.py b/project.py
index 4b85bb9..e0d645b 100644
--- a/project.py
+++ b/project.py
@@ -2781,50 +2781,95 @@
     self._InitMRef()
 
   def _InitWorkTree(self, force_sync=False, submodules=False):
-    realdotgit = os.path.join(self.worktree, '.git')
-    tmpdotgit = realdotgit + '.tmp'
-    init_dotgit = not os.path.exists(realdotgit)
-    if init_dotgit:
-      if self.use_git_worktrees:
+    """Setup the worktree .git path.
+
+    This is the user-visible path like src/foo/.git/.
+
+    With non-git-worktrees, this will be a symlink to the .repo/projects/ path.
+    With git-worktrees, this will be a .git file using "gitdir: ..." syntax.
+
+    Older checkouts had .git/ directories.  If we see that, migrate it.
+
+    This also handles changes in the manifest.  Maybe this project was backed
+    by "foo/bar" on the server, but now it's "new/foo/bar".  We have to update
+    the path we point to under .repo/projects/ to match.
+    """
+    dotgit = os.path.join(self.worktree, '.git')
+
+    # If using an old layout style (a directory), migrate it.
+    if not platform_utils.islink(dotgit) and platform_utils.isdir(dotgit):
+      self._MigrateOldWorkTreeGitDir(dotgit)
+
+    init_dotgit = not os.path.exists(dotgit)
+    if self.use_git_worktrees:
+      if init_dotgit:
         self._InitGitWorktree()
         self._CopyAndLinkFiles()
-        return
-
-      dotgit = tmpdotgit
-      platform_utils.rmtree(tmpdotgit, ignore_errors=True)
-      os.makedirs(tmpdotgit)
-      self._ReferenceGitDir(self.gitdir, tmpdotgit, share_refs=True,
-                            copy_all=False)
     else:
-      dotgit = realdotgit
+      if not init_dotgit:
+        # See if the project has changed.
+        if platform_utils.realpath(self.gitdir) != platform_utils.realpath(dotgit):
+          platform_utils.remove(dotgit)
 
-    try:
-      self._CheckDirReference(self.gitdir, dotgit, share_refs=True)
-    except GitError as e:
-      if force_sync and not init_dotgit:
-        try:
-          platform_utils.rmtree(dotgit)
-          return self._InitWorkTree(force_sync=False, submodules=submodules)
-        except Exception:
-          raise e
-      raise e
+      if init_dotgit or not os.path.exists(dotgit):
+        os.makedirs(self.worktree, exist_ok=True)
+        platform_utils.symlink(os.path.relpath(self.gitdir, self.worktree), dotgit)
 
-    if init_dotgit:
-      _lwrite(os.path.join(tmpdotgit, HEAD), '%s\n' % self.GetRevisionId())
+      if init_dotgit:
+        _lwrite(os.path.join(dotgit, HEAD), '%s\n' % self.GetRevisionId())
 
-      # Now that the .git dir is fully set up, move it to its final home.
-      platform_utils.rename(tmpdotgit, realdotgit)
+        # Finish checking out the worktree.
+        cmd = ['read-tree', '--reset', '-u', '-v', HEAD]
+        if GitCommand(self, cmd).Wait() != 0:
+          raise GitError('Cannot initialize work tree for ' + self.name)
 
-      # Finish checking out the worktree.
-      cmd = ['read-tree', '--reset', '-u']
-      cmd.append('-v')
-      cmd.append(HEAD)
-      if GitCommand(self, cmd).Wait() != 0:
-        raise GitError('Cannot initialize work tree for ' + self.name)
+        if submodules:
+          self._SyncSubmodules(quiet=True)
+        self._CopyAndLinkFiles()
 
-      if submodules:
-        self._SyncSubmodules(quiet=True)
-      self._CopyAndLinkFiles()
+  @classmethod
+  def _MigrateOldWorkTreeGitDir(cls, dotgit):
+    """Migrate the old worktree .git/ dir style to a symlink.
+
+    This logic specifically only uses state from |dotgit| to figure out where to
+    move content and not |self|.  This way if the backing project also changed
+    places, we only do the .git/ dir to .git symlink migration here.  The path
+    updates will happen independently.
+    """
+    # Figure out where in .repo/projects/ it's pointing to.
+    if not os.path.islink(os.path.join(dotgit, 'refs')):
+      raise GitError(f'{dotgit}: unsupported checkout state')
+    gitdir = os.path.dirname(os.path.realpath(os.path.join(dotgit, 'refs')))
+
+    # Remove known symlink paths that exist in .repo/projects/.
+    KNOWN_LINKS = {
+        'config', 'description', 'hooks', 'info', 'logs', 'objects',
+        'packed-refs', 'refs', 'rr-cache', 'shallow', 'svn',
+    }
+    # Paths that we know will be in both, but are safe to clobber in .repo/projects/.
+    SAFE_TO_CLOBBER = {
+        'COMMIT_EDITMSG', 'FETCH_HEAD', 'HEAD', 'index', 'ORIG_HEAD',
+    }
+
+    # Now walk the paths and sync the .git/ to .repo/projects/.
+    for name in platform_utils.listdir(dotgit):
+      dotgit_path = os.path.join(dotgit, name)
+      if name in KNOWN_LINKS:
+        if platform_utils.islink(dotgit_path):
+          platform_utils.remove(dotgit_path)
+        else:
+          raise GitError(f'{dotgit_path}: should be a symlink')
+      else:
+        gitdir_path = os.path.join(gitdir, name)
+        if name in SAFE_TO_CLOBBER or not os.path.exists(gitdir_path):
+          platform_utils.remove(gitdir_path, missing_ok=True)
+          platform_utils.rename(dotgit_path, gitdir_path)
+        else:
+          raise GitError(f'{dotgit_path}: unknown file; please file a bug')
+
+    # Now that the dir should be empty, clear it out, and symlink it over.
+    platform_utils.rmdir(dotgit)
+    platform_utils.symlink(os.path.relpath(gitdir, os.path.dirname(dotgit)), dotgit)
 
   def _get_symlink_error_message(self):
     if platform_utils.isWindows():
diff --git a/tests/test_project.py b/tests/test_project.py
index 9b2cc4e..d578fe8 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -16,6 +16,7 @@
 
 import contextlib
 import os
+from pathlib import Path
 import shutil
 import subprocess
 import tempfile
@@ -335,3 +336,52 @@
     platform_utils.symlink(self.tempdir, dest)
     lf._Link()
     self.assertEqual(os.path.join('git-project', 'foo.txt'), os.readlink(dest))
+
+
+class MigrateWorkTreeTests(unittest.TestCase):
+  """Check _MigrateOldWorkTreeGitDir handling."""
+
+  _SYMLINKS = {
+      'config', 'description', 'hooks', 'info', 'logs', 'objects',
+      'packed-refs', 'refs', 'rr-cache', 'shallow', 'svn',
+  }
+  _FILES = {
+      'COMMIT_EDITMSG', 'FETCH_HEAD', 'HEAD', 'index', 'ORIG_HEAD',
+  }
+
+  @classmethod
+  @contextlib.contextmanager
+  def _simple_layout(cls):
+    """Create a simple repo client checkout to test against."""
+    with tempfile.TemporaryDirectory() as tempdir:
+      tempdir = Path(tempdir)
+
+      gitdir = tempdir / '.repo/projects/src/test.git'
+      gitdir.mkdir(parents=True)
+      cmd = ['git', 'init', '--bare', str(gitdir)]
+      subprocess.check_call(cmd)
+
+      dotgit = tempdir / 'src/test/.git'
+      dotgit.mkdir(parents=True)
+      for name in cls._SYMLINKS:
+        (dotgit / name).symlink_to(f'../../../.repo/projects/src/test.git/{name}')
+      for name in cls._FILES:
+        (dotgit / name).write_text(name)
+
+      subprocess.run(['tree', '-a', str(dotgit)])
+      yield tempdir
+
+  def test_standard(self):
+    """Migrate a standard checkout that we expect."""
+    with self._simple_layout() as tempdir:
+      dotgit = tempdir / 'src/test/.git'
+      project.Project._MigrateOldWorkTreeGitDir(str(dotgit))
+
+      # Make sure the dir was transformed into a symlink.
+      self.assertTrue(dotgit.is_symlink())
+      self.assertEqual(str(dotgit.readlink()), '../../.repo/projects/src/test.git')
+
+      # Make sure files were moved over.
+      gitdir = tempdir / '.repo/projects/src/test.git'
+      for name in self._FILES:
+        self.assertEqual(name, (gitdir / name).read_text())