project: implement stateless sync pruning logic

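Implement in-situ shallow re-fetching and garbage collection logic.
This enables repositories with sync-strategy="stateless" to reclaim
disk space by running `git reflog expire --expire=all --all` and
`git gc --prune=now` during sync. Pruning only happens when the
working tree is clean (no modified or untracked files, no stash),
HEAD has no local-only commits, and the object directory is not
shared with other projects.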

Bug: 498730431
Change-Id: I940bdc9b74da29d3f7b13566667dcddea769ebd3
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/568463
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
Commit-Queue: Gavin Mak <gavinmak@google.com>
diff --git a/project.py b/project.py
index 1766c9a..35c0fe1 100644
--- a/project.py
+++ b/project.py
@@ -629,6 +629,7 @@
         self.linkfiles = {}
         self.annotations = []
         self.dest_branch = dest_branch
+        self.stateless_prune_needed = False
 
         # This will be filled in if a project is later identified to be the
         # project containing repo hooks.
@@ -758,6 +759,18 @@
             return True
         return False
 
+    def HasStash(self) -> bool:
+        """Reports whether refs/stash resolves, i.e. a stash entry exists."""
+        exit_code = GitCommand(
+            self,
+            ["rev-parse", "--verify", "refs/stash"],
+            bare=True,
+            capture_stdout=True,
+            capture_stderr=True,
+            log_as_error=False,
+        ).Wait()
+        return exit_code == 0
+
     _userident_name = None
     _userident_email = None
 
@@ -1241,6 +1254,67 @@
             logger.error("error: Cannot extract archive %s: %s", tarpath, e)
         return False
 
+    def _ShouldStatelessPrune(
+        self, use_superproject: Optional[bool] = None
+    ) -> bool:
+        """Determines if a stateless prune should be performed.
+
+        Only requested when the checkout exists, HEAD differs from the target
+        revision, the object directory is not shared, and there is no
+        local-only state (unpushed commits, dirty/untracked files, stash).
+
+        Args:
+            use_superproject: Forwarded to _CheckForImmutableRevision.
+
+        Returns:
+            True if every check passes and a prune may be scheduled.
+        """
+        if not self.Exists:
+            return False
+
+        if self._CheckForImmutableRevision(use_superproject=use_superproject):
+            return False
+
+        # Check object sharing first so shared projects skip the remote query.
+        if self.UseAlternates or self.use_git_worktrees:
+            return False
+        if any(
+            p != self and p.objdir == self.objdir
+            for p in self.manifest.GetProjectsWithName(self.name)
+        ):
+            return False
+
+        # Query the target hash from remote to see if we are up-to-date.
+        if IsId(self.revisionExpr):
+            target_hash = self.revisionExpr
+        else:
+            output = self._LsRemote(self.upstream or self.revisionExpr)
+            fields = output.split() if output else []
+            target_hash = fields[0] if fields else None
+
+        if not target_hash:
+            return False
+
+        try:
+            local_head = self.bare_git.rev_parse("HEAD")
+        except GitError:
+            local_head = None
+
+        if target_hash == local_head:
+            return False
+
+        # Skip if HEAD contains any unpushed local commits.
+        try:
+            local_commits = self.bare_git.rev_list(
+                "--count", "HEAD", "--not", "--remotes", "--tags"
+            )
+            if int(local_commits[0]) > 0:
+                return False
+        except (GitError, IndexError, ValueError):
+            return False
+
+        return not (self.IsDirty(consider_untracked=True) or self.HasStash())
+
     def Sync_NetworkHalf(
         self,
         quiet=False,
@@ -1318,6 +1392,11 @@
             clone_bundle = True
             clone_filter = None
 
+        if self.sync_strategy == "stateless" and self._ShouldStatelessPrune(
+            use_superproject
+        ):
+            self.stateless_prune_needed = True
+
         if is_new is None:
             is_new = not self.Exists
         if is_new:
@@ -1602,6 +1681,23 @@
         def _dosubmodules():
             self._SyncSubmodules(quiet=True)
 
+        def _doprune() -> None:
+            """Expire reflogs and run prune-now GC for stateless sync."""
+            expire = GitCommand(
+                self, ["reflog", "expire", "--expire=all", "--all"], bare=True
+            )
+            if expire.Wait() != 0:  # Best effort; gc still runs below.
+                logger.warning("warn: %s: reflog expire failed", self.name)
+            p = GitCommand(
+                self,
+                ["gc", "--prune=now"],
+                bare=True,
+                capture_stdout=True,
+                capture_stderr=True,
+            )
+            if p.Wait() != 0:
+                logger.warning("warn: %s: stateless gc failed", self.name)
+
         head = self.work_git.GetHead()
         if head.startswith(R_HEADS):
             branch = head[len(R_HEADS) :]
@@ -1647,6 +1743,8 @@
                 fail(e)
                 return
             self._CopyAndLinkFiles()
+            if self.stateless_prune_needed:
+                syncbuf.later2(self, _doprune, not verbose)
             return
 
         if head == revid:
@@ -1793,6 +1891,9 @@
             if submodules:
                 syncbuf.later1(self, _dosubmodules, not verbose)
 
+        if self.stateless_prune_needed:
+            syncbuf.later2(self, _doprune, not verbose)
+
     def AddCopyFile(self, src, dest, topdir):
         """Mark |src| for copying to |dest| (relative to |topdir|).
 
diff --git a/tests/test_project.py b/tests/test_project.py
index 501707e..a2d90d8 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -21,6 +21,7 @@
 import tempfile
 from typing import Optional
 import unittest
+from unittest import mock
 
 import utils_for_test
 
@@ -565,3 +566,120 @@
 
             fakeproj.config.SetString("manifest.platform", "auto")
             self.assertEqual(fakeproj.manifest_platform, "auto")
+
+
+class StatelessSyncTests(unittest.TestCase):
+    """Tests for the stateless sync strategy's prune scheduling."""
+
+    def _get_project(self, tempdir):
+        """Returns a stateless Project with fetch and git helpers mocked."""
+        manifest = mock.MagicMock()
+        manifest.manifestProject.depth = None
+        manifest.manifestProject.dissociate = False
+        manifest.manifestProject.clone_filter = None
+        manifest.is_multimanifest = False
+        manifest.manifestProject.config.GetBoolean.return_value = False
+
+        remote = mock.MagicMock()
+        remote.name = "origin"
+        remote.url = "http://"
+        proj = project.Project(
+            manifest=manifest,
+            name="test-project",
+            remote=remote,
+            gitdir=os.path.join(tempdir, ".git"),
+            objdir=os.path.join(tempdir, ".git"),
+            worktree=tempdir,
+            relpath="test-project",
+            revisionExpr="1234abcd",
+            revisionId=None,
+            sync_strategy="stateless",
+        )
+        proj._CheckForImmutableRevision = mock.MagicMock(return_value=False)
+        proj._LsRemote = mock.MagicMock(
+            return_value="1234abcd\trefs/heads/main\n"
+        )
+        proj.bare_git = mock.MagicMock()
+        proj.bare_git.rev_parse.return_value = "5678abcd"  # HEAD != remote
+        proj.bare_git.rev_list.return_value = ["0"]  # no local-only commits
+        proj.IsDirty = mock.MagicMock(return_value=False)
+        proj.GetBranches = mock.MagicMock(return_value=[])
+        proj.DeleteWorktree = mock.MagicMock()
+        proj._InitGitDir = mock.MagicMock()
+        proj._RemoteFetch = mock.MagicMock(return_value=True)
+        proj._InitRemote = mock.MagicMock()
+        proj._InitMRef = mock.MagicMock()
+        return proj
+
+    def test_sync_network_half_stateless_prune_needed(self):
+        """Test stateless sync flags a prune and still fetches once."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            res = proj.Sync_NetworkHalf()
+
+            self.assertTrue(res.success)
+            proj.DeleteWorktree.assert_not_called()
+            self.assertTrue(proj.stateless_prune_needed)
+            proj._RemoteFetch.assert_called_once()
+
+    def test_sync_local_half_stateless_prune(self):
+        """Test Sync_LocalHalf runs reflog expire and gc when flagged."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            proj.stateless_prune_needed = True
+
+            proj._Checkout = mock.MagicMock()
+            proj._InitWorkTree = mock.MagicMock()
+            proj.IsRebaseInProgress = mock.MagicMock(return_value=False)
+            proj.IsCherryPickInProgress = mock.MagicMock(return_value=False)
+            proj.bare_ref = mock.MagicMock()
+            proj.bare_ref.all = {}
+            proj.GetRevisionId = mock.MagicMock(return_value="1234abcd")
+            proj._CopyAndLinkFiles = mock.MagicMock()
+
+            proj.work_git = mock.MagicMock()
+            proj.work_git.GetHead.return_value = "5678abcd"
+
+            syncbuf = project.SyncBuffer(proj.config)
+
+            with mock.patch("project.GitCommand") as mock_git_cmd:
+                mock_cmd_instance = mock.MagicMock()
+                mock_cmd_instance.Wait.return_value = 0
+                mock_git_cmd.return_value = mock_cmd_instance
+
+                proj.Sync_LocalHalf(syncbuf)
+                syncbuf.Finish()
+
+            self.assertEqual(mock_git_cmd.call_count, 2)
+            mock_git_cmd.assert_any_call(
+                proj, ["reflog", "expire", "--expire=all", "--all"], bare=True
+            )
+            mock_git_cmd.assert_any_call(
+                proj,
+                ["gc", "--prune=now"],
+                bare=True,
+                capture_stdout=True,
+                capture_stderr=True,
+            )
+
+    def test_sync_network_half_stateless_skips_if_stash(self):
+        """Test stateless sync does not flag a prune when a stash exists."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            proj.HasStash = mock.MagicMock(return_value=True)
+
+            res = proj.Sync_NetworkHalf()
+
+            self.assertTrue(res.success)
+            self.assertFalse(proj.stateless_prune_needed)
+
+    def test_sync_network_half_stateless_skips_if_local_commits(self):
+        """Test stateless sync skips if there are local-only commits."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            proj.bare_git.rev_list.return_value = ["1"]
+
+            res = proj.Sync_NetworkHalf()
+
+            self.assertTrue(res.success)
+            self.assertFalse(proj.stateless_prune_needed)