project: implement stateless sync pruning logic

Implement in-situ shallow re-fetching and garbage collection logic.
Enables repositories with sync-strategy="stateless" to reclaim disk
space by running reflog expire and git gc --prune=now if the working
tree is clean and has no local commits.

Bug: 498730431
Change-Id: I940bdc9b74da29d3f7b13566667dcddea769ebd3
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/568463
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
Commit-Queue: Gavin Mak <gavinmak@google.com>
diff --git a/project.py b/project.py
index 1766c9a..35c0fe1 100644
--- a/project.py
+++ b/project.py
@@ -629,6 +629,7 @@
         self.linkfiles = {}
         self.annotations = []
         self.dest_branch = dest_branch
+        self.stateless_prune_needed = False
 
         # This will be filled in if a project is later identified to be the
         # project containing repo hooks.
@@ -758,6 +759,18 @@
             return True
         return False
 
+    def HasStash(self) -> bool:
+        """Returns True if there is a stash in the repository."""
+        p = GitCommand(
+            self,
+            ["rev-parse", "--verify", "refs/stash"],
+            bare=True,
+            capture_stdout=True,
+            capture_stderr=True,
+            log_as_error=False,
+        )
+        return p.Wait() == 0
+
     _userident_name = None
     _userident_email = None
 
@@ -1241,6 +1254,75 @@
             logger.error("error: Cannot extract archive %s: %s", tarpath, e)
         return False
 
+    def _ShouldStatelessPrune(
+        self, use_superproject: Optional[bool] = None
+    ) -> bool:
+        """Determines if a stateless prune should be performed.
+
+        Stateless pruning reclaims space by running a reflog expiration and
+        garbage collection instead of an incremental fetch. It is only performed
+        if the repository is clean and has no local-only state.
+
+        Args:
+            use_superproject: Passed through to _CheckForImmutableRevision.
+
+        Returns:
+            True if the caller should schedule a reflog expire and GC for this
+            project, False if any of the safety checks below fails.
+        """
+        if not self.Exists:
+            return False
+
+        if self._CheckForImmutableRevision(use_superproject=use_superproject):
+            return False
+
+        # Skip if sharing objects with other projects. Checked before the
+        # remote query below so such projects do not trigger an ls-remote.
+        shares_objdir = self.UseAlternates or self.use_git_worktrees
+        if not shares_objdir:
+            for p in self.manifest.GetProjectsWithName(self.name):
+                if p != self and p.objdir == self.objdir:
+                    shares_objdir = True
+                    break
+
+        if shares_objdir:
+            return False
+
+        # Query the target hash from remote to see if we are up-to-date.
+        target_hash = None
+        if IsId(self.revisionExpr):
+            target_hash = self.revisionExpr
+        else:
+            output = self._LsRemote(self.upstream or self.revisionExpr)
+            if output:
+                target_hash = output.splitlines()[0].split()[0]
+
+        if not target_hash:
+            return False
+
+        try:
+            local_head = self.bare_git.rev_parse("HEAD")
+        except GitError:
+            local_head = None
+
+        if target_hash == local_head:
+            return False
+
+        # Skip if HEAD contains any unpushed local commits.
+        try:
+            local_commits = self.bare_git.rev_list(
+                "--count", "HEAD", "--not", "--remotes", "--tags"
+            )
+            if int(local_commits[0]) > 0:
+                return False
+        except (GitError, IndexError, ValueError):
+            return False
+
+        if self.IsDirty(consider_untracked=True) or self.HasStash():
+            return False
+
+        return True
+
     def Sync_NetworkHalf(
         self,
         quiet=False,
@@ -1318,6 +1400,11 @@
             clone_bundle = True
             clone_filter = None
 
+        if self.sync_strategy == "stateless" and self._ShouldStatelessPrune(
+            use_superproject
+        ):
+            self.stateless_prune_needed = True
+
         if is_new is None:
             is_new = not self.Exists
         if is_new:
@@ -1602,6 +1689,31 @@
         def _dosubmodules():
             self._SyncSubmodules(quiet=True)
 
+        def _doprune() -> None:
+            """Expire reflogs and run prune-now GC for stateless sync.
+
+            Non-zero exit statuses are logged as warnings, not raised.
+            """
+            expire = GitCommand(
+                self,
+                ["reflog", "expire", "--expire=all", "--all"],
+                bare=True,
+            )
+            if expire.Wait() != 0:
+                # Best effort: still run gc below.
+                logger.warning(
+                    "warn: %s: stateless reflog expire failed", self.name
+                )
+            p = GitCommand(
+                self,
+                ["gc", "--prune=now"],
+                bare=True,
+                capture_stdout=True,
+                capture_stderr=True,
+            )
+            if p.Wait() != 0:
+                logger.warning("warn: %s: stateless gc failed", self.name)
+
         head = self.work_git.GetHead()
         if head.startswith(R_HEADS):
             branch = head[len(R_HEADS) :]
@@ -1647,6 +1759,8 @@
                 fail(e)
                 return
             self._CopyAndLinkFiles()
+            if self.stateless_prune_needed:
+                syncbuf.later2(self, _doprune, not verbose)
             return
 
         if head == revid:
@@ -1793,6 +1907,9 @@
         if submodules:
             syncbuf.later1(self, _dosubmodules, not verbose)
 
+        if self.stateless_prune_needed:
+            syncbuf.later2(self, _doprune, not verbose)
+
     def AddCopyFile(self, src, dest, topdir):
         """Mark |src| for copying to |dest| (relative to |topdir|).
 
diff --git a/tests/test_project.py b/tests/test_project.py
index 501707e..a2d90d8 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -21,6 +21,7 @@
 import tempfile
 from typing import Optional
 import unittest
+from unittest import mock
 
 import utils_for_test
 
@@ -565,3 +566,126 @@
 
             fakeproj.config.SetString("manifest.platform", "auto")
             self.assertEqual(fakeproj.manifest_platform, "auto")
+
+
+class StatelessSyncTests(unittest.TestCase):
+    """Tests for stateless sync strategy."""
+
+    def _get_project(self, tempdir):
+        """Returns a sync_strategy="stateless" Project rooted at |tempdir|.
+
+        The remote, fetch and rev-parse/rev-list helpers are replaced with
+        mocks that describe a clean checkout whose HEAD differs from the
+        remote; HasStash is left unmocked.
+        """
+        manifest = mock.MagicMock()
+        manifest.manifestProject.depth = None
+        manifest.manifestProject.dissociate = False
+        manifest.manifestProject.clone_filter = None
+        manifest.is_multimanifest = False
+        manifest.manifestProject.config.GetBoolean.return_value = False
+
+        remote = mock.MagicMock()
+        remote.name = "origin"
+        remote.url = "http://"
+
+        proj = project.Project(
+            manifest=manifest,
+            name="test-project",
+            remote=remote,
+            gitdir=os.path.join(tempdir, ".git"),
+            objdir=os.path.join(tempdir, ".git"),
+            worktree=tempdir,
+            relpath="test-project",
+            revisionExpr="1234abcd",
+            revisionId=None,
+            sync_strategy="stateless",
+        )
+        proj._CheckForImmutableRevision = mock.MagicMock(return_value=False)
+        proj._LsRemote = mock.MagicMock(
+            return_value="1234abcd\trefs/heads/main\n"
+        )
+        proj.bare_git = mock.MagicMock()
+        proj.bare_git.rev_parse.return_value = "5678abcd"
+        proj.bare_git.rev_list.return_value = ["0"]
+        proj.IsDirty = mock.MagicMock(return_value=False)
+        proj.GetBranches = mock.MagicMock(return_value=[])
+        proj.DeleteWorktree = mock.MagicMock()
+        proj._InitGitDir = mock.MagicMock()
+        proj._RemoteFetch = mock.MagicMock(return_value=True)
+        proj._InitRemote = mock.MagicMock()
+        proj._InitMRef = mock.MagicMock()
+        return proj
+
+    def test_sync_network_half_stateless_prune_needed(self):
+        """Test stateless sync queues prune when needed."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            res = proj.Sync_NetworkHalf()
+
+            self.assertTrue(res.success)
+            proj.DeleteWorktree.assert_not_called()
+            self.assertTrue(proj.stateless_prune_needed)
+            proj._RemoteFetch.assert_called_once()
+
+    def test_sync_local_half_stateless_prune(self):
+        """Test stateless GC pruning is queued in Sync_LocalHalf."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            proj.stateless_prune_needed = True
+
+            proj._Checkout = mock.MagicMock()
+            proj._InitWorkTree = mock.MagicMock()
+            proj.IsRebaseInProgress = mock.MagicMock(return_value=False)
+            proj.IsCherryPickInProgress = mock.MagicMock(return_value=False)
+            proj.bare_ref = mock.MagicMock()
+            proj.bare_ref.all = {}
+            proj.GetRevisionId = mock.MagicMock(return_value="1234abcd")
+            proj._CopyAndLinkFiles = mock.MagicMock()
+
+            proj.work_git = mock.MagicMock()
+            proj.work_git.GetHead.return_value = "5678abcd"
+
+            syncbuf = project.SyncBuffer(proj.config)
+
+            with mock.patch("project.GitCommand") as mock_git_cmd:
+                mock_cmd_instance = mock.MagicMock()
+                mock_cmd_instance.Wait.return_value = 0
+                mock_git_cmd.return_value = mock_cmd_instance
+
+                proj.Sync_LocalHalf(syncbuf)
+                syncbuf.Finish()
+
+            self.assertEqual(mock_git_cmd.call_count, 2)
+            mock_git_cmd.assert_any_call(
+                proj, ["reflog", "expire", "--expire=all", "--all"], bare=True
+            )
+            mock_git_cmd.assert_any_call(
+                proj,
+                ["gc", "--prune=now"],
+                bare=True,
+                capture_stdout=True,
+                capture_stderr=True,
+            )
+
+    def test_sync_network_half_stateless_skips_if_stash(self):
+        """Test stateless sync skips if stash exists."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            proj.HasStash = mock.MagicMock(return_value=True)
+
+            res = proj.Sync_NetworkHalf()
+
+            self.assertTrue(res.success)
+            self.assertFalse(proj.stateless_prune_needed)
+
+    def test_sync_network_half_stateless_skips_if_local_commits(self):
+        """Test stateless sync skips if there are local-only commits."""
+        with utils_for_test.TempGitTree() as tempdir:
+            proj = self._get_project(tempdir)
+            proj.bare_git.rev_list.return_value = ["1"]
+
+            res = proj.Sync_NetworkHalf()
+
+            self.assertTrue(res.success)
+            self.assertFalse(proj.stateless_prune_needed)