sync: save any cruft after calling git gc.

Backups are ENABLED BY DEFAULT because of the potential for data
corruption.  To disable them, set REPO_BACKUP_OBJECTS=0 in the
environment.

While the workspace will grow over time, this provides a recovery path
for an issue where objects are erroneously deleted from the workspace,
resulting in lost work.  Once the root cause is determined, we will be
able to stop saving backups again.

Backups are kept in .git/objects/.repo/pack.bak

Bug: https://crbug.com/gerrit/16247
Change-Id: Ib8b5c9b4bf0dfa9e29606e0f5c881d65996b2a40
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/345114
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
Tested-by: LaMont Jones <lamontjones@google.com>
Reviewed-by: Xin Li <delphij@google.com>
diff --git a/subcmds/sync.py b/subcmds/sync.py
index de4ac3a..caa5588 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -21,6 +21,7 @@
 import netrc
 from optparse import SUPPRESS_HELP
 import os
+import shutil
 import socket
 import sys
 import tempfile
@@ -58,11 +59,16 @@
 import platform_utils
 from project import SyncBuffer
 from progress import Progress
+from repo_trace import IsTrace, Trace
 import ssh
 from wrapper import Wrapper
 from manifest_xml import GitcManifest
 
 _ONE_DAY_S = 24 * 60 * 60
+# Env var that turns off object backups when set to '0'.
+REPO_BACKUP_OBJECTS = 'REPO_BACKUP_OBJECTS'
+
+_BACKUP_OBJECTS = os.environ.get(REPO_BACKUP_OBJECTS) != '0'
 
 
 class Sync(Command, MirrorSafeCommand):
@@ -694,13 +700,46 @@
 
     jobs = opt.jobs
 
+    def _backup_cruft(bare_git):
+      # Find any cruft packs in the current gitdir, and save them.
+      # See b/221065125 (repo sync complains that objects are missing).  This
+      # does not prevent that state, but keeps copies of the cruft packs so
+      # that the missing objects remain available.
+      if not _BACKUP_OBJECTS:
+        return
+      saved = []
+      objdir = bare_git.GetDotgitPath('objects')
+      pack_dir = os.path.join(objdir, 'pack')
+      bak_dir = os.path.join(objdir, '.repo','pack.bak')
+      files = set(platform_utils.listdir(pack_dir))
+      to_backup = []
+      for f in files:
+        base, ext = os.path.splitext(f)
+        if base + ".mtimes" in files:
+          to_backup.append(f)
+      if to_backup and not platform_utils.isdir(bak_dir):
+        os.makedirs(bak_dir)
+      for fname in to_backup:
+        bak_fname = os.path.join(bak_dir, fname)
+        if not os.path.exists(bak_fname):
+          saved.append(fname)
+          # Use a tmp file so that we are sure of a complete copy.
+          shutil.copy(os.path.join(pack_dir, fname), bak_fname + '.tmp')
+          shutil.move(bak_fname + '.tmp', bak_fname)
+      if saved and IsTrace():
+        Trace('%s saved %s', bare_git._project.name, ' '.join(saved))
+
+    gc_args = ('--auto', '--cruft')
+    pack_refs_args = ()
     if jobs < 2:
       for (run_gc, bare_git) in tidy_dirs.values():
         pm.update(msg=bare_git._project.name)
+
         if run_gc:
-          bare_git.gc('--auto')
+          bare_git.gc(*gc_args)
         else:
-          bare_git.pack_refs()
+          bare_git.pack_refs(*pack_refs_args)
+        _backup_cruft(bare_git)
       pm.end()
       return
 
@@ -715,15 +754,16 @@
       try:
         try:
           if run_gc:
-            bare_git.gc('--auto', config=config)
+            bare_git.gc(*gc_args, config=config)
           else:
-            bare_git.pack_refs(config=config)
+            bare_git.pack_refs(*pack_refs_args, config=config)
         except GitError:
           err_event.set()
         except Exception:
           err_event.set()
           raise
       finally:
+        _backup_cruft(bare_git)
         pm.finish(bare_git._project.name)
         sem.release()