sync: rework --jobs to provide better defaults

For --jobs-network, the logic is now:
* If the user specifies --jobs-network, use that.
* Else, if the user specifies --jobs, use that.
* Else, if the manifest specifies sync-j, use that.
* Else, default to 1.
Then we limit the jobs count based on the softlimit RLIMIT_NOFILE.

For --jobs-checkout, the logic is now:
* If the user specifies --jobs-checkout, use that.
* Else, if the user specifies --jobs, use that.
* Else, if the manifest specifies sync-j, use that.
* Else, default to DEFAULT_LOCAL_JOBS which is based on user's ncpus.
Then we limit the jobs count based on the softlimit RLIMIT_NOFILE.

For garbage collecting, the logic is now:
* If the user specifies --jobs, use that.
* Else, if the manifest specifies sync-j, use that.
* Else, default to the user's ncpus.
Then we limit the jobs count based on the softlimit RLIMIT_NOFILE.

Having to factor in the manifest settings makes this more complicated
which is why we delay processing of defaults until after we've synced
the manifest projects.

Bug: http://b/239712300
Change-Id: Id27cda63c76c156f1d63f6a20cb2c4ceeb3d547c
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/341394
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: LaMont Jones <lamontjones@google.com>
diff --git a/manifest_xml.py b/manifest_xml.py
index 12614c6..ee513a7 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -123,7 +123,7 @@
   destBranchExpr = None
   upstreamExpr = None
   remote = None
-  sync_j = 1
+  sync_j = None
   sync_c = False
   sync_s = False
   sync_tags = True
@@ -548,7 +548,7 @@
     if d.upstreamExpr:
       have_default = True
       e.setAttribute('upstream', d.upstreamExpr)
-    if d.sync_j > 1:
+    if d.sync_j is not None:
       have_default = True
       e.setAttribute('sync-j', '%d' % d.sync_j)
     if d.sync_c:
@@ -1462,8 +1462,8 @@
     d.destBranchExpr = node.getAttribute('dest-branch') or None
     d.upstreamExpr = node.getAttribute('upstream') or None
 
-    d.sync_j = XmlInt(node, 'sync-j', 1)
-    if d.sync_j <= 0:
+    d.sync_j = XmlInt(node, 'sync-j', None)
+    if d.sync_j is not None and d.sync_j <= 0:
       raise ManifestParseError('%s: sync-j must be greater than 0, not "%s"' %
                                (self.manifestFile, d.sync_j))
 
diff --git a/subcmds/sync.py b/subcmds/sync.py
index fa61d55..57ee054 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -52,7 +52,7 @@
 import gitc_utils
 from project import Project
 from project import RemoteSpec
-from command import Command, MirrorSafeCommand, WORKER_BATCH_SIZE
+from command import Command, DEFAULT_LOCAL_JOBS, MirrorSafeCommand, WORKER_BATCH_SIZE
 from error import RepoChangedException, GitError, ManifestParseError
 import platform_utils
 from project import SyncBuffer
@@ -65,7 +65,6 @@
 
 
 class Sync(Command, MirrorSafeCommand):
-  jobs = 1
   COMMON = True
   MULTI_MANIFEST_SUPPORT = True
   helpSummary = "Update working tree to the latest revision"
@@ -168,21 +167,16 @@
 later is required to fix a server side protocol bug.
 
 """
-  PARALLEL_JOBS = 1
-
-  def _CommonOptions(self, p):
-    if self.outer_client and self.outer_client.manifest:
-      try:
-        self.PARALLEL_JOBS = self.outer_client.manifest.default.sync_j
-      except ManifestParseError:
-        pass
-    super()._CommonOptions(p)
+  # A value of 0 means we want parallel jobs, but we'll determine the default
+  # value later on.
+  PARALLEL_JOBS = 0
 
   def _Options(self, p, show_smart=True):
     p.add_option('--jobs-network', default=None, type=int, metavar='JOBS',
-                 help='number of network jobs to run in parallel (defaults to --jobs)')
+                 help='number of network jobs to run in parallel (defaults to --jobs or 1)')
     p.add_option('--jobs-checkout', default=None, type=int, metavar='JOBS',
-                 help='number of local checkout jobs to run in parallel (defaults to --jobs)')
+                 help='number of local checkout jobs to run in parallel (defaults to --jobs or '
+                      f'{DEFAULT_LOCAL_JOBS})')
 
     p.add_option('-f', '--force-broken',
                  dest='force_broken', action='store_true',
@@ -451,7 +445,7 @@
   def _Fetch(self, projects, opt, err_event, ssh_proxy):
     ret = True
 
-    jobs = opt.jobs_network if opt.jobs_network else self.jobs
+    jobs = opt.jobs_network
     fetched = set()
     pm = Progress('Fetching', len(projects), delay=False, quiet=opt.quiet)
 
@@ -651,7 +645,7 @@
       return ret
 
     return self.ExecuteInParallel(
-        opt.jobs_checkout if opt.jobs_checkout else self.jobs,
+        opt.jobs_checkout,
         functools.partial(self._CheckoutOne, opt.detach_head, opt.force_sync),
         all_projects,
         callback=_ProcessResults,
@@ -691,8 +685,7 @@
             project.bare_git,
         )
 
-    cpu_count = os.cpu_count()
-    jobs = min(self.jobs, cpu_count)
+    jobs = opt.jobs
 
     if jobs < 2:
       for (run_gc, bare_git) in tidy_dirs.values():
@@ -704,6 +697,7 @@
       pm.end()
       return
 
+    cpu_count = os.cpu_count()
     config = {'pack.threads': cpu_count // jobs if cpu_count > jobs else 1}
 
     threads = set()
@@ -1011,9 +1005,6 @@
         sys.exit(1)
       self._ReloadManifest(manifest_name, mp.manifest)
 
-      if opt.jobs is None:
-        self.jobs = mp.manifest.default.sync_j
-
   def ValidateOptions(self, opt, args):
     if opt.force_broken:
       print('warning: -f/--force-broken is now the default behavior, and the '
@@ -1036,12 +1027,6 @@
       opt.prune = True
 
   def Execute(self, opt, args):
-    if opt.jobs:
-      self.jobs = opt.jobs
-    if self.jobs > 1:
-      soft_limit, _ = _rlimit_nofile()
-      self.jobs = min(self.jobs, (soft_limit - 5) // 3)
-
     manifest = self.outer_manifest
     if not opt.outer_manifest:
       manifest = self.manifest
@@ -1092,6 +1077,36 @@
     else:
       print('Skipping update of local manifest project.')
 
+    # Now that the manifests are up-to-date, setup the jobs value.
+    if opt.jobs is None:
+      # User has not made a choice, so use the manifest settings.
+      opt.jobs = mp.default.sync_j
+    if opt.jobs is not None:
+      # Neither user nor manifest have made a choice.
+      if opt.jobs_network is None:
+        opt.jobs_network = opt.jobs
+      if opt.jobs_checkout is None:
+        opt.jobs_checkout = opt.jobs
+    # Setup defaults if jobs==0.
+    if not opt.jobs:
+      if not opt.jobs_network:
+        opt.jobs_network = 1
+      if not opt.jobs_checkout:
+        opt.jobs_checkout = DEFAULT_LOCAL_JOBS
+      opt.jobs = os.cpu_count()
+
+    # Try to stay under user rlimit settings.
+    #
+    # Since each worker requires at 3 file descriptors to run `git fetch`, use
+    # that to scale down the number of jobs.  Unfortunately there isn't an easy
+    # way to determine this reliably as systems change, but it was last measured
+    # by hand in 2011.
+    soft_limit, _ = _rlimit_nofile()
+    jobs_soft_limit = max(1, (soft_limit - 5) // 3)
+    opt.jobs = min(opt.jobs, jobs_soft_limit)
+    opt.jobs_network = min(opt.jobs_network, jobs_soft_limit)
+    opt.jobs_checkout = min(opt.jobs_checkout, jobs_soft_limit)
+
     superproject_logging_data = {}
     self._UpdateProjectsRevisionId(opt, args, superproject_logging_data,
                                    manifest)