status: improve parallel execution stability

The status command runs many jobs in parallel, and each one writes
its results directly to stdout.  When many noisy jobs run at once,
their output can get intermingled.  Instead, pass each worker a
StringIO buffer to write to, and return the entire output as a
string so the main process can handle displaying it.  This fixes the
interleaved output and also makes the output stable: results are now
always displayed in the same project order.  Since we switch from
map to imap, results stream back as they complete, so buffering adds
essentially no overhead.

Bug: https://crbug.com/gerrit/12231
Change-Id: Ic18b07c8074c046ff36e306eb8d392fb34fb6eca
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297242
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
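
For reference, below is a minimal, self-contained sketch of the
pattern this change adopts: each worker writes into its own StringIO
buffer and returns the text, and the parent consumes pool.imap()
results in submission order so the display stays stable.  The
_status_one() helper and the project names are made up for
illustration; the real change is in subcmds/status.py in the diff
that follows.

    # Sketch only (not repo's code): buffered workers plus ordered
    # consumption of pool.imap() results.
    import functools
    import io
    import multiprocessing

    WORKER_BATCH_SIZE = 32  # chunksize handed to imap, as in command.py

    def _status_one(quiet, name):
      """Pretend to check one project, capturing output instead of printing."""
      buf = io.StringIO()
      if not quiet:
        buf.write('project %s/  (clean)\n' % name)
      return ('CLEAN', buf.getvalue())

    if __name__ == '__main__':
      projects = ['proj%02d' % i for i in range(100)]
      clean = 0
      with multiprocessing.Pool(4) as pool:
        # imap yields results lazily but in submission order, so the
        # output order does not depend on which worker finishes first.
        states = pool.imap(functools.partial(_status_one, False),
                           projects, chunksize=WORKER_BATCH_SIZE)
        for (state, output) in states:
          if output:
            print(output, end='')
          if state == 'CLEAN':
            clean += 1
      print('%d/%d projects clean' % (clean, len(projects)))
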
diff --git a/command.py b/command.py
index 7737ec7..90bd002 100644
--- a/command.py
+++ b/command.py
@@ -23,6 +23,15 @@
 from error import InvalidProjectGroupsError
 
 
+# Number of projects to submit to a single worker process at a time.
+# This number represents a tradeoff between the overhead of IPC and finer
+# grained opportunity for parallelism. This particular value was chosen by
+# iterating through powers of two until the overall performance no longer
+# improved. The performance of this batch size is not a function of the
+# number of cores on the system.
+WORKER_BATCH_SIZE = 32
+
+
 # How many jobs to run in parallel by default?  This assumes the jobs are
 # largely I/O bound and do not hit the network.
 DEFAULT_LOCAL_JOBS = min(os.cpu_count(), 8)
diff --git a/subcmds/branches.py b/subcmds/branches.py
index 9665e85..d5ea580 100644
--- a/subcmds/branches.py
+++ b/subcmds/branches.py
@@ -16,15 +16,7 @@
 import multiprocessing
 import sys
 from color import Coloring
-from command import Command, DEFAULT_LOCAL_JOBS
-
-# Number of projects to submit to a single worker process at a time.
-# This number represents a tradeoff between the overhead of IPC and finer
-# grained opportunity for parallelism. This particular value was chosen by
-# iterating through powers of two until the overall performance no longer
-# improved. The performance of this batch size is not a function of the
-# number of cores on the system.
-WORKER_BATCH_SIZE = 32
+from command import Command, DEFAULT_LOCAL_JOBS, WORKER_BATCH_SIZE
 
 
 class BranchColoring(Coloring):
diff --git a/subcmds/status.py b/subcmds/status.py
index f0f2e03..6c8e22e 100644
--- a/subcmds/status.py
+++ b/subcmds/status.py
@@ -14,10 +14,11 @@
 
 import functools
 import glob
+import io
 import multiprocessing
 import os
 
-from command import DEFAULT_LOCAL_JOBS, PagedCommand
+from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE
 
 from color import Coloring
 import platform_utils
@@ -99,7 +100,9 @@
     Returns:
       The status of the project.
     """
-    return project.PrintWorkTreeStatus(quiet=quiet)
+    buf = io.StringIO()
+    ret = project.PrintWorkTreeStatus(quiet=quiet, output_redir=buf)
+    return (ret, buf.getvalue())
 
   def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring):
     """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'"""
@@ -128,8 +131,13 @@
           counter += 1
     else:
       with multiprocessing.Pool(opt.jobs) as pool:
-        states = pool.map(functools.partial(self._StatusHelper, opt.quiet), all_projects)
-        counter += states.count('CLEAN')
+        states = pool.imap(functools.partial(self._StatusHelper, opt.quiet),
+                           all_projects, chunksize=WORKER_BATCH_SIZE)
+        for (state, output) in states:
+          if output:
+            print(output, end='')
+          if state == 'CLEAN':
+            counter += 1
     if not opt.quiet and len(all_projects) == counter:
       print('nothing to commit (working directory clean)')