status: improve parallel execution stability
The status command runs a bunch of jobs in parallel, and each one
is responsible for writing to stdout directly. When running many
noisy jobs in parallel, output can get intermingled. Pass each job a
StringIO buffer to write to, and return the entire output as a string
so the main job can handle displaying it. This fixes
interleaved output as well as making the output stable: we always
display results in the same project order now. By switching from
map to imap, this ends up not really adding any overhead.
Bug: https://crbug.com/gerrit/12231
Change-Id: Ic18b07c8074c046ff36e306eb8d392fb34fb6eca
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297242
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
diff --git a/command.py b/command.py
index 7737ec7..90bd002 100644
--- a/command.py
+++ b/command.py
@@ -23,6 +23,15 @@
from error import InvalidProjectGroupsError
+# Number of projects to submit to a single worker process at a time.
+# This number represents a tradeoff between the overhead of IPC and finer
+# grained opportunity for parallelism. This particular value was chosen by
+# iterating through powers of two until the overall performance no longer
+# improved. The performance of this batch size is not a function of the
+# number of cores on the system.
+WORKER_BATCH_SIZE = 32
+
+
# How many jobs to run in parallel by default? This assumes the jobs are
# largely I/O bound and do not hit the network.
DEFAULT_LOCAL_JOBS = min(os.cpu_count(), 8)
diff --git a/subcmds/branches.py b/subcmds/branches.py
index 9665e85..d5ea580 100644
--- a/subcmds/branches.py
+++ b/subcmds/branches.py
@@ -16,15 +16,7 @@
import multiprocessing
import sys
from color import Coloring
-from command import Command, DEFAULT_LOCAL_JOBS
-
-# Number of projects to submit to a single worker process at a time.
-# This number represents a tradeoff between the overhead of IPC and finer
-# grained opportunity for parallelism. This particular value was chosen by
-# iterating through powers of two until the overall performance no longer
-# improved. The performance of this batch size is not a function of the
-# number of cores on the system.
-WORKER_BATCH_SIZE = 32
+from command import Command, DEFAULT_LOCAL_JOBS, WORKER_BATCH_SIZE
class BranchColoring(Coloring):
diff --git a/subcmds/status.py b/subcmds/status.py
index f0f2e03..6c8e22e 100644
--- a/subcmds/status.py
+++ b/subcmds/status.py
@@ -14,10 +14,11 @@
import functools
import glob
+import io
import multiprocessing
import os
-from command import DEFAULT_LOCAL_JOBS, PagedCommand
+from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE
from color import Coloring
import platform_utils
@@ -99,7 +100,9 @@
Returns:
The status of the project.
"""
- return project.PrintWorkTreeStatus(quiet=quiet)
+ buf = io.StringIO()
+ ret = project.PrintWorkTreeStatus(quiet=quiet, output_redir=buf)
+ return (ret, buf.getvalue())
def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring):
"""find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'"""
@@ -128,8 +131,13 @@
counter += 1
else:
with multiprocessing.Pool(opt.jobs) as pool:
- states = pool.map(functools.partial(self._StatusHelper, opt.quiet), all_projects)
- counter += states.count('CLEAN')
+ states = pool.imap(functools.partial(self._StatusHelper, opt.quiet),
+ all_projects, chunksize=WORKER_BATCH_SIZE)
+ for (state, output) in states:
+ if output:
+ print(output, end='')
+ if state == 'CLEAN':
+ counter += 1
if not opt.quiet and len(all_projects) == counter:
print('nothing to commit (working directory clean)')