sync: Added --use-superproject option and support for superproject.

Added "--use-superproject" option to sync.py to fetch project SHAs from
superproject. If there are any missing projects in superproject, it
prints the missing entries and exits. If there are no missing entries,
it will use SHAs from superproject to fetch the projects from git.

Tested the code with the following commands.

$ ./run_tests tests/test_manifest_xml.py
$ ./run_tests -v tests/test_git_superproject.py
$ ./run_tests -v

Tested the sync code by copying all the repo changes into my Android
AOSP checkout and adding <superproject> tag to default.xml. With
local modification to the code to print the status,

.../WORKING_DIRECTORY$ repo sync --use-superproject
repo: executing 'git clone' url: sso://android/platform/superproject
repo: executing 'git ls-tree'
Success: []

Bug: https://crbug.com/gerrit/13709
Tested-by: Raman Tenneti <rtenneti@google.com>
Change-Id: Id18665992428dd684c04b0e0b3a52f46316873a0
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/293822
Reviewed-by: Mike Frysinger <vapier@google.com>
diff --git a/error.py b/error.py
index 225eb59..8bb64b8 100644
--- a/error.py
+++ b/error.py
@@ -13,6 +13,10 @@
 # limitations under the License.
 
 
+# URL to file bug reports for repo tool issues.
+BUG_REPORT_URL = 'https://bugs.chromium.org/p/gerrit/issues/entry?template=Repo+tool+issue'
+
+
 class ManifestParseError(Exception):
   """Failed to parse the manifest file.
   """
diff --git a/git_superproject.py b/git_superproject.py
new file mode 100644
index 0000000..3e87e92
--- /dev/null
+++ b/git_superproject.py
@@ -0,0 +1,149 @@
+# Copyright (C) 2021 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Provide functionality to get all projects and their SHAs from Superproject.
+
+For more information on superproject, check out:
+https://en.wikibooks.org/wiki/Git/Submodules_and_Superprojects
+
+Examples:
+  superproject = Superproject()
+  project_shas = superproject.GetAllProjectsSHAs()
+"""
+
+import os
+import sys
+
+from error import GitError
+from git_command import GitCommand
+import platform_utils
+
+
+class Superproject(object):
+  """Get SHAs from superproject.
+
+  It does a 'git clone' of superproject and 'git ls-tree' to get list of SHAs for all projects.
+  It contains project_shas which is a dictionary with project/sha entries.
+  """
+  def __init__(self, repodir, superproject_dir='exp-superproject'):
+    """Initializes superproject.
+
+    Args:
+      repodir: Path to the .repo/ dir for holding all internal checkout state.
+      superproject_dir: Relative path under |repodir| to checkout superproject.
+    """
+    self._project_shas = None
+    self._repodir = os.path.abspath(repodir)
+    self._superproject_dir = superproject_dir
+    self._superproject_path = os.path.join(self._repodir, superproject_dir)
+
+  @property
+  def project_shas(self):
+    """Returns a dictionary of projects and their SHAs."""
+    return self._project_shas
+
+  def _Clone(self, url, branch=None):
+    """Do a 'git clone' for the given url and branch.
+
+    Args:
+      url: superproject's url to be passed to git clone.
+      branch: the branchname to be passed as argument to git clone.
+
+    Returns:
+      True if 'git clone <url> <branch>' is successful, or False.
+    """
+    cmd = ['clone', url, '--depth', '1']
+    if branch:
+      cmd += ['--branch', branch]
+    p = GitCommand(None,
+                   cmd,
+                   cwd=self._superproject_path,
+                   capture_stdout=True,
+                   capture_stderr=True)
+    retval = p.Wait()
+    if retval:
+      # `git clone` is documented to produce an exit status of `128` if
+      # the requested url or branch are not present in the configuration.
+      print('repo: error: git clone call failed with return code: %r, stderr: %r' %
+            (retval, p.stderr), file=sys.stderr)
+      return False
+    return True
+
+  def _LsTree(self):
+    """Returns the data from 'git ls-tree -r HEAD'.
+
+    Works only in git repositories.
+
+    Returns:
+      data: stdout of 'git ls-tree -z -r HEAD', or None if the command failed.
+    """
+    git_dir = os.path.join(self._superproject_path, 'superproject')
+    if not os.path.exists(git_dir):
+      raise GitError('git ls-tree. Missing drectory: %s' % git_dir)
+    data = None
+    cmd = ['ls-tree', '-z', '-r', 'HEAD']
+    p = GitCommand(None,
+                   cmd,
+                   cwd=git_dir,
+                   capture_stdout=True,
+                   capture_stderr=True)
+    retval = p.Wait()
+    if retval == 0:
+      data = p.stdout
+    else:
+      # `git ls-tree` returned a non-zero exit status; report it on stderr and
+      # fall through so that the caller receives None.
+      print('repo: error: git ls-tree call failed with return code: %r, stderr: %r' % (
+          retval, p.stderr), file=sys.stderr)
+    return data
+
+  def GetAllProjectsSHAs(self, url, branch=None):
+    """Get SHAs for all projects from superproject and save them in _project_shas.
+
+    Args:
+      url: superproject's url to be passed to git clone.
+      branch: the branchname to be passed as argument to git clone.
+
+    Returns:
+      A dictionary mapping each project path to its SHA.
+    """
+    if not url:
+      raise ValueError('url argument is not supplied.')
+    if os.path.exists(self._superproject_path):
+      platform_utils.rmtree(self._superproject_path)
+    os.mkdir(self._superproject_path)
+
+    # TODO(rtenneti): we shouldn't be cloning the repo from scratch every time.
+    if not self._Clone(url, branch):
+      raise GitError('git clone failed for url: %s' % url)
+
+    data = self._LsTree()
+    if not data:
+      raise GitError('git ls-tree failed for url: %s' % url)
+
+    # Parse lines like the following to select lines starting with '160000' and
+    # build a dictionary with project path (last element) and its SHA (3rd element).
+    #
+    # 160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00
+    # 120000 blob acc2cbdf438f9d2141f0ae424cec1d8fc4b5d97f\tbootstrap.bash\x00
+    shas = {}
+    for line in data.split('\x00'):
+      ls_data = line.split(None, 3)
+      if not ls_data:
+        break
+      if ls_data[0] == '160000':
+        shas[ls_data[3]] = ls_data[2]
+
+    self._project_shas = shas
+    return shas
diff --git a/project.py b/project.py
index 6c6534d..17c75b4 100644
--- a/project.py
+++ b/project.py
@@ -1197,6 +1197,9 @@
       raise ManifestInvalidRevisionError('revision %s in %s not found' %
                                          (self.revisionExpr, self.name))
 
+  def SetRevisionId(self, revisionId):
+    self.revisionId = revisionId
+
   def Sync_LocalHalf(self, syncbuf, force_sync=False, submodules=False):
     """Perform only the local IO portion of the sync process.
        Network access is not required.
diff --git a/subcmds/sync.py b/subcmds/sync.py
index 3482946..d6b8f9d 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -51,11 +51,12 @@
 from git_command import GIT, git_require
 from git_config import GetUrlCookieFile
 from git_refs import R_HEADS, HEAD
+import git_superproject
 import gitc_utils
 from project import Project
 from project import RemoteSpec
 from command import Command, MirrorSafeCommand
-from error import RepoChangedException, GitError, ManifestParseError
+from error import BUG_REPORT_URL, RepoChangedException, GitError, ManifestParseError
 import platform_utils
 from project import SyncBuffer
 from progress import Progress
@@ -241,6 +242,8 @@
     p.add_option('--fetch-submodules',
                  dest='fetch_submodules', action='store_true',
                  help='fetch submodules from server')
+    p.add_option('--use-superproject', action='store_true',
+                 help='use the manifest superproject to sync projects')
     p.add_option('--no-tags',
                  dest='tags', default=True, action='store_false',
                  help="don't fetch tags")
@@ -894,6 +897,41 @@
                                     missing_ok=True,
                                     submodules_ok=opt.fetch_submodules)
 
+    if opt.use_superproject:
+      if not self.manifest.superproject:
+        print('error: superproject tag is not defined in manifest.xml',
+              file=sys.stderr)
+        sys.exit(1)
+      print('WARNING: --use-superproject is experimental and not '
+            'for general use', file=sys.stderr)
+      superproject_url = self.manifest.superproject['remote'].url
+      if not superproject_url:
+        print('error: superproject URL is not defined in manifest.xml',
+              file=sys.stderr)
+        sys.exit(1)
+      superproject = git_superproject.Superproject(self.manifest.repodir)
+      try:
+        superproject_shas = superproject.GetAllProjectsSHAs(url=superproject_url)
+      except Exception as e:
+        print('error: Cannot get project SHAs for %s: %s: %s' %
+              (superproject_url, type(e).__name__, str(e)),
+              file=sys.stderr)
+        sys.exit(1)
+      projects_missing_shas = []
+      for project in all_projects:
+        path = project.relpath
+        if not path:
+          continue
+        sha = superproject_shas.get(path)
+        if sha:
+          project.SetRevisionId(sha)
+        else:
+          projects_missing_shas.append(path)
+      if projects_missing_shas:
+        print('error: please file a bug using %s to report missing shas for: %s' %
+              (BUG_REPORT_URL, projects_missing_shas), file=sys.stderr)
+        sys.exit(1)
+
     err_network_sync = False
     err_update_projects = False
     err_checkout = False
diff --git a/tests/test_git_superproject.py b/tests/test_git_superproject.py
new file mode 100644
index 0000000..67a75a1
--- /dev/null
+++ b/tests/test_git_superproject.py
@@ -0,0 +1,82 @@
+# Copyright (C) 2021 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unittests for the git_superproject.py module."""
+
+import os
+import tempfile
+import unittest
+from unittest import mock
+
+from error import GitError
+import git_superproject
+import platform_utils
+
+
+class SuperprojectTestCase(unittest.TestCase):
+  """TestCase for the Superproject module."""
+
+  def setUp(self):
+    """Set up superproject every time."""
+    self.tempdir = tempfile.mkdtemp(prefix='repo_tests')
+    self.repodir = os.path.join(self.tempdir, '.repo')
+    os.mkdir(self.repodir)
+    self._superproject = git_superproject.Superproject(self.repodir)
+
+  def tearDown(self):
+    """Tear down superproject every time."""
+    platform_utils.rmtree(self.tempdir)
+
+  def test_superproject_get_project_shas_no_url(self):
+    """Test with no url."""
+    with self.assertRaises(ValueError):
+      self._superproject.GetAllProjectsSHAs(url=None)
+
+  def test_superproject_get_project_shas_invalid_url(self):
+    """Test with an invalid url."""
+    with self.assertRaises(GitError):
+      self._superproject.GetAllProjectsSHAs(url='localhost')
+
+  def test_superproject_get_project_shas_invalid_branch(self):
+    """Test with an invalid branch."""
+    with self.assertRaises(GitError):
+      self._superproject.GetAllProjectsSHAs(
+          url='sso://android/platform/superproject',
+          branch='junk')
+
+  def test_superproject_get_project_shas_mock_clone(self):
+    """Test with _Clone failing."""
+    with self.assertRaises(GitError):
+      with mock.patch.object(self._superproject, '_Clone', return_value=False):
+        self._superproject.GetAllProjectsSHAs(url='localhost')
+
+  def test_superproject_get_project_shas_mock_ls_tree(self):
+    """Test with LsTree being a mock."""
+    data = ('120000 blob 158258bdf146f159218e2b90f8b699c4d85b5804\tAndroid.bp\x00'
+            '160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00'
+            '160000 commit e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06\tbootable/recovery\x00'
+            '120000 blob acc2cbdf438f9d2141f0ae424cec1d8fc4b5d97f\tbootstrap.bash\x00'
+            '160000 commit ade9b7a0d874e25fff4bf2552488825c6f111928\tbuild/bazel\x00')
+    with mock.patch.object(self._superproject, '_Clone', return_value=True):
+      with mock.patch.object(self._superproject, '_LsTree', return_value=data):
+        shas = self._superproject.GetAllProjectsSHAs(url='localhost', branch='junk')
+        self.assertEqual(shas, {
+            'art': '2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea',
+            'bootable/recovery': 'e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06',
+            'build/bazel': 'ade9b7a0d874e25fff4bf2552488825c6f111928'
+        })
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tests/test_manifest_xml.py b/tests/test_manifest_xml.py
index e2c83af..370eb4f 100644
--- a/tests/test_manifest_xml.py
+++ b/tests/test_manifest_xml.py
@@ -232,6 +232,7 @@
 """)
     self.assertEqual(manifest.superproject['name'], 'superproject')
     self.assertEqual(manifest.superproject['remote'].name, 'test-remote')
+    self.assertEqual(manifest.superproject['remote'].url, 'http://localhost/superproject')
     self.assertEqual(
         manifest.ToXml().toxml(),
         '<?xml version="1.0" ?><manifest>' +
@@ -245,20 +246,21 @@
     manifest = self.getXmlManifest("""
 <manifest>
   <remote name="default-remote" fetch="http://localhost" />
-  <remote name="test-remote" fetch="http://localhost" />
+  <remote name="superproject-remote" fetch="http://localhost" />
   <default remote="default-remote" revision="refs/heads/main" />
-  <superproject name="superproject" remote="test-remote"/>
+  <superproject name="platform/superproject" remote="superproject-remote"/>
 </manifest>
 """)
-    self.assertEqual(manifest.superproject['name'], 'superproject')
-    self.assertEqual(manifest.superproject['remote'].name, 'test-remote')
+    self.assertEqual(manifest.superproject['name'], 'platform/superproject')
+    self.assertEqual(manifest.superproject['remote'].name, 'superproject-remote')
+    self.assertEqual(manifest.superproject['remote'].url, 'http://localhost/platform/superproject')
     self.assertEqual(
         manifest.ToXml().toxml(),
         '<?xml version="1.0" ?><manifest>' +
         '<remote name="default-remote" fetch="http://localhost"/>' +
-        '<remote name="test-remote" fetch="http://localhost"/>' +
+        '<remote name="superproject-remote" fetch="http://localhost"/>' +
         '<default remote="default-remote" revision="refs/heads/main"/>' +
-        '<superproject name="superproject" remote="test-remote"/>' +
+        '<superproject name="platform/superproject" remote="superproject-remote"/>' +
         '</manifest>')
 
   def test_superproject_with_defalut_remote(self):