project: add basic path checks for <copyfile> & <linkfile>

Reject paths in <copyfile> & <linkfile> that try to use symlinks or
non-file or non-dirs.

We don't fully validate <linkfile> when src is a glob as it's a bit
complicated -- any component in the src could be the glob.  We make
sure the destination is a directory, and that any paths in that dir
are created as symlinks.  So while this can be used to read any path,
it can't be abused to write to any paths.

Bug: https://crbug.com/gerrit/11218
Change-Id: I68b6d789b5ca4e43f569e75e8b293b3e13d3224b
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/233074
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Michael Mortensen <mmortensen@google.com>
diff --git a/manifest_xml.py b/manifest_xml.py
index 69105c9..4f7bd49 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -1026,7 +1026,7 @@
       # dest is relative to the top of the tree.
       # We only validate paths if we actually plan to process them.
       self._ValidateFilePaths('copyfile', src, dest)
-      project.AddCopyFile(src, dest, os.path.join(self.topdir, dest))
+      project.AddCopyFile(src, dest, self.topdir)
 
   def _ParseLinkFile(self, project, node):
     src = self._reqatt(node, 'src')
@@ -1036,7 +1036,7 @@
       # dest is relative to the top of the tree.
       # We only validate paths if we actually plan to process them.
       self._ValidateFilePaths('linkfile', src, dest)
-      project.AddLinkFile(src, dest, os.path.join(self.topdir, dest))
+      project.AddLinkFile(src, dest, self.topdir)
 
   def _ParseAnnotation(self, project, node):
     name = self._reqatt(node, 'name')
diff --git a/project.py b/project.py
index d12d466..24fbf4f 100755
--- a/project.py
+++ b/project.py
@@ -36,7 +36,7 @@
 from git_config import GitConfig, IsId, GetSchemeFromUrl, GetUrlCookieFile, \
     ID_RE
 from error import GitError, HookError, UploadError, DownloadError
-from error import ManifestInvalidRevisionError
+from error import ManifestInvalidRevisionError, ManifestInvalidPathError
 from error import NoManifestException
 import platform_utils
 import progress
@@ -261,17 +261,70 @@
     self.keep = keep
 
 
-class _CopyFile(object):
+def _SafeExpandPath(base, subpath, skipfinal=False):
+  """Make sure |subpath| is completely safe under |base|.
 
-  def __init__(self, src, dest, abssrc, absdest):
+  We make sure no intermediate symlinks are traversed, and that the final path
+  is not a special file (e.g. not a socket or fifo).
+
+  NB: We rely on a number of paths already being filtered out while parsing the
+  manifest.  See the validation logic in manifest_xml.py for more details.
+  """
+  components = subpath.split(os.path.sep)
+  if skipfinal:
+    # Whether the caller handles the final component itself.
+    finalpart = components.pop()
+
+  path = base
+  for part in components:
+    if part in {'.', '..'}:
+      raise ManifestInvalidPathError(
+          '%s: "%s" not allowed in paths' % (subpath, part))
+
+    path = os.path.join(path, part)
+    if platform_utils.islink(path):
+      raise ManifestInvalidPathError(
+          '%s: traversing symlinks not allow' % (path,))
+
+    if os.path.exists(path):
+      if not os.path.isfile(path) and not platform_utils.isdir(path):
+        raise ManifestInvalidPathError(
+            '%s: only regular files & directories allowed' % (path,))
+
+  if skipfinal:
+    path = os.path.join(path, finalpart)
+
+  return path
+
+
+class _CopyFile(object):
+  """Container for <copyfile> manifest element."""
+
+  def __init__(self, git_worktree, src, topdir, dest):
+    """Register a <copyfile> request.
+
+    Args:
+      git_worktree: Absolute path to the git project checkout.
+      src: Relative path under |git_worktree| of file to read.
+      topdir: Absolute path to the top of the repo client checkout.
+      dest: Relative path under |topdir| of file to write.
+    """
+    self.git_worktree = git_worktree
+    self.topdir = topdir
     self.src = src
     self.dest = dest
-    self.abs_src = abssrc
-    self.abs_dest = absdest
 
   def _Copy(self):
-    src = self.abs_src
-    dest = self.abs_dest
+    src = _SafeExpandPath(self.git_worktree, self.src)
+    dest = _SafeExpandPath(self.topdir, self.dest)
+
+    if platform_utils.isdir(src):
+      raise ManifestInvalidPathError(
+          '%s: copying from directory not supported' % (self.src,))
+    if platform_utils.isdir(dest):
+      raise ManifestInvalidPathError(
+          '%s: copying to directory not allowed' % (self.dest,))
+
     # copy file if it does not exist or is out of date
     if not os.path.exists(dest) or not filecmp.cmp(src, dest):
       try:
@@ -292,13 +345,21 @@
 
 
 class _LinkFile(object):
+  """Container for <linkfile> manifest element."""
 
-  def __init__(self, git_worktree, src, dest, relsrc, absdest):
+  def __init__(self, git_worktree, src, topdir, dest):
+    """Register a <linkfile> request.
+
+    Args:
+      git_worktree: Absolute path to the git project checkout.
+      src: Target of symlink relative to path under |git_worktree|.
+      topdir: Absolute path to the top of the repo client checkout.
+      dest: Relative path under |topdir| of symlink to create.
+    """
     self.git_worktree = git_worktree
+    self.topdir = topdir
     self.src = src
     self.dest = dest
-    self.src_rel_to_dest = relsrc
-    self.abs_dest = absdest
 
   def __linkIt(self, relSrc, absDest):
     # link file if it does not exist or is out of date
@@ -316,35 +377,37 @@
         _error('Cannot link file %s to %s', relSrc, absDest)
 
   def _Link(self):
-    """Link the self.rel_src_to_dest and self.abs_dest. Handles wild cards
-    on the src linking all of the files in the source in to the destination
-    directory.
+    """Link the self.src & self.dest paths.
+
+    Handles wild cards on the src linking all of the files in the source in to
+    the destination directory.
     """
-    # We use the absSrc to handle the situation where the current directory
-    # is not the root of the repo
-    absSrc = os.path.join(self.git_worktree, self.src)
-    if os.path.exists(absSrc):
-      # Entity exists so just a simple one to one link operation
-      self.__linkIt(self.src_rel_to_dest, self.abs_dest)
+    src = _SafeExpandPath(self.git_worktree, self.src)
+
+    if os.path.exists(src):
+      # Entity exists so just a simple one to one link operation.
+      dest = _SafeExpandPath(self.topdir, self.dest, skipfinal=True)
+      # dest & src are absolute paths at this point.  Make sure the target of
+      # the symlink is relative in the context of the repo client checkout.
+      relpath = os.path.relpath(src, os.path.dirname(dest))
+      self.__linkIt(relpath, dest)
     else:
+      dest = _SafeExpandPath(self.topdir, self.dest)
       # Entity doesn't exist assume there is a wild card
-      absDestDir = self.abs_dest
-      if os.path.exists(absDestDir) and not platform_utils.isdir(absDestDir):
-        _error('Link error: src with wildcard, %s must be a directory',
-               absDestDir)
+      if os.path.exists(dest) and not platform_utils.isdir(dest):
+        _error('Link error: src with wildcard, %s must be a directory', dest)
       else:
-        absSrcFiles = glob.glob(absSrc)
-        for absSrcFile in absSrcFiles:
+        for absSrcFile in glob.glob(src):
           # Create a releative path from source dir to destination dir
           absSrcDir = os.path.dirname(absSrcFile)
-          relSrcDir = os.path.relpath(absSrcDir, absDestDir)
+          relSrcDir = os.path.relpath(absSrcDir, dest)
 
           # Get the source file name
           srcFile = os.path.basename(absSrcFile)
 
           # Now form the final full paths to srcFile. They will be
           # absolute for the desintaiton and relative for the srouce.
-          absDest = os.path.join(absDestDir, srcFile)
+          absDest = os.path.join(dest, srcFile)
           relSrc = os.path.join(relSrcDir, srcFile)
           self.__linkIt(relSrc, absDest)
 
@@ -1712,18 +1775,25 @@
       if submodules:
         syncbuf.later1(self, _dosubmodules)
 
-  def AddCopyFile(self, src, dest, absdest):
-    # dest should already be an absolute path, but src is project relative
-    # make src an absolute path
-    abssrc = os.path.join(self.worktree, src)
-    self.copyfiles.append(_CopyFile(src, dest, abssrc, absdest))
+  def AddCopyFile(self, src, dest, topdir):
+    """Mark |src| for copying to |dest| (relative to |topdir|).
 
-  def AddLinkFile(self, src, dest, absdest):
-    # dest should already be an absolute path, but src is project relative
-    # make src relative path to dest
-    absdestdir = os.path.dirname(absdest)
-    relsrc = os.path.relpath(os.path.join(self.worktree, src), absdestdir)
-    self.linkfiles.append(_LinkFile(self.worktree, src, dest, relsrc, absdest))
+    No filesystem changes occur here.  Actual copying happens later on.
+
+    Paths should have basic validation run on them before being queued.
+    Further checking will be handled when the actual copy happens.
+    """
+    self.copyfiles.append(_CopyFile(self.worktree, src, topdir, dest))
+
+  def AddLinkFile(self, src, dest, topdir):
+    """Mark |dest| to create a symlink (relative to |topdir|) pointing to |src|.
+
+    No filesystem changes occur here.  Actual linking happens later on.
+
+    Paths should have basic validation run on them before being queued.
+    Further checking will be handled when the actual link happens.
+    """
+    self.linkfiles.append(_LinkFile(self.worktree, src, topdir, dest))
 
   def AddAnnotation(self, name, value, keep):
     self.annotations.append(_Annotation(name, value, keep))
diff --git a/tests/test_project.py b/tests/test_project.py
index 77126df..6d82da1 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -25,6 +25,7 @@
 import tempfile
 import unittest
 
+import error
 import git_config
 import project
 
@@ -134,3 +135,206 @@
       self.assertFalse(rb.base_exists)
       # Hard to assert anything useful about this.
       self.assertTrue(rb.date)
+
+
+class CopyLinkTestCase(unittest.TestCase):
+  """TestCase for stub repo client checkouts.
+
+  It'll have a layout like:
+    tempdir/          # self.tempdir
+      checkout/       # self.topdir
+        git-project/  # self.worktree
+
+  Attributes:
+    tempdir: A dedicated temporary directory.
+    worktree: The top of the repo client checkout.
+    topdir: The top of a project checkout.
+  """
+
+  def setUp(self):
+    self.tempdir = tempfile.mkdtemp(prefix='repo_tests')
+    self.topdir = os.path.join(self.tempdir, 'checkout')
+    self.worktree = os.path.join(self.topdir, 'git-project')
+    os.makedirs(self.topdir)
+    os.makedirs(self.worktree)
+
+  def tearDown(self):
+    shutil.rmtree(self.tempdir, ignore_errors=True)
+
+  @staticmethod
+  def touch(path):
+    with open(path, 'w') as f:
+      pass
+
+  def assertExists(self, path, msg=None):
+    """Make sure |path| exists."""
+    if os.path.exists(path):
+      return
+
+    if msg is None:
+      msg = ['path is missing: %s' % path]
+      while path != '/':
+        path = os.path.dirname(path)
+        if not path:
+          # If we're given something like "foo", abort once we get to "".
+          break
+        result = os.path.exists(path)
+        msg.append('\tos.path.exists(%s): %s' % (path, result))
+        if result:
+          msg.append('\tcontents: %r' % os.listdir(path))
+          break
+      msg = '\n'.join(msg)
+
+    raise self.failureException(msg)
+
+
+class CopyFile(CopyLinkTestCase):
+  """Check _CopyFile handling."""
+
+  def CopyFile(self, src, dest):
+    return project._CopyFile(self.worktree, src, self.topdir, dest)
+
+  def test_basic(self):
+    """Basic test of copying a file from a project to the toplevel."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    cf = self.CopyFile('foo.txt', 'foo')
+    cf._Copy()
+    self.assertExists(os.path.join(self.topdir, 'foo'))
+
+  def test_src_subdir(self):
+    """Copy a file from a subdir of a project."""
+    src = os.path.join(self.worktree, 'bar', 'foo.txt')
+    os.makedirs(os.path.dirname(src))
+    self.touch(src)
+    cf = self.CopyFile('bar/foo.txt', 'new.txt')
+    cf._Copy()
+    self.assertExists(os.path.join(self.topdir, 'new.txt'))
+
+  def test_dest_subdir(self):
+    """Copy a file to a subdir of a checkout."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    cf = self.CopyFile('foo.txt', 'sub/dir/new.txt')
+    self.assertFalse(os.path.exists(os.path.join(self.topdir, 'sub')))
+    cf._Copy()
+    self.assertExists(os.path.join(self.topdir, 'sub', 'dir', 'new.txt'))
+
+  def test_update(self):
+    """Make sure changed files get copied again."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    dest = os.path.join(self.topdir, 'bar')
+    with open(src, 'w') as f:
+      f.write('1st')
+    cf = self.CopyFile('foo.txt', 'bar')
+    cf._Copy()
+    self.assertExists(dest)
+    with open(dest) as f:
+      self.assertEqual(f.read(), '1st')
+
+    with open(src, 'w') as f:
+      f.write('2nd!')
+    cf._Copy()
+    with open(dest) as f:
+      self.assertEqual(f.read(), '2nd!')
+
+  def test_src_block_symlink(self):
+    """Do not allow reading from a symlinked path."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    sym = os.path.join(self.worktree, 'sym')
+    self.touch(src)
+    os.symlink('foo.txt', sym)
+    self.assertExists(sym)
+    cf = self.CopyFile('sym', 'foo')
+    self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
+
+  def test_src_block_symlink_traversal(self):
+    """Do not allow reading through a symlink dir."""
+    src = os.path.join(self.worktree, 'bar', 'passwd')
+    os.symlink('/etc', os.path.join(self.worktree, 'bar'))
+    self.assertExists(src)
+    cf = self.CopyFile('bar/foo.txt', 'foo')
+    self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
+
+  def test_src_block_dir(self):
+    """Do not allow copying from a directory."""
+    src = os.path.join(self.worktree, 'dir')
+    os.makedirs(src)
+    cf = self.CopyFile('dir', 'foo')
+    self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
+
+  def test_dest_block_symlink(self):
+    """Do not allow writing to a symlink."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    os.symlink('dest', os.path.join(self.topdir, 'sym'))
+    cf = self.CopyFile('foo.txt', 'sym')
+    self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
+
+  def test_dest_block_symlink_traversal(self):
+    """Do not allow writing through a symlink dir."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    os.symlink('/tmp', os.path.join(self.topdir, 'sym'))
+    cf = self.CopyFile('foo.txt', 'sym/foo.txt')
+    self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
+
+  def test_src_block_dir(self):
+    """Do not allow copying to a directory."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    os.makedirs(os.path.join(self.topdir, 'dir'))
+    cf = self.CopyFile('foo.txt', 'dir')
+    self.assertRaises(error.ManifestInvalidPathError, cf._Copy)
+
+
+class LinkFile(CopyLinkTestCase):
+  """Check _LinkFile handling."""
+
+  def LinkFile(self, src, dest):
+    return project._LinkFile(self.worktree, src, self.topdir, dest)
+
+  def test_basic(self):
+    """Basic test of linking a file from a project into the toplevel."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    lf = self.LinkFile('foo.txt', 'foo')
+    lf._Link()
+    dest = os.path.join(self.topdir, 'foo')
+    self.assertExists(dest)
+    self.assertTrue(os.path.islink(dest))
+    self.assertEqual('git-project/foo.txt', os.readlink(dest))
+
+  def test_src_subdir(self):
+    """Link to a file in a subdir of a project."""
+    src = os.path.join(self.worktree, 'bar', 'foo.txt')
+    os.makedirs(os.path.dirname(src))
+    self.touch(src)
+    lf = self.LinkFile('bar/foo.txt', 'foo')
+    lf._Link()
+    self.assertExists(os.path.join(self.topdir, 'foo'))
+
+  def test_dest_subdir(self):
+    """Link a file to a subdir of a checkout."""
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    lf = self.LinkFile('foo.txt', 'sub/dir/foo/bar')
+    self.assertFalse(os.path.exists(os.path.join(self.topdir, 'sub')))
+    lf._Link()
+    self.assertExists(os.path.join(self.topdir, 'sub', 'dir', 'foo', 'bar'))
+
+  def test_update(self):
+    """Make sure changed targets get updated."""
+    dest = os.path.join(self.topdir, 'sym')
+
+    src = os.path.join(self.worktree, 'foo.txt')
+    self.touch(src)
+    lf = self.LinkFile('foo.txt', 'sym')
+    lf._Link()
+    self.assertEqual('git-project/foo.txt', os.readlink(dest))
+
+    # Point the symlink somewhere else.
+    os.unlink(dest)
+    os.symlink('/', dest)
+    lf._Link()
+    self.assertEqual('git-project/foo.txt', os.readlink(dest))