manifest_xml: ban use of newlines in paths
There should be no valid use of these anywhere, so just ban them
to make things easier for people.
Bug: https://crbug.com/gerrit/14156
Bug: https://crbug.com/gerrit/14200
Change-Id: I8d2cf988c510c98194c43a329a2b9bf313a3f0a8
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/304662
Reviewed-by: Raman Tenneti <rtenneti@google.com>
Tested-by: Mike Frysinger <vapier@google.com>
diff --git a/manifest_xml.py b/manifest_xml.py
index 0c2b45e..64b7fb4 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -1199,6 +1199,8 @@
if '~' in path:
return '~ not allowed (due to 8.3 filenames on Windows filesystems)'
+ path_codepoints = set(path)
+
# Some filesystems (like Apple's HFS+) try to normalize Unicode codepoints
# which means there are alternative names for ".git". Reject paths with
# these in it as there shouldn't be any reasonable need for them here.
@@ -1222,10 +1224,17 @@
u'\u206F', # NOMINAL DIGIT SHAPES
u'\uFEFF', # ZERO WIDTH NO-BREAK SPACE
}
- if BAD_CODEPOINTS & set(path):
+ if BAD_CODEPOINTS & path_codepoints:
# This message is more expansive than reality, but should be fine.
return 'Unicode combining characters not allowed'
+ # Reject newlines as there shouldn't be any legitmate use for them, they'll
+ # be confusing to users, and they can easily break tools that expect to be
+ # able to iterate over newline delimited lists. This even applies to our
+ # own code like .repo/project.list.
+ if {'\r', '\n'} & path_codepoints:
+ return 'Newlines not allowed'
+
# Assume paths might be used on case-insensitive filesystems.
path = path.lower()
diff --git a/tests/test_manifest_xml.py b/tests/test_manifest_xml.py
index eda0696..e78d85c 100644
--- a/tests/test_manifest_xml.py
+++ b/tests/test_manifest_xml.py
@@ -52,6 +52,9 @@
'blah/foo~',
# Block Unicode characters that get normalized out by filesystems.
u'foo\u200Cbar',
+ # Block newlines.
+ 'f\n/bar',
+ 'f\r/bar',
)
# Make sure platforms that use path separators (e.g. Windows) are also
@@ -91,6 +94,11 @@
fp.write(data)
return manifest_xml.XmlManifest(self.repodir, self.manifest_file)
+ @staticmethod
+ def encodeXmlAttr(attr):
+ """Encode |attr| using XML escape rules."""
+ return attr.replace('\r', '
').replace('\n', '
')
+
class ManifestValidateFilePaths(unittest.TestCase):
"""Check _ValidateFilePaths helper.
@@ -303,6 +311,7 @@
def test_allow_bad_name_from_user(self):
"""Check handling of bad name attribute from the user's input."""
def parse(name):
+ name = self.encodeXmlAttr(name)
manifest = self.getXmlManifest(f"""
<manifest>
<remote name="default-remote" fetch="http://localhost" />
@@ -327,6 +336,7 @@
def test_bad_name_checks(self):
"""Check handling of bad name attribute."""
def parse(name):
+ name = self.encodeXmlAttr(name)
# Setup target of the include.
with open(os.path.join(self.manifest_dir, 'target.xml'), 'w') as fp:
fp.write(f'<manifest><include name="{name}"/></manifest>')
@@ -408,6 +418,8 @@
def test_trailing_slash(self):
"""Check handling of trailing slashes in attributes."""
def parse(name, path):
+ name = self.encodeXmlAttr(name)
+ path = self.encodeXmlAttr(path)
return self.getXmlManifest(f"""
<manifest>
<remote name="default-remote" fetch="http://localhost" />
@@ -437,6 +449,8 @@
def test_toplevel_path(self):
"""Check handling of path=. specially."""
def parse(name, path):
+ name = self.encodeXmlAttr(name)
+ path = self.encodeXmlAttr(path)
return self.getXmlManifest(f"""
<manifest>
<remote name="default-remote" fetch="http://localhost" />
@@ -453,6 +467,8 @@
def test_bad_path_name_checks(self):
"""Check handling of bad path & name attributes."""
def parse(name, path):
+ name = self.encodeXmlAttr(name)
+ path = self.encodeXmlAttr(path)
manifest = self.getXmlManifest(f"""
<manifest>
<remote name="default-remote" fetch="http://localhost" />