webapp/codereview/patching.py - gerrit - Git at Google

 # Copyright 2008 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """Utility to read and apply a unified diff without forking patch(1).

 For a discussion of the unified diff format, see my blog on Artima:
 http://www.artima.com/weblogs/viewpost.jsp?thread=164293
 """

 import difflib
 import logging
 import re
 import sys


 # Matches a unified-diff chunk header such as "@@ -12,5 +14,7 @@".
 # Groups: (1) old start line, (2) old line count, (3) new start line,
 # (4) new line count.  Both counts are optional; an omitted count leaves its
 # group as None.  re.VERBOSE makes the whitespace inside the pattern
 # insignificant.  The pattern has no end anchor, so text following the
 # closing "@@" is accepted.
 _CHUNK_RE = re.compile(r"""
   @@
   \s+
   -
   (?: (\d+) (?: , (\d+) )?)
   \s+
   \+
   (?: (\d+) (?: , (\d+) )?)
   \s+
   @@
 """, re.VERBOSE)


 def PatchLines(old_lines, patch_lines, name="<patch>"):
   """Applies the unified diff in patch_lines to old_lines.

   Only unified diffs are understood, and the diff's header lines are ignored.

   Args:
     old_lines: The lines of the file being patched.
     patch_lines: The lines of the unified diff.
     name: Label handed to ParsePatchToChunks, which uses it in log messages.

   Returns:
     An iterator of (tag, old, new) tuples in which old and new are lists of
     lines.  The tag either starts with "error" or is one of the difflib tags
     "equal", "insert", "delete", "replace".  Nothing follows an "error"
     tuple.  Consecutive "equal" tuples are possible.
   """
   parsed = ParsePatchToChunks(patch_lines, name)
   if parsed is not None:
     return PatchChunks(old_lines, parsed)
   # Parsing failed: hand back a one-element iterator carrying the error.
   failure = ("error: ParsePatchToChunks failed", [], [])
   return iter([failure])


 def PatchChunks(old_lines, chunks):
   """Applies already-parsed chunks to old_lines.

   This is a generator.  Each element of chunks is a tuple
   ((old_i, old_j), (new_i, new_j), old_chunk, new_chunk), where
   old_lines[old_i:old_j] is expected to equal old_chunk.

   Yields:
     (tag, old, new) tuples in which old and new are lists of lines.  The tag
     either starts with "error" or is one of the difflib tags "equal",
     "insert", "delete", "replace".  Nothing is yielded after an "error"
     tuple.  Consecutive "equal" tuples are possible.
   """
   if not chunks:
     # Nothing to apply: the whole input is reported as unchanged.
     yield ("equal", old_lines, old_lines)
     return

   cursor = 0  # Index of the first old line not yet reported.
   for (old_i, old_j), (_new_i, _new_j), old_chunk, new_chunk in chunks:
     # Lines between the previous chunk and this one are untouched.
     unchanged = old_lines[cursor:old_i]
     if unchanged:
       yield "equal", unchanged, unchanged
     # The chunk's notion of the old text must agree with the real file.
     target = old_lines[old_i:old_j]
     if target != old_chunk:
       logging.error("mismatch:%s.%s.", target, old_chunk)
       yield ("error: old chunk mismatch", target, old_chunk)
       return
     # TODO(guido): ParsePatch knows the diff details, but throws the info away
     matcher = difflib.SequenceMatcher(None, old_chunk, new_chunk)
     for tag, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
       yield tag, old_chunk[a_lo:a_hi], new_chunk[b_lo:b_hi]
     cursor = old_j

   # Whatever follows the last chunk is unchanged as well.
   tail = old_lines[cursor:]
   if tail:
     yield ("equal", tail, tail)


 # Prefix of the marker line that says the preceding diff line has no trailing
 # newline.  ParsePatchToChunks strips that newline from the previous line;
 # ParsePatchToLines skips the marker line.
 _NO_NEWLINE_MESSAGE = "\\ No newline at end of file"


 def _HeaderRange(start, count):
   """Converts a (start, count) pair from an @@ header into a slice range.

   Returns (i, j) such that lines[i:j] is the span the header describes.  A
   count of 0 uses start as the index unchanged; otherwise start is shifted
   down by one to turn the 1-based line number into a list index.
   """
   if count == 0:
     first = start
   else:
     first = start - 1
   return first, first + count


 def _BuildChunk(raw_chunk, old_range, new_range):
   """Splits tagged diff lines into old/new line lists and checks their sizes.

   Args:
     raw_chunk: List of (tag, rest) pairs where tag is " ", "-" or "+".
     old_range: (i, j) index range the chunk claims in the old file.
     new_range: (i, j) index range the chunk claims in the new file.

   Returns:
     The tuple (old_range, new_range, old_lines, new_lines), or None if the
     number of old or new lines disagrees with the corresponding range.
   """
   old_chunk = [rest for tag, rest in raw_chunk if tag in (" ", "-")]
   new_chunk = [rest for tag, rest in raw_chunk if tag in (" ", "+")]
   old_i, old_j = old_range
   new_i, new_j = new_range
   if len(old_chunk) != old_j - old_i or len(new_chunk) != new_j - new_i:
     return None
   return (old_range, new_range, old_chunk, new_chunk)


 def ParsePatchToChunks(lines, name="<patch>"):
   """Parses a patch from a list of lines.

   Everything up to and including the first line starting with "+++" is
   skipped.  After that, each "@@" header starts a chunk and the following
   lines tagged " ", "-" or "+" form its body.

   Args:
     lines: The lines of a unified diff.
     name: Label used as a prefix in log messages.

   Returns:
     A list of chunks (possibly empty), where each chunk is a tuple:

       old_range, new_range, old_lines, new_lines

     The ranges are (i, j) pairs of list indices.  Returns None if there's a
     problem: a chunk whose length disagrees with its header, headers that
     are out of order or inconsistent, a diff line before the first header,
     or unrecognized input before any chunk was read.  Unrecognized input
     after at least one chunk line ends parsing without being an error.
   """
   lineno = 0
   raw_chunk = []
   chunks = []
   old_range = new_range = None
   old_last = new_last = 0
   in_prelude = True
   for line in lines:
     lineno += 1
     if in_prelude:
       # Skip leading lines until after we've seen one starting with '+++'
       if line.startswith("+++"):
         in_prelude = False
       continue
     match = _CHUNK_RE.match(line)
     if match:
       if raw_chunk:
         # Process the lines in the previous chunk
         chunk = _BuildChunk(raw_chunk, old_range, new_range)
         if chunk is None:
           logging.warning("%s:%s: previous chunk has incorrect length",
                           name, lineno)
           return None
         chunks.append(chunk)
         raw_chunk = []
       # Parse the @@ header; an omitted count means one line.
       old_ln = int(match.group(1))
       old_n = int(match.group(2) or 1)
       new_ln = int(match.group(3))
       new_n = int(match.group(4) or 1)
       # Convert the numbers to list indices we can use
       old_i, old_j = old_range = _HeaderRange(old_ln, old_n)
       new_i, new_j = new_range = _HeaderRange(new_ln, new_n)
       # Check header consistency with previous header
       if old_i < old_last or new_i < new_last:
         logging.warning("%s:%s: chunk header out of order: %r",
                         name, lineno, line)
         return None
       if old_i - old_last != new_i - new_last:
         logging.warning("%s:%s: inconsistent chunk header: %r",
                         name, lineno, line)
         return None
       old_last = old_j
       new_last = new_j
       continue
     # Slice rather than index so that an empty string does not raise.
     tag, rest = line[:1], line[1:]
     if tag in (" ", "-", "+"):
       if old_range is None:
         # Without a header there is no range to check this line against.
         logging.warning("%s:%d: diff line before any chunk header: %r",
                         name, lineno, line)
         return None
       raw_chunk.append((tag, rest))
     elif line.startswith(_NO_NEWLINE_MESSAGE):
       # TODO(guido): need to check that no more lines follow for this file
       if raw_chunk:
         last_tag, last_rest = raw_chunk[-1]
         if last_rest.endswith("\n"):
           raw_chunk[-1] = (last_tag, last_rest[:-1])
     else:
       # Only log if it's a non-blank line.  Blank lines we see a lot.
       if line and line.strip():
         logging.warning("%s:%d: indecypherable input: %r",
                         name, lineno, line)
       if chunks or raw_chunk:
         break  # Trailing garbage isn't so bad
       return None
   if raw_chunk:
     # Process the lines in the last chunk
     chunk = _BuildChunk(raw_chunk, old_range, new_range)
     if chunk is None:
       logging.warning("%s:%s: last chunk has incorrect length",
                       name, lineno)
       return None
     chunks.append(chunk)
   return chunks


 # TODO: can we share some of this code with ParsePatchToChunks?
 def ParsePatchToLines(lines):
   """Parses a patch from a list of lines.

   Every input line except "no newline at end of file" markers is echoed in
   the result, paired with its line numbers in the old and new file.

   Args:
     lines: The lines of a unified diff.

   Returns:
     None on error (a line starting with "@" that is not a valid chunk
     header), otherwise a list of 3-tuples:
       (old_line_no, new_line_no, line)

     A line number can be 0 if it doesn't exist in the old/new file.  Prelude
     lines, chunk headers, unrecognized lines, and diff lines that appear
     before the first chunk header all get (0, 0).
   """
   result = []
   in_prelude = True
   # Next line numbers to hand out; None until a chunk header supplies them.
   old_ln = new_ln = None
   for line in lines:
     if in_prelude:
       result.append((0, 0, line))
       # Skip leading lines until after we've seen one starting with '+++'
       if line.startswith("+++"):
         in_prelude = False
       continue
     if line.startswith("@"):
       result.append((0, 0, line))
       match = _CHUNK_RE.match(line)
       if not match:
         logging.warning("ParsePatchToLines match failed on %s", line)
         return None
       old_ln = int(match.group(1))
       new_ln = int(match.group(3))
       continue
     # Slice rather than index so that an empty string does not raise.
     tag = line[:1]
     if old_ln is None and tag in ("-", "+", " "):
       # No header seen yet, so there are no line numbers to assign.
       result.append((0, 0, line))
     elif tag == "-":
       result.append((old_ln, 0, line))
       old_ln += 1
     elif tag == "+":
       result.append((0, new_ln, line))
       new_ln += 1
     elif tag == " ":
       result.append((old_ln, new_ln, line))
       old_ln += 1
       new_ln += 1
     elif not line.startswith(_NO_NEWLINE_MESSAGE):
       # Something else, could be property changes etc.
       result.append((0, 0, line))
   return result
	# Copyright 2008 Google Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Utility to read and apply a unified diff without forking patch(1).

	For a discussion of the unified diff format, see my blog on Artima:
	http://www.artima.com/weblogs/viewpost.jsp?thread=164293
	"""

	import difflib
	import logging
	import re
	import sys


	# Matches a unified-diff chunk header such as "@@ -12,5 +14,7 @@".
	# Groups: (1) old start line, (2) old line count, (3) new start line,
	# (4) new line count.  Both counts are optional; an omitted count leaves its
	# group as None.  re.VERBOSE makes the whitespace inside the pattern
	# insignificant.  The pattern has no end anchor, so text following the
	# closing "@@" is accepted.
	_CHUNK_RE = re.compile(r"""
	@@
	\s+
	-
	(?: (\d+) (?: , (\d+) )?)
	\s+
	\+
	(?: (\d+) (?: , (\d+) )?)
	\s+
	@@
	""", re.VERBOSE)


	def PatchLines(old_lines, patch_lines, name="<patch>"):
	  """Applies the unified diff in patch_lines to old_lines.

	  Only unified diffs are understood, and the diff's header lines are ignored.

	  Args:
	    old_lines: The lines of the file being patched.
	    patch_lines: The lines of the unified diff.
	    name: Label handed to ParsePatchToChunks, which uses it in log messages.

	  Returns:
	    An iterator of (tag, old, new) tuples in which old and new are lists of
	    lines.  The tag either starts with "error" or is one of the difflib tags
	    "equal", "insert", "delete", "replace".  Nothing follows an "error"
	    tuple.  Consecutive "equal" tuples are possible.
	  """
	  chunks = ParsePatchToChunks(patch_lines, name)
	  if chunks is None:
	    # Parsing failed: hand back a one-element iterator carrying the error.
	    return iter([("error: ParsePatchToChunks failed", [], [])])
	  return PatchChunks(old_lines, chunks)


	def PatchChunks(old_lines, chunks):
	  """Applies already-parsed chunks to old_lines.

	  This is a generator.  Each element of chunks is a tuple
	  ((old_i, old_j), (new_i, new_j), old_chunk, new_chunk), where
	  old_lines[old_i:old_j] is expected to equal old_chunk.

	  Yields:
	    (tag, old, new) tuples in which old and new are lists of lines.  The tag
	    either starts with "error" or is one of the difflib tags "equal",
	    "insert", "delete", "replace".  Nothing is yielded after an "error"
	    tuple.  Consecutive "equal" tuples are possible.
	  """
	  if not chunks:
	    # The patch is a no-op
	    yield ("equal", old_lines, old_lines)
	    return

	  old_pos = 0  # Index of the first old line not yet reported.
	  for (old_i, old_j), (_new_i, _new_j), old_chunk, new_chunk in chunks:
	    # Lines between the previous chunk and this one are untouched.
	    eq = old_lines[old_pos:old_i]
	    if eq:
	      yield "equal", eq, eq
	    # Check that the patch matches the target file
	    actual = old_lines[old_i:old_j]
	    if actual != old_chunk:
	      logging.error("mismatch:%s.%s.", actual, old_chunk)
	      yield ("error: old chunk mismatch", actual, old_chunk)
	      return
	    # TODO(guido): ParsePatch knows the diff details, but throws the info away
	    sm = difflib.SequenceMatcher(None, old_chunk, new_chunk)
	    for tag, i1, i2, j1, j2 in sm.get_opcodes():
	      yield tag, old_chunk[i1:i2], new_chunk[j1:j2]
	    old_pos = old_j

	  # Copy the final matching chunk if any.
	  eq = old_lines[old_pos:]
	  if eq:
	    yield ("equal", eq, eq)


	# Prefix of the marker line that says the preceding diff line has no trailing
	# newline.  ParsePatchToChunks strips that newline from the previous line;
	# ParsePatchToLines skips the marker line.
	_NO_NEWLINE_MESSAGE = "\\ No newline at end of file"


	def _HeaderRange(start, count):
	  """Converts a (start, count) pair from an @@ header into a slice range.

	  Returns (i, j) such that lines[i:j] is the span the header describes.  A
	  count of 0 uses start as the index unchanged; otherwise start is shifted
	  down by one to turn the 1-based line number into a list index.
	  """
	  if count == 0:
	    first = start
	  else:
	    first = start - 1
	  return first, first + count


	def _BuildChunk(raw_chunk, old_range, new_range):
	  """Splits tagged diff lines into old/new line lists and checks their sizes.

	  Args:
	    raw_chunk: List of (tag, rest) pairs where tag is " ", "-" or "+".
	    old_range: (i, j) index range the chunk claims in the old file.
	    new_range: (i, j) index range the chunk claims in the new file.

	  Returns:
	    The tuple (old_range, new_range, old_lines, new_lines), or None if the
	    number of old or new lines disagrees with the corresponding range.
	  """
	  old_chunk = [rest for tag, rest in raw_chunk if tag in (" ", "-")]
	  new_chunk = [rest for tag, rest in raw_chunk if tag in (" ", "+")]
	  old_i, old_j = old_range
	  new_i, new_j = new_range
	  if len(old_chunk) != old_j - old_i or len(new_chunk) != new_j - new_i:
	    return None
	  return (old_range, new_range, old_chunk, new_chunk)


	def ParsePatchToChunks(lines, name="<patch>"):
	  """Parses a patch from a list of lines.

	  Everything up to and including the first line starting with "+++" is
	  skipped.  After that, each "@@" header starts a chunk and the following
	  lines tagged " ", "-" or "+" form its body.

	  Args:
	    lines: The lines of a unified diff.
	    name: Label used as a prefix in log messages.

	  Returns:
	    A list of chunks (possibly empty), where each chunk is a tuple:

	      old_range, new_range, old_lines, new_lines

	    The ranges are (i, j) pairs of list indices.  Returns None if there's a
	    problem: a chunk whose length disagrees with its header, headers that
	    are out of order or inconsistent, a diff line before the first header,
	    or unrecognized input before any chunk was read.  Unrecognized input
	    after at least one chunk line ends parsing without being an error.
	  """
	  lineno = 0
	  raw_chunk = []
	  chunks = []
	  old_range = new_range = None
	  old_last = new_last = 0
	  in_prelude = True
	  for line in lines:
	    lineno += 1
	    if in_prelude:
	      # Skip leading lines until after we've seen one starting with '+++'
	      if line.startswith("+++"):
	        in_prelude = False
	      continue
	    match = _CHUNK_RE.match(line)
	    if match:
	      if raw_chunk:
	        # Process the lines in the previous chunk
	        chunk = _BuildChunk(raw_chunk, old_range, new_range)
	        if chunk is None:
	          logging.warning("%s:%s: previous chunk has incorrect length",
	                          name, lineno)
	          return None
	        chunks.append(chunk)
	        raw_chunk = []
	      # Parse the @@ header; an omitted count means one line.
	      old_ln = int(match.group(1))
	      old_n = int(match.group(2) or 1)
	      new_ln = int(match.group(3))
	      new_n = int(match.group(4) or 1)
	      # Convert the numbers to list indices we can use
	      old_i, old_j = old_range = _HeaderRange(old_ln, old_n)
	      new_i, new_j = new_range = _HeaderRange(new_ln, new_n)
	      # Check header consistency with previous header
	      if old_i < old_last or new_i < new_last:
	        logging.warning("%s:%s: chunk header out of order: %r",
	                        name, lineno, line)
	        return None
	      if old_i - old_last != new_i - new_last:
	        logging.warning("%s:%s: inconsistent chunk header: %r",
	                        name, lineno, line)
	        return None
	      old_last = old_j
	      new_last = new_j
	      continue
	    # Slice rather than index so that an empty string does not raise.
	    tag, rest = line[:1], line[1:]
	    if tag in (" ", "-", "+"):
	      if old_range is None:
	        # Without a header there is no range to check this line against.
	        logging.warning("%s:%d: diff line before any chunk header: %r",
	                        name, lineno, line)
	        return None
	      raw_chunk.append((tag, rest))
	    elif line.startswith(_NO_NEWLINE_MESSAGE):
	      # TODO(guido): need to check that no more lines follow for this file
	      if raw_chunk:
	        last_tag, last_rest = raw_chunk[-1]
	        if last_rest.endswith("\n"):
	          raw_chunk[-1] = (last_tag, last_rest[:-1])
	    else:
	      # Only log if it's a non-blank line.  Blank lines we see a lot.
	      if line and line.strip():
	        logging.warning("%s:%d: indecypherable input: %r",
	                        name, lineno, line)
	      if chunks or raw_chunk:
	        break  # Trailing garbage isn't so bad
	      return None
	  if raw_chunk:
	    # Process the lines in the last chunk
	    chunk = _BuildChunk(raw_chunk, old_range, new_range)
	    if chunk is None:
	      logging.warning("%s:%s: last chunk has incorrect length",
	                      name, lineno)
	      return None
	    chunks.append(chunk)
	  return chunks


	# TODO: can we share some of this code with ParsePatchToChunks?
	def ParsePatchToLines(lines):
	  """Parses a patch from a list of lines.

	  Every input line except "no newline at end of file" markers is echoed in
	  the result, paired with its line numbers in the old and new file.

	  Args:
	    lines: The lines of a unified diff.

	  Returns:
	    None on error (a line starting with "@" that is not a valid chunk
	    header), otherwise a list of 3-tuples:
	      (old_line_no, new_line_no, line)

	    A line number can be 0 if it doesn't exist in the old/new file.  Prelude
	    lines, chunk headers, unrecognized lines, and diff lines that appear
	    before the first chunk header all get (0, 0).
	  """
	  result = []
	  in_prelude = True
	  # Next line numbers to hand out; None until a chunk header supplies them.
	  old_ln = new_ln = None
	  for line in lines:
	    if in_prelude:
	      result.append((0, 0, line))
	      # Skip leading lines until after we've seen one starting with '+++'
	      if line.startswith("+++"):
	        in_prelude = False
	      continue
	    if line.startswith("@"):
	      result.append((0, 0, line))
	      match = _CHUNK_RE.match(line)
	      if not match:
	        logging.warning("ParsePatchToLines match failed on %s", line)
	        return None
	      old_ln = int(match.group(1))
	      new_ln = int(match.group(3))
	      continue
	    # Slice rather than index so that an empty string does not raise.
	    tag = line[:1]
	    if old_ln is None and tag in ("-", "+", " "):
	      # No header seen yet, so there are no line numbers to assign.
	      result.append((0, 0, line))
	    elif tag == "-":
	      result.append((old_ln, 0, line))
	      old_ln += 1
	    elif tag == "+":
	      result.append((0, new_ln, line))
	      new_ln += 1
	    elif tag == " ":
	      result.append((old_ln, new_ln, line))
	      old_ln += 1
	      new_ln += 1
	    elif not line.startswith(_NO_NEWLINE_MESSAGE):
	      # Something else, could be property changes etc.
	      result.append((0, 0, line))
	  return result