# blob: 7175cd8295b4782ce2cd895fb01c02d809760cff [file] [log] [blame]
# Copyright 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility to read and apply a unified diff without forking patch(1).
For a discussion of the unified diff format, see my blog on Artima:
http://www.artima.com/weblogs/viewpost.jsp?thread=164293
"""
import difflib
import logging
import re
import sys
# Matches a unified-diff chunk header of the form "@@ -a[,b] +c[,d] @@".
# Groups 1-4 capture a, b, c and d, which the parsers below use as the old
# start line, old line count, new start line and new line count. Groups 2 and
# 4 are None when the optional ",count" part is omitted.
_CHUNK_RE = re.compile(r"""
@@
\s+
-
(?: (\d+) (?: , (\d+) )?)
\s+
\+
(?: (\d+) (?: , (\d+) )?)
\s+
@@
""", re.VERBOSE)
def PatchLines(old_lines, patch_lines, name="<patch>"):
    """Parse a unified diff and apply it to old_lines.

    Only unified diffs are understood; the header lines of the patch are
    ignored.

    Args:
      old_lines: The lines of the file to be patched.
      patch_lines: The lines of the unified diff.
      name: Label passed on to the parser for its log messages.

    Returns:
      An iterator of (tag, old, new) tuples, old and new being lists of
      lines. The tag either starts with "error" or is one of the difflib
      tags "equal", "insert", "delete", "replace". Nothing follows an
      "error" tuple. Consecutive "equal" tuples are possible.
    """
    parsed = ParsePatchToChunks(patch_lines, name)
    if parsed is not None:
        return PatchChunks(old_lines, parsed)
    # The parser gave up; report that as a single error tuple.
    failure = ("error: ParsePatchToChunks failed", [], [])
    return iter([failure])
def PatchChunks(old_lines, chunks):
    """Apply already-parsed chunks to old_lines.

    Args:
      old_lines: The lines of the file to be patched.
      chunks: Sequence of (old_range, new_range, old_chunk, new_chunk)
        tuples; each range is a (start, end) pair of indices into the old
        or new file, and each chunk is a list of lines.

    Yields:
      (tag, old, new) tuples, old and new being lists of lines. The tag
      either starts with "error" or is one of the difflib tags "equal",
      "insert", "delete", "replace". Nothing is yielded after an "error"
      tuple. Consecutive "equal" tuples are possible.
    """
    if not chunks:
        # Nothing to apply: the whole file is reported as unchanged.
        yield ("equal", old_lines, old_lines)
        return

    cursor = 0
    for (start, stop), (_new_start, _new_stop), expected, replacement in chunks:
        # Lines between the previous chunk and this one are untouched.
        untouched = old_lines[cursor:start]
        if untouched:
            yield ("equal", untouched, untouched)
        cursor = start

        # The old side of the chunk must match the file being patched.
        actual = old_lines[start:stop]
        if actual != expected:
            logging.error("mismatch:%s.%s.", actual, expected)
            yield ("error: old chunk mismatch", actual, expected)
            return

        # TODO(guido): ParsePatch knows the diff details, but throws the
        # info away, so the opcodes are recomputed here.
        matcher = difflib.SequenceMatcher(None, expected, replacement)
        for opcode in matcher.get_opcodes():
            tag, i1, i2, j1, j2 = opcode
            yield (tag, expected[i1:i2], replacement[j1:j2])
        cursor = stop

    # Whatever follows the last chunk is unchanged as well.
    tail = old_lines[cursor:]
    if tail:
        yield ("equal", tail, tail)
# Prefix of the marker line saying the preceding line has no trailing newline.
# ParsePatchToChunks strips the "\n" from the previous chunk line when it sees
# this marker; ParsePatchToLines skips the marker line.
_NO_NEWLINE_MESSAGE = "\\ No newline at end of file"
def _FinishChunk(raw_chunk, old_range, new_range):
    """Builds a chunk tuple from the tagged lines collected for one chunk.

    Args:
      raw_chunk: List of (tag, rest) pairs, tag being " ", "-" or "+".
      old_range: (start, end) indices the chunk header announced for the
        old file.
      new_range: (start, end) indices the chunk header announced for the
        new file.

    Returns:
      (old_range, new_range, old_lines, new_lines), or None if the number of
      collected lines disagrees with the ranges from the header.
    """
    # Context lines belong to both sides; "-" only to old, "+" only to new.
    old_chunk = [rest for tag, rest in raw_chunk if tag in (" ", "-")]
    new_chunk = [rest for tag, rest in raw_chunk if tag in (" ", "+")]
    old_i, old_j = old_range
    new_i, new_j = new_range
    if len(old_chunk) != old_j - old_i or len(new_chunk) != new_j - new_i:
        return None
    return (old_range, new_range, old_chunk, new_chunk)


def ParsePatchToChunks(lines, name="<patch>"):
    """Parses a unified diff from a list of lines into chunks.

    Every line up to and including the first one starting with "+++" is
    skipped as header. Parsing stops quietly at the first undecipherable
    line once at least one chunk has been started (trailing garbage).

    Args:
      lines: Iterable of patch lines (strings).
      name: Label used in log messages.

    Returns:
      A list of chunks (possibly empty), where each chunk is a tuple
        old_range, new_range, old_lines, new_lines
      and each range is a (start, end) pair of 0-based list indices with
      end exclusive; or None if there's a problem with the patch (bad chunk
      lengths, out-of-order or inconsistent headers, chunk lines before any
      chunk header, or leading garbage).
    """
    lineno = 0
    raw_chunk = []
    chunks = []
    old_range = new_range = None
    old_last = new_last = 0
    in_prelude = True
    for line in lines:
        lineno += 1
        if in_prelude:
            # Skip leading lines until after we've seen one starting with
            # '+++'
            if line.startswith("+++"):
                in_prelude = False
            continue
        match = _CHUNK_RE.match(line)
        if match:
            if raw_chunk:
                # A new header closes the previous chunk.
                chunk = _FinishChunk(raw_chunk, old_range, new_range)
                if chunk is None:
                    logging.warning(
                        "%s:%s: previous chunk has incorrect length",
                        name, lineno)
                    return None
                chunks.append(chunk)
                raw_chunk = []
            # Parse the @@ header; an omitted count means 1. int() is used
            # instead of the Python-2-only long(); on Python 2 it promotes
            # to long automatically when needed.
            old_ln, old_n, new_ln, new_n = match.groups()
            old_ln = int(old_ln)
            new_ln = int(new_ln)
            old_n = 1 if old_n is None else int(old_n)
            new_n = 1 if new_n is None else int(new_n)
            # Convert the 1-based line numbers to list indices. With a
            # count of 0 the header's line number is used as the index
            # unchanged.
            old_i = old_ln if old_n == 0 else old_ln - 1
            old_j = old_i + old_n
            old_range = old_i, old_j
            new_i = new_ln if new_n == 0 else new_ln - 1
            new_j = new_i + new_n
            new_range = new_i, new_j
            # Check header consistency with previous header
            if old_i < old_last or new_i < new_last:
                logging.warning("%s:%s: chunk header out of order: %r",
                                name, lineno, line)
                return None
            if old_i - old_last != new_i - new_last:
                logging.warning("%s:%s: inconsistent chunk header: %r",
                                name, lineno, line)
                return None
            old_last = old_j
            new_last = new_j
        else:
            # Slice instead of indexing so an empty line cannot raise
            # IndexError; it then falls through to the "blank line" case.
            tag, rest = line[:1], line[1:]
            if tag in (" ", "-", "+"):
                if old_range is None:
                    # No header seen yet, so there is no range to check
                    # these lines against.
                    logging.warning(
                        "%s:%d: chunk line before first chunk header: %r",
                        name, lineno, line)
                    return None
                raw_chunk.append((tag, rest))
            elif line.startswith(_NO_NEWLINE_MESSAGE):
                # TODO(guido): need to check that no more lines follow for
                # this file
                if raw_chunk:
                    last_tag, last_rest = raw_chunk[-1]
                    if last_rest.endswith("\n"):
                        raw_chunk[-1] = (last_tag, last_rest[:-1])
            else:
                # Only log if it's a non-blank line. Blank lines we see a
                # lot.
                if line.strip():
                    logging.warning("%s:%d: indecypherable input: %r",
                                    name, lineno, line)
                if chunks or raw_chunk:
                    break  # Trailing garbage isn't so bad
                return None
    if raw_chunk:
        # Process the lines in the last chunk
        chunk = _FinishChunk(raw_chunk, old_range, new_range)
        if chunk is None:
            logging.warning("%s:%s: last chunk has incorrect length",
                            name, lineno)
            return None
        chunks.append(chunk)
    return chunks
# TODO: can we share some of this code with ParsePatchToChunks?
def ParsePatchToLines(lines):
    """Parses a patch from a list of lines into numbered lines.

    Every line up to and including the first one starting with "+++" is
    treated as header. After that, lines starting with "@" must be chunk
    headers and reset the running old/new line numbers.

    Args:
      lines: Iterable of patch lines (strings).

    Returns:
      None on error (a malformed chunk header, or a "-", "+" or " " line
      before the first chunk header), otherwise a list of 3-tuples:
        (old_line_no, new_line_no, line)
      A line number can be 0 if it doesn't exist in the old/new file.
      Header lines, chunk headers and unrecognised lines get (0, 0, line);
      "No newline at end of file" markers are dropped.
    """
    result = []
    in_prelude = True
    # Running line numbers; None until the first chunk header sets them.
    old_ln = new_ln = None
    for line in lines:
        if in_prelude:
            result.append((0, 0, line))
            # Skip leading lines until after we've seen one starting with
            # '+++'
            if line.startswith("+++"):
                in_prelude = False
        elif line.startswith("@"):
            result.append((0, 0, line))
            match = _CHUNK_RE.match(line)
            if not match:
                logging.warning("ParsePatchToLines match failed on %s", line)
                return None
            old_ln = int(match.group(1))
            new_ln = int(match.group(3))
        else:
            # Slice instead of indexing so an empty line cannot raise
            # IndexError; it is recorded as an unnumbered line below.
            tag = line[:1]
            if tag in ("-", "+", " "):
                if old_ln is None:
                    # Without a chunk header there are no line numbers to
                    # assign.
                    logging.warning(
                        "ParsePatchToLines found chunk line before any "
                        "chunk header: %s", line)
                    return None
                if tag == "-":
                    result.append((old_ln, 0, line))
                    old_ln += 1
                elif tag == "+":
                    result.append((0, new_ln, line))
                    new_ln += 1
                else:
                    result.append((old_ln, new_ln, line))
                    old_ln += 1
                    new_ln += 1
            elif line.startswith(_NO_NEWLINE_MESSAGE):
                continue
            else:  # Something else, could be property changes etc.
                result.append((0, 0, line))
    return result