| # Copyright 2008 Google Inc. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Utility to read and apply a unified diff without forking patch(1). |
| |
| For a discussion of the unified diff format, see my blog on Artima: |
| http://www.artima.com/weblogs/viewpost.jsp?thread=164293 |
| """ |
| |
| import difflib |
| import logging |
| import re |
| import sys |
| |
| |
| _CHUNK_RE = re.compile(r""" |
| @@ |
| \s+ |
| - |
| (?: (\d+) (?: , (\d+) )?) |
| \s+ |
| \+ |
| (?: (\d+) (?: , (\d+) )?) |
| \s+ |
| @@ |
| """, re.VERBOSE) |
| _MULTI_WAY_CHUNK_RE = re.compile(r""" |
| @@@@* |
| \s+ |
| - |
| (?: (\d+) (?: , (\d+) )?) |
| \s+ |
| [^@]* |
| \s+ |
| \+ |
| (?: (\d+) (?: , (\d+) )?) |
| \s+ |
| @@@@* |
| """, re.VERBOSE) |
| |
| |
| def PatchLines(old_lines, patch_lines, name="<patch>"): |
| """Patches the old_lines with patches read from patch_lines. |
| |
| This only reads unified diffs. The header lines are ignored. |
| Yields (tag, old, new) tuples where old and new are lists of lines. |
| The tag can either start with "error" or be a tag from difflib: "equal", |
| "insert", "delete", "replace". After "error" is yielded, no more |
| tuples are yielded. It is possible that consecutive "equal" tuples |
| are yielded. |
| """ |
| chunks = ParsePatchToChunks(patch_lines, name) |
| if chunks is None: |
| return iter([("error: ParsePatchToChunks failed", [], [])]) |
| return PatchChunks(old_lines, chunks) |
| |
| |
| def PatchChunks(old_lines, chunks): |
| """Patche old_lines with chunks. |
| |
| Yields (tag, old, new) tuples where old and new are lists of lines. |
| The tag can either start with "error" or be a tag from difflib: "equal", |
| "insert", "delete", "replace". After "error" is yielded, no more |
| tuples are yielded. It is possible that consecutive "equal" tuples |
| are yielded. |
| """ |
| if not chunks: |
| # The patch is a no-op |
| yield ("equal", old_lines, old_lines) |
| return |
| |
| old_pos = 0 |
| for (old_i, old_j), (new_i, new_j), old_chunk, new_chunk in chunks: |
| eq = old_lines[old_pos:old_i] |
| if eq: |
| yield "equal", eq, eq |
| old_pos = old_i |
| # Check that the patch matches the target file |
| if old_lines[old_i:old_j] != old_chunk: |
| logging.error("mismatch:%s.%s.", old_lines[old_i:old_j], old_chunk) |
| yield ("error: old chunk mismatch", old_lines[old_i:old_j], old_chunk) |
| return |
| # TODO(guido): ParsePatch knows the diff details, but throws the info away |
| sm = difflib.SequenceMatcher(None, old_chunk, new_chunk) |
| for tag, i1, i2, j1, j2 in sm.get_opcodes(): |
| yield tag, old_chunk[i1:i2], new_chunk[j1:j2] |
| old_pos = old_j |
| |
| # Copy the final matching chunk if any. |
| eq = old_lines[old_pos:] |
| if eq: |
| yield ("equal", eq, eq) |
| |
| |
| _NO_NEWLINE_MESSAGE = "\\ No newline at end of file" |
| |
| |
| def ParsePatchToChunks(lines, name="<patch>"): |
| """Parses a patch from a list of lines. |
| |
| Return a list of chunks, where each chunk is a tuple: |
| |
| old_range, new_range, old_lines, new_lines |
| |
| Returns a list of chunks (possibly empty); or None if there's a problem. |
| """ |
| lineno = 0 |
| raw_chunk = [] |
| chunks = [] |
| old_range = new_range = None |
| old_last = new_last = 0 |
| in_prelude = True |
| for line in lines: |
| lineno += 1 |
| if in_prelude: |
| # Skip leading lines until after we've seen one starting with '+++' |
| if line.startswith("+++"): |
| in_prelude = False |
| continue |
| match = _CHUNK_RE.match(line) |
| if match: |
| if raw_chunk: |
| # Process the lines in the previous chunk |
| old_chunk = [] |
| new_chunk = [] |
| for tag, rest in raw_chunk: |
| if tag in (" ", "-"): |
| old_chunk.append(rest) |
| if tag in (" ", "+"): |
| new_chunk.append(rest) |
| # Check consistency |
| old_i, old_j = old_range |
| new_i, new_j = new_range |
| if len(old_chunk) != old_j - old_i or len(new_chunk) != new_j - new_i: |
| logging.warn("%s:%s: previous chunk has incorrect length", |
| name, lineno) |
| return None |
| chunks.append((old_range, new_range, old_chunk, new_chunk)) |
| raw_chunk = [] |
| # Parse the @@ header |
| old_ln, old_n, new_ln, new_n = match.groups() |
| old_ln, old_n, new_ln, new_n = map(long, |
| (old_ln, old_n or 1, |
| new_ln, new_n or 1)) |
| # Convert the numbers to list indices we can use |
| if old_n == 0: |
| old_i = old_ln |
| else: |
| old_i = old_ln - 1 |
| old_j = old_i + old_n |
| old_range = old_i, old_j |
| if new_n == 0: |
| new_i = new_ln |
| else: |
| new_i = new_ln - 1 |
| new_j =new_i + new_n |
| new_range = new_i, new_j |
| # Check header consistency with previous header |
| if old_i < old_last or new_i < new_last: |
| logging.warn("%s:%s: chunk header out of order: %r", |
| name, lineno, line) |
| return None |
| if old_i - old_last != new_i - new_last: |
| logging.warn("%s:%s: inconsistent chunk header: %r", |
| name, lineno, line) |
| return None |
| old_last = old_j |
| new_last = new_j |
| else: |
| tag, rest = line[0], line[1:] |
| if tag in (" ", "-", "+"): |
| raw_chunk.append((tag, rest)) |
| elif line.startswith(_NO_NEWLINE_MESSAGE): |
| # TODO(guido): need to check that no more lines follow for this file |
| if raw_chunk: |
| last_tag, last_rest = raw_chunk[-1] |
| if last_rest.endswith("\n"): |
| raw_chunk[-1] = (last_tag, last_rest[:-1]) |
| else: |
| # Only log if it's a non-blank line. Blank lines we see a lot. |
| if line and line.strip(): |
| logging.warn("%s:%d: indecypherable input: %r", name, lineno, line) |
| if chunks or raw_chunk: |
| break # Trailing garbage isn't so bad |
| return None |
| if raw_chunk: |
| # Process the lines in the last chunk |
| old_chunk = [] |
| new_chunk = [] |
| for tag, rest in raw_chunk: |
| if tag in (" ", "-"): |
| old_chunk.append(rest) |
| if tag in (" ", "+"): |
| new_chunk.append(rest) |
| # Check consistency |
| old_i, old_j = old_range |
| new_i, new_j = new_range |
| if len(old_chunk) != old_j - old_i or len(new_chunk) != new_j - new_i: |
| print >>sys.stderr, ("%s:%s: last chunk has incorrect length" % |
| (name, lineno)) |
| return None |
| chunks.append((old_range, new_range, old_chunk, new_chunk)) |
| raw_chunk = [] |
| return chunks |
| |
| |
| # TODO: can we share some of this code with ParsePatchToChunks? |
| def ParsePatchToLines(lines, multi_way_diff=False): |
| """Parses a patch from a list of lines. |
| |
| Returns None on error, otherwise a list of 3-tuples: |
| (old_line_no, new_line_no, line) |
| |
| A line number can be 0 if it doesn't exist in the old/new file. |
| """ |
| if multi_way_diff: |
| hdr_pat = _MULTI_WAY_CHUNK_RE |
| else: |
| hdr_pat = _CHUNK_RE |
| |
| result = [] |
| in_prelude = True |
| for line in lines: |
| if in_prelude: |
| result.append((0, 0, line)) |
| # Skip leading lines until after we've seen one starting with '+++' |
| if line.startswith("+++"): |
| in_prelude = False |
| elif line.startswith("@"): |
| result.append((0, 0, line)) |
| match = hdr_pat.match(line) |
| if not match: |
| logging.warn("ParsePatchToLines match failed on %s", line) |
| return None |
| old_ln = int(match.groups()[0]) |
| new_ln = int(match.groups()[2]) |
| else: |
| if line[0] == "-": |
| result.append((old_ln, 0, line)) |
| old_ln += 1 |
| elif line[0] == "+": |
| result.append((0, new_ln, line)) |
| new_ln += 1 |
| elif line[0] == " ": |
| result.append((old_ln, new_ln, line)) |
| old_ln += 1 |
| new_ln += 1 |
| elif line.startswith(_NO_NEWLINE_MESSAGE): |
| continue |
| else: # Something else, could be property changes etc. |
| result.append((0, 0, line)) |
| return result |