| /* |
| * Copyright (C) 2008-2009, Google Inc. |
| * and other copyright owners as documented in the project's IP log. |
| * |
| * This program and the accompanying materials are made available |
| * under the terms of the Eclipse Distribution License v1.0 which |
| * accompanies this distribution, is reproduced below, and is |
| * available at http://www.eclipse.org/org/documents/edl-v10.php |
| * |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials provided |
| * with the distribution. |
| * |
| * - Neither the name of the Eclipse Foundation, Inc. nor the |
| * names of its contributors may be used to endorse or promote |
| * products derived from this software without specific prior |
| * written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
| * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
| * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| package org.eclipse.jgit.patch; |
| |
| import static org.eclipse.jgit.lib.Constants.encodeASCII; |
| import static org.eclipse.jgit.util.RawParseUtils.decode; |
| import static org.eclipse.jgit.util.RawParseUtils.decodeNoFallback; |
| import static org.eclipse.jgit.util.RawParseUtils.extractBinaryString; |
| import static org.eclipse.jgit.util.RawParseUtils.match; |
| import static org.eclipse.jgit.util.RawParseUtils.nextLF; |
| import static org.eclipse.jgit.util.RawParseUtils.parseBase10; |
| |
| import java.io.IOException; |
| import java.nio.charset.CharacterCodingException; |
| import java.nio.charset.Charset; |
| import java.text.MessageFormat; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.List; |
| |
| import org.eclipse.jgit.diff.DiffEntry; |
| import org.eclipse.jgit.diff.EditList; |
| import org.eclipse.jgit.internal.JGitText; |
| import org.eclipse.jgit.lib.AbbreviatedObjectId; |
| import org.eclipse.jgit.lib.Constants; |
| import org.eclipse.jgit.lib.FileMode; |
| import org.eclipse.jgit.util.QuotedString; |
| import org.eclipse.jgit.util.RawParseUtils; |
| import org.eclipse.jgit.util.TemporaryBuffer; |
| |
| /** |
| * Patch header describing an action for a single file path. |
| */ |
| public class FileHeader extends DiffEntry { |
| private static final byte[] OLD_MODE = encodeASCII("old mode "); //$NON-NLS-1$ |
| |
| private static final byte[] NEW_MODE = encodeASCII("new mode "); //$NON-NLS-1$ |
| |
| static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode "); //$NON-NLS-1$ |
| |
| static final byte[] NEW_FILE_MODE = encodeASCII("new file mode "); //$NON-NLS-1$ |
| |
| private static final byte[] COPY_FROM = encodeASCII("copy from "); //$NON-NLS-1$ |
| |
| private static final byte[] COPY_TO = encodeASCII("copy to "); //$NON-NLS-1$ |
| |
| private static final byte[] RENAME_OLD = encodeASCII("rename old "); //$NON-NLS-1$ |
| |
| private static final byte[] RENAME_NEW = encodeASCII("rename new "); //$NON-NLS-1$ |
| |
| private static final byte[] RENAME_FROM = encodeASCII("rename from "); //$NON-NLS-1$ |
| |
| private static final byte[] RENAME_TO = encodeASCII("rename to "); //$NON-NLS-1$ |
| |
| private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index "); //$NON-NLS-1$ |
| |
| private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index "); //$NON-NLS-1$ |
| |
| static final byte[] INDEX = encodeASCII("index "); //$NON-NLS-1$ |
| |
| static final byte[] OLD_NAME = encodeASCII("--- "); //$NON-NLS-1$ |
| |
| static final byte[] NEW_NAME = encodeASCII("+++ "); //$NON-NLS-1$ |
| |
| /** Type of patch used by this file. */ |
| public static enum PatchType { |
| /** A traditional unified diff style patch of a text file. */ |
| UNIFIED, |
| |
| /** An empty patch with a message "Binary files ... differ" */ |
| BINARY, |
| |
| /** A Git binary patch, holding pre and post image deltas */ |
| GIT_BINARY; |
| } |
| |
| /** Buffer holding the patch data for this file. */ |
| final byte[] buf; |
| |
| /** Offset within {@link #buf} to the "diff ..." line. */ |
| final int startOffset; |
| |
| /** Position 1 past the end of this file within {@link #buf}. */ |
| int endOffset; |
| |
| /** Type of patch used to modify this file */ |
| PatchType patchType; |
| |
| /** The hunks of this file */ |
| private List<HunkHeader> hunks; |
| |
| /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */ |
| BinaryHunk forwardBinaryHunk; |
| |
| /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */ |
| BinaryHunk reverseBinaryHunk; |
| |
| /** |
| * Constructs a new FileHeader |
| * |
| * @param headerLines |
| * buffer holding the diff header for this file |
| * @param edits |
| * the edits for this file |
| * @param type |
| * the type of patch used to modify this file |
| */ |
| public FileHeader(byte[] headerLines, EditList edits, PatchType type) { |
| this(headerLines, 0); |
| endOffset = headerLines.length; |
| int ptr = parseGitFileName(Patch.DIFF_GIT.length, headerLines.length); |
| parseGitHeaders(ptr, headerLines.length); |
| this.patchType = type; |
| addHunk(new HunkHeader(this, edits)); |
| } |
| |
/**
 * Create a header positioned inside an existing patch buffer.
 * <p>
 * The header starts out as a {@link PatchType#UNIFIED} patch with change
 * type MODIFY; header parsing replaces these when the patch says otherwise.
 *
 * @param b
 *            buffer holding the patch data
 * @param offset
 *            position in {@code b} of this file's "diff ..." line
 */
FileHeader(byte[] b, int offset) {
    buf = b;
    startOffset = offset;
    patchType = PatchType.UNIFIED;
    changeType = ChangeType.MODIFY; // unless otherwise designated
}
| |
| int getParentCount() { |
| return 1; |
| } |
| |
| /** |
| * Get the byte array holding this file's patch script. |
| * |
| * @return the byte array holding this file's patch script. |
| */ |
| public byte[] getBuffer() { |
| return buf; |
| } |
| |
| /** |
| * Get offset of the start of this file's script in {@link #getBuffer()}. |
| * |
| * @return offset of the start of this file's script in |
| * {@link #getBuffer()}. |
| */ |
| public int getStartOffset() { |
| return startOffset; |
| } |
| |
| /** |
| * Get offset one past the end of the file script. |
| * |
| * @return offset one past the end of the file script. |
| */ |
| public int getEndOffset() { |
| return endOffset; |
| } |
| |
| /** |
| * Convert the patch script for this file into a string. |
| * <p> |
| * The default character encoding |
| * ({@link org.eclipse.jgit.lib.Constants#CHARSET}) is assumed for both the |
| * old and new files. |
| * |
| * @return the patch script, as a Unicode string. |
| */ |
| public String getScriptText() { |
| return getScriptText(null, null); |
| } |
| |
| /** |
| * Convert the patch script for this file into a string. |
| * |
| * @param oldCharset |
| * hint character set to decode the old lines with. |
| * @param newCharset |
| * hint character set to decode the new lines with. |
| * @return the patch script, as a Unicode string. |
| */ |
| public String getScriptText(Charset oldCharset, Charset newCharset) { |
| return getScriptText(new Charset[] { oldCharset, newCharset }); |
| } |
| |
| String getScriptText(Charset[] charsetGuess) { |
| if (getHunks().isEmpty()) { |
| // If we have no hunks then we can safely assume the entire |
| // patch is a binary style patch, or a meta-data only style |
| // patch. Either way the encoding of the headers should be |
| // strictly 7-bit US-ASCII and the body is either 7-bit ASCII |
| // (due to the base 85 encoding used for a BinaryHunk) or is |
| // arbitrary noise we have chosen to ignore and not understand |
| // (e.g. the message "Binary files ... differ"). |
| // |
| return extractBinaryString(buf, startOffset, endOffset); |
| } |
| |
| if (charsetGuess != null && charsetGuess.length != getParentCount() + 1) |
| throw new IllegalArgumentException(MessageFormat.format( |
| JGitText.get().expectedCharacterEncodingGuesses, |
| Integer.valueOf(getParentCount() + 1))); |
| |
| if (trySimpleConversion(charsetGuess)) { |
| Charset cs = charsetGuess != null ? charsetGuess[0] : null; |
| if (cs == null) |
| cs = Constants.CHARSET; |
| try { |
| return decodeNoFallback(cs, buf, startOffset, endOffset); |
| } catch (CharacterCodingException cee) { |
| // Try the much slower, more-memory intensive version which |
| // can handle a character set conversion patch. |
| } |
| } |
| |
| final StringBuilder r = new StringBuilder(endOffset - startOffset); |
| |
| // Always treat the headers as US-ASCII; Git file names are encoded |
| // in a C style escape if any character has the high-bit set. |
| // |
| final int hdrEnd = getHunks().get(0).getStartOffset(); |
| for (int ptr = startOffset; ptr < hdrEnd;) { |
| final int eol = Math.min(hdrEnd, nextLF(buf, ptr)); |
| r.append(extractBinaryString(buf, ptr, eol)); |
| ptr = eol; |
| } |
| |
| final String[] files = extractFileLines(charsetGuess); |
| final int[] offsets = new int[files.length]; |
| for (HunkHeader h : getHunks()) |
| h.extractFileLines(r, files, offsets); |
| return r.toString(); |
| } |
| |
| private static boolean trySimpleConversion(Charset[] charsetGuess) { |
| if (charsetGuess == null) |
| return true; |
| for (int i = 1; i < charsetGuess.length; i++) { |
| if (charsetGuess[i] != charsetGuess[0]) |
| return false; |
| } |
| return true; |
| } |
| |
| private String[] extractFileLines(Charset[] csGuess) { |
| final TemporaryBuffer[] tmp = new TemporaryBuffer[getParentCount() + 1]; |
| try { |
| for (int i = 0; i < tmp.length; i++) |
| tmp[i] = new TemporaryBuffer.Heap(Integer.MAX_VALUE); |
| for (HunkHeader h : getHunks()) |
| h.extractFileLines(tmp); |
| |
| final String[] r = new String[tmp.length]; |
| for (int i = 0; i < tmp.length; i++) { |
| Charset cs = csGuess != null ? csGuess[i] : null; |
| if (cs == null) |
| cs = Constants.CHARSET; |
| r[i] = RawParseUtils.decode(cs, tmp[i].toByteArray()); |
| } |
| return r; |
| } catch (IOException ioe) { |
| throw new RuntimeException(JGitText.get().cannotConvertScriptToText, ioe); |
| } |
| } |
| |
| /** |
| * Get style of patch used to modify this file. |
| * |
| * @return style of patch used to modify this file. |
| */ |
| public PatchType getPatchType() { |
| return patchType; |
| } |
| |
| /** |
| * Whether this patch modifies metadata about a file |
| * |
| * @return {@code true} if this patch modifies metadata about a file . |
| */ |
| public boolean hasMetaDataChanges() { |
| return changeType != ChangeType.MODIFY || newMode != oldMode; |
| } |
| |
| /** |
| * Get hunks altering this file; in order of appearance in patch |
| * |
| * @return hunks altering this file; in order of appearance in patch. |
| */ |
| public List<? extends HunkHeader> getHunks() { |
| if (hunks == null) |
| return Collections.emptyList(); |
| return hunks; |
| } |
| |
| void addHunk(HunkHeader h) { |
| if (h.getFileHeader() != this) |
| throw new IllegalArgumentException(JGitText.get().hunkBelongsToAnotherFile); |
| if (hunks == null) |
| hunks = new ArrayList<>(); |
| hunks.add(h); |
| } |
| |
| HunkHeader newHunkHeader(int offset) { |
| return new HunkHeader(this, offset); |
| } |
| |
| /** |
| * Get the new-image delta/literal if this is a |
| * {@link PatchType#GIT_BINARY}. |
| * |
| * @return the new-image delta/literal if this is a |
| * {@link PatchType#GIT_BINARY}. |
| */ |
| public BinaryHunk getForwardBinaryHunk() { |
| return forwardBinaryHunk; |
| } |
| |
| /** |
| * Get the old-image delta/literal if this is a |
| * {@link PatchType#GIT_BINARY}. |
| * |
| * @return the old-image delta/literal if this is a |
| * {@link PatchType#GIT_BINARY}. |
| */ |
| public BinaryHunk getReverseBinaryHunk() { |
| return reverseBinaryHunk; |
| } |
| |
| /** |
| * Convert to a list describing the content edits performed on this file. |
| * |
| * @return a list describing the content edits performed on this file. |
| */ |
| public EditList toEditList() { |
| final EditList r = new EditList(); |
| for (HunkHeader hunk : hunks) |
| r.addAll(hunk.toEditList()); |
| return r; |
| } |
| |
| /** |
| * Parse a "diff --git" or "diff --cc" line. |
| * |
| * @param ptr |
| * first character after the "diff --git " or "diff --cc " part. |
| * @param end |
| * one past the last position to parse. |
| * @return first character after the LF at the end of the line; -1 on error. |
| */ |
| int parseGitFileName(int ptr, int end) { |
| final int eol = nextLF(buf, ptr); |
| final int bol = ptr; |
| if (eol >= end) { |
| return -1; |
| } |
| |
| // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first |
| // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There |
| // is only one way to split the line such that text to the left |
| // of the space matches the text to the right, excluding the part |
| // before the first slash. |
| // |
| |
| final int aStart = nextLF(buf, ptr, '/'); |
| if (aStart >= eol) |
| return eol; |
| |
| while (ptr < eol) { |
| final int sp = nextLF(buf, ptr, ' '); |
| if (sp >= eol) { |
| // We can't split the header, it isn't valid. |
| // This may be OK if this is a rename patch. |
| // |
| return eol; |
| } |
| final int bStart = nextLF(buf, sp, '/'); |
| if (bStart >= eol) |
| return eol; |
| |
| // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1] |
| // we have a valid split. |
| // |
| if (eq(aStart, sp - 1, bStart, eol - 1)) { |
| if (buf[bol] == '"') { |
| // We're a double quoted name. The region better end |
| // in a double quote too, and we need to decode the |
| // characters before reading the name. |
| // |
| if (buf[sp - 2] != '"') { |
| return eol; |
| } |
| oldPath = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1); |
| oldPath = p1(oldPath); |
| } else { |
| oldPath = decode(Constants.CHARSET, buf, aStart, sp - 1); |
| } |
| newPath = oldPath; |
| return eol; |
| } |
| |
| // This split wasn't correct. Move past the space and try |
| // another split as the space must be part of the file name. |
| // |
| ptr = sp; |
| } |
| |
| return eol; |
| } |
| |
| int parseGitHeaders(int ptr, int end) { |
| while (ptr < end) { |
| final int eol = nextLF(buf, ptr); |
| if (isHunkHdr(buf, ptr, eol) >= 1) { |
| // First hunk header; break out and parse them later. |
| break; |
| |
| } else if (match(buf, ptr, OLD_NAME) >= 0) { |
| parseOldName(ptr, eol); |
| |
| } else if (match(buf, ptr, NEW_NAME) >= 0) { |
| parseNewName(ptr, eol); |
| |
| } else if (match(buf, ptr, OLD_MODE) >= 0) { |
| oldMode = parseFileMode(ptr + OLD_MODE.length, eol); |
| |
| } else if (match(buf, ptr, NEW_MODE) >= 0) { |
| newMode = parseFileMode(ptr + NEW_MODE.length, eol); |
| |
| } else if (match(buf, ptr, DELETED_FILE_MODE) >= 0) { |
| oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol); |
| newMode = FileMode.MISSING; |
| changeType = ChangeType.DELETE; |
| |
| } else if (match(buf, ptr, NEW_FILE_MODE) >= 0) { |
| parseNewFileMode(ptr, eol); |
| |
| } else if (match(buf, ptr, COPY_FROM) >= 0) { |
| oldPath = parseName(oldPath, ptr + COPY_FROM.length, eol); |
| changeType = ChangeType.COPY; |
| |
| } else if (match(buf, ptr, COPY_TO) >= 0) { |
| newPath = parseName(newPath, ptr + COPY_TO.length, eol); |
| changeType = ChangeType.COPY; |
| |
| } else if (match(buf, ptr, RENAME_OLD) >= 0) { |
| oldPath = parseName(oldPath, ptr + RENAME_OLD.length, eol); |
| changeType = ChangeType.RENAME; |
| |
| } else if (match(buf, ptr, RENAME_NEW) >= 0) { |
| newPath = parseName(newPath, ptr + RENAME_NEW.length, eol); |
| changeType = ChangeType.RENAME; |
| |
| } else if (match(buf, ptr, RENAME_FROM) >= 0) { |
| oldPath = parseName(oldPath, ptr + RENAME_FROM.length, eol); |
| changeType = ChangeType.RENAME; |
| |
| } else if (match(buf, ptr, RENAME_TO) >= 0) { |
| newPath = parseName(newPath, ptr + RENAME_TO.length, eol); |
| changeType = ChangeType.RENAME; |
| |
| } else if (match(buf, ptr, SIMILARITY_INDEX) >= 0) { |
| score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null); |
| |
| } else if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) { |
| score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null); |
| |
| } else if (match(buf, ptr, INDEX) >= 0) { |
| parseIndexLine(ptr + INDEX.length, eol); |
| |
| } else { |
| // Probably an empty patch (stat dirty). |
| break; |
| } |
| |
| ptr = eol; |
| } |
| return ptr; |
| } |
| |
| void parseOldName(int ptr, int eol) { |
| oldPath = p1(parseName(oldPath, ptr + OLD_NAME.length, eol)); |
| if (oldPath == DEV_NULL) |
| changeType = ChangeType.ADD; |
| } |
| |
| void parseNewName(int ptr, int eol) { |
| newPath = p1(parseName(newPath, ptr + NEW_NAME.length, eol)); |
| if (newPath == DEV_NULL) |
| changeType = ChangeType.DELETE; |
| } |
| |
| void parseNewFileMode(int ptr, int eol) { |
| oldMode = FileMode.MISSING; |
| newMode = parseFileMode(ptr + NEW_FILE_MODE.length, eol); |
| changeType = ChangeType.ADD; |
| } |
| |
| int parseTraditionalHeaders(int ptr, int end) { |
| while (ptr < end) { |
| final int eol = nextLF(buf, ptr); |
| if (isHunkHdr(buf, ptr, eol) >= 1) { |
| // First hunk header; break out and parse them later. |
| break; |
| |
| } else if (match(buf, ptr, OLD_NAME) >= 0) { |
| parseOldName(ptr, eol); |
| |
| } else if (match(buf, ptr, NEW_NAME) >= 0) { |
| parseNewName(ptr, eol); |
| |
| } else { |
| // Possibly an empty patch. |
| break; |
| } |
| |
| ptr = eol; |
| } |
| return ptr; |
| } |
| |
| private String parseName(String expect, int ptr, int end) { |
| if (ptr == end) |
| return expect; |
| |
| String r; |
| if (buf[ptr] == '"') { |
| // New style GNU diff format |
| // |
| r = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1); |
| } else { |
| // Older style GNU diff format, an optional tab ends the name. |
| // |
| int tab = end; |
| while (ptr < tab && buf[tab - 1] != '\t') |
| tab--; |
| if (ptr == tab) |
| tab = end; |
| r = decode(Constants.CHARSET, buf, ptr, tab - 1); |
| } |
| |
| if (r.equals(DEV_NULL)) |
| r = DEV_NULL; |
| return r; |
| } |
| |
| private static String p1(final String r) { |
| final int s = r.indexOf('/'); |
| return s > 0 ? r.substring(s + 1) : r; |
| } |
| |
| FileMode parseFileMode(int ptr, int end) { |
| int tmp = 0; |
| while (ptr < end - 1) { |
| tmp <<= 3; |
| tmp += buf[ptr++] - '0'; |
| } |
| return FileMode.fromBits(tmp); |
| } |
| |
| void parseIndexLine(int ptr, int end) { |
| // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1 |
| // can be unique abbreviations |
| // |
| final int dot2 = nextLF(buf, ptr, '.'); |
| final int mode = nextLF(buf, dot2, ' '); |
| |
| oldId = AbbreviatedObjectId.fromString(buf, ptr, dot2 - 1); |
| newId = AbbreviatedObjectId.fromString(buf, dot2 + 1, mode - 1); |
| |
| if (mode < end) |
| newMode = oldMode = parseFileMode(mode, end); |
| } |
| |
| private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) { |
| if (aEnd - aPtr != bEnd - bPtr) { |
| return false; |
| } |
| while (aPtr < aEnd) { |
| if (buf[aPtr++] != buf[bPtr++]) |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Determine if this is a patch hunk header. |
| * |
| * @param buf |
| * the buffer to scan |
| * @param start |
| * first position in the buffer to evaluate |
| * @param end |
| * last position to consider; usually the end of the buffer ( |
| * <code>buf.length</code>) or the first position on the next |
| * line. This is only used to avoid very long runs of '@' from |
| * killing the scan loop. |
| * @return the number of "ancestor revisions" in the hunk header. A |
| * traditional two-way diff ("@@ -...") returns 1; a combined diff |
| * for a 3 way-merge returns 3. If this is not a hunk header, 0 is |
| * returned instead. |
| */ |
| static int isHunkHdr(byte[] buf, int start, int end) { |
| int ptr = start; |
| while (ptr < end && buf[ptr] == '@') |
| ptr++; |
| if (ptr - start < 2) |
| return 0; |
| if (ptr == end || buf[ptr++] != ' ') |
| return 0; |
| if (ptr == end || buf[ptr++] != '-') |
| return 0; |
| return (ptr - 3) - start; |
| } |
| } |