org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java - jgit - Git at Google

 /*
  * Copyright (C) 2008-2009, Google Inc.
  * and other copyright owners as documented in the project's IP log.
  *
  * This program and the accompanying materials are made available
  * under the terms of the Eclipse Distribution License v1.0 which
  * accompanies this distribution, is reproduced below, and is
  * available at http://www.eclipse.org/org/documents/edl-v10.php
  *
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or
  * without modification, are permitted provided that the following
  * conditions are met:
  *
  * - Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  *
  * - Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following
  *   disclaimer in the documentation and/or other materials provided
  *   with the distribution.
  *
  * - Neither the name of the Eclipse Foundation, Inc. nor the
  *   names of its contributors may be used to endorse or promote
  *   products derived from this software without specific prior
  *   written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 package org.eclipse.jgit.patch;

 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.eclipse.jgit.lib.Constants.encodeASCII;
 import static org.eclipse.jgit.util.RawParseUtils.decode;
 import static org.eclipse.jgit.util.RawParseUtils.decodeNoFallback;
 import static org.eclipse.jgit.util.RawParseUtils.extractBinaryString;
 import static org.eclipse.jgit.util.RawParseUtils.match;
 import static org.eclipse.jgit.util.RawParseUtils.nextLF;
 import static org.eclipse.jgit.util.RawParseUtils.parseBase10;

 import java.io.IOException;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.text.MessageFormat;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;

 import org.eclipse.jgit.diff.DiffEntry;
 import org.eclipse.jgit.diff.EditList;
 import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.lib.AbbreviatedObjectId;
 import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.util.QuotedString;
 import org.eclipse.jgit.util.RawParseUtils;
 import org.eclipse.jgit.util.TemporaryBuffer;

 /**
  * Patch header describing an action for a single file path.
  */
 public class FileHeader extends DiffEntry {
 	// ASCII prefixes of the header lines recognised while parsing a file
 	// header. Each one is compared against the start of a line with match().

 	/** Prefix of the line carrying the old file mode. */
 	private static final byte[] OLD_MODE = encodeASCII("old mode "); //$NON-NLS-1$

 	/** Prefix of the line carrying the new file mode. */
 	private static final byte[] NEW_MODE = encodeASCII("new mode "); //$NON-NLS-1$

 	/** Prefix of the line marking a deletion; followed by the old mode. */
 	static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode "); //$NON-NLS-1$

 	/** Prefix of the line marking an addition; followed by the new mode. */
 	static final byte[] NEW_FILE_MODE = encodeASCII("new file mode "); //$NON-NLS-1$

 	/** Prefix of the line naming the source path of a copy. */
 	private static final byte[] COPY_FROM = encodeASCII("copy from "); //$NON-NLS-1$

 	/** Prefix of the line naming the destination path of a copy. */
 	private static final byte[] COPY_TO = encodeASCII("copy to "); //$NON-NLS-1$

 	/** Prefix of the line naming the source path of a rename ("old" form). */
 	private static final byte[] RENAME_OLD = encodeASCII("rename old "); //$NON-NLS-1$

 	/** Prefix of the line naming the destination path of a rename ("new" form). */
 	private static final byte[] RENAME_NEW = encodeASCII("rename new "); //$NON-NLS-1$

 	/** Prefix of the line naming the source path of a rename ("from" form). */
 	private static final byte[] RENAME_FROM = encodeASCII("rename from "); //$NON-NLS-1$

 	/** Prefix of the line naming the destination path of a rename ("to" form). */
 	private static final byte[] RENAME_TO = encodeASCII("rename to "); //$NON-NLS-1$

 	/** Prefix of the line whose number is stored as the score. */
 	private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index "); //$NON-NLS-1$

 	/** Prefix of the alternative score line; also stored as the score. */
 	private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index "); //$NON-NLS-1$

 	/** Prefix of the "index $old..$new[ $mode]" line. */
 	static final byte[] INDEX = encodeASCII("index "); //$NON-NLS-1$

 	/** Prefix of the line naming the old path. */
 	static final byte[] OLD_NAME = encodeASCII("--- "); //$NON-NLS-1$

 	/** Prefix of the line naming the new path. */
 	static final byte[] NEW_NAME = encodeASCII("+++ "); //$NON-NLS-1$

 	/** Type of patch used by this file. */
 	public static enum PatchType {
 		/** A traditional unified diff style patch of a text file. */
 		UNIFIED,

 		/** An empty patch with a message "Binary files ... differ" */
 		BINARY,

 		/** A Git binary patch, holding pre and post image deltas */
 		GIT_BINARY;
 	}

 	/** Buffer holding the patch data for this file; returned as-is by getBuffer(). */
 	final byte[] buf;

 	/** Offset within {@link #buf} to the "diff ..." line. */
 	final int startOffset;

 	/** Position 1 past the end of this file within {@link #buf}. */
 	int endOffset;

 	/** Type of patch used to modify this file; starts out as UNIFIED. */
 	PatchType patchType;

 	/**
 	 * The hunks of this file. Stays {@code null} until the first hunk is
 	 * added, so read it through {@link #getHunks()}.
 	 */
 	private List<HunkHeader> hunks;

 	/** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
 	BinaryHunk forwardBinaryHunk;

 	/** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
 	BinaryHunk reverseBinaryHunk;

 	/**
 	 * Build a header from an in-memory diff header and a list of edits.
 	 * <p>
 	 * The whole array is treated as the header: the file name line and the
 	 * git header lines that follow are parsed, the patch type is then set to
 	 * {@code type}, and one hunk built from {@code edits} is attached.
 	 *
 	 * @param headerLines
 	 *            buffer holding the diff header for this file
 	 * @param edits
 	 *            the edits for this file
 	 * @param type
 	 *            the type of patch used to modify this file
 	 */
 	public FileHeader(byte[] headerLines, EditList edits, PatchType type) {
 		this(headerLines, 0);
 		final int limit = headerLines.length;
 		endOffset = limit;
 		// NOTE(review): parseGitFileName returns -1 when the first line runs
 		// to the end of the buffer; that value is handed to parseGitHeaders
 		// unchecked. Confirm callers always pass a multi-line header.
 		final int afterNameLine = parseGitFileName(Patch.DIFF_GIT.length, limit);
 		parseGitHeaders(afterNameLine, limit);
 		patchType = type;
 		addHunk(new HunkHeader(this, edits));
 	}

 	/**
 	 * Create a header over a region of an existing patch buffer.
 	 * <p>
 	 * The header starts out as a unified-diff modification; parsing may
 	 * later replace either setting.
 	 *
 	 * @param b
 	 *            buffer holding the patch; kept by reference, not copied.
 	 * @param offset
 	 *            position in {@code b} recorded as the start of this file.
 	 */
 	FileHeader(byte[] b, int offset) {
 		this.buf = b;
 		this.startOffset = offset;
 		this.patchType = PatchType.UNIFIED;
 		// Assume a plain modification unless a header line says otherwise.
 		this.changeType = ChangeType.MODIFY;
 	}

 	/**
 	 * Get the number of parents this header describes.
 	 * <p>
 	 * The value plus one is the number of character set guesses and per-image
 	 * buffers used when the script is converted to text.
 	 *
 	 * @return always 1 in this class.
 	 */
 	int getParentCount() {
 		return 1;
 	}

 	/**
 	 * Get the byte array that contains this file's patch script.
 	 * <p>
 	 * The array is the one given at construction; it is not copied.
 	 *
 	 * @return the byte array holding this file's patch script.
 	 */
 	public byte[] getBuffer() {
 		return this.buf;
 	}

 	/**
 	 * Get the position in {@link #getBuffer()} where this file's script
 	 * begins.
 	 *
 	 * @return offset of the start of this file's script in
 	 *         {@link #getBuffer()}.
 	 */
 	public int getStartOffset() {
 		return this.startOffset;
 	}

 	/**
 	 * Get the position in {@link #getBuffer()} one past the last byte of this
 	 * file's script.
 	 *
 	 * @return offset one past the end of the file script.
 	 */
 	public int getEndOffset() {
 		return this.endOffset;
 	}

 	/**
 	 * Convert the patch script for this file into a string.
 	 * <p>
 	 * No character set hint is supplied for either side, so the default
 	 * ({@link java.nio.charset.StandardCharsets#UTF_8}) is assumed for both
 	 * the old and new files.
 	 *
 	 * @return the patch script, as a Unicode string.
 	 */
 	public String getScriptText() {
 		final Charset noHint = null;
 		return getScriptText(noHint, noHint);
 	}

 	/**
 	 * Convert the patch script for this file into a string, using separate
 	 * character set hints for the old and new sides.
 	 *
 	 * @param oldCharset
 	 *            hint character set to decode the old lines with.
 	 * @param newCharset
 	 *            hint character set to decode the new lines with.
 	 * @return the patch script, as a Unicode string.
 	 */
 	public String getScriptText(Charset oldCharset, Charset newCharset) {
 		final Charset[] guesses = { oldCharset, newCharset };
 		return getScriptText(guesses);
 	}

 	/**
 	 * Convert the patch script into a string using one character set guess
 	 * per file image.
 	 *
 	 * @param charsetGuess
 	 *            one entry per parent plus one for the result; may be
 	 *            {@code null}, and individual entries may be {@code null},
 	 *            in which case UTF-8 is used.
 	 * @return the patch script, as a Unicode string.
 	 * @throws IllegalArgumentException
 	 *             if hunks exist and a non-null {@code charsetGuess} does not
 	 *             have exactly {@code getParentCount() + 1} entries.
 	 */
 	String getScriptText(Charset[] charsetGuess) {
 		final List<? extends HunkHeader> fileHunks = getHunks();
 		if (fileHunks.isEmpty()) {
 			// Without hunks the patch is binary style or meta-data only.
 			// The headers should be strictly 7-bit US-ASCII and the body is
 			// either 7-bit ASCII (base 85 encoding of a BinaryHunk) or
 			// noise we do not try to understand (e.g. the message
 			// "Binary files ... differ"), so no decoding is attempted.
 			return extractBinaryString(buf, startOffset, endOffset);
 		}

 		if (charsetGuess != null) {
 			final int wanted = getParentCount() + 1;
 			if (charsetGuess.length != wanted) {
 				throw new IllegalArgumentException(MessageFormat.format(
 						JGitText.get().expectedCharacterEncodingGuesses,
 						Integer.valueOf(wanted)));
 			}
 		}

 		if (trySimpleConversion(charsetGuess)) {
 			Charset single = UTF_8;
 			if (charsetGuess != null && charsetGuess[0] != null) {
 				single = charsetGuess[0];
 			}
 			try {
 				return decodeNoFallback(single, buf, startOffset, endOffset);
 			} catch (CharacterCodingException cee) {
 				// Deliberately ignored: fall through to the much slower,
 				// more memory intensive path below, which can handle a
 				// character set conversion patch.
 			}
 		}

 		final StringBuilder script = new StringBuilder(endOffset - startOffset);

 		// Headers are always treated as US-ASCII; Git file names are
 		// encoded in a C style escape if any character has the high-bit set.
 		final int firstHunk = fileHunks.get(0).getStartOffset();
 		int pos = startOffset;
 		while (pos < firstHunk) {
 			final int lineEnd = Math.min(firstHunk, nextLF(buf, pos));
 			script.append(extractBinaryString(buf, pos, lineEnd));
 			pos = lineEnd;
 		}

 		final String[] images = extractFileLines(charsetGuess);
 		final int[] cursors = new int[images.length];
 		for (HunkHeader hunk : fileHunks) {
 			hunk.extractFileLines(script, images, cursors);
 		}
 		return script.toString();
 	}

 	/**
 	 * Decide whether the whole script can be decoded in one pass with a
 	 * single character set.
 	 *
 	 * @param charsetGuess
 	 *            guesses to inspect; may be {@code null}.
 	 * @return {@code true} if there are no guesses or every entry is the very
 	 *         same instance as the first one.
 	 */
 	private static boolean trySimpleConversion(Charset[] charsetGuess) {
 		if (charsetGuess != null) {
 			// Identity comparison is intentional; entry 0 trivially matches
 			// itself, and an empty array never enters the loop body.
 			for (Charset guess : charsetGuess) {
 				if (guess != charsetGuess[0]) {
 					return false;
 				}
 			}
 		}
 		return true;
 	}

 	/**
 	 * Collect the lines of every file image from all hunks and decode each
 	 * image with its own character set guess.
 	 *
 	 * @param csGuess
 	 *            one guess per image; may be {@code null}, and a
 	 *            {@code null} entry falls back to UTF-8.
 	 * @return decoded text, one string per image.
 	 * @throws RuntimeException
 	 *             wrapping any {@link IOException} raised while buffering.
 	 */
 	private String[] extractFileLines(Charset[] csGuess) {
 		final int imageCount = getParentCount() + 1;
 		final TemporaryBuffer[] raw = new TemporaryBuffer[imageCount];
 		try {
 			for (int i = 0; i < imageCount; i++) {
 				raw[i] = new TemporaryBuffer.Heap(Integer.MAX_VALUE);
 			}
 			for (HunkHeader hunk : getHunks()) {
 				hunk.extractFileLines(raw);
 			}

 			final String[] text = new String[imageCount];
 			for (int i = 0; i < imageCount; i++) {
 				final Charset hint = csGuess == null ? null : csGuess[i];
 				final Charset cs = hint == null ? UTF_8 : hint;
 				text[i] = RawParseUtils.decode(cs, raw[i].toByteArray());
 			}
 			return text;
 		} catch (IOException ioe) {
 			throw new RuntimeException(JGitText.get().cannotConvertScriptToText, ioe);
 		}
 	}

 	/**
 	 * Get the style of patch used to modify this file.
 	 *
 	 * @return style of patch used to modify this file.
 	 */
 	public PatchType getPatchType() {
 		return this.patchType;
 	}

 	/**
 	 * Whether this patch modifies metadata about a file.
 	 * <p>
 	 * Anything other than a plain modification counts, as does a
 	 * modification whose old and new mode are different instances.
 	 *
 	 * @return {@code true} if this patch modifies metadata about a file.
 	 */
 	public boolean hasMetaDataChanges() {
 		final boolean contentOnly = changeType == ChangeType.MODIFY
 				&& newMode == oldMode;
 		return !contentOnly;
 	}

 	/**
 	 * Get hunks altering this file; in order of appearance in patch.
 	 *
 	 * @return hunks altering this file; in order of appearance in patch.
 	 *         Never {@code null}; an empty list if no hunk was added yet.
 	 */
 	public List<? extends HunkHeader> getHunks() {
 		return hunks != null ? hunks : Collections.<HunkHeader> emptyList();
 	}

 	/**
 	 * Append a hunk to this file, creating the hunk list on first use.
 	 *
 	 * @param h
 	 *            the hunk to add; its file header must be this instance.
 	 * @throws IllegalArgumentException
 	 *             if {@code h} reports a different file header.
 	 */
 	void addHunk(HunkHeader h) {
 		final boolean foreign = h.getFileHeader() != this;
 		if (foreign) {
 			throw new IllegalArgumentException(JGitText.get().hunkBelongsToAnotherFile);
 		}
 		List<HunkHeader> target = hunks;
 		if (target == null) {
 			target = new ArrayList<>();
 			hunks = target;
 		}
 		target.add(h);
 	}

 	/**
 	 * Create a hunk header bound to this file.
 	 * <p>
 	 * The new hunk is only created here; it is not added to this file.
 	 *
 	 * @param offset
 	 *            offset handed to the {@code HunkHeader} constructor.
 	 * @return the new hunk header.
 	 */
 	HunkHeader newHunkHeader(int offset) {
 		final HunkHeader created = new HunkHeader(this, offset);
 		return created;
 	}

 	/**
 	 * Get the new-image delta/literal if this is a
 	 * {@link PatchType#GIT_BINARY}.
 	 *
 	 * @return the new-image delta/literal if this is a
 	 *         {@link PatchType#GIT_BINARY}; whatever the field currently
 	 *         holds otherwise.
 	 */
 	public BinaryHunk getForwardBinaryHunk() {
 		return this.forwardBinaryHunk;
 	}

 	/**
 	 * Get the old-image delta/literal if this is a
 	 * {@link PatchType#GIT_BINARY}.
 	 *
 	 * @return the old-image delta/literal if this is a
 	 *         {@link PatchType#GIT_BINARY}; whatever the field currently
 	 *         holds otherwise.
 	 */
 	public BinaryHunk getReverseBinaryHunk() {
 		return this.reverseBinaryHunk;
 	}

 	/**
 	 * Convert to a list describing the content edits performed on this file.
 	 * <p>
 	 * The edits of every hunk are concatenated in hunk order. A file without
 	 * hunks yields an empty list.
 	 *
 	 * @return a list describing the content edits performed on this file.
 	 */
 	public EditList toEditList() {
 		final EditList r = new EditList();
 		// Go through getHunks() rather than the raw field: the field stays
 		// null until the first addHunk() call, and iterating it directly
 		// would throw a NullPointerException for a file with no hunks.
 		for (HunkHeader hunk : getHunks()) {
 			r.addAll(hunk.toEditList());
 		}
 		return r;
 	}

 	/**
 	 * Parse a "diff --git" or "diff --cc" line.
 	 * <p>
 	 * When the line can be split into two matching names, both
 	 * {@code oldPath} and {@code newPath} are set to that name. When it
 	 * cannot, the paths are left untouched and the end of the line is still
 	 * returned.
 	 *
 	 * @param ptr
 	 *            first character after the "diff --git " or "diff --cc " part.
 	 * @param end
 	 *            one past the last position to parse.
 	 * @return first character after the LF at the end of the line; -1 if the
 	 *         line reaches or passes {@code end}.
 	 */
 	int parseGitFileName(int ptr, int end) {
 		final int eol = nextLF(buf, ptr);
 		final int bol = ptr;
 		if (eol >= end) {
 			return -1;
 		}

 		// buffer[ptr..eol] looks like "a/foo b/foo\n".
 		// A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
 		// is only one way to split the line such that text to the left
 		// of the space matches the text to the right, excluding the part
 		// before the first slash.
 		//
 		// NOTE(review): nextLF(buf, ptr, c) presumably returns the index
 		// just past the next occurrence of c (or of the line end); the
 		// "- 1" adjustments below rely on that - confirm in RawParseUtils.

 		final int aStart = nextLF(buf, ptr, '/');
 		if (aStart >= eol)
 			return eol;

 		while (ptr < eol) {
 			final int sp = nextLF(buf, ptr, ' ');
 			if (sp >= eol) {
 				// We can't split the header, it isn't valid.
 				// This may be OK if this is a rename patch.
 				//
 				return eol;
 			}
 			final int bStart = nextLF(buf, sp, '/');
 			if (bStart >= eol)
 				return eol;

 			// If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
 			// we have a valid split.
 			//
 			if (eq(aStart, sp - 1, bStart, eol - 1)) {
 				if (buf[bol] == '"') {
 					// We're a double quoted name. The region better end
 					// in a double quote too, and we need to decode the
 					// characters before reading the name.
 					//
 					if (buf[sp - 2] != '"') {
 						return eol;
 					}
 					// Dequote the whole first name (prefix included), then
 					// drop everything up to the first slash.
 					oldPath = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1);
 					oldPath = p1(oldPath);
 				} else {
 					oldPath = decode(UTF_8, buf, aStart, sp - 1);
 				}
 				// This line carries a single name, used for both sides.
 				newPath = oldPath;
 				return eol;
 			}

 			// This split wasn't correct. Move past the space and try
 			// another split as the space must be part of the file name.
 			//
 			ptr = sp;
 		}

 		return eol;
 	}

 	/**
 	 * Parse the git extended header lines that follow the "diff" line.
 	 * <p>
 	 * Lines are consumed one at a time and update paths, modes, change type,
 	 * score and object ids as they are recognised. Parsing stops at the
 	 * first hunk header or at the first line that is not a known header.
 	 *
 	 * @param ptr
 	 *            position of the first header line.
 	 * @param end
 	 *            one past the last position to parse.
 	 * @return position of the line that stopped parsing, or the position
 	 *         reached when {@code end} was hit.
 	 */
 	int parseGitHeaders(int ptr, int end) {
 		int pos = ptr;
 		while (pos < end) {
 			final int lineEnd = nextLF(buf, pos);
 			if (isHunkHdr(buf, pos, lineEnd) >= 1) {
 				// First hunk header; hunks are parsed later.
 				return pos;
 			}

 			if (match(buf, pos, OLD_NAME) >= 0) {
 				parseOldName(pos, lineEnd);

 			} else if (match(buf, pos, NEW_NAME) >= 0) {
 				parseNewName(pos, lineEnd);

 			} else if (match(buf, pos, OLD_MODE) >= 0) {
 				oldMode = parseFileMode(pos + OLD_MODE.length, lineEnd);

 			} else if (match(buf, pos, NEW_MODE) >= 0) {
 				newMode = parseFileMode(pos + NEW_MODE.length, lineEnd);

 			} else if (match(buf, pos, DELETED_FILE_MODE) >= 0) {
 				oldMode = parseFileMode(pos + DELETED_FILE_MODE.length, lineEnd);
 				newMode = FileMode.MISSING;
 				changeType = ChangeType.DELETE;

 			} else if (match(buf, pos, NEW_FILE_MODE) >= 0) {
 				parseNewFileMode(pos, lineEnd);

 			} else if (match(buf, pos, COPY_FROM) >= 0) {
 				oldPath = parseName(oldPath, pos + COPY_FROM.length, lineEnd);
 				changeType = ChangeType.COPY;

 			} else if (match(buf, pos, COPY_TO) >= 0) {
 				newPath = parseName(newPath, pos + COPY_TO.length, lineEnd);
 				changeType = ChangeType.COPY;

 			} else if (match(buf, pos, RENAME_OLD) >= 0) {
 				oldPath = parseName(oldPath, pos + RENAME_OLD.length, lineEnd);
 				changeType = ChangeType.RENAME;

 			} else if (match(buf, pos, RENAME_NEW) >= 0) {
 				newPath = parseName(newPath, pos + RENAME_NEW.length, lineEnd);
 				changeType = ChangeType.RENAME;

 			} else if (match(buf, pos, RENAME_FROM) >= 0) {
 				oldPath = parseName(oldPath, pos + RENAME_FROM.length, lineEnd);
 				changeType = ChangeType.RENAME;

 			} else if (match(buf, pos, RENAME_TO) >= 0) {
 				newPath = parseName(newPath, pos + RENAME_TO.length, lineEnd);
 				changeType = ChangeType.RENAME;

 			} else if (match(buf, pos, SIMILARITY_INDEX) >= 0) {
 				score = parseBase10(buf, pos + SIMILARITY_INDEX.length, null);

 			} else if (match(buf, pos, DISSIMILARITY_INDEX) >= 0) {
 				score = parseBase10(buf, pos + DISSIMILARITY_INDEX.length, null);

 			} else if (match(buf, pos, INDEX) >= 0) {
 				parseIndexLine(pos + INDEX.length, lineEnd);

 			} else {
 				// Probably an empty patch (stat dirty).
 				return pos;
 			}

 			pos = lineEnd;
 		}
 		return pos;
 	}

 	/**
 	 * Parse a "--- " line into {@code oldPath}, dropping the leading path
 	 * component; a {@code DEV_NULL} old path marks the file as added.
 	 *
 	 * @param ptr
 	 *            position of the "--- " prefix.
 	 * @param eol
 	 *            end of the line, as returned by nextLF.
 	 */
 	void parseOldName(int ptr, int eol) {
 		final String parsed = parseName(oldPath, ptr + OLD_NAME.length, eol);
 		oldPath = p1(parsed);
 		// Identity test is deliberate: parseName hands back the DEV_NULL
 		// constant itself for that name, and p1 returns such a leading-slash
 		// name unchanged.
 		if (DEV_NULL == oldPath) {
 			changeType = ChangeType.ADD;
 		}
 	}

 	/**
 	 * Parse a "+++ " line into {@code newPath}, dropping the leading path
 	 * component; a {@code DEV_NULL} new path marks the file as deleted.
 	 *
 	 * @param ptr
 	 *            position of the "+++ " prefix.
 	 * @param eol
 	 *            end of the line, as returned by nextLF.
 	 */
 	void parseNewName(int ptr, int eol) {
 		final String parsed = parseName(newPath, ptr + NEW_NAME.length, eol);
 		newPath = p1(parsed);
 		// Identity test is deliberate: parseName hands back the DEV_NULL
 		// constant itself for that name, and p1 returns such a leading-slash
 		// name unchanged.
 		if (DEV_NULL == newPath) {
 			changeType = ChangeType.DELETE;
 		}
 	}

 	/**
 	 * Parse a "new file mode " line: the old mode becomes MISSING, the new
 	 * mode is read from the line and the change is recorded as an addition.
 	 *
 	 * @param ptr
 	 *            position of the "new file mode " prefix.
 	 * @param eol
 	 *            end of the line, as returned by nextLF.
 	 */
 	void parseNewFileMode(int ptr, int eol) {
 		final int modeStart = ptr + NEW_FILE_MODE.length;
 		oldMode = FileMode.MISSING;
 		newMode = parseFileMode(modeStart, eol);
 		changeType = ChangeType.ADD;
 	}

 	/**
 	 * Parse the "--- " / "+++ " lines of a traditional (non-git) patch.
 	 * <p>
 	 * Parsing stops at the first hunk header or at the first line that is
 	 * neither an old-name nor a new-name line.
 	 *
 	 * @param ptr
 	 *            position of the first header line.
 	 * @param end
 	 *            one past the last position to parse.
 	 * @return position of the line that stopped parsing, or the position
 	 *         reached when {@code end} was hit.
 	 */
 	int parseTraditionalHeaders(int ptr, int end) {
 		int pos = ptr;
 		while (pos < end) {
 			final int lineEnd = nextLF(buf, pos);
 			if (isHunkHdr(buf, pos, lineEnd) >= 1) {
 				// First hunk header; hunks are parsed later.
 				return pos;
 			}

 			if (match(buf, pos, OLD_NAME) >= 0) {
 				parseOldName(pos, lineEnd);
 			} else if (match(buf, pos, NEW_NAME) >= 0) {
 				parseNewName(pos, lineEnd);
 			} else {
 				// Possibly an empty patch.
 				return pos;
 			}

 			pos = lineEnd;
 		}
 		return pos;
 	}

 	/**
 	 * Read a file name from the rest of a header line.
 	 *
 	 * @param expect
 	 *            value returned unchanged when the region is empty.
 	 * @param ptr
 	 *            first byte of the name.
 	 * @param end
 	 *            end of the line; the byte before it is not part of the name.
 	 * @return the decoded name; the {@code DEV_NULL} constant itself when the
 	 *         name equals it, so callers may compare by identity.
 	 */
 	private String parseName(String expect, int ptr, int end) {
 		if (ptr == end) {
 			return expect;
 		}

 		final String name;
 		if (buf[ptr] != '"') {
 			// Older style GNU diff format, an optional tab ends the name.
 			// Walk back from the end of the line to just past the last tab.
 			int stop = end;
 			for (; ptr < stop; stop--) {
 				if (buf[stop - 1] == '\t') {
 					break;
 				}
 			}
 			if (stop == ptr) {
 				// No tab found: the name runs to the end of the line.
 				stop = end;
 			}
 			name = decode(UTF_8, buf, ptr, stop - 1);
 		} else {
 			// New style GNU diff format: a quoted name.
 			name = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1);
 		}

 		return name.equals(DEV_NULL) ? DEV_NULL : name;
 	}

 	/**
 	 * Strip the first path component (for example "a/" or "b/") from a name.
 	 *
 	 * @param r
 	 *            the name to shorten.
 	 * @return the text after the first slash; {@code r} itself when it has no
 	 *         slash or starts with one.
 	 */
 	private static String p1(final String r) {
 		final int slash = r.indexOf('/');
 		if (slash <= 0) {
 			return r;
 		}
 		return r.substring(slash + 1);
 	}

 	/**
 	 * Read an octal file mode from the buffer.
 	 * <p>
 	 * Every byte from {@code ptr} up to, but not including, {@code end - 1}
 	 * is folded in as one octal digit; the bytes are not validated.
 	 *
 	 * @param ptr
 	 *            first digit of the mode.
 	 * @param end
 	 *            end of the line; the byte before it is skipped.
 	 * @return the mode obtained from {@code FileMode.fromBits}.
 	 */
 	FileMode parseFileMode(int ptr, int end) {
 		final int stop = end - 1;
 		int bits = 0;
 		for (int i = ptr; i < stop; i++) {
 			bits = (bits << 3) + (buf[i] - '0');
 		}
 		return FileMode.fromBits(bits);
 	}

 	/**
 	 * Parse the remainder of an "index $asha1..$bsha1[ $mode]" line, where
 	 * $asha1 and $bsha1 can be unique abbreviations.
 	 * <p>
 	 * Sets {@code oldId} and {@code newId}; when a mode follows, it is
 	 * stored as both the old and the new mode.
 	 *
 	 * @param ptr
 	 *            first byte after the "index " prefix.
 	 * @param end
 	 *            end of the line.
 	 */
 	void parseIndexLine(int ptr, int end) {
 		final int afterDot = nextLF(buf, ptr, '.');
 		final int afterSpace = nextLF(buf, afterDot, ' ');

 		oldId = AbbreviatedObjectId.fromString(buf, ptr, afterDot - 1);
 		newId = AbbreviatedObjectId.fromString(buf, afterDot + 1, afterSpace - 1);

 		if (afterSpace < end) {
 			final FileMode shared = parseFileMode(afterSpace, end);
 			oldMode = shared;
 			newMode = shared;
 		}
 	}

 	/**
 	 * Compare two regions of the buffer byte for byte.
 	 *
 	 * @param aPtr
 	 *            start of the first region.
 	 * @param aEnd
 	 *            one past the end of the first region.
 	 * @param bPtr
 	 *            start of the second region.
 	 * @param bEnd
 	 *            one past the end of the second region.
 	 * @return {@code true} if both regions have the same length and content.
 	 */
 	private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) {
 		final int len = aEnd - aPtr;
 		if (len != bEnd - bPtr) {
 			return false;
 		}
 		for (int i = 0; i < len; i++) {
 			if (buf[aPtr + i] != buf[bPtr + i]) {
 				return false;
 			}
 		}
 		return true;
 	}

 	/**
 	 * Determine if this is a patch hunk header.
 	 * <p>
 	 * A hunk header is a run of two or more '@' characters followed by a
 	 * space and a '-'.
 	 *
 	 * @param buf
 	 *            the buffer to scan
 	 * @param start
 	 *            first position in the buffer to evaluate
 	 * @param end
 	 *            last position to consider; usually the end of the buffer (
 	 *            <code>buf.length</code>) or the first position on the next
 	 *            line. This is only used to avoid very long runs of '@' from
 	 *            killing the scan loop.
 	 * @return the number of '@' characters minus one, i.e. the number of
 	 *         "ancestor revisions" in the hunk header: a traditional two-way
 	 *         diff ("@@ -...") returns 1, "@@@ -..." returns 2, and so on.
 	 *         If this is not a hunk header, 0 is returned instead.
 	 */
 	static int isHunkHdr(byte[] buf, int start, int end) {
 		int pos = start;
 		for (; pos < end; pos++) {
 			if (buf[pos] != '@') {
 				break;
 			}
 		}

 		final int atCount = pos - start;
 		if (atCount < 2) {
 			return 0;
 		}
 		if (pos == end || buf[pos] != ' ') {
 			return 0;
 		}
 		pos++;
 		if (pos == end || buf[pos] != '-') {
 			return 0;
 		}
 		return atCount - 1;
 	}
 }
	/*
	* Copyright (C) 2008-2009, Google Inc.
	* and other copyright owners as documented in the project's IP log.
	*
	* This program and the accompanying materials are made available
	* under the terms of the Eclipse Distribution License v1.0 which
	* accompanies this distribution, is reproduced below, and is
	* available at http://www.eclipse.org/org/documents/edl-v10.php
	*
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or
	* without modification, are permitted provided that the following
	* conditions are met:
	*
	* - Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* - Redistributions in binary form must reproduce the above
	* copyright notice, this list of conditions and the following
	* disclaimer in the documentation and/or other materials provided
	* with the distribution.
	*
	* - Neither the name of the Eclipse Foundation, Inc. nor the
	* names of its contributors may be used to endorse or promote
	* products derived from this software without specific prior
	* written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
	* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
	* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	package org.eclipse.jgit.patch;

	import static java.nio.charset.StandardCharsets.UTF_8;
	import static org.eclipse.jgit.lib.Constants.encodeASCII;
	import static org.eclipse.jgit.util.RawParseUtils.decode;
	import static org.eclipse.jgit.util.RawParseUtils.decodeNoFallback;
	import static org.eclipse.jgit.util.RawParseUtils.extractBinaryString;
	import static org.eclipse.jgit.util.RawParseUtils.match;
	import static org.eclipse.jgit.util.RawParseUtils.nextLF;
	import static org.eclipse.jgit.util.RawParseUtils.parseBase10;

	import java.io.IOException;
	import java.nio.charset.CharacterCodingException;
	import java.nio.charset.Charset;
	import java.text.MessageFormat;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.List;

	import org.eclipse.jgit.diff.DiffEntry;
	import org.eclipse.jgit.diff.EditList;
	import org.eclipse.jgit.internal.JGitText;
	import org.eclipse.jgit.lib.AbbreviatedObjectId;
	import org.eclipse.jgit.lib.FileMode;
	import org.eclipse.jgit.util.QuotedString;
	import org.eclipse.jgit.util.RawParseUtils;
	import org.eclipse.jgit.util.TemporaryBuffer;

	/**
	* Patch header describing an action for a single file path.
	*/
	public class FileHeader extends DiffEntry {
	private static final byte[] OLD_MODE = encodeASCII("old mode "); //$NON-NLS-1$

	private static final byte[] NEW_MODE = encodeASCII("new mode "); //$NON-NLS-1$

	static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode "); //$NON-NLS-1$

	static final byte[] NEW_FILE_MODE = encodeASCII("new file mode "); //$NON-NLS-1$

	private static final byte[] COPY_FROM = encodeASCII("copy from "); //$NON-NLS-1$

	private static final byte[] COPY_TO = encodeASCII("copy to "); //$NON-NLS-1$

	private static final byte[] RENAME_OLD = encodeASCII("rename old "); //$NON-NLS-1$

	private static final byte[] RENAME_NEW = encodeASCII("rename new "); //$NON-NLS-1$

	private static final byte[] RENAME_FROM = encodeASCII("rename from "); //$NON-NLS-1$

	private static final byte[] RENAME_TO = encodeASCII("rename to "); //$NON-NLS-1$

	private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index "); //$NON-NLS-1$

	private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index "); //$NON-NLS-1$

	static final byte[] INDEX = encodeASCII("index "); //$NON-NLS-1$

	static final byte[] OLD_NAME = encodeASCII("--- "); //$NON-NLS-1$

	static final byte[] NEW_NAME = encodeASCII("+++ "); //$NON-NLS-1$

	/** Type of patch used by this file. */
	public static enum PatchType {
	/** A traditional unified diff style patch of a text file. */
	UNIFIED,

	/** An empty patch with a message "Binary files ... differ" */
	BINARY,

	/** A Git binary patch, holding pre and post image deltas */
	GIT_BINARY;
	}

	/** Buffer holding the patch data for this file. */
	final byte[] buf;

	/** Offset within {@link #buf} to the "diff ..." line. */
	final int startOffset;

	/** Position 1 past the end of this file within {@link #buf}. */
	int endOffset;

	/** Type of patch used to modify this file */
	PatchType patchType;

	/** The hunks of this file */
	private List<HunkHeader> hunks;

	/** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
	BinaryHunk forwardBinaryHunk;

	/** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
	BinaryHunk reverseBinaryHunk;

	/**
	* Constructs a new FileHeader
	*
	* @param headerLines
	* buffer holding the diff header for this file
	* @param edits
	* the edits for this file
	* @param type
	* the type of patch used to modify this file
	*/
	public FileHeader(byte[] headerLines, EditList edits, PatchType type) {
	this(headerLines, 0);
	endOffset = headerLines.length;
	int ptr = parseGitFileName(Patch.DIFF_GIT.length, headerLines.length);
	parseGitHeaders(ptr, headerLines.length);
	this.patchType = type;
	addHunk(new HunkHeader(this, edits));
	}

	FileHeader(byte[] b, int offset) {
	buf = b;
	startOffset = offset;
	changeType = ChangeType.MODIFY; // unless otherwise designated
	patchType = PatchType.UNIFIED;
	}

	int getParentCount() {
	return 1;
	}

	/**
	* Get the byte array holding this file's patch script.
	*
	* @return the byte array holding this file's patch script.
	*/
	public byte[] getBuffer() {
	return buf;
	}

	/**
	* Get offset of the start of this file's script in {@link #getBuffer()}.
	*
	* @return offset of the start of this file's script in
	* {@link #getBuffer()}.
	*/
	public int getStartOffset() {
	return startOffset;
	}

	/**
	* Get offset one past the end of the file script.
	*
	* @return offset one past the end of the file script.
	*/
	public int getEndOffset() {
	return endOffset;
	}

	/**
	* Convert the patch script for this file into a string.
	* <p>
	* The default character encoding
	* ({@link java.nio.charset.StandardCharsets#UTF_8}) is assumed for both the
	* old and new files.
	*
	* @return the patch script, as a Unicode string.
	*/
	public String getScriptText() {
	return getScriptText(null, null);
	}

	/**
	* Convert the patch script for this file into a string.
	*
	* @param oldCharset
	* hint character set to decode the old lines with.
	* @param newCharset
	* hint character set to decode the new lines with.
	* @return the patch script, as a Unicode string.
	*/
	public String getScriptText(Charset oldCharset, Charset newCharset) {
	return getScriptText(new Charset[] { oldCharset, newCharset });
	}

	String getScriptText(Charset[] charsetGuess) {
	if (getHunks().isEmpty()) {
	// If we have no hunks then we can safely assume the entire
	// patch is a binary style patch, or a meta-data only style
	// patch. Either way the encoding of the headers should be
	// strictly 7-bit US-ASCII and the body is either 7-bit ASCII
	// (due to the base 85 encoding used for a BinaryHunk) or is
	// arbitrary noise we have chosen to ignore and not understand
	// (e.g. the message "Binary files ... differ").
	//
	return extractBinaryString(buf, startOffset, endOffset);
	}

	if (charsetGuess != null && charsetGuess.length != getParentCount() + 1)
	throw new IllegalArgumentException(MessageFormat.format(
	JGitText.get().expectedCharacterEncodingGuesses,
	Integer.valueOf(getParentCount() + 1)));

	if (trySimpleConversion(charsetGuess)) {
	Charset cs = charsetGuess != null ? charsetGuess[0] : null;
	if (cs == null) {
	cs = UTF_8;
	}
	try {
	return decodeNoFallback(cs, buf, startOffset, endOffset);
	} catch (CharacterCodingException cee) {
	// Try the much slower, more-memory intensive version which
	// can handle a character set conversion patch.
	}
	}

	final StringBuilder r = new StringBuilder(endOffset - startOffset);

	// Always treat the headers as US-ASCII; Git file names are encoded
	// in a C style escape if any character has the high-bit set.
	//
	final int hdrEnd = getHunks().get(0).getStartOffset();
	for (int ptr = startOffset; ptr < hdrEnd;) {
	final int eol = Math.min(hdrEnd, nextLF(buf, ptr));
	r.append(extractBinaryString(buf, ptr, eol));
	ptr = eol;
	}

	final String[] files = extractFileLines(charsetGuess);
	final int[] offsets = new int[files.length];
	for (HunkHeader h : getHunks())
	h.extractFileLines(r, files, offsets);
	return r.toString();
	}

	private static boolean trySimpleConversion(Charset[] charsetGuess) {
	if (charsetGuess == null)
	return true;
	for (int i = 1; i < charsetGuess.length; i++) {
	if (charsetGuess[i] != charsetGuess[0])
	return false;
	}
	return true;
	}

	private String[] extractFileLines(Charset[] csGuess) {
	final TemporaryBuffer[] tmp = new TemporaryBuffer[getParentCount() + 1];
	try {
	for (int i = 0; i < tmp.length; i++)
	tmp[i] = new TemporaryBuffer.Heap(Integer.MAX_VALUE);
	for (HunkHeader h : getHunks())
	h.extractFileLines(tmp);

	final String[] r = new String[tmp.length];
	for (int i = 0; i < tmp.length; i++) {
	Charset cs = csGuess != null ? csGuess[i] : null;
	if (cs == null) {
	cs = UTF_8;
	}
	r[i] = RawParseUtils.decode(cs, tmp[i].toByteArray());
	}
	return r;
	} catch (IOException ioe) {
	throw new RuntimeException(JGitText.get().cannotConvertScriptToText, ioe);
	}
	}

	/**
	* Get style of patch used to modify this file.
	*
	* @return style of patch used to modify this file.
	*/
	public PatchType getPatchType() {
	return patchType;
	}

	/**
	* Whether this patch modifies metadata about a file
	*
	* @return {@code true} if this patch modifies metadata about a file .
	*/
	public boolean hasMetaDataChanges() {
	return changeType != ChangeType.MODIFY \|\| newMode != oldMode;
	}

	/**
	* Get hunks altering this file; in order of appearance in patch
	*
	* @return hunks altering this file; in order of appearance in patch.
	*/
	public List<? extends HunkHeader> getHunks() {
	if (hunks == null)
	return Collections.emptyList();
	return hunks;
	}

	void addHunk(HunkHeader h) {
	if (h.getFileHeader() != this)
	throw new IllegalArgumentException(JGitText.get().hunkBelongsToAnotherFile);
	if (hunks == null)
	hunks = new ArrayList<>();
	hunks.add(h);
	}

	HunkHeader newHunkHeader(int offset) {
	return new HunkHeader(this, offset);
	}

	/**
	* Get the new-image delta/literal if this is a
	* {@link PatchType#GIT_BINARY}.
	*
	* @return the new-image delta/literal if this is a
	* {@link PatchType#GIT_BINARY}.
	*/
	public BinaryHunk getForwardBinaryHunk() {
	return forwardBinaryHunk;
	}

	/**
	* Get the old-image delta/literal if this is a
	* {@link PatchType#GIT_BINARY}.
	*
	* @return the old-image delta/literal if this is a
	* {@link PatchType#GIT_BINARY}.
	*/
	public BinaryHunk getReverseBinaryHunk() {
	return reverseBinaryHunk;
	}

	/**
	* Convert to a list describing the content edits performed on this file.
	*
	* @return a list describing the content edits performed on this file.
	*/
	public EditList toEditList() {
	final EditList r = new EditList();
	for (HunkHeader hunk : hunks)
	r.addAll(hunk.toEditList());
	return r;
	}

	/**
	 * Parse a "diff --git" or "diff --cc" line.
	 * <p>
	 * When the line can be split into two equal names, both {@code oldPath}
	 * and {@code newPath} are set to that name. In every other non-error case
	 * the paths are left untouched and the end of the line is still returned.
	 *
	 * @param ptr
	 *            first character after the "diff --git " or "diff --cc " part.
	 * @param end
	 *            one past the last position to parse.
	 * @return first character after the LF at the end of the line; -1 on error.
	 */
	int parseGitFileName(int ptr, int end) {
	final int eol = nextLF(buf, ptr);
	// Remember where the line began; ptr is advanced while trying splits.
	final int bol = ptr;
	// The line reaches (or passes) the parse limit: report an error.
	if (eol >= end) {
	return -1;
	}

	// buffer[ptr..eol] looks like "a/foo b/foo\n".
	// A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
	// is only one way to split the line such that text to the left
	// of the space matches the text to the right, excluding the part
	// before the first slash.
	//

	// Start of the first name, after its leading "x/" prefix
	// (assumes nextLF returns the position just past the matched
	// character -- confirm against RawParseUtils).
	final int aStart = nextLF(buf, ptr, '/');
	if (aStart >= eol)
	return eol;

	while (ptr < eol) {
	// Candidate split point: the next space at or after ptr.
	final int sp = nextLF(buf, ptr, ' ');
	if (sp >= eol) {
	// We can't split the header, it isn't valid.
	// This may be OK if this is a rename patch.
	//
	return eol;
	}
	// Start of the second name, after the prefix that follows the space.
	final int bStart = nextLF(buf, sp, '/');
	if (bStart >= eol)
	return eol;

	// If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
	// we have a valid split.
	//
	if (eq(aStart, sp - 1, bStart, eol - 1)) {
	if (buf[bol] == '"') {
	// We're a double quoted name. The region better end
	// in a double quote too, and we need to decode the
	// characters before reading the name.
	//
	if (buf[sp - 2] != '"') {
	return eol;
	}
	// Dequote the whole first name (prefix included), then
	// drop the leading path component with p1().
	oldPath = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1);
	oldPath = p1(oldPath);
	} else {
	oldPath = decode(UTF_8, buf, aStart, sp - 1);
	}
	// Both sides matched, so old and new name are the same.
	newPath = oldPath;
	return eol;
	}

	// This split wasn't correct. Move past the space and try
	// another split as the space must be part of the file name.
	//
	ptr = sp;
	}

	return eol;
	}

	/**
	 * Parse the extended header lines that follow a "diff --git" line.
	 * <p>
	 * Lines are consumed one at a time until the first hunk header or the
	 * first line that is not a recognized header.
	 *
	 * @param ptr
	 *            position of the first header line to examine.
	 * @param end
	 *            one past the last position to parse.
	 * @return position of the first line that was not consumed.
	 */
	int parseGitHeaders(int ptr, int end) {
		int pos = ptr;
		while (pos < end) {
			final int lineEnd = nextLF(buf, pos);
			if (isHunkHdr(buf, pos, lineEnd) >= 1) {
				// First hunk header; break out and parse them later.
				break;
			}
			if (!consumeGitHeaderLine(pos, lineEnd)) {
				// Probably an empty patch (stat dirty).
				break;
			}
			pos = lineEnd;
		}
		return pos;
	}

	/**
	 * Apply a single extended header line to this file header.
	 *
	 * @param ptr
	 *            first position of the line.
	 * @param eol
	 *            position just past the end of the line.
	 * @return {@code true} if the line was a recognized header and has been
	 *         applied; {@code false} if it matched none of the known
	 *         prefixes.
	 */
	private boolean consumeGitHeaderLine(int ptr, int eol) {
		if (match(buf, ptr, OLD_NAME) >= 0) {
			parseOldName(ptr, eol);
			return true;
		}
		if (match(buf, ptr, NEW_NAME) >= 0) {
			parseNewName(ptr, eol);
			return true;
		}
		if (match(buf, ptr, OLD_MODE) >= 0) {
			oldMode = parseFileMode(ptr + OLD_MODE.length, eol);
			return true;
		}
		if (match(buf, ptr, NEW_MODE) >= 0) {
			newMode = parseFileMode(ptr + NEW_MODE.length, eol);
			return true;
		}
		if (match(buf, ptr, DELETED_FILE_MODE) >= 0) {
			oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol);
			newMode = FileMode.MISSING;
			changeType = ChangeType.DELETE;
			return true;
		}
		if (match(buf, ptr, NEW_FILE_MODE) >= 0) {
			parseNewFileMode(ptr, eol);
			return true;
		}
		if (match(buf, ptr, COPY_FROM) >= 0) {
			oldPath = parseName(oldPath, ptr + COPY_FROM.length, eol);
			changeType = ChangeType.COPY;
			return true;
		}
		if (match(buf, ptr, COPY_TO) >= 0) {
			newPath = parseName(newPath, ptr + COPY_TO.length, eol);
			changeType = ChangeType.COPY;
			return true;
		}
		if (match(buf, ptr, RENAME_OLD) >= 0) {
			oldPath = parseName(oldPath, ptr + RENAME_OLD.length, eol);
			changeType = ChangeType.RENAME;
			return true;
		}
		if (match(buf, ptr, RENAME_NEW) >= 0) {
			newPath = parseName(newPath, ptr + RENAME_NEW.length, eol);
			changeType = ChangeType.RENAME;
			return true;
		}
		if (match(buf, ptr, RENAME_FROM) >= 0) {
			oldPath = parseName(oldPath, ptr + RENAME_FROM.length, eol);
			changeType = ChangeType.RENAME;
			return true;
		}
		if (match(buf, ptr, RENAME_TO) >= 0) {
			newPath = parseName(newPath, ptr + RENAME_TO.length, eol);
			changeType = ChangeType.RENAME;
			return true;
		}
		if (match(buf, ptr, SIMILARITY_INDEX) >= 0) {
			score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null);
			return true;
		}
		if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) {
			score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null);
			return true;
		}
		if (match(buf, ptr, INDEX) >= 0) {
			parseIndexLine(ptr + INDEX.length, eol);
			return true;
		}
		return false;
	}

	/**
	 * Parse an old-name header line and store the result in {@code oldPath},
	 * with its leading path component removed by {@code p1}.
	 *
	 * @param ptr
	 *            first position of the line, at the {@code OLD_NAME} prefix.
	 * @param eol
	 *            position just past the end of the line.
	 */
	void parseOldName(int ptr, int eol) {
		final String parsed = parseName(oldPath, ptr + OLD_NAME.length, eol);
		oldPath = p1(parsed);
		// Identity comparison is deliberate: parseName hands back the
		// DEV_NULL constant itself when the name equals it.
		if (oldPath == DEV_NULL) {
			changeType = ChangeType.ADD;
		}
	}

	/**
	 * Parse a new-name header line and store the result in {@code newPath},
	 * with its leading path component removed by {@code p1}.
	 *
	 * @param ptr
	 *            first position of the line, at the {@code NEW_NAME} prefix.
	 * @param eol
	 *            position just past the end of the line.
	 */
	void parseNewName(int ptr, int eol) {
		final String parsed = parseName(newPath, ptr + NEW_NAME.length, eol);
		newPath = p1(parsed);
		// Identity comparison is deliberate: parseName hands back the
		// DEV_NULL constant itself when the name equals it.
		if (newPath == DEV_NULL) {
			changeType = ChangeType.DELETE;
		}
	}

	/**
	 * Handle a new-file-mode header line: the old mode becomes
	 * {@code FileMode.MISSING}, the new mode is read from the line and the
	 * change type becomes {@code ADD}.
	 *
	 * @param ptr
	 *            first position of the line, at the {@code NEW_FILE_MODE}
	 *            prefix.
	 * @param eol
	 *            position just past the end of the line.
	 */
	void parseNewFileMode(int ptr, int eol) {
		oldMode = FileMode.MISSING;
		final int modeStart = ptr + NEW_FILE_MODE.length;
		newMode = parseFileMode(modeStart, eol);
		changeType = ChangeType.ADD;
	}

	/**
	 * Parse the header lines of a traditional (non-git) patch, which only
	 * carry the old and the new file name.
	 *
	 * @param ptr
	 *            position of the first header line to examine.
	 * @param end
	 *            one past the last position to parse.
	 * @return position of the first line that was not consumed.
	 */
	int parseTraditionalHeaders(int ptr, int end) {
		int pos = ptr;
		while (pos < end) {
			final int lineEnd = nextLF(buf, pos);
			if (isHunkHdr(buf, pos, lineEnd) >= 1) {
				// First hunk header; stop here and parse hunks later.
				return pos;
			}

			if (match(buf, pos, OLD_NAME) >= 0) {
				parseOldName(pos, lineEnd);
			} else if (match(buf, pos, NEW_NAME) >= 0) {
				parseNewName(pos, lineEnd);
			} else {
				// Possibly an empty patch.
				return pos;
			}
			pos = lineEnd;
		}
		return pos;
	}

	/**
	 * Read a file name from a header line.
	 *
	 * @param expect
	 *            value to return when the region is empty.
	 * @param ptr
	 *            first position of the name.
	 * @param end
	 *            position just past the end of the line.
	 * @return the decoded name; the {@code DEV_NULL} constant itself when the
	 *         name equals it; or {@code expect} when {@code ptr == end}.
	 */
	private String parseName(String expect, int ptr, int end) {
		if (ptr == end) {
			return expect;
		}

		final String name;
		if (buf[ptr] != '"') {
			// Older style GNU diff format, an optional tab ends the name.
			// Walk backwards to just after the last tab in the region.
			int stop = end;
			while (stop > ptr && buf[stop - 1] != '\t') {
				stop--;
			}
			if (stop == ptr) {
				// No tab present: the name runs to the end of the line.
				stop = end;
			}
			name = decode(UTF_8, buf, ptr, stop - 1);
		} else {
			// New style GNU diff format: the name is a quoted string.
			name = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1);
		}

		// Canonicalize so callers can compare against DEV_NULL by identity.
		return name.equals(DEV_NULL) ? DEV_NULL : name;
	}

	/**
	 * Strip the first path component from a name (like {@code patch -p1}).
	 *
	 * @param r
	 *            the name to shorten.
	 * @return the text after the first '/'; {@code r} itself when it has no
	 *         '/' or when the first '/' is its very first character.
	 */
	private static String p1(final String r) {
		final int slash = r.indexOf('/');
		if (slash <= 0) {
			return r;
		}
		return r.substring(slash + 1);
	}

	/**
	 * Parse a file mode written as octal digits.
	 * <p>
	 * Every byte in {@code buf[ptr..end - 2]} is folded in as one octal
	 * digit; the byte at {@code end - 1} is not read. The bytes are not
	 * validated.
	 *
	 * @param ptr
	 *            position of the first digit.
	 * @param end
	 *            position just past the end of the line.
	 * @return the mode obtained from {@code FileMode.fromBits}.
	 */
	FileMode parseFileMode(int ptr, int end) {
		int bits = 0;
		for (int i = ptr; i < end - 1; i++) {
			bits = (bits << 3) + (buf[i] - '0');
		}
		return FileMode.fromBits(bits);
	}

	/**
	 * Parse the body of an index line, which has the form
	 * "$asha1..$bsha1[ $mode]" where $asha1 and $bsha1 can be unique
	 * abbreviations.
	 * <p>
	 * Sets {@code oldId} and {@code newId}; when a mode follows the ids it is
	 * stored as both the old and the new mode.
	 *
	 * @param ptr
	 *            first position after the index prefix.
	 * @param end
	 *            position just past the end of the line.
	 */
	void parseIndexLine(int ptr, int end) {
		final int afterFirstDot = nextLF(buf, ptr, '.');
		final int modeStart = nextLF(buf, afterFirstDot, ' ');

		oldId = AbbreviatedObjectId.fromString(buf, ptr, afterFirstDot - 1);
		newId = AbbreviatedObjectId.fromString(buf, afterFirstDot + 1,
				modeStart - 1);

		if (modeStart < end) {
			final FileMode shared = parseFileMode(modeStart, end);
			oldMode = shared;
			newMode = shared;
		}
	}

	/**
	 * Compare two regions of {@code buf} for byte-wise equality.
	 *
	 * @param aPtr
	 *            first position of region A.
	 * @param aEnd
	 *            one past the last position of region A.
	 * @param bPtr
	 *            first position of region B.
	 * @param bEnd
	 *            one past the last position of region B.
	 * @return {@code true} if both regions have the same length and content.
	 */
	private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) {
		final int len = aEnd - aPtr;
		if (len != bEnd - bPtr) {
			return false;
		}
		for (int i = 0; i < len; i++) {
			if (buf[aPtr + i] != buf[bPtr + i]) {
				return false;
			}
		}
		return true;
	}

	/**
	* Determine if this is a patch hunk header.
	*
	* @param buf
	* the buffer to scan
	* @param start
	* first position in the buffer to evaluate
	* @param end
	* last position to consider; usually the end of the buffer (
	* <code>buf.length</code>) or the first position on the next
	* line. This is only used to avoid very long runs of '@' from
	* killing the scan loop.
	* @return the number of "ancestor revisions" in the hunk header. A
	* traditional two-way diff ("@@ -...") returns 1; a combined diff
	* for a 3 way-merge returns 3. If this is not a hunk header, 0 is
	* returned instead.
	*/
	static int isHunkHdr(byte[] buf, int start, int end) {
	int ptr = start;
	while (ptr < end && buf[ptr] == '@')
	ptr++;
	if (ptr - start < 2)
	return 0;
	if (ptr == end \|\| buf[ptr++] != ' ')
	return 0;
	if (ptr == end \|\| buf[ptr++] != '-')
	return 0;
	return (ptr - 3) - start;
	}
	}