org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java - jgit

blob: 0c41b8598b43579757bfc2d737e066468b871efe [file] [log] [blame]

	/*
	* Copyright (C) 2009-2010, Google Inc.
	* Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de> and others
	*
	* This program and the accompanying materials are made available under the
	* terms of the Eclipse Distribution License v. 1.0 which is available at
	* https://www.eclipse.org/org/documents/edl-v10.php.
	*
	* SPDX-License-Identifier: BSD-3-Clause
	*/

	package org.eclipse.jgit.diff;

	import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
	import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
	import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;

	import org.eclipse.jgit.util.IntList;

	/**
	* Equivalence function for {@link org.eclipse.jgit.diff.RawText}.
	*/
	public abstract class RawTextComparator extends SequenceComparator<RawText> {
	/**
	 * No special treatment: two lines match only when they have the same
	 * length and identical bytes.
	 */
	public static final RawTextComparator DEFAULT = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			// Line i occupies the content bytes
			// [lines.get(i + 1), lines.get(i + 2)).
			final int aStart = a.lines.get(ai + 1);
			final int bStart = b.lines.get(bi + 1);
			final int aLen = a.lines.get(ai + 2) - aStart;
			final int bLen = b.lines.get(bi + 2) - bStart;

			// Lines of different length can never be byte-identical.
			if (aLen != bLen) {
				return false;
			}

			final byte[] aBuf = a.content;
			final byte[] bBuf = b.content;
			for (int i = 0; i < aLen; i++) {
				if (aBuf[aStart + i] != bBuf[bStart + i]) {
					return false;
				}
			}
			return true;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			// Multiply-by-33 rolling hash over every byte of the region.
			int h = 5381;
			for (int i = ptr; i < end; i++) {
				h = h * 33 + (raw[i] & 0xff);
			}
			return h;
		}
	};

	/** Ignores all whitespace. */
	public static final RawTextComparator WS_IGNORE_ALL = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			final byte[] aBuf = a.content;
			final byte[] bBuf = b.content;

			// Line i occupies the content bytes
			// [lines.get(i + 1), lines.get(i + 2)).
			int aPos = a.lines.get(ai + 1);
			int bPos = b.lines.get(bi + 1);
			final int aLineEnd = a.lines.get(ai + 2);
			final int bLineEnd = b.lines.get(bi + 2);

			// Drop trailing whitespace first so that both cursors are
			// expected to finish exactly on their limits when lines match.
			final int aLim = trimTrailingWhitespace(aBuf, aPos, aLineEnd);
			final int bLim = trimTrailingWhitespace(bBuf, bPos, bLineEnd);

			while (aPos < aLim && bPos < bLim) {
				// Step each cursor over whitespace, but never past the
				// final byte of its trimmed region.
				while (aPos < aLim - 1 && isWhitespace(aBuf[aPos])) {
					aPos++;
				}
				while (bPos < bLim - 1 && isWhitespace(bBuf[bPos])) {
					bPos++;
				}

				if (aBuf[aPos] != bBuf[bPos]) {
					return false;
				}
				aPos++;
				bPos++;
			}

			// Equal only if both lines were consumed completely.
			return aPos == aLim && bPos == bLim;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			// Multiply-by-33 rolling hash that leaves whitespace bytes out.
			int h = 5381;
			for (int i = ptr; i < end; i++) {
				final byte c = raw[i];
				if (isWhitespace(c)) {
					continue;
				}
				h = h * 33 + (c & 0xff);
			}
			return h;
		}
	};

	/** Ignores leading whitespace. */
	public static final RawTextComparator WS_IGNORE_LEADING = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			final byte[] aBuf = a.content;
			final byte[] bBuf = b.content;

			// Line i occupies the content bytes
			// [lines.get(i + 1), lines.get(i + 2)).
			final int aLineStart = a.lines.get(ai + 1);
			final int bLineStart = b.lines.get(bi + 1);
			final int aEnd = a.lines.get(ai + 2);
			final int bEnd = b.lines.get(bi + 2);

			// Start comparing after each line's leading whitespace.
			int aPos = trimLeadingWhitespace(aBuf, aLineStart, aEnd);
			int bPos = trimLeadingWhitespace(bBuf, bLineStart, bEnd);

			// The remainders must have the same length to be equal.
			if (aEnd - aPos != bEnd - bPos) {
				return false;
			}

			for (; aPos < aEnd; aPos++, bPos++) {
				if (aBuf[aPos] != bBuf[bPos]) {
					return false;
				}
			}
			return true;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			// Hash only what follows the leading whitespace, mirroring the
			// region that equals() compares.
			int h = 5381;
			for (int i = trimLeadingWhitespace(raw, ptr, end); i < end; i++) {
				h = h * 33 + (raw[i] & 0xff);
			}
			return h;
		}
	};

	/** Ignores trailing whitespace. */
	public static final RawTextComparator WS_IGNORE_TRAILING = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			final byte[] aBuf = a.content;
			final byte[] bBuf = b.content;

			// Line i occupies the content bytes
			// [lines.get(i + 1), lines.get(i + 2)).
			final int aStart = a.lines.get(ai + 1);
			final int bStart = b.lines.get(bi + 1);
			final int aLineEnd = a.lines.get(ai + 2);
			final int bLineEnd = b.lines.get(bi + 2);

			// Stop comparing before each line's trailing whitespace.
			final int aLim = trimTrailingWhitespace(aBuf, aStart, aLineEnd);
			final int bLim = trimTrailingWhitespace(bBuf, bStart, bLineEnd);

			// The trimmed lines must have the same length to be equal.
			final int len = aLim - aStart;
			if (len != bLim - bStart) {
				return false;
			}

			for (int i = 0; i < len; i++) {
				if (aBuf[aStart + i] != bBuf[bStart + i]) {
					return false;
				}
			}
			return true;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			// Hash only what precedes the trailing whitespace, mirroring
			// the region that equals() compares.
			final int limit = trimTrailingWhitespace(raw, ptr, end);
			int h = 5381;
			for (int i = ptr; i < limit; i++) {
				h = h * 33 + (raw[i] & 0xff);
			}
			return h;
		}
	};

	/** Ignores whitespace occurring between non-whitespace characters. */
	public static final RawTextComparator WS_IGNORE_CHANGE = new RawTextComparator() {
		@Override
		public boolean equals(RawText a, int ai, RawText b, int bi) {
			final byte[] aBuf = a.content;
			final byte[] bBuf = b.content;

			// Line i occupies the content bytes
			// [lines.get(i + 1), lines.get(i + 2)).
			int aPos = a.lines.get(ai + 1);
			int bPos = b.lines.get(bi + 1);
			final int aLineEnd = a.lines.get(ai + 2);
			final int bLineEnd = b.lines.get(bi + 2);

			// Trailing whitespace never takes part in the comparison.
			final int aLim = trimTrailingWhitespace(aBuf, aPos, aLineEnd);
			final int bLim = trimTrailingWhitespace(bBuf, bPos, bLineEnd);

			while (aPos < aLim && bPos < bLim) {
				final byte ac = aBuf[aPos++];
				final byte bc = bBuf[bPos++];

				final boolean bothBlank = isWhitespace(ac) && isWhitespace(bc);
				if (!bothBlank) {
					if (ac != bc) {
						return false;
					}
					continue;
				}

				// Both sides are inside a whitespace run: skip the rest of
				// each run so runs of different length compare as equal.
				aPos = trimLeadingWhitespace(aBuf, aPos, aLim);
				bPos = trimLeadingWhitespace(bBuf, bPos, bLim);
			}

			// Equal only if both lines were consumed completely.
			return aPos == aLim && bPos == bLim;
		}

		@Override
		protected int hashRegion(byte[] raw, int ptr, int end) {
			final int limit = trimTrailingWhitespace(raw, ptr, end);
			int h = 5381;
			int i = ptr;
			while (i < limit) {
				final byte c = raw[i++];
				if (isWhitespace(c)) {
					// Collapse the whole whitespace run into a single
					// space so the run's length does not affect the hash.
					i = trimLeadingWhitespace(raw, i, limit);
					h = h * 33 + ' ';
				} else {
					h = h * 33 + (c & 0xff);
				}
			}
			return h;
		}
	};

	/** {@inheritDoc} */
	@Override
	public int hash(RawText seq, int lno) {
		// Line lno occupies the content bytes
		// [lines.get(lno + 1), lines.get(lno + 2)); the concrete comparator
		// decides which of those bytes contribute to the hash.
		final IntList offsets = seq.lines;
		return hashRegion(seq.content, offsets.get(lno + 1),
				offsets.get(lno + 2));
	}

	/** {@inheritDoc} */
	@Override
	public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
	// This is a faster exact match based form that tries to improve
	// performance for the common case of the header and trailer of
	// a text file not changing at all. After this fast path we use
	// the slower path based on the super class' using equals() to
	// allow for whitespace ignore modes to still work.

	if (e.beginA == e.endA \|\| e.beginB == e.endB)
	return e;

	byte[] aRaw = a.content;
	byte[] bRaw = b.content;

	int aPtr = a.lines.get(e.beginA + 1);
	int bPtr = a.lines.get(e.beginB + 1);

	int aEnd = a.lines.get(e.endA + 1);
	int bEnd = b.lines.get(e.endB + 1);

	// This can never happen, but the JIT doesn't know that. If we
	// define this assertion before the tight while loops below it
	// should be able to skip the array bound checks on access.
	//
	if (aPtr < 0 \|\| bPtr < 0 \|\| aEnd > aRaw.length \|\| bEnd > bRaw.length)
	throw new ArrayIndexOutOfBoundsException();

	while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
	aPtr++;
	bPtr++;
	}

	while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
	aEnd--;
	bEnd--;
	}

	e.beginA = findForwardLine(a.lines, e.beginA, aPtr);
	e.beginB = findForwardLine(b.lines, e.beginB, bPtr);

	e.endA = findReverseLine(a.lines, e.endA, aEnd);

	final boolean partialA = aEnd < a.lines.get(e.endA + 1);
	if (partialA)
	bEnd += a.lines.get(e.endA + 1) - aEnd;

	e.endB = findReverseLine(b.lines, e.endB, bEnd);

	if (!partialA && bEnd < b.lines.get(e.endB + 1))
	e.endA++;

	return super.reduceCommonStartEnd(a, b, e);
	}

	/**
	 * Advance a line index while the table entry two slots ahead of it is
	 * still strictly below a byte position.
	 *
	 * @param lines
	 *            line offset table to search.
	 * @param idx
	 *            line index to start from.
	 * @param ptr
	 *            byte position to locate.
	 * @return the first index at or after {@code idx} that is either
	 *         {@code lines.size() - 2} or more, or whose entry at
	 *         {@code index + 2} is not below {@code ptr}.
	 */
	private static int findForwardLine(IntList lines, int idx, int ptr) {
		final int limit = lines.size() - 2;
		int line = idx;
		for (; line < limit; line++) {
			if (lines.get(line + 2) >= ptr) {
				break;
			}
		}
		return line;
	}

	/**
	 * Step a line index backwards while the table entry at that index is at
	 * or beyond a byte position.
	 *
	 * @param lines
	 *            line offset table to search.
	 * @param idx
	 *            line index to start from.
	 * @param ptr
	 *            byte position to locate.
	 * @return the first index at or before {@code idx} that is either 0 or
	 *         whose table entry is strictly below {@code ptr}.
	 */
	private static int findReverseLine(IntList lines, int idx, int ptr) {
		int line = idx;
		while (line > 0) {
			if (lines.get(line) < ptr) {
				break;
			}
			line--;
		}
		return line;
	}

	/**
	 * Compute a hash code for a region.
	 * <p>
	 * {@link #hash(RawText, int)} calls this with the byte range of a single
	 * line. Each comparator defined in this class skips or collapses, in its
	 * implementation of this method, the same whitespace that its
	 * {@code equals} ignores.
	 *
	 * @param raw
	 *            the raw file content.
	 * @param ptr
	 *            first byte of the region to hash.
	 * @param end
	 *            1 past the last byte of the region.
	 * @return hash code for the region <code>[ptr, end)</code> of raw.
	 */
	protected abstract int hashRegion(byte[] raw, int ptr, int end);
	}