JGit blame very slow for large merge commits that rename files

Adjusted BlameGenerator to filter rename detection with the blame path.
This reduces the running time of the blame computation significantly,
for repositories with massive commits involving renames.

The filtered rename detection is made (internally) available with:
org.eclipse.jgit.internal.diff.FilteredRenameDetector

Bug: 578900
Change-Id: I6580004e81102d685081b8180da1587a35073d36
Signed-off-by: Simeon Andreev <simeon.danailov.andreev@gmail.com>
diff --git a/org.eclipse.jgit.test/META-INF/MANIFEST.MF b/org.eclipse.jgit.test/META-INF/MANIFEST.MF
index 76f69a6..0ba71f9 100644
--- a/org.eclipse.jgit.test/META-INF/MANIFEST.MF
+++ b/org.eclipse.jgit.test/META-INF/MANIFEST.MF
@@ -33,6 +33,7 @@
  org.eclipse.jgit.ignore;version="[6.3.0,6.4.0)",
  org.eclipse.jgit.ignore.internal;version="[6.3.0,6.4.0)",
  org.eclipse.jgit.internal;version="[6.3.0,6.4.0)",
+ org.eclipse.jgit.internal.diff;version="[6.3.0,6.4.0)",
  org.eclipse.jgit.internal.diffmergetool;version="[6.3.0,6.4.0)",
  org.eclipse.jgit.internal.fsck;version="[6.3.0,6.4.0)",
  org.eclipse.jgit.internal.revwalk;version="[6.3.0,6.4.0)",
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractRenameDetectionTestCase.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractRenameDetectionTestCase.java
new file mode 100644
index 0000000..a8967f2
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractRenameDetectionTestCase.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2022, Google Inc. and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+package org.eclipse.jgit.diff;
+
+import static org.junit.Assert.assertEquals;
+
+import org.eclipse.jgit.diff.DiffEntry.ChangeType;
+import org.eclipse.jgit.junit.RepositoryTestCase;
+import org.eclipse.jgit.junit.TestRepository;
+import org.eclipse.jgit.lib.AbbreviatedObjectId;
+import org.eclipse.jgit.lib.FileMode;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.Repository;
+import org.junit.Before;
+
+public abstract class AbstractRenameDetectionTestCase
+		extends RepositoryTestCase {
+
+	protected static final String PATH_A = "src/A";
+
+	protected static final String PATH_B = "src/B";
+
+	protected static final String PATH_H = "src/H";
+
+	protected static final String PATH_Q = "src/Q";
+
+	protected TestRepository<Repository> testDb;
+
+	@Override
+	@Before
+	public void setUp() throws Exception {
+		super.setUp();
+		testDb = new TestRepository<>(db);
+	}
+
+	protected ObjectId blob(String content) throws Exception {
+		return testDb.blob(content).copy();
+	}
+
+	protected static void assertRename(DiffEntry o, DiffEntry n, int score,
+			DiffEntry rename) {
+		assertEquals(ChangeType.RENAME, rename.getChangeType());
+
+		assertEquals(o.getOldPath(), rename.getOldPath());
+		assertEquals(n.getNewPath(), rename.getNewPath());
+
+		assertEquals(o.getOldMode(), rename.getOldMode());
+		assertEquals(n.getNewMode(), rename.getNewMode());
+
+		assertEquals(o.getOldId(), rename.getOldId());
+		assertEquals(n.getNewId(), rename.getNewId());
+
+		assertEquals(score, rename.getScore());
+	}
+
+	protected static void assertCopy(DiffEntry o, DiffEntry n, int score,
+			DiffEntry copy) {
+		assertEquals(ChangeType.COPY, copy.getChangeType());
+
+		assertEquals(o.getOldPath(), copy.getOldPath());
+		assertEquals(n.getNewPath(), copy.getNewPath());
+
+		assertEquals(o.getOldMode(), copy.getOldMode());
+		assertEquals(n.getNewMode(), copy.getNewMode());
+
+		assertEquals(o.getOldId(), copy.getOldId());
+		assertEquals(n.getNewId(), copy.getNewId());
+
+		assertEquals(score, copy.getScore());
+	}
+
+	protected static void assertAdd(String newName, ObjectId newId,
+			FileMode newMode, DiffEntry add) {
+		assertEquals(DiffEntry.DEV_NULL, add.oldPath);
+		assertEquals(DiffEntry.A_ZERO, add.oldId);
+		assertEquals(FileMode.MISSING, add.oldMode);
+		assertEquals(ChangeType.ADD, add.changeType);
+		assertEquals(newName, add.newPath);
+		assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId);
+		assertEquals(newMode, add.newMode);
+	}
+
+	protected static void assertDelete(String oldName, ObjectId oldId,
+			FileMode oldMode, DiffEntry delete) {
+		assertEquals(DiffEntry.DEV_NULL, delete.newPath);
+		assertEquals(DiffEntry.A_ZERO, delete.newId);
+		assertEquals(FileMode.MISSING, delete.newMode);
+		assertEquals(ChangeType.DELETE, delete.changeType);
+		assertEquals(oldName, delete.oldPath);
+		assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId);
+		assertEquals(oldMode, delete.oldMode);
+	}
+}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/FilteredRenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/FilteredRenameDetectorTest.java
new file mode 100644
index 0000000..bfda36d
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/FilteredRenameDetectorTest.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2022, Simeon Andreev and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+package org.eclipse.jgit.diff;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+
+import java.util.Arrays;
+import java.util.List;
+import org.eclipse.jgit.internal.diff.FilteredRenameDetector;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.treewalk.filter.PathFilter;
+import org.junit.Before;
+import org.junit.Test;
+
+public class FilteredRenameDetectorTest extends AbstractRenameDetectionTestCase {
+
+	private FilteredRenameDetector frd;
+
+	@Override
+	@Before
+	public void setUp() throws Exception {
+		super.setUp();
+		frd = new FilteredRenameDetector(db);
+	}
+
+	@Test
+	public void testExactRename() throws Exception {
+		ObjectId foo = blob("foo");
+		ObjectId bar = blob("bar");
+
+		DiffEntry a = DiffEntry.add(PATH_A, foo);
+		DiffEntry b = DiffEntry.delete(PATH_Q, foo);
+
+		DiffEntry c = DiffEntry.add(PATH_H, bar);
+		DiffEntry d = DiffEntry.delete(PATH_B, bar);
+
+		List<DiffEntry> changes = Arrays.asList(a, b, c, d);
+		PathFilter filter = PathFilter.create(PATH_A);
+		List<DiffEntry> entries = frd.compute(changes, filter);
+		assertEquals("Unexpected entries in: " + entries, 1, entries.size());
+		assertRename(b, a, 100, entries.get(0));
+	}
+
+	@Test
+	public void testExactRename_multipleFilters() throws Exception {
+		ObjectId foo = blob("foo");
+		ObjectId bar = blob("bar");
+
+		DiffEntry a = DiffEntry.add(PATH_A, foo);
+		DiffEntry b = DiffEntry.delete(PATH_Q, foo);
+
+		DiffEntry c = DiffEntry.add(PATH_H, bar);
+		DiffEntry d = DiffEntry.delete(PATH_B, bar);
+
+		List<DiffEntry> changes = Arrays.asList(a, b, c, d);
+		List<PathFilter> filters = Arrays.asList(PathFilter.create(PATH_A),
+				PathFilter.create(PATH_H));
+		List<DiffEntry> entries = frd.compute(changes, filters);
+		assertEquals("Unexpected entries in: " + entries, 2, entries.size());
+		assertRename(b, a, 100, entries.get(0));
+		assertRename(d, c, 100, entries.get(1));
+	}
+
+	@Test
+	public void testInexactRename() throws Exception {
+		ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
+		ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
+		DiffEntry a = DiffEntry.add(PATH_A, aId);
+		DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+		ObjectId cId = blob("some\nsort\nof\ntext\n");
+		ObjectId dId = blob("completely\nunrelated\ntext\n");
+		DiffEntry c = DiffEntry.add(PATH_B, cId);
+		DiffEntry d = DiffEntry.delete(PATH_H, dId);
+
+		List<DiffEntry> changes = Arrays.asList(a, b, c, d);
+		PathFilter filter = PathFilter.create(PATH_A);
+		List<DiffEntry> entries = frd.compute(changes, filter);
+		assertEquals("Unexpected entries: " + entries, 1, entries.size());
+		assertRename(b, a, 66, entries.get(0));
+	}
+
+	@Test
+	public void testInexactRename_multipleFilters() throws Exception {
+		ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
+		ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
+		DiffEntry a = DiffEntry.add(PATH_A, aId);
+		DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+		ObjectId cId = blob("some\nsort\nof\ntext\n");
+		ObjectId dId = blob("completely\nunrelated\ntext\n");
+		DiffEntry c = DiffEntry.add(PATH_B, cId);
+		DiffEntry d = DiffEntry.delete(PATH_H, dId);
+
+		List<DiffEntry> changes = Arrays.asList(a, b, c, d);
+		List<PathFilter> filters = Arrays.asList(PathFilter.create(PATH_A),
+				PathFilter.create(PATH_H));
+		List<DiffEntry> entries = frd.compute(changes, filters);
+		assertEquals("Unexpected entries: " + entries, 2, entries.size());
+		assertRename(b, a, 66, entries.get(0));
+		assertSame(d, entries.get(1));
+	}
+
+	@Test
+	public void testNoRenames() throws Exception {
+		ObjectId aId = blob("");
+		ObjectId bId = blob("blah1");
+		ObjectId cId = blob("");
+		ObjectId dId = blob("blah2");
+
+		DiffEntry a = DiffEntry.add(PATH_A, aId);
+		DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+		DiffEntry c = DiffEntry.add(PATH_H, cId);
+		DiffEntry d = DiffEntry.delete(PATH_B, dId);
+
+		List<DiffEntry> changes = Arrays.asList(a, b, c, d);
+		PathFilter filter = PathFilter.create(PATH_A);
+		List<DiffEntry> entries = frd.compute(changes, filter);
+		assertEquals("Unexpected entries in: " + entries, 1, entries.size());
+		assertSame(a, entries.get(0));
+	}
+
+	@Test
+	public void testNoRenames_multipleFilters() throws Exception {
+		ObjectId aId = blob("");
+		ObjectId bId = blob("blah1");
+		ObjectId cId = blob("");
+		ObjectId dId = blob("blah2");
+
+		DiffEntry a = DiffEntry.add(PATH_A, aId);
+		DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+		DiffEntry c = DiffEntry.add(PATH_H, cId);
+		DiffEntry d = DiffEntry.delete(PATH_B, dId);
+
+		List<DiffEntry> changes = Arrays.asList(a, b, c, d);
+		List<PathFilter> filters = Arrays.asList(PathFilter.create(PATH_A),
+				PathFilter.create(PATH_H));
+		List<DiffEntry> entries = frd.compute(changes, filters);
+		assertEquals("Unexpected entries in: " + entries, 2, entries.size());
+		assertSame(a, entries.get(0));
+		assertSame(c, entries.get(1));
+	}
+}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
index 5edb60c..ad560e3 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
@@ -18,31 +18,20 @@
 import java.util.Arrays;
 import java.util.List;
 
-import org.eclipse.jgit.diff.DiffEntry.ChangeType;
-import org.eclipse.jgit.junit.RepositoryTestCase;
-import org.eclipse.jgit.junit.TestRepository;
 import org.eclipse.jgit.lib.AbbreviatedObjectId;
 import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectId;
-import org.eclipse.jgit.lib.Repository;
 import org.junit.Before;
 import org.junit.Test;
 
-public class RenameDetectorTest extends RepositoryTestCase {
-	private static final String PATH_A = "src/A";
-	private static final String PATH_B = "src/B";
-	private static final String PATH_H = "src/H";
-	private static final String PATH_Q = "src/Q";
+public class RenameDetectorTest extends AbstractRenameDetectionTestCase {
 
 	private RenameDetector rd;
 
-	private TestRepository<Repository> testDb;
-
 	@Override
 	@Before
 	public void setUp() throws Exception {
 		super.setUp();
-		testDb = new TestRepository<>(db);
 		rd = new RenameDetector(db);
 	}
 
@@ -675,62 +664,4 @@
 		assertSame(c, entries.get(2));
 		assertSame(d, entries.get(3));
 	}
-
-	private ObjectId blob(String content) throws Exception {
-		return testDb.blob(content).copy();
-	}
-
-	private static void assertRename(DiffEntry o, DiffEntry n, int score,
-			DiffEntry rename) {
-		assertEquals(ChangeType.RENAME, rename.getChangeType());
-
-		assertEquals(o.getOldPath(), rename.getOldPath());
-		assertEquals(n.getNewPath(), rename.getNewPath());
-
-		assertEquals(o.getOldMode(), rename.getOldMode());
-		assertEquals(n.getNewMode(), rename.getNewMode());
-
-		assertEquals(o.getOldId(), rename.getOldId());
-		assertEquals(n.getNewId(), rename.getNewId());
-
-		assertEquals(score, rename.getScore());
-	}
-
-	private static void assertCopy(DiffEntry o, DiffEntry n, int score,
-			DiffEntry copy) {
-		assertEquals(ChangeType.COPY, copy.getChangeType());
-
-		assertEquals(o.getOldPath(), copy.getOldPath());
-		assertEquals(n.getNewPath(), copy.getNewPath());
-
-		assertEquals(o.getOldMode(), copy.getOldMode());
-		assertEquals(n.getNewMode(), copy.getNewMode());
-
-		assertEquals(o.getOldId(), copy.getOldId());
-		assertEquals(n.getNewId(), copy.getNewId());
-
-		assertEquals(score, copy.getScore());
-	}
-
-	private static void assertAdd(String newName, ObjectId newId,
-			FileMode newMode, DiffEntry add) {
-		assertEquals(DiffEntry.DEV_NULL, add.oldPath);
-		assertEquals(DiffEntry.A_ZERO, add.oldId);
-		assertEquals(FileMode.MISSING, add.oldMode);
-		assertEquals(ChangeType.ADD, add.changeType);
-		assertEquals(newName, add.newPath);
-		assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId);
-		assertEquals(newMode, add.newMode);
-	}
-
-	private static void assertDelete(String oldName, ObjectId oldId,
-			FileMode oldMode, DiffEntry delete) {
-		assertEquals(DiffEntry.DEV_NULL, delete.newPath);
-		assertEquals(DiffEntry.A_ZERO, delete.newId);
-		assertEquals(FileMode.MISSING, delete.newMode);
-		assertEquals(ChangeType.DELETE, delete.changeType);
-		assertEquals(oldName, delete.oldPath);
-		assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId);
-		assertEquals(oldMode, delete.oldMode);
-	}
 }
diff --git a/org.eclipse.jgit/META-INF/MANIFEST.MF b/org.eclipse.jgit/META-INF/MANIFEST.MF
index 6d43eba..b919140 100644
--- a/org.eclipse.jgit/META-INF/MANIFEST.MF
+++ b/org.eclipse.jgit/META-INF/MANIFEST.MF
@@ -70,6 +70,8 @@
  org.eclipse.jgit.internal;version="6.3.0";
   x-friends:="org.eclipse.jgit.test,
    org.eclipse.jgit.http.test",
+ org.eclipse.jgit.internal.diff;version="6.3.0";
+  x-friends:="org.eclipse.jgit.test",
  org.eclipse.jgit.internal.diffmergetool;version="6.3.0";
   x-friends:="org.eclipse.jgit.test,
    org.eclipse.jgit.pgm.test,
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java b/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java
index 10d7752..77967df 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java
@@ -41,6 +41,7 @@
 import org.eclipse.jgit.dircache.DirCacheIterator;
 import org.eclipse.jgit.errors.NoWorkTreeException;
 import org.eclipse.jgit.internal.JGitText;
+import org.eclipse.jgit.internal.diff.FilteredRenameDetector;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.MutableObjectId;
@@ -1109,9 +1110,10 @@
 
 		treeWalk.setFilter(TreeFilter.ANY_DIFF);
 		treeWalk.reset(parent.getTree(), commit.getTree());
-		renameDetector.reset();
-		renameDetector.addAll(DiffEntry.scan(treeWalk));
-		for (DiffEntry ent : renameDetector.compute()) {
+		List<DiffEntry> diffs = DiffEntry.scan(treeWalk);
+		FilteredRenameDetector filteredRenameDetector = new FilteredRenameDetector(
+				renameDetector);
+		for (DiffEntry ent : filteredRenameDetector.compute(diffs, path)) {
 			if (isRename(ent) && ent.getNewPath().equals(path.getPath()))
 				return ent;
 		}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/diff/FilteredRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/diff/FilteredRenameDetector.java
new file mode 100644
index 0000000..d65624f
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/diff/FilteredRenameDetector.java
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2022, Simeon Andreev and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+package org.eclipse.jgit.internal.diff;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.eclipse.jgit.diff.DiffEntry;
+import org.eclipse.jgit.diff.DiffEntry.ChangeType;
+import org.eclipse.jgit.diff.RenameDetector;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.treewalk.filter.PathFilter;
+
+/**
+ * Provides rename detection in special cases such as blame, where only a subset
+ * of the renames detected by {@link RenameDetector} is of interest.
+ */
+public class FilteredRenameDetector {
+
+	private final RenameDetector renameDetector;
+
+	/**
+	 * @param repository
+	 *            The repository in which to check for renames.
+	 */
+	public FilteredRenameDetector(Repository repository) {
+		this(new RenameDetector(repository));
+	}
+
+	/**
+	 * @param renameDetector
+	 *            The {@link RenameDetector} to use when checking for renames.
+	 */
+	public FilteredRenameDetector(RenameDetector renameDetector) {
+		this.renameDetector = renameDetector;
+	}
+
+	/**
+	 * @param diffs
+	 *            The set of changes to check.
+	 * @param pathFilter
+	 *            Filter out changes that didn't affect this path.
+	 * @return The subset of changes that affect only the filtered path.
+	 * @throws IOException
+	 */
+	public List<DiffEntry> compute(List<DiffEntry> diffs,
+			PathFilter pathFilter) throws IOException {
+		return compute(diffs, Arrays.asList(pathFilter));
+	}
+
+	/**
+	 * Tries to avoid computation overhead in {@link RenameDetector#compute()}
+	 * by filtering diffs related to the path filters only.
+	 * <p>
+	 * Note: current implementation only optimizes added or removed diffs,
+	 * further optimization is possible.
+	 *
+	 * @param changes
+	 *            The set of changes to check.
+	 * @param pathFilters
+	 *            Filter out changes that didn't affect these paths.
+	 * @return The subset of changes that affect only the filtered paths.
+	 * @throws IOException
+	 * @see RenameDetector#compute()
+	 */
+	public List<DiffEntry> compute(List<DiffEntry> changes,
+			List<PathFilter> pathFilters) throws IOException {
+
+		if (pathFilters == null) {
+			throw new IllegalArgumentException("Must specify path filters"); //$NON-NLS-1$
+		}
+
+		Set<String> paths = new HashSet<>(pathFilters.size());
+		for (PathFilter pathFilter : pathFilters) {
+			paths.add(pathFilter.getPath());
+		}
+
+		List<DiffEntry> filtered = new ArrayList<>();
+
+		// For new path: skip ADD's that don't match given paths
+		for (DiffEntry diff : changes) {
+			ChangeType changeType = diff.getChangeType();
+			if (changeType != ChangeType.ADD
+					|| paths.contains(diff.getNewPath())) {
+				filtered.add(diff);
+			}
+		}
+
+		renameDetector.reset();
+		renameDetector.addAll(filtered);
+		List<DiffEntry> sourceChanges = renameDetector.compute();
+
+		filtered.clear();
+
+		// For old path: skip DELETE's that don't match given paths
+		for (DiffEntry diff : changes) {
+			ChangeType changeType = diff.getChangeType();
+			if (changeType != ChangeType.DELETE
+					|| paths.contains(diff.getOldPath())) {
+				filtered.add(diff);
+			}
+		}
+
+		renameDetector.reset();
+		renameDetector.addAll(filtered);
+		List<DiffEntry> targetChanges = renameDetector.compute();
+
+		List<DiffEntry> result = new ArrayList<>();
+
+		for (DiffEntry sourceChange : sourceChanges) {
+			if (paths.contains(sourceChange.getNewPath())) {
+				result.add(sourceChange);
+			}
+		}
+		for (DiffEntry targetChange : targetChanges) {
+			if (paths.contains(targetChange.getOldPath())) {
+				result.add(targetChange);
+			}
+		}
+
+		renameDetector.reset();
+		return result;
+	}
+}