AddCommand: ability to switch off renormalization
JGit's AddCommand always renormalizes tracked files. C git does so only
on git add --renormalize. Especially for git add . and the JGit
equivalent git.add().addFilepattern(".").call() this can make a big
difference if there are many files, or large files.
Add a "renormalize" option to AddCommand. To maintain compatibility with
existing uses, this option is "true" by default, and the behavior of
AddCommand is as it has always been in JGit.
If set to "false", use an IndexDiffFilter (in addition to a path filter,
if any). This skips any unchanged files (that are not racily clean) from
content checks. Note that changes in CRLF settings or in filters will be
ignored for such files if renormalize == false.
Add the "--renormalize" option to the Add command in the JGit command
line program. For the command-line program, the default is as in C git:
renormalize is off by default and enabled only if the option is given.
Note that --renormalize implies --update in the command line program, as
in C git. In AddCommand, the two settings are independent.
Additionally, avoid opening input streams unnecessarily in
WorkingTreeIterator.getEntryContentLength() and fix some bogus
indentation.
Add a simple test that adds 1000 files of 10kB in 10 directories twice
and that fails if the second invocation (without any changes) with
renormalize=false is not significantly faster.
Locally, I observe for that second invocation
* git.add().addFilepattern(".").call() ~660ms
* git.add().addFilepattern(".").setRenormalize(false).call() ~16ms
Bug: 494323
Change-Id: I30f9d518563fa55d7058a48c27c425f3b60aeb4c
Signed-off-by: Thomas Wolf <twolf@apache.org>diff --git a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties
index 7b81af5..9e82c82 100644
--- a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties
+++ b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties
@@ -255,6 +255,7 @@
untrackedFiles=Untracked files:
updating=Updating {0}..{1}
usage_Abbrev=Instead of using the default number of hexadecimal digits (which will vary according to the number of objects in the repository with a default of 7) of the abbreviated object name, use <n> digits, or as many digits as needed to form a unique object name. An <n> of 0 will suppress long format, only showing the closest tag.
+usage_addRenormalize=Apply the "clean" process freshly to tracked files to forcibly add them again to the index. This implies -u.
usage_Aggressive=This option will cause gc to more aggressively optimize the repository at the expense of taking much more time
usage_AlwaysFallback=Show uniquely abbreviated commit object as fallback
usage_bareClone=Make a bare Git repository. That is, instead of creating [DIRECTORY] and placing the administrative files in [DIRECTORY]/.git, make the [DIRECTORY] itself the $GIT_DIR.
@@ -452,7 +453,7 @@
usage_runLfsStore=Run LFS Store in a given directory
usage_S3NoSslVerify=Skip verification of Amazon server certificate and hostname
usage_setTheGitRepositoryToOperateOn=set the git repository to operate on
-usage_shallowExclude=Deepen or shorten the history of a shallow repository to exclude commits reachable from a specified remote branch or tag.
+usage_shallowExclude=Deepen or shorten the history of a shallow repository to exclude commits reachable from a specified remote branch or tag.
usage_shallowSince=Deepen or shorten the history of a shallow repository to include all reachable commits after <date>.
usage_show=Display one commit
usage_showRefNamesMatchingCommits=Show ref names matching commits
diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Add.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Add.java
index 460f246..ff0b55d 100644
--- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Add.java
+++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Add.java
@@ -22,6 +22,9 @@
@Command(common = true, usage = "usage_addFileContentsToTheIndex")
class Add extends TextBuiltin {
+ @Option(name = "--renormalize", usage = "usage_addRenormalize")
+ private boolean renormalize = false;
+
@Option(name = "--update", aliases = { "-u" }, usage = "usage_onlyMatchAgainstAlreadyTrackedFiles")
private boolean update = false;
@@ -33,9 +36,13 @@ class Add extends TextBuiltin {
protected void run() throws Exception {
try (Git git = new Git(db)) {
AddCommand addCmd = git.add();
- addCmd.setUpdate(update);
- for (String p : filepatterns)
+ if (renormalize) {
+ update = true;
+ }
+ addCmd.setUpdate(update).setRenormalize(renormalize);
+ for (String p : filepatterns) {
addCmd.addFilepattern(p);
+ }
addCmd.call();
} catch (GitAPIException e) {
throw die(e.getMessage(), e);
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/AddCommandTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/AddCommandTest.java
index 57661a7..db2d5d1 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/AddCommandTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/AddCommandTest.java
@@ -17,12 +17,16 @@
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeTrue;
+import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStream;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.util.Set;
+import java.util.concurrent.TimeUnit;
import org.eclipse.jgit.api.ResetCommand.ResetType;
import org.eclipse.jgit.api.errors.FilterFailedException;
@@ -825,7 +829,7 @@ public void testAddIgnoredFile() throws Exception {
}
@Test
- public void testAddWholeRepo() throws Exception {
+ public void testAddWholeRepo() throws Exception {
FileUtils.mkdir(new File(db.getWorkTree(), "sub"));
File file = new File(db.getWorkTree(), "sub/a.txt");
FileUtils.createNewFile(file);
@@ -848,6 +852,72 @@ public void testAddWholeRepo() throws Exception {
}
}
+ @Test
+ public void testAddAllNoRenormalize() throws Exception {
+ final int nOfFiles = 1000;
+ final int filesPerDir = nOfFiles / 10;
+ final int fileSizeInBytes = 10_000;
+ assertTrue(nOfFiles > 0);
+ assertTrue(filesPerDir > 0);
+ File dir = null;
+ File lastFile = null;
+ for (int i = 0; i < nOfFiles; i++) {
+ if (i % filesPerDir == 0) {
+ dir = new File(db.getWorkTree(), "dir" + (i / filesPerDir));
+ FileUtils.mkdir(dir);
+ }
+ lastFile = new File(dir, "file" + i);
+ try (OutputStream out = new BufferedOutputStream(
+ new FileOutputStream(lastFile))) {
+ for (int b = 0; b < fileSizeInBytes; b++) {
+ out.write('a' + (b % 26));
+ if (((b + 1) % 70) == 0) {
+ out.write('\n');
+ }
+ }
+ }
+ }
+ // Help null pointer analysis.
+ assert lastFile != null;
+ // Wait a bit. If entries are "racily clean", we'll recompute
+ // hashes from the disk files, and then the second add is also slow.
+ // We want to test the normal case.
+ fsTick(lastFile);
+ try (Git git = new Git(db)) {
+ long start = System.nanoTime();
+ git.add().addFilepattern(".").call();
+ long initialElapsed = System.nanoTime() - start;
+ assertEquals("Unexpected number on index entries", nOfFiles,
+ db.readDirCache().getEntryCount());
+ start = System.nanoTime();
+ git.add().addFilepattern(".").setRenormalize(false).call();
+ long secondElapsed = System.nanoTime() - start;
+ assertEquals("Unexpected number on index entries", nOfFiles,
+ db.readDirCache().getEntryCount());
+ // Fail the test if the second add all was not significantly faster.
+ // A factor of 4 is rather generous. The speed-up depends on the
+ // file system and OS caching and is hard to predict.
+ assertTrue(
+ "Second add all was too slow; initial took "
+ + TimeUnit.NANOSECONDS.toMillis(initialElapsed)
+ + ", second took "
+ + TimeUnit.NANOSECONDS.toMillis(secondElapsed),
+ secondElapsed * 4 <= initialElapsed);
+ // Change one file. The index should be updated even if
+ // renormalize==false. It doesn't matter what kind of change we do.
+ final String newData = "Hello";
+ Files.writeString(lastFile.toPath(), newData);
+ git.add().addFilepattern(".").setRenormalize(false).call();
+ DirCache dc = db.readDirCache();
+ DirCacheEntry e = dc.getEntry(lastFile.getParentFile().getName()
+ + '/' + lastFile.getName());
+ String blob = new String(db
+ .open(e.getObjectId(), Constants.OBJ_BLOB).getCachedBytes(),
+ UTF_8);
+ assertEquals("Unexpected index content", newData, blob);
+ }
+ }
+
// the same three cases as in testAddWithParameterUpdate
// file a exists in workdir and in index -> added
// file b exists not in workdir but in index -> unchanged
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/FilterCommandsTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/FilterCommandsTest.java
index 89d31c3..0513da2 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/FilterCommandsTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/FilterCommandsTest.java
@@ -100,8 +100,7 @@ public void tearDown() throws Exception {
}
@Test
- public void testBuiltinCleanFilter()
- throws IOException, GitAPIException {
+ public void testBuiltinCleanFilter() throws Exception {
String builtinCommandName = "jgit://builtin/test/clean";
FilterCommandRegistry.register(builtinCommandName,
new TestCommandFactory('c'));
@@ -113,28 +112,40 @@ public void testBuiltinCleanFilter()
git.add().addFilepattern(".gitattributes").call();
git.commit().setMessage("add filter").call();
- writeTrashFile("Test.txt", "Hello again");
+ File testFile = writeTrashFile("Test.txt", "Hello again");
+ // Wait a little bit to ensure that the call with setRenormalize(false)
+ // below doesn't consider the file "racily clean".
+ fsTick(testFile);
git.add().addFilepattern("Test.txt").call();
assertEquals(
- "[.gitattributes, mode:100644, content:*.txt filter=test][Test.txt, mode:100644, content:cHceclclcoc cacgcacicn]",
+ "[.gitattributes, mode:100644, content:*.txt filter=test]"
+ + "[Test.txt, mode:100644, content:cHceclclcoc cacgcacicn]",
indexState(CONTENT));
writeTrashFile("Test.bin", "Hello again");
git.add().addFilepattern("Test.bin").call();
assertEquals(
- "[.gitattributes, mode:100644, content:*.txt filter=test][Test.bin, mode:100644, content:Hello again][Test.txt, mode:100644, content:cHceclclcoc cacgcacicn]",
+ "[.gitattributes, mode:100644, content:*.txt filter=test]"
+ + "[Test.bin, mode:100644, content:Hello again]"
+ + "[Test.txt, mode:100644, content:cHceclclcoc cacgcacicn]",
indexState(CONTENT));
config.setString("filter", "test", "clean", null);
config.save();
+ git.add().addFilepattern("Test.txt").setRenormalize(false).call();
+ assertEquals("No index update expected with renormalize==false",
+ "[.gitattributes, mode:100644, content:*.txt filter=test]"
+ + "[Test.bin, mode:100644, content:Hello again]"
+ + "[Test.txt, mode:100644, content:cHceclclcoc cacgcacicn]",
+ indexState(CONTENT));
+
git.add().addFilepattern("Test.txt").call();
- assertEquals(
- "[.gitattributes, mode:100644, content:*.txt filter=test][Test.bin, mode:100644, content:Hello again][Test.txt, mode:100644, content:Hello again]",
+ assertEquals("Index update expected with renormalize==true",
+ "[.gitattributes, mode:100644, content:*.txt filter=test]"
+ + "[Test.bin, mode:100644, content:Hello again]"
+ + "[Test.txt, mode:100644, content:Hello again]",
indexState(CONTENT));
-
- config.setString("filter", "test", "clean", null);
- config.save();
}
@Test
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java b/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java
index ae75d46..cb32324 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/api/AddCommand.java
@@ -39,7 +39,10 @@
import org.eclipse.jgit.treewalk.NameConflictTreeWalk;
import org.eclipse.jgit.treewalk.TreeWalk.OperationType;
import org.eclipse.jgit.treewalk.WorkingTreeIterator;
+import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
+import org.eclipse.jgit.treewalk.filter.IndexDiffFilter;
import org.eclipse.jgit.treewalk.filter.PathFilterGroup;
+import org.eclipse.jgit.treewalk.filter.TreeFilter;
/**
* A class used to execute a {@code Add} command. It has setters for all
@@ -58,6 +61,10 @@ public class AddCommand extends GitCommand<DirCache> {
private boolean update = false;
+ // This defaults to true because it's what JGit has been doing
+ // traditionally. The C git default would be false.
+ private boolean renormalize = true;
+
/**
* Constructor for AddCommand
*
@@ -127,8 +134,20 @@ public DirCache call() throws GitAPIException, NoFilepatternException {
workingTreeIterator = new FileTreeIterator(repo);
workingTreeIterator.setDirCacheIterator(tw, 0);
tw.addTree(workingTreeIterator);
- if (!addAll)
- tw.setFilter(PathFilterGroup.createFromStrings(filepatterns));
+ TreeFilter pathFilter = null;
+ if (!addAll) {
+ pathFilter = PathFilterGroup.createFromStrings(filepatterns);
+ }
+ if (!renormalize) {
+ if (pathFilter == null) {
+ tw.setFilter(new IndexDiffFilter(0, 1));
+ } else {
+ tw.setFilter(AndTreeFilter.create(new IndexDiffFilter(0, 1),
+ pathFilter));
+ }
+ } else if (pathFilter != null) {
+ tw.setFilter(pathFilter);
+ }
byte[] lastAdded = null;
@@ -260,4 +279,39 @@ public AddCommand setUpdate(boolean update) {
public boolean isUpdate() {
return update;
}
+
+ /**
+ * Defines whether the command will renormalize by re-applying the "clean"
+ * process to tracked files.
+ * <p>
+ * This does not automatically call {@link #setUpdate(boolean)}.
+ * </p>
+ *
+ * @param renormalize
+ * whether to renormalize tracked files
+ * @return {@code this}
+ * @since 6.6
+ */
+ public AddCommand setRenormalize(boolean renormalize) {
+ this.renormalize = renormalize;
+ return this;
+ }
+
+ /**
+ * Tells whether the command will renormalize by re-applying the "clean"
+ * process to tracked files.
+ * <p>
+ * For legacy reasons, this is {@code true} by default.
+ * </p>
+ * <p>
+ * This setting is independent of {@link #isUpdate()}. In C git,
+ * command-line option --renormalize implies --update.
+ * </p>
+ *
+ * @return whether files will be renormalized
+ * @since 6.6
+ */
+ public boolean isRenormalize() {
+ return renormalize;
+ }
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
index d8a61ec..b5d6610 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
@@ -399,6 +399,35 @@ public boolean isWorkTree() {
}
}
+ private long possiblyFilteredLength(Entry e, long len) throws IOException {
+ if (getCleanFilterCommand() == null && getEolStreamType(
+ OperationType.CHECKIN_OP) == EolStreamType.DIRECT) {
+ return len;
+ }
+
+ if (len <= MAXIMUM_FILE_SIZE_TO_READ_FULLY) {
+ InputStream is = e.openInputStream();
+ try {
+ ByteBuffer rawbuf = IO.readWholeStream(is, (int) len);
+ rawbuf = filterClean(rawbuf.array(), rawbuf.limit());
+ return rawbuf.limit();
+ } finally {
+ safeClose(is);
+ }
+ }
+
+ if (getCleanFilterCommand() == null && isBinary(e)) {
+ return len;
+ }
+
+ InputStream is = filterClean(e.openInputStream());
+ try {
+ return computeLength(is);
+ } finally {
+ safeClose(is);
+ }
+ }
+
private InputStream possiblyFilteredInputStream(final Entry e,
final InputStream is, final long len)
throws IOException {
@@ -417,11 +446,11 @@ && getEolStreamType(
}
if (getCleanFilterCommand() == null && isBinary(e)) {
- canonLen = len;
- return is;
- }
+ canonLen = len;
+ return is;
+ }
- final InputStream lenIs = filterClean(e.openInputStream());
+ final InputStream lenIs = filterClean(e.openInputStream());
try {
canonLen = computeLength(lenIs);
} finally {
@@ -595,15 +624,11 @@ public long getEntryLength() {
public long getEntryContentLength() throws IOException {
if (canonLen == -1) {
long rawLen = getEntryLength();
- if (rawLen == 0)
+ if (rawLen == 0) {
canonLen = 0;
- InputStream is = current().openInputStream();
- try {
- // canonLen gets updated here
- possiblyFilteredInputStream(current(), is, current()
- .getLength());
- } finally {
- safeClose(is);
+ } else {
+ canonLen = possiblyFilteredLength(current(),
+ current().getLength());
}
}
return canonLen;