reftable: drop code for truncated reads The reftable format is a block based format, but allows for variably sized blocks. This obviously happens for reflog blocks (which are zlib compressed), but is also accepted for index blocks: In the spec, this is motivated as To achieve constant O(1) disk seeks for lookups the index must be a single level, which is permitted to exceed the file's configured block size, but not the format's max block size of 15.99 MiB. Hence, when parsing a block, one cannot be sure of its exact size: after reading a default-size block (eg. 4kb), the block header may state that the block is in fact larger. Before, the code would mark the block as `truncated`, noting // Its OK during sequential scan for an index block to have been // partially read and be truncated in-memory. This happens when // the index block is larger than the file's blockSize. Caller // will break out of its scan loop once it sees the blockType. This looks like either * a remnant of never-implemented functionality. There is no reason to ever sequentially scan an index block. * alluding to sequential scan of the data blocks before the index blocks (eg. scanning refs, which ends when we find the first ref index block, and we can then ignore the index block). This comment is followed by code that populates the restartTbl/restartCnt fields relative to the (possibly truncated) buffer. If the buffer is truncated, this essentially reads garbage, leading to OOB array access when using the index block. Fix this by dropping the truncated logic and issuing a second read if the first read was short. Add a test. We have never observed this failure scenario at Google. We use 64kb blocksize, which requires us to need fewer index entries. The reftable spec mentions an Android repo of size 36M. With 64kb blocks, that's just 562 index entries. Even with historical growth, we are long from requiring an index whose size exceeds a single block. When adding the analogous test for seeking refs, there was no failure. This points to another possibility which is that the code tries to avoid writing large index blocks for refs. I did not investigate further which one it is. Fixes https://bugs.eclipse.org/bugs/show_bug.cgi?id=576250 Bug: 576250 Change-Id: I41ec21fac9e526ef57b3d6fb57b988bd353ee338 Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>

commit: 5f8c48413623ea4d1685063582c74a216207ef51 [log] [tgz]
author: Han-Wen Nienhuys <hanwen@google.com> Tue Sep 28 20:05:03 2021 +0200
committer: Han-Wen Nienhuys <hanwen@google.com> Tue Sep 28 20:35:02 2021 +0200
tree: 7ad4a9862b47965c25e8033f2f762e38e8098e0d
parent: b4782d74fdb8e730e06b2fb6c4285fcb4e38439f [diff]
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java
index ec8b759..229c753 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java

@@ -836,6 +836,12 @@ public void logScan() throws IOException {
 			}
 			assertFalse(lc.next());
 		}
+
+		for (Ref exp : refs) {
+ 			try (LogCursor lc = t.seekLog(exp.getName())) {
+				assertTrue("has " + exp.getName(), lc.next());
+			}
+		}
 	}
 
 	@Test

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java
index b66751b..6ae7e45 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/BlockReader.java

@@ -92,7 +92,6 @@
 class BlockReader {
 	private byte blockType;
 	private long endPosition;
-	private boolean truncated;
 
 	private byte[] buf;
 	private int bufLen;
@@ -112,10 +111,6 @@ byte type() {
 		return blockType;
 	}
 
-	boolean truncated() {
-		return truncated;
-	}
-
 	long endPosition() {
 		return endPosition;
 	}
@@ -331,16 +326,8 @@ private void parseBlockStart(BlockSource src, long pos, int fileBlockSize)
 			// Log blocks must be inflated after the header.
 			long deflatedSize = inflateBuf(src, pos, blockLen, fileBlockSize);
 			endPosition = pos + 4 + deflatedSize;
-		}
-		if (bufLen < blockLen) {
-			if (blockType != INDEX_BLOCK_TYPE) {
-				throw invalidBlock();
-			}
-			// Its OK during sequential scan for an index block to have been
-			// partially read and be truncated in-memory. This happens when
-			// the index block is larger than the file's blockSize. Caller
-			// will break out of its scan loop once it sees the blockType.
-			truncated = true;
+		} else if (bufLen < blockLen) {
+			readBlockIntoBuf(src, pos, blockLen);
 		} else if (bufLen > blockLen) {
 			bufLen = blockLen;
 		}
@@ -405,7 +392,7 @@ private void setupEmptyFileBlock() {
 	}
 
 	void verifyIndex() throws IOException {
-		if (blockType != INDEX_BLOCK_TYPE || truncated) {
+		if (blockType != INDEX_BLOCK_TYPE) {
 			throw invalidBlock();
 		}
 	}

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java
index fef5fa5..3b912ea 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java

@@ -468,7 +468,7 @@ private BlockReader readBlock(long pos, long end) throws IOException {
 
 		BlockReader b = new BlockReader();
 		b.readBlock(src, pos, sz);
-		if (b.type() == INDEX_BLOCK_TYPE && !b.truncated()) {
+		if (b.type() == INDEX_BLOCK_TYPE) {
 			if (indexCache == null) {
 				indexCache = new LongMap<>();
 			}
commit	5f8c48413623ea4d1685063582c74a216207ef51	[log] [tgz]
author	Han-Wen Nienhuys <hanwen@google.com>	Tue Sep 28 20:05:03 2021 +0200
committer	Han-Wen Nienhuys <hanwen@google.com>	Tue Sep 28 20:35:02 2021 +0200
tree	7ad4a9862b47965c25e8033f2f762e38e8098e0d
parent	b4782d74fdb8e730e06b2fb6c4285fcb4e38439f [diff]