blob: 328643688d29f581566ea4887e473b19e5477039 [file] [log] [blame]
/*
* Copyright (C) 2022, Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.transport.PackedObjectInfo;
import org.eclipse.jgit.util.NB;
/**
* Write an object index in the output stream
*/
public abstract class PackObjectSizeIndexWriter {
private static final int MAX_24BITS_UINT = 0xffffff;
private static final PackObjectSizeIndexWriter NULL_WRITER = new PackObjectSizeIndexWriter() {
@Override
public void write(List<? extends PackedObjectInfo> objs) {
// Do nothing
}
};
/** Magic constant for the object size index file */
protected static final byte[] HEADER = { -1, 's', 'i', 'z' };
/**
* Returns a writer for the latest index version
*
* @param os
* Output stream where to write the index
* @param minSize
* objects strictly smaller than this size won't be added to the
* index. Negative size won't write AT ALL. Other sizes could write
* an empty index.
* @return the index writer
*/
public static PackObjectSizeIndexWriter createWriter(OutputStream os,
int minSize) {
if (minSize < 0) {
return NULL_WRITER;
}
return new PackObjectSizeWriterV1(os, minSize);
}
/**
* Add the objects to the index
*
* @param objs
* objects in the pack, in sha1 order. Their position in the list
* matches their position in the primary index.
* @throws IOException
* problem writing to the stream
*/
public abstract void write(List<? extends PackedObjectInfo> objs)
throws IOException;
/**
* Object size index v1.
*
* Store position (in the main index) to size as parallel arrays.
*
* <p>
* Positions in the main index fit well in unsigned 24 bits (16M) for most
* repositories, but some outliers have even more objects, so we need to
* store also 32 bits positions.
*
* <p>
* Sizes are stored as a first array parallel to positions. If a size
* doesn't fit in an element of that array, then we encode there a position
* on the next-size array. This "overflow" array doesn't have entries for
* all positions.
*
* <pre>
*
* positions [10, 500, 1000, 1001]
* sizes (32bits) [15MB, -1, 6MB, -2]
* ___/ ______/
* / /
* sizes (64 bits) [3GB, 6GB]
* </pre>
*
* <p>
* For sizes we use 32 bits as the first level and 64 for the rare objects
* over 2GB.
*
* <p>
* A 24/32/64 bits hierarchy of arrays saves space if we have a lot of small
* objects, but wastes space if we have only big ones. The min size to index
* is controlled by conf and in principle we want to index only rather big
* objects (e.g. > 10MB). We could support more dynamics read/write of sizes
* (e.g. 24 only if the threshold will include many of those objects) but it
* complicates a lot code and spec. If needed it could go for a v2 of the
* protocol.
*
* <p>
* Format:
* <ul>
* <li>A header with the magic number (4 bytes)
* <li>The index version (1 byte)
* <li>The minimum object size (4 bytes)
* <li>Total count of objects indexed (C, 4 bytes)
* </ul>
* (if count == 0, stop here)
* <p>
* Blocks of
* <ul>
* <li>Size per entry in bits (1 byte, either 24 (0x18) or 32 (0x20))
* <li>Count of entries (4 bytes) (c, as a signed int)
* <li>positions encoded in s bytes each (i.e s*c bytes)
*
* <li>0 (as a "size-per-entry = 0", marking end of the section)
*
* <li>32 bit sizes (C * 4 bytes). Negative size means
* nextLevel[abs(size)-1]
* <li>Count of 64 bit sizes (s64) (or 0 if no more indirections)
* <li>64 bit sizes (s64 * 8 bytes)
* <li>0 (end)
* </ul>
*/
static class PackObjectSizeWriterV1 extends PackObjectSizeIndexWriter {
private final OutputStream os;
private final int minObjSize;
private final byte[] intBuffer = new byte[4];
PackObjectSizeWriterV1(OutputStream os, int minSize) {
this.os = new BufferedOutputStream(os);
this.minObjSize = minSize;
}
@Override
public void write(List<? extends PackedObjectInfo> allObjects)
throws IOException {
os.write(HEADER);
writeUInt8(1); // Version
writeInt32(minObjSize);
PackedObjectStats stats = countIndexableObjects(allObjects);
int[] indexablePositions = findIndexablePositions(allObjects,
stats.indexableObjs);
writeInt32(indexablePositions.length); // Total # of objects
if (indexablePositions.length == 0) {
os.flush();
return;
}
// Positions that fit in 3 bytes
if (stats.pos24Bits > 0) {
writeUInt8(24);
writeInt32(stats.pos24Bits);
applyToRange(indexablePositions, 0, stats.pos24Bits,
this::writeInt24);
}
// Positions that fit in 4 bytes
// We only use 31 bits due to sign,
// but that covers 2 billion objs
if (stats.pos31Bits > 0) {
writeUInt8(32);
writeInt32(stats.pos31Bits);
applyToRange(indexablePositions, stats.pos24Bits,
stats.pos24Bits + stats.pos31Bits, this::writeInt32);
}
writeUInt8(0);
writeSizes(allObjects, indexablePositions, stats.sizeOver2GB);
os.flush();
}
private void writeUInt8(int i) throws IOException {
if (i > 255) {
throw new IllegalStateException(
JGitText.get().numberDoesntFit);
}
NB.encodeInt32(intBuffer, 0, i);
os.write(intBuffer, 3, 1);
}
private void writeInt24(int i) throws IOException {
NB.encodeInt24(intBuffer, 1, i);
os.write(intBuffer, 1, 3);
}
private void writeInt32(int i) throws IOException {
NB.encodeInt32(intBuffer, 0, i);
os.write(intBuffer);
}
private void writeSizes(List<? extends PackedObjectInfo> allObjects,
int[] indexablePositions, int objsBiggerThan2Gb)
throws IOException {
if (indexablePositions.length == 0) {
writeInt32(0);
return;
}
byte[] sizes64bits = new byte[8 * objsBiggerThan2Gb];
int s64 = 0;
for (int i = 0; i < indexablePositions.length; i++) {
PackedObjectInfo info = allObjects.get(indexablePositions[i]);
if (info.getFullSize() < Integer.MAX_VALUE) {
writeInt32((int) info.getFullSize());
} else {
// Size needs more than 32 bits. Store -1 * offset in the
// next table as size.
writeInt32(-1 * (s64 + 1));
NB.encodeInt64(sizes64bits, s64 * 8, info.getFullSize());
s64++;
}
}
if (objsBiggerThan2Gb > 0) {
writeInt32(objsBiggerThan2Gb);
os.write(sizes64bits);
}
writeInt32(0);
}
private int[] findIndexablePositions(
List<? extends PackedObjectInfo> allObjects,
int indexableObjs) {
int[] positions = new int[indexableObjs];
int positionIdx = 0;
for (int i = 0; i < allObjects.size(); i++) {
PackedObjectInfo o = allObjects.get(i);
if (!shouldIndex(o)) {
continue;
}
positions[positionIdx++] = i;
}
return positions;
}
private PackedObjectStats countIndexableObjects(
List<? extends PackedObjectInfo> objs) {
PackedObjectStats stats = new PackedObjectStats();
for (int i = 0; i < objs.size(); i++) {
PackedObjectInfo o = objs.get(i);
if (!shouldIndex(o)) {
continue;
}
stats.indexableObjs++;
if (o.getFullSize() > Integer.MAX_VALUE) {
stats.sizeOver2GB++;
}
if (i <= MAX_24BITS_UINT) {
stats.pos24Bits++;
} else {
stats.pos31Bits++;
// i is a positive int, cannot be bigger than this
}
}
return stats;
}
private boolean shouldIndex(PackedObjectInfo o) {
return (o.getType() == Constants.OBJ_BLOB)
&& (o.getFullSize() >= minObjSize);
}
private static class PackedObjectStats {
int indexableObjs;
int pos24Bits;
int pos31Bits;
int sizeOver2GB;
}
@FunctionalInterface
interface IntEncoder {
void encode(int i) throws IOException;
}
private static void applyToRange(int[] allPositions, int start, int end,
IntEncoder encoder) throws IOException {
for (int i = start; i < end; i++) {
encoder.encode(allPositions[i]);
}
}
}
}