blob: 1f1c35ac19ddb59d1e2d9bde446e1f8a266f772f [file] [log] [blame]
/*
* Copyright (C) 2021, Tencent.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.commitgraph;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_LOOKUP;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_LOOKUP_WIDTH;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.COMMIT_DATA_WIDTH;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.COMMIT_GRAPH_MAGIC;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.GRAPH_EXTRA_EDGES_NEEDED;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.GRAPH_LAST_EDGE;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.GRAPH_NO_PARENT;
import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_NOT_COMPUTED;
import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Stack;
import org.eclipse.jgit.annotations.NonNull;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.io.CancellableDigestOutputStream;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.treewalk.EmptyTreeIterator;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.util.NB;
/**
* Writes a commit-graph formatted file.
*
* @since 6.5
*/
public class CommitGraphWriter {
private static final int COMMIT_GRAPH_VERSION_GENERATED = 1;
private static final int OID_HASH_VERSION = 1;
private static final int GRAPH_FANOUT_SIZE = 4 * 256;
private static final int GENERATION_NUMBER_MAX = 0x3FFFFFFF;
private static final int MAX_CHANGED_PATHS = 512;
private final int hashsz;
private final GraphCommits graphCommits;
private final boolean generateChangedPathFilters;
/**
* Create commit-graph writer for these commits.
*
* @param graphCommits
* the commits which will be writen to the commit-graph.
*/
public CommitGraphWriter(@NonNull GraphCommits graphCommits) {
this(graphCommits, false);
}
/**
* Create commit-graph writer for these commits.
*
* @param graphCommits
* the commits which will be writen to the commit-graph.
* @param generateChangedPathFilters
* whether changed path filters are generated
*/
public CommitGraphWriter(@NonNull GraphCommits graphCommits,
boolean generateChangedPathFilters) {
this.graphCommits = graphCommits;
this.hashsz = OBJECT_ID_LENGTH;
this.generateChangedPathFilters = generateChangedPathFilters;
}
/**
* Write commit-graph to the supplied stream.
*
* @param monitor
* progress monitor to report the number of items written.
* @param commitGraphStream
* output stream of commit-graph data. The stream should be
* buffered by the caller. The caller is responsible for closing
* the stream.
* @return statistics gathered during the run
* @throws IOException
* if an error occurred
*/
public Stats write(@NonNull ProgressMonitor monitor,
@NonNull OutputStream commitGraphStream) throws IOException {
if (graphCommits.size() == 0) {
return Stats.EMPTY;
}
BloomFilterChunks bloomFilterChunks = generateChangedPathFilters
? computeBloomFilterChunks(monitor)
: null;
List<ChunkHeader> chunks = new ArrayList<>();
chunks.addAll(createCoreChunks(hashsz, graphCommits));
chunks.addAll(createBloomFilterChunkHeaders(bloomFilterChunks));
chunks = Collections.unmodifiableList(chunks);
long expectedSize = calculateExpectedSize(chunks);
try (CancellableDigestOutputStream out = new CancellableDigestOutputStream(
monitor, commitGraphStream)) {
writeHeader(out, chunks.size());
writeChunkLookup(out, chunks);
writeChunks(out, chunks);
writeCheckSum(out);
if (expectedSize != out.length()) {
throw new IllegalStateException(String.format(
JGitText.get().commitGraphUnexpectedSize,
Long.valueOf(expectedSize),
Long.valueOf(out.length())));
}
} catch (InterruptedIOException e) {
throw new IOException(JGitText.get().commitGraphWritingCancelled,
e);
}
return Stats.from(bloomFilterChunks);
}
private static List<ChunkHeader> createCoreChunks(int hashsz,
GraphCommits graphCommits) {
List<ChunkHeader> chunks = new ArrayList<>();
chunks.add(new ChunkHeader(CHUNK_ID_OID_FANOUT, GRAPH_FANOUT_SIZE));
chunks.add(new ChunkHeader(CHUNK_ID_OID_LOOKUP,
hashsz * graphCommits.size()));
chunks.add(new ChunkHeader(CHUNK_ID_COMMIT_DATA,
(hashsz + 16) * graphCommits.size()));
if (graphCommits.getExtraEdgeCnt() > 0) {
chunks.add(new ChunkHeader(CHUNK_ID_EXTRA_EDGE_LIST,
graphCommits.getExtraEdgeCnt() * 4));
}
return Collections.unmodifiableList(chunks);
}
private static List<ChunkHeader> createBloomFilterChunkHeaders(
@Nullable BloomFilterChunks bloomFilterChunks) {
List<ChunkHeader> chunks = new ArrayList<>();
if (bloomFilterChunks != null) {
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_INDEX,
bloomFilterChunks.index));
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_DATA,
bloomFilterChunks.data));
}
return Collections.unmodifiableList(chunks);
}
private static long calculateExpectedSize(List<ChunkHeader> chunks) {
int chunkLookup = (chunks.size() + 1) * CHUNK_LOOKUP_WIDTH;
long chunkContent = chunks.stream().mapToLong(c -> c.size).sum();
return /* header */ 8 + chunkLookup + chunkContent + /* CRC */ 20;
}
private void writeHeader(CancellableDigestOutputStream out, int numChunks)
throws IOException {
byte[] headerBuffer = new byte[8];
NB.encodeInt32(headerBuffer, 0, COMMIT_GRAPH_MAGIC);
byte[] buff = { (byte) COMMIT_GRAPH_VERSION_GENERATED,
(byte) OID_HASH_VERSION, (byte) numChunks, (byte) 0 };
System.arraycopy(buff, 0, headerBuffer, 4, 4);
out.write(headerBuffer, 0, 8);
out.flush();
}
private void writeChunkLookup(CancellableDigestOutputStream out,
List<ChunkHeader> chunks) throws IOException {
int numChunks = chunks.size();
long chunkOffset = 8 + (numChunks + 1L) * CHUNK_LOOKUP_WIDTH;
byte[] buffer = new byte[CHUNK_LOOKUP_WIDTH];
for (ChunkHeader chunk : chunks) {
NB.encodeInt32(buffer, 0, chunk.id);
NB.encodeInt64(buffer, 4, chunkOffset);
out.write(buffer);
chunkOffset += chunk.size;
}
NB.encodeInt32(buffer, 0, 0);
NB.encodeInt64(buffer, 4, chunkOffset);
out.write(buffer);
}
private void writeChunks(CancellableDigestOutputStream out,
List<ChunkHeader> chunks) throws IOException {
for (ChunkHeader chunk : chunks) {
int chunkId = chunk.id;
switch (chunkId) {
case CHUNK_ID_OID_FANOUT:
writeFanoutTable(out);
break;
case CHUNK_ID_OID_LOOKUP:
writeOidLookUp(out);
break;
case CHUNK_ID_COMMIT_DATA:
writeCommitData(out);
break;
case CHUNK_ID_EXTRA_EDGE_LIST:
writeExtraEdges(out);
break;
case CHUNK_ID_BLOOM_FILTER_INDEX:
case CHUNK_ID_BLOOM_FILTER_DATA:
if (!chunk.data.isPresent()) {
throw new IllegalStateException(
"data for this chunk must be precomputed"); //$NON-NLS-1$
}
chunk.data.get().writeTo(out);
break;
default:
throw new IllegalStateException(
"Don't know how to write chunk " + chunkId); //$NON-NLS-1$
}
}
}
private void writeCheckSum(CancellableDigestOutputStream out)
throws IOException {
out.write(out.getDigest());
out.flush();
}
private void writeFanoutTable(CancellableDigestOutputStream out)
throws IOException {
byte[] tmp = new byte[4];
int[] fanout = new int[256];
for (RevCommit c : graphCommits) {
fanout[c.getFirstByte() & 0xff]++;
}
for (int i = 1; i < fanout.length; i++) {
fanout[i] += fanout[i - 1];
}
for (int n : fanout) {
NB.encodeInt32(tmp, 0, n);
out.write(tmp, 0, 4);
}
}
private void writeOidLookUp(CancellableDigestOutputStream out)
throws IOException {
byte[] tmp = new byte[4 + hashsz];
for (RevCommit c : graphCommits) {
c.copyRawTo(tmp, 0);
out.write(tmp, 0, hashsz);
}
}
private void writeCommitData(CancellableDigestOutputStream out)
throws IOException {
ProgressMonitor monitor = out.getWriteMonitor();
int[] generations = computeGenerationNumbers(monitor);
monitor.beginTask(JGitText.get().writingOutCommitGraph,
graphCommits.size());
int num = 0;
byte[] tmp = new byte[hashsz + COMMIT_DATA_WIDTH];
int i = 0;
for (RevCommit commit : graphCommits) {
int edgeValue;
int[] packedDate = new int[2];
ObjectId treeId = commit.getTree();
treeId.copyRawTo(tmp, 0);
RevCommit[] parents = commit.getParents();
if (parents.length == 0) {
edgeValue = GRAPH_NO_PARENT;
} else {
RevCommit parent = parents[0];
edgeValue = graphCommits.getOidPosition(parent);
}
NB.encodeInt32(tmp, hashsz, edgeValue);
if (parents.length == 1) {
edgeValue = GRAPH_NO_PARENT;
} else if (parents.length == 2) {
RevCommit parent = parents[1];
edgeValue = graphCommits.getOidPosition(parent);
} else if (parents.length > 2) {
edgeValue = GRAPH_EXTRA_EDGES_NEEDED | num;
num += parents.length - 1;
}
NB.encodeInt32(tmp, hashsz + 4, edgeValue);
packedDate[0] = 0; // commitTime is an int in JGit now
packedDate[0] |= generations[i] << 2;
packedDate[1] = commit.getCommitTime();
NB.encodeInt32(tmp, hashsz + 8, packedDate[0]);
NB.encodeInt32(tmp, hashsz + 12, packedDate[1]);
out.write(tmp);
monitor.update(1);
i++;
}
monitor.endTask();
}
private int[] computeGenerationNumbers(ProgressMonitor monitor)
throws MissingObjectException {
int[] generations = new int[graphCommits.size()];
monitor.beginTask(JGitText.get().computingCommitGeneration,
graphCommits.size());
for (RevCommit cmit : graphCommits) {
monitor.update(1);
int generation = generations[graphCommits.getOidPosition(cmit)];
if (generation != COMMIT_GENERATION_NOT_COMPUTED
&& generation != COMMIT_GENERATION_UNKNOWN) {
continue;
}
Stack<RevCommit> commitStack = new Stack<>();
commitStack.push(cmit);
while (!commitStack.empty()) {
int maxGeneration = 0;
boolean allParentComputed = true;
RevCommit current = commitStack.peek();
RevCommit parent;
for (int i = 0; i < current.getParentCount(); i++) {
parent = current.getParent(i);
generation = generations[graphCommits
.getOidPosition(parent)];
if (generation == COMMIT_GENERATION_NOT_COMPUTED
|| generation == COMMIT_GENERATION_UNKNOWN) {
allParentComputed = false;
commitStack.push(parent);
break;
} else if (generation > maxGeneration) {
maxGeneration = generation;
}
}
if (allParentComputed) {
RevCommit commit = commitStack.pop();
generation = maxGeneration + 1;
if (generation > GENERATION_NUMBER_MAX) {
generation = GENERATION_NUMBER_MAX;
}
generations[graphCommits
.getOidPosition(commit)] = generation;
}
}
}
monitor.endTask();
return generations;
}
private static Optional<HashSet<ByteBuffer>> computeBloomFilterPaths(
ObjectReader or, RevCommit cmit) throws MissingObjectException,
IncorrectObjectTypeException, CorruptObjectException, IOException {
HashSet<ByteBuffer> paths = new HashSet<>();
try (TreeWalk walk = new TreeWalk(null, or)) {
walk.setRecursive(true);
if (cmit.getParentCount() == 0) {
walk.addTree(new EmptyTreeIterator());
} else {
walk.addTree(cmit.getParent(0).getTree());
}
walk.addTree(cmit.getTree());
while (walk.next()) {
if (walk.idEqual(0, 1)) {
continue;
}
byte[] rawPath = walk.getRawPath();
paths.add(ByteBuffer.wrap(rawPath));
for (int i = 0; i < rawPath.length; i++) {
if (rawPath[i] == '/') {
paths.add(ByteBuffer.wrap(rawPath, 0, i));
}
if (paths.size() > MAX_CHANGED_PATHS) {
return Optional.empty();
}
}
}
}
return Optional.of(paths);
}
private BloomFilterChunks computeBloomFilterChunks(ProgressMonitor monitor)
throws MissingObjectException, IncorrectObjectTypeException,
CorruptObjectException, IOException {
ByteArrayOutputStream index = new ByteArrayOutputStream();
ByteArrayOutputStream data = new ByteArrayOutputStream();
long filtersReused = 0;
long filtersComputed =0;
// Allocate scratch buffer for converting integers into
// big-endian bytes.
byte[] scratch = new byte[4];
NB.encodeInt32(scratch, 0, 1); // version 1
data.write(scratch);
NB.encodeInt32(scratch, 0, ChangedPathFilter.PATH_HASH_COUNT);
data.write(scratch);
NB.encodeInt32(scratch, 0, ChangedPathFilter.BITS_PER_ENTRY);
data.write(scratch);
int dataHeaderSize = data.size();
try (RevWalk rw = new RevWalk(graphCommits.getObjectReader())) {
monitor.beginTask(JGitText.get().computingPathBloomFilters,
graphCommits.size());
for (RevCommit cmit : graphCommits) {
ChangedPathFilter cpf = cmit.getChangedPathFilter(rw);
if (cpf != null) {
filtersReused++;
} else {
filtersComputed++;
Optional<HashSet<ByteBuffer>> paths = computeBloomFilterPaths(
graphCommits.getObjectReader(), cmit);
if (paths.isEmpty()) {
cpf = ChangedPathFilter.FULL;
} else {
cpf = ChangedPathFilter.fromPaths(paths.get());
}
}
cpf.writeTo(data);
NB.encodeInt32(scratch, 0, data.size() - dataHeaderSize);
index.write(scratch);
monitor.update(1);
}
monitor.endTask();
return new BloomFilterChunks(index, data, filtersReused, filtersComputed);
}
}
private void writeExtraEdges(CancellableDigestOutputStream out)
throws IOException {
byte[] tmp = new byte[4];
for (RevCommit commit : graphCommits) {
RevCommit[] parents = commit.getParents();
if (parents.length > 2) {
int edgeValue;
for (int n = 1; n < parents.length; n++) {
RevCommit parent = parents[n];
edgeValue = graphCommits.getOidPosition(parent);
if (n == parents.length - 1) {
edgeValue |= GRAPH_LAST_EDGE;
}
NB.encodeInt32(tmp, 0, edgeValue);
out.write(tmp);
}
}
}
}
private static class ChunkHeader {
final int id;
final long size;
final Optional<ByteArrayOutputStream> data;
public ChunkHeader(int id, long size) {
this.id = id;
this.size = size;
this.data = Optional.empty();
}
ChunkHeader(int id, ByteArrayOutputStream data) {
this.id = id;
this.size = data.size();
this.data = Optional.of(data);
}
}
private static class BloomFilterChunks {
final ByteArrayOutputStream index;
final ByteArrayOutputStream data;
final long filtersReused;
final long filtersComputed;
BloomFilterChunks(ByteArrayOutputStream index,
ByteArrayOutputStream data,
long filtersReused,
long filtersComputed) {
this.index = index;
this.data = data;
this.filtersReused = filtersReused;
this.filtersComputed = filtersComputed;
}
}
/**
* Statistics collected during a single commit graph write.
*/
public static class Stats {
static final Stats EMPTY = new Stats();
static final Stats from(@Nullable BloomFilterChunks bloomFilterChunks) {
Stats stats = new Stats();
if (bloomFilterChunks != null) {
stats.changedPathFiltersComputed = bloomFilterChunks.filtersComputed;
stats.changedPathFiltersReused = bloomFilterChunks.filtersReused;
}
return stats;
}
private Stats() {}
private long changedPathFiltersReused = 0;
private long changedPathFiltersComputed = 0;
/**
* Returns the number of existing changed path filters that were reused
* when writing, for statistical purposes.
*
* @return count of changed path filters
*/
public long getChangedPathFiltersReused() {
return changedPathFiltersReused;
}
/**
* Returns the number of changed path filters that were computed from
* scratch, for statistical purposes.
*
* @return count of changed path filters
*/
public long getChangedPathFiltersComputed() {
return changedPathFiltersComputed;
}
}
}