/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.storage.dfs;

import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.io.CountingOutputStream;

/** Repack and garbage collect a repository. */
public class DfsGarbageCollector {
private final DfsRepository repo;
private final DfsRefDatabase refdb;
private final DfsObjDatabase objdb;
private final List<DfsPackDescription> newPackDesc;
private final List<PackWriter.Statistics> newPackStats;
private final List<DfsPackFile> newPackList;
private DfsReader ctx;
private PackConfig packConfig;
private Map<String, Ref> refsBefore;
private List<DfsPackFile> packsBefore;
private Set<ObjectId> allHeads;
private Set<ObjectId> nonHeads;

/** Sum of object counts in {@link #packsBefore}. */
private long objectsBefore;

/** Sum of object counts in {@link #newPackDesc}. */
private long objectsPacked;
private Set<ObjectId> tagTargets;

/**
* Initialize a garbage collector.
*
* @param repository
* the repository whose objects are to be packed.
*/
public DfsGarbageCollector(DfsRepository repository) {
repo = repository;
refdb = repo.getRefDatabase();
objdb = repo.getObjectDatabase();
newPackDesc = new ArrayList<DfsPackDescription>(4);
newPackStats = new ArrayList<PackWriter.Statistics>(4);
newPackList = new ArrayList<DfsPackFile>(4);
packConfig = new PackConfig(repo);
packConfig.setIndexVersion(2);
}

/** @return configuration used to generate the new pack file. */
public PackConfig getPackConfig() {
return packConfig;
}

/**
* Set the configuration used when creating the new pack file.
*
* @param newConfig
* the new configuration to use when creating the pack file.
* @return {@code this}
*/
public DfsGarbageCollector setPackConfig(PackConfig newConfig) {
packConfig = newConfig;
return this;
}

/**
* Create a single new pack file containing all of the live objects.
* <p>
* This method safely decides which packs can be expired after the new pack
* is created by validating the references have not been modified in an
* incompatible way.
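* <p>
* Typical usage, as a sketch ({@code repository} is assumed to be an
* existing {@link DfsRepository}):
*
* <pre>
* DfsGarbageCollector gc = new DfsGarbageCollector(repository);
* boolean fullyPacked = gc.pack(null); // null uses NullProgressMonitor
* </pre>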
*
* @param pm
* progress monitor to receive packing updates; packing may take a
* while, depending on the size of the repository.
* @return true if the repack completed without detecting a race
* condition; false if a race was detected and the repack should be
* retried later.
* @throws IOException
* a new pack cannot be created.
*/
public boolean pack(ProgressMonitor pm) throws IOException {
if (pm == null)
pm = NullProgressMonitor.INSTANCE;
if (packConfig.getIndexVersion() != 2)
throw new IllegalStateException("Only pack index version 2 is supported");
ctx = (DfsReader) objdb.newReader();
try {
refdb.clearCache();
objdb.clearCache();
refsBefore = repo.getAllRefs();
packsBefore = Arrays.asList(objdb.getPacks());
if (packsBefore.isEmpty())
return true;
allHeads = new HashSet<ObjectId>();
nonHeads = new HashSet<ObjectId>();
tagTargets = new HashSet<ObjectId>();
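// Partition the ref tips: commits under refs/heads/ feed the primary
// GC pack, every other ref tip goes into a secondary pack. Peeled tag
// targets (plus all heads) are remembered for the pack writer.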
for (Ref ref : refsBefore.values()) {
if (ref.isSymbolic() || ref.getObjectId() == null)
continue;
if (isHead(ref))
allHeads.add(ref.getObjectId());
else
nonHeads.add(ref.getObjectId());
if (ref.getPeeledObjectId() != null)
tagTargets.add(ref.getPeeledObjectId());
}
tagTargets.addAll(allHeads);
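// Write every replacement pack first; only when all writes succeed
// are they committed atomically and the old packs pruned. On any
// failure the packs created so far are rolled back.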
boolean rollback = true;
try {
packHeads(pm);
packRest(pm);
packGarbage(pm);
objdb.commitPack(newPackDesc, toPrune());
rollback = false;
return true;
} finally {
if (rollback)
objdb.rollbackPack(newPackDesc);
}
} finally {
ctx.release();
}
}

/** @return all of the source packs that fed into this compaction. */
public List<DfsPackDescription> getSourcePacks() {
return toPrune();
}

/** @return new packs created by this compaction. */
public List<DfsPackDescription> getNewPacks() {
return newPackDesc;
}

/** @return statistics corresponding to the {@link #getNewPacks()}. */
public List<PackWriter.Statistics> getNewPackStatistics() {
return newPackStats;
}

private List<DfsPackDescription> toPrune() {
int cnt = packsBefore.size();
List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
for (DfsPackFile pack : packsBefore)
all.add(pack.getPackDescription());
return all;
}

private void packHeads(ProgressMonitor pm) throws IOException {
if (allHeads.isEmpty())
return;
PackWriter pw = newPackWriter();
try {
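// The primary pack: everything reachable from the branch heads, with
// nothing excluded. The heads are recorded as the pack's tips.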
pw.preparePack(pm, allHeads, Collections.<ObjectId> emptySet());
if (0 < pw.getObjectCount())
writePack(GC, pw, pm).setTips(allHeads);
} finally {
pw.release();
}
}

private void packRest(ProgressMonitor pm) throws IOException {
if (nonHeads.isEmpty() || objectsPacked == getObjectsBefore())
return;
PackWriter pw = newPackWriter();
try {
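// Exclude everything packHeads() already wrote, so this pack holds
// only objects reachable solely from non-head refs.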
for (DfsPackFile pack : newPackList)
pw.excludeObjects(pack.getPackIndex(ctx));
pw.preparePack(pm, nonHeads, allHeads);
if (0 < pw.getObjectCount())
writePack(GC, pw, pm);
} finally {
pw.release();
}
}

private void packGarbage(ProgressMonitor pm) throws IOException {
if (objectsPacked == getObjectsBefore())
return;
// TODO(sop) This is ugly. The garbage pack needs to be deleted.
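// An object that appears in an old pack but in none of the new packs
// was not reached from any ref; it is preserved in a separate
// UNREACHABLE_GARBAGE pack rather than deleted outright.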
List<PackIndex> newIdx = new ArrayList<PackIndex>(newPackList.size());
for (DfsPackFile pack : newPackList)
newIdx.add(pack.getPackIndex(ctx));
PackWriter pw = newPackWriter();
try {
RevWalk pool = new RevWalk(ctx);
for (DfsPackFile oldPack : packsBefore) {
PackIndex oldIdx = oldPack.getPackIndex(ctx);
pm.beginTask("Finding garbage", (int) oldIdx.getObjectCount());
for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1);
ObjectId id = ent.toObjectId();
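// Skip objects already queued from an earlier old pack (known to
// the RevWalk pool) or present in one of the new packs.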
if (pool.lookupOrNull(id) != null || anyIndexHas(newIdx, id))
continue;
int type = oldPack.getObjectType(ctx, ent.getOffset());
pw.addObject(pool.lookupAny(id, type));
}
pm.endTask();
}
if (0 < pw.getObjectCount())
writePack(UNREACHABLE_GARBAGE, pw, pm);
} finally {
pw.release();
}
}

private static boolean anyIndexHas(List<PackIndex> list, AnyObjectId id) {
for (PackIndex idx : list)
if (idx.hasObject(id))
return true;
return false;
}

private static boolean isHead(Ref ref) {
return ref.getName().startsWith(Constants.R_HEADS);
}

private long getObjectsBefore() {
if (objectsBefore == 0) {
for (DfsPackFile p : packsBefore)
objectsBefore += p.getPackDescription().getObjectCount();
}
return objectsBefore;
}

private PackWriter newPackWriter() {
PackWriter pw = new PackWriter(packConfig, ctx);
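// Offset delta bases keep the pack smaller, and disabling reuse of
// existing commit deltas forces fresh deltas against the new layout.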
pw.setDeltaBaseAsOffset(true);
pw.setReuseDeltaCommits(false);
pw.setTagTargets(tagTargets);
return pw;
}

private DfsPackDescription writePack(PackSource source, PackWriter pw,
ProgressMonitor pm) throws IOException {
DfsOutputStream out;
DfsPackDescription pack = repo.getObjectDatabase().newPack(source);
newPackDesc.add(pack);
out = objdb.writePackFile(pack);
try {
pw.writePack(pm, pm, out);
} finally {
out.close();
}
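// The index is written as its own DFS stream; a CountingOutputStream
// measures it so the size can be recorded on the pack description.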
out = objdb.writePackIndex(pack);
try {
CountingOutputStream cnt = new CountingOutputStream(out);
pw.writeIndex(cnt);
pack.setIndexSize(cnt.getCount());
} finally {
out.close();
}
PackWriter.Statistics stats = pw.getStatistics();
pack.setPackStats(stats);
pack.setPackSize(stats.getTotalBytes());
pack.setObjectCount(stats.getTotalObjects());
pack.setDeltaCount(stats.getTotalDeltas());
objectsPacked += stats.getTotalObjects();
newPackStats.add(stats);
newPackList.add(DfsBlockCache.getInstance().getOrCreate(pack, null));
return pack;
}
}