| /* |
| * Copyright (C) 2011, Google Inc. |
| * and other copyright owners as documented in the project's IP log. |
| * |
| * This program and the accompanying materials are made available |
| * under the terms of the Eclipse Distribution License v1.0 which |
| * accompanies this distribution, is reproduced below, and is |
| * available at http://www.eclipse.org/org/documents/edl-v10.php |
| * |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials provided |
| * with the distribution. |
| * |
| * - Neither the name of the Eclipse Foundation, Inc. nor the |
| * names of its contributors may be used to endorse or promote |
| * products derived from this software without specific prior |
| * written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
| * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
| * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| package org.eclipse.jgit.internal.storage.dfs; |
| |
| import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT; |
| import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC; |
| import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; |
| import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; |
| import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE; |
| import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.eclipse.jgit.errors.IncorrectObjectTypeException; |
| import org.eclipse.jgit.internal.JGitText; |
| import org.eclipse.jgit.internal.storage.file.PackIndex; |
| import org.eclipse.jgit.internal.storage.file.PackReverseIndex; |
| import org.eclipse.jgit.internal.storage.pack.PackWriter; |
| import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor; |
| import org.eclipse.jgit.internal.storage.reftable.ReftableConfig; |
| import org.eclipse.jgit.lib.AnyObjectId; |
| import org.eclipse.jgit.lib.NullProgressMonitor; |
| import org.eclipse.jgit.lib.ObjectId; |
| import org.eclipse.jgit.lib.ObjectIdSet; |
| import org.eclipse.jgit.lib.ProgressMonitor; |
| import org.eclipse.jgit.revwalk.RevFlag; |
| import org.eclipse.jgit.revwalk.RevObject; |
| import org.eclipse.jgit.revwalk.RevWalk; |
| import org.eclipse.jgit.storage.pack.PackConfig; |
| import org.eclipse.jgit.storage.pack.PackStatistics; |
| import org.eclipse.jgit.util.BlockList; |
| import org.eclipse.jgit.util.io.CountingOutputStream; |
| |
| /** |
| * Combine several pack files into one pack. |
| * <p> |
| * The compactor combines several pack files together by including all objects |
| * contained in each pack file into the same output pack. If an object appears |
| * multiple times, it is only included once in the result. Because the new pack |
| * is constructed by enumerating the indexes of the source packs, it is quicker |
| * than doing a full repack of the repository, however the result is not nearly |
| * as space efficient as new delta compression is disabled. |
| * <p> |
| * This method is suitable for quickly combining several packs together after |
| * receiving a number of small fetch or push operations into a repository, |
| * allowing the system to maintain reasonable read performance without expending |
| * a lot of time repacking the entire repository. |
| */ |
| public class DfsPackCompactor { |
| private final DfsRepository repo; |
| private final List<DfsPackFile> srcPacks; |
| private final List<DfsReftable> srcReftables; |
| private final List<ObjectIdSet> exclude; |
| |
| private PackStatistics newStats; |
| private DfsPackDescription outDesc; |
| |
| private int autoAddSize; |
| private ReftableConfig reftableConfig; |
| |
| private RevWalk rw; |
| private RevFlag added; |
| private RevFlag isBase; |
| |
| /** |
| * Initialize a pack compactor. |
| * |
| * @param repository |
| * repository objects to be packed will be read from. |
| */ |
| public DfsPackCompactor(DfsRepository repository) { |
| repo = repository; |
| autoAddSize = 5 * 1024 * 1024; // 5 MiB |
| srcPacks = new ArrayList<>(); |
| srcReftables = new ArrayList<>(); |
| exclude = new ArrayList<>(4); |
| } |
| |
| /** |
| * Set configuration to write a reftable. |
| * |
| * @param cfg |
| * configuration to write a reftable. Reftable compacting is |
| * disabled (default) when {@code cfg} is {@code null}. |
| * @return {@code this} |
| */ |
| public DfsPackCompactor setReftableConfig(ReftableConfig cfg) { |
| reftableConfig = cfg; |
| return this; |
| } |
| |
| /** |
| * Add a pack to be compacted. |
| * <p> |
| * All of the objects in this pack will be copied into the resulting pack. |
| * The resulting pack will order objects according to the source pack's own |
| * description ordering (which is based on creation date), and then by the |
| * order the objects appear in the source pack. |
| * |
| * @param pack |
| * a pack to combine into the resulting pack. |
| * @return {@code this} |
| */ |
| public DfsPackCompactor add(DfsPackFile pack) { |
| srcPacks.add(pack); |
| return this; |
| } |
| |
| /** |
| * Add a reftable to be compacted. |
| * |
| * @param table |
| * a reftable to combine. |
| * @return {@code this} |
| */ |
| public DfsPackCompactor add(DfsReftable table) { |
| srcReftables.add(table); |
| return this; |
| } |
| |
| /** |
| * Automatically select pack and reftables to be included, and add them. |
| * <p> |
| * Packs are selected based on size, smaller packs get included while bigger |
| * ones are omitted. |
| * |
| * @return {@code this} |
| * @throws java.io.IOException |
| * existing packs cannot be read. |
| */ |
| public DfsPackCompactor autoAdd() throws IOException { |
| DfsObjDatabase objdb = repo.getObjectDatabase(); |
| for (DfsPackFile pack : objdb.getPacks()) { |
| DfsPackDescription d = pack.getPackDescription(); |
| if (d.getFileSize(PACK) < autoAddSize) |
| add(pack); |
| else |
| exclude(pack); |
| } |
| |
| if (reftableConfig != null) { |
| for (DfsReftable table : objdb.getReftables()) { |
| DfsPackDescription d = table.getPackDescription(); |
| if (d.getPackSource() != GC |
| && d.getFileSize(REFTABLE) < autoAddSize) { |
| add(table); |
| } |
| } |
| } |
| return this; |
| } |
| |
| /** |
| * Exclude objects from the compacted pack. |
| * |
| * @param set |
| * objects to not include. |
| * @return {@code this}. |
| */ |
| public DfsPackCompactor exclude(ObjectIdSet set) { |
| exclude.add(set); |
| return this; |
| } |
| |
| /** |
| * Exclude objects from the compacted pack. |
| * |
| * @param pack |
| * objects to not include. |
| * @return {@code this}. |
| * @throws java.io.IOException |
| * pack index cannot be loaded. |
| */ |
| public DfsPackCompactor exclude(DfsPackFile pack) throws IOException { |
| final PackIndex idx; |
| try (DfsReader ctx = (DfsReader) repo.newObjectReader()) { |
| idx = pack.getPackIndex(ctx); |
| } |
| return exclude(idx); |
| } |
| |
| /** |
| * Compact the pack files together. |
| * |
| * @param pm |
| * progress monitor to receive updates on as packing may take a |
| * while, depending on the size of the repository. |
| * @throws java.io.IOException |
| * the packs cannot be compacted. |
| */ |
| public void compact(ProgressMonitor pm) throws IOException { |
| if (pm == null) { |
| pm = NullProgressMonitor.INSTANCE; |
| } |
| |
| DfsObjDatabase objdb = repo.getObjectDatabase(); |
| try (DfsReader ctx = objdb.newReader()) { |
| if (reftableConfig != null && !srcReftables.isEmpty()) { |
| compactReftables(ctx); |
| } |
| compactPacks(ctx, pm); |
| |
| List<DfsPackDescription> commit = getNewPacks(); |
| Collection<DfsPackDescription> remove = toPrune(); |
| if (!commit.isEmpty() || !remove.isEmpty()) { |
| objdb.commitPack(commit, remove); |
| } |
| } finally { |
| rw = null; |
| } |
| } |
| |
| private void compactPacks(DfsReader ctx, ProgressMonitor pm) |
| throws IOException, IncorrectObjectTypeException { |
| DfsObjDatabase objdb = repo.getObjectDatabase(); |
| PackConfig pc = new PackConfig(repo); |
| pc.setIndexVersion(2); |
| pc.setDeltaCompress(false); |
| pc.setReuseDeltas(true); |
| pc.setReuseObjects(true); |
| |
| try (PackWriter pw = new PackWriter(pc, ctx)) { |
| pw.setDeltaBaseAsOffset(true); |
| pw.setReuseDeltaCommits(false); |
| |
| addObjectsToPack(pw, ctx, pm); |
| if (pw.getObjectCount() == 0) { |
| return; |
| } |
| |
| boolean rollback = true; |
| initOutDesc(objdb); |
| try { |
| writePack(objdb, outDesc, pw, pm); |
| writeIndex(objdb, outDesc, pw); |
| |
| PackStatistics stats = pw.getStatistics(); |
| |
| outDesc.setPackStats(stats); |
| newStats = stats; |
| rollback = false; |
| } finally { |
| if (rollback) { |
| objdb.rollbackPack(Collections.singletonList(outDesc)); |
| } |
| } |
| } |
| } |
| |
| private long estimatePackSize() { |
| // Every pack file contains 12 bytes of header and 20 bytes of trailer. |
| // Include the final pack file header and trailer size here and ignore |
| // the same from individual pack files. |
| long size = 32; |
| for (DfsPackFile pack : srcPacks) { |
| size += pack.getPackDescription().getFileSize(PACK) - 32; |
| } |
| return size; |
| } |
| |
| private void compactReftables(DfsReader ctx) throws IOException { |
| DfsObjDatabase objdb = repo.getObjectDatabase(); |
| Collections.sort(srcReftables, objdb.reftableComparator()); |
| |
| try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) { |
| initOutDesc(objdb); |
| ReftableCompactor compact = new ReftableCompactor(); |
| compact.addAll(stack.readers()); |
| compact.setIncludeDeletes(true); |
| writeReftable(objdb, outDesc, compact); |
| } |
| } |
| |
| private void initOutDesc(DfsObjDatabase objdb) throws IOException { |
| if (outDesc == null) { |
| outDesc = objdb.newPack(COMPACT, estimatePackSize()); |
| } |
| } |
| |
| /** |
| * Get all of the source packs that fed into this compaction. |
| * |
| * @return all of the source packs that fed into this compaction. |
| */ |
| public Collection<DfsPackDescription> getSourcePacks() { |
| Set<DfsPackDescription> src = new HashSet<>(); |
| for (DfsPackFile pack : srcPacks) { |
| src.add(pack.getPackDescription()); |
| } |
| for (DfsReftable table : srcReftables) { |
| src.add(table.getPackDescription()); |
| } |
| return src; |
| } |
| |
| /** |
| * Get new packs created by this compaction. |
| * |
| * @return new packs created by this compaction. |
| */ |
| public List<DfsPackDescription> getNewPacks() { |
| return outDesc != null |
| ? Collections.singletonList(outDesc) |
| : Collections.emptyList(); |
| } |
| |
| /** |
| * Get statistics corresponding to the {@link #getNewPacks()}. |
| * May be null if statistics are not available. |
| * |
| * @return statistics corresponding to the {@link #getNewPacks()}. |
| * |
| */ |
| public List<PackStatistics> getNewPackStatistics() { |
| return outDesc != null |
| ? Collections.singletonList(newStats) |
| : Collections.emptyList(); |
| } |
| |
| private Collection<DfsPackDescription> toPrune() { |
| Set<DfsPackDescription> packs = new HashSet<>(); |
| for (DfsPackFile pack : srcPacks) { |
| packs.add(pack.getPackDescription()); |
| } |
| |
| Set<DfsPackDescription> reftables = new HashSet<>(); |
| for (DfsReftable table : srcReftables) { |
| reftables.add(table.getPackDescription()); |
| } |
| |
| for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) { |
| DfsPackDescription d = i.next(); |
| if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) { |
| i.remove(); |
| } |
| } |
| |
| for (Iterator<DfsPackDescription> i = reftables.iterator(); |
| i.hasNext();) { |
| DfsPackDescription d = i.next(); |
| if (d.hasFileExt(PACK) && !packs.contains(d)) { |
| i.remove(); |
| } |
| } |
| |
| Set<DfsPackDescription> toPrune = new HashSet<>(); |
| toPrune.addAll(packs); |
| toPrune.addAll(reftables); |
| return toPrune; |
| } |
| |
| private void addObjectsToPack(PackWriter pw, DfsReader ctx, |
| ProgressMonitor pm) throws IOException, |
| IncorrectObjectTypeException { |
| // Sort packs by description ordering, this places newer packs before |
| // older packs, allowing the PackWriter to be handed newer objects |
| // first and older objects last. |
| Collections.sort( |
| srcPacks, |
| Comparator.comparing( |
| DfsPackFile::getPackDescription, |
| DfsPackDescription.objectLookupComparator())); |
| |
| rw = new RevWalk(ctx); |
| added = rw.newFlag("ADDED"); //$NON-NLS-1$ |
| isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$ |
| List<RevObject> baseObjects = new BlockList<>(); |
| |
| pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN); |
| for (DfsPackFile src : srcPacks) { |
| List<ObjectIdWithOffset> want = toInclude(src, ctx); |
| if (want.isEmpty()) |
| continue; |
| |
| PackReverseIndex rev = src.getReverseIdx(ctx); |
| DfsObjectRepresentation rep = new DfsObjectRepresentation(src); |
| for (ObjectIdWithOffset id : want) { |
| int type = src.getObjectType(ctx, id.offset); |
| RevObject obj = rw.lookupAny(id, type); |
| if (obj.has(added)) |
| continue; |
| |
| pm.update(1); |
| pw.addObject(obj); |
| obj.add(added); |
| |
| src.representation(rep, id.offset, ctx, rev); |
| if (rep.getFormat() != PACK_DELTA) |
| continue; |
| |
| RevObject base = rw.lookupAny(rep.getDeltaBase(), type); |
| if (!base.has(added) && !base.has(isBase)) { |
| baseObjects.add(base); |
| base.add(isBase); |
| } |
| } |
| } |
| for (RevObject obj : baseObjects) { |
| if (!obj.has(added)) { |
| pm.update(1); |
| pw.addObject(obj); |
| obj.add(added); |
| } |
| } |
| pm.endTask(); |
| } |
| |
| private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx) |
| throws IOException { |
| PackIndex srcIdx = src.getPackIndex(ctx); |
| List<ObjectIdWithOffset> want = new BlockList<>( |
| (int) srcIdx.getObjectCount()); |
| SCAN: for (PackIndex.MutableEntry ent : srcIdx) { |
| ObjectId id = ent.toObjectId(); |
| RevObject obj = rw.lookupOrNull(id); |
| if (obj != null && (obj.has(added) || obj.has(isBase))) |
| continue; |
| for (ObjectIdSet e : exclude) |
| if (e.contains(id)) |
| continue SCAN; |
| want.add(new ObjectIdWithOffset(id, ent.getOffset())); |
| } |
| Collections.sort(want, new Comparator<ObjectIdWithOffset>() { |
| @Override |
| public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) { |
| return Long.signum(a.offset - b.offset); |
| } |
| }); |
| return want; |
| } |
| |
| private static void writePack(DfsObjDatabase objdb, |
| DfsPackDescription pack, |
| PackWriter pw, ProgressMonitor pm) throws IOException { |
| try (DfsOutputStream out = objdb.writeFile(pack, PACK)) { |
| pw.writePack(pm, pm, out); |
| pack.addFileExt(PACK); |
| pack.setBlockSize(PACK, out.blockSize()); |
| } |
| } |
| |
| private static void writeIndex(DfsObjDatabase objdb, |
| DfsPackDescription pack, |
| PackWriter pw) throws IOException { |
| try (DfsOutputStream out = objdb.writeFile(pack, INDEX)) { |
| CountingOutputStream cnt = new CountingOutputStream(out); |
| pw.writeIndex(cnt); |
| pack.addFileExt(INDEX); |
| pack.setFileSize(INDEX, cnt.getCount()); |
| pack.setBlockSize(INDEX, out.blockSize()); |
| pack.setIndexVersion(pw.getIndexVersion()); |
| } |
| } |
| |
| private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack, |
| ReftableCompactor compact) throws IOException { |
| try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) { |
| compact.setConfig(configureReftable(reftableConfig, out)); |
| compact.compact(out); |
| pack.addFileExt(REFTABLE); |
| pack.setReftableStats(compact.getStats()); |
| } |
| } |
| |
| static ReftableConfig configureReftable(ReftableConfig cfg, |
| DfsOutputStream out) { |
| int bs = out.blockSize(); |
| if (bs > 0) { |
| cfg = new ReftableConfig(cfg); |
| cfg.setRefBlockSize(bs); |
| cfg.setAlignBlocks(true); |
| } |
| return cfg; |
| } |
| |
| private static class ObjectIdWithOffset extends ObjectId { |
| final long offset; |
| |
| ObjectIdWithOffset(AnyObjectId id, long ofs) { |
| super(id); |
| offset = ofs; |
| } |
| } |
| } |