blob: fd671e2a8e469ffca41306d61d7c8a30ac3dd26a [file] [log] [blame]
# Copyright (C) 2024 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import logging
import os
from datetime import datetime, timedelta
from glob import glob
from pathlib import Path
from .config import GitConfigReader
from . import repo
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)
# Argument added to the gc arguments when an aggressive gc marker file exists.
AGGRESSIVE_FLAG = "--aggressive"
# Age limits after which the cleanup steps treat a path as stale.
MAX_AGE_GC_LOCK = timedelta(hours=12)  # 'gc.pid' lock file
MAX_AGE_EMPTY_REF_DIRS = timedelta(hours=1)  # empty directories below 'refs'
MAX_AGE_INCOMING_PACKS = timedelta(days=1)  # 'objects/incoming_*.pack' files
# PackAllRefsAfterStep packs all refs once more files than this exist below 'refs'.
MAX_LOOSE_REF_COUNT = 10
# Pack directory and preserved-pack directory, relative to the repository directory.
PACK_PATH = "objects/pack"
PRESERVED_PACK_PATH = f"{PACK_PATH}/preserved"
class Util:
    """Stateless helpers shared by the gc steps."""

    @staticmethod
    def is_file_stale(file, max_age):
        """Tell whether ``file`` was last modified more than ``max_age`` ago.

        Args:
            file: Path of the file or directory to inspect.
            max_age: ``timedelta`` added to the modification time.

        Returns:
            True if the modification time plus ``max_age`` lies before the
            current local time, False otherwise.

        Raises:
            OSError: If ``os.stat`` fails for ``file``.
        """
        modified_at = datetime.fromtimestamp(os.stat(file).st_mtime)
        expires_at = modified_at + max_age
        return expires_at < datetime.now()
class GCStep(abc.ABC):
    """Interface of a single step executed before or after the gc command."""

    @abc.abstractmethod
    def run(self, repo_dir):
        """Execute the step for the repository directory ``repo_dir``."""
class GCLockHandlingInitStep(GCStep):
    """Init step that removes a stale ``gc.pid`` lock file."""

    def run(self, repo_dir):
        """Delete ``<repo_dir>/gc.pid`` if it is older than ``MAX_AGE_GC_LOCK``.

        Nothing happens when the lock file is missing or still fresh.

        Args:
            repo_dir: Path of the repository directory.

        Raises:
            OSError: If the lock file cannot be stat'ed or removed.
        """
        gc_lock_path = os.path.join(repo_dir, "gc.pid")
        if not os.path.exists(gc_lock_path):
            return
        if not Util.is_file_stale(gc_lock_path, MAX_AGE_GC_LOCK):
            return

        # ``timedelta.min`` is the most negative representable timedelta, not
        # a number of minutes, so the minutes are derived from the duration.
        max_age_minutes = int(MAX_AGE_GC_LOCK.total_seconds() // 60)
        LOG.warning(
            "Pruning stale 'gc.pid' lock file older than %s min: %s",
            max_age_minutes,
            gc_lock_path,
        )
        os.remove(gc_lock_path)
class PreservePacksInitStep(GCStep):
    """Init step that prunes and/or preserves pack files before the gc runs.

    The ``gc.prunepreserved`` and ``gc.preserveoldpacks`` options of the
    repository's ``config`` file select which of the two actions are executed.
    """

    def run(self, repo_dir):
        """Prune old preserved packs and/or preserve the current packs.

        Args:
            repo_dir: Path of the repository directory.
        """
        # NOTE(review): the trailing ``False`` looks like the default returned
        # for an unset option - confirm against GitConfigReader.get.
        with GitConfigReader(os.path.join(repo_dir, "config")) as config_reader:
            is_prune_preserved = config_reader.get("gc", None, "prunepreserved", False)
            is_preserve_old_packs = config_reader.get(
                "gc", None, "preserveoldpacks", False
            )

        # Pruning runs first, so packs preserved by this run are never pruned
        # by the same run.
        if is_prune_preserved:
            self._prune_preserved(repo_dir)
        if is_preserve_old_packs:
            self._preserve_packs(repo_dir)

    def _prune_preserved(self, repo_dir):
        """Delete all ``*.old-pack`` / ``*.old-idx`` files in the preserved dir."""
        full_preserved_pack_path = os.path.join(repo_dir, PRESERVED_PACK_PATH)
        if not os.path.exists(full_preserved_pack_path):
            return

        LOG.info("Pruning old preserved packs.")
        count = 0
        for file in os.listdir(full_preserved_pack_path):
            if not file.endswith((".old-pack", ".old-idx")):
                continue
            full_old_pack_path = os.path.join(full_preserved_pack_path, file)
            LOG.debug("Deleting %s", full_old_pack_path)
            os.remove(full_old_pack_path)
            # Only pack files are counted so the number reported here is
            # comparable to the one logged by _preserve_packs.
            if file.endswith(".old-pack"):
                count += 1
        LOG.info("Done pruning %d old preserved packs.", count)

    def _preserve_packs(self, repo_dir):
        """Hard-link every ``pack-*.pack`` / ``pack-*.idx`` into the preserved dir."""
        full_pack_path = os.path.join(repo_dir, PACK_PATH)
        full_preserved_pack_path = os.path.join(repo_dir, PRESERVED_PACK_PATH)
        # exist_ok avoids failing when the directory appears between a check
        # and the creation.
        os.makedirs(full_preserved_pack_path, exist_ok=True)

        LOG.info("Preserving packs.")
        count = 0
        for file in os.listdir(full_pack_path):
            full_file_path = os.path.join(full_pack_path, file)
            filename, ext = os.path.splitext(file)
            is_pack_file = (
                os.path.isfile(full_file_path)
                and filename.startswith("pack-")
                and ext in (".pack", ".idx")
            )
            if not is_pack_file:
                continue

            LOG.debug("Preserving pack %s", file)
            self._link_replacing(
                full_file_path,
                os.path.join(
                    full_preserved_pack_path,
                    self._get_preserved_packfile_name(file),
                ),
            )
            if ext == ".pack":
                count += 1
        LOG.info("Preserved %d packs", count)

    @staticmethod
    def _link_replacing(source, target):
        """Hard-link ``source`` to ``target``, replacing an existing ``target``."""
        try:
            os.link(source, target)
        except FileExistsError:
            # A file of the same name was preserved earlier and not pruned;
            # replace it instead of aborting the whole gc run.
            os.remove(target)
            os.link(source, target)

    def _get_preserved_packfile_name(self, file):
        """Map ``name.<ext>`` to ``name.old-<ext>`` (e.g. ``x.pack`` -> ``x.old-pack``)."""
        filename, ext = os.path.splitext(file)
        return f"{filename}.old-{ext[1:]}"
# Steps GitGarbageCollectionProvider schedules to run before the gc command.
DEFAULT_INIT_STEPS = [GCLockHandlingInitStep(), PreservePacksInitStep()]
class DeleteEmptyRefDirsCleanupStep(GCStep):
    """After step that removes stale, empty directories below ``refs``.

    Only directories at least two levels below ``refs`` (e.g.
    ``refs/heads/foo``) are considered, so ``refs`` itself and its direct
    children are never removed.
    """

    def run(self, repo_dir):
        """Queue and then delete the empty ref directories of ``repo_dir``.

        Args:
            repo_dir: Path of the repository directory.
        """
        refs_path = os.path.join(repo_dir, "refs")
        # A dict is used as an insertion-ordered set: the walk is bottom-up,
        # so children are queued, and later removed, before their parents.
        self.to_delete = {}
        for current, _dirnames, _filenames in os.walk(refs_path, topdown=False):
            depth = len(os.path.relpath(current, refs_path).split(os.sep))
            if depth < 2:
                continue
            if self.listdir(current):
                continue
            if not Util.is_file_stale(current, MAX_AGE_EMPTY_REF_DIRS):
                continue
            LOG.info("Queuing empty ref directory for deletion: %s", current)
            self.to_delete[current] = None

        for queued in self.to_delete:
            LOG.info("Deleting %s", queued)
            self.rmdir(queued)

    def listdir(self, dir):
        """Return the children of ``dir`` that are not queued for deletion.

        Child paths are built with ``os.path.join`` exactly like ``os.walk``
        builds the directory paths stored in ``self.to_delete``. Going through
        ``pathlib`` would normalise the paths (e.g. drop a leading ``./``) and
        queued children would then not be recognised.

        Args:
            dir: Directory path as yielded by ``os.walk``.

        Returns:
            Set of child paths that will still exist after the queued
            deletions.
        """
        children = {os.path.join(dir, name) for name in os.listdir(dir)}
        return children - self.to_delete.keys()

    def rmdir(self, dir):
        """Remove the directory ``dir``; failures are logged, not raised."""
        try:
            os.rmdir(dir)
        except OSError as e:  # includes FileNotFoundError
            LOG.warning("Couldn't delete %s: %s", dir, e)
class DeleteStaleIncomingPacksCleanupStep(GCStep):
    """After step that removes stale ``incoming_*.pack`` files from ``objects``."""

    def run(self, repo_dir):
        """Delete each ``objects/incoming_*.pack`` older than ``MAX_AGE_INCOMING_PACKS``.

        Args:
            repo_dir: Path of the repository directory.
        """
        pattern = os.path.join(repo_dir, "objects", "incoming_*.pack")
        for candidate in glob(pattern):
            if not Util.is_file_stale(candidate, MAX_AGE_INCOMING_PACKS):
                continue
            LOG.warning(
                "Pruning stale incoming pack/index file older than %d days: %s",
                MAX_AGE_INCOMING_PACKS.days,
                candidate,
            )
            os.remove(candidate)
class PackAllRefsAfterStep(GCStep):
    """After step that packs all refs once too many loose ref files exist."""

    def run(self, repo_dir):
        """Pack all refs if more than ``MAX_LOOSE_REF_COUNT`` files exist below ``refs``.

        Args:
            repo_dir: Path of the repository directory.
        """
        refs_path = os.path.join(repo_dir, "refs")
        # Every file anywhere below 'refs' is counted as one loose ref.
        loose_ref_count = sum(len(files) for _, _, files in os.walk(refs_path))

        if loose_ref_count > MAX_LOOSE_REF_COUNT:
            repo.pack_refs(repo_dir, all=True)
            LOG.info("Found %d loose refs -> pack all refs", loose_ref_count)
        else:
            # The count may equal the limit, so the actual number is reported
            # instead of claiming "less than".
            LOG.info(
                "Found %d loose refs, not more than %d -> skipping pack all refs",
                loose_ref_count,
                MAX_LOOSE_REF_COUNT,
            )
# Steps GitGarbageCollectionProvider always schedules to run after the gc
# command; it appends PackAllRefsAfterStep to its own copy when requested.
DEFAULT_AFTER_STEPS = [
    DeleteEmptyRefDirsCleanupStep(),
    DeleteStaleIncomingPacksCleanupStep(),
]
class GitGarbageCollectionProvider:
    """Factory building GitGarbageCollection objects from the default steps."""

    @staticmethod
    def get(pack_refs=True, git_config=None):
        """Create a GitGarbageCollection wired with copies of the default steps.

        Args:
            pack_refs: When true, a PackAllRefsAfterStep is added as the last
                after step.
            git_config: Passed through unchanged to GitGarbageCollection.

        Returns:
            The configured GitGarbageCollection instance.
        """
        before_gc = list(DEFAULT_INIT_STEPS)
        after_gc = list(DEFAULT_AFTER_STEPS)
        if pack_refs:
            after_gc += [PackAllRefsAfterStep()]
        return GitGarbageCollection(before_gc, after_gc, git_config)
class GitGarbageCollection:
    """Runs the gc command for a repository, framed by init and after steps."""

    def __init__(self, init_steps, after_steps, git_config=None):
        """Store the steps and the git configuration used by ``run``.

        Args:
            init_steps: Steps whose ``run(repo_dir)`` is called before gc.
            after_steps: Steps whose ``run(repo_dir)`` is called after gc.
            git_config: Passed through unchanged to ``repo.gc``.
        """
        self.init_steps = init_steps
        self.after_steps = after_steps
        self.git_config = git_config

    def run(self, repo_dir=None, args=None):
        """Run the init steps, the gc command and the after steps.

        A failing gc command is logged; the after steps still run afterwards.

        Args:
            repo_dir: Repository directory; when falsy, ``repo.git_dir()`` is
                used instead.
            args: Optional sequence of extra gc arguments. It is never
                modified; ``--aggressive`` is added to a private copy when an
                aggressive marker file exists in ``repo_dir``.
        """
        if not repo_dir:
            repo_dir = repo.git_dir()
        # Logged after resolving the default so the message names the real path.
        LOG.info("Started gc in %s", repo_dir)

        if not os.path.isdir(repo_dir):
            LOG.error("Failed: Directory does not exist: %s", repo_dir)
            return

        for init_step in self.init_steps:
            init_step.run(repo_dir)

        # Work on a copy so the caller's list is left untouched; None is kept
        # as None unless the aggressive flag has to be added.
        gc_args = None if args is None else list(args)
        if self._is_aggressive(repo_dir):
            if gc_args is None:
                gc_args = []
            if AGGRESSIVE_FLAG not in gc_args:
                gc_args.append(AGGRESSIVE_FLAG)

        try:
            repo.gc(repo_dir, self.git_config, gc_args)
        except repo.GitCommandException:
            LOG.error("Failed to run gc in %s", repo_dir)

        for after_step in self.after_steps:
            after_step.run(repo_dir)
        LOG.info("Finished gc in %s", repo_dir)

    def _is_aggressive(self, project_dir):
        """Tell whether a marker file in ``project_dir`` requests an aggressive gc.

        ``gc-aggressive`` requests it for every run. ``gc-aggressive-once``
        requests it for this run only and is deleted here.

        Returns:
            True if one of the marker files exists, False otherwise.
        """
        if os.path.exists(os.path.join(project_dir, "gc-aggressive")):
            LOG.info("Running aggressive gc in %s", project_dir)
            return True

        once_marker = os.path.join(project_dir, "gc-aggressive-once")
        if os.path.exists(once_marker):
            LOG.info("Running aggressive gc once in %s", project_dir)
            os.remove(once_marker)
            return True

        return False