Add a sample ref-updated hook to repack geometrically Make a simple "least effort" attempt to run geometric repacking after every known update which may have written git objects, all while avoiding overloading a server with too much repacking work. Change-Id: I7b67bedb47e3e27923932c2a959bda32c930a84a Release-Notes: Added sample ref-updated hook to repack repositories geometrically
diff --git a/contrib/hooks/ref-updated_repack-geometric.sh b/contrib/hooks/ref-updated_repack-geometric.sh new file mode 100755 index 0000000..0fe640d --- /dev/null +++ b/contrib/hooks/ref-updated_repack-geometric.sh
@@ -0,0 +1,117 @@ +#!/bin/bash -e +# +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Best run from the Gerrit ref-updated hook +# + +# Make a simple "least effort" attempt to run geometric repacking after every +# known update which may have written git objects, all while avoiding overloading +# a server with too much repacking work. + +# The least effort avoids running more than one git repack on the same repo at a +# time, or while a git gc is already running on a repo (by using .git/gc.pid as +# a lock). To avoid overloading the server, it also avoids running more than 3 +# git repacks total across all repos. If any of these conditions would be violated, +# this script simply does nothing and exits. The intention is to avoid doing too +# much work during a burst, assuming that future updates will likely be good enough +# to service the repos which were missed. +# +# Since this is an event based approach to repository maintenance, it is +# recommended that another time based GC approach, perhaps a more significant and +# costly one, repacking refs, creating bitmaps... be used in parallel with this +# script. This simple policy of "least effort" should keep most repos from +# degrading much even with very infrequent time based GCs. 
#
# Since this script uses gc.pid to lock the repo against other git gcs, it means
# that this script could potentially starve any time based gc maintenance from
# happening on busy repos. It is therefore advisable for any such time based gc
# jobs to spin for a while attempting to run if the job cannot acquire the gc.pid
# lock to help ensure that time based gc also gets a chance to run.
#
# In order to be able to skip repacking for each update happening during repacking,
# this script returns immediately after starting repacking in the background. If
# this script were to instead block during repacking, it would simply delay
# repacking for those updates instead of having a consolidating effect. That being
# said, a smarter script might consider tracking that some updates happened after
# repacking started and ensure that it gets repacked once again (while still
# consolidating many updates), but that would likely no longer qualify as least
# effort.
#

# Both variables are required: GERRIT_SITE locates the shared semaphore
# directory, GIT_DIR locates the repository (and its gc.pid) to repack.
[ -z "$GERRIT_SITE" ] && { echo "ERROR: GERRIT_SITE not set" ; exit 1 ; }
[ -z "$GIT_DIR" ] && { echo "ERROR: GIT_DIR not set" ; exit 2 ; }

# ---- Generic ----

# Debug tracing, disabled by default: drop the "true ||" to enable it.
debug() { true || echo "---- debug: $*" >&2 ; }

# EXIT handler: remove the lock file recorded in GC_LOCK by exec_locked(), if
# this process still owns one (e.g. it was terminated while the command ran).
# Written with "if" so that it returns 0 when there is nothing to clean up.
cleanup() {
    if [ -n "$GC_LOCK" ] ; then
        rm -f -- "$GC_LOCK"
    fi
}

# Run <cmd> while holding the lock file <lock>.
#
# The lock is created atomically (noclobber) and holds "<pid> <hostname>" of
# the shell executing the command. git gc only treats gc.pid as held when it
# contains both fields and the pid is still alive on this host, so writing just
# the pid of the (immediately exiting) hook process would not hold off git gc.
#
# Globals: GC_LOCK (set while the lock is held, unset after release)
# Returns: the exit status of <cmd>, or 20 if the lock is already taken.
exec_locked() { # <lock> <cmd> [<args>...]
    local lock=$1 rtn=0
    # Capture the pid here: inside the ( ... ) below BASHPID would be the pid
    # of that short lived subshell instead of the process running <cmd>.
    local owner="$BASHPID ${HOSTNAME:-localhost}"
    shift
    if ( set -o noclobber ; echo "$owner" > "$lock" ) > /dev/null 2>&1 ; then
        GC_LOCK=$lock
        debug "locked $lock"
        "$@" || rtn=$?
        rm -- "$lock" && unset GC_LOCK
        debug "unlocked $lock"
        return $rtn
    fi
    debug "already locked $lock"
    return 20
}

# Run <cmd> while holding one of <max> slots of a counting semaphore.
#
# The semaphore is the directory <semaphore> holding one flock(1)ed file per
# slot (1..<max>). The first free slot is taken without blocking.
#
# Returns: the exit status of <cmd>, or 30 if every slot is busy.
exec_acquired() { # <semaphore> <max> <cmd> [<args>...]
    local semaphore=$1 max=$2 rtn=0 slot lock
    shift 2
    mkdir -p -- "$semaphore"
    for (( slot = 1 ; slot <= max ; slot++ )) ; do
        lock="$semaphore/$slot"
        touch -- "$lock"
        # Re-opening fd 3 implicitly closes the previous (busy) slot's file.
        exec 3<> "$lock"
        if flock -n 3 ; then
            debug "acquired semaphore $slot"
            "$@" || rtn=$?
            # -u drops the lock (-o/--close only applies when flock runs a
            # command); then close the descriptor so it does not linger.
            flock -u 3
            exec 3>&-
            debug "released semaphore $slot"
            return $rtn
        fi
    done
    exec 3>&-
    debug "semaphore loaded $semaphore"
    return 30
}

# ---- Policy ----

# Run <cmd> only if no other gc/repack holds this repo's gc.pid.
gc_lock() { # <cmd> [<args>...]
    exec_locked "$LOCK" "$@"
}

# Run <cmd> only if fewer than MAX_RUNNERS repacks are running site wide.
gc_runner() { # <cmd> [<args>...]
    exec_acquired "$SEMAPHORE" "$MAX_RUNNERS" "$@"
}

MAX_RUNNERS=3
SEMAPHORE=$GERRIT_SITE/logs/git-geometric.semaphore
LOCK=$GIT_DIR/gc.pid

# Repack in the background so that the hook returns immediately. The EXIT trap
# is installed inside the background job: bash resets traps in asynchronous
# subshells, and GC_LOCK is only ever set in the process that takes the lock,
# so a trap installed in the parent shell could never remove a leftover gc.pid.
{
    trap cleanup EXIT
    gc_runner gc_lock git repack -n -d --no-write-bitmap-index --geometric=2
} &