Dan McGregor | 26c0d09 | 2015-08-26 09:03:24 -0600 | [diff] [blame] | 1 | #!/usr/bin/env bash |
Martin Fick | 43a4e17 | 2012-05-31 14:14:56 -0600 | [diff] [blame] | 2 | # Copyright (c) 2012, Code Aurora Forum. All rights reserved. |
| 3 | # |
| 4 | # Redistribution and use in source and binary forms, with or without |
| 5 | # modification, are permitted provided that the following conditions are |
| 6 | # met: |
| 7 | # # Redistributions of source code must retain the above copyright |
| 8 | # notice, this list of conditions and the following disclaimer. |
| 9 | # # Redistributions in binary form must reproduce the above |
| 10 | # copyright notice, this list of conditions and the following |
| 11 | # disclaimer in the documentation and/or other materials provided |
| 12 | # with the distribution. |
| 13 | # # Neither the name of Code Aurora Forum, Inc. nor the names of its |
| 14 | # contributors may be used to endorse or promote products derived |
| 15 | # from this software without specific prior written permission. |
| 16 | # |
| 17 | # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED |
| 18 | # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
| 19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT |
| 20 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS |
| 21 | # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 22 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 23 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
| 24 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
| 25 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
| 26 | # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN |
| 27 | # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 | |
usage() { # error_message
    # Print the help text to stdout, an optional error to stderr, and
    # exit 128.  Fixes: option names now match the parser (--norefs,
    # --noloose); $0 is quoted for paths containing spaces.

    cat <<-EOF
usage: $(basename "$0") [-unvt] [--norefs] [--noloose] [-r|--ratio number]
       [git gc option...] git.repo

-u|-h      usage/help
-v         verbose
-n         dry-run   don't actually repack anything
-t         touch     treat repo as if it had been touched
--norefs   avoid extra ref packing timestamp checking
--noloose  do not run just because there are loose object dirs
           (repacking may still run if they are referenced)
-r ratio <number>  packfile ratio to aim for (default 10)

git gc option      will be passed as args to git gc

git.repo           to run gc against

Garbage collect using a pseudo logarithmic packfile maintenance
approach.  This approach attempts to minimize packfile churn
by keeping several generations of varying sized packfiles around
and only consolidating packfiles (or loose objects) which are
either new packfiles, or packfiles close to the same size as
another packfile.

An estimate is used to predict when rollups (one consolidation
would cause another consolidation) would occur so that this
rollup can be done all at once via a single repack.  This reduces
both the runtime and the pack file churn in rollup cases.

Approach: plan each consolidation by creating a table like this:

Id Keep Size Sha1(or consolidation list) Actions(repack down up note)
1  -    11356  9052edfb7392646cd4e5f362b953675985f01f96 y - - New
2  -    429088 010904d5c11cd26a79fda91b01ab454d1001b402 y - - New
c1 -    440444 [1,2] - - -

Id:     numbers preceded by a c are estimated "c pack" files
Keep:   - none, k private keep, o our keep
Size:   in disk blocks (default du output)
Sha1:   of packfile, or consolidation list of packfile ids
Actions
repack: - n no, y yes
down:   - noop, ^ consolidate with a file above
up:     - noop, v consolidate with a file below
note:   Human description of script decisions:
        New (file is a new packfile)
        Consolidate with:<list of packfile ids>
        (too far from:<list of packfile ids>)

On the first pass, always consolidate any new packfiles along
with loose objects and along with any packfiles which are within
the ratio size of their predecessors (note, the list is ordered
by increasing size).  After each consolidation, insert a fake
consolidation, or "c pack", to naively represent the size and
ordered positioning of the anticipated new consolidated pack.
Every time a new pack is planned, rescan the list in case the
new "c pack" would cause more consolidation...

Once the packfiles which need consolidation are determined, the
packfiles which will not be consolidated are marked with a .keep
file, and those which will be consolidated will have their .keep
removed if they have one.  Thus, the packfiles with a .keep will
not get repacked.

Packfile consolidation is determined by the --ratio parameter
(default is 10).  This ratio is somewhat of a tradeoff.  The
smaller the number, the more packfiles will be kept on average;
this increases disk utilization somewhat.  However, a larger
ratio causes greater churn and may increase disk utilization due
to deleted packfiles not being reclaimed since they may still be
kept open by long running applications such as Gerrit.  Sane
ratio values are probably between 2 and 10.  Since most
consolidations actually end up smaller than the estimated
consolidated packfile size (due to compression), the true ratio
achieved will likely be 1 to 2 greater than the target ratio.
The smaller the target ratio, the greater this discrepancy.

Finally, attempt to skip garbage collection entirely on untouched
repos.  In order to determine if a repo has been touched, use the
timestamp on the script's keep files, if any relevant file/dir
is newer than a keep marker file, assume that the repo has been
touched and gc needs to run.  Also assume gc needs to run whenever
there are loose object dirs since they may contain untouched
unreferenced loose objects which need to be pruned (once they
expire).

In order to allow the keep files to be an effective timestamp
marker to detect relevant changes in a repo since the last run,
all relevant files and directories which may be modified during a
gc run (even during a noop gc run), must have their timestamps
reset to the same time as the keep files or gc will always run
even on untouched repos.  The relevant files/dirs are all those
files and directories which garbage collection, object packing,
ref packing and pruning might change during noop actions.
EOF

    [ -n "$1" ] && info "ERROR $1"

    exit 128
}
| 131 | |
# msg — log to stderr only when verbose (-v / SW_V) is on
debug() {
    [ -n "$SW_V" ] && info "$1"
}
# msg — log to stderr
info() {
    echo "$1" >&2
}
| 134 | |
array_copy() { #v2 # array_src array_dst
    # Copy the array named $1 into the array named $2 (eval indirection;
    # works on bash versions without namerefs).
    local from=$1 to=$2
    local count idx
    eval count=\${#$from[@]}
    for (( idx = 0 ; idx < count ; idx++ )) ; do
        eval $to[$idx]=\"\${$from[$idx]}\"
    done
}
| 144 | |
array_equals() { #v2 # array_name [vals...]
    # True (0) iff the array named $1 holds exactly the remaining args,
    # in order.  Returns 1 on length mismatch, 2 on value mismatch.
    local name=$1 ; shift
    local cnt=0 copy=() item
    array_copy "$name" copy
    # Count via indices (same as the element count for non-sparse copies)
    for item in "${!copy[@]}" ; do cnt=$((item + 1)) ; done
    [ "$cnt" -ne "$#" ] && return 1
    for item in "${copy[@]}" ; do
        [ "$item" = "$1" ] || return 2
        shift
    done
    return 0
}
| 157 | |
packs_sizes() { # git.repo > "size pack"...
    # List packfiles as "<disk blocks>\t<path>", smallest first.
    # Fix: silence du's stderr (not sort's) — when the glob matches no
    # packfiles, du receives the literal pattern and complains.
    du -s "$1"/objects/pack/pack-$SHA1.pack 2> /dev/null | sort -n
}
| 161 | |
# keep — true if the keep file contains our marker ($KEEP)
is_ourkeep() {
    grep -q "$KEEP" "$1" 2> /dev/null
}
# pack — true if the pack's keep file was written by this script
has_ourkeep() {
    is_ourkeep "$(keep_for "$1")"
}
# pack — true if the pack has any keep file at all
has_keep() {
    [ -f "$(keep_for "$1")" ]
}
# git.repo — crude layout check for a git repository
is_repo() {
    [ -d "$1/objects" ] && [ -d "$1/refs/heads" ]
}
| 166 | |
keep() { # pack # returns true if we added our keep
    # Create our keep marker for a packfile; refuse (return 1) if any
    # keep file already exists.  Fix: declare the variable local so the
    # function no longer clobbers a caller's global named "keep".
    local keepfile
    keepfile=$(keep_for "$1")
    [ -f "$keepfile" ] && return 1
    echo "$KEEP" > "$keepfile"
    return 0
}
| 173 | |
keep_for() { # packfile > keepfile
    # Map pack-<sha>.pack to pack-<sha>.keep; return 1 (no output) when
    # the substitution produced nothing resembling a keep file.
    local keepfile
    keepfile=$(sed -e 's/\.pack$/.keep/' <<<"$1")
    [ "${keepfile/.keep}" = "$keepfile" ] && return 1
    echo "$keepfile"
}
| 179 | |
idx_for() { # packfile > idxfile
    # Map pack-<sha>.pack to pack-<sha>.idx; return 1 (no output) when
    # the substitution produced nothing resembling an index file.
    local idxfile
    idxfile=$(sed -e 's/\.pack$/.idx/' <<<"$1")
    [ "${idxfile/.idx}" = "$idxfile" ] && return 1
    echo "$idxfile"
}
| 185 | |
# pack_or_keep_file > sha — strip any directories, the "pack-" prefix
# and the file extension, leaving just the sha portion of the name.
sha_for() {
    sed -e 's|\(.*/\)*pack-\([^.]*\)\..*$|\2|' <<<"$1"
}
| 188 | |
private_keeps() { # git.repo -> sets pkeeps
    # Collect keep files NOT created by this script into the global
    # pkeeps array.  Fixes: drop the unused second local (ary); skip the
    # literal glob pattern that the expansion yields when no keep file
    # exists, so pkeeps never contains a non-existent path.
    local repo=$1
    local keep keeps=("$repo"/objects/pack/pack-$SHA1.keep)
    pkeeps=()
    for keep in "${keeps[@]}" ; do
        [ -f "$keep" ] || continue
        is_ourkeep "$keep" || pkeeps=("${pkeeps[@]}" "$keep")
    done
}
| 197 | |
# smaller larger — true when the smaller pack is within RATIO of the larger
is_tooclose() {
    (( $1 * RATIO > $2 ))
}
| 199 | |
unique() { # [args...] > unique_words
    # Deduplicate the arguments (lexically sorted), emitting them as
    # space-separated words on a single line.
    local sorted
    sorted=$(printf '%s\n' "$@" | sort -u)
    echo $sorted # unquoted on purpose: join lines into words
}
| 205 | |
outfs() { # fs [args...] > argfs...
    # Join the args with the given field separator (no trailing newline).
    local sep=$1 ; shift
    local first=1 arg
    for arg in "$@" ; do
        if [ -n "$first" ] ; then
            echo -n "$arg"
            first=
        else
            echo -n "$sep$arg"
        fi
    done
}
| 211 | |
sort_list() { # < list > formatted_list
    # Input columns: n has_keep size sha repack down up note.
    # Re-pad the columns for display, then order rows by size (col 3)
    # and id (col 1), both numerically.
    awk '{
        tail = $8
        for (i = 8; i < NF; i++)
            tail = tail " " $(i + 1)
        printf("%-5s %s %-14s %-40s %s %s %s %s\n",
               $1, $2, $3, $4, $5, $6, $7, tail)
    }' | sort -k 3,3n -k 1,1n
}
| 219 | |
is_touched() { # git.repo
    # True (0) when the repo looks like it changed since our last run
    # and gc should execute.  Fixes: restore the missing line
    # continuation in the loose-object find, and match loose object
    # fan-out dirs with hex digits [0-9a-f] (the old [0-9][0-9] missed
    # dirs such as "ab" — git names them with two lowercase hex chars,
    # matching this script's own HEX class).
    local repo=$1
    local loose keep ours newer
    [ -n "$SW_T" ] && { debug "$SW_T -> treat as touched" ; return 0 ; }

    if [ -z "$SW_LOOSE" ] ; then
        # If there are loose objects, they may need to be pruned,
        # run even if nothing has really been touched.
        loose=$(find "$repo/objects" -type d \
                     -wholename "$repo/objects/[0-9a-f][0-9a-f]" \
                     -print -quit 2>/dev/null)
        [ -n "$loose" ] && { info "There are loose object directories" ; return 0 ; }
    fi

    # If we don't have a keep, the current packfiles may not have been
    # compressed with the current gc policy (gc may never have been run),
    # so run at least once to repack everything. Also, we need a marker
    # file for timestamp tracking (a dir needs to detect changes within
    # it, so it cannot be a marker) and our keeps are something we control,
    # use them.
    for keep in "$repo"/objects/pack/pack-$SHA1.keep ; do
        is_ourkeep "$keep" && { ours=$keep ; break ; }
    done
    [ -z "$ours" ] && { info 'We have no keep (we have never run?): run' ; return 0 ; }

    debug "Our timestamp keep: $ours"
    # The wholename stuff seems to get touched by a noop git gc
    newer=$(find "$repo/objects" "$repo/refs" "$repo/packed-refs" \
                 '!' -wholename "$repo/objects/info" \
                 '!' -wholename "$repo/objects/info/*" \
                 -newer "$ours" \
                 -print -quit 2>/dev/null)
    [ -z "$newer" ] && return 1

    info "Touched since last run: $newer"
    return 0
}
| 257 | |
touch_refs() { # git.repo start_date refs
    # Set the mtime of every listed (loose) ref file to the given date,
    # working inside a subshell so the caller's cwd is untouched.
    local repo=$1 when=$2 ref_list=$3
    (
        debug "Setting start date($when) on unpacked refs:"
        debug "$ref_list"
        cd "$repo/refs" || return
        # safe to assume no newlines in a ref name
        xargs -d '\n' -n 1 touch -c -d "$when" <<<"$ref_list"
    )
}
| 268 | |
set_start_date() { # git.repo start_date refs refdirs packedrefs [packs]
    # Reset the timestamps of everything a (possibly noop) gc touches to
    # start_date, so the keep files remain a valid "last run" marker.
    # Fix: restore the missing line continuation in the packed-refs
    # find — without it "-print -quit" ran as a separate command and
    # find fell back to its default print without -quit.
    local repo=$1 start_date=$2 refs=$3 refdirs=$4 packedrefs=$5 ; shift 5
    local pack keep idx repacked

    # This stuff is touched during object packs
    while [ $# -gt 0 ] ; do
        pack=$1 ; shift
        keep="$(keep_for "$pack")"
        idx="$(idx_for "$pack")"
        touch -c -d "$start_date" "$pack" "$keep" "$idx"
        debug "Setting start date on: $pack $keep $idx"
    done
    # This will prevent us from detecting any deletes in the pack dir
    # since gc ran, except for private keeps which we are checking
    # manually. But there really shouldn't be any other relevant deletes
    # in this dir which should cause us to rerun next time, deleting a
    # pack or index file by anything but gc would be bad!
    debug "Setting start date on pack dir: $start_date"
    touch -c -d "$start_date" "$repo/objects/pack"


    if [ -z "$SW_REFS" ] ; then
        repacked=$(find "$repo/packed-refs" -newer "$repo/objects/pack" \
                        -print -quit 2>/dev/null)
        if [ -n "$repacked" ] ; then
            # The ref dirs and packed-ref files seem to get touched even on
            # a noop refpacking
            debug "Setting start date on packed-refs"
            touch -c -d "$start_date" "$repo/packed-refs"
            touch_refs "$repo" "$start_date" "$refdirs"

            # A ref repack does not imply a ref change, but since it is
            # hard to tell, simply assume so
            if [ "$refs" != "$(cd "$repo/refs" ; find -depth)" ] || \
               [ "$packedrefs" != "$(<"$repo/packed-refs")" ] ; then
                # We retouch if needed (instead of simply checking then
                # touching) to avoid a race between the check and the set.
                debug " but refs actually got packed, so retouch packed-refs"
                touch -c "$repo/packed-refs"
            fi
        fi
    fi
}
| 312 | |
note_consolidate() { # note entry > note (no duplicated consolidated entries)
    # Append packfile id $2 to the note's "Consolidate with:<ids>" list,
    # extracting any existing list first so ids are never duplicated.
    local note=$1 entry=$2
    local entries=() ifs=$IFS
    if echo "$note" | grep -q 'Consolidate with:[0-9,c]' ; then
        IFS=,
        # Pull the existing comma-separated id list out into an array.
        entries=( $(echo "$note" | sed -es'/^.*Consolidate with:\([0-9,c]*\).*$/\1/') )
        # NOTE(review): the stripped note is assigned as an array while
        # IFS=, so it splits on commas; the "$note" expansion below only
        # yields the first element — confirm notes never contain commas
        # outside the id list.
        note=( $(echo "$note" | sed -es'/Consolidate with:[0-9,c]*//') )
        IFS=$ifs
    fi
    entries=( $(unique "${entries[@]}" "$entry") )
    echo "$note Consolidate with:$(outfs , "${entries[@]}")"
}
| 325 | |
note_toofar() { # note entry > note (no duplicated "too far" entries)
    # Append packfile id $2 to the note's "(too far from:<ids>)" list,
    # extracting any existing list first so ids are never duplicated.
    local note=$1 entry=$2
    local entries=() ifs=$IFS
    if echo "$note" | grep -q '(too far from:[0-9,c]*)' ; then
        IFS=,
        # Pull the existing comma-separated id list out into an array.
        entries=( $(echo "$note" | sed -es'/^.*(too far from:\([0-9,c]*\)).*$/\1/') )
        # NOTE(review): same comma-splitting quirk as note_consolidate —
        # "$note" below expands only the first comma-separated element.
        note=( $(echo "$note" | sed -es'/(too far from:[0-9,c]*)//') )
        IFS=$ifs
    fi
    entries=( $(unique "${entries[@]}" "$entry") )
    echo "$note (too far from:$(outfs , "${entries[@]}"))"
}
| 338 | |
last_entry() { # isRepack pline repackline > last_rows_entry
    # Emit the delayed previous row: the "repack" variant when the
    # following row consolidated into it (size_hit set), otherwise the
    # plain variant.  Fix: echo the function's own third parameter
    # ($repackline) instead of the caller's global $repack_line, which
    # only worked by accident because callers happened to use that name.
    local size_hit=$1 pline=$2 repackline=$3
    if [ -n "$pline" ] ; then
        if [ -n "$size_hit" ] ; then
            echo "$repackline"
        else
            echo "$pline"
        fi
    fi
}
| 349 | |
init_list() { # git.repo > shortlist
    # Build the initial plan table: one row per packfile, ordered by
    # size — "id keep-flag size sha repack", with repack always "n".
    local repo=$1
    local pack
    local idx keep_flag blocks sha

    packs_sizes "$repo" | {
        while read blocks pack ; do
            idx=$((idx + 1))
            keep_flag=-
            if has_keep "$pack" ; then
                keep_flag=k
                has_ourkeep "$pack" && keep_flag=o
            fi
            sha=$(sha_for "$pack")
            echo "$idx $keep_flag $blocks $sha n"
        done
    } | sort_list
}
| 369 | |
consolidate_list() { # run < list > list
    # One consolidation planning pass: read the plan table from stdin,
    # mark rows for repacking (new packs, packs within RATIO of their
    # smaller predecessor), and append a synthetic "c<run>" row that
    # estimates the resulting consolidated pack.  Each row's printout is
    # delayed one iteration so a row can still be rewritten when the
    # NEXT row decides to consolidate with it.
    local run=$1
    local sum=0 psize=0 sum_size=0 size_hit pn clist pline repackline
    local n has_keep size sha repack down up note
    # NOTE(review): the local above declares "repackline", but the code
    # below uses "repack_line" (a global); "repack2" is not local either
    # — confirm no caller depends on these leaking.

    {
        while read n has_keep size sha repack down up note; do
            # Normalize empty action columns to "-"
            [ -z "$up" ] && up='-'
            [ -z "$down" ] && down="-"

            # Privately kept packs are never touched by this script
            if [ "$has_keep" = "k" ] ; then
                echo "$n $has_keep $size $sha $repack - - Private"
                continue
            fi

            if [ "$repack" = "n" ] ; then
                if is_tooclose $psize $size ; then
                    # Within RATIO of the previous (smaller) pack:
                    # consolidate both into the pending "c pack".
                    size_hit=y
                    repack=y
                    sum=$(($sum + $sum_size + $size))
                    sum_size=0 # Prevents double summing this entry
                    clist=($(unique "${clist[@]}" $pn $n))
                    down="^"
                    [ "$has_keep" = "-" ] && note="$note New +"
                    note=$(note_consolidate "$note" "$pn")
                elif [ "$has_keep" = "-" ] ; then
                    # Brand new pack (no keep at all): always repack
                    repack=y
                    sum=$(($sum + $size))
                    sum_size=0 # Prevents double summing this entry
                    clist=($(unique "${clist[@]}" $n))
                    note="$note New"
                elif [ $psize -ne 0 ] ; then
                    # Kept pack, too far from its predecessor: leave it
                    sum_size=$size
                    down="!"
                    note=$(note_toofar "$note" "$pn")
                else
                    sum_size=$size
                fi
            else
                sum_size=$size
            fi

            # By preventing "c files" (consolidated) from being marked
            # "repack" they won't get keeps
            repack2=y
            [ "${n/c}" != "$n" ] && { repack=- ; repack2=- ; }

            last_entry "$size_hit" "$pline" "$repack_line"
            # Delay the printout until we know whether we are
            # being consolidated with the entry following us
            # (we won't know until the next iteration).
            # size_hit is used to determine which of the lines
            # below will actually get printed above on the next
            # iteration.
            pline="$n $has_keep $size $sha $repack $down $up $note"
            repack_line="$n $has_keep $size $sha $repack2 $down v $note"

            pn=$n ; psize=$size # previous entry data
            size_hit='' # will not be consolidated up

        done
        # Flush the final delayed row
        last_entry "$size_hit" "$pline" "$repack_line"

        # Synthetic row estimating the size/position of the new pack
        [ $sum -gt 0 ] && echo "c$run - $sum [$(outfs , "${clist[@]}")] - - -"

    } | sort_list
}
| 437 | |
process_list() { # git.repo > list
    # Repeatedly run consolidation passes over the plan until it stops
    # changing (a pass can enable further rollups), then emit the plan.
    local current=$(init_list "$1") previous pass=0

    while true ; do
        previous=$current
        pass=$((pass + 1))
        current=$(echo "$current" | consolidate_list "$pass")
        if [ "$previous" = "$current" ] ; then
            break
        fi
        debug "------------------------------------------------------------------------------------"
        debug "$HEADER"
        debug "$current"
    done
    debug "------------------------------------------------------------------------------------"
    echo "$current"
}
| 456 | |
repack_list() { # git.repo < list
    # Execute the consolidation plan read from stdin: remove our keeps
    # from packs marked for repacking, run git gc, re-keep the surviving
    # packs, and reset timestamps so an untouched repo is detected on
    # the next run.  Returns git gc's exit status.
    local repo=$1
    local start_date newpacks=0 pkeeps keeps=1 refs refdirs rtn
    # NOTE(review): $(<...) errors to stderr if packed-refs is missing —
    # confirm all target repos have one.
    local packedrefs=$(<"$repo/packed-refs")

    # so they don't appear touched after a noop refpacking
    if [ -z "$SW_REFS" ] ; then
        refs=$(cd "$repo/refs" ; find -depth)
        refdirs=$(cd "$repo/refs" ; find -type d -depth)
        debug "Before refs:"
        debug "$refs"
    fi

    # Find a private keep snapshot which has not changed from
    # before our start_date so private keep deletions during gc
    # can be detected
    while ! array_equals pkeeps "${keeps[@]}" ; do
        debug "Getting a private keep snapshot"
        private_keeps "$repo"
        keeps=("${pkeeps[@]}")
        debug "before keeps: ${keeps[*]}"
        start_date=$(date)
        private_keeps "$repo"
        debug "after keeps: ${pkeeps[*]}"
    done

    # Drop our keep from every pack the plan marks "repack" so gc will
    # consolidate it.  NOTE(review): n/has_keep/size/sha/... are not
    # declared local and leak into the global scope.
    while read n has_keep size sha repack down up note; do
        if [ "$repack" = "y" ] ; then
            keep="$repo/objects/pack/pack-$sha.keep"
            info "Repacking $repo/objects/pack/pack-$sha.pack"
            [ -f "$keep" ] && rm -f "$keep"
        fi
    done

    ( cd "$repo" && git gc "${GC_OPTS[@]}" ) ; rtn=$?

    # Mark any files without a .keep with our .keep
    packs=("$repo"/objects/pack/pack-$SHA1.pack)
    for pack in "${packs[@]}" ; do
        if keep "$pack" ; then
            info "New pack: $pack"
            newpacks=$((newpacks+1))
        fi
    done

    # Record start_time. If there is more than 1 new packfile, we
    # don't want to risk touching it with an older date since that
    # would prevent consolidation on the next run. If the private
    # keeps have changed, then we should run next time no matter what.
    if [ $newpacks -le 1 ] || ! array_equals pkeeps "${keeps[@]}" ; then
        set_start_date "$repo" "$start_date" "$refs" "$refdirs" "$packedrefs" "${packs[@]}"
    fi

    return $rtn # we really only care about the gc error code
}
| 512 | |
git_gc() { # git.repo
    # Build the consolidation plan and execute it.  In verbose mode the
    # plan was already shown via debug inside process_list; otherwise
    # print a summary to stderr here.
    local plan=$(process_list "$1")
    if [ -z "$SW_V" ] ; then
        info "Running $PROG on $1. git gc options: ${GC_OPTS[@]}"
        {
            echo "$HEADER"
            echo "$plan"
        } >&2
    fi
    echo "$plan" | repack_list "$1"
}
| 522 | |
| 523 | |
PROG=$(basename "$0")   # script name, used in messages
# Column header for the plan table shown in verbose/dry-run output
HEADER="Id Keep Size Sha1(or consolidation list) Actions(repack down up note)"
KEEP=git-exproll        # marker string written into keep files we own
HEX='[0-9a-f]'          # glob class matching one lowercase hex digit
HEX10=$HEX$HEX$HEX$HEX$HEX$HEX$HEX$HEX$HEX$HEX
SHA1=$HEX10$HEX10$HEX10$HEX10   # glob matching a 40-hex-digit sha1

RATIO=10                # packfile size ratio target (-r/--ratio)
# Switch flags (each holds the literal switch when given) + extra gc args
SW_N='' ; SW_V='' ; SW_T='' ; SW_REFS='' ; SW_LOOSE='' ; GC_OPTS=()
# Parse command-line switches.  Anything unrecognized — except the final
# argument, which is the repo path — is collected and passed to git gc.
while [ $# -gt 0 ] ; do
    case "$1" in
        -u|-h) usage ;;
        -n) SW_N="$1" ;;
        -v) SW_V="$1" ;;

        -t) SW_T="$1" ;;
        --norefs) SW_REFS="$1" ;;
        --noloose) SW_LOOSE="$1" ;;

        # NOTE(review): no check that a value follows -r/--ratio; a
        # trailing "-r" leaves RATIO empty — confirm callers never do this.
        -r|--ratio) shift ; RATIO="$1" ;;

        *) [ $# -le 1 ] && break
            GC_OPTS=( "${GC_OPTS[@]}" "$1" )
            ;;
    esac
    shift
done
| 551 | |
| 552 | |
REPO="$1"
if ! is_repo "$REPO" ; then
    # Maybe we were handed a worktree; try its .git directory.
    REPO=$REPO/.git
    is_repo "$REPO" || usage "($1) is not likely a git repo"
fi


if [ -z "$SW_N" ] ; then
    # Normal mode: skip entirely when nothing changed since our last run.
    is_touched "$REPO" || { info "Repo untouched since last run" ; exit ; }
    git_gc "$REPO"
else
    # Dry-run (-n): print the consolidation plan only, repack nothing.
    is_touched "$REPO" || info "Repo untouched since last run, analyze anyway."
    process_list "$REPO" >&2
fi