Introduce GC scheduled task for dual-master
Allow deploying and destroying the resources needed to perform scheduled git
garbage collection against a specific list of projects.
The garbage collection is triggered with the wanted frequency by
installing a cloudwatch event rule, which runs a dedicated ECS target.
Feature: Issue 13620
Change-Id: I3635d296e9b8789ef6128e1e97f3afc4dd67d9dc
diff --git a/Configuration.md b/Configuration.md
index 194ef3c..b7c7cf5 100644
--- a/Configuration.md
+++ b/Configuration.md
@@ -26,6 +26,17 @@
* `GERRIT_KEY_PREFIX` : Optional. Secrets prefix used during the [Import into AWS Secret Manager](#import-into-aws-secret-manager).
`gerrit_secret` by default.
+#### Scheduled Git Garbage Collection
+
+* `GIT_GC_ENABLED`. Optional. Whether to schedule a git garbage collection task
+as part of the cluster deployment. "false" by default.
+* `SERVICE_GIT_GC_STACK_NAME`. Required. The name of the cloudformation stack.
+* `GIT_GC_CRON_EXPRESSION`. Required. A cron expression defining the schedule
+of the garbage collection. More information
+[here](https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions)
+* `GIT_GC_PROJECT_LIST`. Required. A comma separated list of projects to run GC
+against.
+
#### SPECS
Configuration values to spec up Gerrit containers.
diff --git a/Prerequisites.md b/Prerequisites.md
index 6dbccab..0913f1d 100644
--- a/Prerequisites.md
+++ b/Prerequisites.md
@@ -17,6 +17,7 @@
aws ecr create-repository --repository-name aws-gerrit/syslog-sidecar
aws ecr create-repository --repository-name aws-gerrit/prometheus
aws ecr create-repository --repository-name aws-gerrit/grafana
+aws ecr create-repository --repository-name aws-gerrit/git-gc
```
* to upload required credentials to AWS Secret Manager execute the [secrets configuration documentation steps](Secrets.md).
diff --git a/common-templates/cf-gerrit-task-execution-role.yml b/common-templates/cf-gerrit-task-execution-role.yml
index ddb6ded..65c0fc7 100644
--- a/common-templates/cf-gerrit-task-execution-role.yml
+++ b/common-templates/cf-gerrit-task-execution-role.yml
@@ -45,3 +45,5 @@
Outputs:
TaskExecutionRoleRef:
Value: !Ref ECSTaskExecutionRole
+ TaskExecutionRoleArn:
+ Value: !GetAtt ECSTaskExecutionRole.Arn
diff --git a/dual-master/Makefile b/dual-master/Makefile
index 1da24a9..d295f7c 100644
--- a/dual-master/Makefile
+++ b/dual-master/Makefile
@@ -1,6 +1,7 @@
include ../common.env # Must be included before setup.env because the latter depends on it
include setup.env
include ../Makefile.common
+include ../maintenance/git-gc/Makefile
CLUSTER_TEMPLATE:=cf-cluster.yml
SERVICE_MASTER_TEMPLATE:=cf-service-master.yml
@@ -10,6 +11,7 @@
LOAD_BALANCER_TEMPLATE:=cf-service-lb.yml
SERVICE_REPLICATION_TEMPLATE:=cf-service-replication.yml
AWS_FC_COMMAND=export AWS_PAGER=;aws cloudformation
+GIT_GC_SOURCE_PATH=/mnt/efs/gerrit-shared/git
SINGLE_SITE_PLUGINS=javamelody high-availability healthcheck metrics-reporter-cloudwatch
@@ -45,6 +47,7 @@
service-master-2 wait-for-service-master-2-creation \
$(optional_dashboard_targets_creation) \
service-lb wait-for-service-lb-creation \
+ $(optional_git_gc_targets_creation) \
dns-routing wait-for-dns-routing-creation
cluster: cluster-keys set-optional-gerrit-master-volume
@@ -482,6 +485,7 @@
wait-for-service-master-1-deletion wait-for-service-master-2-deletion wait-for-service-slave-deletion \
$(optional_dashboard_targets_deletion) \
$(optional_replication_targets_deletion) \
+ $(optional_git_gc_targets_deletion) \
delete-cluster wait-for-cluster-deletion
confirm-persistent-stack-deletion:
diff --git a/dual-master/cf-cluster.yml b/dual-master/cf-cluster.yml
index 7c506b6..caf10ac 100644
--- a/dual-master/cf-cluster.yml
+++ b/dual-master/cf-cluster.yml
@@ -339,4 +339,8 @@
Value: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.PublicSubnetOneRef, !Ref SubnetIdProp]
Export:
Name: !Join [ ':', [ !Ref 'AWS::StackName', 'PublicSubnetOne' ] ]
-
+ ClusterArn:
+ Description: The ARN of the ECS cluster
+ Value: !GetAtt ECSCluster.Arn
+ Export:
+ Name: !Join [ ':', [ !Ref 'AWS::StackName', 'ClusterArn' ] ]
diff --git a/dual-master/setup.env.template b/dual-master/setup.env.template
index f3b01ad..2867e2a 100644
--- a/dual-master/setup.env.template
+++ b/dual-master/setup.env.template
@@ -64,4 +64,9 @@
GERRIT_CONTAINER_FDS_SOFT_LIMIT:=32768
GERRIT_CONTAINER_FDS_HARD_LIMIT:=32768
-AUTOREINDEX_POLL_INTERVAL:=10m
\ No newline at end of file
+AUTOREINDEX_POLL_INTERVAL:=10m
+
+GIT_GC_ENABLED=false
+SERVICE_GIT_GC_STACK_NAME=$(AWS_PREFIX)-scheduled-gc
+GIT_GC_CRON_EXPRESSION="0 2 ? * SAT *"
+GIT_GC_PROJECT_LIST="All-Users"
\ No newline at end of file
diff --git a/maintenance/git-gc/Dockerfile b/maintenance/git-gc/Dockerfile
new file mode 100644
index 0000000..a648021
--- /dev/null
+++ b/maintenance/git-gc/Dockerfile
@@ -0,0 +1,17 @@
+FROM lpicanco/java11-alpine:latest
+
+LABEL maintainer="Antonio Barone <tony@gerritforge.com>"
+
+ARG JGIT_VERSION=5.10.0.202012080955
+
+COPY scripts/* /usr/bin/scripts/
+
+# Fetch the jgit launcher; 'curl -f' fails the build on an HTTP error instead of saving the error page.
+RUN apk add --no-cache curl bash git \
+    && curl -fL -o /bin/jgit https://repo.eclipse.org/content/groups/releases//org/eclipse/jgit/org.eclipse.jgit.pgm/${JGIT_VERSION}-r/org.eclipse.jgit.pgm-${JGIT_VERSION}-r.sh \
+    && chmod -R +x /bin/jgit /usr/bin/scripts
+
+VOLUME /git
+WORKDIR /usr/bin/scripts
+
+ENTRYPOINT [ "/usr/bin/scripts/gc.sh" ]
\ No newline at end of file
diff --git a/maintenance/git-gc/Makefile b/maintenance/git-gc/Makefile
new file mode 100644
index 0000000..eb039c3
--- /dev/null
+++ b/maintenance/git-gc/Makefile
@@ -0,0 +1,62 @@
+# Scheduled git-gc image/stack settings; paths are relative to the including recipe directory.
+AWS=export AWS_PAGER=;aws
+AWS_FC_COMMAND=export AWS_PAGER=;aws cloudformation
+DOCKER_NAME=aws-gerrit/git-gc
+ECR_REPO=$(DOCKER_REGISTRY_URI)/$(DOCKER_NAME)
+SERVICE_GIT_GC_TEMPLATE:=../maintenance/git-gc/cf-task-git-gc.yml
+# Image tag: hash of the docker build context, so it tracks the image inputs rather than the recipe directory.
+GIT_GC_SHA1=$(shell find ../maintenance/git-gc -type f -exec cat {} \; | sha1sum | cut -c 1-20)
+GIT_GC_IMAGE=$(DOCKER_NAME):$(GIT_GC_SHA1)
+GIT_GC_IMAGE_FQDN=$(ECR_REPO):$(GIT_GC_SHA1)
+ifeq ($(GIT_GC_ENABLED),true) # hook GC into cluster creation/deletion only when enabled
+optional_git_gc_targets_creation=create-scheduled-gc-task
+optional_git_gc_targets_deletion=delete-scheduled-gc-task
+endif
+
+# Log the local docker client in to the ECR registry hosting the git-gc image.
+docker-registry-login:
+	$(AWS) ecr get-login-password --region $(AWS_REGION) \
+		| docker login --password-stdin --username AWS $(ECR_REPO)
+# Build the image from ../maintenance/git-gc and tag it for ECR.
+git-gc-build:
+	docker build --tag $(GIT_GC_IMAGE) ../maintenance/git-gc
+	docker tag $(GIT_GC_IMAGE) $(GIT_GC_IMAGE_FQDN)
+git-gc-publish: docker-registry-login git-gc-build ## push the tagged image to ECR
+	docker push $(GIT_GC_IMAGE_FQDN)
+
+create-scheduled-gc-task: service-git-gc wait-for-service-git-gc-creation ## deploy the GC stack and wait for it
+delete-scheduled-gc-task: delete-service-git-gc wait-for-service-git-gc-deletion ## delete the GC stack and wait
+# Publish the image, then create the CloudFormation stack that schedules the GC task.
+service-git-gc: git-gc-publish
+# Parameter keys below match the Parameters section of $(SERVICE_GIT_GC_TEMPLATE).
+	$(AWS_FC_COMMAND) create-stack \
+		--region $(AWS_REGION) \
+		--stack-name $(SERVICE_GIT_GC_STACK_NAME) \
+		--template-body file://$$(pwd)/$(SERVICE_GIT_GC_TEMPLATE) \
+		--capabilities CAPABILITY_IAM \
+		--parameters \
+		ParameterKey=ClusterStackName,ParameterValue=$(CLUSTER_STACK_NAME) \
+		ParameterKey=TemplateBucketName,ParameterValue=$(TEMPLATE_BUCKET_NAME) \
+		ParameterKey=GitSourcePath,ParameterValue=$(GIT_GC_SOURCE_PATH) \
+		ParameterKey=DockerImageFQN,ParameterValue="$(GIT_GC_IMAGE_FQDN)" \
+		ParameterKey=ScheduleCronExpression,ParameterValue=$(GIT_GC_CRON_EXPRESSION) \
+		ParameterKey=ProjectList,ParameterValue=\"$(GIT_GC_PROJECT_LIST)\"
+
+wait-for-service-git-gc-creation: ## block until creation of the GC stack has completed
+	@echo "*** Wait for service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' creation"
+	$(AWS_FC_COMMAND) wait stack-create-complete \
+	--stack-name $(SERVICE_GIT_GC_STACK_NAME) \
+	--region $(AWS_REGION)
+	@echo "*** service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' created"
+
+wait-for-service-git-gc-deletion: ## block until deletion of the GC stack has completed
+	@echo "*** Wait for service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' deletion"
+	$(AWS_FC_COMMAND) wait stack-delete-complete \
+	--region $(AWS_REGION) \
+	--stack-name $(SERVICE_GIT_GC_STACK_NAME)
+	@echo "*** service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' deleted"
+
+delete-service-git-gc: ## request deletion of the GC stack (does not wait for it)
+	$(AWS_FC_COMMAND) delete-stack \
+	--region $(AWS_REGION) \
+	--stack-name $(SERVICE_GIT_GC_STACK_NAME)
\ No newline at end of file
diff --git a/maintenance/git-gc/README.md b/maintenance/git-gc/README.md
new file mode 100644
index 0000000..0d53000
--- /dev/null
+++ b/maintenance/git-gc/README.md
@@ -0,0 +1,60 @@
+## Git Repo Garbage Collection
+
+Optionally any recipe can be deployed so that a garbage collection task is
+scheduled to run periodically against a specified list of repositories.
+
+By setting the environment variable `GIT_GC_ENABLED=true`, a new stack will be
+deployed to provision the resources needed to run garbage collection as a
+scheduled ECS task.
+
+Please refer to the relevant [configuration section](../../Configuration.md#scheduled-git-garbage-collection)
+to understand which parameters need to be set for this.
+
+You can also deploy and destroy this stack separately, as such:
+
+* Add GC scheduled task to an existing deployment
+```bash
+make [AWS_REGION=a-valid-aws-region] [AWS_PREFIX=some-cluster-prefix] create-scheduled-gc-task
+```
+* Delete GC scheduled task from an existing deployment
+```bash
+make [AWS_REGION=a-valid-aws-region] [AWS_PREFIX=some-cluster-prefix] delete-scheduled-gc-task
+```
+
+The scheduled task will be executed on any master EC2 instance.
+You will need to account for this when deciding the instance type and the
+allocated CPU and Memory running on those EC2 instances.
+
+## Limitations
+
+### Resources
+
+CPU and memory allocated to the GC task are hardcoded to 1 vCPU and 1GB,
+respectively. Depending on the number and size of repositories, these values
+might not be suitable.
+
+* Issue: https://bugs.chromium.org/p/gerrit/issues/detail?id=13888
+
+### Docker image
+
+The docker image on which the GC task is based is not the official
+[OpenJDK](https://hub.docker.com/_/openjdk).
+
+* Issue: https://bugs.chromium.org/p/gerrit/issues/detail?id=13889
+
+### Managing repositories
+
+The GC task requires a list of projects to perform GC on.
+
+Whilst this provides flexibility for the Gerrit admin to decide which projects
+should be GC'd, it might also make it difficult to manage for installations with
+a very large number of projects.
+
+There is already a Gerrit plugin named gc-conductor that can offload this burden
+by evaluating the dirtiness of repositories and adding them to a queue to be
+garbage collected.
+
+This approach can and should be considered a valid alternative for performing GC
+activities.
+
+* Issue: https://bugs.chromium.org/p/gerrit/issues/detail?id=13890
\ No newline at end of file
diff --git a/maintenance/git-gc/cf-task-git-gc.yml b/maintenance/git-gc/cf-task-git-gc.yml
new file mode 100644
index 0000000..7e48cf1
--- /dev/null
+++ b/maintenance/git-gc/cf-task-git-gc.yml
@@ -0,0 +1,127 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: ECS service scheduling GC against specified git projects
+Parameters:
+  ClusterStackName: # also used as the awslogs group and as prefix of the imported cluster exports
+    Description: Stack name of the ECS cluster to deploy this service onto
+    Type: String
+    Default: gerrit-cluster
+  ProjectList: # re-joined with ',' into the container's GC_PROJECT_LIST variable
+    Description: Comma separated list of projects to perform GC against
+    Type: CommaDelimitedList
+    Default: ''
+  EnvironmentName: # becomes the awslogs stream prefix
+    Description: An environment name used to build the log stream names
+    Type: String
+    Default: test
+  TemplateBucketName: # bucket the nested task-execution-role template is loaded from
+    Description: S3 bucket containing cloudformation templates
+    Type: String
+  DockerImageFQN:
+    Description: Fully qualified name of the git-gc docker image
+    Type: String
+  ScheduleCronExpression: # wrapped as cron(...) by the TaskSchedule rule
+    Description: Cron expression string to schedule GC at
+    Type: String
+  GitSourcePath: # host directory mounted at /git inside the container
+    Description: The absolute path storing git data
+    Type: String
+# Constant names shared by several resources below.
+Mappings:
+  Gerrit:
+    Volume:
+      Git: gerrit-git
+  GitGC:
+    Task:
+      Name: git-gc
+
+Resources:
+  TaskDefinition: # ECS task running the git-gc image against the host's git directory
+    Type: AWS::ECS::TaskDefinition
+    Properties:
+      Family: !FindInMap ['GitGC', 'Task', 'Name']
+      TaskRoleArn: !GetAtt ECSTaskExecutionRoleStack.Outputs.TaskExecutionRoleRef # same role for task and execution
+      ExecutionRoleArn: !GetAtt ECSTaskExecutionRoleStack.Outputs.TaskExecutionRoleRef
+      NetworkMode: bridge
+      PlacementConstraints: # only instances whose target_group attribute matches master.*
+        - Expression: !Sub 'attribute:target_group =~ master.*'
+          Type: "memberOf"
+      ContainerDefinitions:
+        - Name: !FindInMap ['GitGC', 'Task', 'Name']
+          Essential: true
+          Image: !Ref DockerImageFQN
+          Environment:
+            - Name: GC_PROJECT_LIST
+              Value: !Join [',', !Ref ProjectList]
+          MountPoints:
+            - SourceVolume: !FindInMap ['Gerrit', 'Volume', 'Git']
+              ContainerPath: /git
+          Cpu: 1024 # hardcoded; see the Limitations section of the README
+          Memory: 1024 # hardcoded; see the Limitations section of the README
+          LogConfiguration:
+            LogDriver: awslogs
+            Options:
+              awslogs-group: !Ref ClusterStackName
+              awslogs-region: !Ref AWS::Region
+              awslogs-stream-prefix: !Ref EnvironmentName
+      Volumes:
+        - Name: !FindInMap ['Gerrit', 'Volume', 'Git']
+          Host:
+            SourcePath: !Ref GitSourcePath
+  # Nested stack creating the IAM role referenced by TaskDefinition above.
+  ECSTaskExecutionRoleStack:
+    Type: AWS::CloudFormation::Stack
+    Properties:
+      TemplateURL: !Join [ '', ['https://', !Ref TemplateBucketName, '.s3.amazonaws.com/cf-gerrit-task-execution-role.yml'] ]
+      TimeoutInMinutes: '5'
+  # Role assumed by the events service so the schedule rule can start the task.
+  EventsInvokeTaskRole:
+    Type: AWS::IAM::Role
+    Properties:
+      AssumeRolePolicyDocument:
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: [events.amazonaws.com]
+            Action:
+              - sts:AssumeRole
+      Path: /
+      Policies:
+        - PolicyName: "AllowTaskInvoke"
+          PolicyDocument:
+            Statement:
+              - Effect: "Allow" # run any revision of the git-gc task definition...
+                Action:
+                  - 'ecs:RunTask'
+                Resource: !Sub
+                  - "arn:aws:ecs:*:${AWS::AccountId}:task-definition/${TaskName}:*"
+                  - { TaskName: !FindInMap ['GitGC', 'Task', 'Name'] }
+                Condition: # ...but only on the cluster exported by ClusterStackName
+                  ArnLike:
+                    ecs:cluster: !Sub
+                      - "arn:aws:ecs:*:${AWS::AccountId}:cluster/${ClusterName}"
+                      - { ClusterName:
+                          { Fn::ImportValue: !Join [':', [!Ref 'ClusterStackName', 'ClusterName']] }
+                        }
+              - Effect: "Allow"
+                Action: "iam:PassRole"
+                Resource: "*" # NOTE(review): any role may be passed to ecs-tasks; consider scoping to the task role
+                Condition:
+                  StringLike:
+                    iam:PassedToService: "ecs-tasks.amazonaws.com"
+  # Schedule rule that starts one GC task on the cluster at every cron occurrence.
+  TaskSchedule:
+    Type: AWS::Events::Rule
+    Properties:
+      Description: "Run git garbage collection on a list of specified projects"
+      Name: git-GC # NOTE(review): fixed rule name; presumably limits this to one GC stack per account/region - confirm
+      ScheduleExpression: !Sub "cron(${ScheduleCronExpression})"
+      State: ENABLED
+      Targets:
+        - Id: git-gc-master
+          RoleArn: !GetAtt EventsInvokeTaskRole.Arn
+          EcsParameters:
+            TaskDefinitionArn: !Ref TaskDefinition
+            TaskCount: 1
+          Arn: # target is the ECS cluster, imported from the cluster stack's ClusterArn export
+            Fn::ImportValue:
+              !Join [':', [!Ref 'ClusterStackName', 'ClusterArn']]
\ No newline at end of file
diff --git a/maintenance/git-gc/scripts/gc.sh b/maintenance/git-gc/scripts/gc.sh
new file mode 100644
index 0000000..aeaad8e
--- /dev/null
+++ b/maintenance/git-gc/scripts/gc.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+#####################################################
+# Garbage collect specific repositories, using jgit #
+#####################################################
+# errexit stays off on purpose: one failing project must not prevent GC of the others.
+set -o pipefail
+
+source "$(dirname "$0")/utils.sh" || exit 1 # log, gc_project; located next to this script
+start_process=$SECONDS
+log "START GC PROCESS"
+
+if [ -z "$GC_PROJECT_LIST" ]; then
+  echo "GC_PROJECT_LIST environment variable is empty. Nothing to do."
+  exit 1
+fi
+
+failures=0 # number of projects for which gc_project returned non-zero
+for proj in $(echo "$GC_PROJECT_LIST" | sed "s/,/ /g"); do
+  gc_project "$proj" || failures=$(( failures + 1 ))
+done
+
+end_process=$SECONDS
+log "END GC PROCESS"
+
+duration_process=$(( end_process - start_process ))
+log "GC process took $duration_process seconds"
+
+exit $(( failures > 0 )) # 1 when at least one project was reported as failed, 0 otherwise
diff --git a/maintenance/git-gc/scripts/utils.sh b/maintenance/git-gc/scripts/utils.sh
new file mode 100644
index 0000000..1ed4bc3
--- /dev/null
+++ b/maintenance/git-gc/scripts/utils.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# Helper functions for gc.sh, which loads this file with 'source'.
+JGIT="/bin/jgit" # jgit launcher, invoked as '$JGIT gc'
+GIT_HOME="/git" # directory holding the <project>.git repositories
+
+# gc_project <name>: GC $GIT_HOME/<name>.git, logging pack statistics before and after.
+# Returns non-zero when the repository cannot be entered or when the GC itself failed.
+function gc_project {
+  proj=$1 # deliberately global: do_gc reads it for its log lines
+  PROJECT_PATH=$GIT_HOME/"$proj".git
+  pushd "$PROJECT_PATH" || {
+    status_code=$?
+    err_proj "$proj" "Could not move into $PROJECT_PATH ($status_code). Skipping."
+    return 1
+  }
+
+  log_project "$proj" "stats before GC"
+  print_stats "$proj"
+  do_gc; local gc_status=$? # keep the outcome, but still report stats and restore the directory
+  log_project "$proj" "stats after GC"
+  print_stats "$proj"
+
+  popd || {
+    status_code=$?
+    err_proj "$proj" "Could not step out of $PROJECT_PATH ($status_code). Aborting"
+    exit 1
+  }
+  return "$gc_status"
+}
+
+function do_gc() { # run 'jgit gc' in the current directory and log how long it took
+  start=$SECONDS
+  $JGIT gc && {
+    end=$SECONDS
+    duration=$(( end - start ))
+    log_project "$proj" "GC took $duration seconds" # $proj is set by the caller
+    return 0
+  }
+  status_code=$? # still the exit status of the failed jgit run
+  err_proj "$proj" "Could not GC $proj ($status_code)."
+  return 1
+}
+
+function print_stats { # $1 = project name; logs object and pack-file statistics of the current directory
+  proj=$1
+  log_project "$proj" "#num_objects: $(count_objects)"
+  local files kbytes oldest # one count/size/oldest triple per file kind under objects/pack
+  for ext in pack bitmap idx keep; do
+    files=$(count_pack_objects "$ext"); kbytes=$(size_pack_objects "$ext"); oldest=$(oldest_pack_object "$ext")
+    log_project "$proj" "#num_$ext: $files files"
+    log_project "$proj" "#size_$ext: $kbytes Kb"
+    log_project "$proj" "#oldest_$ext: $oldest"
+  done
+}
+
+function count_pack_objects { # number of objects/pack/*.$1 files
+  find objects/pack -type f -name "*.$1" | wc -l | tr -d ' '
+}
+
+function size_pack_objects { # 'du -ck' total of objects/pack/*.$1, or 0 when there is no such file
+  out=$(find objects/pack -type f -name "*.$1" -exec du -ck {} + \
+    | grep 'total$' | cut -f1) # cut splits on tabs by default
+  echo "${out:-0}"
+}
+
+function oldest_pack_object { # 'ls -l' line of the oldest objects/pack/*.$1 file, or NONE
+  # find's own -exec runs nothing without a match; 'xargs ls' would list the current directory instead.
+  out=$(find objects/pack -type f -name "*.$1" -exec ls -tl {} + | tail -1)
+  echo "${out:-NONE}"
+}
+
+function count_objects { # first field of the 'git count-objects' output
+  git count-objects | cut -d' ' -f1
+}
+
+function now { # seconds since the Unix epoch
+  date '+%s'
+}
+function log_project { # $1 = project, $2 = message; prints <epoch>|INFO|<project>|<message>
+  local project=$1 message=$2
+  echo "$(now)|INFO|$project|$message"
+}
+function log { # $1 = message; prints <epoch>|INFO|<message>
+  local message=$1
+  echo "$(now)|INFO|$message"
+}
+function err_proj { # $1 = project, $2 = message; prints <epoch>|ERROR|<project>|<message> on stderr
+  local project=$1 message=$2
+  echo "$(now)|ERROR|$project|$message" >&2
+}
\ No newline at end of file