Introduce GC scheduled task for dual-master

Allow deploying and destroying the resources needed to perform scheduled
git garbage collection against a specific list of projects.

Garbage collection is triggered at the configured frequency by a
CloudWatch Events rule, which runs a dedicated ECS task.

Feature: Issue 13620
Change-Id: I3635d296e9b8789ef6128e1e97f3afc4dd67d9dc
diff --git a/Configuration.md b/Configuration.md
index 194ef3c..b7c7cf5 100644
--- a/Configuration.md
+++ b/Configuration.md
@@ -26,6 +26,17 @@
 * `GERRIT_KEY_PREFIX` : Optional. Secrets prefix used during the [Import into AWS Secret Manager](#import-into-aws-secret-manager).
   `gerrit_secret` by default.
 
+#### Scheduled Git Garbage Collection
+
+* `GIT_GC_ENABLED`. Optional. Whether to schedule a git garbage collection task
+as part of the cluster deployment. "false" by default.
+* `SERVICE_GIT_GC_STACK_NAME`. Required. The name of the CloudFormation stack.
+* `GIT_GC_CRON_EXPRESSION`. Required. A cron expression defining the schedule
+of the garbage collection. More information
+[here](https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions).
+* `GIT_GC_PROJECT_LIST`. Required. A comma separated list of projects to run GC
+against.
+
 #### SPECS
 
 Configuration values to spec up Gerrit containers.
diff --git a/Prerequisites.md b/Prerequisites.md
index 6dbccab..0913f1d 100644
--- a/Prerequisites.md
+++ b/Prerequisites.md
@@ -17,6 +17,7 @@
 aws ecr create-repository --repository-name aws-gerrit/syslog-sidecar
 aws ecr create-repository --repository-name aws-gerrit/prometheus
 aws ecr create-repository --repository-name aws-gerrit/grafana
+aws ecr create-repository --repository-name aws-gerrit/git-gc
 ```
 
 * to upload required credentials to AWS Secret Manager execute the [secrets configuration documentation steps](Secrets.md).
diff --git a/common-templates/cf-gerrit-task-execution-role.yml b/common-templates/cf-gerrit-task-execution-role.yml
index ddb6ded..65c0fc7 100644
--- a/common-templates/cf-gerrit-task-execution-role.yml
+++ b/common-templates/cf-gerrit-task-execution-role.yml
@@ -45,3 +45,5 @@
 Outputs:
   TaskExecutionRoleRef:
     Value: !Ref ECSTaskExecutionRole
+  TaskExecutionRoleArn:
+    Value: !GetAtt ECSTaskExecutionRole.Arn
diff --git a/dual-master/Makefile b/dual-master/Makefile
index 1da24a9..d295f7c 100644
--- a/dual-master/Makefile
+++ b/dual-master/Makefile
@@ -1,6 +1,7 @@
 include ../common.env # Must be included before setup.env because the latter depends on it
 include setup.env
 include ../Makefile.common
+include ../maintenance/git-gc/Makefile
 
 CLUSTER_TEMPLATE:=cf-cluster.yml
 SERVICE_MASTER_TEMPLATE:=cf-service-master.yml
@@ -10,6 +11,7 @@
 LOAD_BALANCER_TEMPLATE:=cf-service-lb.yml
 SERVICE_REPLICATION_TEMPLATE:=cf-service-replication.yml
 AWS_FC_COMMAND=export AWS_PAGER=;aws cloudformation
+GIT_GC_SOURCE_PATH=/mnt/efs/gerrit-shared/git
 
 SINGLE_SITE_PLUGINS=javamelody high-availability healthcheck metrics-reporter-cloudwatch
 
@@ -45,6 +47,7 @@
 						service-master-2 wait-for-service-master-2-creation \
 						$(optional_dashboard_targets_creation) \
 						service-lb wait-for-service-lb-creation \
+						$(optional_git_gc_targets_creation) \
 						dns-routing wait-for-dns-routing-creation
 
 cluster: cluster-keys set-optional-gerrit-master-volume
@@ -482,6 +485,7 @@
 						wait-for-service-master-1-deletion wait-for-service-master-2-deletion wait-for-service-slave-deletion \
 						$(optional_dashboard_targets_deletion) \
 						$(optional_replication_targets_deletion) \
+						$(optional_git_gc_targets_deletion) \
 						delete-cluster wait-for-cluster-deletion
 
 confirm-persistent-stack-deletion:
diff --git a/dual-master/cf-cluster.yml b/dual-master/cf-cluster.yml
index 7c506b6..caf10ac 100644
--- a/dual-master/cf-cluster.yml
+++ b/dual-master/cf-cluster.yml
@@ -339,4 +339,8 @@
     Value: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.PublicSubnetOneRef, !Ref SubnetIdProp]
     Export:
       Name: !Join [ ':', [ !Ref 'AWS::StackName', 'PublicSubnetOne' ] ]
-
+  ClusterArn:
+    Description: The ARN of the ECS cluster
+    Value: !GetAtt ECSCluster.Arn
+    Export:
+      Name: !Join [ ':', [ !Ref 'AWS::StackName', 'ClusterArn' ] ]
diff --git a/dual-master/setup.env.template b/dual-master/setup.env.template
index f3b01ad..2867e2a 100644
--- a/dual-master/setup.env.template
+++ b/dual-master/setup.env.template
@@ -64,4 +64,9 @@
 GERRIT_CONTAINER_FDS_SOFT_LIMIT:=32768
 GERRIT_CONTAINER_FDS_HARD_LIMIT:=32768
 
-AUTOREINDEX_POLL_INTERVAL:=10m
\ No newline at end of file
+AUTOREINDEX_POLL_INTERVAL:=10m
+
+GIT_GC_ENABLED=false
+SERVICE_GIT_GC_STACK_NAME=$(AWS_PREFIX)-scheduled-gc
+GIT_GC_CRON_EXPRESSION="0 2 ? * SAT *"
+GIT_GC_PROJECT_LIST="All-Users"
\ No newline at end of file
diff --git a/maintenance/git-gc/Dockerfile b/maintenance/git-gc/Dockerfile
new file mode 100644
index 0000000..a648021
--- /dev/null
+++ b/maintenance/git-gc/Dockerfile
@@ -0,0 +1,17 @@
+FROM lpicanco/java11-alpine:latest
+
+MAINTAINER Antonio Barone <tony@gerritforge.com>
+
+ARG JGIT_VERSION=5.10.0.202012080955
+
+COPY scripts/* /usr/bin/scripts/
+# Install tooling and download the jgit launcher; --fail aborts the build on an HTTP error instead of saving the error page
+RUN apk add --update curl bash git \
+      && curl --fail --location -o /bin/jgit https://repo.eclipse.org/content/groups/releases//org/eclipse/jgit/org.eclipse.jgit.pgm/${JGIT_VERSION}-r/org.eclipse.jgit.pgm-${JGIT_VERSION}-r.sh \
+      && chmod -R +x /bin/jgit /usr/bin/scripts \
+      && rm -rf /var/cache/apk/*
+
+VOLUME /git
+WORKDIR /usr/bin/scripts
+
+ENTRYPOINT [ "/usr/bin/scripts/gc.sh" ]
\ No newline at end of file
diff --git a/maintenance/git-gc/Makefile b/maintenance/git-gc/Makefile
new file mode 100644
index 0000000..eb039c3
--- /dev/null
+++ b/maintenance/git-gc/Makefile
@@ -0,0 +1,62 @@
+AWS=export AWS_PAGER=;aws
+AWS_FC_COMMAND=export AWS_PAGER=;aws cloudformation
+DOCKER_NAME=aws-gerrit/git-gc
+ECR_REPO=$(DOCKER_REGISTRY_URI)/$(DOCKER_NAME)
+# Image tag: hash of the git-gc sources. Paths are relative to the including recipe directory, like the template path below.
+GIT_GC_SHA1=$(shell find ../maintenance/git-gc -type f -exec cat {} \; | sha1sum | cut -c 1-20)
+GIT_GC_IMAGE=$(DOCKER_NAME):$(GIT_GC_SHA1)
+GIT_GC_IMAGE_FQDN=$(ECR_REPO):$(GIT_GC_SHA1)
+SERVICE_GIT_GC_TEMPLATE:=../maintenance/git-gc/cf-task-git-gc.yml
+
+ifeq ($(GIT_GC_ENABLED),true) # when enabled, these names expand into the including recipe's create/delete target lists
+optional_git_gc_targets_creation=create-scheduled-gc-task
+optional_git_gc_targets_deletion=delete-scheduled-gc-task
+endif
+
+docker-registry-login: # log the local docker client in to the ECR repository
+	$(AWS) ecr get-login-password --region $(AWS_REGION) \
+		| docker login --username AWS --password-stdin $(ECR_REPO)
+
+git-gc-build: # build the git-gc image and tag it with its ECR name
+	docker build -t $(GIT_GC_IMAGE) ../maintenance/git-gc
+	docker tag $(GIT_GC_IMAGE) $(GIT_GC_IMAGE_FQDN)
+
+git-gc-publish: docker-registry-login git-gc-build # push the built image to ECR
+	docker push $(GIT_GC_IMAGE_FQDN)
+
+create-scheduled-gc-task: service-git-gc wait-for-service-git-gc-creation # create the GC stack, then wait for it
+delete-scheduled-gc-task: delete-service-git-gc wait-for-service-git-gc-deletion # delete the GC stack, then wait for it
+
+service-git-gc: git-gc-publish # create the CloudFormation stack from $(SERVICE_GIT_GC_TEMPLATE) once the image is published
+
+	$(AWS_FC_COMMAND) create-stack \
+		--stack-name $(SERVICE_GIT_GC_STACK_NAME) \
+		--capabilities CAPABILITY_IAM  \
+		--template-body file://`pwd`/$(SERVICE_GIT_GC_TEMPLATE) \
+		--region $(AWS_REGION) \
+		--parameters \
+		ParameterKey=ClusterStackName,ParameterValue=$(CLUSTER_STACK_NAME) \
+		ParameterKey=TemplateBucketName,ParameterValue=$(TEMPLATE_BUCKET_NAME) \
+		ParameterKey=DockerImageFQN,ParameterValue="$(GIT_GC_IMAGE_FQDN)" \
+		ParameterKey=ProjectList,ParameterValue=\"$(GIT_GC_PROJECT_LIST)\" \
+		ParameterKey=ScheduleCronExpression,ParameterValue=$(GIT_GC_CRON_EXPRESSION) \
+		ParameterKey=GitSourcePath,ParameterValue=$(GIT_GC_SOURCE_PATH)
+
+wait-for-service-git-gc-creation: # block until the GC stack creation completes
+	@echo "*** Wait for service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' creation"
+	$(AWS_FC_COMMAND) wait stack-create-complete \
+	--stack-name $(SERVICE_GIT_GC_STACK_NAME) \
+	--region $(AWS_REGION)
+	@echo "*** service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' created"
+
+wait-for-service-git-gc-deletion: # block until the GC stack deletion completes
+	@echo "*** Wait for service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' deletion"
+	$(AWS_FC_COMMAND) wait stack-delete-complete \
+	--stack-name $(SERVICE_GIT_GC_STACK_NAME) \
+	--region $(AWS_REGION)
+	@echo "*** service-git-gc stack '$(SERVICE_GIT_GC_STACK_NAME)' deleted"
+
+delete-service-git-gc: # request deletion of the GC stack (returns without waiting)
+	$(AWS_FC_COMMAND) delete-stack \
+	--stack-name $(SERVICE_GIT_GC_STACK_NAME) \
+	--region $(AWS_REGION)
\ No newline at end of file
diff --git a/maintenance/git-gc/README.md b/maintenance/git-gc/README.md
new file mode 100644
index 0000000..0d53000
--- /dev/null
+++ b/maintenance/git-gc/README.md
@@ -0,0 +1,60 @@
+## Git Repo Garbage Collection
+
+Optionally any recipe can be deployed so that a garbage collection task is
+scheduled to run periodically against a specified list of repositories.
+
+By setting the environment variable `GIT_GC_ENABLED=true`, a new stack will be
+deployed to provision the resources needed to run garbage collection as a
+scheduled ECS task.
+
+Please refer to the relevant [configuration section](../../Configuration.md#scheduled-git-garbage-collection)
+to understand which parameters need to be set for this.
+
+You can also deploy and destroy this stack separately, as such:
+
+* Add GC scheduled task to an existing deployment
+```bash
+make [AWS_REGION=a-valid-aws-region] [AWS_PREFIX=some-cluster-prefix] create-scheduled-gc-task
+```
+* Delete GC scheduled task from an existing deployment
+```bash
+make [AWS_REGION=a-valid-aws-region] [AWS_PREFIX=some-cluster-prefix] delete-scheduled-gc-task
+```
+
+The scheduled task will be executed on any master EC2 instance.
+You will need to account for this when deciding the instance type and the
+allocated CPU and Memory running on those EC2 instances.
+
+## Limitations
+
+### Resources
+
+CPU and memory allocated to the GC task are hardcoded to 1 vCPU and 1 GB,
+respectively. Depending on the number and size of repositories, these values
+might not be adequate.
+
+* Issue: https://bugs.chromium.org/p/gerrit/issues/detail?id=13888
+
+### Docker image
+
+The Docker image on which the GC task is based is not the official
+[OpenJDK](https://hub.docker.com/_/openjdk) image.
+
+* Issue: https://bugs.chromium.org/p/gerrit/issues/detail?id=13889
+
+### Managing repositories
+
+The GC task requires a list of projects to perform GC on.
+
+Whilst this provides flexibility for the Gerrit admin to decide which projects
+should be GC'd, it might also make it difficult to manage for installations with
+a very large number of projects.
+
+There is already a Gerrit plugin named gc-conductor that can offload this burden
+by evaluating the dirtiness of repositories and adding them to a queue to be
+garbage collected.
+
+This approach should and can be considered as a valid alternative to perform GC
+activities.
+
+* Issue: https://bugs.chromium.org/p/gerrit/issues/detail?id=13890
\ No newline at end of file
diff --git a/maintenance/git-gc/cf-task-git-gc.yml b/maintenance/git-gc/cf-task-git-gc.yml
new file mode 100644
index 0000000..7e48cf1
--- /dev/null
+++ b/maintenance/git-gc/cf-task-git-gc.yml
@@ -0,0 +1,127 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: ECS service scheduling GC against specified git projects
+Parameters:
+  ClusterStackName:
+    Description: Stack name of the ECS cluster to deploy this service onto
+    Type: String
+    Default: gerrit-cluster
+  ProjectList:
+    Description: Comma separated list of projects to perform GC against
+    Type: CommaDelimitedList
+    Default: ''
+  EnvironmentName:
+    Description: An environment name used to build the log stream names
+    Type: String
+    Default: test
+  TemplateBucketName:
+    Description: S3 bucket containing cloudformation templates
+    Type: String
+  DockerImageFQN:
+    Description: Fully qualified name of the git-gc docker image
+    Type: String
+  ScheduleCronExpression:
+    Description: Cron expression string to schedule GC at
+    Type: String
+  GitSourcePath:
+    Description: The absolute path storing git data
+    Type: String
+
+Mappings:
+  Gerrit:
+    Volume:
+      Git: gerrit-git
+  GitGC:
+    Task:
+      Name: git-gc
+
+Resources:
+    TaskDefinition: # ECS task that runs the git-gc image with the host git directory mounted at /git
+        Type: AWS::ECS::TaskDefinition
+        Properties:
+            Family: !FindInMap ['GitGC', 'Task', 'Name']
+            TaskRoleArn: !GetAtt ECSTaskExecutionRoleStack.Outputs.TaskExecutionRoleRef
+            ExecutionRoleArn: !GetAtt ECSTaskExecutionRoleStack.Outputs.TaskExecutionRoleRef # NOTE(review): passes the role Ref output, not an ARN output - confirm this is intended
+            NetworkMode: bridge
+            PlacementConstraints: # restrict placement to instances whose target_group attribute matches master.*
+                - Expression: !Sub 'attribute:target_group =~ master.*'
+                  Type: "memberOf"
+            ContainerDefinitions:
+                - Name: !FindInMap ['GitGC', 'Task', 'Name']
+                  Essential: true
+                  Image: !Ref DockerImageFQN
+                  Environment:
+                    - Name: GC_PROJECT_LIST # comma separated project list consumed by scripts/gc.sh
+                      Value: !Join [',', !Ref ProjectList]
+                  MountPoints:
+                    - SourceVolume: !FindInMap ['Gerrit', 'Volume', 'Git']
+                      ContainerPath: /git # matches GIT_HOME in scripts/utils.sh
+                  Cpu: 1024
+                  Memory: 1024
+                  LogConfiguration:
+                    LogDriver: awslogs
+                    Options:
+                        awslogs-group: !Ref ClusterStackName
+                        awslogs-region: !Ref AWS::Region
+                        awslogs-stream-prefix: !Ref EnvironmentName
+            Volumes:
+              - Name: !FindInMap ['Gerrit', 'Volume', 'Git']
+                Host:
+                  SourcePath: !Ref GitSourcePath
+
+    ECSTaskExecutionRoleStack: # nested stack whose TaskExecutionRoleRef output is used as task role and execution role
+      Type: AWS::CloudFormation::Stack
+      Properties:
+        TemplateURL: !Join [ '', ['https://', !Ref TemplateBucketName, '.s3.amazonaws.com/cf-gerrit-task-execution-role.yml'] ]
+        TimeoutInMinutes: '5'
+
+    EventsInvokeTaskRole: # role assumed by events.amazonaws.com to start the GC task
+      Type: AWS::IAM::Role
+      Properties:
+        AssumeRolePolicyDocument:
+          Statement:
+            - Effect: Allow
+              Principal:
+                Service: [events.amazonaws.com]
+              Action:
+                - sts:AssumeRole
+        Path: /
+        Policies:
+          - PolicyName: "AllowTaskInvoke"
+            PolicyDocument:
+              Statement:
+                - Effect: "Allow" # RunTask on any revision of the git-gc task definition...
+                  Action:
+                    - 'ecs:RunTask'
+                  Resource: !Sub
+                    - "arn:aws:ecs:*:${AWS::AccountId}:task-definition/${TaskName}:*"
+                    - { TaskName: !FindInMap ['GitGC', 'Task', 'Name'] }
+                  Condition: # ...but only on the cluster whose name is exported by the cluster stack
+                    ArnLike:
+                      ecs:cluster: !Sub
+                        - "arn:aws:ecs:*:${AWS::AccountId}:cluster/${ClusterName}"
+                        - { ClusterName:
+                              { Fn::ImportValue: !Join [':', [!Ref 'ClusterStackName', 'ClusterName']] }
+                        }
+                - Effect: "Allow" # allow passing roles, restricted to the ECS tasks service
+                  Action: "iam:PassRole"
+                  Resource: "*"
+                  Condition:
+                    StringLike:
+                      iam:PassedToService: "ecs-tasks.amazonaws.com"
+
+    TaskSchedule: # scheduled rule that starts one GC task on the imported cluster
+      Type: AWS::Events::Rule
+      Properties:
+        Description: "Run git garbage collection on a list of specified projects"
+        Name: git-GC # NOTE(review): fixed rule name - presumably clashes if two stacks share an account/region; confirm
+        ScheduleExpression: !Sub "cron(${ScheduleCronExpression})"
+        State: ENABLED
+        Targets:
+          - Id: git-gc-master
+            RoleArn: !GetAtt EventsInvokeTaskRole.Arn
+            EcsParameters:
+              TaskDefinitionArn: !Ref TaskDefinition
+              TaskCount: 1
+            Arn: # ARN of the ECS cluster, exported by the cluster stack as <ClusterStackName>:ClusterArn
+              Fn::ImportValue:
+                !Join [':', [!Ref 'ClusterStackName', 'ClusterArn']]
\ No newline at end of file
diff --git a/maintenance/git-gc/scripts/gc.sh b/maintenance/git-gc/scripts/gc.sh
new file mode 100644
index 0000000..aeaad8e
--- /dev/null
+++ b/maintenance/git-gc/scripts/gc.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+#####################################################
+# Garbage collect specific repositories, using jgit #
+#####################################################
+# errexit stays off on purpose: a failing project must not stop the remaining ones.
+set -o pipefail
+
+source ./utils.sh
+
+start_process=$SECONDS
+log "START GC PROCESS"
+
+if [ -z "$GC_PROJECT_LIST" ]; then
+  echo "GC_PROJECT_LIST environment variable is empty. Nothing to do."
+  exit 1
+fi
+
+failed_projects=0
+for proj in ${GC_PROJECT_LIST//,/ }; do
+  gc_project "$proj" || failed_projects=$(( failed_projects + 1 ))
+done
+
+end_process=$SECONDS
+log "END GC PROCESS"
+log "GC process took $(( end_process - start_process )) seconds"
+
+# Exit non-zero when gc_project reported a failure, so the run does not look successful.
+[ "$failed_projects" -eq 0 ] || exit 1
diff --git a/maintenance/git-gc/scripts/utils.sh b/maintenance/git-gc/scripts/utils.sh
new file mode 100644
index 0000000..1ed4bc3
--- /dev/null
+++ b/maintenance/git-gc/scripts/utils.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+JGIT="/bin/jgit"
+GIT_HOME="/git"
+
+# GC project $1 under $GIT_HOME, logging stats before and after; non-zero if the repo cannot be entered or GC fails.
+function gc_project {
+  proj=$1
+  PROJECT_PATH=$GIT_HOME/"$proj".git
+  pushd "$PROJECT_PATH" || {
+    status_code=$?
+    err_proj "$proj" "Could not move into $PROJECT_PATH ($status_code). Skipping."
+    return 1
+  }
+
+  log_project "$proj" "stats before GC"
+  print_stats "$proj"
+  do_gc
+  gc_status=$?
+  log_project "$proj" "stats after GC"
+  print_stats "$proj"
+
+  popd || {
+    status_code=$?
+    err_proj "$proj" "Could not step out of $PROJECT_PATH ($status_code). Aborting"
+    exit 1
+  }
+  return $gc_status
+}
+
+# Run "jgit gc" in the current directory and log its duration; $proj (set by the caller) labels the log lines.
+function do_gc() {
+    start=$SECONDS
+    if $JGIT gc; then
+      log_project "$proj" "GC took $(( SECONDS - start )) seconds"
+      return 0
+    else
+      status_code=$?
+      err_proj "$proj" "Could not GC $proj ($status_code)."
+      return 1
+    fi
+}
+
+# Log the object count and, per pack file extension, the file count, size and oldest file for project $1.
+function print_stats {
+  proj=$1
+  log_project "$proj" "#num_objects: $(count_objects)"
+
+  for ext in pack bitmap idx keep; do
+    log_project "$proj" "#num_${ext}: $(count_pack_objects "$ext") files"
+    log_project "$proj" "#size_${ext}: $(size_pack_objects "$ext") Kb"
+    log_project "$proj" "#oldest_${ext}: $(oldest_pack_object "$ext")"
+  done
+}
+
+function count_pack_objects { # number of regular files under objects/pack named *.$1 (spaces stripped from wc output)
+   find objects/pack -type f -name "*.$1" | wc -l | sed 's/\ //g'
+}
+
+function size_pack_objects {
+   local out # "total" figure of du -ck over objects/pack files named *.$1; 0 when none match
+   out=$(find objects/pack -type f -name "*.$1" -exec du -ck {} + | grep total$ | cut -d$'\t' -f1)
+   echo "${out:-0}"
+}
+
+function oldest_pack_object {
+   local out # ls -l line of the oldest objects/pack file named *.$1; -r stops xargs listing the cwd when none match
+   out=$(find objects/pack -type f -name "*.$1" -print0 | xargs -0 -r ls -tl | tail -1)
+   echo "${out:-NONE}"
+}
+
+function count_objects { # first field of the `git count-objects` output for the current repository
+  git count-objects  | awk '{print $1}'
+}
+
+function now { # current time as seconds since the Unix epoch
+  date '+%s'
+}
+
+function log_project { # stdout: <epoch>|INFO|<project $1>|<message $2>
+  echo "$(now)|INFO|$1|$2"
+}
+
+function log { # stdout: <epoch>|INFO|<message $1>
+  echo "$(now)|INFO|$1"
+}
+
+function err_proj { # stderr: <epoch>|ERROR|<project $1>|<message $2>
+  >&2 echo "$(now)|ERROR|$1|$2"
+}
\ No newline at end of file