primary-replica: share git data on replicas over EFS

Store git data for replicas over EFS so that it can persist beyond the
lifespan of a single instance.

This is the first step in the direction of allowing replicas to scale.

Similarly to the dual-primary stack, the replica EFS stack, when created
as part of the cluster stack, will be retained by default, in order
to allow blue/green deployments.

Feature: Issue 13619
Change-Id: I3a0c6110f87686186534c72c2920387581059d97
diff --git a/Makefile.common b/Makefile.common
index e34c0d0..78c1f9a 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -128,4 +128,20 @@
 		echo "*** Network stack '$(NETWORK_STACK_NAME)' deleted" \
 		, \
 		echo "No network stack found. Nothing to do." \
+	)
+
+# Delete the retained replica EFS stack (CREATE_COMPLETE, name starting with CLUSTER_STACK_NAME-ReplicaGitFileSystemPermanentStack) and wait for its deletion; print a notice when none is found.
+delete-git-replica-persistent-stack:
+	$(eval REPLICA_EFS_STACK_NAME := $(shell $(AWS_FC_COMMAND) list-stacks --stack-status-filter CREATE_COMPLETE --query "StackSummaries[*].StackName" | jq -r '.[]| select(startswith("$(CLUSTER_STACK_NAME)-ReplicaGitFileSystemPermanentStack"))'))
+	$(if $(REPLICA_EFS_STACK_NAME), \
+		$(AWS_FC_COMMAND) delete-stack \
+			--stack-name $(REPLICA_EFS_STACK_NAME) \
+			--region $(AWS_REGION) && \
+		echo "*** Wait for Git persistent stack of replicas '$(REPLICA_EFS_STACK_NAME)' deletion" && \
+		$(AWS_FC_COMMAND) wait stack-delete-complete \
+			--stack-name $(REPLICA_EFS_STACK_NAME) \
+			--region $(AWS_REGION) && \
+		echo "*** Git persistent stack '$(REPLICA_EFS_STACK_NAME)' deleted" \
+		, \
+		echo "No Git persistent stack for replicas found. Nothing to do." \
 	)
\ No newline at end of file
diff --git a/dual-primary/Makefile b/dual-primary/Makefile
index 6be1334..933ce14 100644
--- a/dual-primary/Makefile
+++ b/dual-primary/Makefile
@@ -508,23 +508,6 @@
 		echo "No Git persistent stack found. Nothing to do." \
 	)
 
-delete-git-replica-persistent-stack:
-
-	$(eval REPLICA_EFS_STACK_NAME := $(shell $(AWS_FC_COMMAND) list-stacks --stack-status-filter CREATE_COMPLETE --query "StackSummaries[*].StackName" | jq -r '.[]| select(startswith("$(CLUSTER_STACK_NAME)-ReplicaGitFileSystemPermanentStack"))'))
-
-	$(if $(REPLICA_EFS_STACK_NAME), \
-		$(AWS_FC_COMMAND) delete-stack \
-			--stack-name $(REPLICA_EFS_STACK_NAME) \
-			--region $(AWS_REGION) && \
-		echo "*** Wait for Git persistent stack of replicas '$(REPLICA_EFS_STACK_NAME)' deletion" && \
-		$(AWS_FC_COMMAND) wait stack-delete-complete \
-			--stack-name $(REPLICA_EFS_STACK_NAME) \
-			--region $(AWS_REGION) && \
-		echo "*** Git persistent stack '$(EFSREPLICA_EFS_STACK_NAME_STACK_NAME)' deleted" \
-		, \
-		echo "No Git persistent stack for replicas found. Nothing to do." \
-	)
-
 gerrit-publish:
 ifeq ($(MULTISITE_ENABLED),true)
 	$(MAKE) -C ../gerrit gerrit-publish RECIPE=dual-primary PLUGINS="$(MULTI_SITE_PLUGINS)" PLUGINS_LIBS_LINKS="$(MULTI_SITE_PLUGINS_LIBS_LINKS)" MAVEN_LIBS="$(MULTI_SITE_MAVEN_LIBS)"
diff --git a/primary-replica/Makefile b/primary-replica/Makefile
index 2e7bd7f..4378812 100644
--- a/primary-replica/Makefile
+++ b/primary-replica/Makefile
@@ -31,7 +31,7 @@
 						$(optional_git_gc_targets_creation) \
 						dns-routing wait-for-dns-routing-creation
 
-cluster: cluster-keys set-optional-gerrit-primary-volume
+cluster: cluster-keys set-optional-gerrit-primary-volume set-optional-params-for-replica-filesystem
 ifdef CLUSTER_INSTANCE_TYPE
 		$(eval CLUSTER_OPTIONAL_PARAMS := $(CLUSTER_OPTIONAL_PARAMS) ParameterKey=InstanceType,ParameterValue=$(CLUSTER_INSTANCE_TYPE))
 endif
@@ -57,7 +57,8 @@
 		ParameterKey=VPCIdProp,ParameterValue=$(VPC_ID) \
 		ParameterKey=SubnetIdProp,ParameterValue=$(SUBNET_ID) \
 		$(CLUSTER_OPTIONAL_PARAMS) \
-		$(GERRIT_OPTIONAL_PRIMARY_VOLUME)
+		$(GERRIT_OPTIONAL_PRIMARY_VOLUME) \
+		$(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM)
 
 service-primary: set-optional-params-metrics-cloudwatch set-optional-params-smtp set-ldap-account-pattern set-optional-gerrit-ulimits set-optional-jgit-conf
 ifdef LOAD_BALANCER_SCHEME
@@ -172,7 +173,7 @@
 		@echo "METRICS_CLOUDWATCH_ENABLED is set to false. Dashboard creation skipped".
 endif
 
-delete-all-including-retained-stack: confirm-persistent-stack-deletion delete-all delete-network-persistent-stack
+delete-all-including-retained-stack: confirm-persistent-stack-deletion delete-all delete-git-replica-persistent-stack delete-network-persistent-stack
 
 wait-for-cluster-creation:
 	@echo "*** Wait for cluster stack '$(CLUSTER_STACK_NAME)' creation"
diff --git a/primary-replica/README.md b/primary-replica/README.md
index 33008ff..f7bcddf 100644
--- a/primary-replica/README.md
+++ b/primary-replica/README.md
@@ -93,6 +93,27 @@
 * `PROMETHEUS_SUBDOMAIN`: Optional. Prometheus subdomain. For example: `<AWS_PREFIX>-prometheus`
 * `GRAFANA_SUBDOMAIN`: Optional. Grafana subdomain. For example: `<AWS_PREFIX>-grafana`
 
+##### Shared filesystem for replicas
+
+Replicas share git data via an EFS filesystem, which is
+mounted under the `/var/gerrit/git` directory. This allows git data to persist
+beyond the lifespan of a single instance and to be shared so that replicas can
+scale down and up according to needs.
+
+* `REPLICA_FILESYSTEM_ID`: Optional. An existing EFS filesystem id to mount on replicas.
+
+    If empty, a new EFS will be created to store git data.
+    Setting this value is required when deploying a primary-replica cluster using
+    existing data as well as performing blue/green deployments.
+    The nested stack will be *retained* when the cluster is deleted, so that
+    existing data can be used to perform blue/green deployments.
+
+* `REPLICA_FILESYSTEM_THROUGHPUT_MODE`: Optional. The throughput mode for the file system to be created.
+default: `bursting`. More info [here](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-efs-filesystem.html)
+
+* `REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS`: Optional. Only used when `REPLICA_FILESYSTEM_THROUGHPUT_MODE` is set to `provisioned`.
+default: `256`.
+
 ### 2 - Deploy
 
 * Create the cluster, services and DNS routing stacks:
@@ -123,6 +144,7 @@
 * Secrets stored in Secret Manager
 * SSL certificates
 * ECR repositories
+* Replica EFS stack
 * VPC and subnets (if created as part of this deployment, rather than externally
 provided)
 
diff --git a/primary-replica/cf-cluster.yml b/primary-replica/cf-cluster.yml
index 5cbb44c..8360292 100644
--- a/primary-replica/cf-cluster.yml
+++ b/primary-replica/cf-cluster.yml
@@ -71,8 +71,22 @@
     Description: Gerrit volume size in GiB
     Type: Number
     Default: 10
+  ReplicaFileSystemID:
+    Description: Gerrit replicas shared filesystem id
+    Type: String
+    Default: ""
+  ReplicaFileSystemThroughputMode:
+    Description: Gerrit replicas shared filesystem throughput mode
+    Type: String
+    Default: bursting
+    AllowedValues: [bursting, provisioned]
+  ReplicaProvisionedThroughputInMibps:
+    Description: Gerrit replicas shared filesystem throughput, measured in MiB/s. Valid values are 1-1024.
+    Type: Number
+    Default: 256
 
 Conditions:
+  CreateReplicaEFS: !Equals [!Ref ReplicaFileSystemID, ""]
   NetworkStackNeeded: !Or
     - !Equals [!Ref VPCIdProp, ""]
     - !And
@@ -359,6 +373,21 @@
         GerritVolumeSnapshotId: !Ref 'GerritVolumeSnapshotId'
         GerritVolumeSizeInGiB: !Ref 'GerritVolumeSizeInGiB'
 
+  ReplicaGitFileSystemPermanentStack:
+    Type: AWS::CloudFormation::Stack
+    Condition: CreateReplicaEFS
+    DeletionPolicy: Retain
+    Properties:
+      TemplateURL: !Join [ '', ['https://', !Ref TemplateBucketName, '.s3.amazonaws.com/cf-efs-stack.yml'] ]
+      TimeoutInMinutes: '25'
+      Parameters:
+        FileSystemThroughputMode: !Ref ReplicaFileSystemThroughputMode
+        ProvisionedThroughputInMibps: !Ref ReplicaProvisionedThroughputInMibps
+        PublicSubnet: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.PublicSubnetOneRef, !Ref SubnetIdProp]
+        SecurityGroupVPCID: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.VPCRef, !Ref VPCIdProp]
+        SecurityGroupCidrIp: !Ref SubnetCIDR
+        TagValue: "efs-for-gerrit-replicas"
+
 Outputs:
   ClusterName:
     Description: The name of the ECS cluster
@@ -380,3 +409,8 @@
     Value: !GetAtt ECSCluster.Arn
     Export:
       Name: !Join [ ':', [ !Ref 'AWS::StackName', 'ClusterArn' ] ]
+  ReplicaFileSystemID:
+    Description: The ID of the EFS to be mounted by replicas
+    Value: !If [CreateReplicaEFS, !GetAtt ReplicaGitFileSystemPermanentStack.Outputs.FileSystemID, !Ref ReplicaFileSystemID ]
+    Export:
+      Name: !Join [ ':', [ !Ref 'AWS::StackName', 'ReplicaFileSystemID' ] ]
diff --git a/primary-replica/cf-service-replica.yml b/primary-replica/cf-service-replica.yml
index 642ec80..fdc8350 100644
--- a/primary-replica/cf-service-replica.yml
+++ b/primary-replica/cf-service-replica.yml
@@ -359,12 +359,10 @@
                   Labels:
                     gerrit-db: !Join ['-', [!Ref EnvironmentName, !Ref GerritDbVolume]]
               - Name: !Ref 'GerritGitVolume'
-                DockerVolumeConfiguration:
-                  Scope: shared
-                  Autoprovision: true
-                  Driver: local
-                  Labels:
-                    gerrit-git: !Join ['-', [!Ref EnvironmentName, !Ref GerritGitVolume]]
+                EFSVolumeConfiguration:
+                  FilesystemId:
+                    Fn::ImportValue:
+                      !Join [':', [!Ref 'ClusterStackName', 'ReplicaFileSystemID']]
               - Name: !Ref 'GerritDataVolume'
                 DockerVolumeConfiguration:
                   Scope: shared