dual-primary: share git data on replicas over EFS

Store git data for replicas over EFS so that it can persist beyond the
lifespan of a single instance.
This is the first step in the direction of allowing replicas to scale.

Feature: Issue 13619
Change-Id: I2c3e265577cfa35e8040a9858b628ce964d9657d
diff --git a/Makefile.common b/Makefile.common
index e5d0d8f..4c166c5 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -93,4 +93,16 @@
 endif
 ifdef GERRIT_VOLUME_SIZE_IN_GIB
 		$(eval GERRIT_OPTIONAL_PRIMARY_VOLUME := $(GERRIT_OPTIONAL_PRIMARY_VOLUME) ParameterKey=GerritVolumeSizeInGiB,ParameterValue=$(GERRIT_VOLUME_SIZE_IN_GIB))
+endif
+# Builds GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM: starts empty and appends one CloudFormation ParameterKey/ParameterValue pair per REPLICA_FILESYSTEM_* variable that is defined.
+set-optional-params-for-replica-filesystem:
+	$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM=)
+ifdef REPLICA_FILESYSTEM_ID
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM := $(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM) ParameterKey=ReplicaFileSystemID,ParameterValue=$(REPLICA_FILESYSTEM_ID))
+endif
+ifdef REPLICA_FILESYSTEM_THROUGHPUT_MODE
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM := $(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM) ParameterKey=ReplicaFileSystemThroughputMode,ParameterValue=$(REPLICA_FILESYSTEM_THROUGHPUT_MODE))
+endif
+ifdef REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM := $(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM) ParameterKey=ReplicaProvisionedThroughputInMibps,ParameterValue=$(REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS))
 endif
\ No newline at end of file
diff --git a/common-templates/cf-efs-stack.yml b/common-templates/cf-efs-stack.yml
index 3c465cc..643f987 100644
--- a/common-templates/cf-efs-stack.yml
+++ b/common-templates/cf-efs-stack.yml
@@ -16,6 +16,9 @@
   SecurityGroupCidrIp:
     Description: The IPv4 address range for the security group, in CIDR format
     Type: String
+  TagValue:
+    Description: A tag value for this EFS resource
+    Type: String
 
 Conditions:
   isProvisionedThroughput: !Equals [!Ref FileSystemThroughputMode, "provisioned"]
@@ -28,7 +31,7 @@
       ProvisionedThroughputInMibps: !If [isProvisionedThroughput, !Ref ProvisionedThroughputInMibps, !Ref "AWS::NoValue"]
       FileSystemTags:
         - Key: Name
-          Value: "multi-primary-git-repo"
+          Value: !Ref TagValue
 
   GitMountTarget:
     Type: AWS::EFS::MountTarget
diff --git a/dual-primary/Makefile b/dual-primary/Makefile
index 7e2b6ab..309d997 100644
--- a/dual-primary/Makefile
+++ b/dual-primary/Makefile
@@ -50,7 +50,7 @@
 						$(optional_git_gc_targets_creation) \
 						dns-routing wait-for-dns-routing-creation
 
-cluster: cluster-keys set-optional-gerrit-primary-volume
+cluster: cluster-keys set-optional-gerrit-primary-volume set-optional-params-for-replica-filesystem
 ifdef CLUSTER_INSTANCE_TYPE
 		$(eval CLUSTER_OPTIONAL_PARAMS := $(CLUSTER_OPTIONAL_PARAMS) ParameterKey=InstanceType,ParameterValue=$(CLUSTER_INSTANCE_TYPE))
 endif
@@ -91,7 +91,8 @@
 		ParameterKey=VPCIdProp,ParameterValue=$(VPC_ID) \
 		ParameterKey=SubnetIdProp,ParameterValue=$(SUBNET_ID) \
 		$(CLUSTER_OPTIONAL_PARAMS) \
-		$(GERRIT_OPTIONAL_PRIMARY_VOLUME)
+		$(GERRIT_OPTIONAL_PRIMARY_VOLUME) \
+		$(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM)
 
 service-primary-1: set-optional-params-metrics-cloudwatch set-optional-params-smtp \
 					set-optional-params-multisite set-ldap-account-pattern \
@@ -494,7 +495,7 @@
 	@echo ""
 	@echo -n "Are you sure you want to continue? [y/N] " && read ans && [ $${ans:-N} = y ]
 
-delete-all-including-retained-stack: confirm-persistent-stack-deletion delete-all delete-git-persistent-stack delete-network-persistent-stack
+delete-all-including-retained-stack: confirm-persistent-stack-deletion delete-all delete-git-persistent-stack delete-git-replica-persistent-stack delete-network-persistent-stack
 
 delete-git-persistent-stack:
 
@@ -513,6 +514,23 @@
 		echo "No Git persistent stack found. Nothing to do." \
 	)
 
+delete-git-replica-persistent-stack:
+# Finds the CREATE_COMPLETE stack named "$(CLUSTER_STACK_NAME)-ReplicaGitFileSystemPermanentStack..." and, if one exists, deletes it and waits for the deletion to complete.
+	$(eval REPLICA_EFS_STACK_NAME := $(shell $(AWS_FC_COMMAND) list-stacks --stack-status-filter CREATE_COMPLETE --query "StackSummaries[*].StackName" | jq -r '.[]| select(startswith("$(CLUSTER_STACK_NAME)-ReplicaGitFileSystemPermanentStack"))'))
+
+	$(if $(REPLICA_EFS_STACK_NAME), \
+		$(AWS_FC_COMMAND) delete-stack \
+			--stack-name $(REPLICA_EFS_STACK_NAME) \
+			--region $(AWS_REGION) && \
+		echo "*** Wait for Git persistent stack of replicas '$(REPLICA_EFS_STACK_NAME)' deletion" && \
+		$(AWS_FC_COMMAND) wait stack-delete-complete \
+			--stack-name $(REPLICA_EFS_STACK_NAME) \
+			--region $(AWS_REGION) && \
+		echo "*** Git persistent stack '$(REPLICA_EFS_STACK_NAME)' deleted" \
+		, \
+		echo "No Git persistent stack for replicas found. Nothing to do." \
+	)
+
 delete-network-persistent-stack:
 	$(eval NETWORK_STACK_NAME=$(shell $(AWS_FC_COMMAND) list-stacks --stack-status-filter CREATE_COMPLETE --query "StackSummaries[*].StackName" | jq -r '.[]| select(startswith("$(CLUSTER_STACK_NAME)-ECSTaskNetworkStack"))'))
 
diff --git a/dual-primary/README.md b/dual-primary/README.md
index 767bdab..7c3e989 100644
--- a/dual-primary/README.md
+++ b/dual-primary/README.md
@@ -221,6 +221,27 @@
 Default: `10m`
 high-availability docs [here](https://gerrit.googlesource.com/plugins/high-availability/+/refs/heads/master/src/main/resources/Documentation/config.md)
 
+##### Shared filesystem for replicas
+
+Similarly to primary nodes, replicas share git data via an EFS filesystem which is
+mounted under the `/var/gerrit/git` directory. This allows git data to persist
+beyond the lifespan of a single instance and to be shared so that replicas can
+scale down and up according to needs.
+
+* `REPLICA_FILESYSTEM_ID`: Optional. An existing EFS filesystem id to mount on replicas.
+
+    If empty, a new EFS will be created to store git data.
+    Setting this value is required when deploying a dual-primary cluster using
+    existing data as well as performing blue/green deployments.
+    The nested stack will be *retained* when the cluster is deleted, so that
+    existing data can be used to perform blue/green deployments.
+
+* `REPLICA_FILESYSTEM_THROUGHPUT_MODE`: Optional. The throughput mode for the file system to be created.
+Default: `bursting`. More info [here](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-efs-filesystem.html)
+
+* `REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS`: Optional. Only used when `REPLICA_FILESYSTEM_THROUGHPUT_MODE` is set to `provisioned`.
+Default: `256`.
+
 #### REPLICATION SERVICE
 
 * `REPLICATION_SERVICE_ENABLED`: Optional. Whether to expose a replication endpoint.
diff --git a/dual-primary/cf-cluster.yml b/dual-primary/cf-cluster.yml
index 22d0594..7923dc1 100644
--- a/dual-primary/cf-cluster.yml
+++ b/dual-primary/cf-cluster.yml
@@ -93,10 +93,24 @@
     Description: Gerrit volume size in GiB
     Type: Number
     Default: 10
+  ReplicaFileSystemID:
+    Description: Gerrit replicas shared filesystem id
+    Type: String
+    Default: ""
+  ReplicaFileSystemThroughputMode:
+    Description: Gerrit replicas shared filesystem throughput mode
+    Type: String
+    Default: bursting
+    AllowedValues: [bursting, provisioned]
+  ReplicaProvisionedThroughputInMibps:
+    Description: Gerrit replicas shared filesystem throughput, measured in MiB/s. Valid values are 1-1024.
+    Type: Number
+    Default: 256
 
 Conditions:
   isProvisionedThroughput: !Equals [!Ref FileSystemThroughputMode, "provisioned"]
   CreateEFS: !Equals [!Ref FileSystemID, ""]
+  CreateReplicaEFS: !Equals [!Ref ReplicaFileSystemID, ""]
   NetworkStackNeeded: !Or
     - !Equals [!Ref VPCIdProp, ""]
     - !And
@@ -308,6 +322,22 @@
         PublicSubnet: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.PublicSubnetOneRef, !Ref SubnetIdProp]
         SecurityGroupVPCID: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.VPCRef, !Ref VPCIdProp]
         SecurityGroupCidrIp: !Ref SubnetCIDR
+        TagValue: "efs-for-gerrit-primaries"
+
+  ReplicaGitFileSystemPermanentStack:
+    Type: AWS::CloudFormation::Stack
+    DeletionPolicy: Retain
+    Condition: CreateReplicaEFS
+    Properties:
+      TemplateURL: !Join [ '', ['https://', !Ref TemplateBucketName, '.s3.amazonaws.com/cf-efs-stack.yml'] ]
+      TimeoutInMinutes: '25'
+      Parameters:
+        FileSystemThroughputMode: !Ref ReplicaFileSystemThroughputMode
+        ProvisionedThroughputInMibps: !Ref ReplicaProvisionedThroughputInMibps
+        PublicSubnet: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.PublicSubnetOneRef, !Ref SubnetIdProp]
+        SecurityGroupVPCID: !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.VPCRef, !Ref VPCIdProp]
+        SecurityGroupCidrIp: !Ref SubnetCIDR
+        TagValue: "efs-for-gerrit-replicas"
 
   ECSTaskNetworkStack:
     Type: AWS::CloudFormation::Stack
@@ -344,3 +374,8 @@
     Value: !GetAtt ECSCluster.Arn
     Export:
       Name: !Join [ ':', [ !Ref 'AWS::StackName', 'ClusterArn' ] ]
+  ReplicaFileSystemID:
+    Description: The ID of the EFS to be mounted by replicas
+    Value: !If [CreateReplicaEFS, !GetAtt ReplicaGitFileSystemPermanentStack.Outputs.FileSystemID, !Ref ReplicaFileSystemID ]
+    Export:
+      Name: !Join [ ':', [ !Ref 'AWS::StackName', 'ReplicaFileSystemID' ] ]
diff --git a/dual-primary/cf-service-replica.yml b/dual-primary/cf-service-replica.yml
index 58ed57d..ff59a9a 100644
--- a/dual-primary/cf-service-replica.yml
+++ b/dual-primary/cf-service-replica.yml
@@ -359,12 +359,10 @@
                   Labels:
                     gerrit-db: !Join ['-', [!Ref EnvironmentName, !Ref GerritDbVolume]]
               - Name: !Ref 'GerritGitVolume'
-                DockerVolumeConfiguration:
-                  Scope: shared
-                  Autoprovision: true
-                  Driver: local
-                  Labels:
-                    gerrit-git: !Join ['-', [!Ref EnvironmentName, !Ref GerritGitVolume]]
+                EFSVolumeConfiguration:
+                  FilesystemId:
+                    Fn::ImportValue:
+                      !Join [':', [!Ref 'ClusterStackName', 'ReplicaFileSystemID']]
               - Name: !Ref 'GerritDataVolume'
                 DockerVolumeConfiguration:
                   Scope: shared