primary-replica: auto-scaling policy for replicas

Allow replicas to scale in and out based on average CPU value.

Note that this does not automatically scale the underlying autoscaling
group: that would require the use of a capacity provider and will be
addressed separately.

Bug: Issue 14211
Change-Id: I7adb8c5e3b6a97ec5019e7f5efdc25faa21ce833
diff --git a/Makefile.common b/Makefile.common
index 78c1f9a..2e01b03 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -21,6 +21,7 @@
 	aws s3 cp ../common-templates/cf-gerrit-volume.yml s3://$(TEMPLATE_BUCKET_NAME)/
 	aws s3 cp ../common-templates/cf-primary-asg.yml s3://$(TEMPLATE_BUCKET_NAME)/
 	aws s3 cp ../common-templates/cf-efs-stack.yml s3://$(TEMPLATE_BUCKET_NAME)/
+	aws s3 cp ../common-templates/cf-ecs-service-cpu-autoscaling.yml s3://$(TEMPLATE_BUCKET_NAME)/
 
 set-optional-params-metrics-cloudwatch:
 ifdef METRICS_CLOUDWATCH_ENABLED
@@ -108,6 +109,30 @@
 		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM := $(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM) ParameterKey=ReplicaProvisionedThroughputInMibps,ParameterValue=$(REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS))
 endif
 
+set-optional-params-for-replica-auto-scaling-capacity: # collect optional min/desired/max capacity stack parameters
+	$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_CAPACITY=)
+ifdef REPLICA_AUTOSCALING_MIN_CAPACITY
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_CAPACITY += ParameterKey=ReplicaAutoScalingMinCapacity,ParameterValue=$(REPLICA_AUTOSCALING_MIN_CAPACITY))
+endif
+ifdef REPLICA_AUTOSCALING_DESIRED_CAPACITY
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_CAPACITY += ParameterKey=ReplicaAutoScalingDesiredCapacity,ParameterValue=$(REPLICA_AUTOSCALING_DESIRED_CAPACITY))
+endif
+ifdef REPLICA_AUTOSCALING_MAX_CAPACITY
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_CAPACITY += ParameterKey=ReplicaAutoScalingMaxCapacity,ParameterValue=$(REPLICA_AUTOSCALING_MAX_CAPACITY))
+endif
+
+set-optional-params-for-replica-auto-scaling-policy: # collect optional cooldown/target-CPU stack parameters
+	$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_POLICY=)
+ifdef REPLICA_AUTOSCALING_SCALE_IN_COOLDOWN
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_POLICY += ParameterKey=ReplicaAutoScalingScaleInCooldown,ParameterValue=$(REPLICA_AUTOSCALING_SCALE_IN_COOLDOWN))
+endif
+ifdef REPLICA_AUTOSCALING_SCALE_OUT_COOLDOWN
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_POLICY += ParameterKey=ReplicaAutoScalingScaleOutCooldown,ParameterValue=$(REPLICA_AUTOSCALING_SCALE_OUT_COOLDOWN))
+endif
+ifdef REPLICA_AUTOSCALING_TARGET_CPU_PERCENTAGE
+		$(eval GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_POLICY += ParameterKey=ReplicaAutoScalingTargetCPUPercentage,ParameterValue=$(REPLICA_AUTOSCALING_TARGET_CPU_PERCENTAGE))
+endif
+
 confirm-persistent-stack-deletion:
 	@echo ""
 	@echo "* * * * WARNING * * * * this is going to completely destroy the stack, including git data."
diff --git a/common-templates/cf-ecs-service-cpu-autoscaling.yml b/common-templates/cf-ecs-service-cpu-autoscaling.yml
new file mode 100644
index 0000000..0c3b77b
--- /dev/null
+++ b/common-templates/cf-ecs-service-cpu-autoscaling.yml
@@ -0,0 +1,80 @@
+# Nested-stack template: registers an ECS service with Application Auto Scaling
+# and attaches a target-tracking policy on the service's average CPU utilization.
+AWSTemplateFormatVersion: '2010-09-09'
+Description: Target-tracking auto-scaling of an ECS service based on its average CPU utilization.
+Parameters:
+  AutoScalingMinCapacity:
+    Type: Number
+    Description: The minimum number of tasks the service should scale in to
+  # Accepted as an input, but no resource in this template references it.
+  AutoScalingDesiredCapacity:
+    Description: The desired number of tasks to run
+    Type: Number
+  AutoScalingMaxCapacity:
+    Type: Number
+    Description: The maximum number of tasks the service should scale out to
+  AutoScalingScaleInCooldown:
+    Type: Number
+    Description: The amount of time, in seconds, after a scale-in activity completes before another scale-in activity can start
+  AutoScalingScaleOutCooldown:
+    Type: Number
+    Description: The amount of time, in seconds, to wait for a previous scale-out activity to take effect.
+  AutoScalingTargetCPUPercentage:
+    Type: Number
+    Description: Aggregate CPU utilization target for auto-scaling
+  ResourceId:
+    Type: String
+    Description: The identifier of the resource associated with the scalable target.
+
+Resources:
+    # Scalable target: the ECS service's DesiredCount, bounded by the
+    # min/max capacity parameters.
+    GerritServiceScalingTarget:
+      Type: AWS::ApplicationAutoScaling::ScalableTarget
+      Properties:
+        MinCapacity: !Ref AutoScalingMinCapacity
+        MaxCapacity: !Ref AutoScalingMaxCapacity
+        ResourceId: !Ref ResourceId
+        RoleARN: !GetAtt [AutoscalingRole, Arn]
+        ScalableDimension: ecs:service:DesiredCount
+        ServiceNamespace: ecs
+
+    # Target-tracking policy driven by the predefined
+    # ECSServiceAverageCPUUtilization metric.
+    GerritServiceScalingPolicy:
+      Type: AWS::ApplicationAutoScaling::ScalingPolicy
+      Properties:
+        PolicyName: ReplicaCPUTrackingPolicy
+        PolicyType: TargetTrackingScaling
+        ScalingTargetId: !Ref GerritServiceScalingTarget
+        TargetTrackingScalingPolicyConfiguration:
+          PredefinedMetricSpecification:
+            PredefinedMetricType: ECSServiceAverageCPUUtilization
+          ScaleInCooldown: !Ref AutoScalingScaleInCooldown
+          ScaleOutCooldown: !Ref AutoScalingScaleOutCooldown
+          TargetValue: !Ref AutoScalingTargetCPUPercentage
+
+    # IAM role assumable by Application Auto Scaling, used as the
+    # scalable target's RoleARN.
+    AutoscalingRole:
+      Type: AWS::IAM::Role
+      Properties:
+        AssumeRolePolicyDocument:
+          Statement:
+            - Effect: Allow
+              Principal:
+                Service: [application-autoscaling.amazonaws.com]
+              Action: ['sts:AssumeRole']
+        Path: /
+        Policies:
+          - PolicyName: gerrit-service-autoscaling
+            PolicyDocument:
+              Statement:
+                - Effect: Allow
+                  Action:
+                    - 'application-autoscaling:*'
+                    - 'cloudwatch:DescribeAlarms'
+                    - 'cloudwatch:PutMetricAlarm'
+                    - 'ecs:DescribeServices'
+                    - 'ecs:UpdateService'
+                  Resource: '*'
\ No newline at end of file
diff --git a/primary-replica/Makefile b/primary-replica/Makefile
index 4378812..691f138 100644
--- a/primary-replica/Makefile
+++ b/primary-replica/Makefile
@@ -31,7 +31,9 @@
 						$(optional_git_gc_targets_creation) \
 						dns-routing wait-for-dns-routing-creation
 
-cluster: cluster-keys set-optional-gerrit-primary-volume set-optional-params-for-replica-filesystem
+cluster: cluster-keys set-optional-gerrit-primary-volume \
+			set-optional-params-for-replica-filesystem \
+			set-optional-params-for-replica-auto-scaling-capacity
 ifdef CLUSTER_INSTANCE_TYPE
 		$(eval CLUSTER_OPTIONAL_PARAMS := $(CLUSTER_OPTIONAL_PARAMS) ParameterKey=InstanceType,ParameterValue=$(CLUSTER_INSTANCE_TYPE))
 endif
@@ -58,7 +60,8 @@
 		ParameterKey=SubnetIdProp,ParameterValue=$(SUBNET_ID) \
 		$(CLUSTER_OPTIONAL_PARAMS) \
 		$(GERRIT_OPTIONAL_PRIMARY_VOLUME) \
-		$(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM)
+		$(GERRIT_OPTIONAL_PARAMS_REPLICA_FILESYSTEM) \
+		$(GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_CAPACITY)
 
 service-primary: set-optional-params-metrics-cloudwatch set-optional-params-smtp set-ldap-account-pattern set-optional-gerrit-ulimits set-optional-jgit-conf
 ifdef LOAD_BALANCER_SCHEME
@@ -103,7 +106,12 @@
 		$(GERRIT_ULIMITS)
 
 
-service-replica: set-optional-params-metrics-cloudwatch set-ldap-account-pattern set-optional-gerrit-ulimits set-optional-jgit-conf
+service-replica: set-optional-params-metrics-cloudwatch \
+					set-ldap-account-pattern \
+					set-optional-gerrit-ulimits set-optional-jgit-conf \
+					set-optional-params-for-replica-auto-scaling-capacity \
+					set-optional-params-for-replica-auto-scaling-policy
+
 ifdef LOAD_BALANCER_SCHEME
 		$(eval REPLICA_SERVICE_OPTIONAL_PARAMS := $(REPLICA_SERVICE_OPTIONAL_PARAMS) ParameterKey=LoadBalancerScheme,ParameterValue=$(LOAD_BALANCER_SCHEME))
 endif
@@ -137,7 +145,9 @@
 		$(LDAP_ACCOUNT_PATTERN_PARAM) \
 		$(REPLICA_SERVICE_OPTIONAL_PARAMS) \
 		$(METRICS_CW_OPTIONAL_PARAMS) \
-		$(GERRIT_ULIMITS)
+		$(GERRIT_ULIMITS) \
+		$(GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_CAPACITY) \
+		$(GERRIT_OPTIONAL_PARAMS_REPLICA_AUTO_SCALING_POLICY)
 
 dns-routing:
 	$(AWS_FC_COMMAND) create-stack \
diff --git a/primary-replica/README.md b/primary-replica/README.md
index f7bcddf..7d8da55 100644
--- a/primary-replica/README.md
+++ b/primary-replica/README.md
@@ -114,6 +114,55 @@
 * `REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS`: Optional. Only used when `REPLICA_FILESYSTEM_THROUGHPUT_MODE` is set to `provisioned`.
 default: `256`.
 
+##### Auto Scaling of replicas instances
+
+Gerrit replicas can scale in or out automatically to accommodate increases or
+decreases in traffic. Such traffic typically comes from build or test jobs
+executed by some sort of automated build pipeline.
+
+Since they all [share the same git data over EFS](#shared-filesystem-for-replicas),
+replicas are immediately ready to serve traffic as soon as they come up and
+register behind the loadbalancer.
+
+There is a 1 to 1 relationship between replica tasks and EC2 instances: each EC2
+instance in the 'replica' ASG runs one and only one replica task.
+Because of this, the capacity values specified for replicas (minimum, desired
+and maximum) configure both the capacity of tasks and the capacity of the ASG,
+since the two always need to be in sync.
+
+The scaling policy adds or removes capacity as required to keep the average CPU
+usage (of the replica service) close to the specified target value.
+
+These are the available settings:
+
+* `REPLICA_AUTOSCALING_MIN_CAPACITY` Optional. The minimum number of tasks that
+replicas should scale in to. This is also the minimum number of EC2 instances in
+the replica ASG
+default: *1*
+
+* `REPLICA_AUTOSCALING_DESIRED_CAPACITY` Optional. The desired number of
+replica tasks to run. This is also the desired number of EC2 instances in the
+replica ASG.
+default: *1*
+
+* `REPLICA_AUTOSCALING_MAX_CAPACITY` Optional. The maximum number of tasks that
+replicas should scale out to. This is also the maximum number of EC2 instances
+in the replica ASG
+default: *2*
+
+* `REPLICA_AUTOSCALING_SCALE_IN_COOLDOWN` Optional. The amount of time, in
+seconds, after a scale-in activity completes before another scale-in activity
+can start
+default: *300* seconds
+
+* `REPLICA_AUTOSCALING_SCALE_OUT_COOLDOWN` Optional. The amount of time, in
+seconds, to wait for a previous scale-out activity to take effect
+default: *300* seconds
+
+* `REPLICA_AUTOSCALING_TARGET_CPU_PERCENTAGE` Optional. Aggregate CPU utilization target for auto-scaling.
+Auto-scaling will add or remove tasks in the replica service to be as close as possible to this value
+default: *75*
+
 ### 2 - Deploy
 
 * Create the cluster, services and DNS routing stacks:
diff --git a/primary-replica/cf-cluster.yml b/primary-replica/cf-cluster.yml
index 8360292..8c0015a 100644
--- a/primary-replica/cf-cluster.yml
+++ b/primary-replica/cf-cluster.yml
@@ -84,6 +84,18 @@
     Description: Gerrit replicas shared filesystem throughput, measured in MiB/s. Valid values are 1-1024.
     Type: Number
     Default: 256
+  ReplicaAutoScalingMinCapacity:
+    Type: Number
+    Description: The minimum number of EC2 instances in the replica ASG
+    Default: 1
+  ReplicaAutoScalingDesiredCapacity:
+    Description: The desired number of EC2 instances in the replica ASG
+    Type: Number
+    Default: 1
+  ReplicaAutoScalingMaxCapacity:
+    Type: Number
+    Description: The maximum number of EC2 instances in the replica ASG
+    Default: 2
 
 Conditions:
   CreateReplicaEFS: !Equals [!Ref ReplicaFileSystemID, ""]
@@ -238,9 +250,9 @@
       VPCZoneIdentifier:
         - !If [NetworkStackNeeded, !GetAtt ECSTaskNetworkStack.Outputs.PublicSubnetOneRef, !Ref SubnetIdProp]
       LaunchConfigurationName: !Ref 'ReplicaLaunchConfiguration'
-      MinSize: '1'
-      MaxSize: '1'
-      DesiredCapacity: '1'
+      MinSize: !Ref ReplicaAutoScalingMinCapacity
+      MaxSize: !Ref ReplicaAutoScalingMaxCapacity
+      DesiredCapacity: !Ref ReplicaAutoScalingDesiredCapacity
     CreationPolicy:
       ResourceSignal:
         Timeout: PT15M
diff --git a/primary-replica/cf-service-replica.yml b/primary-replica/cf-service-replica.yml
index fdc8350..7ad5be5 100644
--- a/primary-replica/cf-service-replica.yml
+++ b/primary-replica/cf-service-replica.yml
@@ -36,10 +36,6 @@
   DockerRegistryUrl:
       Description: Docker registry URL
       Type: String
-  DesiredCount:
-      Description: How many instances of this task should we run across our cluster?
-      Type: Number
-      Default: 1
   HTTPHostPort:
       Description: Gerrit Host HTTP port
       Type: Number
@@ -189,6 +185,30 @@
     Description: Comma separated list of regex patterns to exclude metrics reported to CloudWatch
     Type: CommaDelimitedList
     Default: ''
+  ReplicaAutoScalingMinCapacity:
+    Type: Number
+    Description: The minimum number of tasks that replicas should scale in to
+    Default: 1
+  ReplicaAutoScalingDesiredCapacity:
+    Description: The desired number of replica tasks to run
+    Type: Number
+    Default: 1
+  ReplicaAutoScalingMaxCapacity:
+    Type: Number
+    Description: The maximum number of tasks that replicas should scale out to
+    Default: 2
+  ReplicaAutoScalingScaleInCooldown:
+    Type: Number
+    Description: The amount of time, in seconds, after a scale-in activity completes before another scale-in activity can start
+    Default: 300
+  ReplicaAutoScalingScaleOutCooldown:
+    Type: Number
+    Description: The amount of time, in seconds, to wait for a previous scale-out activity to take effect.
+    Default: 300
+  ReplicaAutoScalingTargetCPUPercentage:
+    Type: Number
+    Description: Aggregate CPU utilization target for auto-scaling
+    Default: 75.0
 
 Resources:
     GerritService:
@@ -202,7 +222,7 @@
             Cluster:
               Fn::ImportValue:
                   !Join [':', [!Ref 'ClusterStackName', 'ClusterName']]
-            DesiredCount: !Ref DesiredCount
+            DesiredCount: !Ref ReplicaAutoScalingDesiredCapacity
             TaskDefinition: !Ref GerritTaskDefinition
             LoadBalancers:
                 - ContainerName: !Ref GerritServiceName
@@ -385,6 +405,26 @@
                   Labels:
                     gerrit-logs: !Join ['-', [!Ref EnvironmentName, !Ref GerritLogsVolume]]
 
+    # Nested stack attaching CPU target-tracking auto-scaling to the replica ECS service.
+    ReplicaCPUAutoScaling:
+      Type: AWS::CloudFormation::Stack
+      Properties:
+        TemplateURL: !Sub 'https://${TemplateBucketName}.s3.amazonaws.com/cf-ecs-service-cpu-autoscaling.yml'
+        TimeoutInMinutes: '5'
+        Parameters:
+          AutoScalingMinCapacity: !Ref ReplicaAutoScalingMinCapacity
+          AutoScalingDesiredCapacity: !Ref ReplicaAutoScalingDesiredCapacity
+          AutoScalingMaxCapacity: !Ref ReplicaAutoScalingMaxCapacity
+          AutoScalingScaleInCooldown: !Ref ReplicaAutoScalingScaleInCooldown
+          AutoScalingScaleOutCooldown: !Ref ReplicaAutoScalingScaleOutCooldown
+          AutoScalingTargetCPUPercentage: !Ref ReplicaAutoScalingTargetCPUPercentage
+          # Scalable-target id in the form service/<cluster name>/<service name>
+          ResourceId: !Sub
+            - 'service/${Cluster}/${Service}'
+            - Cluster:
+                Fn::ImportValue: !Sub '${ClusterStackName}:ClusterName'
+              Service: !GetAtt GerritService.Name
+
     LoadBalancer:
         Type: AWS::ElasticLoadBalancingV2::LoadBalancer
         Properties:
diff --git a/primary-replica/setup.env.template b/primary-replica/setup.env.template
index 5d2418c..cc3b284 100644
--- a/primary-replica/setup.env.template
+++ b/primary-replica/setup.env.template
@@ -48,7 +48,15 @@
 SERVICE_GIT_GC_STACK_NAME=$(AWS_PREFIX)-scheduled-gc
 GIT_GC_CRON_EXPRESSION="0 2 ? * SAT *"
 GIT_GC_PROJECT_LIST="All-Users"
-             
+
 REPLICA_FILESYSTEM_ID=""
 REPLICA_FILESYSTEM_THROUGHPUT_MODE="provisioned"
-REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS="256"
\ No newline at end of file
+REPLICA_FILESYSTEM_PROVISIONED_THROUGHPUT_IN_MIBPS="256"
+
+REPLICA_AUTOSCALING_MIN_CAPACITY=1
+REPLICA_AUTOSCALING_DESIRED_CAPACITY=1
+REPLICA_AUTOSCALING_MAX_CAPACITY=1
+
+REPLICA_AUTOSCALING_SCALE_IN_COOLDOWN=300
+REPLICA_AUTOSCALING_SCALE_OUT_COOLDOWN=300
+REPLICA_AUTOSCALING_TARGET_CPU_PERCENTAGE=75
\ No newline at end of file