Introduce X-Ray tracing for single-primary

Bug: Issue 14897
Change-Id: I47243bb7ea2acb9b78c8a5920af95e9632272816
diff --git a/Configuration.md b/Configuration.md
index 84eabc4..adbd90a 100644
--- a/Configuration.md
+++ b/Configuration.md
@@ -161,3 +161,8 @@
   See [Gerrit documentation](https://gerrit-review.googlesource.com/Documentation/config-gerrit.html#sendemail.sslVerify)
   Default: false
 
+#### X-Ray
+
+To enable X-Ray tracing, set the `XRAY_ENABLED` environment variable to `true`.
+This installs an X-Ray daemon container alongside Gerrit and automatically
+instruments Gerrit to trace all HTTP and JDBC-related traffic (such as H2 caches).
\ No newline at end of file
diff --git a/Makefile.common b/Makefile.common
index 371001e..3c4ea94 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -173,6 +173,12 @@
 		$(eval GERRIT_OPTIONAL_PARAMS_NETWORK := $(GERRIT_OPTIONAL_PARAMS_NETWORK) ParameterKey=Subnet2AZProp,ParameterValue=$(SUBNET2_AZ))
 endif
 
+set-optional-x-ray: # Resets XRAY_OPTIONAL_PARAMS, then adds the EnableXray stack parameter when XRAY_ENABLED is defined
+	$(eval XRAY_OPTIONAL_PARAMS=)
+ifdef XRAY_ENABLED
+		$(eval XRAY_OPTIONAL_PARAMS := $(XRAY_OPTIONAL_PARAMS) ParameterKey=EnableXray,ParameterValue=$(XRAY_ENABLED))
+endif
+
 confirm-persistent-stack-deletion:
 	@echo ""
 	@echo "* * * * WARNING * * * * this is going to completely destroy the stack, including git data."
diff --git a/common-templates/cf-gerrit-task-execution-role.yml b/common-templates/cf-gerrit-task-execution-role.yml
index 2c748d0..9f1df0b 100644
--- a/common-templates/cf-gerrit-task-execution-role.yml
+++ b/common-templates/cf-gerrit-task-execution-role.yml
@@ -52,6 +52,17 @@
                   - 'secretsmanager:GetSecretValue'
                   - 'kms:Decrypt'
                 Resource: '*'
+          - PolicyName: AmazonECSTaskXRayRolePolicy  # Allows sending X-Ray trace segments/telemetry and reading sampling configuration
+            PolicyDocument:
+              Statement:
+              - Effect: Allow
+                Action:
+                  - "xray:PutTraceSegments"
+                  - "xray:PutTelemetryRecords"
+                  - "xray:GetSamplingRules"
+                  - "xray:GetSamplingTargets"
+                  - "xray:GetSamplingStatisticSummaries"
+                Resource: '*'
 
 Outputs:
   TaskExecutionRoleRef:
diff --git a/gerrit/Dockerfile b/gerrit/Dockerfile
index dd3ce2a..4497b3b 100644
--- a/gerrit/Dockerfile
+++ b/gerrit/Dockerfile
@@ -1,8 +1,10 @@
 FROM gerritcodereview/gerrit:$GERRIT_VERSION.$GERRIT_PATCH-centos8
 
+ARG withXRay=false
+
 USER root
 
-RUN  yum install -y python36 python3-libs python36-devel python3-pip
+RUN  yum install -y python36 python3-libs python36-devel python3-pip unzip wget
 
 COPY ssh-config /var/gerrit/.ssh/config
 RUN chown -R gerrit:gerrit /var/gerrit/.ssh
@@ -21,6 +23,12 @@
 COPY lib /var/gerrit/lib
 COPY etc /var/gerrit/etc
 
+RUN if [ "$withXRay" = "true" ]; then \
+    echo "Building docker with xray-agent" && \
+    wget -P /tmp/ https://github.com/aws/aws-xray-java-agent/releases/latest/download/xray-agent.zip && \
+    unzip /tmp/xray-agent.zip -d /tmp; \
+  fi
+
 RUN chown -R gerrit:gerrit \
     /var/gerrit/plugins \
     /var/gerrit/lib \
diff --git a/gerrit/Makefile b/gerrit/Makefile
index 53d9b80..689fdde 100644
--- a/gerrit/Makefile
+++ b/gerrit/Makefile
@@ -45,8 +45,8 @@
 
 gerrit-build: check-gerrit-version-vs-war-url
 	cat Dockerfile | \
-		GERRIT_VERSION=$(GERRIT_VERSION) GERRIT_PATCH=$(GERRIT_PATCH) GERRIT_WAR_URL=$(GERRIT_WAR_URL) envsubst | \
-		docker build -f - -t aws-gerrit/gerrit:$(IMAGE_TAG) .
+		GERRIT_VERSION=$(GERRIT_VERSION) GERRIT_PATCH=$(GERRIT_PATCH) GERRIT_WAR_URL=$(GERRIT_WAR_URL) envsubst '$${GERRIT_VERSION} $${GERRIT_PATCH} $${GERRIT_WAR_URL} $${PATH}' | \
+		docker build -f - --build-arg withXRay="$(XRAY_ENABLED)" -t aws-gerrit/gerrit:$(IMAGE_TAG) .
 	docker tag aws-gerrit/gerrit:$(IMAGE_TAG) $(DOCKER_REGISTRY_URI)/aws-gerrit/gerrit:$(IMAGE_TAG)
 
 check-gerrit-version-vs-war-url:
diff --git a/gerrit/etc/gerrit.config.template b/gerrit/etc/gerrit.config.template
index 0656386..e48ab86 100644
--- a/gerrit/etc/gerrit.config.template
+++ b/gerrit/etc/gerrit.config.template
@@ -66,6 +66,9 @@
 	javaOptions = "-Dflogger.backend_factory=com.google.common.flogger.backend.log4j.Log4jBackendFactory#getInstance"
 	javaOptions = "-Dflogger.logging_context=com.google.gerrit.server.logging.LoggingContext#getInstance"
 	javaOptions = "-verbose:gc -XX:+PrintGCDetails -Xloggc:/var/gerrit/logs/gc_log"
+{% if XRAY_ENABLED == "true" %}
+	javaOptions = "-javaagent:/tmp/disco/disco-java-agent.jar=pluginPath=/tmp/disco/disco-plugins:loggerfactory=software.amazon.disco.agent.reflect.logging.StandardOutputLoggerFactory:verbose"
+{% endif %}
 	user = gerrit
 	javaHome = /usr/lib/jvm/jre
 	javaOptions = -Djava.security.egd=file:/dev/./urandom
diff --git a/gerrit/setup_gerrit.py b/gerrit/setup_gerrit.py
index d18d95d..b181698 100755
--- a/gerrit/setup_gerrit.py
+++ b/gerrit/setup_gerrit.py
@@ -174,6 +174,7 @@
         'DYNAMODB_LOCKS_TABLE_NAME': os.getenv('DYNAMODB_LOCKS_TABLE_NAME'),
         'DYNAMODB_REFS_TABLE_NAME': os.getenv('DYNAMODB_REFS_TABLE_NAME'),
         'SSHD_ADVERTISED_ADDRESS': os.getenv('SSHD_ADVERTISED_ADDRESS'),
+        'XRAY_ENABLED': os.getenv('XRAY_ENABLED'),
     })
     f.write(template.render(config_for_template))
 
diff --git a/single-primary/Makefile b/single-primary/Makefile
index 4c62493..605ed3d 100644
--- a/single-primary/Makefile
+++ b/single-primary/Makefile
@@ -39,7 +39,12 @@
 		$(CLUSTER_OPTIONAL_PARAMS) \
 		$(GERRIT_OPTIONAL_PRIMARY_VOLUME)
 
-service: set-optional-params-metrics-cloudwatch set-optional-params-smtp set-ldap-account-pattern set-optional-gerrit-ulimits set-optional-jgit-conf
+service: set-optional-params-metrics-cloudwatch \
+			set-optional-params-smtp \
+			set-ldap-account-pattern \
+			set-optional-gerrit-ulimits \
+			set-optional-jgit-conf \
+			set-optional-x-ray
 ifdef LOAD_BALANCER_SCHEME
 		$(eval SERVICE_OPTIONAL_PARAMS := $(SERVICE_OPTIONAL_PARAMS) ParameterKey=LoadBalancerScheme,ParameterValue=$(LOAD_BALANCER_SCHEME))
 endif
@@ -78,7 +83,8 @@
 		$(SERVICE_OPTIONAL_PARAMS) \
 		$(METRICS_CW_OPTIONAL_PARAMS) \
 		$(SMTP_OPTIONAL_PARAMS) \
-		$(GERRIT_ULIMITS)
+		$(GERRIT_ULIMITS) \
+		$(XRAY_OPTIONAL_PARAMS)
 
 dns-routing:
 	$(AWS_FC_COMMAND) create-stack \
diff --git a/single-primary/README.md b/single-primary/README.md
index 2aaa012..30e3f23 100644
--- a/single-primary/README.md
+++ b/single-primary/README.md
@@ -37,6 +37,8 @@
 the dual-primary recipe and it could be easily adapted (you can find the relevant issue
 [here](https://bugs.chromium.org/p/gerrit/issues/detail?id=13092)).
 
+
+
 ## How to run it
 
 You can find [on GerritForge's YouTube Channel](https://www.youtube.com/watch?v=zr2zCSuclIU) a
diff --git a/single-primary/cf-service.yml b/single-primary/cf-service.yml
index 72994a5..61b13f1 100644
--- a/single-primary/cf-service.yml
+++ b/single-primary/cf-service.yml
@@ -162,8 +162,17 @@
     Description: Comma separated list of regex patterns to exclude metrics reported to CloudWatch
     Type: CommaDelimitedList
     Default: '^(?!.*(sshd\\/sessions\\/connected|proc\\/cpu\\/usage|jgit\\/block_cache\\/cache_used|jvm\\.memory\\.total.used|proc\\/cpu\\/system_load|jvm\\.gc\\.G1-Old-Generation\\.time|proc\\/jvm\\/thread\\/num_live|git\\/upload-pack\\/request_count_total|http\\/server\\/rest_api\\/server_latency_total|http\\/server\\/success_count_total|http\\/server\\/error_count_total|queue\\/index_batch\\/total_scheduled_tasks_count|queue\\/receive_commits\\/total_scheduled_tasks_count|queue\\/work_queue\\/total_scheduled_tasks_count|queue\\/ssh_command_start\\/total_scheduled_tasks_count|queue\\/send_email\\/scheduled_tasks|jgit\\/block_cache\\/open_files|jgit\\/block_cache\\/cache_used)).*'
+  EnableXray:
+    Description: Whether to enable X-Ray tracing for Gerrit
+    Type: String
+    Default: false
+    AllowedValues: [true, false]
 
 Mappings:
+  XRay:
+    Info:
+      ContainerName: xray-daemon
+      Port: 2000
   Gerrit:
     Volume:
       Git: gerrit-git
@@ -175,6 +184,7 @@
 
 Conditions:
   CreateCloudwatchDashboard: !Equals [!Ref MetricsCloudwatchEnabled, true]
+  ShouldEnableXRay: !Equals [!Ref EnableXray, true]
 Resources:
     Service:
         Type: AWS::ECS::Service
@@ -210,6 +220,12 @@
                   Essential: true
                   Image: !Sub '${DockerRegistryUrl}/${DockerImage}'
                   Environment:
+                    - Name: XRAY_ENABLED
+                      Value: !Ref EnableXray
+                    - Name: AWS_XRAY_TRACING_NAME
+                      Value: !Ref InstanceId
+                    - Name: AWS_XRAY_DAEMON_ADDRESS
+                      Value: !Join [':', [!FindInMap ['XRay', 'Info', 'ContainerName'], !FindInMap ['XRay', 'Info', 'Port']]]
                     - Name: CANONICAL_WEB_URL
                       Value: !Sub 'https://${HttpSubdomain}.${HostedZoneName}'
                     - Name: SSHD_ADVERTISED_ADDRESS
@@ -298,6 +314,20 @@
                         awslogs-group: !Ref ClusterStackName
                         awslogs-region: !Ref AWS::Region
                         awslogs-stream-prefix: !Ref EnvironmentName
+                  Links:  # Link to the X-Ray daemon only when that sidecar container is created
+                    - !If [ShouldEnableXRay, !FindInMap ['XRay', 'Info', 'ContainerName'], !Ref 'AWS::NoValue']
+                - Fn::If:  # Add the X-Ray daemon sidecar only when tracing is enabled
+                    - ShouldEnableXRay
+                    - Name: !FindInMap ['XRay', 'Info', 'ContainerName']
+                      Essential: false
+                      Image: "amazon/aws-xray-daemon"
+                      Cpu: 32
+                      MemoryReservation: 256
+                      PortMappings:
+                        - HostPort: 2000
+                          ContainerPort: 2000
+                          Protocol: "udp"
+                    - !Ref AWS::NoValue  # Must be a Ref to the pseudo parameter; a bare string would be inserted as a list entry
             Volumes:
               - Name: !FindInMap ['Gerrit', 'Volume', 'Db']
                 Host:
diff --git a/single-primary/setup.env.template b/single-primary/setup.env.template
index f7398dc..81c694e 100644
--- a/single-primary/setup.env.template
+++ b/single-primary/setup.env.template
@@ -53,3 +53,5 @@
 SUBNET2_CIDR=
 SUBNET2_ID=
 SUBNET2_AZ=
+
+XRAY_ENABLED=false