# blob: 8dbe49ee95c3d3d45bb0585e5b338d16ef4d3c9c [file] [log] [blame]
# CloudFormation template for a CloudWatch dashboard covering a Gerrit
# primary/replica pair.
# The format version stays quoted so that it is read as a string, not a date.
AWSTemplateFormatVersion: '2010-09-09'
Description: 'Deploy a Primary-Replica CloudWatch dashboard.'
# Stack inputs. Each parameter has a default, so the stack can be created
# without passing any overrides.
Parameters:
  # Substituted into the dashboard body as the InstanceId dimension value of
  # the primary's metrics.
  PrimaryInstanceId:
    Type: String
    Default: gerrit-primary-replica-PRIMARY
    Description: Optional identifier for the Gerrit primary instance

  # Substituted into the dashboard body as the InstanceId dimension value of
  # the replica's metrics.
  ReplicaInstanceId:
    Type: String
    Default: gerrit-primary-replica-REPLICA
    Description: Optional identifier for the Gerrit replica instance

  # Namespace used for every Gerrit metric referenced by the dashboard.
  MetricsCloudwatchNamespace:
    Type: String
    Default: gerrit
    Description: The CloudWatch namespace for Gerrit metrics
Resources:
  # Single dashboard named after the stack. The body is a JSON document passed
  # through Fn::Sub, which fills in ${MetricsCloudwatchNamespace},
  # ${PrimaryInstanceId}, ${ReplicaInstanceId} and ${AWS::Region}.
  CloudWatchDashboard:
    Type: AWS::CloudWatch::Dashboard
    Properties:
      DashboardName: !Ref AWS::StackName
      # The body is JSON, so it cannot carry comments of its own; notes live here.
      # - Widgets sit on a 24-column grid ("x"/"y"/"width"/"height").
      # - Metric "id" values must be unique within one widget because math
      #   expressions (e.g. "m1+m2") refer to series by id.
      # - In a metrics row, "." repeats the value at the same position in the
      #   previous row.
      # NOTE(review): the "Cluster CPU Load" widget queries AWS/EC2
      # CPUUtilization without any dimensions - confirm that an aggregate not
      # scoped to the Gerrit instances is what is intended.
      DashboardBody: !Sub |
        {
          "start": "-PT6H",
          "periodOverride": "auto",
          "widgets": [
            {
              "type": "metric",
              "x": 0,
              "y": 0,
              "width": 24,
              "height": 3,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "sshd/sessions/connected", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge" ],
                  [ "${MetricsCloudwatchNamespace}", "sshd/sessions/connected", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge" ]
                ],
                "view": "singleValue",
                "region": "${AWS::Region}",
                "stat": "Maximum",
                "period": 60,
                "title": "SSH Connection",
                "stacked": false
              }
            },
            {
              "type": "metric",
              "x": 6,
              "y": 3,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ { "expression": "RATE(METRICS())", "label": "Expression1", "id": "e1", "region": "${AWS::Region}" } ],
                  [ "${MetricsCloudwatchNamespace}", "proc/cpu/usage", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge", { "id": "m1", "visible": false } ],
                  [ "${MetricsCloudwatchNamespace}", "proc/cpu/usage", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge", { "id": "m2", "visible": false } ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "stat": "Average",
                "period": 60,
                "title": "Gerrit Cpu Load",
                "yAxis": {
                  "right": {
                    "showUnits": true
                  },
                  "left": {
                    "showUnits": false,
                    "min": 0
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 12,
              "y": 33,
              "width": 12,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "jgit/block_cache/cache_used", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge", { "id": "m1" } ],
                  [ "${MetricsCloudwatchNamespace}", "jgit/block_cache/cache_used", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge", { "id": "m2" } ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "yAxis": {
                  "left": {
                    "showUnits": false,
                    "label": ""
                  }
                },
                "title": "JGit Cache",
                "period": 60,
                "stat": "Average"
              }
            },
            {
              "type": "metric",
              "x": 18,
              "y": 3,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "jvm.memory.total.used", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge" ],
                  [ "${MetricsCloudwatchNamespace}", "jvm.memory.total.used", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  }
                },
                "stat": "Average",
                "period": 60,
                "title": "Gerrit Used memory"
              }
            },
            {
              "type": "metric",
              "x": 12,
              "y": 3,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "proc/cpu/system_load", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge" ],
                  [ "${MetricsCloudwatchNamespace}", "proc/cpu/system_load", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "title": "Gerrit System Load",
                "region": "${AWS::Region}",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  }
                },
                "stat": "Average",
                "period": 60
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 9,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "jvm.gc.G1-Old-Generation.time", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge" ],
                  [ "${MetricsCloudwatchNamespace}", "jvm.gc.G1-Old-Generation.time", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "title": "GC Time",
                "region": "${AWS::Region}",
                "stat": "Average",
                "period": 60,
                "yAxis": {
                  "left": {
                    "showUnits": false
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 6,
              "y": 9,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "proc/jvm/thread/num_live", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge" ],
                  [ "${MetricsCloudwatchNamespace}", "proc/jvm/thread/num_live", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "title": "Active Threads",
                "region": "${AWS::Region}",
                "period": 60,
                "stat": "Average",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 15,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "http/server/rest_api/server_latency_total", "InstanceId", "${PrimaryInstanceId}", "Type", "99.9%", { "id": "m2" } ]
                ],
                "view": "timeSeries",
                "stacked": true,
                "region": "${AWS::Region}",
                "stat": "Average",
                "period": 60,
                "title": "HTTP requests latency",
                "yAxis": {
                  "left": {
                    "showUnits": false,
                    "label": "ms"
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 6,
              "y": 15,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ { "expression": "m1+m2", "label": "HTTP hits per second", "id": "e2", "region": "${AWS::Region}" } ],
                  [ "${MetricsCloudwatchNamespace}", "http/server/success_count_total", "InstanceId", "${PrimaryInstanceId}", "Type", "count", { "id": "m2", "visible": false } ],
                  [ ".", "http/server/error_count_total", ".", ".", ".", ".", { "id": "m1", "visible": false } ]
                ],
                "view": "timeSeries",
                "stacked": true,
                "title": "HTTP hits per second",
                "region": "${AWS::Region}",
                "stat": "Sum",
                "period": 60,
                "yAxis": {
                  "left": {
                    "showUnits": false,
                    "label": "ops",
                    "min": 0
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 12,
              "y": 15,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ { "expression": "(m1/(m1+m2))*100", "label": "% of HTTP Errors", "id": "e1", "region": "${AWS::Region}" } ],
                  [ "${MetricsCloudwatchNamespace}", "http/server/error_count_total", "InstanceId", "${PrimaryInstanceId}", "Type", "count", { "id": "m1", "visible": false } ],
                  [ ".", "http/server/success_count_total", ".", ".", ".", ".", { "id": "m2", "visible": false } ]
                ],
                "view": "timeSeries",
                "stacked": true,
                "region": "${AWS::Region}",
                "stat": "Sum",
                "period": 60,
                "title": "% of HTTP Errors",
                "yAxis": {
                  "left": {
                    "showUnits": false,
                    "max": 100,
                    "min": 0
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 12,
              "y": 9,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "git/upload-pack/request_count_total", "InstanceId", "${PrimaryInstanceId}", "Type", "count", { "id": "m1" } ],
                  [ "${MetricsCloudwatchNamespace}", "git/upload-pack/request_count_total", "InstanceId", "${ReplicaInstanceId}", "Type", "count", { "id": "m2" } ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "title": "Git upload pack - count",
                "stat": "Sum",
                "period": 60,
                "yAxis": {
                  "left": {
                    "showUnits": false,
                    "min": 0
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 21,
              "width": 24,
              "height": 6,
              "properties": {
                "metrics": [
                  [ { "expression": "RATE(METRICS())*PERIOD(m1)", "label": "Expression1", "id": "e1" } ],
                  [ "${MetricsCloudwatchNamespace}", "queue/index_batch/total_scheduled_tasks_count", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge", { "id": "m1", "visible": false } ],
                  [ ".", "queue/receive_commits/total_scheduled_tasks_count", ".", ".", ".", ".", { "id": "m2", "visible": false } ],
                  [ ".", "queue/work_queue/total_scheduled_tasks_count", ".", ".", ".", ".", { "id": "m3", "visible": false } ],
                  [ ".", "queue/ssh_command_start/total_scheduled_tasks_count", ".", ".", ".", ".", { "id": "m4", "visible": false } ]
                ],
                "view": "timeSeries",
                "stacked": true,
                "title": "Scheduled Tasks Queues",
                "region": "${AWS::Region}",
                "stat": "Sum",
                "period": 60,
                "yAxis": {
                  "left": {
                    "showUnits": false,
                    "min": 0
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 27,
              "width": 24,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "queue/send_email/scheduled_tasks", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge" ]
                ],
                "view": "timeSeries",
                "stacked": true,
                "region": "${AWS::Region}",
                "title": "Scheduled email tasks in the queue",
                "stat": "Sum",
                "period": 300,
                "yAxis": {
                  "left": {
                    "showUnits": false
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 33,
              "width": 12,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "${MetricsCloudwatchNamespace}", "jgit/block_cache/open_files", "InstanceId", "${PrimaryInstanceId}", "Type", "gauge" ],
                  [ "${MetricsCloudwatchNamespace}", "jgit/block_cache/open_files", "InstanceId", "${ReplicaInstanceId}", "Type", "gauge" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "title": "Pack files cached",
                "region": "${AWS::Region}",
                "stat": "Sum",
                "period": 60,
                "yAxis": {
                  "left": {
                    "showUnits": false
                  }
                }
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 3,
              "width": 6,
              "height": 6,
              "properties": {
                "metrics": [
                  [ "AWS/EC2", "CPUUtilization" ]
                ],
                "view": "timeSeries",
                "stacked": false,
                "region": "${AWS::Region}",
                "title": "Cluster CPU Load",
                "period": 60,
                "stat": "Average"
              }
            }
          ]
        }