Skip to content

Commit 2060ac2

Browse files
authored
Jeanschmidt/runners send metrics (#843)
This change updates the CloudWatch (CW) agent config so that runners send host metrics, allowing dashboards and alerts to be built for disk usage, CPU, memory, etc.
1 parent f17e8be commit 2060ac2

File tree

8 files changed

+141
-25
lines changed

8 files changed

+141
-25
lines changed

terraform-aws-github-runner/main.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ module "runners" {
109109
lambda_security_group_ids = var.lambda_security_group_ids
110110
logging_retention_in_days = var.logging_retention_in_days
111111
enable_cloudwatch_agent = var.enable_cloudwatch_agent
112-
cloudwatch_config = var.cloudwatch_config
113112
scale_up_lambda_concurrency = var.scale_up_lambda_concurrency
114113

115114
instance_profile_path = var.instance_profile_path

terraform-aws-github-runner/modules/runners/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ No requirements.
6868
| block\_device\_mappings | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops` | `map(string)` | `{}` | no |
6969
| cloudwatch\_config | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. | `string` | `null` | no |
7070
| create\_service\_linked\_role\_spot | (optional) create the service-linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no |
71-
| enable\_cloudwatch\_agent | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`. | `bool` | `true` | no |
71+
| enable\_cloudwatch\_agent | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. | `bool` | `true` | no |
7272
| enable\_organization\_runners | n/a | `bool` | n/a | yes |
7373
| enable\_ssm\_on\_runners | Enable to allow access to the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances. | `bool` | n/a | yes |
7474
| encryption | KMS key to encrypt lambda environment secrets. Either provide a key and set `encrypt` to `true`, or set the key to `null` and `encrypt` to `false`. | <pre>object({<br> kms_key_id = string<br> encrypt = bool<br> })</pre> | n/a | yes |

terraform-aws-github-runner/modules/runners/logging.tf

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,19 @@ resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_linux" {
6161
count = var.enable_cloudwatch_agent ? 1 : 0
6262
name = "${var.environment}-cloudwatch_agent_config_runner_linux"
6363
type = "String"
64-
value = var.cloudwatch_config != null ? var.cloudwatch_config : templatefile("${path.module}/templates/cloudwatch_config.json", {
65-
logfiles = jsonencode(local.logfiles_linux)
66-
})
64+
value = jsonencode(
65+
jsondecode(
66+
templatefile(
67+
"${path.module}/templates/cloudwatch_config.json",
68+
{
69+
aws_region = var.aws_region
70+
environment = var.environment
71+
logfiles = jsonencode(local.logfiles_linux)
72+
metrics_collected = templatefile("${path.module}/templates/cloudwatch_config_linux.json", {})
73+
}
74+
)
75+
)
76+
)
6777
tags = local.tags
6878
}
6979

@@ -85,14 +95,23 @@ resource "aws_iam_role_policy" "cloudwatch_linux" {
8595
)
8696
}
8797

88-
8998
resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_windows" {
9099
count = var.enable_cloudwatch_agent ? 1 : 0
91100
name = "${var.environment}-cloudwatch_agent_config_runner_windows"
92101
type = "String"
93-
value = var.cloudwatch_config != null ? var.cloudwatch_config : templatefile("${path.module}/templates/cloudwatch_config.json", {
94-
logfiles = jsonencode(local.logfiles_windows)
95-
})
102+
value = jsonencode(
103+
jsondecode(
104+
templatefile(
105+
"${path.module}/templates/cloudwatch_config.json",
106+
{
107+
aws_region = var.aws_region
108+
environment = var.environment
109+
metrics_collected = templatefile("${path.module}/templates/cloudwatch_config_windows.json", {})
110+
logfiles = jsonencode(local.logfiles_windows)
111+
}
112+
)
113+
)
114+
)
96115
tags = local.tags
97116
}
98117

terraform-aws-github-runner/modules/runners/templates/cloudwatch_config.json

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
{
22
"agent": {
3-
"metrics_collection_interval": 5
3+
"metrics_collection_interval": 10,
4+
"region": "${aws_region}"
5+
},
6+
"metrics": {
7+
"namespace": "GHARunners/${environment}",
8+
"append_dimensions": {
9+
"ImageID":"$${aws:ImageId}",
10+
"InstanceId":"$${aws:InstanceId}",
11+
"InstanceType":"$${aws:InstanceType}"
12+
},
13+
"aggregation_dimensions": [
14+
["ImageID"],
15+
["InstanceType"],
16+
["ImageID", "InstanceType"]
17+
],
18+
"metrics_collected": ${metrics_collected}
419
},
520
"logs": {
621
"logs_collected": {
@@ -9,4 +24,4 @@
924
}
1025
}
1126
}
12-
}
27+
}
terraform-aws-github-runner/modules/runners/templates/cloudwatch_config_linux.json

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"cpu": {
3+
"measurement": [
4+
"cpu_usage_idle",
5+
"cpu_usage_iowait",
6+
"cpu_usage_user",
7+
"cpu_usage_system"
8+
],
9+
"metrics_collection_interval": 10
10+
},
11+
"disk": {
12+
"measurement": [
13+
"free",
14+
"total",
15+
"used",
16+
"used_percent",
17+
"inodes_free",
18+
"inodes_total"
19+
],
20+
"metrics_collection_interval": 10,
21+
"resources": [
22+
"*"
23+
]
24+
},
25+
"diskio": {
26+
"measurement": [
27+
"io_time"
28+
],
29+
"metrics_collection_interval": 10,
30+
"resources": [
31+
"*"
32+
]
33+
},
34+
"mem": {
35+
"measurement": [
36+
"total",
37+
"used",
38+
"free",
39+
"used_percent"
40+
],
41+
"metrics_collection_interval": 10
42+
},
43+
"swap": {
44+
"measurement": [
45+
"swap_used_percent"
46+
],
47+
"metrics_collection_interval": 10
48+
}
49+
}
terraform-aws-github-runner/modules/runners/templates/cloudwatch_config_windows.json

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"LogicalDisk": {
3+
"measurement": [
4+
"% Free Space"
5+
],
6+
"metrics_collection_interval": 10,
7+
"resources": [
8+
"*"
9+
]
10+
},
11+
"Memory": {
12+
"measurement": [
13+
"% Committed Bytes In Use"
14+
],
15+
"metrics_collection_interval": 10
16+
},
17+
"Paging File": {
18+
"measurement": [
19+
"% Usage"
20+
],
21+
"metrics_collection_interval": 10,
22+
"resources": [
23+
"*"
24+
]
25+
},
26+
"PhysicalDisk": {
27+
"measurement": [
28+
"% Disk Time"
29+
],
30+
"metrics_collection_interval": 10,
31+
"resources": [
32+
"*"
33+
]
34+
},
35+
"Processor": {
36+
"measurement": [
37+
"% User Time",
38+
"% Idle Time",
39+
"% Interrupt Time"
40+
],
41+
"metrics_collection_interval": 10,
42+
"resources": [
43+
"_Total"
44+
]
45+
}
46+
}

terraform-aws-github-runner/modules/runners/variables.tf

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -257,17 +257,11 @@ variable "runner_iam_role_managed_policy_arns" {
257257
}
258258

259259
variable "enable_cloudwatch_agent" {
260-
description = "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
260+
description = "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config."
261261
type = bool
262262
default = true
263263
}
264264

265-
variable "cloudwatch_config" {
266-
description = "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
267-
type = string
268-
default = null
269-
}
270-
271265
variable "ghes_url" {
272266
description = "GitHub Enterprise Server URL. DO NOT SET IF USING PUBLIC GITHUB"
273267
type = string

terraform-aws-github-runner/variables.tf

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -272,17 +272,11 @@ variable "runner_iam_role_managed_policy_arns" {
272272
}
273273

274274
variable "enable_cloudwatch_agent" {
275-
description = "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
275+
description = "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config."
276276
type = bool
277277
default = true
278278
}
279279

280-
variable "cloudwatch_config" {
281-
description = "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
282-
type = string
283-
default = null
284-
}
285-
286280
variable "ghes_url" {
287281
description = "GitHub Enterprise Server URL. Example: https://github.internal.co - DO NOT SET IF USING PUBLIC GITHUB"
288282
type = string

0 commit comments

Comments
 (0)