Skip to content

Commit

Permalink
Reject recording rules with incompatible fields (#2000)
Browse files Browse the repository at this point in the history
* Reject recording rules with incompatible fields

* Hack validation in place around SDK limitations

* Update docs

* Fix diff for empty values
  • Loading branch information
rwwiv authored Feb 4, 2025
1 parent efd0ac2 commit e93a250
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 27 deletions.
6 changes: 3 additions & 3 deletions docs/resources/rule_group.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,18 @@ EOT

Required:

- `condition` (String) The `ref_id` of the query node in the `data` field to use as the alert condition.
- `data` (Block List, Min: 1) A sequence of stages that describe the contents of the rule. (see [below for nested schema](#nestedblock--rule--data))
- `name` (String) The name of the alert rule.

Optional:

- `annotations` (Map of String) Key-value pairs of metadata to attach to the alert rule. They add additional information, such as a `summary` or `runbook_url`, to help identify and investigate alerts. The `dashboardUId` and `panelId` annotations, which link alerts to a panel, must be set together. Defaults to `map[]`.
- `exec_err_state` (String) Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to `Alerting`.
- `condition` (String) The `ref_id` of the query node in the `data` field to use as the alert condition.
- `exec_err_state` (String) Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to Alerting if not set.
- `for` (String) The amount of time for which the rule must be breached for the rule to be considered to be Firing. Before this time has elapsed, the rule is only considered to be Pending. Defaults to `0`.
- `is_paused` (Boolean) Sets whether the alert should be paused or not. Defaults to `false`.
- `labels` (Map of String) Key-value pairs to attach to the alert rule that can be used in matching, grouping, and routing. Defaults to `map[]`.
- `no_data_state` (String) Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to `NoData`.
- `no_data_state` (String) Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to NoData if not set.
- `notification_settings` (Block List, Max: 1) Notification settings for the rule. If specified, it overrides the notification policies. Available since Grafana 10.4, requires feature flag 'alertingSimplifiedRouting' to be enabled. (see [below for nested schema](#nestedblock--rule--notification_settings))
- `record` (Block List, Max: 1) Settings for a recording rule. Available since Grafana 11.2, requires feature flag 'grafanaManagedRecordingRules' to be enabled. (see [below for nested schema](#nestedblock--rule--record))

Expand Down
18 changes: 9 additions & 9 deletions internal/resources/grafana/resource_alerting_mute_timing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,29 +118,29 @@ func TestAccMuteTiming_RemoveInUse(t *testing.T) {
locals {
use_mute = %t
}
resource "grafana_organization" "my_org" {
name = "mute-timing-test"
}
resource "grafana_contact_point" "default_policy" {
org_id = grafana_organization.my_org.id
name = "default-policy"
email {
addresses = ["[email protected]"]
}
}
resource "grafana_notification_policy" "org_policy" {
org_id = grafana_organization.my_org.id
group_by = ["..."]
group_wait = "45s"
group_interval = "6m"
repeat_interval = "3h"
contact_point = grafana_contact_point.default_policy.name
policy {
mute_timings = local.use_mute ? [grafana_mute_timing.test[0].name] : []
mute_timings = local.use_mute ? [grafana_mute_timing.test[0].name] : []
contact_point = grafana_contact_point.default_policy.name
}
}
Expand All @@ -167,7 +167,7 @@ func TestAccMuteTiming_RemoveInUse(t *testing.T) {
}

func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
testutils.CheckCloudInstanceTestsEnabled(t) // TODO: Switch to OSS when this is released: https://github.com/grafana/grafana/pull/90500
testutils.CheckOSSTestsEnabled(t, ">=11.2.0")

randomStr := acctest.RandString(6)

Expand All @@ -176,11 +176,11 @@ func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
locals {
use_mute = %[2]t
}
resource "grafana_folder" "rule_folder" {
title = "%[1]s"
}
resource "grafana_contact_point" "default_policy" {
name = "%[1]s"
email {
Expand Down Expand Up @@ -219,7 +219,7 @@ func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
}
}
resource "grafana_mute_timing" "test" {
count = local.use_mute ? 1 : 0
name = "%[1]s"
Expand Down
64 changes: 56 additions & 8 deletions internal/resources/grafana/resource_alerting_rule_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,30 @@ This resource requires Grafana 9.1.0 or later.
"no_data_state": {
Type: schema.TypeString,
Optional: true,
Default: "NoData",
Description: "Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting.",
Description: "Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to NoData if not set.",
DiffSuppressFunc: func(k, oldValue, newValue string, d *schema.ResourceData) bool {
// We default to this value later in the pipeline, so we need to account for that here.
if newValue == "" {
return oldValue == "NoData"
}
return oldValue == newValue
},
},
"exec_err_state": {
Type: schema.TypeString,
Optional: true,
Default: "Alerting",
Description: "Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting.",
Description: "Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to Alerting if not set.",
DiffSuppressFunc: func(k, oldValue, newValue string, d *schema.ResourceData) bool {
// We default to this value later in the pipeline, so we need to account for that here.
if newValue == "" {
return oldValue == "Alerting"
}
return oldValue == newValue
},
},
"condition": {
Type: schema.TypeString,
Required: true,
Optional: true,
Description: "The `ref_id` of the query node in the `data` field to use as the alert condition.",
},
"data": {
Expand Down Expand Up @@ -527,17 +539,53 @@ func unpackAlertRule(raw interface{}, groupName string, folderUID string, orgID
return nil, err
}

// Check for conflicting fields before unpacking the rest of the rule.
// This is a workaround due to the lack of support for ConflictsWith in Lists in the SDK.
errState := json["exec_err_state"].(string)
noDataState := json["no_data_state"].(string)
condition := json["condition"].(string)

record := unpackRecord(json["record"])
if record != nil {
incompatFieldMsgFmt := `conflicting fields "record" and "%s"`
if forDuration != 0 {
return nil, fmt.Errorf(incompatFieldMsgFmt, "for")
}
if noDataState != "" {
return nil, fmt.Errorf(incompatFieldMsgFmt, "no_data_state")
}
if errState != "" {
return nil, fmt.Errorf(incompatFieldMsgFmt, "exec_err_state")
}
if condition != "" {
return nil, fmt.Errorf(incompatFieldMsgFmt, "condition")
}
}
if record == nil {
if condition == "" {
return nil, fmt.Errorf(`"condition" is required`)
}
// Compute defaults here to avoid issues related to the above, setting a default in the schema will cause these
// to be set before we can validate the configuration.
if noDataState == "" {
noDataState = "NoData"
}
if errState == "" {
errState = "Alerting"
}
}

rule := models.ProvisionedAlertRule{
UID: json["uid"].(string),
Title: common.Ref(json["name"].(string)),
FolderUID: common.Ref(folderUID),
RuleGroup: common.Ref(groupName),
OrgID: common.Ref(orgID),
ExecErrState: common.Ref(json["exec_err_state"].(string)),
NoDataState: common.Ref(json["no_data_state"].(string)),
ExecErrState: common.Ref(errState),
NoDataState: common.Ref(noDataState),
For: common.Ref(strfmt.Duration(forDuration)),
Data: data,
Condition: common.Ref(json["condition"].(string)),
Condition: common.Ref(condition),
Labels: unpackMap(json["labels"]),
Annotations: unpackMap(json["annotations"]),
IsPaused: json["is_paused"].(bool),
Expand Down
73 changes: 66 additions & 7 deletions internal/resources/grafana/resource_alerting_rule_group_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ func TestAccAlertRule_NotificationSettings(t *testing.T) {
}

func TestAccRecordingRule(t *testing.T) {
testutils.CheckCloudInstanceTestsEnabled(t) // TODO: change to 11.3.1 when available
testutils.CheckOSSTestsEnabled(t, ">=11.4.0")

var group models.AlertRuleGroup
var name = acctest.RandString(10)
Expand All @@ -696,17 +696,34 @@ func TestAccRecordingRule(t *testing.T) {
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.data.0.model", "{\"refId\":\"A\"}"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.record.0.metric", metric),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.record.0.from", "A"),
// ensure fields are cleared as expected
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.for", "2m0s"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.condition", "A"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.no_data_state", "NoData"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.exec_err_state", "Alerting"),
// ensure fields are empty as expected
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.for", "0s"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.condition", ""),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.no_data_state", ""),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.exec_err_state", ""),
),
},
},
})
}

func TestAccRecordingRule_incompatibleSettings(t *testing.T) {
testutils.CheckOSSTestsEnabled(t, ">=11.4.0")

var name = acctest.RandString(10)
var metric = "valid_metric"

resource.ParallelTest(t, resource.TestCase{
ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories,
Steps: []resource.TestStep{
{
Config: testAccRecordingRuleInvalid(name, metric, "A"),
ExpectError: regexp.MustCompile(`conflicting fields "record" and "for"`),
},
},
})
}

func testAccAlertRuleGroupInOrgConfig(name string, interval int, disableProvenance bool) string {
return fmt.Sprintf(`
resource "grafana_organization" "test" {
Expand Down Expand Up @@ -877,7 +894,49 @@ resource "grafana_rule_group" "my_rule_group" {
rule {
name = "My Random Walk Alert"
// following should be cleared by Grafana
// Query the datasource.
data {
ref_id = "A"
relative_time_range {
from = 600
to = 0
}
datasource_uid = grafana_data_source.testdata_datasource.uid
model = jsonencode({
intervalMs = 1000
maxDataPoints = 43200
refId = "A"
})
}
record {
metric = "%[2]s"
from = "%[3]s"
}
}
}`, name, metric, refID)
}

func testAccRecordingRuleInvalid(name string, metric string, refID string) string {
return fmt.Sprintf(`
resource "grafana_folder" "rule_folder" {
title = "%[1]s"
}
resource "grafana_data_source" "testdata_datasource" {
name = "%[1]s"
type = "grafana-testdata-datasource"
url = "http://localhost:3333"
}
resource "grafana_rule_group" "my_rule_group" {
name = "%[1]s"
folder_uid = grafana_folder.rule_folder.uid
interval_seconds = 60
rule {
name = "My Random Walk Alert"
// following should be rejected
condition = "A"
no_data_state = "NoData"
exec_err_state = "Alerting"
Expand Down

0 comments on commit e93a250

Please sign in to comment.