fix(DMVP-5017): Added multiple policies

dasmeta · Aug 28, 2024 · 98f3f43 · 98f3f43
1 parent 04fd258
commit 98f3f43
Show file tree

Hide file tree

Showing 10 changed files with 94 additions and 58 deletions.
diff --git a/README.md b/README.md
@@ -235,10 +235,10 @@ No resources.
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
 | <a name="input_alert_interval_seconds"></a> [alert\_interval\_seconds](#input\_alert\_interval\_seconds) | The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially. | `number` | `10` | no |
-| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This variable describes alert folders, groups and rules. | <pre>list(object({<br>    name                 = string                          # The name of the alert rule<br>    no_data_state        = optional(string, "NoData")      # Describes what state to enter when the rule's query returns No Data<br>    exec_err_state       = optional(string, "Error")       # Describes what state to enter when the rule's query is invalid and the rule cannot be executed<br>    summary              = optional(string, "")            # Rule annotation as a summary<br>    priority             = optional(string, "P2")          # Rule priority level: P2 is for non-critical alerts, P1 will be set for critical alerts<br>    folder_name          = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created<br>    datasource           = string                          # Name of the datasource used for the alert<br>    expr                 = optional(string, null)          # Full expression for the alert<br>    metric_name          = optional(string, "")            # Prometheus metric name which queries the data for the alert<br>    metric_function      = optional(string, "")            # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval      = optional(string, "")            # The time interval with using functions like rate<br>    settings_mode        = optional(string, "replaceNN")   # The mode used in B block, possible values are Strict, replaceNN, dropNN<br>    settings_replaceWith = optional(number, 0)             # The value by which NaN results of the query will be replaced<br>    filters              = optional(any, {})               # Filters object to identify each service for alerting<br>    function             = optional(string, "mean")        # One of Reduce functions which will be used in B block for alerting<br>    equation        
     = string                          # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e<br>    threshold            = number                          # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
+| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This variable describes alert folders, groups and rules. | <pre>list(object({<br>    name                 = string                     # The name of the alert rule<br>    no_data_state        = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data<br>    exec_err_state       = optional(string, "Error")  # Describes what state to enter when the rule's query is invalid and the rule cannot be executed<br>    summary              = optional(string, "")       # Rule annotation as a summary<br>    labels               = optional(map(any), { "priorityLow" : "true" })<br>    folder_name          = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created<br>    datasource           = string                          # Name of the datasource used for the alert<br>    expr                 = optional(string, null)          # Full expression for the alert<br>    metric_name          = optional(string, "")            # Prometheus metric name which queries the data for the alert<br>    metric_function      = optional(string, "")            # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval      = optional(string, "")            # The time interval with using functions like rate<br>    settings_mode        = optional(string, "replaceNN")   # The mode used in B block, possible values are Strict, replaceNN, dropNN<br>    settings_replaceWith = optional(number, 0)             # The value by which NaN results of the query will be replaced<br>    filters              = optional(any, {})               # Filters object to identify each service for alerting<br>    function             = optional(string, "mean")        # One of Reduce functions which will be used in B block for alerting<br>    equation             = string                          # The equation in the math expression which compares B 
blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e<br>    threshold            = number                          # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
 | <a name="input_application_dashboard"></a> [application\_dashboard](#input\_application\_dashboard) | Dashboard for monitoring applications | <pre>object({<br>    rows = optional(any, [])<br>    data_source = object({ # global/default datasource, TODO: create datasource inside the module<br>      uid  = string<br>      type = optional(string, "prometheus")<br>    })<br>    variables = optional(list(object({ # Allows to define variables to be used in dashboard<br>      name        = string<br>      type        = optional(string, "custom")<br>      hide        = optional(number, 0)<br>      includeAll  = optional(bool, false)<br>      multi       = optional(bool, false)<br>      query       = optional(string, "")<br>      queryValue  = optional(string, "")<br>      skipUrlSync = optional(bool, false)<br>      options = optional(list(object({<br>        selected = optional(bool, false)<br>        value    = string<br>        text     = optional(string, null)<br>      })), [])<br>    })), [])<br>  })</pre> | <pre>{<br>  "data_source": null,<br>  "rows": [],<br>  "variables": []<br>}</pre> | no |
 | <a name="input_name"></a> [name](#input\_name) | Dashboard name | `string` | n/a | yes |
-| <a name="input_notifications"></a> [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. | <pre>object({<br>    contact_point   = optional(string, "Slack")                               # The default contact point to route all unmatched notifications to.<br>    group_by        = optional(list(string), ["grafana_folder", "alertname"]) # A list of alert labels to group alerts into notifications by.<br>    group_interval  = optional(string, "5m")                                  # Minimum time interval between two notifications for the same group.<br>    repeat_interval = optional(string, "4h")                                  # Minimum time interval for re-sending a notification if an alert is still firing.<br><br>    policy = optional(object({<br>      contact_point = optional(string, null) # The contact point to route notifications that match this rule to.<br>      continue      = optional(bool, false)  # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it.<br>      group_by      = optional(list(string), [])<br>      mute_timings  = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy.<br><br>      matcher = optional(object({<br>        label = optional(string, "priority") # The name of the label to match against.<br>        match = optional(string, "=")        # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality.<br>        value = optional(string, "P1")       # The label value to match against.<br>      }))<br>    }))<br>  })</pre> | `{}` | no |
+| <a name="input_notifications"></a> [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. | <pre>object({<br>    contact_point   = optional(string, "Slack")       # The default contact point to route all unmatched notifications to.<br>    group_by        = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by.<br>    group_interval  = optional(string, "5m")          # Minimum time interval between two notifications for the same group.<br>    repeat_interval = optional(string, "4h")          # Minimum time interval for re-sending a notification if an alert is still firing.<br><br>    policies = optional(list(object({<br>      contact_point = optional(string, null) # The contact point to route notifications that match this rule to.<br>      continue      = optional(bool, false)  # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it.<br>      group_by      = optional(list(string), ["..."])<br>      mute_timings  = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy.<br><br>      matchers = optional(list(object({<br>        label = optional(string, "priority") # The name of the label to match against.<br>        match = optional(string, "=")        # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality.<br>        value = optional(string, "P1")       # The label value to match against.<br>      })), [])<br>    })), [])<br>  })</pre> | `{}` | no |
 | <a name="input_opsgenie_endpoints"></a> [opsgenie\_endpoints](#input\_opsgenie\_endpoints) | OpsGenie contact points list. | <pre>list(object({<br>    name                    = string                                                 # The name of the contact point.<br>    api_key                 = string                                                 # The OpsGenie API key to use.<br>    auto_close              = optional(bool, false)                                  # Whether to auto-close alerts in OpsGenie when they resolve in the Alertmanager.<br>    message                 = optional(string, "")                                   # The templated content of the message.<br>    api_url                 = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL.<br>    disable_resolve_message = optional(bool, false)                                  # Whether to disable sending resolve messages.<br>  }))</pre> | `[]` | no |
 | <a name="input_slack_endpoints"></a> [slack\_endpoints](#input\_slack\_endpoints) | Slack contact points list. | <pre>list(object({<br>    name                    = string                                                     # The name of the contact point.<br>    endpoint_url            = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to.<br>    icon_emoji              = optional(string, "")                                       # The name of a Slack workspace emoji to use as the bot icon.<br>    icon_url                = optional(string, "")                                       # A URL of an image to use as the bot icon.<br>    recipient               = optional(string, null)                                     # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to.<br>    text                    = optional(string, "")                                       # Templated content of the message.<br>    title                   = optional(string, "")                                       # Templated title of the message.<br>    token                   = optional(string, "")                                       # A Slack API token,for sending messages directly without the webhook method.<br>    webhook_url             = optional(string, "")                                       # A Slack webhook URL,for sending messages via the webhook method.<br>    username                = optional(string, "")                                       # Username for the bot to use.<br>    disable_resolve_message = optional(bool, false)                                      # Whether to disable sending resolve messages.<br>  }))</pre> | `[]` | no |
 

diff --git a/main.tf b/main.tf
@@ -8,12 +8,16 @@ module "alerts" {
 module "contact_points" { # Local submodule call; receives the Slack and OpsGenie endpoint lists below
   source = "./modules/contact-points"
 
+  count = length(var.alert_rules) != 0 ? 1 : 0 # One instance when alert_rules is non-empty, none when it is empty
+
   slack_endpoints    = var.slack_endpoints
   opsgenie_endpoints = var.opsgenie_endpoints
 }
 
 module "notifications" { # Local submodule call; receives the notifications configuration object
   source = "./modules/notifications"
 
+  count = length(var.alert_rules) != 0 ? 1 : 0 # One instance when alert_rules is non-empty, none when it is empty
+
   notifications = var.notifications
 }
diff --git a/modules/alerts/README.md b/modules/alerts/README.md
@@ -49,7 +49,7 @@ No modules.
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
 | <a name="input_alert_interval_seconds"></a> [alert\_interval\_seconds](#input\_alert\_interval\_seconds) | The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially. | `number` | `10` | no |
-| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This varibale describes alert folders, groups and rules. | <pre>list(object({<br>    name                 = string                          # The name of the alert rule<br>    no_data_state        = optional(string, "NoData")      # Describes what state to enter when the rule's query returns No Data<br>    exec_err_state       = optional(string, "Error")       # Describes what state to enter when the rule's query is invalid and the rule cannot be executed<br>    summary              = optional(string, "")            # Rule annotation as a summary<br>    priority             = optional(string, "P2")          # Rule priority level: P2 is for non-critical alerts, P1 will be set for critical alerts<br>    folder_name          = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created<br>    datasource           = string                          # Name of the datasource used for the alert<br>    expr                 = optional(string, null)          # Full expression for the alert<br>    metric_name          = optional(string, "")            # Prometheus metric name which queries the data for the alert<br>    metric_function      = optional(string, "")            # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval      = optional(string, "")            # The time interval with using functions like rate<br>    settings_mode        = optional(string, "replaceNN")   # The mode used in B block, possible values are Strict, replaceNN, dropNN<br>    settings_replaceWith = optional(number, 0)             # The value by which NaN results of the query will be replaced<br>    filters              = optional(any, {})               # Filters object to identify each service for alerting<br>    function             = optional(string, "mean")        # One of Reduce functions which will be used in B block for alerting<br>    equation        
     = string                          # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e<br>    threshold            = number                          # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
+| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This variable describes alert folders, groups and rules. | <pre>list(object({<br>    name                 = string                                         # The name of the alert rule<br>    no_data_state        = optional(string, "NoData")                     # Describes what state to enter when the rule's query returns No Data<br>    exec_err_state       = optional(string, "Error")                      # Describes what state to enter when the rule's query is invalid and the rule cannot be executed<br>    summary              = optional(string, "")                           # Rule annotation as a summary<br>    labels               = optional(map(any), { "priorityLow" : "true" }) # Labels help to define where to send each alert<br>    folder_name          = optional(string, "Main Alerts")                # Grafana folder name in which the rule will be created<br>    datasource           = string                                         # Name of the datasource used for the alert<br>    expr                 = optional(string, null)                         # Full expression for the alert<br>    metric_name          = optional(string, "")                           # Prometheus metric name which queries the data for the alert<br>    metric_function      = optional(string, "")                           # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval      = optional(string, "")                           # The time interval with using functions like rate<br>    settings_mode        = optional(string, "replaceNN")                  # The mode used in B block, possible values are Strict, replaceNN, dropNN<br>    settings_replaceWith = optional(number, 0)                            # The value by which NaN results of the query will be replaced<br>    filters              = optional(any, {})                              # Filters object to identify each 
service for alerting<br>    function             = optional(string, "mean")                       # One of Reduce functions which will be used in B block for alerting<br>    equation             = string                                         # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e<br>    threshold            = number                                         # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
 
 ## Outputs
 

diff --git a/modules/alerts/main.tf b/modules/alerts/main.tf
@@ -36,9 +36,9 @@ resource "grafana_rule_group" "alert_rule" {
         "Managed By" = "Terraform"
         "Summary"    = lookup(rule.value, "summary", rule.value.name)
       }
-      labels = {
-        "priority" = lookup(rule.value, "priority", "P2")
-      }
+      labels = lookup(rule.value, "labels", {
+        "priorityLow" : "true"
+      })
       is_paused = false
       data {
         ref_id     = "A"