From 4022e6fe9c5f233a66aa57b9d77cd5404ce28249 Mon Sep 17 00:00:00 2001 From: Tigran Muradyan Date: Fri, 30 Aug 2024 18:03:56 +0400 Subject: [PATCH 1/2] feat(DMVP-5017): refactor alerts related config and sub-module structure --- README.md | 8 +- dashboard.tf | 10 - main.tf | 31 ++- modules/alert-contact-points/README.md | 43 +++++ .../main.tf | 29 ++- .../tests/mixed-contact-points/0-setup.tf | 0 .../tests/mixed-contact-points/1-example.tf | 0 .../tests/mixed-contact-points}/2-assert.tf | 0 .../tests/mixed-contact-points/README.md | 0 .../tests/opsgenie/0-setup.tf | 0 .../tests/opsgenie/1-example.tf | 0 .../tests/opsgenie}/2-assert.tf | 0 .../tests/opsgenie/README.md | 0 .../tests/slack/0-setup.tf | 0 .../tests/slack/1-example.tf | 0 .../tests/slack}/2-assert.tf | 0 .../tests/slack/README.md | 0 modules/alert-contact-points/variables.tf | 51 +++++ .../versions.tf | 0 modules/alert-notifications/README.md | 42 ++++ modules/alert-notifications/main.tf | 80 ++++++++ .../tests/slack-with-opsgenie}/0-setup.tf | 0 .../tests/slack-with-opsgenie/1-example.tf | 0 .../tests/slack-with-opsgenie}/2-assert.tf | 0 .../tests/slack-with-opsgenie/README.md | 0 .../tests/slack}/0-setup.tf | 0 .../tests/slack/1-example.tf | 0 .../tests/slack}/2-assert.tf | 0 .../tests/slack/README.md | 0 modules/alert-notifications/variables.tf | 55 ++++++ .../versions.tf | 0 modules/alert-rules/README.md | 58 ++++++ modules/alert-rules/main.tf | 159 +++++++++++++++ .../tests/autoscaling-max-usage/0-setup.tf | 0 .../tests/autoscaling-max-usage/1-example.tf | 0 .../tests/autoscaling-max-usage}/2-assert.tf | 0 .../tests/autoscaling-max-usage/README.md | 0 .../tests/available-replica-count/0-setup.tf | 0 .../available-replica-count/1-example.tf | 0 .../available-replica-count}/2-assert.tf | 0 .../tests/available-replica-count/README.md | 0 .../tests/container-restarts/0-setup.tf | 0 .../tests/container-restarts/1-example.tf | 0 .../tests/container-restarts}/2-assert.tf | 0 
.../tests/container-restarts/README.md | 0 .../tests/expressions/0-setup.tf | 0 .../tests/expressions/1-example.tf | 0 .../tests/expressions}/2-assert.tf | 0 .../tests/expressions/README.md | 0 .../tests/mixed-metrics/0-setup.tf | 0 .../tests/mixed-metrics/1-example.tf | 0 .../tests/mixed-metrics}/2-assert.tf | 0 .../tests/mixed-metrics/README.md | 0 .../tests/node-autoscaling}/0-setup.tf | 0 .../tests/node-autoscaling/1-example.tf | 0 .../tests/node-autoscaling}/2-assert.tf | 0 .../tests/node-autoscaling/README.md | 0 modules/alert-rules/variables.tf | 35 ++++ modules/alert-rules/versions.tf | 10 + modules/alerts/README.md | 20 +- modules/alerts/main.tf | 181 +++--------------- modules/alerts/variables.tf | 135 +++++++++++-- modules/contact-points/README.md | 40 ---- modules/contact-points/variables.tf | 30 --- modules/notifications/README.md | 40 ---- modules/notifications/main.tf | 27 --- modules/notifications/variables.tf | 23 --- tests/base/0-setup.tf | 2 + tests/base/1-example.tf | 97 +++++++--- tests/base/README.md | 1 + variables.tf | 177 +++++++++-------- 71 files changed, 895 insertions(+), 489 deletions(-) delete mode 100644 dashboard.tf create mode 100644 modules/alert-contact-points/README.md rename modules/{contact-points => alert-contact-points}/main.tf (56%) rename modules/{contact-points => alert-contact-points}/tests/mixed-contact-points/0-setup.tf (100%) rename modules/{contact-points => alert-contact-points}/tests/mixed-contact-points/1-example.tf (100%) rename modules/{alerts/tests/autoscaling-max-usage => alert-contact-points/tests/mixed-contact-points}/2-assert.tf (100%) rename modules/{contact-points => alert-contact-points}/tests/mixed-contact-points/README.md (100%) rename modules/{contact-points => alert-contact-points}/tests/opsgenie/0-setup.tf (100%) rename modules/{contact-points => alert-contact-points}/tests/opsgenie/1-example.tf (100%) rename modules/{alerts/tests/available-replica-count => 
alert-contact-points/tests/opsgenie}/2-assert.tf (100%) rename modules/{contact-points => alert-contact-points}/tests/opsgenie/README.md (100%) rename modules/{contact-points => alert-contact-points}/tests/slack/0-setup.tf (100%) rename modules/{contact-points => alert-contact-points}/tests/slack/1-example.tf (100%) rename modules/{alerts/tests/container-restarts => alert-contact-points/tests/slack}/2-assert.tf (100%) rename modules/{contact-points => alert-contact-points}/tests/slack/README.md (100%) create mode 100644 modules/alert-contact-points/variables.tf rename modules/{contact-points => alert-contact-points}/versions.tf (100%) create mode 100644 modules/alert-notifications/README.md create mode 100644 modules/alert-notifications/main.tf rename modules/{alerts/tests/node-autoscaling => alert-notifications/tests/slack-with-opsgenie}/0-setup.tf (100%) rename modules/{notifications => alert-notifications}/tests/slack-with-opsgenie/1-example.tf (100%) rename modules/{alerts/tests/expressions => alert-notifications/tests/slack-with-opsgenie}/2-assert.tf (100%) rename modules/{notifications => alert-notifications}/tests/slack-with-opsgenie/README.md (100%) rename modules/{notifications/tests/slack-with-opsgenie => alert-notifications/tests/slack}/0-setup.tf (100%) rename modules/{notifications => alert-notifications}/tests/slack/1-example.tf (100%) rename modules/{alerts/tests/mixed-metrics => alert-notifications/tests/slack}/2-assert.tf (100%) rename modules/{notifications => alert-notifications}/tests/slack/README.md (100%) create mode 100644 modules/alert-notifications/variables.tf rename modules/{notifications => alert-notifications}/versions.tf (100%) create mode 100644 modules/alert-rules/README.md create mode 100644 modules/alert-rules/main.tf rename modules/{alerts => alert-rules}/tests/autoscaling-max-usage/0-setup.tf (100%) rename modules/{alerts => alert-rules}/tests/autoscaling-max-usage/1-example.tf (100%) rename modules/{alerts/tests/node-autoscaling 
=> alert-rules/tests/autoscaling-max-usage}/2-assert.tf (100%) rename modules/{alerts => alert-rules}/tests/autoscaling-max-usage/README.md (100%) rename modules/{alerts => alert-rules}/tests/available-replica-count/0-setup.tf (100%) rename modules/{alerts => alert-rules}/tests/available-replica-count/1-example.tf (100%) rename modules/{contact-points/tests/mixed-contact-points => alert-rules/tests/available-replica-count}/2-assert.tf (100%) rename modules/{alerts => alert-rules}/tests/available-replica-count/README.md (100%) rename modules/{alerts => alert-rules}/tests/container-restarts/0-setup.tf (100%) rename modules/{alerts => alert-rules}/tests/container-restarts/1-example.tf (100%) rename modules/{contact-points/tests/opsgenie => alert-rules/tests/container-restarts}/2-assert.tf (100%) rename modules/{alerts => alert-rules}/tests/container-restarts/README.md (100%) rename modules/{alerts => alert-rules}/tests/expressions/0-setup.tf (100%) rename modules/{alerts => alert-rules}/tests/expressions/1-example.tf (100%) rename modules/{contact-points/tests/slack => alert-rules/tests/expressions}/2-assert.tf (100%) rename modules/{alerts => alert-rules}/tests/expressions/README.md (100%) rename modules/{alerts => alert-rules}/tests/mixed-metrics/0-setup.tf (100%) rename modules/{alerts => alert-rules}/tests/mixed-metrics/1-example.tf (100%) rename modules/{notifications/tests/slack-with-opsgenie => alert-rules/tests/mixed-metrics}/2-assert.tf (100%) rename modules/{alerts => alert-rules}/tests/mixed-metrics/README.md (100%) rename modules/{notifications/tests/slack => alert-rules/tests/node-autoscaling}/0-setup.tf (100%) rename modules/{alerts => alert-rules}/tests/node-autoscaling/1-example.tf (100%) rename modules/{notifications/tests/slack => alert-rules/tests/node-autoscaling}/2-assert.tf (100%) rename modules/{alerts => alert-rules}/tests/node-autoscaling/README.md (100%) create mode 100644 modules/alert-rules/variables.tf create mode 100644 
modules/alert-rules/versions.tf delete mode 100644 modules/contact-points/README.md delete mode 100644 modules/contact-points/variables.tf delete mode 100644 modules/notifications/README.md delete mode 100644 modules/notifications/main.tf delete mode 100644 modules/notifications/variables.tf diff --git a/README.md b/README.md index 6f15c88..2c73a02 100644 --- a/README.md +++ b/README.md @@ -223,8 +223,6 @@ No providers. |------|--------|---------| | [alerts](#module\_alerts) | ./modules/alerts | n/a | | [application\_dashboard](#module\_application\_dashboard) | ./modules/dashboard/ | n/a | -| [contact\_points](#module\_contact\_points) | ./modules/contact-points | n/a | -| [notifications](#module\_notifications) | ./modules/notifications | n/a | ## Resources @@ -234,13 +232,9 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [alert\_interval\_seconds](#input\_alert\_interval\_seconds) | The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially. | `number` | `10` | no | -| [alert\_rules](#input\_alert\_rules) | This variable describes alert folders, groups and rules. |
list(object({
name = string # The name of the alert rule
no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data
exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed
summary = optional(string, "") # Rule annotation as a summary
labels = optional(map(any), { "priorityLow" : "true" })
folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created
datasource = string # Name of the datasource used for the alert
expr = optional(string, null) # Full expression for the alert
metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert
metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc.
metric_interval = optional(string, "") # The time interval with using functions like rate
settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN
settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced
filters = optional(any, {}) # Filters object to identify each service for alerting
function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting
equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e
threshold = number # The value against which B blocks are compared in the math expression
}))
| `[]` | no | +| [alerts](#input\_alerts) | n/a |
object({
alert_interval_seconds = optional(number, 10) # The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially
disable_provenance = optional(bool, true) # Allow modifying resources from other sources than Terraform or the Grafana API
rules = optional( # Describes alert folders, groups and rules
list(object({
name = string # The name of the alert rule
no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data
exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed
summary = optional(string, null) # Rule annotation used as a summary; if not passed, it is generated automatically based on the rule data
labels = optional(map(any), { "priority" : "P1" }) # Labels help to define matchers in notification policy to control where to send each alert
folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created; the folder name is also used as the alert group name, with the suffix " Group"
datasource = string # Name of the datasource used for the alert
expr = optional(string, null) # Full expression for the alert
metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert
metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc.
metric_interval = optional(string, "") # The time interval with using functions like rate
settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN
settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced
filters = optional(any, null) # Filters object to identify each service for alerting
function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting
equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e
threshold = number # The value against which B blocks are compared in the math expression
})), [])
contact_points = optional(object({
slack = optional(list(object({ # Slack contact points list
name = string # The name of the contact point
endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to
icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon
icon_url = optional(string, "") # A URL of an image to use as the bot icon
recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to
text = optional(string, "") # Templated content of the message
title = optional(string, "") # Templated title of the message
token = optional(string, "") # A Slack API token, for sending messages directly without the webhook method
webhook_url = optional(string, "") # A Slack webhook URL, for sending messages via the webhook method
username = optional(string, "") # Username for the bot to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
})), [])
opsgenie = optional(list(object({ # OpsGenie contact points list
name = string # The name of the contact point
api_key = string # The OpsGenie API key to use
auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alert manager
message = optional(string, "") # The templated content of the message
api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
})), [])
webhook = optional(list(object({ # Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format
name = string # The name of the contact point
url = string # The URL to send webhook requests to
authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters
authorization_scheme = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters
basic_auth_password = optional(string, null) # The password component of the basic auth credentials to use
basic_auth_user = optional(string, null) # The username component of the basic auth credentials to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. Defaults to false
settings = optional(any, null) # Additional custom properties to attach to the notifier
})), [])
}), null)
notifications = optional(object({
contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to
group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by
group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group
repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing

mute_timing = optional(object({ # Mute timing config, which will be applied on all policies
name = optional(string, "Default mute timing") # the name of mute timing
intervals = optional(list(object({ # the mute timing interval configs
weekdays = optional(string, null)
days_of_month = optional(string, null)
months = optional(string, null)
years = optional(string, null)
location = optional(string, null)
times = optional(object({
start = optional(string, "00:00")
end = optional(string, "24:59")
}), null)
})), [])
}), null)

policies = optional(list(object({
contact_point = optional(string, null) # The contact point to route notifications that match this rule to
continue = optional(bool, true) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it
group_by = optional(list(string), ["..."])

matchers = optional(list(object({
label = optional(string, "priority") # The name of the label to match against
match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality
value = optional(string, "P1") # The label value to match against
})), [])
policies = optional(list(object({ # Sub-policies (deeper nesting such as sub-sub-policies could also be implemented, but for now the existing levels seem to be enough)
contact_point = optional(string, null)
continue = optional(bool, true)
group_by = optional(list(string), ["..."])
mute_timings = optional(list(string), [])

matchers = optional(list(object({
label = optional(string, "priority")
match = optional(string, "=")
value = optional(string, "P1")
})), [])
})), [])
})), [])
}), null)
})
| `{}` | no | | [application\_dashboard](#input\_application\_dashboard) | Dashboard for monitoring applications |
object({
rows = optional(any, [])
data_source = object({ # global/default datasource, TODO: create datasource inside the module
uid = string
type = optional(string, "prometheus")
})
variables = optional(list(object({ # Allows to define variables to be used in dashboard
name = string
type = optional(string, "custom")
hide = optional(number, 0)
includeAll = optional(bool, false)
multi = optional(bool, false)
query = optional(string, "")
queryValue = optional(string, "")
skipUrlSync = optional(bool, false)
options = optional(list(object({
selected = optional(bool, false)
value = string
text = optional(string, null)
})), [])
})), [])
})
|
{
"data_source": null,
"rows": [],
"variables": []
}
| no | | [name](#input\_name) | Dashboard name | `string` | n/a | yes | -| [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. |
object({
contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to.
group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by.
group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group.
repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing.

policies = optional(list(object({
contact_point = optional(string, null) # The contact point to route notifications that match this rule to.
continue = optional(bool, false) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it.
group_by = optional(list(string), ["..."])
mute_timings = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy.

matchers = optional(list(object({
label = optional(string, "priority") # The name of the label to match against.
match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality.
value = optional(string, "P1") # The label value to match against.
})), [])
})), [])
})
| `{}` | no | -| [opsgenie\_endpoints](#input\_opsgenie\_endpoints) | OpsGenie contact points list. |
list(object({
name = string # The name of the contact point.
api_key = string # The OpsGenie API key to use.
auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alertmanager.
message = optional(string, "") # The templated content of the message.
api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL.
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages.
}))
| `[]` | no | -| [slack\_endpoints](#input\_slack\_endpoints) | Slack contact points list. |
list(object({
name = string # The name of the contact point.
endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to.
icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon.
icon_url = optional(string, "") # A URL of an image to use as the bot icon.
recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to.
text = optional(string, "") # Templated content of the message.
title = optional(string, "") # Templated title of the message.
token = optional(string, "") # A Slack API token,for sending messages directly without the webhook method.
webhook_url = optional(string, "") # A Slack webhook URL,for sending messages via the webhook method.
username = optional(string, "") # Username for the bot to use.
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages.
}))
| `[]` | no | ## Outputs diff --git a/dashboard.tf b/dashboard.tf deleted file mode 100644 index d4a049f..0000000 --- a/dashboard.tf +++ /dev/null @@ -1,10 +0,0 @@ -module "application_dashboard" { - source = "./modules/dashboard/" - - count = length(var.application_dashboard) > 0 ? 1 : 0 - - name = var.name - rows = var.application_dashboard.rows - data_source = var.application_dashboard.data_source - variables = var.application_dashboard.variables -} diff --git a/main.tf b/main.tf index 0290688..c68f954 100644 --- a/main.tf +++ b/main.tf @@ -1,23 +1,22 @@ -module "alerts" { - source = "./modules/alerts" - - alert_interval_seconds = var.alert_interval_seconds - alert_rules = var.alert_rules -} +module "application_dashboard" { + source = "./modules/dashboard/" -module "contact_points" { - source = "./modules/contact-points" + count = length(var.application_dashboard) > 0 ? 1 : 0 - count = length(var.alert_rules) != 0 ? 1 : 0 - - slack_endpoints = var.slack_endpoints - opsgenie_endpoints = var.opsgenie_endpoints + name = var.name + rows = var.application_dashboard.rows + data_source = var.application_dashboard.data_source + variables = var.application_dashboard.variables } -module "notifications" { - source = "./modules/notifications" +module "alerts" { + source = "./modules/alerts" - count = length(var.alert_rules) != 0 ? 1 : 0 + count = var.alerts != null ? 
1 : 0 - notifications = var.notifications + alert_interval_seconds = var.alerts.alert_interval_seconds + disable_provenance = var.alerts.disable_provenance + rules = var.alerts.rules + contact_points = var.alerts.contact_points + notifications = var.alerts.notifications } diff --git a/modules/alert-contact-points/README.md b/modules/alert-contact-points/README.md new file mode 100644 index 0000000..f75a6e1 --- /dev/null +++ b/modules/alert-contact-points/README.md @@ -0,0 +1,43 @@ +## Usage +This Terraform module enables the creation of Grafana contact points for various integrations such as Slack and OpsGenie. Contact points allow you to configure alert notifications to different services based on your requirements. + +There are numerous integrations available for Grafana, but currently, this module supports only Slack and OpsGenie. We are continuously working to add more integrations in the future. If you have any questions or need assistance, feel free to open an issue or contact our team. + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 1.40.0 | + +## Providers + +| Name | Version | +|------|---------| +| [grafana](#provider\_grafana) | >= 1.40.0 | + +## Modules + +No modules. 
+ +## Resources + +| Name | Type | +|------|------| +| [grafana_contact_point.opsgenie_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource | +| [grafana_contact_point.slack_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource | +| [grafana_contact_point.webhook_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [disable\_provenance](#input\_disable\_provenance) | Allow modifying the contact point from other sources than Terraform or the Grafana API. | `bool` | `true` | no | +| [opsgenie\_endpoints](#input\_opsgenie\_endpoints) | OpsGenie contact points list. |
list(object({
name = string # The name of the contact point
api_key = string # The OpsGenie API key to use
auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alert manager
message = optional(string, "") # The templated content of the message
api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
}))
| `[]` | no | +| [slack\_endpoints](#input\_slack\_endpoints) | Slack contact points list. |
list(object({
name = string # The name of the contact point
endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to
icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon
icon_url = optional(string, "") # A URL of an image to use as the bot icon
recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to
text = optional(string, "") # Templated content of the message
title = optional(string, "") # Templated title of the message
token = optional(string, "") # A Slack API token, for sending messages directly without the webhook method
webhook_url = optional(string, "") # A Slack webhook URL, for sending messages via the webhook method
username = optional(string, "") # Username for the bot to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
}))
| `[]` | no | +| [webhook\_endpoints](#input\_webhook\_endpoints) | Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format. |
list(object({
name = string # The name of the contact point
url = string # The URL to send webhook requests to
authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters
authorization_scheme = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters
basic_auth_password = optional(string, null) # The password component of the basic auth credentials to use
basic_auth_user = optional(string, null) # The username component of the basic auth credentials to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. Defaults to false
settings = any # Additional custom properties to attach to the notifier
}))
| `[]` | no | + +## Outputs + +No outputs. + diff --git a/modules/contact-points/main.tf b/modules/alert-contact-points/main.tf similarity index 56% rename from modules/contact-points/main.tf rename to modules/alert-contact-points/main.tf index 42dd039..6508889 100644 --- a/modules/contact-points/main.tf +++ b/modules/alert-contact-points/main.tf @@ -1,8 +1,9 @@ -//Slack Integration +# Slack Integration resource "grafana_contact_point" "slack_contact_point" { for_each = { for cp in var.slack_endpoints : cp.name => cp } - name = each.key + name = each.key + disable_provenance = var.disable_provenance slack { endpoint_url = each.value.webhook_url @@ -18,11 +19,12 @@ resource "grafana_contact_point" "slack_contact_point" { } } -//OpsGenie Integration +# OpsGenie Integration resource "grafana_contact_point" "opsgenie_contact_point" { for_each = { for cp in var.opsgenie_endpoints : cp.name => cp } - name = each.key + name = each.key + disable_provenance = var.disable_provenance opsgenie { api_key = each.value.api_key @@ -32,3 +34,22 @@ resource "grafana_contact_point" "opsgenie_contact_point" { disable_resolve_message = each.value.disable_resolve_message } } + + +# Webhook endpoints Integration +resource "grafana_contact_point" "webhook_contact_point" { + for_each = { for cp in var.webhook_endpoints : cp.name => cp } + + name = each.key + disable_provenance = var.disable_provenance + + webhook { + url = each.value.url + authorization_credentials = each.value.authorization_credentials + authorization_scheme = each.value.authorization_scheme + basic_auth_password = each.value.basic_auth_password + basic_auth_user = each.value.basic_auth_user + disable_resolve_message = each.value.disable_resolve_message + settings = each.value.settings + } +} diff --git a/modules/contact-points/tests/mixed-contact-points/0-setup.tf b/modules/alert-contact-points/tests/mixed-contact-points/0-setup.tf similarity index 100% rename from 
modules/contact-points/tests/mixed-contact-points/0-setup.tf rename to modules/alert-contact-points/tests/mixed-contact-points/0-setup.tf diff --git a/modules/contact-points/tests/mixed-contact-points/1-example.tf b/modules/alert-contact-points/tests/mixed-contact-points/1-example.tf similarity index 100% rename from modules/contact-points/tests/mixed-contact-points/1-example.tf rename to modules/alert-contact-points/tests/mixed-contact-points/1-example.tf diff --git a/modules/alerts/tests/autoscaling-max-usage/2-assert.tf b/modules/alert-contact-points/tests/mixed-contact-points/2-assert.tf similarity index 100% rename from modules/alerts/tests/autoscaling-max-usage/2-assert.tf rename to modules/alert-contact-points/tests/mixed-contact-points/2-assert.tf diff --git a/modules/contact-points/tests/mixed-contact-points/README.md b/modules/alert-contact-points/tests/mixed-contact-points/README.md similarity index 100% rename from modules/contact-points/tests/mixed-contact-points/README.md rename to modules/alert-contact-points/tests/mixed-contact-points/README.md diff --git a/modules/contact-points/tests/opsgenie/0-setup.tf b/modules/alert-contact-points/tests/opsgenie/0-setup.tf similarity index 100% rename from modules/contact-points/tests/opsgenie/0-setup.tf rename to modules/alert-contact-points/tests/opsgenie/0-setup.tf diff --git a/modules/contact-points/tests/opsgenie/1-example.tf b/modules/alert-contact-points/tests/opsgenie/1-example.tf similarity index 100% rename from modules/contact-points/tests/opsgenie/1-example.tf rename to modules/alert-contact-points/tests/opsgenie/1-example.tf diff --git a/modules/alerts/tests/available-replica-count/2-assert.tf b/modules/alert-contact-points/tests/opsgenie/2-assert.tf similarity index 100% rename from modules/alerts/tests/available-replica-count/2-assert.tf rename to modules/alert-contact-points/tests/opsgenie/2-assert.tf diff --git a/modules/contact-points/tests/opsgenie/README.md 
b/modules/alert-contact-points/tests/opsgenie/README.md similarity index 100% rename from modules/contact-points/tests/opsgenie/README.md rename to modules/alert-contact-points/tests/opsgenie/README.md diff --git a/modules/contact-points/tests/slack/0-setup.tf b/modules/alert-contact-points/tests/slack/0-setup.tf similarity index 100% rename from modules/contact-points/tests/slack/0-setup.tf rename to modules/alert-contact-points/tests/slack/0-setup.tf diff --git a/modules/contact-points/tests/slack/1-example.tf b/modules/alert-contact-points/tests/slack/1-example.tf similarity index 100% rename from modules/contact-points/tests/slack/1-example.tf rename to modules/alert-contact-points/tests/slack/1-example.tf diff --git a/modules/alerts/tests/container-restarts/2-assert.tf b/modules/alert-contact-points/tests/slack/2-assert.tf similarity index 100% rename from modules/alerts/tests/container-restarts/2-assert.tf rename to modules/alert-contact-points/tests/slack/2-assert.tf diff --git a/modules/contact-points/tests/slack/README.md b/modules/alert-contact-points/tests/slack/README.md similarity index 100% rename from modules/contact-points/tests/slack/README.md rename to modules/alert-contact-points/tests/slack/README.md diff --git a/modules/alert-contact-points/variables.tf b/modules/alert-contact-points/variables.tf new file mode 100644 index 0000000..6a23c0a --- /dev/null +++ b/modules/alert-contact-points/variables.tf @@ -0,0 +1,51 @@ +variable "disable_provenance" { + type = bool + default = true + description = "Allow modifying the contact point from other sources than Terraform or the Grafana API." 
+} + +variable "slack_endpoints" { + type = list(object({ + name = string # The name of the contact point + endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to + icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon + icon_url = optional(string, "") # A URL of an image to use as the bot icon + recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to + text = optional(string, "") # Templated content of the message + title = optional(string, "") # Templated title of the message + token = optional(string, "") # A Slack API token,for sending messages directly without the webhook method + webhook_url = optional(string, "") # A Slack webhook URL,for sending messages via the webhook method + username = optional(string, "") # Username for the bot to use + disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages + })) + default = [] + description = "Slack contact points list." +} + +variable "opsgenie_endpoints" { + type = list(object({ + name = string # The name of the contact point + api_key = string # The OpsGenie API key to use + auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alert manager + message = optional(string, "") # The templated content of the message + api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL + disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages + })) + default = [] + description = "OpsGenie contact points list." 
+} + +variable "webhook_endpoints" { + type = list(object({ + name = string # The name of the contact point + url = string # The URL to send webhook requests to + authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters + authorization_scheme = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters + basic_auth_password = optional(string, null) # The password component of the basic auth credentials to use + basic_auth_user = optional(string, null) # The username component of the basic auth credentials to use + disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. Defaults to false + settings = optional(any, null) # Additional custom properties to attach to the notifier; optional so that plain webhooks need no extra settings + })) + default = [] + description = "Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format." +} diff --git a/modules/contact-points/versions.tf b/modules/alert-contact-points/versions.tf similarity index 100% rename from modules/contact-points/versions.tf rename to modules/alert-contact-points/versions.tf diff --git a/modules/alert-notifications/README.md b/modules/alert-notifications/README.md new file mode 100644 index 0000000..72285af --- /dev/null +++ b/modules/alert-notifications/README.md @@ -0,0 +1,42 @@ +## Usage +This Terraform module helps you manage Grafana notification policies, making it easier to configure alert notifications for different contact points and conditions. + +Notification policies can be created for various contact points. Additionally, you can have nested policies. + +Please refer to the `tests` folder for real examples. 
+ +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 1.40.0 | + +## Providers + +| Name | Version | +|------|---------| +| [grafana](#provider\_grafana) | >= 1.40.0 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [grafana_mute_timing.this](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/mute_timing) | resource | +| [grafana_notification_policy.this](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/notification_policy) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [disable\_provenance](#input\_disable\_provenance) | Allow modifying the notification policy from other sources than Terraform or the Grafana API. | `bool` | `true` | no | +| [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. This config is global for grafana and overrides existing configs. |
object({
contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to
group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by
group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group
repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing

mute_timing = optional(object({ # Mute timing config, which will be applied on all policies
name = optional(string, "Default mute timing") # the name of mute timing
intervals = optional(list(object({ # the mute timing interval configs
weekdays = optional(string, null)
days_of_month = optional(string, null)
months = optional(string, null)
years = optional(string, null)
location = optional(string, null)
times = optional(object({
start = optional(string, "00:00")
end = optional(string, "24:59")
}), null)
})), [])
}), null)

policies = optional(list(object({
contact_point = optional(string, null) # The contact point to route notifications that match this rule to
continue = optional(bool, true) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it
group_by = optional(list(string), ["..."])

matchers = optional(list(object({
label = optional(string, "priority") # The name of the label to match against
match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality
value = optional(string, "P1") # The label value to match against
})), [])
policies = optional(list(object({ # sub-policies (deeper nesting, e.g. sub-sub-policies, could also be implemented, but for now the existing levels seem to be enough)
contact_point = optional(string, null)
continue = optional(bool, true)
group_by = optional(list(string), ["..."])
mute_timings = optional(list(string), [])

matchers = optional(list(object({
label = optional(string, "priority")
match = optional(string, "=")
value = optional(string, "P1")
})), [])
})), [])
})), [])
})
| `null` | no | + +## Outputs + +No outputs. + diff --git a/modules/alert-notifications/main.tf b/modules/alert-notifications/main.tf new file mode 100644 index 0000000..a160930 --- /dev/null +++ b/modules/alert-notifications/main.tf @@ -0,0 +1,80 @@ +resource "grafana_notification_policy" "this" { + count = var.notifications != null ? 1 : 0 + + contact_point = var.notifications.contact_point + group_by = var.notifications.group_by + group_interval = var.notifications.group_interval + repeat_interval = var.notifications.repeat_interval + disable_provenance = var.disable_provenance + + dynamic "policy" { + for_each = var.notifications.policies + + content { + contact_point = policy.value.contact_point + continue = policy.value.continue + group_by = policy.value.group_by + mute_timings = grafana_mute_timing.this.*.name + + dynamic "matcher" { + for_each = policy.value.matchers + + content { + label = matcher.value.label + match = matcher.value.match + value = matcher.value.value + } + } + + dynamic "policy" { + for_each = try(policy.value.policies, []) + + content { + contact_point = policy.value.contact_point + continue = policy.value.continue + group_by = policy.value.group_by + mute_timings = grafana_mute_timing.this.*.name + + dynamic "matcher" { + for_each = policy.value.matchers + + content { + label = matcher.value.label + match = matcher.value.match + value = matcher.value.value + } + } + } + } + } + } +} + +resource "grafana_mute_timing" "this" { + count = try(var.notifications.mute_timing, null) != null ? 
1 : 0 + + name = try(var.notifications.mute_timing.name, "Default mute timing") # use the configured mute timing name instead of a hard-coded one + disable_provenance = var.disable_provenance + + + dynamic "intervals" { + for_each = try(var.notifications.mute_timing.intervals, []) + + content { + weekdays = try(intervals.value.weekdays, null) + days_of_month = try(intervals.value.days_of_month, null) + months = try(intervals.value.months, null) + years = try(intervals.value.years, null) + location = try(intervals.value.location, null) + + dynamic "times" { + for_each = try(intervals.value.times, null) != null ? [intervals.value.times] : [] # times is a single optional object: wrap it in a list and emit no block when it is null + + content { + start = try(times.value.start, "00:00") + end = try(times.value.end, "24:59") + } + } + } + } +} diff --git a/modules/alerts/tests/node-autoscaling/0-setup.tf b/modules/alert-notifications/tests/slack-with-opsgenie/0-setup.tf similarity index 100% rename from modules/alerts/tests/node-autoscaling/0-setup.tf rename to modules/alert-notifications/tests/slack-with-opsgenie/0-setup.tf diff --git a/modules/notifications/tests/slack-with-opsgenie/1-example.tf b/modules/alert-notifications/tests/slack-with-opsgenie/1-example.tf similarity index 100% rename from modules/notifications/tests/slack-with-opsgenie/1-example.tf rename to modules/alert-notifications/tests/slack-with-opsgenie/1-example.tf diff --git a/modules/alerts/tests/expressions/2-assert.tf b/modules/alert-notifications/tests/slack-with-opsgenie/2-assert.tf similarity index 100% rename from modules/alerts/tests/expressions/2-assert.tf rename to modules/alert-notifications/tests/slack-with-opsgenie/2-assert.tf diff --git a/modules/notifications/tests/slack-with-opsgenie/README.md b/modules/alert-notifications/tests/slack-with-opsgenie/README.md similarity index 100% rename from modules/notifications/tests/slack-with-opsgenie/README.md rename to 
modules/alert-notifications/tests/slack-with-opsgenie/README.md diff --git a/modules/notifications/tests/slack-with-opsgenie/0-setup.tf b/modules/alert-notifications/tests/slack/0-setup.tf similarity index 100% rename from 
modules/notifications/tests/slack-with-opsgenie/0-setup.tf rename to modules/alert-notifications/tests/slack/0-setup.tf diff --git a/modules/notifications/tests/slack/1-example.tf b/modules/alert-notifications/tests/slack/1-example.tf similarity index 100% rename from modules/notifications/tests/slack/1-example.tf rename to modules/alert-notifications/tests/slack/1-example.tf diff --git a/modules/alerts/tests/mixed-metrics/2-assert.tf b/modules/alert-notifications/tests/slack/2-assert.tf similarity index 100% rename from modules/alerts/tests/mixed-metrics/2-assert.tf rename to modules/alert-notifications/tests/slack/2-assert.tf diff --git a/modules/notifications/tests/slack/README.md b/modules/alert-notifications/tests/slack/README.md similarity index 100% rename from modules/notifications/tests/slack/README.md rename to modules/alert-notifications/tests/slack/README.md diff --git a/modules/alert-notifications/variables.tf b/modules/alert-notifications/variables.tf new file mode 100644 index 0000000..ad6146d --- /dev/null +++ b/modules/alert-notifications/variables.tf @@ -0,0 +1,55 @@ +variable "disable_provenance" { + type = bool + default = true + description = "Allow modifying the notification policy from other sources than Terraform or the Grafana API." 
+} + +variable "notifications" { + type = object({ + contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to + group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by + group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group + repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing + + mute_timing = optional(object({ # Mute timing config, which will be applied on all policies + name = optional(string, "Default mute timing") # the name of mute timing + intervals = optional(list(object({ # the mute timing interval configs + weekdays = optional(string, null) + days_of_month = optional(string, null) + months = optional(string, null) + years = optional(string, null) + location = optional(string, null) + times = optional(object({ + start = optional(string, "00:00") + end = optional(string, "24:59") + }), null) + })), []) + }), null) + + policies = optional(list(object({ + contact_point = optional(string, null) # The contact point to route notifications that match this rule to + continue = optional(bool, true) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it + group_by = optional(list(string), ["..."]) + + matchers = optional(list(object({ + label = optional(string, "priority") # The name of the label to match against + match = optional(string, "=") # The operator to apply when matching values of the given label. 
Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality + value = optional(string, "P1") # The label value to match against + })), []) + policies = optional(list(object({ # sub-policies(there is also possibility to implement also ability for sub.sub.sub-policies, but for not seems existing configs are enough) + contact_point = optional(string, null) + continue = optional(bool, true) + group_by = optional(list(string), ["..."]) + mute_timings = optional(list(string), []) + + matchers = optional(list(object({ + label = optional(string, "priority") + match = optional(string, "=") + value = optional(string, "P1") + })), []) + })), []) + })), []) + }) + description = "Represents the configuration options for Grafana notification policies. This config is global for grafana and overrides existing configs." + default = null +} diff --git a/modules/notifications/versions.tf b/modules/alert-notifications/versions.tf similarity index 100% rename from modules/notifications/versions.tf rename to modules/alert-notifications/versions.tf diff --git a/modules/alert-rules/README.md b/modules/alert-rules/README.md new file mode 100644 index 0000000..7e13db4 --- /dev/null +++ b/modules/alert-rules/README.md @@ -0,0 +1,58 @@ +## Usage +To enable some of these alerts for your applications, you just need to replace `App_1`, `App_2` and `App_3` with the actual names of your applications. You can refer to the Prometheus metrics to identify the available filters that can be used for each application. Additionally, modify the values in the conditions to reflect the real cases of your applications. These adjustments will ensure that the alerts accurately monitor your specific applications and their scaling needs. + +## Alert Expressions +Alert expressions are formed based on `metric_name`, `metric_function`, `metric_interval`, and `filters` parameters. 
They form alert expressions like: `kube_deployment_status_replicas_available{deployment=\"nginx\"}`, `rate(kube_pod_container_status_restarts_total{container=\"nginx\"}[5m])`, but sometimes we need to have more complex queries like this one: `sum(rate(nginx_ingress_controller_requests{status=~'5..'}[1m])) by (ingress,cluster) / sum(rate(nginx_ingress_controller_requests[1m])) by (ingress) * 100 > 5`. +When you want to create simple queries, use the parameters listed above. When you need to create complex queries, don't pass those parameters; instead, pass the query string to the `expr` variable. Check the `tests/expressions` folder for an example with complex queries. + +## Conditions and Thresholds +Alert conditions are formed based on $B blocks and the `equation` and `threshold` parameters users pass to the module. +The `equation` parameter accepts only these values: +- `lt` corresponds to `<` +- `gt` corresponds to `>` +- `e` corresponds to `=` +- `lte` corresponds to `<=` +- `gte` corresponds to `>=` + +The `threshold` parameter is the numeric value against which B blocks are compared in the math expression. + +## Priority +Specify alert rule priority by setting the `priority` label in the `labels` parameter of each rule in the `alert_rules` variable. By default, the label value is P1. For example, you can configure notification policies so that alerts with P1 priority are sent to Opsgenie, while the other alerts are sent to Slack. + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 1.40.0 | + +## Providers + +| Name | Version | +|------|---------| +| [grafana](#provider\_grafana) | >= 1.40.0 | + +## Modules + +No modules. 
+ +## Resources + +| Name | Type | +|------|------| +| [grafana_folder.rule_folder](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/folder) | resource | +| [grafana_rule_group.this](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/rule_group) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [alert\_interval\_seconds](#input\_alert\_interval\_seconds) | The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially. | `number` | `10` | no | +| [alert\_rules](#input\_alert\_rules) | This variable describes alert folders, groups and rules. |
list(object({
name = string # The name of the alert rule
no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data
exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed
summary = optional(string, null) # Rule annotation as a summary, if not passed automatically generated based on data
labels = optional(map(any), { "priority" : "P1" }) # Labels help to define matchers in notification policy to control where to send each alert
folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created
datasource = string # Name of the datasource used for the alert
expr = optional(string, null) # Full expression for the alert
metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert
metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc.
metric_interval = optional(string, "") # The time interval with using functions like rate
settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN
settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced
filters = optional(any, null) # Filters object to identify each service for alerting
function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting
equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e
threshold = number # The value against which B blocks are compared in the math expression
}))
| `[]` | no | +| [disable\_provenance](#input\_disable\_provenance) | Allow modifying the rule groups from other sources than Terraform or the Grafana API. | `bool` | `true` | no | + +## Outputs + +No outputs. + diff --git a/modules/alert-rules/main.tf b/modules/alert-rules/main.tf new file mode 100644 index 0000000..905b2ce --- /dev/null +++ b/modules/alert-rules/main.tf @@ -0,0 +1,159 @@ +locals { + folders = toset(distinct([for rule in var.alert_rules : rule.folder_name])) + alerts = { for member in local.folders : member => [for rule in var.alert_rules : merge(rule, { + expr : coalesce(rule.expr, "${rule.metric_function}(${rule.metric_name}${rule.filters != null ? format("{%s}", replace(join(", ", [for k, v in rule.filters : "${k}=\"${v}\""]), "\"", "\\\"")) : ""}${rule.metric_interval})") + }) if rule.folder_name == member] } + comparison_operators = { + gte = { operator = ">=", definition = "greater than or equal to" }, + gt = { operator = ">", definition = "greater than" }, + lt = { operator = "<", definition = "less than" }, + lte = { operator = "<=", definition = "less than or equal to" }, + e = { operator = "=", definition = "equal to" } + } +} + +resource "grafana_folder" "rule_folder" { + for_each = local.folders + title = each.key +} + +resource "grafana_rule_group" "this" { + for_each = local.alerts + + name = "${each.key} Group" + disable_provenance = var.disable_provenance + folder_uid = grafana_folder.rule_folder[each.key].uid + interval_seconds = var.alert_interval_seconds + org_id = 1 + dynamic "rule" { + for_each = each.value + content { + name = rule.value["name"] + for = "0" + condition = "C" + no_data_state = lookup(rule.value, "no_data_state", "NoData") + exec_err_state = lookup(rule.value, "exec_err_state", "Error") + annotations = { + "Managed By" = "Terraform" + "Summary" = coalesce(rule.value.summary, "${rule.value.name} alert, the value is ${local.comparison_operators[rule.value.equation].definition} ${rule.value.threshold}") + } + 
labels = lookup(rule.value, "labels", { "priority" : "P1" }) + is_paused = false + data { + ref_id = "A" + query_type = "" + relative_time_range { + from = 600 + to = 0 + } + datasource_uid = rule.value.datasource + model = < [grafana](#provider\_grafana) | >= 1.40.0 | +No providers. ## Modules -No modules. +| Name | Source | Version | +|------|--------|---------| +| [alert\_rules](#module\_alert\_rules) | ../alert-rules | n/a | +| [contact\_points](#module\_contact\_points) | ../alert-contact-points | n/a | +| [notifications](#module\_notifications) | ../alert-notifications | n/a | ## Resources -| Name | Type | -|------|------| -| [grafana_folder.rule_folder](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/folder) | resource | -| [grafana_rule_group.alert_rule](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/rule_group) | resource | +No resources. ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [alert\_interval\_seconds](#input\_alert\_interval\_seconds) | The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially. | `number` | `10` | no | -| [alert\_rules](#input\_alert\_rules) | This varibale describes alert folders, groups and rules. |
list(object({
name = string # The name of the alert rule
no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data
exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed
summary = optional(string, "") # Rule annotation as a summary
labels = optional(map(any), { "priorityLow" : "true" }) # Labels help to define where to send each alert
folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created
datasource = string # Name of the datasource used for the alert
expr = optional(string, null) # Full expression for the alert
metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert
metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc.
metric_interval = optional(string, "") # The time interval with using functions like rate
settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN
settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced
filters = optional(any, {}) # Filters object to identify each service for alerting
function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting
equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e
threshold = number # The value against which B blocks are compared in the math expression
}))
| `[]` | no | +| [contact\_points](#input\_contact\_points) | Grafana contact points configs. |
object({
slack = optional(list(object({ # Slack contact points list
name = string # The name of the contact point
endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to
icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon
icon_url = optional(string, "") # A URL of an image to use as the bot icon
recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to
text = optional(string, "") # Templated content of the message
title = optional(string, "") # Templated title of the message
token = optional(string, "") # A Slack API token,for sending messages directly without the webhook method
webhook_url = optional(string, "") # A Slack webhook URL,for sending messages via the webhook method
username = optional(string, "") # Username for the bot to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
})), [])
opsgenie = optional(list(object({ # OpsGenie contact points list
name = string # The name of the contact point
api_key = string # The OpsGenie API key to use
auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alert manager
message = optional(string, "") # The templated content of the message
api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
})), [])
webhook = optional(list(object({ # Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format
name = string # The name of the contact point
url = string # The URL to send webhook requests to
authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters
authorization_scheme = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters
basic_auth_password = optional(string, null) # The password component of the basic auth credentials to use
basic_auth_user = optional(string, null) # The username component of the basic auth credentials to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. Defaults to false
settings = optional(any, null) # Additional custom properties to attach to the notifier
})), [])
})
| `null` | no | +| [disable\_provenance](#input\_disable\_provenance) | Allow modifying the resources from other sources than Terraform or the Grafana API | `bool` | `true` | no | +| [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. This config is global for grafana and overrides existing configs. |
object({
contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to
group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by
group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group
repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing

mute_timing = optional(object({ # Mute timing config, which will be applied on all policies
name = optional(string, "Default mute timing") # the name of mute timing
intervals = optional(list(object({ # the mute timing interval configs
weekdays = optional(string, null)
days_of_month = optional(string, null)
months = optional(string, null)
years = optional(string, null)
location = optional(string, null)
times = optional(object({
start = optional(string, "00:00")
end = optional(string, "24:59")
}), null)
})), [])
}), null)

policies = optional(list(object({
contact_point = optional(string, null) # The contact point to route notifications that match this rule to
continue = optional(bool, true) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it
group_by = optional(list(string), ["..."])

matchers = optional(list(object({
label = optional(string, "priority") # The name of the label to match against
match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality
value = optional(string, "P1") # The label value to match against
})), [])
policies = optional(list(object({ # sub-policies (deeper nesting, e.g. sub-sub-policies, could also be implemented, but for now the existing levels seem to be enough)
contact_point = optional(string, null)
continue = optional(bool, true)
group_by = optional(list(string), ["..."])
mute_timings = optional(list(string), [])

matchers = optional(list(object({
label = optional(string, "priority")
match = optional(string, "=")
value = optional(string, "P1")
})), [])
})), [])
})), [])
})
| `{}` | no | +| [rules](#input\_rules) | This variable describes alert folders, groups and rules. |
list(object({
name = string # The name of the alert rule
no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data
exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed
summary = optional(string, null) # Rule annotation as a summary, if not passed automatically generated based on data
labels = optional(map(any), { "priority" : "P1" }) # Labels help to define matchers in notification policy to control where to send each alert
folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created, the folder name used also as alert group name with suffix " Group"
datasource = string # Name of the datasource used for the alert
expr = optional(string, null) # Full expression for the alert
metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert
metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc.
metric_interval = optional(string, "") # The time interval with using functions like rate
settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN
settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced
filters = optional(any, null) # Filters object to identify each service for alerting
function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting
equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e
threshold = number # The value against which B blocks are compared in the math expression
}))
| `null` | no | ## Outputs diff --git a/modules/alerts/main.tf b/modules/alerts/main.tf index d0291d2..144d227 100644 --- a/modules/alerts/main.tf +++ b/modules/alerts/main.tf @@ -1,162 +1,31 @@ -locals { - folders = toset(distinct([for rule in var.alert_rules : rule.folder_name])) - alerts = { for member in local.folders : member => [for rule in var.alert_rules : merge(rule, { - expr : coalesce(rule.expr, "${rule.metric_function}(${rule.metric_name}${(rule.filters != null && length(rule.filters) > 0) ? format("{%s}", replace(join(", ", [for k, v in rule.filters : "${k}=\"${v}\""]), "\"", "\\\"")) : ""}${rule.metric_interval})") - }) if rule.folder_name == member] } - comparison_operators = { - gte : ">=", - gt : ">", - lt : "<", - lte : "<=", - e : "=" - } -} +module "alert_rules" { + source = "../alert-rules" + + count = var.rules != null ? 1 : 0 -resource "grafana_folder" "rule_folder" { - for_each = local.folders - title = each.key + alert_interval_seconds = var.alert_interval_seconds + disable_provenance = var.disable_provenance + alert_rules = var.rules } -resource "grafana_rule_group" "alert_rule" { - for_each = local.alerts +module "contact_points" { + source = "../alert-contact-points" - name = "${each.key} Group" - folder_uid = grafana_folder.rule_folder[each.key].uid - interval_seconds = var.alert_interval_seconds - org_id = 1 - dynamic "rule" { - for_each = each.value - content { - name = rule.value["name"] - for = "0" - condition = "C" - no_data_state = lookup(rule.value, "no_data_state", "NoData") - exec_err_state = lookup(rule.value, "exec_err_state", "Error") - annotations = { - "Managed By" = "Terraform" - "Summary" = lookup(rule.value, "summary", rule.value.name) - } - labels = lookup(rule.value, "labels", { - "priorityLow" : "true" - }) - is_paused = false - data { - ref_id = "A" - query_type = "" - relative_time_range { - from = 600 - to = 0 - } - datasource_uid = rule.value.datasource - model = < -## Requirements - -| Name | Version | 
-|------|---------| -| [terraform](#requirement\_terraform) | >= 1.3.0 | -| [grafana](#requirement\_grafana) | >= 1.40.0 | - -## Providers - -| Name | Version | -|------|---------| -| [grafana](#provider\_grafana) | >= 1.40.0 | - -## Modules - -No modules. - -## Resources - -| Name | Type | -|------|------| -| [grafana_contact_point.opsgenie_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource | -| [grafana_contact_point.slack_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [opsgenie\_endpoints](#input\_opsgenie\_endpoints) | OpsGenie contact points list. |
list(object({
name = string # The name of the contact point.
api_key = string # The OpsGenie API key to use.
auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alertmanager.
message = optional(string, "") # The templated content of the message.
api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL.
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages.
}))
| `[]` | no | -| [slack\_endpoints](#input\_slack\_endpoints) | Slack contact points list. |
list(object({
name = string # The name of the contact point.
endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to.
icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon.
icon_url = optional(string, "") # A URL of an image to use as the bot icon.
recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to.
text = optional(string, "") # Templated content of the message.
title = optional(string, "") # Templated title of the message.
token = optional(string, "") # A Slack API token,for sending messages directly without the webhook method.
webhook_url = optional(string, "") # A Slack webhook URL,for sending messages via the webhook method.
username = optional(string, "") # Username for the bot to use.
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages.
}))
| `[]` | no | - -## Outputs - -No outputs. - diff --git a/modules/contact-points/variables.tf b/modules/contact-points/variables.tf deleted file mode 100644 index bd86554..0000000 --- a/modules/contact-points/variables.tf +++ /dev/null @@ -1,30 +0,0 @@ -variable "slack_endpoints" { - type = list(object({ - name = string # The name of the contact point. - endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to. - icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon. - icon_url = optional(string, "") # A URL of an image to use as the bot icon. - recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to. - text = optional(string, "") # Templated content of the message. - title = optional(string, "") # Templated title of the message. - token = optional(string, "") # A Slack API token,for sending messages directly without the webhook method. - webhook_url = optional(string, "") # A Slack webhook URL,for sending messages via the webhook method. - username = optional(string, "") # Username for the bot to use. - disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. - })) - default = [] - description = "Slack contact points list." -} - -variable "opsgenie_endpoints" { - type = list(object({ - name = string # The name of the contact point. - api_key = string # The OpsGenie API key to use. - auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alertmanager. - message = optional(string, "") # The templated content of the message. - api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL. - disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. 
- })) - default = [] - description = "OpsGenie contact points list." -} diff --git a/modules/notifications/README.md b/modules/notifications/README.md deleted file mode 100644 index 988921f..0000000 --- a/modules/notifications/README.md +++ /dev/null @@ -1,40 +0,0 @@ -## Usage -This Terraform module helps you manage Grafana notification policies, making it easier to configure alert notifications for different contact points and conditions. - -Notification policies can be created for various contact points. Additionally, you can have nested policies. - -Please refer to the `tests` folder for real examples. - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 1.3.0 | -| [grafana](#requirement\_grafana) | >= 1.40.0 | - -## Providers - -| Name | Version | -|------|---------| -| [grafana](#provider\_grafana) | >= 1.40.0 | - -## Modules - -No modules. - -## Resources - -| Name | Type | -|------|------| -| [grafana_notification_policy.policy](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/notification_policy) | resource | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. |
object({
contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to.
group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by.
group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group.
repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing.

policies = optional(list(object({
contact_point = optional(string, null) # The contact point to route notifications that match this rule to.
continue = optional(bool, false) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it.
group_by = optional(list(string), ["..."])
mute_timings = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy.

matchers = optional(list(object({
label = optional(string, "priority") # The name of the label to match against.
match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality.
value = optional(string, "P1") # The label value to match against.
})), [])
})), [])
})
| `{}` | no | - -## Outputs - -No outputs. - diff --git a/modules/notifications/main.tf b/modules/notifications/main.tf deleted file mode 100644 index 10db533..0000000 --- a/modules/notifications/main.tf +++ /dev/null @@ -1,27 +0,0 @@ -resource "grafana_notification_policy" "policy" { - contact_point = var.notifications.contact_point - group_by = var.notifications.group_by - group_interval = var.notifications.group_interval - repeat_interval = var.notifications.repeat_interval - - dynamic "policy" { - for_each = var.notifications.policies - - content { - contact_point = policy.value.contact_point - continue = policy.value.continue - group_by = policy.value.group_by - mute_timings = policy.value.mute_timings - - dynamic "matcher" { - for_each = policy.value.matchers - - content { - label = matcher.value.label - match = matcher.value.match - value = matcher.value.value - } - } - } - } -} diff --git a/modules/notifications/variables.tf b/modules/notifications/variables.tf deleted file mode 100644 index ea1f1aa..0000000 --- a/modules/notifications/variables.tf +++ /dev/null @@ -1,23 +0,0 @@ -variable "notifications" { - type = object({ - contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to. - group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by. - group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group. - repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing. - - policies = optional(list(object({ - contact_point = optional(string, null) # The contact point to route notifications that match this rule to. - continue = optional(bool, false) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it. 
- group_by = optional(list(string), ["..."]) - mute_timings = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy. - - matchers = optional(list(object({ - label = optional(string, "priority") # The name of the label to match against. - match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality. - value = optional(string, "P1") # The label value to match against. - })), []) - })), []) - }) - description = "Represents the configuration options for Grafana notification policies." - default = {} -} diff --git a/tests/base/0-setup.tf b/tests/base/0-setup.tf index 8bb1fbe..87682de 100644 --- a/tests/base/0-setup.tf +++ b/tests/base/0-setup.tf @@ -1,4 +1,6 @@ terraform { + required_version = ">= 1.3.0" + required_providers { grafana = { source = "grafana/grafana" diff --git a/tests/base/1-example.tf b/tests/base/1-example.tf index dca24c9..200a7af 100644 --- a/tests/base/1-example.tf +++ b/tests/base/1-example.tf @@ -33,37 +33,76 @@ module "this" { ] } - alert_rules = [ - { - "datasource" : "prometheus", - "equation" : "gt", - "expr" : "avg(increase(nginx_ingress_controller_request_duration_seconds_sum[3m])) / 10", - "filters" : null, - "folder_name" : "Nginx Alerts", - "function" : "mean", - "name" : "Latency", - "labels" : { - "priorityHigh" : "true", - } - "summary" : "Latency is higher than 3s", - "threshold" : 3 - }, - ] - - notifications = { - "group_interval" : "1m", - "repeat_interval" : "1m", - "contact_point" : "Slack", - "policies" : [ + alerts = { + rules = [ { - "contact_point" : "OpsGenie", - "continue" : "true", - "matchers" : [{ - "label" : "priorityHigh", - "match" : "=", - "value" : "true" - }] + "datasource" : "prometheus", + "equation" : "gt", + "expr" : "avg(increase(nginx_ingress_controller_request_duration_seconds_sum[3m])) / 10", + "folder_name" : 
"Nginx Alerts", + "function" : "mean", + "name" : "Latency P1", + "labels" : { + "priority" : "P1", + } + "threshold" : 3 + + # we override no-data/exec-error state for this example/test only, it is supposed this values will not be set here so they get their default ones + "no_data_state" : "OK" + "exec_err_state" : "OK" + # "exec_err_state" : "Alerting" # uncomment to trigger new alert }, + { + "datasource" : "prometheus", + "equation" : "gt", + "expr" : "avg(increase(nginx_ingress_controller_request_duration_seconds_sum[3m])) / 10", + "folder_name" : "Nginx Alerts", + "function" : "mean", + "name" : "Latency P2", + "labels" : { + "priority" : "P2", + } + "threshold" : 3 + + # we override no-data/exec-error state for this example/test only, it is supposed this values will not be set here so they get their default ones + "no_data_state" : "OK" + "exec_err_state" : "OK" + # "exec_err_state" : "Alerting" # uncomment to trigger new alert + } ] + contact_points = { + webhook = [ + { + name = "webhook-contact-point-default" + url = "https://example.com?default" + }, + { + name = "webhook-contact-point-second" + url = "https://example.com?second" + }, + { + name = "webhook-contact-point-third" + url = "https://example.com?third" + } + ] + } + notifications = { + "group_interval" : "1m", + "repeat_interval" : "1m", + "contact_point" : "webhook-contact-point-default", # the default policy/channel will be used if no one from policies listing handled alert, in this case as third policy have no matcher rules we will not have such cases + "policies" : [ + { + "contact_point" : "webhook-contact-point-second", # the priority=P1 alerts will go by this channel + "matchers" : [{ + "label" : "priority", + "match" : "=", + "value" : "P1" + }] + }, + { + "contact_point" : "webhook-contact-point-third", # all alerts will go by this policy/channel + }, + ] + } } } diff --git a/tests/base/README.md b/tests/base/README.md index 9ad1a89..1208af6 100644 --- a/tests/base/README.md +++ 
b/tests/base/README.md @@ -5,6 +5,7 @@ | Name | Version | |------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | | [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers diff --git a/variables.tf b/variables.tf index 13e2730..216df72 100644 --- a/variables.tf +++ b/variables.tf @@ -35,88 +35,109 @@ variable "application_dashboard" { description = "Dashboard for monitoring applications" } -# alerting variables -variable "alert_interval_seconds" { - type = number - default = 10 - description = "The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially." -} - -variable "alert_rules" { - type = list(object({ - name = string # The name of the alert rule - no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data - exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed - summary = optional(string, "") # Rule annotation as a summary - labels = optional(map(any), { "priorityLow" : "true" }) - folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created - datasource = string # Name of the datasource used for the alert - expr = optional(string, null) # Full expression for the alert - metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert - metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc. 
- metric_interval = optional(string, "") # The time interval with using functions like rate - settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN - settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced - filters = optional(any, {}) # Filters object to identify each service for alerting - function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting - equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e - threshold = number # The value against which B blocks are compared in the math expression - })) - default = [] - description = "This variable describes alert folders, groups and rules." -} - -variable "slack_endpoints" { - type = list(object({ - name = string # The name of the contact point. - endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to. - icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon. - icon_url = optional(string, "") # A URL of an image to use as the bot icon. - recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to. - text = optional(string, "") # Templated content of the message. - title = optional(string, "") # Templated title of the message. - token = optional(string, "") # A Slack API token,for sending messages directly without the webhook method. - webhook_url = optional(string, "") # A Slack webhook URL,for sending messages via the webhook method. - username = optional(string, "") # Username for the bot to use. - disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. 
- })) - default = [] - description = "Slack contact points list." -} +variable "alerts" { + type = object({ + alert_interval_seconds = optional(number, 10) # The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially + disable_provenance = optional(bool, true) # Allow modifying resources from other sources than Terraform or the Grafana API + rules = optional( # Describes alert folders, groups and rules + list(object({ + name = string # The name of the alert rule + no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data + exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed + summary = optional(string, null) # Rule annotation as a summary, if not passed automatically generated based on data + labels = optional(map(any), { "priority" : "P1" }) # Labels help to define matchers in notification policy to control where to send each alert + folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created, the folder name used also as alert group name with suffix " Group" + datasource = string # Name of the datasource used for the alert + expr = optional(string, null) # Full expression for the alert + metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert + metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc. 
+ metric_interval = optional(string, "") # The time interval with using functions like rate + settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN + settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced + filters = optional(any, null) # Filters object to identify each service for alerting + function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting + equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e + threshold = number # The value against which B blocks are compared in the math expression + })), []) + contact_points = optional(object({ + slack = optional(list(object({ # Slack contact points list + name = string # The name of the contact point + endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to + icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon + icon_url = optional(string, "") # A URL of an image to use as the bot icon + recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to + text = optional(string, "") # Templated content of the message + title = optional(string, "") # Templated title of the message + token = optional(string, "") # A Slack API token,for sending messages directly without the webhook method + webhook_url = optional(string, "") # A Slack webhook URL,for sending messages via the webhook method + username = optional(string, "") # Username for the bot to use + disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages + })), []) + opsgenie = optional(list(object({ # OpsGenie contact points list + name = string # The name of the 
contact point + api_key = string # The OpsGenie API key to use + auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alert manager + message = optional(string, "") # The templated content of the message + api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL + disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages + })), []) + webhook = optional(list(object({ # Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format + name = string # The name of the contact point + url = string # The URL to send webhook requests to + authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters + authorization_scheme = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters + basic_auth_password = optional(string, null) # The password component of the basic auth credentials to use + basic_auth_user = optional(string, null) # The username component of the basic auth credentials to use + disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. 
Defaults to false + settings = optional(any, null) # Additional custom properties to attach to the notifier + })), []) + }), null) + notifications = optional(object({ + contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to + group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by + group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group + repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing -variable "opsgenie_endpoints" { - type = list(object({ - name = string # The name of the contact point. - api_key = string # The OpsGenie API key to use. - auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alertmanager. - message = optional(string, "") # The templated content of the message. - api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL. - disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. - })) - default = [] - description = "OpsGenie contact points list." -} + mute_timing = optional(object({ # Mute timing config, which will be applied on all policies + name = optional(string, "Default mute timing") # the name of mute timing + intervals = optional(list(object({ # the mute timing interval configs + weekdays = optional(string, null) + days_of_month = optional(string, null) + months = optional(string, null) + years = optional(string, null) + location = optional(string, null) + times = optional(object({ # time-of-day range of the interval, in "HH:MM" format + start = optional(string, "00:00") # beginning of the day + end = optional(string, "24:00") # end of the day; "24:00" is the largest value the time format allows + }), null) + })), []) + }), null) -variable "notifications" { - type = object({ - contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to.
- group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by. - group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group. - repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing. + policies = optional(list(object({ + contact_point = optional(string, null) # The contact point to route notifications that match this rule to + continue = optional(bool, true) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it + group_by = optional(list(string), ["..."]) - policies = optional(list(object({ - contact_point = optional(string, null) # The contact point to route notifications that match this rule to. - continue = optional(bool, false) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it. - group_by = optional(list(string), ["..."]) - mute_timings = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy. + matchers = optional(list(object({ + label = optional(string, "priority") # The name of the label to match against + match = optional(string, "=") # The operator to apply when matching values of the given label. 
Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality + value = optional(string, "P1") # The label value to match against + })), []) + policies = optional(list(object({ # sub-policies (deeper nesting, e.g. sub-sub-policies, could also be supported, but for now the existing configs seem to be enough) + contact_point = optional(string, null) + continue = optional(bool, true) + group_by = optional(list(string), ["..."]) + mute_timings = optional(list(string), []) - matchers = optional(list(object({ - label = optional(string, "priority") # The name of the label to match against. - match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality. - value = optional(string, "P1") # The label value to match against. + matchers = optional(list(object({ + label = optional(string, "priority") + match = optional(string, "=") + value = optional(string, "P1") + })), []) + })), []) })), []) - })), []) + }), null) }) - description = "Represents the configuration options for Grafana notification policies."
- default = {} + + default = {} } From c36164ab95b587833eaf6f2cdade9b0ffe0fea33 Mon Sep 17 00:00:00 2001 From: Tigran Muradyan Date: Mon, 2 Sep 2024 12:35:16 +0400 Subject: [PATCH 2/2] chore(DMVP-5017): have tests/example fixed --- README.md | 199 +++++------------- .../tests/mixed-contact-points/0-setup.tf | 12 +- .../tests/mixed-contact-points/2-assert.tf | 9 - .../tests/mixed-contact-points/README.md | 11 +- .../tests/opsgenie/0-setup.tf | 12 +- .../tests/opsgenie/1-example.tf | 8 +- .../tests/opsgenie/2-assert.tf | 9 - .../tests/opsgenie/README.md | 11 +- .../tests/slack/0-setup.tf | 12 +- .../tests/slack/1-example.tf | 9 +- .../tests/slack/2-assert.tf | 9 - .../tests/slack/README.md | 11 +- .../tests/slack-with-opsgenie/0-setup.tf | 21 +- .../tests/slack-with-opsgenie/1-example.tf | 25 +-- .../tests/slack-with-opsgenie/2-assert.tf | 9 - .../tests/slack-with-opsgenie/README.md | 15 +- .../tests/slack/0-setup.tf | 17 +- .../tests/slack/1-example.tf | 4 +- .../tests/slack/2-assert.tf | 9 - .../alert-notifications/tests/slack/README.md | 14 +- .../tests/autoscaling-max-usage/0-setup.tf | 13 +- .../tests/autoscaling-max-usage/2-assert.tf | 9 - .../tests/autoscaling-max-usage/README.md | 13 +- .../tests/available-replica-count/0-setup.tf | 13 +- .../tests/available-replica-count/2-assert.tf | 9 - .../tests/available-replica-count/README.md | 13 +- .../tests/container-restarts/0-setup.tf | 13 +- .../tests/container-restarts/2-assert.tf | 9 - .../tests/container-restarts/README.md | 13 +- .../alert-rules/tests/expressions/0-setup.tf | 13 +- .../alert-rules/tests/expressions/2-assert.tf | 9 - .../alert-rules/tests/expressions/README.md | 13 +- .../tests/mixed-metrics/0-setup.tf | 13 +- .../tests/mixed-metrics/2-assert.tf | 9 - .../alert-rules/tests/mixed-metrics/README.md | 13 +- .../tests/node-autoscaling/0-setup.tf | 13 +- .../tests/node-autoscaling/2-assert.tf | 9 - .../tests/node-autoscaling/README.md | 13 +- tests/base/0-setup.tf | 2 +- 39 files changed, 224 
insertions(+), 414 deletions(-) delete mode 100644 modules/alert-contact-points/tests/mixed-contact-points/2-assert.tf delete mode 100644 modules/alert-contact-points/tests/opsgenie/2-assert.tf delete mode 100644 modules/alert-contact-points/tests/slack/2-assert.tf delete mode 100644 modules/alert-notifications/tests/slack-with-opsgenie/2-assert.tf delete mode 100644 modules/alert-notifications/tests/slack/2-assert.tf delete mode 100644 modules/alert-rules/tests/autoscaling-max-usage/2-assert.tf delete mode 100644 modules/alert-rules/tests/available-replica-count/2-assert.tf delete mode 100644 modules/alert-rules/tests/container-restarts/2-assert.tf delete mode 100644 modules/alert-rules/tests/expressions/2-assert.tf delete mode 100644 modules/alert-rules/tests/mixed-metrics/2-assert.tf delete mode 100644 modules/alert-rules/tests/node-autoscaling/2-assert.tf diff --git a/README.md b/README.md index 2c73a02..1ad6cfc 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ More parts are coming soon. 
```hcl module "grafana_monitoring" { source = "dasmeta/grafana/onpremise" - version = "1.2.0" + version = "1.7.0" name = "Test-dashboard" @@ -50,161 +50,70 @@ module "grafana_monitoring" { } ``` -## Example for Alert Rules +## Example for Alerts ``` module "grafana_alerts" { source = "dasmeta/grafana/onpremise//modules/alerts" - version = "1.0.0" - - alert_rules = [ - { - name = "App_1 has 0 available replicas" - folder_name = "Replica Count" - datasource = "prometheus" - metric_name = "kube_deployment_status_replicas_available" - filters = { - deployment = "app-1-microservice" - } - function = "last" - equation = "lt" - threshold = 1 - }, - { - name = "Nginx Expressions" - folder_name = "Nginx Expressions Group" - datasource = "prometheus" - expr = "sum(rate(nginx_ingress_controller_requests{status=~'5..'}[1m])) by (ingress,cluster) / sum(rate(nginx_ingress_controller_requests[1m]))by (ingress) * 100 > 5" - function = "mean" - equation = "gt" - threshold = 2 - }, - ] -} -``` - -## Example for Contact Points -``` -module "grafana_contact_points" { - source = "dasmeta/grafana/onpremise//modules/contact-points" - - opsgenie_endpoints = [ - { - name = "Dev OpsGenie" - api_key = "asdARdszgads1235fsdad" - auto_close = true - }, - { - name = "Stage OpsGenie" - api_key = "werARdsswefazgads12dad" - } - ] - slack_endpoints = [ - { - name = "Dev Notifications" - webhook_url = "https://hooks.slack.com/services/T6safsfFSF2352SFzdn" - } - ] -} -``` - -## Example for Notifications -``` -module "grafana_contact_points" { - source = "dasmeta/grafana/onpremise//modules/notifications" - - notifications = { - contact_point = "Slack" - group_by = ["alertname"] - group_interval = "10m" - repeat_interval = "1h" - - policy = { - contact_point = "Opsgenie" - continue = false - - matcher = { - label = "priority" - match = "=" - value = "P1" - } - } - } -} -``` - -## Example for all submodules together -``` -module "grafana_alerts" { - source = "dasmeta/grafana/onpremise" + version = 
"1.7.0" - alert_rules = [ - { - name = "App_1 has 0 available replicas" - folder_name = "Test" - datasource = "prometheus" - metric_name = "kube_deployment_status_replicas_available" - filters = { - deployment = "app-1-microservice" - } - function = "last" - equation = "lt" - threshold = 1 - }, - { - name = "App_2 has 0 available replicas" - folder_name = "Test" - datasource = "prometheus" - metric_name = "kube_deployment_status_replicas_available" - filters = { - deployment = "app-2-microservice" - } - function = "last" - equation = "lt" - threshold = 1 - } - ] - - opsgenie_endpoints = [ - { - name = "Dev OpsGenie" - api_key = "asdARdszgads1235fsdad" - auto_close = true - }, - { - name = "Stage OpsGenie" - api_key = "werARdsswefazgads12dad" - } - ] - - slack_endpoints = [ - { - name = "Dev Notifications" - webhook_url = "https://hooks.slack.com/services/T6safsfFSF2352SFzdn" + alerts = { + rules = [ + { + name = "App_1 has 0 available replicas" + folder_name = "Replica Count" + datasource = "prometheus" + metric_name = "kube_deployment_status_replicas_available" + filters = { + deployment = "app-1-microservice" + } + function = "last" + equation = "lt" + threshold = 1 + }, + { + name = "Nginx Expressions" + folder_name = "Nginx Expressions Group" + datasource = "prometheus" + expr = "sum(rate(nginx_ingress_controller_requests{status=~'5..'}[1m])) by (ingress,cluster) / sum(rate(nginx_ingress_controller_requests[1m]))by (ingress) * 100 > 5" + function = "mean" + equation = "gt" + threshold = 2 + }, + ] + contact_points = { + opsgenie = [ + { + name = "opsgenie" + api_key = "xxxxxxxxxxxxxxxx" + auto_close = true + } + ] + slack = [ + { + name = "slack" + webhook_url = "https://hooks.slack.com/services/xxxxxxxxxxxxxxxx" + } + ] } - ] - - notifications = { - contact_point = "Slack" - group_by = ["alertname"] - group_interval = "10m" - repeat_interval = "1h" - - policy = { - contact_point = "Opsgenie" - continue = false - - matcher = { - label = "priority" - match = "=" 
- value = "P1" - } + notifications = { + contact_point : "slack" + "policies" : [ + { + contact_point : "opsgenie" + matchers : [{ label : "priority", match : "=", value : "P1" }] + }, + { + "contact_point" : "slack" + } + ] } } } ``` ## Usage -Check `./tests`, `modules/alerts/tests`, `modules/contact-points/tests` and `modules/notifications/tests` folders to see more examples. +Check `./tests`, `modules/alert-rules/tests`, `modules/alert-contact-points/tests` and `modules/alert-notifications/tests` folders to see more examples. + ## Requirements diff --git a/modules/alert-contact-points/tests/mixed-contact-points/0-setup.tf b/modules/alert-contact-points/tests/mixed-contact-points/0-setup.tf index 4cee101..491544a 100644 --- a/modules/alert-contact-points/tests/mixed-contact-points/0-setup.tf +++ b/modules/alert-contact-points/tests/mixed-contact-points/0-setup.tf @@ -1,16 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { source = "grafana/grafana" - version = ">= 1.40.0" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "xxxxxxx" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-contact-points/tests/mixed-contact-points/2-assert.tf b/modules/alert-contact-points/tests/mixed-contact-points/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-contact-points/tests/mixed-contact-points/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." 
- got = "all good" - want = "all good" - } -} diff --git a/modules/alert-contact-points/tests/mixed-contact-points/README.md b/modules/alert-contact-points/tests/mixed-contact-points/README.md index 020190f..1a973da 100644 --- a/modules/alert-contact-points/tests/mixed-contact-points/README.md +++ b/modules/alert-contact-points/tests/mixed-contact-points/README.md @@ -5,13 +5,12 @@ This test creates multiple contact points which have different types of integrat | Name | Version | |------|---------| -| [grafana](#requirement\_grafana) | >= 1.40.0 | +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -21,9 +20,7 @@ This test creates multiple contact points which have different types of integrat ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. ## Inputs diff --git a/modules/alert-contact-points/tests/opsgenie/0-setup.tf b/modules/alert-contact-points/tests/opsgenie/0-setup.tf index 4cee101..491544a 100644 --- a/modules/alert-contact-points/tests/opsgenie/0-setup.tf +++ b/modules/alert-contact-points/tests/opsgenie/0-setup.tf @@ -1,16 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { source = "grafana/grafana" - version = ">= 1.40.0" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "xxxxxxx" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-contact-points/tests/opsgenie/1-example.tf b/modules/alert-contact-points/tests/opsgenie/1-example.tf index ba83836..b7916c6 100644 --- a/modules/alert-contact-points/tests/opsgenie/1-example.tf +++ 
b/modules/alert-contact-points/tests/opsgenie/1-example.tf @@ -3,13 +3,9 @@ module "this" { opsgenie_endpoints = [ { - name = "Dev OpsGenie" - api_key = "asdARdszgads1235fsdad" + name = "opsgenie" + api_key = "xxxxxxxx" auto_close = true - }, - { - name = "Stage OpsGenie" - api_key = "werARdsswefazgads12dad" } ] } diff --git a/modules/alert-contact-points/tests/opsgenie/2-assert.tf b/modules/alert-contact-points/tests/opsgenie/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-contact-points/tests/opsgenie/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-contact-points/tests/opsgenie/README.md b/modules/alert-contact-points/tests/opsgenie/README.md index 551e608..2625dfd 100644 --- a/modules/alert-contact-points/tests/opsgenie/README.md +++ b/modules/alert-contact-points/tests/opsgenie/README.md @@ -7,13 +7,12 @@ OpsGenie authentication can be done using OpsGenie API Key by passing the value | Name | Version | |------|---------| -| [grafana](#requirement\_grafana) | >= 1.40.0 | +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -23,9 +22,7 @@ OpsGenie authentication can be done using OpsGenie API Key by passing the value ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. 
## Inputs diff --git a/modules/alert-contact-points/tests/slack/0-setup.tf b/modules/alert-contact-points/tests/slack/0-setup.tf index 4cee101..491544a 100644 --- a/modules/alert-contact-points/tests/slack/0-setup.tf +++ b/modules/alert-contact-points/tests/slack/0-setup.tf @@ -1,16 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { source = "grafana/grafana" - version = ">= 1.40.0" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "xxxxxxx" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-contact-points/tests/slack/1-example.tf b/modules/alert-contact-points/tests/slack/1-example.tf index cc1bb73..215c40b 100644 --- a/modules/alert-contact-points/tests/slack/1-example.tf +++ b/modules/alert-contact-points/tests/slack/1-example.tf @@ -3,13 +3,8 @@ module "this" { slack_endpoints = [ { - name = "Dev Notifications" - webhook_url = "https://hooks.slack.com/services/T6safsfFSF2352SFzdn" - }, - { - name = "Prod Notifications" - recipient = "prod-channel" - token = "xxxxxxxxx" + name = "slack" + webhook_url = "https://hooks.slack.com/services/xxxxxxxxx" } ] } diff --git a/modules/alert-contact-points/tests/slack/2-assert.tf b/modules/alert-contact-points/tests/slack/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-contact-points/tests/slack/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." 
- got = "all good" - want = "all good" - } -} diff --git a/modules/alert-contact-points/tests/slack/README.md b/modules/alert-contact-points/tests/slack/README.md index e8a78f5..972d3a6 100644 --- a/modules/alert-contact-points/tests/slack/README.md +++ b/modules/alert-contact-points/tests/slack/README.md @@ -7,13 +7,12 @@ Slack authentication can be done using either a `token` and `recipient` or a `we | Name | Version | |------|---------| -| [grafana](#requirement\_grafana) | >= 1.40.0 | +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -23,9 +22,7 @@ Slack authentication can be done using either a `token` and `recipient` or a `we ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. ## Inputs diff --git a/modules/alert-notifications/tests/slack-with-opsgenie/0-setup.tf b/modules/alert-notifications/tests/slack-with-opsgenie/0-setup.tf index 857db77..3384d22 100644 --- a/modules/alert-notifications/tests/slack-with-opsgenie/0-setup.tf +++ b/modules/alert-notifications/tests/slack-with-opsgenie/0-setup.tf @@ -1,15 +1,24 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "xxxxxxxxxxx" + url = "http://localhost:3000" + auth = "admin:admin" +} + +module "slack_contact_points" { + source = "../../../alert-contact-points/tests/slack" +} + +module "opsgenie_contact_points" { + source = "../../../alert-contact-points/tests/opsgenie" } diff --git 
a/modules/alert-notifications/tests/slack-with-opsgenie/1-example.tf b/modules/alert-notifications/tests/slack-with-opsgenie/1-example.tf index 14298fd..2794e48 100644 --- a/modules/alert-notifications/tests/slack-with-opsgenie/1-example.tf +++ b/modules/alert-notifications/tests/slack-with-opsgenie/1-example.tf @@ -2,20 +2,21 @@ module "this" { source = "../../" notifications = { - contact_point = "Slack" + contact_point = "slack" group_by = ["alertname"] - group_interval = "10m" - repeat_interval = "1h" + group_interval = "1m" + repeat_interval = "1m" - policy = { - contact_point = "Opsgenie" - continue = false - - matcher = { - label = "priority" - match = "=" - value = "P1" + policies = [ + { + contact_point = "opsgenie" + matchers = [{ label = "priority", match = "=", value = "P1" }] + }, + { + contact_point = "slack" } - } + ] } + + depends_on = [module.opsgenie_contact_points, module.slack_contact_points] } diff --git a/modules/alert-notifications/tests/slack-with-opsgenie/2-assert.tf b/modules/alert-notifications/tests/slack-with-opsgenie/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-notifications/tests/slack-with-opsgenie/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-notifications/tests/slack-with-opsgenie/README.md b/modules/alert-notifications/tests/slack-with-opsgenie/README.md index 9fba39d..cc4456a 100644 --- a/modules/alert-notifications/tests/slack-with-opsgenie/README.md +++ b/modules/alert-notifications/tests/slack-with-opsgenie/README.md @@ -5,25 +5,26 @@ Alerts with the label `priority = P1` will be sent to Opsgenie, while all other ## Requirements -No requirements. 
+| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules | Name | Source | Version | |------|--------|---------| +| [opsgenie\_contact\_points](#module\_opsgenie\_contact\_points) | ../../../alert-contact-points/tests/opsgenie | n/a | +| [slack\_contact\_points](#module\_slack\_contact\_points) | ../../../alert-contact-points/tests/slack | n/a | | [this](#module\_this) | ../../ | n/a | ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. ## Inputs diff --git a/modules/alert-notifications/tests/slack/0-setup.tf b/modules/alert-notifications/tests/slack/0-setup.tf index 857db77..8aeae94 100644 --- a/modules/alert-notifications/tests/slack/0-setup.tf +++ b/modules/alert-notifications/tests/slack/0-setup.tf @@ -1,15 +1,20 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "xxxxxxxxxxx" + url = "http://localhost:3000" + auth = "admin:admin" +} + +module "contact_points" { + source = "../../../alert-contact-points/tests/slack" } diff --git a/modules/alert-notifications/tests/slack/1-example.tf b/modules/alert-notifications/tests/slack/1-example.tf index 2b07fc2..1f1080e 100644 --- a/modules/alert-notifications/tests/slack/1-example.tf +++ b/modules/alert-notifications/tests/slack/1-example.tf @@ -2,9 +2,11 @@ module "this" { source = "../../" notifications = { - contact_point = "Slack" + contact_point = "slack" group_by = ["..."] group_interval = "10m" repeat_interval = 
"1h" } + + depends_on = [module.contact_points] } diff --git a/modules/alert-notifications/tests/slack/2-assert.tf b/modules/alert-notifications/tests/slack/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-notifications/tests/slack/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-notifications/tests/slack/README.md b/modules/alert-notifications/tests/slack/README.md index 9ec44b0..b87bdd4 100644 --- a/modules/alert-notifications/tests/slack/README.md +++ b/modules/alert-notifications/tests/slack/README.md @@ -3,25 +3,25 @@ This example creates a default notification policy for the Slack contact point. ## Requirements -No requirements. +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules | Name | Source | Version | |------|--------|---------| +| [contact\_points](#module\_contact\_points) | ../../../alert-contact-points/tests/slack | n/a | | [this](#module\_this) | ../../ | n/a | ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. 
## Inputs diff --git a/modules/alert-rules/tests/autoscaling-max-usage/0-setup.tf b/modules/alert-rules/tests/autoscaling-max-usage/0-setup.tf index bc7155b..491544a 100644 --- a/modules/alert-rules/tests/autoscaling-max-usage/0-setup.tf +++ b/modules/alert-rules/tests/autoscaling-max-usage/0-setup.tf @@ -1,15 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "glsa_1Ynadeezh63x3_asfsafaf23dsaad4" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-rules/tests/autoscaling-max-usage/2-assert.tf b/modules/alert-rules/tests/autoscaling-max-usage/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-rules/tests/autoscaling-max-usage/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-rules/tests/autoscaling-max-usage/README.md b/modules/alert-rules/tests/autoscaling-max-usage/README.md index 6df7ea3..518876d 100644 --- a/modules/alert-rules/tests/autoscaling-max-usage/README.md +++ b/modules/alert-rules/tests/autoscaling-max-usage/README.md @@ -7,13 +7,14 @@ For each microservice, we have specified a filter to match the deployment name ( ## Requirements -No requirements. 
+| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -23,9 +24,7 @@ No requirements. ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. ## Inputs diff --git a/modules/alert-rules/tests/available-replica-count/0-setup.tf b/modules/alert-rules/tests/available-replica-count/0-setup.tf index bc7155b..491544a 100644 --- a/modules/alert-rules/tests/available-replica-count/0-setup.tf +++ b/modules/alert-rules/tests/available-replica-count/0-setup.tf @@ -1,15 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "glsa_1Ynadeezh63x3_asfsafaf23dsaad4" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-rules/tests/available-replica-count/2-assert.tf b/modules/alert-rules/tests/available-replica-count/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-rules/tests/available-replica-count/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." 
- got = "all good" - want = "all good" - } -} diff --git a/modules/alert-rules/tests/available-replica-count/README.md b/modules/alert-rules/tests/available-replica-count/README.md index 7af3675..98105ab 100644 --- a/modules/alert-rules/tests/available-replica-count/README.md +++ b/modules/alert-rules/tests/available-replica-count/README.md @@ -9,13 +9,14 @@ The `eqaution`, `threshold` parameters are used to check if the available replic ## Requirements -No requirements. +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -25,9 +26,7 @@ No requirements. ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. ## Inputs diff --git a/modules/alert-rules/tests/container-restarts/0-setup.tf b/modules/alert-rules/tests/container-restarts/0-setup.tf index bc7155b..491544a 100644 --- a/modules/alert-rules/tests/container-restarts/0-setup.tf +++ b/modules/alert-rules/tests/container-restarts/0-setup.tf @@ -1,15 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "glsa_1Ynadeezh63x3_asfsafaf23dsaad4" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-rules/tests/container-restarts/2-assert.tf b/modules/alert-rules/tests/container-restarts/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-rules/tests/container-restarts/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - 
component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-rules/tests/container-restarts/README.md b/modules/alert-rules/tests/container-restarts/README.md index ee63cfa..b0e6bce 100644 --- a/modules/alert-rules/tests/container-restarts/README.md +++ b/modules/alert-rules/tests/container-restarts/README.md @@ -9,13 +9,14 @@ The `eqaution`, `threshold` parameters are employed to check if the restart coun ## Requirements -No requirements. +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -25,9 +26,7 @@ No requirements. ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. 
## Inputs diff --git a/modules/alert-rules/tests/expressions/0-setup.tf b/modules/alert-rules/tests/expressions/0-setup.tf index 0cc3f74..491544a 100644 --- a/modules/alert-rules/tests/expressions/0-setup.tf +++ b/modules/alert-rules/tests/expressions/0-setup.tf @@ -1,15 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "glsa_xxxxxxxxxxxxxx" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-rules/tests/expressions/2-assert.tf b/modules/alert-rules/tests/expressions/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-rules/tests/expressions/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-rules/tests/expressions/README.md b/modules/alert-rules/tests/expressions/README.md index 0e67cd3..036dda3 100644 --- a/modules/alert-rules/tests/expressions/README.md +++ b/modules/alert-rules/tests/expressions/README.md @@ -3,13 +3,14 @@ ## Requirements -No requirements. +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -19,9 +20,7 @@ No requirements. ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. 
## Inputs diff --git a/modules/alert-rules/tests/mixed-metrics/0-setup.tf b/modules/alert-rules/tests/mixed-metrics/0-setup.tf index bc7155b..491544a 100644 --- a/modules/alert-rules/tests/mixed-metrics/0-setup.tf +++ b/modules/alert-rules/tests/mixed-metrics/0-setup.tf @@ -1,15 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "glsa_1Ynadeezh63x3_asfsafaf23dsaad4" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-rules/tests/mixed-metrics/2-assert.tf b/modules/alert-rules/tests/mixed-metrics/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-rules/tests/mixed-metrics/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-rules/tests/mixed-metrics/README.md b/modules/alert-rules/tests/mixed-metrics/README.md index 3c9f87c..6174836 100644 --- a/modules/alert-rules/tests/mixed-metrics/README.md +++ b/modules/alert-rules/tests/mixed-metrics/README.md @@ -9,13 +9,14 @@ This test case demonstrates how to configure Grafana alerts to monitor various a ## Requirements -No requirements. +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -25,9 +26,7 @@ No requirements. 
## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. ## Inputs diff --git a/modules/alert-rules/tests/node-autoscaling/0-setup.tf b/modules/alert-rules/tests/node-autoscaling/0-setup.tf index 857db77..491544a 100644 --- a/modules/alert-rules/tests/node-autoscaling/0-setup.tf +++ b/modules/alert-rules/tests/node-autoscaling/0-setup.tf @@ -1,15 +1,16 @@ terraform { + required_version = ">= 1.3.0" + required_providers { - test = { - source = "terraform.io/builtin/test" - } grafana = { - source = "grafana/grafana" + source = "grafana/grafana" + version = ">= 3.7.0" } } } +# you can start dev grafana server locally using `docker compose up -d` from `/tests` folder before running the test locally provider "grafana" { - url = "https://grafana.example.com/" - auth = "xxxxxxxxxxx" + url = "http://localhost:3000" + auth = "admin:admin" } diff --git a/modules/alert-rules/tests/node-autoscaling/2-assert.tf b/modules/alert-rules/tests/node-autoscaling/2-assert.tf deleted file mode 100644 index 302130e..0000000 --- a/modules/alert-rules/tests/node-autoscaling/2-assert.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "test_assertions" "dummy" { - component = "grafana-modules-alerts" - - equal "scheme" { - description = "As module does not have any output and data just make sure the case runs. Probably can be thrown away." - got = "all good" - want = "all good" - } -} diff --git a/modules/alert-rules/tests/node-autoscaling/README.md b/modules/alert-rules/tests/node-autoscaling/README.md index dca473c..c4b83e9 100644 --- a/modules/alert-rules/tests/node-autoscaling/README.md +++ b/modules/alert-rules/tests/node-autoscaling/README.md @@ -14,13 +14,14 @@ Please, note that we pass `null` value to `filters` variable. It's needed when w ## Requirements -No requirements. 
+| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [grafana](#requirement\_grafana) | >= 3.7.0 | ## Providers -| Name | Version | -|------|---------| -| [test](#provider\_test) | n/a | +No providers. ## Modules @@ -30,9 +31,7 @@ No requirements. ## Resources -| Name | Type | -|------|------| -| test_assertions.dummy | resource | +No resources. ## Inputs diff --git a/tests/base/0-setup.tf b/tests/base/0-setup.tf index 87682de..491544a 100644 --- a/tests/base/0-setup.tf +++ b/tests/base/0-setup.tf @@ -9,7 +9,7 @@ terraform { } } -# please start grafana locally using `docker compose up -d` (the compose.yaml is in ./tests folder) before running the test +# you can start a dev Grafana server locally by running `docker compose up -d` from the repository's `./tests` folder (where compose.yaml lives) before running the test provider "grafana" { url = "http://localhost:3000" auth = "admin:admin"