Add ability to export thresholds (#44)

* Add ability to export thresholds * Disable enable_scrape_thresholds in example config file * Update README with enable_scrape_thresholds --------- Co-authored-by: Martin <martin@Martins-MacBook-Air.local>
opsdis · Nov 14, 2024 · b2e4382 · b2e4382
1 parent cc67040
commit b2e4382
Show file tree

Hide file tree

Showing 5 changed files with 166 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -24,9 +24,10 @@ Benefits:
 ## Metric names
 Metrics that is scraped with the icinga2-exporter will have the following name structure:
 
-    icinga2_<check_command>_<perfname>_<unit>
+    <metric_prefix>_<check_command>_<perfname>_<unit>
 
-> The icinga2 prefix can be changed by the configuration
+> The metric_prefix can be changed by the configuration, defaults to icinga2
+> 
 > Unit is only added if it exists on performance data
 
 Example from check command `check_ping` will result in two metrics:
@@ -111,40 +112,77 @@ Example:
 #port: 9638
 
 icinga2:
-  # The url to the icinga2 server
-  url: https://127.0.0.1:5665
-  user: root
-  passwd: cf593406ffcfd2ef
-  # All prometheus metrics will be prefixed with this string
-  metric_prefix: icinga2
-  # Example of custom vars that should be added as labels and how to be translated
-  host_custom_vars:
-    # Specify which custom_vars to extract from hosts in icinga2
-    - env:
-        # Name of the label in Prometheus
-        label_name: environment
-    - site:
-        label_name: dc
-
-  # This section enable that for specific check commands the perfdata metrics name will not be part of the
-  # prometheus metrics name, instead moved to a label
-  # E.g for the disk command the perfdata name will be set to the label disk like:
-  # icinga2_disk_bytes{hostname="icinga2", service="disk", os="Docker", disk="/var/log/icinga2"}
-  perfnametolabel:
+   # The url to the icinga2 server
+   url: https://127.0.0.1:5665
+   # The icinga2 username
+   user: root
+   # The icinga2 password
+   passwd: cf593406ffcfd2ef
+   # Verify the ssl certificate, default false
+   verify: false
+   # Timeout accessing icinga server, default 5 sec
+   timeout: 5
+   # All prometheus metrics will be prefixed with this string
+   metric_prefix: icinga2
+   # Enables a separate request to fetch host metadata like state and state_type. Default false
+   enable_scrape_metadata: true
+   # Enables export of warning and critical threshold values. Default false
+   enable_scrape_thresholds: false
+
+   # Set the service name for host check metric, default is alive - only change this if there is a name conflict with
+   # other services
+   # host_check_service_name: alive
+
+   # Example of host custom variables that should be added as labels and how they should be translated
+   host_custom_vars:
+      # Specify which custom_vars to extract from hosts in icinga2
+      - env:
+           # Name of the label in Prometheus
+           label_name: environment
+      - site:
+           label_name: dc
+
+   # This section makes the perfdata metric name for specific check commands a label instead of being part of the
+   # prometheus metric name
+   # E.g. for the disk command, with the configuration below, the perfdata name will be set to the label mount like:
+   # icinga2_disk_bytes{hostname="icinga2", service="disk", os="Docker", mount="/var/log/icinga2"}
+   perfnametolabel:
       # The command name
       disk:
-        # the label name to be used
-        label: mount
+         # the label name to be used
+         label_name: mount
 
 logger:
-  # Path and name for the log file. If not set send to stdout
-  logfile: /var/tmp/icinga2-exporter.log
-  # Log level
-  level: INFO
+   # Path and name for the log file. If not set send to stdout
+   logfile: /var/tmp/icinga2-exporter.log
+   # Log level
+   level: INFO
 ```
 
 > When running with gunicorn the port is selected by gunicorn
 
+## enable_scrape_thresholds
+
+Set this to `true` to scrape warning and critical threshold values.
+
+Thresholds that are scraped with the icinga2-exporter will have the following name structure:
+
+    <metric_prefix>_<check_command>_<perfname>_<unit>_threshold_critical
+    <metric_prefix>_<check_command>_<perfname>_<unit>_threshold_warning
+
+> The metric_prefix can be changed by the configuration, defaults to icinga2
+> 
+> Unit is only added if it exists on performance data
+
+Example from check command `check_ping` will result in two metrics together with metrics for critical and warning thresholds:
+
+    icinga2_ping4_rta_seconds
+    icinga2_ping4_rta_seconds_threshold_critical
+    icinga2_ping4_rta_seconds_threshold_warning
+    icinga2_ping4_pl_ratio
+    icinga2_ping4_pl_ratio_threshold_critical
+    icinga2_ping4_pl_ratio_threshold_warning
+
 ## Logging
 
 The log stream is configure in the above config. If `logfile` is not set the logs will go to stdout.

diff --git a/config.yml b/config.yml
@@ -1,5 +1,5 @@
-# Port can be overridden by using -p if running development flask
-#port: 9631
+# Port can be overridden by using -p if running development quart
+#port: 9638
 
 icinga2:
   # The url to the icinga2 server
@@ -14,8 +14,10 @@ icinga2:
   timeout: 5
   # All prometheus metrics will be prefixed with this string
   metric_prefix: icinga2
-  # Enables a separate request to fetch host metadata like state and state_type
+  # Enables a separate request to fetch host metadata like state and state_type. Default false
   enable_scrape_metadata: true
+  # Enables export of warning and critical threshold values. Default false
+  enable_scrape_thresholds: false
 
   # Set the service name for host check metric, default is alive - only change this if it is a name conflict with other
   # services

diff --git a/icinga2_exporter/monitorconnection.py b/icinga2_exporter/monitorconnection.py
@@ -71,6 +71,7 @@ def __init__(self, config=None):
         self.url_query_service_perfdata = ''
         self.perfname_to_label = []
         self.host_check_service_name = 'alive'
+        self.enable_scrape_thresholds = False
 
         if config:
             self.user = config[MonitorConfig.config_entry]['user']
@@ -90,10 +91,15 @@ def __init__(self, config=None):
                 self.enable_scrape_metadata = bool(config[MonitorConfig.config_entry]['enable_scrape_metadata'])
             if 'host_check_service_name' in config[MonitorConfig.config_entry]:
                 self.host_check_service_name = config[MonitorConfig.config_entry]['host_check_service_name']
+            if 'enable_scrape_thresholds' in config[MonitorConfig.config_entry]:
+                self.enable_scrape_thresholds = bool(config[MonitorConfig.config_entry]['enable_scrape_thresholds'])
 
             self.url_query_service_perfdata = self.host + '/v1/objects/services'
             self.url_query_host_metadata = self.host + '/v1/objects/hosts/{hostname}'
 
+    def get_enable_scrape_thresholds(self):
+        return self.enable_scrape_thresholds
+
     def get_enable_scrape_metadata(self):
         return self.enable_scrape_metadata
 
@@ -190,4 +196,3 @@ async def async_post(self, url, body = None) -> Dict[str, Any]:
             raise ScrapeExecption(message=f"Timeout after {self.timeout} sec", err=err, url=self.host)
         except ClientConnectorError as err:
             raise ScrapeExecption(message="Connection error", err=err, url=self.host)
-
diff --git a/icinga2_exporter/perfdata.py b/icinga2_exporter/perfdata.py
@@ -45,6 +45,7 @@ def __init__(self, monitor: Monitor, query_hostname: str):
         self.configured_labels = monitor.get_labels()
         self.perfname_to_label = monitor.get_perfname_to_label()
         self.perfdatadict = {}
+        self.enable_scrape_thresholds = monitor.get_enable_scrape_thresholds()
 
     def add_perfdata(self, key: str, labels: Dict[str, str], value: float):
         labels_str = ""
@@ -113,6 +114,45 @@ async def get_service_metrics(self) -> dict:
 
                                 self.perfdatadict.update({prometheus_key_with_labels: str(perf_data_value['value'])})
 
+                            # Export threshold values from perfdata if enabled
+                            if self.enable_scrape_thresholds:
+
+                                if 'crit' in perf_data_value:
+                                    threshold_value = perf_data_value.get('crit')
+
+                                    prometheus_key = self.format_prometheus_metrics_name(
+                                        check_command, perf_data_key,perf_data_value) + '_threshold_critical'
+
+                                    # Add more labels based on perfname
+                                    if check_command in self.perfname_to_label:
+                                        labels.update(
+                                            Perfdata.add_labels_by_items(
+                                                self.perfname_to_label[check_command]['label_name'],
+                                                perf_data_key))
+
+                                    prometheus_key_with_labels = Perfdata.concat_metrics_name_and_labels(labels,
+                                                                                                         prometheus_key)
+
+                                    self.perfdatadict.update({prometheus_key_with_labels: str(threshold_value)})
+
+                                if 'warn' in perf_data_value:
+                                    threshold_value = perf_data_value.get('warn')
+
+                                    prometheus_key = self.format_prometheus_metrics_name(
+                                        check_command, perf_data_key, perf_data_value) + '_threshold_warning'
+
+                                    # Add more labels based on perfname
+                                    if check_command in self.perfname_to_label:
+                                        labels.update(
+                                            Perfdata.add_labels_by_items(
+                                                self.perfname_to_label[check_command]['label_name'],
+                                                perf_data_key))
+
+                                    prometheus_key_with_labels = Perfdata.concat_metrics_name_and_labels(labels,
+                                                                                                         prometheus_key)
+
+                                    self.perfdatadict.update({prometheus_key_with_labels: str(threshold_value)})
+
         return self.perfdatadict
 
     async def get_host_metrics(self) -> dict:
@@ -176,6 +216,45 @@ async def get_host_metrics(self) -> dict:
 
                                 self.perfdatadict.update({prometheus_key_with_labels: str(perf_data_value['value'])})
 
+                            # Export threshold values from perfdata if enabled
+                            if self.enable_scrape_thresholds:
+
+                                if 'crit' in perf_data_value:
+                                    threshold_value = perf_data_value.get('crit')
+
+                                    prometheus_key = self.format_prometheus_metrics_name(
+                                        check_command, perf_data_key,perf_data_value) + '_threshold_critical'
+
+                                    # Add more labels based on perfname
+                                    if check_command in self.perfname_to_label:
+                                        labels.update(
+                                            Perfdata.add_labels_by_items(
+                                                self.perfname_to_label[check_command]['label_name'],
+                                                perf_data_key))
+
+                                    prometheus_key_with_labels = Perfdata.concat_metrics_name_and_labels(labels,
+                                                                                                         prometheus_key)
+
+                                    self.perfdatadict.update({prometheus_key_with_labels: str(threshold_value)})
+
+                                if 'warn' in perf_data_value:
+                                    threshold_value = perf_data_value.get('warn')
+
+                                    prometheus_key = self.format_prometheus_metrics_name(
+                                        check_command, perf_data_key, perf_data_value) + '_threshold_warning'
+
+                                    # Add more labels based on perfname
+                                    if check_command in self.perfname_to_label:
+                                        labels.update(
+                                            Perfdata.add_labels_by_items(
+                                                self.perfname_to_label[check_command]['label_name'],
+                                                perf_data_key))
+
+                                    prometheus_key_with_labels = Perfdata.concat_metrics_name_and_labels(labels,
+                                                                                                         prometheus_key)
+
+                                    self.perfdatadict.update({prometheus_key_with_labels: str(threshold_value)})
+
         return self.perfdatadict
 
     def format_prometheus_metrics_name(self, check_command, key, value):
@@ -282,6 +361,14 @@ def parse_perf_string(s: str) -> dict:
                 norm_value, norm_unit = Perfdata.normalize_to_unit(float(value), uom)
                 metrics[key] = {'value': norm_value, 'unit': norm_unit}
 
+                # Parse critical and warning thresholds if they exist
+                if crit:
+                    norm_crit_value, norm_crit_unit = Perfdata.normalize_to_unit(float(crit), uom)
+                    metrics[key].update({'crit': norm_crit_value})
+                if warn:
+                    norm_warn_value, norm_warn_unit = Perfdata.normalize_to_unit(float(warn), uom)
+                    metrics[key].update({'warn': norm_warn_value})
+
             except ValueError:
                 log.warn(
                     "Couldn't convert value '{value}' to float".format(

diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,8 @@
-aiohttp==3.8.5
+aiohttp==3.10.10
 asyncio==3.4.3
 prometheus-client==0.14.1
 python-json-logger==2.0.2
-PyYAML==6.0
+PyYAML==6.0.2
 quart==0.17.0
 requests==2.31.0
 werkzeug==2.3.7