From 77dc58336b818c205f356bda25a7aa88c00318f3 Mon Sep 17 00:00:00 2001 From: Alex Dworjan Date: Wed, 8 Nov 2023 12:09:43 -0500 Subject: [PATCH] use new collection for prom graf --- .ansible-sign/sha256sum.txt | 1 + .ansible-sign/sha256sum.txt.sig | 22 ++-- Promgraf_collection.yml | 220 ++++++++++++++++++++++++++++++++ files/httpd.conf | 106 +++++++++++++++ 4 files changed, 338 insertions(+), 11 deletions(-) create mode 100644 Promgraf_collection.yml create mode 100644 files/httpd.conf diff --git a/.ansible-sign/sha256sum.txt b/.ansible-sign/sha256sum.txt index 5b45514..895a89e 100644 --- a/.ansible-sign/sha256sum.txt +++ b/.ansible-sign/sha256sum.txt @@ -6,6 +6,7 @@ c8066ae4c79ca812f0abde5b42d4a0bf954a60c88b3eab2149d75d98b567c931 Elastic_restar 936cb966490bb452f91f2cb5b0821ef31c4bc8e62cf2c57a2c11640b60db7e18 MANIFEST.in 2e796c430ae2e8f0d061770ae1f58b19aaa8aec077388a73051dd71a316c4218 Nodeexporter.yml 5d7e02fa3a7a73509e6626d581883804627acec2859ded3b934418dce963f6be Promgraf.yml +7f3d494d3bdd71f457c1c37711ab3da77b719e1bd8b8d298234110e3b243d108 Promgraf_collection.yml 798346886b7ec801a19ed365c33f092e2d5dfb564c034ae886bc51785c89b232 README.md 88223aecf0ee1bfa38a215a481e1dd48054d043a7b52b9fd81243f9699d6142c collections/requirements.yml bcc7a97fdb676c3f94875a674a87d25cd286cabba5820a5bb0a3eb228951b568 email.yml diff --git a/.ansible-sign/sha256sum.txt.sig b/.ansible-sign/sha256sum.txt.sig index fe62c60..8a9ef7d 100644 --- a/.ansible-sign/sha256sum.txt.sig +++ b/.ansible-sign/sha256sum.txt.sig @@ -1,14 +1,14 @@ -----BEGIN PGP SIGNATURE----- -iQGzBAABCAAdFiEE/bJvyFHKKJdaZDOLTiIiIXrUDNEFAmUxcnEACgkQTiIiIXrU -DNFy5QwAn/Yc1QfA/3ttI/71RG3fe+YYAUjcRHDNRsk3OOJ0beuZcOvin76ntexp -zWNIcKhz64fGTZqSFblmknSquBi9/5XyqPetfjJH0iNupx7Ix07WTKYPhjGGY/U5 -J2jacekjSiOVY7girnDo8hNC8bqilMLRI4wVw87KnC0bTAXkjHS7rjlBr86qSp2u -NO04sTI970rk5AjYWM49dBTyj8S+0vUPm2HbCD065PqQb2fuamA35b1gP1PJ/ZyV -lwRfGyWxzFfh+6oyGV2ZRk9EIHyUvTamP4Yh8JpF7Hxla+0e7yP9QFORyUEgZqBc 
-0xODFCIuRnWBQjNzwS6inCnKyOqA00I+QyZNnUbvSdyTFnG0YgPudP/kf3HHYFP+
-cuDMqB4RarsO9GYWHF9fg5Hx8nGC/XJeZ3ca6+eg05/NNAdASTdKyw1hr2AcFXQn
JFFD1n28UEIUEFo/rzTDJT4883tY7ep9lXnVucEwvJD0mAmaJQWNkUxeZWo0EfNH
qjyauLqH
-=BsNT
+iQGzBAABCAAdFiEE/bJvyFHKKJdaZDOLTiIiIXrUDNEFAmVLwMIACgkQTiIiIXrU
DNH8vQwAhXmsKVlOUb17c4NsnJM7l8mydX4jfOPqqCMTyaoLPMiXy5EsEbQSNJDS
epxCIu9yp1anX0GSfHyU8+H0Rf5lq9Jq/xdeKjTYuY+9/a772hG7BuKhggtoj7nH
q+68k27embvExSJKcgTWhT+RnhRQj4ZY0V8I4S+zS8lvLqO8UwGTAUNH3DzC3JlX
zV5ZcfYP4OKzvA3vMD1EadZoDBx5WQ7sBcTT8x6QDNTGALA0+rZ6fveSlPmulz23
AiaX01e82mIvWAnLr3ZB3oxjEWivDNoqJ3w5c2r0vk2drvV/GWsA9Sy559ZUOgjY
/5eWbQnEn+rq5a7S0xvBGz+/G2FbvcdUD4BghmXmZ6bj1WcPmn0Hxuh8As8dF+pR
6QNfsVwBkT3vKEZeMo9AMTKnh4yvAEhuse4vEGOrre03jOFXVGD45KGJELyZgGNl
NJu7wR6f1evuqdn5VaX9Q++UosYXHJVBgtXz/iFQHwijQ9NNLQVQf62G0uYwfF7l
XAX74rPk
+=MWVS
 -----END PGP SIGNATURE-----
diff --git a/Promgraf_collection.yml b/Promgraf_collection.yml
new file mode 100644
index 0000000..33c1c5d
--- /dev/null
+++ b/Promgraf_collection.yml
@@ -0,0 +1,220 @@
+---  # NOTE: content differs from the signed revision; regenerate the .ansible-sign checksum and signature
+- name: Install Prometheus and Grafana  # applies the prometheus, alertmanager and grafana roles, then installs httpd and opens firewalld ports
+  hosts: all
+  vars:
+    prometheus_alertmanager_config:
+      - static_configs:
+          - targets: ["promgraf.shadowman.dev:9093"]
+    prometheus_global:
+      scrape_interval: 5m
+      scrape_timeout: 10s
+      evaluation_interval: "{{ tower_scrape }}"
+    prometheus_scrape_configs:
+      - job_name: 'prometheus'
+        static_configs:
+          - targets: ['{{ inventory_hostname }}:9090']  # Prometheus requires targets to be a list of strings
+      - job_name: 'tower metrics'
+        metrics_path: /api/v2/metrics/
+        scrape_interval: '{{ tower_scrape }}'
+        scheme: https
+        bearer_token: "{{ bearer_token }}"
+        static_configs:
+          - targets: ['tower1.shadowman.dev:443']
+      - job_name: 'tower1.shadowman.dev'
+        scrape_interval: '{{ tower_scrape }}'
+        static_configs:
+          - targets: ['tower1.shadowman.dev:9100']
+      - job_name: 'tower2.shadowman.dev'
+        scrape_interval: '{{ tower_scrape }}'
+        static_configs:
+          - targets: ['tower2.shadowman.dev:9100']
+      - job_name: 'rhel8.shadowman.dev'
+        scrape_interval: '{{ tower_scrape }}'
+        static_configs:
+          - targets: ['rhel8.shadowman.dev:9100']
+    prometheus_alert_rules: # noqa yaml[line-length] # noqa line-length
+      - groups:
+          - name: selinux-rules
+            rules:
+              - alert: SELinuxDisabled
+                expr: node_selinux_current_mode == 0
+                for: '{{ tower_scrape }}'
+                labels:
+                  severity: critical
+                annotations:
+                  description: 'SELINUX Disabled for {% raw %}{{ $labels.job }}{% endraw %}.'
+                  summary: 'SELINUX Disabled (instance: {% raw %}{{ $labels.job }}{% endraw %})'
+      - groups:
+          - name: memory-rules
+            rules:
+              - alert: HostOutOfMemory
+                expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
+                for: '{{ tower_scrape }}'
+                labels:
+                  severity: warning
+                annotations:
+                  summary: 'Host out of memory (instance: {% raw %}{{ $labels.job }}{% endraw %})'
+                  description: 'Node memory is filling up (< 10% left) VALUE = {% raw %}{{ $value }}{% endraw %}'
+              - alert: HostTooMuchMemory
+                expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 > 90
+                for: '{{ tower_scrape }}'
+                labels:
+                  severity: warning
+                annotations:
+                  summary: 'Host too much free memory (instance: {% raw %}{{ $labels.job }}{% endraw %})'
+                  description: 'Node memory is too free (> 90% left) VALUE = {% raw %}{{ $value }}{% endraw %}'
+      - groups:
+          - name: exporter-rules
+            rules:
+              - alert: ExporterDown
+                expr: up == 0
+                for: '{{ tower_scrape }}'
+                labels:
+                  severity: critical
+                annotations:
+                  description: 'Metrics exporter service for {% raw %}{{ $labels.job }}{% endraw %} running on {% raw %}{{ $labels.instance }}{% endraw %} has been down for more than 5 minutes.'
+                  summary: 'Exporter down (instance: {% raw %}{{ $labels.job }}{% endraw %})'
+      - groups:
+          - name: disk-rules
+            rules:
+              - alert: HostOutOfDiskSpace
+                expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10
+                for: '{{ tower_scrape }}'
+                labels:
+                  severity: warning
+                annotations:
+                  summary: 'Host out of disk space (instance: {% raw %}{{ $labels.job }}{% endraw %})'
+                  description: 'Disk is almost full (< 10% left) VALUE = {% raw %}{{ $value }}{% endraw %}'
+      - groups:
+          - name: cpu-rules
+            rules:
+              - alert: HostHighCpuLoad
+                expr: 100 - (avg by(instance,job) (rate(node_cpu_seconds_total{mode="idle"}[{{ rate_number }}])) * 100) > {{ cpuload }}
+                for: '{{ tower_scrape }}'
+                labels:
+                  severity: critical
+                annotations:
+                  summary: 'Host high CPU load (instance: {% raw %}{{ $labels.job }}{% endraw %})'
+                  description: 'CPU load is > {{ cpuload }}% VALUE = {% raw %}{{ $value }}{% endraw %}'
+              - alert: HostLowCpuLoad
+                expr: 100 - (avg by(instance,job) (rate(node_cpu_seconds_total{mode="idle"}[{{ rate_number }}])) * 100) < .1
+                for: '{{ tower_scrape }}'
+                labels:
+                  severity: warning
+                annotations:
+                  summary: 'Host low CPU load (instance: {% raw %}{{ $labels.job }}{% endraw %})'
+                  description: 'CPU load is < .1% VALUE = {% raw %}{{ $value }}{% endraw %}'
+    alertmanager_receivers:
+      - name: snow
+        webhook_configs:
+          - url: "http://eda.shadowman.dev:8000/endpoint"
+            send_resolved: false
+    alertmanager_route:
+      group_by: ['instance', 'alert']  # NOTE(review): 'alert' is not a label set by the rules above; 'alertname' may be intended - confirm
+      group_wait: 5s
+      group_interval: 10s
+      repeat_interval: 3h
+      receiver: 'snow'
+    grafana_server:
+      protocol: https
+      cert_key: "/etc/grafana/shadowman_private.key"
+      cert_file: "/etc/grafana/shadowman_cert.cer"
+      enforce_domain: false
+      enable_gzip: false
+      static_root_path: public
+      router_logging: false
+      serve_from_sub_path: false
+
+  pre_tasks:  # NOTE(review): runs before the grafana role; assumes /etc/grafana and the grafana user already exist - confirm
+
+    - name: Copy Cert for grafana
+      ansible.builtin.copy:
+        src: /certs/shadowman_cert.cer
+        dest: /etc/grafana/shadowman_cert.cer
+        owner: grafana
+        group: grafana
+        mode: '0644'
+
+    - name: Copy Key for grafana
+      ansible.builtin.copy:
+        src: /certs/shadowman_private.key
+        dest: /etc/grafana/shadowman_private.key
+        owner: grafana
+        group: grafana
+        mode: '0600'  # private key: readable by the owning grafana user only (was world-readable 0644)
+
+  roles:
+    - prometheus.prometheus.prometheus
+    - prometheus.prometheus.alertmanager
+    - grafana.grafana.grafana
+
+  tasks:
+    - name: Copy Cert for prometheus
+      ansible.builtin.copy:
+        src: /certs/shadowman_cert.cer
+        dest: /certs/shadowman_cert.cer
+        owner: root
+        group: root
+        mode: '0644'
+
+    - name: Copy Key for prometheus
+      ansible.builtin.copy:
+        src: /certs/shadowman_private.key
+        dest: /certs/shadowman_private.key
+        owner: root
+        group: root
+        mode: '0600'  # private key (was 0644); NOTE(review): confirm no non-root service must read this file
+
+    - name: Install httpd
+      ansible.builtin.package:
+        name: httpd
+        state: present
+      notify: HTTPD_running
+
+    - name: Copy over httpd config
+      ansible.builtin.copy:
+        src: files/httpd.conf
+        dest: /etc/httpd/conf/httpd.conf
+        owner: root
+        group: root
+        mode: '0644'
+
+    - name: Open Firewalld for prometheus
+      ansible.posix.firewalld:
+        port: 9090/tcp
+        permanent: true
+        state: enabled
+      notify: restart_firewalld
+
+    - name: Open Firewalld for grafana
+      ansible.posix.firewalld:
+        port: 3000/tcp
+        permanent: true
+        state: enabled
+      notify: restart_firewalld
+
+    - name: Open Firewalld for prometheus https
+      ansible.posix.firewalld:
+        port: 9200/tcp  # files/httpd.conf listens on 9200 for the TLS proxy; 3000/tcp is already opened for grafana above
+        permanent: true
+        state: enabled
+      notify: restart_firewalld
+
+    - name: Open Firewalld for alertmanager
+      ansible.posix.firewalld:
+        port: 9093/tcp
+        permanent: true
+        state: enabled
+      notify: restart_firewalld
+
+  handlers:
+    - name: restart_firewalld  # permanent-only rule changes take effect after firewalld restarts
+      ansible.builtin.service:
+        name: firewalld
+        state: restarted
+
+    - name: HTTPD_running
+      ansible.builtin.service:
+        name: httpd
+        state: started
+        enabled: true
diff --git a/files/httpd.conf b/files/httpd.conf
new file mode 100644
index 0000000..0a9ba4f
--- /dev/null
+++ b/files/httpd.conf
@@ -0,0 +1,106 @@
+ServerRoot "/etc/httpd"
+Listen 80
+Include conf.modules.d/*.conf
+User apache
+Group apache
+ServerAdmin
root@localhost
+# NOTE(review): this empty line and the others bracketing indented directive groups appear to have lost their container tags (<Directory>, <IfModule>, <Files>, ...) - restore from the original file; TODO confirm
+    AllowOverride none
+    Require all denied
+
+DocumentRoot "/var/www/html"
+
+    AllowOverride None
+    # Allow open access:
+    Require all granted
+
+
+
+    Options Indexes FollowSymLinks
+    AllowOverride None
+    Require all granted
+
+
+
+    DirectoryIndex index.html
+
+
+
+    Require all denied
+
+
+ErrorLog "logs/error_log"
+
+LogLevel warn
+
+
+    LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
+    LogFormat "%h %l %u %t \"%r\" %>s %b" common
+
+
+      LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
+
+
+    #CustomLog "logs/access_log" common
+
+    CustomLog "logs/access_log" combined
+
+
+
+    ScriptAlias /cgi-bin/ "/var/www/cgi-bin/"
+
+
+
+
+    AllowOverride None
+    Options None
+    Require all granted
+
+
+
+    TypesConfig /etc/mime.types
+
+    AddType application/x-compress .Z
+    AddType application/x-gzip .gz .tgz
+
+    AddType text/html .shtml
+    AddOutputFilter INCLUDES .shtml
+
+
+AddDefaultCharset UTF-8
+
+
+    MIMEMagicFile conf/magic
+
+
+EnableSendfile on
+
+IncludeOptional conf.d/*.conf
+
+# NOTE(review): the single-argument ProxyPass/ProxyPassReverse below are only valid inside a <Location>-style container; the enclosing tag is not visible here - confirm
+    ProxyPass "http://localhost:9090"
+    ProxyPassReverse "http://localhost:9090"
+
+
+
+    Redirect "/alerts" "/prometheus/alerts"
+    Redirect "/api" "/prometheus/api"
+    Redirect "/config" "/prometheus/config"
+    Redirect "/flags" "/prometheus/flags"
+    Redirect "/graph" "/prometheus/graph"
+    Redirect "/rules" "/prometheus/rules"
+    Redirect "/static" "/prometheus/static"
+    Redirect "/status" "/prometheus/status"
+    Redirect "/targets" "/prometheus/targets"
+
+
+Listen 9200
+# NOTE(review): the directives below (proxy to localhost:9090, ServerName, SSLEngine, certificate paths) presumably sit inside a <VirtualHost> for port 9200; tag not visible here - confirm
+    ProxyPass /prometheus/ "http://localhost:9090"
+    ProxyPassReverse /prometheus/ "http://localhost:9090"
+    Redirect "/" "/prometheus/"
+    ServerName grafprom.shadowman.dev
+    SSLEngine on
+    SSLCertificateFile /certs/shadowman_cert.cer
+    SSLCertificateKeyFile /certs/shadowman_private.key
+
\ No newline at end of file