Skip to content

Commit

Permalink
use new collection for prom graf
Browse files Browse the repository at this point in the history
  • Loading branch information
adworjan committed Nov 8, 2023
1 parent 1da0dbe commit 77dc583
Show file tree
Hide file tree
Showing 4 changed files with 338 additions and 11 deletions.
1 change: 1 addition & 0 deletions .ansible-sign/sha256sum.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ c8066ae4c79ca812f0abde5b42d4a0bf954a60c88b3eab2149d75d98b567c931 Elastic_restar
936cb966490bb452f91f2cb5b0821ef31c4bc8e62cf2c57a2c11640b60db7e18 MANIFEST.in
2e796c430ae2e8f0d061770ae1f58b19aaa8aec077388a73051dd71a316c4218 Nodeexporter.yml
5d7e02fa3a7a73509e6626d581883804627acec2859ded3b934418dce963f6be Promgraf.yml
7f3d494d3bdd71f457c1c37711ab3da77b719e1bd8b8d298234110e3b243d108 Promgraf_collection.yml
798346886b7ec801a19ed365c33f092e2d5dfb564c034ae886bc51785c89b232 README.md
88223aecf0ee1bfa38a215a481e1dd48054d043a7b52b9fd81243f9699d6142c collections/requirements.yml
bcc7a97fdb676c3f94875a674a87d25cd286cabba5820a5bb0a3eb228951b568 email.yml
Expand Down
22 changes: 11 additions & 11 deletions .ansible-sign/sha256sum.txt.sig
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
-----BEGIN PGP SIGNATURE-----

iQGzBAABCAAdFiEE/bJvyFHKKJdaZDOLTiIiIXrUDNEFAmUxcnEACgkQTiIiIXrU
DNFy5QwAn/Yc1QfA/3ttI/71RG3fe+YYAUjcRHDNRsk3OOJ0beuZcOvin76ntexp
zWNIcKhz64fGTZqSFblmknSquBi9/5XyqPetfjJH0iNupx7Ix07WTKYPhjGGY/U5
J2jacekjSiOVY7girnDo8hNC8bqilMLRI4wVw87KnC0bTAXkjHS7rjlBr86qSp2u
NO04sTI970rk5AjYWM49dBTyj8S+0vUPm2HbCD065PqQb2fuamA35b1gP1PJ/ZyV
lwRfGyWxzFfh+6oyGV2ZRk9EIHyUvTamP4Yh8JpF7Hxla+0e7yP9QFORyUEgZqBc
0xODFCIuRnWBQjNzwS6inCnKyOqA00I+QyZNnUbvSdyTFnG0YgPudP/kf3HHYFP+
cuDMqB4RarsO9GYWHF9fg5Hx8nGC/XJeZ3ca6+eg05/NNAdASTdKyw1hr2AcFXQn
JFFD1n28UEIUEFo/rzTDJT4883tY7ep9lXnVucEwvJD0mAmaJQWNkUxeZWo0EfNH
qjyauLqH
=BsNT
iQGzBAABCAAdFiEE/bJvyFHKKJdaZDOLTiIiIXrUDNEFAmVLwMIACgkQTiIiIXrU
DNH8vQwAhXmsKVlOUb17c4NsnJM7l8mydX4jfOPqqCMTyaoLPMiXy5EsEbQSNJDS
epxCIu9yp1anX0GSfHyU8+H0Rf5lq9Jq/xdeKjTYuY+9/a772hG7BuKhggtoj7nH
q+68k27embvExSJKcgTWhT+RnhRQj4ZY0V8I4S+zS8lvLqO8UwGTAUNH3DzC3JlX
zV5ZcfYP4OKzvA3vMD1EadZoDBx5WQ7sBcTT8x6QDNTGALA0+rZ6fveSlPmulz23
AiaX01e82mIvWAnLr3ZB3oxjEWivDNoqJ3w5c2r0vk2drvV/GWsA9Sy559ZUOgjY
/5eWbQnEn+rq5a7S0xvBGz+/G2FbvcdUD4BghmXmZ6bj1WcPmn0Hxuh8As8dF+pR
6QNfsVwBkT3vKEZeMo9AMTKnh4yvAEhuse4vEGOrre03jOFXVGD45KGJELyZgGNl
NJu7wR6f1evuqdn5VaX9Q++UosYXHJVBgtXz/iFQHwijQ9NNLQVQf62G0uYwfF7l
XAX74rPk
=MWVS
-----END PGP SIGNATURE-----
220 changes: 220 additions & 0 deletions Promgraf_collection.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
---
# Playbook: install and configure Prometheus, Alertmanager and Grafana using
# the prometheus.prometheus and grafana.grafana collection roles, deploy an
# Apache httpd configuration (files/httpd.conf) and open the firewall ports.
#
# Variables referenced below but NOT defined in this play; they must be
# supplied externally (inventory, extra vars, ...):
#   tower_scrape - duration used for scrape/evaluation intervals and alert `for`
#   bearer_token - bearer token sent when scraping the tower metrics endpoint
#   rate_number  - range window used inside rate(...[window]) in the CPU alerts
#   cpuload      - threshold (percent) used by the HostHighCpuLoad alert
- name: Install Prometheus and Grafana
  hosts: all
  vars:
    prometheus_alertmanager_config:
      - static_configs:
          - targets: ["promgraf.shadowman.dev:9093"]
    prometheus_global:
      scrape_interval: 5m
      scrape_timeout: 10s
      evaluation_interval: "{{ tower_scrape }}"
    # Prometheus requires `targets` to be a LIST of host:port strings; a bare
    # string is rejected when the configuration is loaded.
    prometheus_scrape_configs:
      - job_name: 'prometheus'
        static_configs:
          - targets:
              - '{{ inventory_hostname }}:9090'
      - job_name: 'tower metrics'
        metrics_path: /api/v2/metrics/
        scrape_interval: '{{ tower_scrape }}'
        scheme: https
        bearer_token: "{{ bearer_token }}"
        static_configs:
          - targets:
              - 'tower1.shadowman.dev:443'
      - job_name: 'tower1.shadowman.dev'
        scrape_interval: '{{ tower_scrape }}'
        static_configs:
          - targets:
              - 'tower1.shadowman.dev:9100'
      - job_name: 'tower2.shadowman.dev'
        scrape_interval: '{{ tower_scrape }}'
        static_configs:
          - targets:
              - 'tower2.shadowman.dev:9100'
      - job_name: 'rhel8.shadowman.dev'
        scrape_interval: '{{ tower_scrape }}'
        static_configs:
          - targets:
              - 'rhel8.shadowman.dev:9100'
    # The {% raw %} ... {% endraw %} wrappers keep Ansible from rendering the
    # Prometheus-side placeholders ($labels, $value) as Jinja expressions.
    # NOTE(review): confirm the prometheus role accepts this `groups`-wrapped
    # layout; some role versions expect a flat list of rules instead.
    prometheus_alert_rules:  # noqa yaml[line-length] # noqa line-length
      - groups:
          - name: selinux-rules
            rules:
              - alert: SELinuxDisabled
                expr: node_selinux_current_mode == 0
                for: '{{ tower_scrape }}'
                labels:
                  severity: critical
                annotations:
                  description: 'SELINUX Disabled for {% raw %}{{ $labels.job }}{% endraw %}.'
                  summary: 'SELINUX Disabled (instance: {% raw %}{{ $labels.job }}{% endraw %})'
      - groups:
          - name: memory-rules
            rules:
              - alert: HostOutOfMemory
                expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
                for: '{{ tower_scrape }}'
                labels:
                  severity: warning
                annotations:
                  summary: 'Host out of memory (instance: {% raw %}{{ $labels.job }}{% endraw %})'
                  description: 'Node memory is filling up (< 10% left) VALUE = {% raw %}{{ $value }}{% endraw %}'
              - alert: HostTooMuchMemory
                expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 > 90
                for: '{{ tower_scrape }}'
                labels:
                  severity: warning
                annotations:
                  summary: 'Host too much free memory (instance: {% raw %}{{ $labels.job }}{% endraw %})'
                  description: 'Node memory is too free (> 90% left) VALUE = {% raw %}{{ $value }}{% endraw %}'
      - groups:
          - name: exporter-rules
            rules:
              - alert: ExporterDown
                expr: up == 0
                for: '{{ tower_scrape }}'
                labels:
                  severity: critical
                annotations:
                  description: 'Metrics exporter service for {% raw %}{{ $labels.job }}{% endraw %} running on {% raw %}{{ $labels.instance }}{% endraw %} has been down for more than 5 minutes.'
                  summary: 'Exporter down (instance: {% raw %}{{ $labels.job }}{% endraw %})'
      - groups:
          - name: disk-rules
            rules:
              - alert: HostOutOfDiskSpace
                expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10
                for: '{{ tower_scrape }}'
                labels:
                  severity: warning
                annotations:
                  summary: 'Host out of disk space (instance: {% raw %}{{ $labels.job }}{% endraw %})'
                  description: 'Disk is almost full (< 10% left) VALUE = {% raw %}{{ $value }}{% endraw %}'
      - groups:
          - name: cpu-rules
            rules:
              - alert: HostHighCpuLoad
                expr: 100 - (avg by(instance,job) (rate(node_cpu_seconds_total{mode="idle"}[{{ rate_number }}])) * 100) > {{ cpuload }}
                for: '{{ tower_scrape }}'
                labels:
                  severity: critical
                annotations:
                  summary: 'Host high CPU load (instance: {% raw %}{{ $labels.job }}{% endraw %})'
                  description: 'CPU load is > {{ cpuload }}% VALUE = {% raw %}{{ $value }}{% endraw %}'
              - alert: HostLowCpuLoad
                expr: 100 - (avg by(instance,job) (rate(node_cpu_seconds_total{mode="idle"}[{{ rate_number }}])) * 100) < .1
                for: '{{ tower_scrape }}'
                labels:
                  severity: warning
                annotations:
                  summary: 'Host low CPU load (instance: {% raw %}{{ $labels.job }}{% endraw %})'
                  description: 'CPU load is < .1% VALUE = {% raw %}{{ $value }}{% endraw %}'
    # Alerts are delivered to a single webhook receiver; resolved
    # notifications are not sent.
    alertmanager_receivers:
      - name: snow
        webhook_configs:
          - url: "http://eda.shadowman.dev:8000/endpoint"
            send_resolved: false
    # NOTE(review): none of the rules above set an `alert` label; if grouping
    # per alert was intended, the label is presumably `alertname` - confirm.
    alertmanager_route:
      group_by: ['instance', 'alert']
      group_wait: 5s
      group_interval: 10s
      repeat_interval: 3h
      receiver: 'snow'
    # Grafana serves HTTPS with the certificate/key copied in pre_tasks.
    grafana_server:
      protocol: https
      cert_key: "/etc/grafana/shadowman_private.key"
      cert_file: "/etc/grafana/shadowman_cert.cer"
      enforce_domain: false
      enable_gzip: false
      static_root_path: public
      router_logging: false
      serve_from_sub_path: false

  # NOTE(review): pre_tasks run before the roles below, so these copies assume
  # /etc/grafana and the grafana user/group already exist on the target -
  # confirm this holds for first-time installs.
  pre_tasks:

    - name: Copy Cert for grafana
      ansible.builtin.copy:
        src: /certs/shadowman_cert.cer
        dest: /etc/grafana/shadowman_cert.cer
        owner: grafana
        group: grafana
        mode: '0644'

    - name: Copy Key for grafana
      ansible.builtin.copy:
        src: /certs/shadowman_private.key
        dest: /etc/grafana/shadowman_private.key
        owner: grafana
        group: grafana
        # Private key: readable by its owner only (was world-readable 0644).
        mode: '0600'

  # CI note carried over from the commit page (GitHub Actions / build,
  # syntax-check[specific]): "the role 'prometheus.prometheus.prometheus' was
  # not found". The collections providing these roles must be installed
  # before this play is linted or run (presumably via
  # collections/requirements.yml - verify).
  roles:
    - prometheus.prometheus.prometheus
    - prometheus.prometheus.alertmanager
    - grafana.grafana.grafana

  tasks:
    - name: Copy Cert for prometheus
      ansible.builtin.copy:
        src: /certs/shadowman_cert.cer
        dest: /certs/shadowman_cert.cer
        owner: root
        group: root
        mode: '0644'

    - name: Copy Key for prometheus
      ansible.builtin.copy:
        src: /certs/shadowman_private.key
        dest: /certs/shadowman_private.key
        owner: root
        group: root
        # Private key: readable by root only (was world-readable 0644).
        # NOTE(review): if a non-root service must read this key directly,
        # grant access through group ownership rather than world-read.
        mode: '0600'

    - name: Install httpd
      ansible.builtin.package:
        # mod_ssl is required because files/httpd.conf uses SSLEngine and the
        # SSLCertificate* directives.
        name:
          - httpd
          - mod_ssl
        state: present
      notify: HTTPD_running

    - name: Copy over httpd config
      ansible.builtin.copy:
        src: files/httpd.conf
        dest: /etc/httpd/conf/httpd.conf
        owner: root
        group: root
        mode: '0644'
      # Restart so that a changed configuration is actually picked up.
      notify: restart_httpd

    - name: Open Firewalld for prometheus
      ansible.posix.firewalld:
        port: 9090/tcp
        permanent: true
        state: enabled
      notify: restart_firewalld

    - name: Open Firewalld for grafana
      ansible.posix.firewalld:
        port: 3000/tcp
        permanent: true
        state: enabled
      notify: restart_firewalld

    - name: Open Firewalld for prometheus https
      ansible.posix.firewalld:
        # files/httpd.conf serves the TLS proxy for Prometheus on port 9200;
        # this rule previously repeated Grafana's 3000/tcp.
        port: 9200/tcp
        permanent: true
        state: enabled
      notify: restart_firewalld

    - name: Open Firewalld for alertmanager
      ansible.posix.firewalld:
        port: 9093/tcp
        permanent: true
        state: enabled
      notify: restart_firewalld

  handlers:
    # The firewalld rules above are permanent-only, so the service is
    # restarted to make them effective.
    - name: restart_firewalld
      ansible.builtin.service:
        name: firewalld
        state: restarted

    - name: HTTPD_running
      ansible.builtin.service:
        name: httpd
        state: started
        enabled: true

    - name: restart_httpd
      ansible.builtin.service:
        name: httpd
        state: restarted
106 changes: 106 additions & 0 deletions files/httpd.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# --- Core server settings ---------------------------------------------------
ServerRoot "/etc/httpd"
Listen 80
Include conf.modules.d/*.conf
User apache
Group apache
ServerAdmin root@localhost

# Deny access to the whole filesystem unless a later section grants it.
<Directory />
    AllowOverride none
    Require all denied
</Directory>

# --- Document tree ----------------------------------------------------------
DocumentRoot "/var/www/html"
<Directory "/var/www">
    AllowOverride None
    # Allow open access:
    Require all granted
</Directory>

<Directory "/var/www/html">
    Options Indexes FollowSymLinks
    AllowOverride None
    Require all granted
</Directory>

<IfModule dir_module>
    DirectoryIndex index.html
</IfModule>

# Never serve .ht* files.
<Files ".ht*">
    Require all denied
</Files>

# --- Logging ----------------------------------------------------------------
ErrorLog "logs/error_log"

LogLevel warn

<IfModule log_config_module>
    LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
    LogFormat "%h %l %u %t \"%r\" %>s %b" common

    <IfModule logio_module>
        LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
    </IfModule>

    #CustomLog "logs/access_log" common

    CustomLog "logs/access_log" combined
</IfModule>

# --- CGI --------------------------------------------------------------------
<IfModule alias_module>
    ScriptAlias /cgi-bin/ "/var/www/cgi-bin/"

</IfModule>

<Directory "/var/www/cgi-bin">
    AllowOverride None
    Options None
    Require all granted
</Directory>

# --- MIME handling ----------------------------------------------------------
<IfModule mime_module>
    TypesConfig /etc/mime.types

    AddType application/x-compress .Z
    AddType application/x-gzip .gz .tgz

    AddType text/html .shtml
    AddOutputFilter INCLUDES .shtml
</IfModule>

AddDefaultCharset UTF-8

<IfModule mime_magic_module>
    MIMEMagicFile conf/magic
</IfModule>

EnableSendfile on

# Pull in any additional configuration fragments.
IncludeOptional conf.d/*.conf

# --- Prometheus reverse proxy -----------------------------------------------
# Requests under /prometheus are proxied to the local Prometheus on port 9090.
<Location "/prometheus">
    ProxyPass "http://localhost:9090"
    ProxyPassReverse "http://localhost:9090"
</Location>

# Redirect the listed top-level Prometheus paths to their /prometheus
# equivalents.
<Location "/">
    Redirect "/alerts" "/prometheus/alerts"
    Redirect "/api" "/prometheus/api"
    Redirect "/config" "/prometheus/config"
    Redirect "/flags" "/prometheus/flags"
    Redirect "/graph" "/prometheus/graph"
    Redirect "/rules" "/prometheus/rules"
    Redirect "/static" "/prometheus/static"
    Redirect "/status" "/prometheus/status"
    Redirect "/targets" "/prometheus/targets"
</Location>

# TLS virtual host on port 9200 that proxies /prometheus/ to the local
# Prometheus instance on port 9090 and redirects other paths under
# /prometheus/.
Listen 9200
<VirtualHost *:9200>
    ServerName grafprom.shadowman.dev

    SSLEngine on
    SSLCertificateFile /certs/shadowman_cert.cer
    SSLCertificateKeyFile /certs/shadowman_private.key

    # The path ends with a trailing slash, so the backend URL must end with
    # one as well; otherwise the proxied request is built without the
    # separating slash (e.g. "http://localhost:9090graph").
    ProxyPass /prometheus/ "http://localhost:9090/"
    ProxyPassReverse /prometheus/ "http://localhost:9090/"

    Redirect "/" "/prometheus/"
</VirtualHost>

0 comments on commit 77dc583

Please sign in to comment.