diff --git a/dashboards/lodestar_vm_host.json b/dashboards/lodestar_vm_host.json index bf9b71e01bf7..52e99637558c 100644 --- a/dashboards/lodestar_vm_host.json +++ b/dashboards/lodestar_vm_host.json @@ -148,7 +148,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(process_resident_memory_bytes{job=~\"beacon|validator\"})", + "expr": "sum(process_resident_memory_bytes{job=~\"$beacon_job|beacon|$validator_job|validator\"})", "hide": false, "interval": "", "legendFormat": "rss", @@ -162,7 +162,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(process_heap_bytes{job=~\"validator|beacon\"})", + "expr": "sum(process_heap_bytes{job=~\"$beacon_job|beacon|$validator_job|validator\"})", "hide": false, "interval": "", "legendFormat": "process_heap_bytes", @@ -176,7 +176,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(nodejs_heap_size_total_bytes) + sum(network_worker_nodejs_heap_size_total_bytes) + sum(discv5_worker_nodejs_heap_size_total_bytes)", + "expr": "(sum(nodejs_heap_size_total_bytes) + sum(discv5_worker_nodejs_heap_size_total_bytes) + sum(network_worker_nodejs_heap_size_total_bytes))\nor\n(sum(nodejs_heap_size_total_bytes) + sum(discv5_worker_nodejs_heap_size_total_bytes))", "hide": false, "interval": "", "legendFormat": "node allocated heap", @@ -190,7 +190,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(nodejs_heap_size_used_bytes) + sum(network_worker_nodejs_heap_size_used_bytes) + sum(discv5_worker_nodejs_heap_size_used_bytes)", + "expr": "(sum(nodejs_heap_size_used_bytes) + sum(discv5_worker_nodejs_heap_size_used_bytes) + sum(network_worker_nodejs_heap_size_used_bytes)) \nor\n(sum(nodejs_heap_size_used_bytes) + sum(discv5_worker_nodejs_heap_size_used_bytes)) ", "hide": false, "interval": "", "legendFormat": "node used heap", @@ -204,7 +204,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(nodejs_external_memory_bytes) + sum(network_worker_nodejs_external_memory_bytes) + 
sum(discv5_worker_nodejs_external_memory_bytes)", + "expr": "(sum(nodejs_external_memory_bytes) + sum(discv5_worker_nodejs_external_memory_bytes) + sum(network_worker_nodejs_external_memory_bytes))\nor\n(sum(nodejs_external_memory_bytes) + sum(discv5_worker_nodejs_external_memory_bytes))", "hide": false, "interval": "", "legendFormat": "node external memory", @@ -303,7 +303,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + network_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + discv5_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"}", + "expr": "(nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + discv5_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + network_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"})\nor\n(nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + discv5_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"})", "hide": false, "interval": "", "legendFormat": "external_memory", @@ -422,7 +422,7 @@ "refId": "B" } ], - "title": "Heap Allocations - Main Thread", + "title": "Main Thread - Heap Allocations", "type": "timeseries" }, { @@ -538,7 +538,7 @@ "refId": "C" } ], - "title": "GC pause time rate + reclaimed bytes - Main Thread", + "title": "Main Thread - GC pause time rate + reclaimed bytes", "type": "timeseries" }, { @@ -636,7 +636,7 @@ "refId": "B" } ], - "title": "Heap Allocations - Network Worker Thread", + "title": "Network Worker Thread - Heap Allocations", "type": "timeseries" }, { @@ -752,7 +752,7 @@ "refId": "C" } ], - "title": "GC pause time rate + reclaimed bytes - Network Worker Thread", + "title": "Network Worker Thread - GC pause time rate + reclaimed bytes", "type": "timeseries" }, { @@ -850,7 +850,7 @@ "refId": "B" } ], - "title": "Heap Allocations - Discv5 Worker Thread", + "title": "Discv5 Worker Thread - Heap Allocations", "type": "timeseries" }, { @@ -966,7 +966,7 
@@ "refId": "C" } ], - "title": "GC pause time rate + reclaimed bytes - Discv5 Worker Thread", + "title": "Discv5 Worker Thread - GC pause time rate + reclaimed bytes", "type": "timeseries" }, { @@ -1042,8 +1042,8 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", "expr": "nodejs_heap_space_size_used_bytes{job=~\"$validator_job|validator\"}", + "editorMode": "code", "interval": "", "legendFormat": "{{space}}", "range": true, @@ -1064,7 +1064,7 @@ "refId": "B" } ], - "title": "Heap Allocations - Validator", + "title": "Validator - Heap Allocations", "type": "timeseries" }, { @@ -1180,7 +1180,7 @@ "refId": "C" } ], - "title": "GC pause time rate + reclaimed bytes - Validator", + "title": "Validator - GC pause time rate + reclaimed bytes", "type": "timeseries" }, { @@ -1330,9 +1330,21 @@ "legendFormat": "discv5_worker", "range": true, "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "nodejs_eventloop_lag_seconds{job=~\"$validator_job|validator\"}", + "interval": "", + "legendFormat": "main_thread", + "range": true, + "refId": "D" } ], - "title": "prom-client Event Loop Lag", + "title": "Event Loop Lag", "type": "timeseries" }, { @@ -1427,7 +1439,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg_over_time(nodejs_eventloop_lag_seconds{job=~\"beacon\"}[$rate_interval])", + "expr": "avg_over_time(nodejs_eventloop_lag_seconds{job=~\"$beacon_job|beacon\"}[$rate_interval])", "interval": "", "legendFormat": "main_thread", "range": true, @@ -1456,9 +1468,21 @@ "legendFormat": "discv5_worker", "range": true, "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg_over_time(nodejs_eventloop_lag_seconds{job=~\"$validator_job|validator\"}[$rate_interval])", + "interval": "", + "legendFormat": "main_thread", + "range": true, + "refId": "D" } ], - "title": "Average prom-client
Event Loop Lag", + "title": "Average Event Loop Lag", "type": "timeseries" }, { @@ -1513,7 +1537,7 @@ "x": 0, "y": 51 }, - "id": 64, + "id": 6, "options": { "graph": {}, "legend": { @@ -1535,14 +1559,38 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "nodejs_active_requests or network_worker_nodejs_active_requests", + "expr": "nodejs_active_handles", "interval": "", - "legendFormat": "{{type}}", + "legendFormat": "main_thread_{{type}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "network_worker_nodejs_active_handles", + "hide": false, + "legendFormat": "network_worker_{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "discv5_worker_nodejs_active_handles", + "hide": false, + "legendFormat": "discv5_worker_{{type}}", + "range": true, + "refId": "C" } ], - "title": "Active requests", + "title": "Active Handles", "type": "timeseries" }, { @@ -1574,7 +1622,8 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "type": "linear" + "log": 2, + "type": "log" }, "showPoints": "never", "spanNulls": false, @@ -1587,9 +1636,25 @@ } }, "mappings": [], - "unit": "short" + "unit": "s" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "event loop lag" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-yellow", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, @@ -1597,7 +1662,7 @@ "x": 12, "y": 51 }, - "id": 6, + "id": 40, "options": { "graph": {}, "legend": { @@ -1619,9 +1684,9 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "nodejs_active_handles", + "expr": "nodejs_eventloop_lag_seconds{job=~\"$beacon_job|beacon\"}", "interval": "", - "legendFormat": "main_thread_{{type}}", + "legendFormat": "main_thread", "range": true, "refId": "A" }, @@ -1631,9 
+1696,9 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "network_worker_nodejs_active_handles", + "expr": "network_worker_nodejs_eventloop_lag_seconds", "hide": false, - "legendFormat": "network_worker_{{type}}", + "legendFormat": "network_worker", "range": true, "refId": "B" }, @@ -1643,14 +1708,14 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "discv5_worker_nodejs_active_handles", + "expr": "discv5_worker_nodejs_eventloop_lag_seconds", "hide": false, - "legendFormat": "discv5_worker_{{type}}", + "legendFormat": "discv5_worker", "range": true, "refId": "C" } ], - "title": "Active Handles", + "title": "Event Loop Lag - (metric A) eventloop_lag_seconds", "type": "timeseries" }, { @@ -5222,6 +5287,6 @@ "timezone": "utc", "title": "Lodestar - VM + host", "uid": "lodestar_vm_host", - "version": 8, + "version": 31, "weekStart": "monday" } diff --git a/package.json b/package.json index b467c150a35f..55f4319a7ae7 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "lint:fix": "yarn lint --fix", "lint-docs": "prettier '**/*.md' --check", "lint-docs:fix": "prettier '**/*.md' --write", + "lint-dashboards": "scripts/validate-grafana-dashboards.sh", "check-build": "lerna run check-build", "check-readme": "lerna run check-readme", "check-types": "lerna run check-types --no-bail",