Skip to content

Commit

Permalink
feat(dashboards): memory panel updates (#5858)
Browse files Browse the repository at this point in the history
* feat: up worker maxYoungGenerationSizeMb to 32

* feat: up worker maxYoungGenerationSizeMb to 64

* feat(dashboards): update memory panels on vm/host

* feat(network-worker): update new space to 512mb

* fix(dashboard): add newline to end of file

* fix(dashboard): lint vm

* fix: dashboard lint

* feat(network-worker): maxYoungGenerationSizeMb to 128mb

* fix(beacon-node): revert worker memory changes

* fix(dashboards): show metrics whether or not the worker is enabled

* fix(dashboards): remove duplicates

* chore: lint dashboards
  • Loading branch information
matthewkeil authored Aug 7, 2023
1 parent a7466f5 commit 0401a60
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 34 deletions.
133 changes: 99 additions & 34 deletions dashboards/lodestar_vm_host.json
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(process_resident_memory_bytes{job=~\"beacon|validator\"})",
"expr": "sum(process_resident_memory_bytes{job=~\"$beacon_job|beacon|$validator_job|validator\"})",
"hide": false,
"interval": "",
"legendFormat": "rss",
Expand All @@ -162,7 +162,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(process_heap_bytes{job=~\"validator|beacon\"})",
"expr": "sum(process_heap_bytes{job=~\"$beacon_job|beacon|$validator_job|validator\"})",
"hide": false,
"interval": "",
"legendFormat": "process_heap_bytes",
Expand All @@ -176,7 +176,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(nodejs_heap_size_total_bytes) + sum(network_worker_nodejs_heap_size_total_bytes) + sum(discv5_worker_nodejs_heap_size_total_bytes)",
"expr": "(sum(nodejs_heap_size_total_bytes) + sum(discv5_worker_nodejs_heap_size_total_bytes) + sum(network_worker_nodejs_heap_size_total_bytes))\nor\n(sum(nodejs_heap_size_total_bytes) + sum(discv5_worker_nodejs_heap_size_total_bytes))",
"hide": false,
"interval": "",
"legendFormat": "node allocated heap",
Expand All @@ -190,7 +190,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(nodejs_heap_size_used_bytes) + sum(network_worker_nodejs_heap_size_used_bytes) + sum(discv5_worker_nodejs_heap_size_used_bytes)",
"expr": "(sum(nodejs_heap_size_used_bytes) + sum(discv5_worker_nodejs_heap_size_used_bytes) + sum(network_worker_nodejs_heap_size_used_bytes)) \nor\n(sum(nodejs_heap_size_used_bytes) + sum(discv5_worker_nodejs_heap_size_used_bytes)) ",
"hide": false,
"interval": "",
"legendFormat": "node used heap",
Expand All @@ -204,7 +204,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(nodejs_external_memory_bytes) + sum(network_worker_nodejs_external_memory_bytes) + sum(discv5_worker_nodejs_external_memory_bytes)",
"expr": "(sum(nodejs_external_memory_bytes) + sum(discv5_worker_nodejs_external_memory_bytes) + sum(network_worker_nodejs_external_memory_bytes))\nor\n(sum(nodejs_external_memory_bytes) + sum(discv5_worker_nodejs_external_memory_bytes))",
"hide": false,
"interval": "",
"legendFormat": "node external memory",
Expand Down Expand Up @@ -303,7 +303,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + network_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + discv5_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"}",
"expr": "(nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + discv5_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + network_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"})\nor\n(nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"} + discv5_worker_nodejs_external_memory_bytes{job=~\"$beacon_job|beacon\"})",
"hide": false,
"interval": "",
"legendFormat": "external_memory",
Expand Down Expand Up @@ -422,7 +422,7 @@
"refId": "B"
}
],
"title": "Heap Allocations - Main Thread",
"title": "Main Thread - Heap Allocations",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -538,7 +538,7 @@
"refId": "C"
}
],
"title": "GC pause time rate + reclaimed bytes - Main Thread",
"title": "Main Thread - GC pause time rate + reclaimed bytes",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -636,7 +636,7 @@
"refId": "B"
}
],
"title": "Heap Allocations - Network Worker Thread",
"title": "Network Worker Thread - Heap Allocations",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -752,7 +752,7 @@
"refId": "C"
}
],
"title": "GC pause time rate + reclaimed bytes - Network Worker Thread",
"title": "Network Worker Thread - GC pause time rate + reclaimed bytes",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -850,7 +850,7 @@
"refId": "B"
}
],
"title": "Heap Allocations - Discv5 Worker Thread",
"title": "Discv5 Worker Thread - Heap Allocations",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -966,7 +966,7 @@
"refId": "C"
}
],
"title": "GC pause time rate + reclaimed bytes - Discv5 Worker Thread",
"title": "Discv5 Worker Thread - GC pause time rate + reclaimed bytes",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -1042,8 +1042,8 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "nodejs_heap_space_size_used_bytes{job=~\"$validator_job|validator\"}",
"editorMode": "code",
"interval": "",
"legendFormat": "{{space}}",
"range": true,
Expand All @@ -1064,7 +1064,7 @@
"refId": "B"
}
],
"title": "Heap Allocations - Validator",
"title": "Validator - Heap Allocations",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -1180,7 +1180,7 @@
"refId": "C"
}
],
"title": "GC pause time rate + reclaimed bytes - Validator",
"title": "Validator - GC pause time rate + reclaimed bytes",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -1330,9 +1330,21 @@
"legendFormat": "discv5_worker",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "nodejs_eventloop_lag_seconds{job=~\"$validator_job|validator\"}",
"interval": "",
"legendFormat": "main_thread",
"range": true,
"refId": "D"
}
],
"title": "prom-client Event Loop Lag",
"title": "Event Loop Lag",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -1427,7 +1439,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "avg_over_time(nodejs_eventloop_lag_seconds{job=~\"beacon\"}[$rate_interval])",
"expr": "avg_over_time(nodejs_eventloop_lag_seconds{job=~\"$beacon_job|beacon\"}[$rate_interval])",
"interval": "",
"legendFormat": "main_thread",
"range": true,
Expand Down Expand Up @@ -1456,9 +1468,21 @@
"legendFormat": "discv5_worker",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "avg_over_time(nodejs_eventloop_lag_seconds{job=~\"$validator_job|validator\"}[$rate_interval])",
"interval": "",
"legendFormat": "main_thread",
"range": true,
"refId": "D"
}
],
"title": "Average prom-client Event Loop Lag",
"title": "Average Event Loop Lag",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -1513,7 +1537,7 @@
"x": 0,
"y": 51
},
"id": 64,
"id": 6,
"options": {
"graph": {},
"legend": {
Expand All @@ -1535,14 +1559,38 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "nodejs_active_requests or network_worker_nodejs_active_requests",
"expr": "nodejs_active_handles",
"interval": "",
"legendFormat": "{{type}}",
"legendFormat": "main_thread_{{type}}",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "network_worker_nodejs_active_handles",
"hide": false,
"legendFormat": "network_worker_{{type}}",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "discv5_worker_nodejs_active_handles",
"hide": false,
"legendFormat": "discv5_worker_{{type}}",
"range": true,
"refId": "C"
}
],
"title": "Active requests",
"title": "Active Handles",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -1574,7 +1622,8 @@
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
"log": 2,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
Expand All @@ -1587,17 +1636,33 @@
}
},
"mappings": [],
"unit": "short"
"unit": "s"
},
"overrides": []
"overrides": [
{
"matcher": {
"id": "byName",
"options": "event loop lag"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "dark-yellow",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 51
},
"id": 6,
"id": 40,
"options": {
"graph": {},
"legend": {
Expand All @@ -1619,9 +1684,9 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "nodejs_active_handles",
"expr": "nodejs_eventloop_lag_seconds{job=~\"$beacon_job|beacon\"}",
"interval": "",
"legendFormat": "main_thread_{{type}}",
"legendFormat": "main_thread",
"range": true,
"refId": "A"
},
Expand All @@ -1631,9 +1696,9 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "network_worker_nodejs_active_handles",
"expr": "network_worker_nodejs_eventloop_lag_seconds",
"hide": false,
"legendFormat": "network_worker_{{type}}",
"legendFormat": "network_worker",
"range": true,
"refId": "B"
},
Expand All @@ -1643,14 +1708,14 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "discv5_worker_nodejs_active_handles",
"expr": "discv5_worker_nodejs_eventloop_lag_seconds",
"hide": false,
"legendFormat": "discv5_worker_{{type}}",
"legendFormat": "discv5_worker",
"range": true,
"refId": "C"
}
],
"title": "Active Handles",
"title": "Event Loop Lag - (metric A) eventloop_lag_seconds",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -5222,6 +5287,6 @@
"timezone": "utc",
"title": "Lodestar - VM + host",
"uid": "lodestar_vm_host",
"version": 8,
"version": 31,
"weekStart": "monday"
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"lint:fix": "yarn lint --fix",
"lint-docs": "prettier '**/*.md' --check",
"lint-docs:fix": "prettier '**/*.md' --write",
"lint-dashboards": "scripts/validate-grafana-dashboards.sh",
"check-build": "lerna run check-build",
"check-readme": "lerna run check-readme",
"check-types": "lerna run check-types --no-bail",
Expand Down

0 comments on commit 0401a60

Please sign in to comment.