diff --git a/dashboards/lodestar_networking.json b/dashboards/lodestar_networking.json index 7a18f218db7b..8633faeb7668 100644 --- a/dashboards/lodestar_networking.json +++ b/dashboards/lodestar_networking.json @@ -1,12 +1,12 @@ { "__inputs": [ { - "description": "", - "label": "Prometheus", "name": "DS_PROMETHEUS", + "type": "datasource", + "label": "Prometheus", + "description": "", "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" + "pluginName": "Prometheus" } ], "annotations": { @@ -937,7 +937,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "(\n sum(rate(\n lodestar_network_worker_wire_events_on_worker_thread_latency_sum[$rate_interval]\n )) \n +\n sum(rate(\n lodestar_network_worker_wire_events_on_main_thread_latency_sum[$rate_interval]\n ))\n)\n/\n(\n sum(rate(\n lodestar_network_worker_wire_events_on_worker_thread_latency_count[$rate_interval]\n ))\n +\n sum(rate(\n lodestar_network_worker_wire_events_on_main_thread_latency_count[$rate_interval]\n ))\n)", + "expr": "(\n sum(rate(\n lodestar_network_worker_wire_events_on_worker_thread_latency_seconds_sum[$rate_interval]\n )) \n +\n sum(rate(\n lodestar_network_worker_wire_events_on_main_thread_latency_seconds_sum[$rate_interval]\n ))\n)\n/\n(\n sum(rate(\n lodestar_network_worker_wire_events_on_worker_thread_latency_seconds_count[$rate_interval]\n ))\n +\n sum(rate(\n lodestar_network_worker_wire_events_on_main_thread_latency_seconds_count[$rate_interval]\n ))\n)", "hide": false, "interval": "", "legendFormat": "Average", @@ -951,7 +951,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "avg(rate(lodestar_network_worker_wire_events_on_worker_thread_latency_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_worker_thread_latency_count[$rate_interval]))", + "expr": "avg(rate(lodestar_network_worker_wire_events_on_worker_thread_latency_seconds_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_worker_thread_latency_seconds_count[$rate_interval]))", "hide": false, "interval": "", "legendFormat": "Worker to Main", @@ -964,7 +964,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(lodestar_network_worker_wire_events_on_main_thread_latency_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_main_thread_latency_count[$rate_interval]))", + "expr": "avg(rate(lodestar_network_worker_wire_events_on_main_thread_latency_seconds_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_main_thread_latency_seconds_count[$rate_interval]))", "hide": false, "legendFormat": "Main to Worker", "range": true, @@ -1051,7 +1051,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "avg(rate(lodestar_network_worker_wire_events_on_main_thread_latency_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_main_thread_latency_count[$rate_interval]))", + "expr": "avg(rate(lodestar_network_worker_wire_events_on_main_thread_latency_seconds_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_main_thread_latency_seconds_count[$rate_interval]))", "hide": true, "interval": "", "legendFormat": "Average to Main", @@ -1065,7 +1065,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "rate(lodestar_network_worker_wire_events_on_main_thread_latency_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_main_thread_latency_count[$rate_interval])", + "expr": "rate(lodestar_network_worker_wire_events_on_main_thread_latency_seconds_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_main_thread_latency_seconds_count[$rate_interval])", "interval": "", "legendFormat": "{{eventName}}", "range": true, @@ -1077,7 +1077,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(lodestar_network_worker_wire_events_on_worker_thread_latency_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_worker_thread_latency_count[$rate_interval])", + "expr": "rate(lodestar_network_worker_wire_events_on_worker_thread_latency_seconds_sum[$rate_interval])/rate(lodestar_network_worker_wire_events_on_worker_thread_latency_seconds_count[$rate_interval])", "hide": false, "legendFormat": "{{eventName}}", "range": true, diff --git a/packages/beacon-node/src/metrics/metrics/lodestar.ts b/packages/beacon-node/src/metrics/metrics/lodestar.ts index 8b8ce0f0c2bc..5ffae34a9eeb 100644 --- a/packages/beacon-node/src/metrics/metrics/lodestar.ts +++ b/packages/beacon-node/src/metrics/metrics/lodestar.ts @@ -121,6 +121,12 @@ export function createLodestarMetrics( help: "Current count of pending items in reqRespBridgeReqCaller data structure", }), }, + networkWorkerWireEventsOnMainThreadLatency: register.histogram<"eventName">({ + name: "lodestar_network_worker_wire_events_on_main_thread_latency_seconds", + help: "Latency in seconds to transmit network events to main thread across worker port", + labelNames: ["eventName"], + buckets: [0.001, 0.003, 0.01, 0.03, 0.1], + }), regenQueue: { length: register.gauge({ diff --git a/packages/beacon-node/src/network/core/metrics.ts b/packages/beacon-node/src/network/core/metrics.ts index 78bc88d52fe7..e5ce0bede447 100644 --- a/packages/beacon-node/src/network/core/metrics.ts +++ b/packages/beacon-node/src/network/core/metrics.ts @@ -333,6 +333,8 @@ export function createNetworkCoreMetrics(register: RegistryMetricCreator) { }; } +export type NetworkCoreWorkerMetrics = ReturnType; + // eslint-disable-next-line @typescript-eslint/explicit-function-return-type export function getNetworkCoreWorkerMetrics(register: RegistryMetricCreator) { return { @@ -340,5 +342,11 @@ export function getNetworkCoreWorkerMetrics(register: RegistryMetricCreator) { name: "lodestar_network_worker_reqresp_bridge_caller_pending_count", help: "Current count of pending elements in respBridgeCaller", }), + networkWorkerWireEventsOnWorkerThreadLatency: register.histogram<"eventName">({ + name: "lodestar_network_worker_wire_events_on_worker_thread_latency_seconds", + help: "Latency in seconds to transmit network events to worker thread across parent port", + labelNames: ["eventName"], + buckets: [0.001, 0.003, 0.01, 0.03, 0.1], + }), }; } diff --git a/packages/beacon-node/src/network/core/networkCoreWorker.ts b/packages/beacon-node/src/network/core/networkCoreWorker.ts index a0c8ff22fe60..35303190a8f8 100644 --- a/packages/beacon-node/src/network/core/networkCoreWorker.ts +++ b/packages/beacon-node/src/network/core/networkCoreWorker.ts @@ -13,6 +13,9 @@ import {peerIdToString} from "../../util/peerId.js"; import {profileNodeJS} from "../../util/profile.js"; import {NetworkEventBus, NetworkEventData, networkEventDirection} from "../events.js"; import {wireEventsOnWorkerThread} from "../../util/workerEvents.js"; +import {getNetworkCoreWorkerMetrics} from "./metrics.js"; +import {NetworkWorkerApi, NetworkWorkerData} from "./types.js"; +import {NetworkCore} from "./networkCore.js"; import { NetworkWorkerThreadEventType, ReqRespBridgeEventBus, @@ -21,9 +24,6 @@ import { getReqRespBridgeRespEvents, reqRespBridgeEventDirection, } from "./events.js"; -import {getNetworkCoreWorkerMetrics} from "./metrics.js"; -import {NetworkCore} from "./networkCore.js"; -import {NetworkWorkerApi, NetworkWorkerData} from "./types.js"; // Cloned data from instantiation const workerData = worker.workerData as NetworkWorkerData; @@ -83,9 +83,9 @@ new AsyncIterableBridgeHandler(getReqRespBridgeReqEvents(reqRespBridgeEventBus), ); const reqRespBridgeRespCaller = new AsyncIterableBridgeCaller(getReqRespBridgeRespEvents(reqRespBridgeEventBus)); +const networkCoreWorkerMetrics = metricsRegister ? getNetworkCoreWorkerMetrics(metricsRegister) : null; // respBridgeCaller metrics -if (metricsRegister) { - const networkCoreWorkerMetrics = getNetworkCoreWorkerMetrics(metricsRegister); +if (networkCoreWorkerMetrics) { networkCoreWorkerMetrics.reqRespBridgeRespCallerPending.addCollect(() => { networkCoreWorkerMetrics.reqRespBridgeRespCallerPending.set(reqRespBridgeRespCaller.pendingCount); }); @@ -110,12 +110,14 @@ wireEventsOnWorkerThread( NetworkWorkerThreadEventType.networkEvent, events, parentPort, + networkCoreWorkerMetrics, networkEventDirection ); wireEventsOnWorkerThread( NetworkWorkerThreadEventType.reqRespBridgeEvents, reqRespBridgeEventBus, parentPort, + networkCoreWorkerMetrics, reqRespBridgeEventDirection ); diff --git a/packages/beacon-node/src/network/core/networkCoreWorkerHandler.ts b/packages/beacon-node/src/network/core/networkCoreWorkerHandler.ts index 73ca9e9c5fd0..8c944dd87d2d 100644 --- a/packages/beacon-node/src/network/core/networkCoreWorkerHandler.ts +++ b/packages/beacon-node/src/network/core/networkCoreWorkerHandler.ts @@ -75,12 +75,14 @@ export class WorkerNetworkCore implements INetworkCore { NetworkWorkerThreadEventType.networkEvent, modules.events, modules.worker as unknown as worker_threads.Worker, + modules.metrics, networkEventDirection ); wireEventsOnMainThread( NetworkWorkerThreadEventType.reqRespBridgeEvents, this.reqRespBridgeEventBus, modules.worker as unknown as worker_threads.Worker, + modules.metrics, reqRespBridgeEventDirection ); diff --git a/packages/beacon-node/src/util/workerEvents.ts b/packages/beacon-node/src/util/workerEvents.ts index cd61e6b95393..807bf7a30618 100644 --- a/packages/beacon-node/src/util/workerEvents.ts +++ b/packages/beacon-node/src/util/workerEvents.ts @@ -2,11 +2,16 @@ import {MessagePort, Worker} from "node:worker_threads"; import {Thread} from "@chainsafe/threads"; import {Logger} from "@lodestar/logger"; import {sleep} from "@lodestar/utils"; +import {Metrics} from "../metrics/metrics.js"; +import {NetworkCoreWorkerMetrics} from "../network/core/metrics.js"; import {StrictEventEmitterSingleArg} from "./strictEvents.js"; +const NANO_TO_SECOND_CONVERSION = 1e9; + export type WorkerBridgeEvent = { type: string; event: keyof EventData; + posted: [number, number]; data: EventData[keyof EventData]; }; @@ -27,6 +32,7 @@ export function wireEventsOnWorkerThread( mainEventName: string, events: StrictEventEmitterSingleArg, parentPort: MessagePort, + metrics: NetworkCoreWorkerMetrics | null, isWorkerToMain: {[K in keyof EventData]: EventDirection} ): void { // Subscribe to events from main thread @@ -37,6 +43,12 @@ export function wireEventsOnWorkerThread( // This check is not necessary but added for safety in case of improper implemented events isWorkerToMain[data.event] === EventDirection.mainToWorker ) { + const [sec, nanoSec] = process.hrtime(data.posted); + const networkWorkerLatency = sec + nanoSec / NANO_TO_SECOND_CONVERSION; + metrics?.networkWorkerWireEventsOnWorkerThreadLatency.observe( + {eventName: data.event as string}, + networkWorkerLatency + ); events.emit(data.event, data.data); } }); @@ -48,6 +60,7 @@ export function wireEventsOnWorkerThread( const workerEvent: WorkerBridgeEvent = { type: mainEventName, event: eventName, + posted: process.hrtime(), data, }; parentPort.postMessage(workerEvent); @@ -60,6 +73,7 @@ export function wireEventsOnMainThread( mainEventName: string, events: StrictEventEmitterSingleArg, worker: Pick, + metrics: Metrics | null, isWorkerToMain: {[K in keyof EventData]: EventDirection} ): void { // Subscribe to events from main thread @@ -70,6 +84,12 @@ export function wireEventsOnMainThread( // This check is not necessary but added for safety in case of improper implemented events isWorkerToMain[data.event] === EventDirection.workerToMain ) { + const [sec, nanoSec] = process.hrtime(data.posted); + const networkWorkerLatency = sec + nanoSec / NANO_TO_SECOND_CONVERSION; + metrics?.networkWorkerWireEventsOnMainThreadLatency.observe( + {eventName: data.event as string}, + networkWorkerLatency + ); events.emit(data.event, data.data); } }); @@ -81,6 +101,7 @@ export function wireEventsOnMainThread( const workerEvent: WorkerBridgeEvent = { type: mainEventName, event: eventName, + posted: process.hrtime(), data, }; worker.postMessage(workerEvent);