diff --git a/Cargo.lock b/Cargo.lock index 030130f2427..13279cb0fb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10568,6 +10568,8 @@ version = "0.0.0" dependencies = [ "axum", "hyper 0.14.30", + "metrics 0.21.1", + "metrics-exporter-prometheus", "papyrus_config", "pretty_assertions", "serde", diff --git a/crates/starknet_monitoring_endpoint/Cargo.toml b/crates/starknet_monitoring_endpoint/Cargo.toml index 775c7ef0eea..f4b462a3857 100644 --- a/crates/starknet_monitoring_endpoint/Cargo.toml +++ b/crates/starknet_monitoring_endpoint/Cargo.toml @@ -14,6 +14,7 @@ workspace = true [dependencies] axum.workspace = true hyper = { workspace = true } +metrics-exporter-prometheus.workspace = true papyrus_config.workspace = true serde.workspace = true starknet_sequencer_infra.workspace = true @@ -23,6 +24,7 @@ tracing.workspace = true validator.workspace = true [dev-dependencies] +metrics.workspace = true pretty_assertions.workspace = true tokio.workspace = true tower.workspace = true diff --git a/crates/starknet_monitoring_endpoint/src/monitoring_endpoint.rs b/crates/starknet_monitoring_endpoint/src/monitoring_endpoint.rs index c14153ec02a..cae2f6fd1c4 100644 --- a/crates/starknet_monitoring_endpoint/src/monitoring_endpoint.rs +++ b/crates/starknet_monitoring_endpoint/src/monitoring_endpoint.rs @@ -2,9 +2,11 @@ use std::any::type_name; use std::net::SocketAddr; use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; use axum::routing::get; use axum::{async_trait, Router, Server}; use hyper::Error; +use metrics_exporter_prometheus::PrometheusHandle; use starknet_sequencer_infra::component_definitions::ComponentStarter; use starknet_sequencer_infra::errors::ComponentError; use tracing::{info, instrument}; @@ -19,15 +21,17 @@ pub(crate) const MONITORING_PREFIX: &str = "monitoring"; pub(crate) const ALIVE: &str = "alive"; pub(crate) const READY: &str = "ready"; pub(crate) const VERSION: &str = "nodeVersion"; +pub(crate) const METRICS: &str = "metrics"; pub struct MonitoringEndpoint { config: MonitoringEndpointConfig, version: &'static str, + prometheus_handle: Option, } impl MonitoringEndpoint { pub fn new(config: MonitoringEndpointConfig, version: &'static str) -> Self { - MonitoringEndpoint { config, version } + MonitoringEndpoint { config, version, prometheus_handle: None } } #[instrument( @@ -41,13 +45,13 @@ impl MonitoringEndpoint { let MonitoringEndpointConfig { ip, port } = self.config; let endpoint_addr = SocketAddr::new(ip, port); - let app = self.app(); + let app = self.app(self.prometheus_handle.clone()); info!("MonitoringEndpoint running using socket: {}", endpoint_addr); Server::bind(&endpoint_addr).serve(app.into_make_service()).await } - fn app(&self) -> Router { + fn app(&self, prometheus_handle: Option) -> Router { let version = self.version.to_string(); Router::new() @@ -63,6 +67,10 @@ impl MonitoringEndpoint { format!("/{MONITORING_PREFIX}/{VERSION}").as_str(), get(move || async { version }), ) + .route( + format!("/{MONITORING_PREFIX}/{METRICS}").as_str(), + get(move || metrics(prometheus_handle)), + ) } } @@ -80,3 +88,14 @@ impl ComponentStarter for MonitoringEndpoint { self.run().await.map_err(|_| ComponentError::InternalComponentError) } } + +/// Returns prometheus metrics. +/// In case the node doesn’t collect metrics returns an empty response with status code 405: method +/// not allowed. +#[instrument(level = "debug", ret, skip(prometheus_handle))] +async fn metrics(prometheus_handle: Option) -> Response { + match prometheus_handle { + Some(handle) => handle.render().into_response(), + None => StatusCode::METHOD_NOT_ALLOWED.into_response(), + } +} diff --git a/crates/starknet_monitoring_endpoint/src/monitoring_endpoint_test.rs b/crates/starknet_monitoring_endpoint/src/monitoring_endpoint_test.rs index 76c521cfc5a..b4a28f76455 100644 --- a/crates/starknet_monitoring_endpoint/src/monitoring_endpoint_test.rs +++ b/crates/starknet_monitoring_endpoint/src/monitoring_endpoint_test.rs @@ -5,6 +5,8 @@ use axum::response::Response; use axum::Router; use hyper::body::to_bytes; use hyper::Client; +use metrics::{absolute_counter, describe_counter, register_counter}; +use metrics_exporter_prometheus::PrometheusBuilder; use pretty_assertions::assert_eq; use tokio::spawn; use tokio::task::yield_now; @@ -32,7 +34,7 @@ async fn request_app(app: Router, method: &str) -> Response { #[tokio::test] async fn test_node_version() { - let response = request_app(setup_monitoring_endpoint().app(), VERSION).await; + let response = request_app(setup_monitoring_endpoint().app(None), VERSION).await; assert_eq!(response.status(), StatusCode::OK); let body = to_bytes(response.into_body()).await.unwrap(); @@ -41,16 +43,47 @@ async fn test_node_version() { #[tokio::test] async fn test_alive() { - let response = request_app(setup_monitoring_endpoint().app(), ALIVE).await; + let response = request_app(setup_monitoring_endpoint().app(None), ALIVE).await; assert_eq!(response.status(), StatusCode::OK); } #[tokio::test] async fn test_ready() { - let response = request_app(setup_monitoring_endpoint().app(), READY).await; + let response = request_app(setup_monitoring_endpoint().app(None), READY).await; assert_eq!(response.status(), StatusCode::OK); } +#[tokio::test] +async fn with_metrics() { + let app = + setup_monitoring_endpoint().app(Some(PrometheusBuilder::new().install_recorder().unwrap())); + + // Register a metric. + let metric_name = "metric_name"; + let metric_help = "metric_help"; + let metric_value = 8224; + register_counter!(metric_name); + describe_counter!(metric_name, metric_help); + absolute_counter!(metric_name, metric_value); + + let response = request_app(app, crate::monitoring_endpoint::METRICS).await; + assert_eq!(response.status(), StatusCode::OK); + let body_bytes = hyper::body::to_bytes(response.into_body()).await.unwrap(); + let body_string = String::from_utf8(body_bytes.to_vec()).unwrap(); + let expected_prefix = format!( + "# HELP {metric_name} {metric_help}\n# TYPE {metric_name} counter\n{metric_name} \ + {metric_value}\n\n" + ); + assert!(body_string.starts_with(&expected_prefix)); +} + +#[tokio::test] +async fn without_metrics() { + let app = setup_monitoring_endpoint().app(None); + let response = request_app(app, crate::monitoring_endpoint::METRICS).await; + assert_eq!(response.status(), StatusCode::METHOD_NOT_ALLOWED); +} + #[tokio::test] async fn test_endpoint_as_server() { spawn(async move { setup_monitoring_endpoint().run().await });