From c76d205e90595ad322750c4210a74f9b3a6bc648 Mon Sep 17 00:00:00 2001 From: Amin Moghaddam Date: Wed, 29 May 2024 19:19:00 +0200 Subject: [PATCH] fix(fortuna): Refactor and improve timestamp lag gauge (#1632) Set INF value for gauge if RPC is not accessible. This would trigger the alerts on the monitoring service. We will not do the same for other gauges (e.g. set balance to 0) since that's just duplicate alerts for the same underlying symptom. --- apps/fortuna/Cargo.lock | 2 +- apps/fortuna/Cargo.toml | 2 +- apps/fortuna/src/command/run.rs | 92 ++++++++++++++++++--------------- 3 files changed, 51 insertions(+), 45 deletions(-) diff --git a/apps/fortuna/Cargo.lock b/apps/fortuna/Cargo.lock index 43d8c03c68..69b42bcb7e 100644 --- a/apps/fortuna/Cargo.lock +++ b/apps/fortuna/Cargo.lock @@ -1488,7 +1488,7 @@ dependencies = [ [[package]] name = "fortuna" -version = "6.0.0" +version = "6.0.1" dependencies = [ "anyhow", "axum", diff --git a/apps/fortuna/Cargo.toml b/apps/fortuna/Cargo.toml index b9d5f8ae75..41294ccb19 100644 --- a/apps/fortuna/Cargo.toml +++ b/apps/fortuna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fortuna" -version = "6.0.0" +version = "6.0.1" edition = "2021" [dependencies] diff --git a/apps/fortuna/src/command/run.rs b/apps/fortuna/src/command/run.rs index 44be17cc77..3e8c9f162f 100644 --- a/apps/fortuna/src/command/run.rs +++ b/apps/fortuna/src/command/run.rs @@ -301,6 +301,50 @@ pub struct ChainLabel { pub chain_id: String, } + +#[tracing::instrument(name = "block_timestamp_lag", skip_all, fields(chain_id = chain_id))] +pub async fn check_block_timestamp_lag( + chain_id: String, + chain_config: EthereumConfig, + metrics: Family, +) { + let provider = match Provider::::try_from(&chain_config.geth_rpc_addr) { + Ok(r) => r, + Err(e) => { + tracing::error!("Failed to create provider for chain id - {:?}", e); + return; + } + }; + + const INF_LAG: i64 = 1000000; // value that definitely triggers an alert + let lag = match provider.get_block(BlockNumber::Latest).await { + Ok(block) => match block { + Some(block) => { + let block_timestamp = block.timestamp; + let server_timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + let lag: i64 = (server_timestamp as i64) - (block_timestamp.as_u64() as i64); + lag + } + None => { + tracing::error!("Block is None"); + INF_LAG + } + }, + Err(e) => { + tracing::error!("Failed to get block - {:?}", e); + INF_LAG + } + }; + metrics + .get_or_create(&ChainLabel { + chain_id: chain_id.clone(), + }) + .set(lag); +} + /// Tracks the difference between the server timestamp and the latest block timestamp for each chain pub async fn track_block_timestamp_lag(config: Config, metrics_registry: Arc>) { let metrics = Family::::default(); @@ -311,49 +355,11 @@ pub async fn track_block_timestamp_lag(config: Config, metrics_registry: Arc::try_from(&chain_config.geth_rpc_addr) { - Ok(r) => r, - Err(e) => { - tracing::error!( - "Failed to create provider for chain id {} - {:?}", - &chain_id, - e - ); - return; - } - }; - - match provider.get_block(BlockNumber::Latest).await { - Ok(b) => { - if let Some(block) = b { - let block_timestamp = block.timestamp; - let server_timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(); - let lag: i64 = - (server_timestamp as i64) - (block_timestamp.as_u64() as i64); - - metrics - .get_or_create(&ChainLabel { - chain_id: chain_id.clone(), - }) - .set(lag); - } - } - Err(e) => { - tracing::error!("Failed to get block for chain id {} - {:?}", &chain_id, e); - } - }; - }); + spawn(check_block_timestamp_lag( + chain_id.clone(), + chain_config.clone(), + metrics.clone(), + )); } time::sleep(TRACK_INTERVAL).await;