Skip to content

Commit

Permalink
fix(fortuna): Refactor and improve timestamp lag gauge (#1632)
Browse files Browse the repository at this point in the history
Set INF value for gauge if RPC is not accessible.
This would trigger the alerts on the monitoring service.
We will not do the same for other gauges (e.g. set balance to 0) since
that's just duplicate alerts for the same underlying symptom.
  • Loading branch information
m30m authored May 29, 2024
1 parent 6535e7f commit c76d205
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 45 deletions.
2 changes: 1 addition & 1 deletion apps/fortuna/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apps/fortuna/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fortuna"
version = "6.0.0"
version = "6.0.1"
edition = "2021"

[dependencies]
Expand Down
92 changes: 49 additions & 43 deletions apps/fortuna/src/command/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,50 @@ pub struct ChainLabel {
pub chain_id: String,
}


#[tracing::instrument(name = "block_timestamp_lag", skip_all, fields(chain_id = chain_id))]
pub async fn check_block_timestamp_lag(
chain_id: String,
chain_config: EthereumConfig,
metrics: Family<ChainLabel, Gauge>,
) {
let provider = match Provider::<Http>::try_from(&chain_config.geth_rpc_addr) {
Ok(r) => r,
Err(e) => {
tracing::error!("Failed to create provider for chain id - {:?}", e);
return;
}
};

const INF_LAG: i64 = 1000000; // value that definitely triggers an alert
let lag = match provider.get_block(BlockNumber::Latest).await {
Ok(block) => match block {
Some(block) => {
let block_timestamp = block.timestamp;
let server_timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
let lag: i64 = (server_timestamp as i64) - (block_timestamp.as_u64() as i64);
lag
}
None => {
tracing::error!("Block is None");
INF_LAG
}
},
Err(e) => {
tracing::error!("Failed to get block - {:?}", e);
INF_LAG
}
};
metrics
.get_or_create(&ChainLabel {
chain_id: chain_id.clone(),
})
.set(lag);
}

/// Tracks the difference between the server timestamp and the latest block timestamp for each chain
pub async fn track_block_timestamp_lag(config: Config, metrics_registry: Arc<RwLock<Registry>>) {
let metrics = Family::<ChainLabel, Gauge>::default();
Expand All @@ -311,49 +355,11 @@ pub async fn track_block_timestamp_lag(config: Config, metrics_registry: Arc<RwL
);
loop {
for (chain_id, chain_config) in &config.chains {
let chain_id = chain_id.clone();
let chain_config = chain_config.clone();
let metrics = metrics.clone();

spawn(async move {
let chain_id = chain_id.clone();
let chain_config = chain_config.clone();

let provider = match Provider::<Http>::try_from(&chain_config.geth_rpc_addr) {
Ok(r) => r,
Err(e) => {
tracing::error!(
"Failed to create provider for chain id {} - {:?}",
&chain_id,
e
);
return;
}
};

match provider.get_block(BlockNumber::Latest).await {
Ok(b) => {
if let Some(block) = b {
let block_timestamp = block.timestamp;
let server_timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
let lag: i64 =
(server_timestamp as i64) - (block_timestamp.as_u64() as i64);

metrics
.get_or_create(&ChainLabel {
chain_id: chain_id.clone(),
})
.set(lag);
}
}
Err(e) => {
tracing::error!("Failed to get block for chain id {} - {:?}", &chain_id, e);
}
};
});
spawn(check_block_timestamp_lag(
chain_id.clone(),
chain_config.clone(),
metrics.clone(),
));
}

time::sleep(TRACK_INTERVAL).await;
Expand Down

0 comments on commit c76d205

Please sign in to comment.