diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index b3790024f81..c59c5e8ed10 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2953,7 +2953,7 @@ impl BeaconChain { } /// Wraps `process_block` in logic to cache the block's commitments in the processing cache - /// and evict if the block was imported or erred. + /// and evict if the block was imported or errored. pub async fn process_block_with_early_caching>( self: &Arc, block_root: Hash256, @@ -2998,22 +2998,20 @@ impl BeaconChain { // Increment the Prometheus counter for block processing requests. metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); + let block_slot = unverified_block.block().slot(); + // Set observed time if not already set. Usually this should be set by gossip or RPC, // but just in case we set it again here (useful for tests). - if let (Some(seen_timestamp), Some(current_slot)) = - (self.slot_clock.now_duration(), self.slot_clock.now()) - { + if let Some(seen_timestamp) = self.slot_clock.now_duration() { self.block_times_cache.write().set_time_observed( block_root, - current_slot, + block_slot, seen_timestamp, None, None, ); } - let block_slot = unverified_block.block().slot(); - // A small closure to group the verification and import errors. let chain = self.clone(); let import_block = async move { @@ -3024,6 +3022,15 @@ impl BeaconChain { )?; publish_fn()?; let executed_block = chain.into_executed_block(execution_pending).await?; + // Record the time it took to ask the execution layer. + if let Some(seen_timestamp) = self.slot_clock.now_duration() { + self.block_times_cache.write().set_execution_time( + block_root, + block_slot, + seen_timestamp, + ) + } + match executed_block { ExecutedBlock::Available(block) => { self.import_available_block(Box::new(block)).await @@ -3090,8 +3097,8 @@ impl BeaconChain { } } - /// Accepts a fully-verified block and awaits on it's payload verification handle to - /// get a fully `ExecutedBlock` + /// Accepts a fully-verified block and awaits on its payload verification handle to + /// get a fully `ExecutedBlock`. /// /// An error is returned if the verification handle couldn't be awaited. pub async fn into_executed_block( @@ -3224,10 +3231,6 @@ impl BeaconChain { ) -> Result> { match availability { Availability::Available(block) => { - // This is the time since start of the slot where all the components of the block have become available - let delay = - get_slot_delay_ms(timestamp_now(), block.block.slot(), &self.slot_clock); - metrics::observe_duration(&metrics::BLOCK_AVAILABILITY_DELAY, delay); // Block is fully available, import into fork choice self.import_available_block(block).await } @@ -3256,6 +3259,15 @@ impl BeaconChain { consensus_context, } = import_data; + // Record the time at which this block's blobs became available. 
+ if let Some(blobs_available) = block.blobs_available_timestamp() { + self.block_times_cache.write().set_time_blob_observed( + block_root, + block.slot(), + blobs_available, + ); + } + // import let chain = self.clone(); let block_root = self @@ -3396,6 +3408,14 @@ impl BeaconChain { "Early attester cache insert failed"; "error" => ?e ); + } else { + let attestable_timestamp = + self.slot_clock.now_duration().unwrap_or_default(); + self.block_times_cache.write().set_time_attestable( + block_root, + signed_block.slot(), + attestable_timestamp, + ) } } else { warn!( @@ -3885,25 +3905,6 @@ impl BeaconChain { ); } - // Do not store metrics if the block was > 4 slots old, this helps prevent noise during - // sync. - if block_delay_total < self.slot_clock.slot_duration() * 4 { - // Observe the delay between when we observed the block and when we imported it. - let block_delays = self.block_times_cache.read().get_block_delays( - block_root, - self.slot_clock - .start_of(current_slot) - .unwrap_or_else(|| Duration::from_secs(0)), - ); - - metrics::observe_duration( - &metrics::BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME, - block_delays - .imported - .unwrap_or_else(|| Duration::from_secs(0)), - ); - } - if let Some(event_handler) = self.event_handler.as_ref() { if event_handler.has_block_subscribers() { event_handler.register(EventKind::Block(SseBlock { diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 1fb61702006..a1ae260d930 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -14,6 +14,7 @@ use merkle_proof::MerkleTreeError; use slog::{debug, warn}; use ssz_derive::{Decode, Encode}; use ssz_types::VariableList; +use std::time::Duration; use tree_hash::TreeHash; use types::blob_sidecar::BlobIdentifier; use types::{ @@ -214,7 +215,10 @@ impl GossipVerifiedBlob { pub fn __assumed_valid(blob: Arc>) -> Self { Self { block_root: blob.block_root(), - blob: KzgVerifiedBlob { blob }, + blob: KzgVerifiedBlob { + blob, + seen_timestamp: Duration::from_secs(0), + }, } } pub fn id(&self) -> BlobIdentifier { @@ -260,6 +264,8 @@ impl GossipVerifiedBlob { #[ssz(struct_behaviour = "transparent")] pub struct KzgVerifiedBlob { blob: Arc>, + #[ssz(skip_serializing, skip_deserializing)] + seen_timestamp: Duration, } impl PartialOrd for KzgVerifiedBlob { @@ -275,8 +281,12 @@ impl Ord for KzgVerifiedBlob { } impl KzgVerifiedBlob { - pub fn new(blob: Arc>, kzg: &Kzg) -> Result { - verify_kzg_for_blob(blob, kzg) + pub fn new( + blob: Arc>, + kzg: &Kzg, + seen_timestamp: Duration, + ) -> Result { + verify_kzg_for_blob(blob, kzg, seen_timestamp) } pub fn to_blob(self) -> Arc> { self.blob @@ -294,12 +304,18 @@ impl KzgVerifiedBlob { pub fn blob_index(&self) -> u64 { self.blob.index } + pub fn seen_timestamp(&self) -> Duration { + self.seen_timestamp + } /// Construct a `KzgVerifiedBlob` that is assumed to be valid. /// /// This should ONLY be used for testing. 
#[cfg(test)] pub fn __assumed_valid(blob: Arc>) -> Self { - Self { blob } + Self { + blob, + seen_timestamp: Duration::from_secs(0), + } } } @@ -309,9 +325,13 @@ impl KzgVerifiedBlob { pub fn verify_kzg_for_blob( blob: Arc>, kzg: &Kzg, + seen_timestamp: Duration, ) -> Result, KzgError> { validate_blob::(kzg, &blob.blob, blob.kzg_commitment, blob.kzg_proof)?; - Ok(KzgVerifiedBlob { blob }) + Ok(KzgVerifiedBlob { + blob, + seen_timestamp, + }) } pub struct KzgVerifiedBlobList { @@ -322,13 +342,17 @@ impl KzgVerifiedBlobList { pub fn new>>>( blob_list: I, kzg: &Kzg, + seen_timestamp: Duration, ) -> Result { let blobs = blob_list.into_iter().collect::>(); verify_kzg_for_blob_list(blobs.iter(), kzg)?; Ok(Self { verified_blobs: blobs .into_iter() - .map(|blob| KzgVerifiedBlob { blob }) + .map(|blob| KzgVerifiedBlob { + blob, + seen_timestamp, + }) .collect(), }) } @@ -374,6 +398,8 @@ pub fn validate_blob_sidecar_for_gossip( let blob_epoch = blob_slot.epoch(T::EthSpec::slots_per_epoch()); let signed_block_header = &blob_sidecar.signed_block_header; + let seen_timestamp = chain.slot_clock.now_duration().unwrap_or_default(); + // This condition is not possible if we have received the blob from the network // since we only subscribe to `MaxBlobsPerBlock` subnets over gossip network. // We include this check only for completeness. @@ -641,8 +667,8 @@ pub fn validate_blob_sidecar_for_gossip( .kzg .as_ref() .ok_or(GossipBlobError::KzgNotInitialized)?; - let kzg_verified_blob = - KzgVerifiedBlob::new(blob_sidecar, kzg).map_err(GossipBlobError::KzgError)?; + let kzg_verified_blob = KzgVerifiedBlob::new(blob_sidecar, kzg, seen_timestamp) + .map_err(GossipBlobError::KzgError)?; Ok(GossipVerifiedBlob { block_root, diff --git a/beacon_node/beacon_chain/src/block_times_cache.rs b/beacon_node/beacon_chain/src/block_times_cache.rs index c5293bcb0ee..db547a1186c 100644 --- a/beacon_node/beacon_chain/src/block_times_cache.rs +++ b/beacon_node/beacon_chain/src/block_times_cache.rs @@ -18,6 +18,9 @@ type BlockRoot = Hash256; #[derive(Clone, Default)] pub struct Timestamps { pub observed: Option, + pub all_blobs_observed: Option, + pub execution_time: Option, + pub attestable: Option, pub imported: Option, pub set_as_head: Option, } @@ -25,8 +28,25 @@ pub struct Timestamps { // Helps arrange delay data so it is more relevant to metrics. #[derive(Debug, Default)] pub struct BlockDelays { + /// Time after the start of the slot when we saw the block. pub observed: Option, + /// Time after the start of the slot when we saw all blobs. + pub all_blobs_observed: Option, + /// The time it took to get verification from the EL for the block. + pub execution_time: Option, + /// The delay from the start of the slot until the block became available. + /// + /// Equal to max(`observed + execution_time`, `all_blobs_observed`). + pub available: Option, + /// The delay from the start of the slot until the block became attestable. + pub attestable: Option, + /// Time after `available`. + /// + /// We need to use `available` rather than `attestable` here to handle the case where the block + /// does not get added to the early-attester cache, in which case `attestable` is never set. pub imported: Option, + /// Time after `imported`.
pub set_as_head: Option, } @@ -35,14 +55,34 @@ impl BlockDelays { let observed = times .observed .and_then(|observed_time| observed_time.checked_sub(slot_start_time)); + let all_blobs_observed = times + .all_blobs_observed + .and_then(|all_blobs_observed| all_blobs_observed.checked_sub(slot_start_time)); + let execution_time = times + .execution_time + .and_then(|execution_time| execution_time.checked_sub(times.observed?)); + // Duration since UNIX epoch at which block became available. + let available_time = times.execution_time.map(|execution_time| { + std::cmp::max(execution_time, times.all_blobs_observed.unwrap_or_default()) + }); + // Duration from the start of the slot until the block became available. + let available_delay = + available_time.and_then(|available_time| available_time.checked_sub(slot_start_time)); + let attestable = times + .attestable + .and_then(|attestable_time| attestable_time.checked_sub(slot_start_time)); let imported = times .imported - .and_then(|imported_time| imported_time.checked_sub(times.observed?)); + .and_then(|imported_time| imported_time.checked_sub(available_time?)); let set_as_head = times .set_as_head .and_then(|set_as_head_time| set_as_head_time.checked_sub(times.imported?)); BlockDelays { observed, + all_blobs_observed, + execution_time, + available: available_delay, + attestable, imported, set_as_head, } @@ -109,6 +149,53 @@ impl BlockTimesCache { } } + pub fn set_time_blob_observed( + &mut self, + block_root: BlockRoot, + slot: Slot, + timestamp: Duration, + ) { + let block_times = self + .cache + .entry(block_root) + .or_insert_with(|| BlockTimesCacheValue::new(slot)); + if block_times + .timestamps + .all_blobs_observed + .map_or(true, |prev| timestamp > prev) + { + block_times.timestamps.all_blobs_observed = Some(timestamp); + } + } + + pub fn set_execution_time(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) { + let block_times = self + .cache + .entry(block_root) + .or_insert_with(|| BlockTimesCacheValue::new(slot)); + if block_times + .timestamps + .execution_time + .map_or(true, |prev| timestamp < prev) + { + block_times.timestamps.execution_time = Some(timestamp); + } + } + + pub fn set_time_attestable(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) { + let block_times = self + .cache + .entry(block_root) + .or_insert_with(|| BlockTimesCacheValue::new(slot)); + if block_times + .timestamps + .attestable + .map_or(true, |prev| timestamp < prev) + { + block_times.timestamps.attestable = Some(timestamp); + } + } + pub fn set_time_imported(&mut self, block_root: BlockRoot, slot: Slot, timestamp: Duration) { let block_times = self .cache diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 3cd8a7f259b..38648949f9c 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -666,8 +666,7 @@ type PayloadVerificationHandle = /// - Parent is known /// - Signatures /// - State root check -/// - Per block processing -/// - Blobs sidecar has been validated if present +/// - Block processing /// /// Note: a `ExecutionPendingBlock` is not _forever_ valid to be imported, it may later become invalid /// due to finality or some other event. 
A `ExecutionPendingBlock` should be imported into the diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index ced4eda05cf..734575d2c0d 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -1405,13 +1405,6 @@ fn observe_head_block_delays( // Do not store metrics if the block was > 4 slots old, this helps prevent noise during // sync. if !block_from_sync { - // Observe the total block delay. This is the delay between the time the slot started - // and when the block was set as head. - metrics::observe_duration( - &metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME, - block_delay_total, - ); - // Observe the delay between when we imported the block and when we set the block as // head. let block_delays = block_times_cache.get_block_delays( @@ -1421,34 +1414,120 @@ fn observe_head_block_delays( .unwrap_or_else(|| Duration::from_secs(0)), ); - metrics::observe_duration( - &metrics::BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME, + // Update all the metrics + + // Convention here is to use "Time" to indicate the duration of the event and "Delay" + // to indicate the time since the start of the slot. + // + // Observe the total block delay. This is the delay between the time the slot started + // and when the block was set as head. + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_TOTAL, + block_delay_total.as_millis() as i64, + ); + + // The time at which the beacon block was first observed to be processed + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_OBSERVED_SLOT_START, block_delays .observed - .unwrap_or_else(|| Duration::from_secs(0)), + .unwrap_or_else(|| Duration::from_secs(0)) + .as_millis() as i64, + ); + + // The time from the start of the slot when all blobs have been observed. Technically this + // is the time we last saw a blob related to this block/slot. + metrics::set_gauge( + &metrics::BEACON_BLOB_DELAY_ALL_OBSERVED_SLOT_START, + block_delays + .all_blobs_observed + .unwrap_or_else(|| Duration::from_secs(0)) + .as_millis() as i64, ); - metrics::observe_duration( - &metrics::BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME, + // The time it took to check the validity with the EL + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_EXECUTION_TIME, + block_delays + .execution_time + .unwrap_or_else(|| Duration::from_secs(0)) + .as_millis() as i64, + ); + + // The time the block became available after the start of the slot. Available here means + // that all the blobs have arrived and the block has been verified by the execution layer. + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_AVAILABLE_SLOT_START, + block_delays + .available + .unwrap_or_else(|| Duration::from_secs(0)) + .as_millis() as i64, + ); + + // The time the block became attestable after the start of the slot. + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_ATTESTABLE_SLOT_START, + block_delays + .attestable + .unwrap_or_else(|| Duration::from_secs(0)) + .as_millis() as i64, + ); + + // The time the block was imported since becoming available. 
+ metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_IMPORTED_TIME, + block_delays + .imported + .unwrap_or_else(|| Duration::from_secs(0)) + .as_millis() as i64, + ); + + // The time the block was imported and setting it as head + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_HEAD_IMPORTED_TIME, block_delays .set_as_head - .unwrap_or_else(|| Duration::from_secs(0)), + .unwrap_or_else(|| Duration::from_secs(0)) + .as_millis() as i64, ); // If the block was enshrined as head too late for attestations to be created for it, // log a debug warning and increment a metric. + let format_delay = |delay: &Option| { + delay.map_or("unknown".to_string(), |d| format!("{}", d.as_millis())) + }; if late_head { - metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL); + metrics::inc_counter(&metrics::BEACON_BLOCK_DELAY_HEAD_SLOT_START_EXCEEDED_TOTAL); debug!( log, "Delayed head block"; "block_root" => ?head_block_root, "proposer_index" => head_block_proposer_index, "slot" => head_block_slot, - "block_delay" => ?block_delay_total, - "observed_delay" => ?block_delays.observed, - "imported_delay" => ?block_delays.imported, - "set_as_head_delay" => ?block_delays.set_as_head, + "total_delay_ms" => block_delay_total.as_millis(), + "observed_delay_ms" => format_delay(&block_delays.observed), + "blob_delay_ms" => format_delay(&block_delays.all_blobs_observed), + "execution_time_ms" => format_delay(&block_delays.execution_time), + "available_delay_ms" => format_delay(&block_delays.available), + "attestable_delay_ms" => format_delay(&block_delays.attestable), + "imported_time_ms" => format_delay(&block_delays.imported), + "set_as_head_time_ms" => format_delay(&block_delays.set_as_head), + ); + } else { + debug!( + log, + "On-time head block"; + "block_root" => ?head_block_root, + "proposer_index" => head_block_proposer_index, + "slot" => head_block_slot, + "total_delay_ms" => block_delay_total.as_millis(), + "observed_delay_ms" => format_delay(&block_delays.observed), + "blob_delay_ms" => format_delay(&block_delays.all_blobs_observed), + "execution_time_ms" => format_delay(&block_delays.execution_time), + "available_delay_ms" => format_delay(&block_delays.available), + "attestable_delay_ms" => format_delay(&block_delays.attestable), + "imported_time_ms" => format_delay(&block_delays.imported), + "set_as_head_time_ms" => format_delay(&block_delays.set_as_head), ); } } diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 3ef105c6d34..dd0d97b1dae 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -14,6 +14,7 @@ use std::fmt; use std::fmt::Debug; use std::num::NonZeroUsize; use std::sync::Arc; +use std::time::Duration; use task_executor::TaskExecutor; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{BlobSidecarList, ChainSpec, Epoch, EthSpec, Hash256, SignedBeaconBlock}; @@ -176,8 +177,14 @@ impl DataAvailabilityChecker { return Err(AvailabilityCheckError::KzgNotInitialized); }; - let verified_blobs = KzgVerifiedBlobList::new(Vec::from(blobs).into_iter().flatten(), kzg) - .map_err(AvailabilityCheckError::Kzg)?; + let seen_timestamp = self + .slot_clock + .now_duration() + .ok_or(AvailabilityCheckError::SlotClockError)?; + + let verified_blobs = + KzgVerifiedBlobList::new(Vec::from(blobs).into_iter().flatten(), kzg, seen_timestamp) + .map_err(AvailabilityCheckError::Kzg)?; 
self.availability_cache .put_kzg_verified_blobs(block_root, verified_blobs) @@ -225,6 +232,7 @@ impl DataAvailabilityChecker { block_root, block, blobs: None, + blobs_available_timestamp: None, })) } } @@ -244,6 +252,7 @@ impl DataAvailabilityChecker { block_root, block, blobs: verified_blobs, + blobs_available_timestamp: None, })) } } @@ -289,6 +298,7 @@ impl DataAvailabilityChecker { block_root, block, blobs: None, + blobs_available_timestamp: None, })) } } @@ -303,6 +313,7 @@ impl DataAvailabilityChecker { block_root, block, blobs: verified_blobs, + blobs_available_timestamp: None, })) } } @@ -462,6 +473,8 @@ pub struct AvailableBlock { block_root: Hash256, block: Arc>, blobs: Option>, + /// Timestamp at which this block first became available (UNIX timestamp, time since 1970). + blobs_available_timestamp: Option, } impl AvailableBlock { @@ -474,6 +487,7 @@ impl AvailableBlock { block_root, block, blobs, + blobs_available_timestamp: None, } } @@ -488,6 +502,10 @@ impl AvailableBlock { self.blobs.as_ref() } + pub fn blobs_available_timestamp(&self) -> Option { + self.blobs_available_timestamp + } + pub fn deconstruct( self, ) -> ( @@ -499,6 +517,7 @@ impl AvailableBlock { block_root, block, blobs, + blobs_available_timestamp: _, } = self; (block_root, block, blobs) } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/error.rs b/beacon_node/beacon_chain/src/data_availability_checker/error.rs index 0804fe3b9ab..6c524786bfa 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/error.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/error.rs @@ -19,6 +19,7 @@ pub enum Error { ParentStateMissing(Hash256), BlockReplayError(state_processing::BlockReplayError), RebuildingStateCaches(BeaconStateError), + SlotClockError, } pub enum ErrorCategory { @@ -39,7 +40,8 @@ impl Error { | Error::Unexpected | Error::ParentStateMissing(_) | Error::BlockReplayError(_) - | Error::RebuildingStateCaches(_) => ErrorCategory::Internal, + | Error::RebuildingStateCaches(_) + | Error::SlotClockError => ErrorCategory::Internal, Error::Kzg(_) | Error::BlobIndexInvalid(_) | Error::KzgCommitmentMismatch { .. } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index edd981e6ddb..f4c1bc308c0 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -204,6 +204,12 @@ impl PendingComponents { executed_block, } = self; + let blobs_available_timestamp = verified_blobs + .iter() + .flatten() + .map(|blob| blob.seen_timestamp()) + .max(); + let Some(diet_executed_block) = executed_block else { return Err(AvailabilityCheckError::Unexpected); }; @@ -231,6 +237,7 @@ impl PendingComponents { block_root, block, blobs: Some(verified_blobs), + blobs_available_timestamp, }; Ok(Availability::Available(Box::new( AvailableExecutedBlock::new(available_block, import_data, payload_verification_outcome), diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 6cb0b6fd766..df718413cc0 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -847,37 +847,55 @@ lazy_static! { "Number of attester slashings seen", &["src", "validator"] ); +} + +// Prevent recursion limit +lazy_static! 
{ /* * Block Delay Metrics */ - pub static ref BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_block_observed_slot_start_delay_time", - "Duration between the start of the block's slot and the time the block was observed.", - // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] - decimal_buckets(-1,2) - ); - pub static ref BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_block_imported_observed_delay_time", - "Duration between the time the block was observed and the time when it was imported.", - // [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5] - decimal_buckets(-2,0) - ); - pub static ref BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_block_head_imported_delay_time", - "Duration between the time the block was imported and the time when it was set as head.", - // [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5] - decimal_buckets(-2,-1) - ); - pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_block_head_slot_start_delay_time", + pub static ref BEACON_BLOCK_DELAY_TOTAL: Result = try_create_int_gauge( + "beacon_block_delay_total", "Duration between the start of the block's slot and the time when it was set as head.", - // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] - decimal_buckets(-1,2) ); - pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL: Result = try_create_int_counter( - "beacon_block_head_slot_start_delay_exceeded_total", - "Triggered when the duration between the start of the block's slot and the current time \ + + pub static ref BEACON_BLOCK_DELAY_OBSERVED_SLOT_START: Result = try_create_int_gauge( + "beacon_block_delay_observed_slot_start", + "Duration between the start of the block's slot and the time the block was observed.", + ); + + pub static ref BEACON_BLOB_DELAY_ALL_OBSERVED_SLOT_START: Result = try_create_int_gauge( + "beacon_blob_delay_all_observed_slot_start", + "Duration between the start of the block's slot and the time all blobs for the block were observed.", + ); + + pub static ref BEACON_BLOCK_DELAY_EXECUTION_TIME: Result = try_create_int_gauge( + "beacon_block_delay_execution_time", + "The duration spent verifying the block with the execution layer.", + ); + + pub static ref BEACON_BLOCK_DELAY_AVAILABLE_SLOT_START: Result = try_create_int_gauge( + "beacon_block_delay_available_slot_start", + "Duration between the start of the slot and the time the block became available.", + ); + pub static ref BEACON_BLOCK_DELAY_ATTESTABLE_SLOT_START: Result = try_create_int_gauge( + "beacon_block_delay_attestable_slot_start", + "Duration between the start of the slot and the time the block became attestable.", + ); + + pub static ref BEACON_BLOCK_DELAY_IMPORTED_TIME: Result = try_create_int_gauge( + "beacon_block_delay_imported_time", + "Duration between the time the block became available and the time when it was imported.", + ); + + pub static ref BEACON_BLOCK_DELAY_HEAD_IMPORTED_TIME: Result = try_create_int_gauge( + "beacon_block_delay_head_imported_time", + "Duration between the time the block was imported and the time when it was set as head.", + ); + pub static ref BEACON_BLOCK_DELAY_HEAD_SLOT_START_EXCEEDED_TOTAL: Result = try_create_int_counter( + "beacon_block_delay_head_slot_start_exceeded_total", + "A counter that is triggered when the duration between the start of the block's slot and the current time \ will result in failed attestations.", ); @@ -1130,11 +1148,9 @@ lazy_static!
{ /* * Availability related metrics */ - pub static ref BLOCK_AVAILABILITY_DELAY: Result = try_create_histogram_with_buckets( + pub static ref BLOCK_AVAILABILITY_DELAY: Result = try_create_int_gauge( "block_availability_delay", "Duration between start of the slot and the time at which all components of the block are available.", - // Create a custom bucket list for greater granularity in block delay - Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0]) ); /* diff --git a/beacon_node/execution_layer/src/lib.rs b/beacon_node/execution_layer/src/lib.rs index 22410976c9d..3e7bf7f561d 100644 --- a/beacon_node/execution_layer/src/lib.rs +++ b/beacon_node/execution_layer/src/lib.rs @@ -24,7 +24,7 @@ use payload_status::process_payload_status; pub use payload_status::PayloadStatus; use sensitive_url::SensitiveUrl; use serde::{Deserialize, Serialize}; -use slog::{crit, debug, error, info, trace, warn, Logger}; +use slog::{crit, debug, error, info, warn, Logger}; use slot_clock::SlotClock; use std::collections::HashMap; use std::fmt; @@ -1331,15 +1331,11 @@ impl ExecutionLayer { &metrics::EXECUTION_LAYER_REQUEST_TIMES, &[metrics::NEW_PAYLOAD], ); + let timer = std::time::Instant::now(); + let block_number = new_payload_request.block_number(); let block_hash = new_payload_request.block_hash(); - trace!( - self.log(), - "Issuing engine_newPayload"; - "parent_hash" => ?new_payload_request.parent_hash(), - "block_hash" => ?block_hash, - "block_number" => ?new_payload_request.block_number(), - ); + let parent_hash = new_payload_request.parent_hash(); let result = self .engine() @@ -1347,9 +1343,19 @@ impl ExecutionLayer { .await; if let Ok(status) = &result { + let status_str = <&'static str>::from(status.status); metrics::inc_counter_vec( &metrics::EXECUTION_LAYER_PAYLOAD_STATUS, - &["new_payload", status.status.into()], + &["new_payload", status_str], + ); + debug!( + self.log(), + "Processed engine_newPayload"; + "status" => status_str, + "parent_hash" => ?parent_hash, + "block_hash" => ?block_hash, + "block_number" => block_number, + "response_time_ms" => timer.elapsed().as_millis() ); } *self.inner.last_new_payload_errored.write().await = result.is_err(); diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index d19a41a28fc..d3804fbed8d 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -248,50 +248,41 @@ lazy_static! { /* * Block Delay Metrics */ - pub static ref BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_block_gossip_propagation_verification_delay_time", - "Duration between when the block is received and when it is verified for propagation.", - // [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5] - decimal_buckets(-3,-1) - ); - pub static ref BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_block_gossip_slot_start_delay_time", - "Duration between when the block is received and the start of the slot it belongs to.", - // Create a custom bucket list for greater granularity in block delay - Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0]) - // NOTE: Previous values, which we may want to switch back to. 
- // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] - //decimal_buckets(-1,2) - + pub static ref BEACON_BLOCK_DELAY_GOSSIP: Result = try_create_int_gauge( + "beacon_block_delay_gossip", + "The first time we see this block from gossip as a delay from the start of the slot" + ); + pub static ref BEACON_BLOCK_DELAY_GOSSIP_VERIFICATION: Result = try_create_int_gauge( + "beacon_block_delay_gossip_verification", + "Keeps track of the time delay from the start of the slot to the point we propagate the block" ); - pub static ref BEACON_BLOCK_LAST_DELAY: Result = try_create_int_gauge( - "beacon_block_last_delay", - "Keeps track of the last block's delay from the start of the slot" + pub static ref BEACON_BLOCK_DELAY_FULL_VERIFICATION: Result = try_create_int_gauge( + "beacon_block_delay_full_verification", + "The time it takes to verify a beacon block." ); - pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result = try_create_int_counter( - "beacon_block_gossip_arrived_late_total", + pub static ref BEACON_BLOCK_DELAY_GOSSIP_ARRIVED_LATE_TOTAL: Result = try_create_int_counter( + "beacon_block_delay_gossip_arrived_late_total", "Count of times when a gossip block arrived from the network later than the attestation deadline.", ); /* * Blob Delay Metrics */ - pub static ref BEACON_BLOB_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_blob_gossip_propagation_verification_delay_time", - "Duration between when the blob is received over gossip and when it is verified for propagation.", - // [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5] - decimal_buckets(-3,-1) - ); - pub static ref BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( - "beacon_blob_gossip_slot_start_delay_time", - "Duration between when the blob is received over gossip and the start of the slot it belongs to.", - // Create a custom bucket list for greater granularity in block delay - Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0]) - // NOTE: Previous values, which we may want to switch back to. - // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] - //decimal_buckets(-1,2) + pub static ref BEACON_BLOB_DELAY_GOSSIP: Result = try_create_int_gauge( + "beacon_blob_delay_gossip_last_delay", + "The first time we see this blob as a delay from the start of the slot" + ); + + pub static ref BEACON_BLOB_DELAY_GOSSIP_VERIFICATION: Result = try_create_int_gauge( + "beacon_blob_delay_gossip_verification", + "Keeps track of the time delay from the start of the slot to the point we propagate the blob" + ); + pub static ref BEACON_BLOB_DELAY_FULL_VERIFICATION: Result = try_create_int_gauge( + "beacon_blob_last_full_verification_delay", + "The time it takes to verify a beacon blob" ); + pub static ref BEACON_BLOB_RPC_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( "beacon_blob_rpc_slot_start_delay_time", "Duration between when a blob is received over rpc and the start of the slot it belongs to.", @@ -302,10 +293,6 @@ lazy_static! 
{ //decimal_buckets(-1,2) ); - pub static ref BEACON_BLOB_LAST_DELAY: Result = try_create_int_gauge( - "beacon_blob_last_delay", - "Keeps track of the last blob's delay from the start of the slot" - ); pub static ref BEACON_BLOB_GOSSIP_ARRIVED_LATE_TOTAL: Result = try_create_int_counter( "beacon_blob_gossip_arrived_late_total", diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index f7bba900372..7b8826bd853 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -27,7 +27,7 @@ use std::fs; use std::io::Write; use std::path::PathBuf; use std::sync::Arc; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use store::hot_cold_store::HotColdDBError; use tokio::sync::mpsc; use types::{ @@ -615,8 +615,7 @@ impl NetworkBeaconProcessor { let commitment = blob_sidecar.kzg_commitment; let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock); // Log metrics to track delay from other nodes on the network. - metrics::observe_duration(&metrics::BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME, delay); - metrics::set_gauge(&metrics::BEACON_BLOB_LAST_DELAY, delay.as_millis() as i64); + metrics::set_gauge(&metrics::BEACON_BLOB_DELAY_GOSSIP, delay.as_millis() as i64); match self .chain .verify_blob_sidecar_for_gossip(blob_sidecar, blob_index) @@ -654,9 +653,9 @@ impl NetworkBeaconProcessor { .ok() .and_then(|now| now.checked_sub(seen_duration)) { - metrics::observe_duration( - &metrics::BEACON_BLOB_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME, - duration, + metrics::set_gauge( + &metrics::BEACON_BLOB_DELAY_GOSSIP_VERIFICATION, + duration.as_millis() as i64, ); } self.process_gossip_verified_blob(peer_id, gossip_verified_blob, seen_duration) @@ -747,9 +746,9 @@ impl NetworkBeaconProcessor { self: &Arc, peer_id: PeerId, verified_blob: GossipVerifiedBlob, - // This value is not used presently, but it might come in handy for debugging. _seen_duration: Duration, ) { + let processing_start_time = Instant::now(); let block_root = verified_blob.block_root(); let blob_slot = verified_blob.slot(); let blob_index = verified_blob.id().index; @@ -764,6 +763,11 @@ impl NetworkBeaconProcessor { "block_root" => %block_root ); self.chain.recompute_head_at_current_slot().await; + + metrics::set_gauge( + &metrics::BEACON_BLOB_DELAY_FULL_VERIFICATION, + processing_start_time.elapsed().as_millis() as i64, + ); } Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => { trace!( @@ -865,12 +869,9 @@ impl NetworkBeaconProcessor { let block_delay = get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock); // Log metrics to track delay from other nodes on the network. 
- metrics::observe_duration( - &metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME, - block_delay, - ); + metrics::set_gauge( - &metrics::BEACON_BLOCK_LAST_DELAY, + &metrics::BEACON_BLOCK_DELAY_GOSSIP, block_delay.as_millis() as i64, ); @@ -898,7 +899,7 @@ impl NetworkBeaconProcessor { let verified_block = match verification_result { Ok(verified_block) => { if block_delay >= self.chain.slot_clock.unagg_attestation_production_delay() { - metrics::inc_counter(&metrics::BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL); + metrics::inc_counter(&metrics::BEACON_BLOCK_DELAY_GOSSIP_ARRIVED_LATE_TOTAL); debug!( self.log, "Gossip block arrived late"; @@ -923,9 +924,9 @@ impl NetworkBeaconProcessor { .ok() .and_then(|now| now.checked_sub(seen_duration)) { - metrics::observe_duration( - &metrics::BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME, - duration, + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_GOSSIP_VERIFICATION, + duration.as_millis() as i64, ); } @@ -1130,9 +1131,9 @@ impl NetworkBeaconProcessor { verified_block: GossipVerifiedBlock, reprocess_tx: mpsc::Sender, invalid_block_storage: InvalidBlockStorage, - // This value is not used presently, but it might come in handy for debugging. _seen_duration: Duration, ) { + let processing_start_time = Instant::now(); let block = verified_block.block.block_cloned(); let block_root = verified_block.block_root; @@ -1168,6 +1169,11 @@ impl NetworkBeaconProcessor { ); self.chain.recompute_head_at_current_slot().await; + + metrics::set_gauge( + &metrics::BEACON_BLOCK_DELAY_FULL_VERIFICATION, + processing_start_time.elapsed().as_millis() as i64, + ); } Ok(AvailabilityProcessingStatus::MissingComponents(slot, block_root)) => { trace!(