From ce66582c1609e4963c6345675d7fd1aff9c38850 Mon Sep 17 00:00:00 2001 From: Lion - dapplion <35266934+dapplion@users.noreply.github.com> Date: Wed, 1 May 2024 05:12:15 +0900 Subject: [PATCH] Merge parent and current sync lookups (#5655) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Drop lookup type trait for a simple arg * Drop reconstructed for processing * Send parent blocks one by one * Merge current and parent lookups * Merge current and parent lookups clean up todos * Merge current and parent lookups tests * Merge remote-tracking branch 'origin/unstable' into sync-merged-lookup * Merge branch 'unstable' of https://github.com/sigp/lighthouse into sync-merged-lookup * fix compile after merge * #5655 pr review (#26) * fix compile after merge * remove todos, fix typos etc * fix compile * stable rng * delete TODO and unfilled out test * make download result a struct * enums instead of bools as params * fix comment * Various fixes * Track ignored child components * Track dropped lookup reason as metric * fix test * add comment describing behavior of avail check error *  update ordering --- .../beacon_chain/src/blob_verification.rs | 16 +- .../src/data_availability_checker.rs | 147 +- .../child_components.rs | 69 - .../src/data_availability_checker/error.rs | 1 + .../overflow_lru_cache.rs | 2 +- beacon_node/network/src/metrics.rs | 5 + .../network_beacon_processor/sync_methods.rs | 37 - .../src/network_beacon_processor/tests.rs | 4 +- .../network/src/sync/block_lookups/common.rs | 308 ++-- .../network/src/sync/block_lookups/mod.rs | 1443 ++++------------- .../src/sync/block_lookups/parent_chain.rs | 198 +++ .../src/sync/block_lookups/parent_lookup.rs | 227 --- .../sync/block_lookups/single_block_lookup.rs | 608 +++---- .../network/src/sync/block_lookups/tests.rs | 623 ++++--- beacon_node/network/src/sync/manager.rs | 214 +-- .../network/src/sync/network_context.rs | 150 +- common/lru_cache/src/time.rs | 6 + 17 files changed, 1594 insertions(+), 2464 deletions(-) delete mode 100644 beacon_node/beacon_chain/src/data_availability_checker/child_components.rs create mode 100644 beacon_node/network/src/sync/block_lookups/parent_chain.rs delete mode 100644 beacon_node/network/src/sync/block_lookups/parent_lookup.rs diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 263b9f9e013..fdf8ee2b971 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -571,6 +571,14 @@ pub fn validate_blob_sidecar_for_gossip( }); } + // Kzg verification for gossip blob sidecar + let kzg = chain + .kzg + .as_ref() + .ok_or(GossipBlobError::KzgNotInitialized)?; + let kzg_verified_blob = KzgVerifiedBlob::new(blob_sidecar.clone(), kzg, seen_timestamp) + .map_err(GossipBlobError::KzgError)?; + chain .observed_slashable .write() @@ -605,14 +613,6 @@ pub fn validate_blob_sidecar_for_gossip( }); } - // Kzg verification for gossip blob sidecar - let kzg = chain - .kzg - .as_ref() - .ok_or(GossipBlobError::KzgNotInitialized)?; - let kzg_verified_blob = KzgVerifiedBlob::new(blob_sidecar, kzg, seen_timestamp) - .map_err(GossipBlobError::KzgError)?; - Ok(GossipVerifiedBlob { block_root, blob: kzg_verified_blob, diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index dd0d97b1dae..27ed0ae6d56 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -2,14 +2,11 @@ use crate::blob_verification::{verify_kzg_for_blob_list, GossipVerifiedBlob, Kzg use crate::block_verification_types::{ AvailabilityPendingExecutedBlock, AvailableExecutedBlock, RpcBlock, }; -pub use crate::data_availability_checker::child_components::ChildComponents; use crate::data_availability_checker::overflow_lru_cache::OverflowLRUCache; use crate::{BeaconChain, BeaconChainTypes, BeaconStore}; use kzg::Kzg; -use slasher::test_utils::E; use slog::{debug, error, Logger}; use slot_clock::SlotClock; -use ssz_types::FixedVector; use std::fmt; use std::fmt::Debug; use std::num::NonZeroUsize; @@ -19,7 +16,6 @@ use task_executor::TaskExecutor; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{BlobSidecarList, ChainSpec, Epoch, EthSpec, Hash256, SignedBeaconBlock}; -mod child_components; mod error; mod overflow_lru_cache; mod state_lru_cache; @@ -94,68 +90,27 @@ impl DataAvailabilityChecker { self.availability_cache.has_block(block_root) } - pub fn get_missing_blob_ids_with(&self, block_root: Hash256) -> MissingBlobs { + /// Return the required blobs `block_root` expects if the block is currenlty in the cache. + pub fn num_expected_blobs(&self, block_root: &Hash256) -> Option { self.availability_cache - .with_pending_components(&block_root, |pending_components| match pending_components { - Some(pending_components) => self.get_missing_blob_ids( - block_root, - pending_components - .get_cached_block() - .as_ref() - .map(|b| b.as_block()), - &pending_components.verified_blobs, - ), - None => MissingBlobs::new_without_block(block_root, self.is_deneb()), + .peek_pending_components(block_root, |components| { + components.and_then(|components| components.num_expected_blobs()) }) } - /// If there's no block, all possible ids will be returned that don't exist in the given blobs. - /// If there no blobs, all possible ids will be returned. - pub fn get_missing_blob_ids( - &self, - block_root: Hash256, - block: Option<&SignedBeaconBlock>, - blobs: &FixedVector, ::MaxBlobsPerBlock>, - ) -> MissingBlobs { - let Some(current_slot) = self.slot_clock.now_or_genesis() else { - error!( - self.log, - "Failed to read slot clock when checking for missing blob ids" - ); - return MissingBlobs::BlobsNotRequired; - }; - - let current_epoch = current_slot.epoch(T::EthSpec::slots_per_epoch()); - - if self.da_check_required_for_epoch(current_epoch) { - match block { - Some(cached_block) => { - let block_commitments_len = cached_block - .message() - .body() - .blob_kzg_commitments() - .map(|v| v.len()) - .unwrap_or(0); - let blob_ids = blobs + /// Return the set of imported blob indexes for `block_root`. Returns None if there is no block + /// component for `block_root`. + pub fn imported_blob_indexes(&self, block_root: &Hash256) -> Option> { + self.availability_cache + .peek_pending_components(block_root, |components| { + components.map(|components| { + components + .get_cached_blobs() .iter() - .take(block_commitments_len) - .enumerate() - .filter_map(|(index, blob_commitment_opt)| { - blob_commitment_opt.is_none().then_some(BlobIdentifier { - block_root, - index: index as u64, - }) - }) - .collect(); - MissingBlobs::KnownMissing(blob_ids) - } - None => { - MissingBlobs::PossibleMissing(BlobIdentifier::get_all_blob_ids::(block_root)) - } - } - } else { - MissingBlobs::BlobsNotRequired - } + .filter_map(|blob| blob.as_ref().map(|blob| blob.blob_index())) + .collect::>() + }) + }) } /// Get a blob from the availability cache. @@ -351,6 +306,18 @@ impl DataAvailabilityChecker { .map_or(false, |da_epoch| block_epoch >= da_epoch) } + pub fn da_check_required_for_current_epoch(&self) -> bool { + let Some(current_slot) = self.slot_clock.now_or_genesis() else { + error!( + self.log, + "Failed to read slot clock when checking for missing blob ids" + ); + return false; + }; + + self.da_check_required_for_epoch(current_slot.epoch(T::EthSpec::slots_per_epoch())) + } + /// Returns `true` if the current epoch is greater than or equal to the `Deneb` epoch. pub fn is_deneb(&self) -> bool { self.slot_clock.now().map_or(false, |slot| { @@ -544,61 +511,3 @@ impl MaybeAvailableBlock { } } } - -#[derive(Debug, Clone)] -pub enum MissingBlobs { - /// We know for certain these blobs are missing. - KnownMissing(Vec), - /// We think these blobs might be missing. - PossibleMissing(Vec), - /// Blobs are not required. - BlobsNotRequired, -} - -impl MissingBlobs { - pub fn new_without_block(block_root: Hash256, is_deneb: bool) -> Self { - if is_deneb { - MissingBlobs::PossibleMissing(BlobIdentifier::get_all_blob_ids::(block_root)) - } else { - MissingBlobs::BlobsNotRequired - } - } - pub fn is_empty(&self) -> bool { - match self { - MissingBlobs::KnownMissing(v) => v.is_empty(), - MissingBlobs::PossibleMissing(v) => v.is_empty(), - MissingBlobs::BlobsNotRequired => true, - } - } - pub fn contains(&self, blob_id: &BlobIdentifier) -> bool { - match self { - MissingBlobs::KnownMissing(v) => v.contains(blob_id), - MissingBlobs::PossibleMissing(v) => v.contains(blob_id), - MissingBlobs::BlobsNotRequired => false, - } - } - pub fn remove(&mut self, blob_id: &BlobIdentifier) { - match self { - MissingBlobs::KnownMissing(v) => v.retain(|id| id != blob_id), - MissingBlobs::PossibleMissing(v) => v.retain(|id| id != blob_id), - MissingBlobs::BlobsNotRequired => {} - } - } - pub fn indices(&self) -> Vec { - match self { - MissingBlobs::KnownMissing(v) => v.iter().map(|id| id.index).collect(), - MissingBlobs::PossibleMissing(v) => v.iter().map(|id| id.index).collect(), - MissingBlobs::BlobsNotRequired => vec![], - } - } -} - -impl Into> for MissingBlobs { - fn into(self) -> Vec { - match self { - MissingBlobs::KnownMissing(v) => v, - MissingBlobs::PossibleMissing(v) => v, - MissingBlobs::BlobsNotRequired => vec![], - } - } -} diff --git a/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs b/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs deleted file mode 100644 index 184dfc45001..00000000000 --- a/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs +++ /dev/null @@ -1,69 +0,0 @@ -use crate::block_verification_types::RpcBlock; -use bls::Hash256; -use std::sync::Arc; -use types::blob_sidecar::FixedBlobSidecarList; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; - -/// For requests triggered by an `UnknownBlockParent` or `UnknownBlobParent`, this struct -/// is used to cache components as they are sent to the network service. We can't use the -/// data availability cache currently because any blocks or blobs without parents -/// won't pass validation and therefore won't make it into the cache. -pub struct ChildComponents { - pub block_root: Hash256, - pub downloaded_block: Option>>, - pub downloaded_blobs: FixedBlobSidecarList, -} - -impl From> for ChildComponents { - fn from(value: RpcBlock) -> Self { - let (block_root, block, blobs) = value.deconstruct(); - let fixed_blobs = blobs.map(|blobs| { - FixedBlobSidecarList::from(blobs.into_iter().map(Some).collect::>()) - }); - Self::new(block_root, Some(block), fixed_blobs) - } -} - -impl ChildComponents { - pub fn empty(block_root: Hash256) -> Self { - Self { - block_root, - downloaded_block: None, - downloaded_blobs: <_>::default(), - } - } - pub fn new( - block_root: Hash256, - block: Option>>, - blobs: Option>, - ) -> Self { - let mut cache = Self::empty(block_root); - if let Some(block) = block { - cache.merge_block(block); - } - if let Some(blobs) = blobs { - cache.merge_blobs(blobs); - } - cache - } - - pub fn merge_block(&mut self, block: Arc>) { - self.downloaded_block = Some(block); - } - - pub fn merge_blob(&mut self, blob: Arc>) { - if let Some(blob_ref) = self.downloaded_blobs.get_mut(blob.index as usize) { - *blob_ref = Some(blob); - } - } - - pub fn merge_blobs(&mut self, blobs: FixedBlobSidecarList) { - for blob in blobs.iter().flatten() { - self.merge_blob(blob.clone()); - } - } - - pub fn clear_blobs(&mut self) { - self.downloaded_blobs = FixedBlobSidecarList::default(); - } -} diff --git a/beacon_node/beacon_chain/src/data_availability_checker/error.rs b/beacon_node/beacon_chain/src/data_availability_checker/error.rs index 6c524786bfa..d22f6b2cc9f 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/error.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/error.rs @@ -22,6 +22,7 @@ pub enum Error { SlotClockError, } +#[derive(PartialEq, Eq)] pub enum ErrorCategory { /// Internal Errors (not caused by peers) Internal, diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 91c776adc10..f29cec92444 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -569,7 +569,7 @@ impl OverflowLRUCache { } } - pub fn with_pending_components>) -> R>( + pub fn peek_pending_components>) -> R>( &self, block_root: &Hash256, f: F, diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index d3804fbed8d..8df22a3d00e 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -244,6 +244,11 @@ lazy_static! { "sync_parent_block_lookups", "Number of parent block lookups underway" ); + pub static ref SYNC_LOOKUP_DROPPED: Result = try_create_int_counter_vec( + "sync_lookups_dropped_total", + "Total count of sync lookups dropped by reason", + &["reason"] + ); /* * Block Delay Metrics diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 887974c6e0b..daa9a2cf197 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -33,8 +33,6 @@ pub enum ChainSegmentProcessId { RangeBatchId(ChainId, Epoch), /// Processing ID for a backfill syncing batch. BackSyncBatchId(Epoch), - /// Processing Id of the parent lookup of a block. - ParentLookup(Hash256), } /// Returned when a chain segment import fails. @@ -396,41 +394,6 @@ impl NetworkBeaconProcessor { } } } - // this is a parent lookup request from the sync manager - ChainSegmentProcessId::ParentLookup(chain_head) => { - debug!( - self.log, "Processing parent lookup"; - "chain_hash" => %chain_head, - "blocks" => downloaded_blocks.len() - ); - // parent blocks are ordered from highest slot to lowest, so we need to process in - // reverse - match self - .process_blocks(downloaded_blocks.iter().rev(), notify_execution_layer) - .await - { - (imported_blocks, Err(e)) => { - debug!(self.log, "Parent lookup failed"; "error" => %e.message); - match e.peer_action { - Some(penalty) => BatchProcessResult::FaultyFailure { - imported_blocks: imported_blocks > 0, - penalty, - }, - None => BatchProcessResult::NonFaultyFailure, - } - } - (imported_blocks, Ok(_)) => { - debug!( - self.log, "Parent lookup processed successfully"; - "chain_hash" => %chain_head, - "imported_blocks" => imported_blocks - ); - BatchProcessResult::Success { - was_non_empty: imported_blocks > 0, - } - } - } - } }; self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result }); diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index dd58eb83555..4ba4c4ddd1d 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -311,9 +311,7 @@ impl TestRig { block_root, RpcBlock::new_without_blobs(Some(block_root), self.next_block.clone()), std::time::Duration::default(), - BlockProcessType::ParentLookup { - chain_hash: Hash256::random(), - }, + BlockProcessType::SingleBlock { id: 0 }, ) .unwrap(); } diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 9d1af0cb813..dc82000ef1a 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -1,21 +1,21 @@ -use crate::sync::block_lookups::parent_lookup::PARENT_FAIL_TOLERANCE; use crate::sync::block_lookups::single_block_lookup::{ LookupRequestError, SingleBlockLookup, SingleLookupRequestState, }; use crate::sync::block_lookups::{ - BlobRequestState, BlockLookups, BlockRequestState, PeerId, SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS, + BlobRequestState, BlockRequestState, PeerId, SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS, }; -use crate::sync::manager::{BlockProcessType, Id, SingleLookupReqId}; +use crate::sync::manager::{BlockProcessType, Id, SLOT_IMPORT_TOLERANCE}; use crate::sync::network_context::{ BlobsByRootSingleBlockRequest, BlocksByRootSingleRequest, SyncNetworkContext, }; use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::ChildComponents; use beacon_chain::BeaconChainTypes; use std::sync::Arc; -use std::time::Duration; use types::blob_sidecar::FixedBlobSidecarList; -use types::{Hash256, SignedBeaconBlock}; +use types::SignedBeaconBlock; + +use super::single_block_lookup::DownloadResult; +use super::SingleLookupId; #[derive(Debug, Copy, Clone)] pub enum ResponseType { @@ -23,20 +23,15 @@ pub enum ResponseType { Blob, } -#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] -pub enum LookupType { - Current, - Parent, -} +/// The maximum depth we will search for a parent block. In principle we should have sync'd any +/// canonical chain to its head once the peer connects. A chain should not appear where it's depth +/// is further back than the most recent head slot. +pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2; -impl LookupType { - fn max_attempts(&self) -> u8 { - match self { - LookupType::Current => SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS, - LookupType::Parent => PARENT_FAIL_TOLERANCE, - } - } -} +/// Wrapper around bool to prevent mixing this argument with `BlockIsProcessed` +pub(crate) struct AwaitingParent(pub bool); +/// Wrapper around bool to prevent mixing this argument with `AwaitingParent` +pub(crate) struct BlockIsProcessed(pub bool); /// This trait unifies common single block lookup functionality across blocks and blobs. This /// includes making requests, verifying responses, and handling processing results. A @@ -53,115 +48,68 @@ pub trait RequestState { /// The type created after validation. type VerifiedResponseType: Clone; - /// We convert a `VerifiedResponseType` to this type prior to sending it to the beacon processor. - type ReconstructedResponseType; - - /* Request building methods */ - - /// Construct a new request. - fn build_request( - &mut self, - lookup_type: LookupType, - ) -> Result<(PeerId, Self::RequestType), LookupRequestError> { - // Verify and construct request. - self.too_many_attempts(lookup_type)?; - let peer = self.get_peer()?; - let request = self.new_request(); - Ok((peer, request)) - } - - /// Construct a new request and send it. - fn build_request_and_send( + /// Potentially makes progress on this request if it's in a progress-able state + fn continue_request( &mut self, id: Id, - lookup_type: LookupType, + awaiting_parent: AwaitingParent, + downloaded_block_expected_blobs: Option, + block_is_processed: BlockIsProcessed, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - // Check if request is necessary. - if !self.get_state().is_awaiting_download() { - return Ok(()); + // Attempt to progress awaiting downloads + if self.get_state().is_awaiting_download() { + // Verify the current request has not exceeded the maximum number of attempts. + let request_state = self.get_state(); + if request_state.failed_attempts() >= SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS { + let cannot_process = request_state.more_failed_processing_attempts(); + return Err(LookupRequestError::TooManyAttempts { cannot_process }); + } + + let peer_id = self + .get_state_mut() + .use_rand_available_peer() + .ok_or(LookupRequestError::NoPeers)?; + + // make_request returns true only if a request was made + if self.make_request(id, peer_id, downloaded_block_expected_blobs, cx)? { + self.get_state_mut().on_download_start()?; + } + + // Otherwise, attempt to progress awaiting processing + // If this request is awaiting a parent lookup to be processed, do not send for processing. + // The request will be rejected with unknown parent error. + } else if !awaiting_parent.0 + && (block_is_processed.0 || matches!(Self::response_type(), ResponseType::Block)) + { + // maybe_start_processing returns Some if state == AwaitingProcess. This pattern is + // useful to conditionally access the result data. + if let Some(result) = self.get_state_mut().maybe_start_processing() { + return Self::send_for_processing(id, result, cx); + } } - // Construct request. - let (peer_id, request) = self.build_request(lookup_type)?; - - // Update request state. - let req_counter = self.get_state_mut().on_download_start(peer_id); - - // Make request - let id = SingleLookupReqId { - id, - req_counter, - lookup_type, - }; - Self::make_request(id, peer_id, request, cx) + Ok(()) } - /// Verify the current request has not exceeded the maximum number of attempts. - fn too_many_attempts(&self, lookup_type: LookupType) -> Result<(), LookupRequestError> { - let request_state = self.get_state(); - - if request_state.failed_attempts() >= lookup_type.max_attempts() { - let cannot_process = request_state.more_failed_processing_attempts(); - Err(LookupRequestError::TooManyAttempts { cannot_process }) - } else { - Ok(()) - } - } - - /// Get the next peer to request. Draws from the set of peers we think should have both the - /// block and blob first. If that fails, we draw from the set of peers that may have either. - fn get_peer(&mut self) -> Result { - self.get_state_mut() - .use_rand_available_peer() - .ok_or(LookupRequestError::NoPeers) - } - - /// Initialize `Self::RequestType`. - fn new_request(&self) -> Self::RequestType; - /// Send the request to the network service. fn make_request( - id: SingleLookupReqId, + &self, + id: Id, peer_id: PeerId, - request: Self::RequestType, + downloaded_block_expected_blobs: Option, cx: &mut SyncNetworkContext, - ) -> Result<(), LookupRequestError>; + ) -> Result; /* Response handling methods */ - /// A getter for the parent root of the response. Returns an `Option` because we won't know - /// the blob parent if we don't end up getting any blobs in the response. - fn get_parent_root(verified_response: &Self::VerifiedResponseType) -> Option; - - /// Caches the verified response in the lookup if necessary. This is only necessary for lookups - /// triggered by `UnknownParent` errors. - fn add_to_child_components( - verified_response: Self::VerifiedResponseType, - components: &mut ChildComponents, - ); - - /// Convert a verified response to the type we send to the beacon processor. - fn verified_to_reconstructed( - block_root: Hash256, - verified: Self::VerifiedResponseType, - ) -> Self::ReconstructedResponseType; - /// Send the response to the beacon processor. - fn send_reconstructed_for_processing( + fn send_for_processing( id: Id, - bl: &BlockLookups, - block_root: Hash256, - verified: Self::ReconstructedResponseType, - duration: Duration, + result: DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError>; - /// Register a failure to process the block or blob. - fn register_failure_downloading(&mut self) { - self.get_state_mut().on_download_failure() - } - /* Utility methods */ /// Returns the `ResponseType` associated with this trait implementation. Useful in logging. @@ -171,64 +119,49 @@ pub trait RequestState { fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self; /// A getter for a reference to the `SingleLookupRequestState` associated with this trait. - fn get_state(&self) -> &SingleLookupRequestState; + fn get_state(&self) -> &SingleLookupRequestState; /// A getter for a mutable reference to the SingleLookupRequestState associated with this trait. - fn get_state_mut(&mut self) -> &mut SingleLookupRequestState; + fn get_state_mut(&mut self) -> &mut SingleLookupRequestState; } -impl RequestState for BlockRequestState { +impl RequestState for BlockRequestState { type RequestType = BlocksByRootSingleRequest; type VerifiedResponseType = Arc>; - type ReconstructedResponseType = RpcBlock; - - fn new_request(&self) -> Self::RequestType { - BlocksByRootSingleRequest(self.requested_block_root) - } fn make_request( - id: SingleLookupReqId, + &self, + id: SingleLookupId, peer_id: PeerId, - request: Self::RequestType, + _: Option, cx: &mut SyncNetworkContext, - ) -> Result<(), LookupRequestError> { - cx.block_lookup_request(id, peer_id, request) - .map_err(LookupRequestError::SendFailed) - } - - fn get_parent_root(verified_response: &Arc>) -> Option { - Some(verified_response.parent_root()) - } - - fn add_to_child_components( - verified_response: Arc>, - components: &mut ChildComponents, - ) { - components.merge_block(verified_response); - } - - fn verified_to_reconstructed( - block_root: Hash256, - block: Arc>, - ) -> RpcBlock { - RpcBlock::new_without_blobs(Some(block_root), block) + ) -> Result { + cx.block_lookup_request( + id, + peer_id, + BlocksByRootSingleRequest(self.requested_block_root), + ) + .map_err(LookupRequestError::SendFailed) } - fn send_reconstructed_for_processing( - id: Id, - bl: &BlockLookups, - block_root: Hash256, - constructed: RpcBlock, - duration: Duration, + fn send_for_processing( + id: SingleLookupId, + download_result: DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { - bl.send_block_for_processing( + let DownloadResult { + value, + block_root, + seen_timestamp, + peer_id: _, + } = download_result; + cx.send_block_for_processing( block_root, - constructed, - duration, + RpcBlock::new_without_blobs(Some(block_root), value), + seen_timestamp, BlockProcessType::SingleBlock { id }, - cx, ) + .map_err(LookupRequestError::SendFailed) } fn response_type() -> ResponseType { @@ -237,73 +170,52 @@ impl RequestState for BlockRequestState { fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { &mut request.block_request_state } - fn get_state(&self) -> &SingleLookupRequestState { + fn get_state(&self) -> &SingleLookupRequestState { &self.state } - fn get_state_mut(&mut self) -> &mut SingleLookupRequestState { + fn get_state_mut(&mut self) -> &mut SingleLookupRequestState { &mut self.state } } -impl RequestState for BlobRequestState { +impl RequestState for BlobRequestState { type RequestType = BlobsByRootSingleBlockRequest; type VerifiedResponseType = FixedBlobSidecarList; - type ReconstructedResponseType = FixedBlobSidecarList; - - fn new_request(&self) -> Self::RequestType { - BlobsByRootSingleBlockRequest { - block_root: self.block_root, - indices: self.requested_ids.indices(), - } - } fn make_request( - id: SingleLookupReqId, + &self, + id: Id, peer_id: PeerId, - request: Self::RequestType, + downloaded_block_expected_blobs: Option, cx: &mut SyncNetworkContext, - ) -> Result<(), LookupRequestError> { - cx.blob_lookup_request(id, peer_id, request) - .map_err(LookupRequestError::SendFailed) - } - - fn get_parent_root(verified_response: &FixedBlobSidecarList) -> Option { - verified_response - .into_iter() - .filter_map(|blob| blob.as_ref()) - .map(|blob| blob.block_parent_root()) - .next() - } - - fn add_to_child_components( - verified_response: FixedBlobSidecarList, - components: &mut ChildComponents, - ) { - components.merge_blobs(verified_response); - } - - fn verified_to_reconstructed( - _block_root: Hash256, - blobs: FixedBlobSidecarList, - ) -> FixedBlobSidecarList { - blobs + ) -> Result { + cx.blob_lookup_request( + id, + peer_id, + self.block_root, + downloaded_block_expected_blobs, + ) + .map_err(LookupRequestError::SendFailed) } - fn send_reconstructed_for_processing( + fn send_for_processing( id: Id, - bl: &BlockLookups, - block_root: Hash256, - verified: FixedBlobSidecarList, - duration: Duration, + download_result: DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { - bl.send_blobs_for_processing( + let DownloadResult { + value, + block_root, + seen_timestamp, + peer_id: _, + } = download_result; + cx.send_blobs_for_processing( block_root, - verified, - duration, + value, + seen_timestamp, BlockProcessType::SingleBlob { id }, - cx, ) + .map_err(LookupRequestError::SendFailed) } fn response_type() -> ResponseType { @@ -312,10 +224,10 @@ impl RequestState for BlobRequestState { fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { &mut request.blob_request_state } - fn get_state(&self) -> &SingleLookupRequestState { + fn get_state(&self) -> &SingleLookupRequestState { &self.state } - fn get_state_mut(&mut self) -> &mut SingleLookupRequestState { + fn get_state_mut(&mut self) -> &mut SingleLookupRequestState { &mut self.state } } diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index a2909b49dd1..a0c7c33bb0f 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,20 +1,15 @@ -use self::single_block_lookup::SingleBlockLookup; -use super::manager::BlockProcessingResult; -use super::network_context::{LookupFailure, LookupVerifyError}; -use super::BatchProcessResult; -use super::{manager::BlockProcessType, network_context::SyncNetworkContext}; +use self::parent_chain::{compute_parent_chains, NodeChain}; +pub use self::single_block_lookup::DownloadResult; +use self::single_block_lookup::{LookupRequestError, SingleBlockLookup}; +use super::manager::{BlockProcessType, BlockProcessingResult}; +use super::network_context::{RpcProcessingResult, SyncNetworkContext}; use crate::metrics; -use crate::network_beacon_processor::ChainSegmentProcessId; -use crate::sync::block_lookups::common::LookupType; -use crate::sync::block_lookups::parent_lookup::{ParentLookup, RequestError}; -use crate::sync::block_lookups::single_block_lookup::{CachedChild, LookupRequestError}; -use crate::sync::manager::{Id, SingleLookupReqId}; -use beacon_chain::block_verification_types::{AsBlock, RpcBlock}; -pub use beacon_chain::data_availability_checker::ChildComponents; -use beacon_chain::data_availability_checker::{ - AvailabilityCheckErrorCategory, DataAvailabilityChecker, -}; -use beacon_chain::validator_monitor::timestamp_now; +use crate::sync::block_lookups::common::{ResponseType, PARENT_DEPTH_TOLERANCE}; +use crate::sync::block_lookups::parent_chain::find_oldest_fork_ancestor; +use crate::sync::manager::Id; +use crate::sync::network_context::LookupFailure; +use beacon_chain::block_verification_types::AsBlock; +use beacon_chain::data_availability_checker::AvailabilityCheckErrorCategory; use beacon_chain::{AvailabilityProcessingStatus, BeaconChainTypes, BlockError}; pub use common::RequestState; use fnv::FnvHashMap; @@ -22,735 +17,417 @@ use lighthouse_network::{PeerAction, PeerId}; use lru_cache::LRUTimeCache; pub use single_block_lookup::{BlobRequestState, BlockRequestState}; use slog::{debug, error, trace, warn, Logger}; -use smallvec::SmallVec; -use std::collections::{HashMap, VecDeque}; use std::sync::Arc; use std::time::Duration; use store::Hash256; -use types::blob_sidecar::FixedBlobSidecarList; -use types::Slot; +use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; pub mod common; -mod parent_lookup; +pub mod parent_chain; mod single_block_lookup; #[cfg(test)] mod tests; -pub type DownloadedBlock = (Hash256, RpcBlock); - const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; -pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 3; +pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 4; + +pub enum BlockComponent { + Block(DownloadResult>>), + Blob(DownloadResult>>), +} + +impl BlockComponent { + fn parent_root(&self) -> Hash256 { + match self { + BlockComponent::Block(block) => block.value.parent_root(), + BlockComponent::Blob(blob) => blob.value.block_parent_root(), + } + } + fn get_type(&self) -> &'static str { + match self { + BlockComponent::Block(_) => "block", + BlockComponent::Blob(_) => "blob", + } + } +} + +pub type SingleLookupId = u32; enum Action { Retry, - ParentUnknown { parent_root: Hash256, slot: Slot }, + ParentUnknown { parent_root: Hash256 }, Drop, Continue, } pub struct BlockLookups { - /// Parent chain lookups being downloaded. - parent_lookups: SmallVec<[ParentLookup; 3]>, - - processing_parent_lookups: HashMap, SingleBlockLookup)>, - /// A cache of failed chain lookups to prevent duplicate searches. failed_chains: LRUTimeCache, - single_block_lookups: FnvHashMap>, - - pub(crate) da_checker: Arc>, + // TODO: Why not index lookups by block_root? + single_block_lookups: FnvHashMap>, /// The logger for the import manager. log: Logger, } impl BlockLookups { - pub fn new(da_checker: Arc>, log: Logger) -> Self { + pub fn new(log: Logger) -> Self { Self { - parent_lookups: Default::default(), - processing_parent_lookups: Default::default(), failed_chains: LRUTimeCache::new(Duration::from_secs( FAILED_CHAINS_CACHE_EXPIRY_SECONDS, )), single_block_lookups: Default::default(), - da_checker, log, } } #[cfg(test)] - pub(crate) fn active_single_lookups(&self) -> Vec { - self.single_block_lookups.keys().cloned().collect() + pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { + self.failed_chains.insert(block_root); } #[cfg(test)] - pub(crate) fn active_parent_lookups(&self) -> Vec { - self.parent_lookups - .iter() - .map(|r| r.chain_hash()) - .collect::>() + pub(crate) fn get_failed_chains(&mut self) -> Vec { + self.failed_chains.keys().cloned().collect() } #[cfg(test)] - pub(crate) fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { - self.failed_chains.contains(chain_hash) + pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { + self.single_block_lookups + .iter() + .map(|(id, e)| (*id, e.block_root(), e.awaiting_parent())) + .collect() + } + + /// Returns a vec of all parent lookup chains by tip, in descending slot order (tip first) + pub(crate) fn active_parent_lookups(&self) -> Vec { + compute_parent_chains( + &self + .single_block_lookups + .values() + .map(|lookup| lookup.into()) + .collect::>(), + ) } /* Lookup requests */ - /// Creates a lookup for the block with the given `block_root` and immediately triggers it. - pub fn search_block( + /// Creates a parent lookup for the block with the given `block_root` and immediately triggers it. + /// If a parent lookup exists or is triggered, a current lookup will be created. + pub fn search_child_and_parent( &mut self, block_root: Hash256, - peer_source: &[PeerId], + block_component: BlockComponent, + peer_id: PeerId, cx: &mut SyncNetworkContext, ) { - self.new_current_lookup(block_root, None, peer_source, cx) + let parent_root = block_component.parent_root(); + + let parent_lookup_exists = + self.search_parent_of_child(parent_root, block_root, &[peer_id], cx); + // Only create the child lookup if the parent exists + if parent_lookup_exists { + // `search_parent_of_child` ensures that parent root is not a failed chain + self.new_current_lookup( + block_root, + Some(block_component), + Some(parent_root), + &[peer_id], + cx, + ); + } } - /// Creates a lookup for the block with the given `block_root`, while caching other block - /// components we've already received. The block components are cached here because we haven't - /// imported its parent and therefore can't fully validate it and store it in the data - /// availability cache. - /// - /// The request is immediately triggered. - pub fn search_child_block( + /// Seach a block whose parent root is unknown. + /// Returns true if the lookup is created or already exists + pub fn search_unknown_block( &mut self, block_root: Hash256, - child_components: ChildComponents, peer_source: &[PeerId], cx: &mut SyncNetworkContext, ) { - self.new_current_lookup(block_root, Some(child_components), peer_source, cx) + self.new_current_lookup(block_root, None, None, peer_source, cx); } - /// Attempts to trigger the request matching the given `block_root`. - pub fn trigger_single_lookup( + /// A block or blob triggers the search of a parent. + /// Check if this new lookup extends a bad chain: + /// - Extending `child_block_root_trigger` would exceed the max depth + /// - `block_root_to_search` is a failed chain + /// Returns true if the lookup is created or already exists + pub fn search_parent_of_child( &mut self, - mut single_block_lookup: SingleBlockLookup, + block_root_to_search: Hash256, + child_block_root_trigger: Hash256, + peers: &[PeerId], cx: &mut SyncNetworkContext, - ) { - let block_root = single_block_lookup.block_root(); - match single_block_lookup.request_block_and_blobs(cx) { - Ok(()) => self.add_single_lookup(single_block_lookup), - Err(e) => { - debug!(self.log, "Single block lookup failed"; - "error" => ?e, - "block_root" => ?block_root, - ); + ) -> bool { + let parent_chains = self.active_parent_lookups(); + + for (chain_idx, parent_chain) in parent_chains.iter().enumerate() { + if parent_chain.ancestor() == child_block_root_trigger + && parent_chain.len() >= PARENT_DEPTH_TOLERANCE + { + debug!(self.log, "Parent lookup chain too long"; "block_root" => ?block_root_to_search); + + // Searching for this parent would extend a parent chain over the max + // Insert the tip only to failed chains + self.failed_chains.insert(parent_chain.tip); + + // Note: Drop only the chain that's too long until it merges with another chain + // that's not too long. Consider this attack: there's a chain of valid unknown + // blocks A -> B. A malicious peer builds `PARENT_DEPTH_TOLERANCE` garbage + // blocks on top of A forming A -> C. The malicious peer forces us to fetch C + // from it, which will result in parent A hitting the chain_too_long error. Then + // the valid chain A -> B is dropped too. + if let Ok(block_to_drop) = find_oldest_fork_ancestor(parent_chains, chain_idx) { + // Drop all lookups descending from the child of the too long parent chain + if let Some((lookup_id, lookup)) = self + .single_block_lookups + .iter() + .find(|(_, l)| l.block_root() == block_to_drop) + { + for &peer_id in lookup.all_used_peers() { + cx.report_peer( + peer_id, + PeerAction::LowToleranceError, + "chain_too_long", + ); + } + self.drop_lookup_and_children(*lookup_id); + } + } + + return false; } } - } - /// Adds a lookup to the `single_block_lookups` map. - pub fn add_single_lookup(&mut self, single_block_lookup: SingleBlockLookup) { - self.single_block_lookups - .insert(single_block_lookup.id, single_block_lookup); - - metrics::set_gauge( - &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, - self.single_block_lookups.len() as i64, - ); + // `block_root_to_search` is a failed chain check happens inside new_current_lookup + self.new_current_lookup(block_root_to_search, None, None, peers, cx) } /// Searches for a single block hash. If the blocks parent is unknown, a chain of blocks is /// constructed. - pub fn new_current_lookup( + /// Returns true if the lookup is created or already exists + fn new_current_lookup( &mut self, block_root: Hash256, - child_components: Option>, + block_component: Option>, + awaiting_parent: Option, peers: &[PeerId], cx: &mut SyncNetworkContext, - ) { + ) -> bool { + // If this block or it's parent is part of a known failed chain, ignore it. + if self.failed_chains.contains(&block_root) { + debug!(self.log, "Block is from a past failed chain. Dropping"; "block_root" => ?block_root); + for peer_id in peers { + cx.report_peer(*peer_id, PeerAction::MidToleranceError, "failed_chain"); + } + return false; + } + // Do not re-request a block that is already being requested if let Some((_, lookup)) = self .single_block_lookups .iter_mut() .find(|(_id, lookup)| lookup.is_for_block(block_root)) { + trace!(self.log, "Adding peer to existing single block lookup"; "block_root" => %block_root); lookup.add_peers(peers); - if let Some(components) = child_components { - lookup.add_child_components(components); + if let Some(block_component) = block_component { + let component_type = block_component.get_type(); + let imported = lookup.add_child_components(block_component); + if !imported { + debug!(self.log, "Lookup child component ignored"; "block_root" => %block_root, "type" => component_type); + } } - return; + return true; } - if let Some(parent_lookup) = self.parent_lookups.iter_mut().find(|parent_req| { - parent_req.is_for_block(block_root) || parent_req.contains_block(&block_root) - }) { - parent_lookup.add_peers(peers); - - // If the block was already downloaded, or is being downloaded in this moment, do not - // request it. - trace!(self.log, "Already searching for block in a parent lookup request"; "block_root" => ?block_root); - return; - } - - if self - .processing_parent_lookups - .values() - .any(|(hashes, _last_parent_request)| hashes.contains(&block_root)) - { - // we are already processing this block, ignore it. - trace!(self.log, "Already processing block in a parent request"; "block_root" => ?block_root); - return; + // Ensure that awaiting parent exists, otherwise this lookup won't be able to make progress + if let Some(awaiting_parent) = awaiting_parent { + if !self + .single_block_lookups + .iter() + .any(|(_, lookup)| lookup.is_for_block(awaiting_parent)) + { + return false; + } } - let msg = if child_components.is_some() { + let msg = if block_component.is_some() { "Searching for components of a block with unknown parent" } else { "Searching for block components" }; - - let lookup = SingleBlockLookup::new( - block_root, - child_components, - peers, - self.da_checker.clone(), - cx.next_id(), - LookupType::Current, - ); - debug!( self.log, "{}", msg; "peer_ids" => ?peers, "block" => ?block_root, ); - self.trigger_single_lookup(lookup, cx); - } - /// If a block is attempted to be processed but we do not know its parent, this function is - /// called in order to find the block's parent. - pub fn search_parent( - &mut self, - slot: Slot, - block_root: Hash256, - parent_root: Hash256, - peer_id: PeerId, - cx: &mut SyncNetworkContext, - ) { - // If this block or it's parent is part of a known failed chain, ignore it. - if self.failed_chains.contains(&parent_root) || self.failed_chains.contains(&block_root) { - debug!(self.log, "Block is from a past failed chain. Dropping"; - "block_root" => ?block_root, "block_slot" => slot); - return; - } + // If we know that this lookup has unknown parent (is awaiting a parent lookup to resolve), + // signal here to hold processing downloaded data. + let mut lookup = SingleBlockLookup::new(block_root, peers, cx.next_id(), awaiting_parent); - // Make sure this block is not already downloaded, and that neither it or its parent is - // being searched for. - if let Some(parent_lookup) = self.parent_lookups.iter_mut().find(|parent_req| { - parent_req.contains_block(&parent_root) || parent_req.is_for_block(parent_root) - }) { - parent_lookup.add_peer(peer_id); - // we are already searching for this block, ignore it - debug!(self.log, "Already searching for parent block"; - "block_root" => ?block_root, "parent_root" => ?parent_root); - return; + // Add block components to the new request + if let Some(block_component) = block_component { + lookup.add_child_components(block_component); } - if self - .processing_parent_lookups - .iter() - .any(|(chain_hash, (hashes, _peers))| { - chain_hash == &block_root - || hashes.contains(&block_root) - || hashes.contains(&parent_root) - }) - { - // we are already processing this block, ignore it. - debug!(self.log, "Already processing parent block"; - "block_root" => ?block_root, "parent_root" => ?parent_root); - return; + if let Err(e) = lookup.continue_requests(cx) { + self.on_lookup_request_error(lookup.id, e, "new_current_lookup"); + false + } else { + self.single_block_lookups.insert(lookup.id, lookup); + self.update_metrics(); + true } - let parent_lookup = ParentLookup::new( - block_root, - parent_root, - peer_id, - self.da_checker.clone(), - cx, - ); - - debug!(self.log, "Created new parent lookup"; "block_root" => ?block_root, "parent_root" => ?parent_root); - - self.request_parent(parent_lookup, cx); } /* Lookup responses */ - /// Get a single block lookup by its ID. This method additionally ensures the `req_counter` - /// matches the current `req_counter` for the lookup. This ensures any stale responses from requests - /// that have been retried are ignored. - fn get_single_lookup>( + /// Process a block or blob response received from a single lookup request. + pub fn on_download_response>( &mut self, - id: SingleLookupReqId, - ) -> Option> { - let mut lookup = self.single_block_lookups.remove(&id.id)?; - - let request_state = R::request_state_mut(&mut lookup); - if request_state - .get_state() - .is_current_req_counter(id.req_counter) - { - Some(lookup) - } else { - // We don't want to drop the lookup, just ignore the old response. - self.single_block_lookups.insert(id.id, lookup); - None + id: SingleLookupId, + peer_id: PeerId, + response: RpcProcessingResult, + cx: &mut SyncNetworkContext, + ) { + if let Err(e) = self.on_download_response_inner::(id, peer_id, response, cx) { + self.on_lookup_request_error(id, e, "download_response"); } } - /// Checks whether a single block lookup is waiting for a parent lookup to complete. This is - /// necessary because we want to make sure all parents are processed before sending a child - /// for processing, otherwise the block will fail validation and will be returned to the network - /// layer with an `UnknownParent` error. - pub fn has_pending_parent_request(&self, block_root: Hash256) -> bool { - self.parent_lookups - .iter() - .any(|parent_lookup| parent_lookup.chain_hash() == block_root) - } - /// Process a block or blob response received from a single lookup request. - pub fn single_lookup_response>( + pub fn on_download_response_inner>( &mut self, - lookup_id: SingleLookupReqId, + id: SingleLookupId, peer_id: PeerId, - response: R::VerifiedResponseType, - seen_timestamp: Duration, + response: RpcProcessingResult, cx: &mut SyncNetworkContext, - ) { - let id = lookup_id.id; - let response_type = R::response_type(); + ) -> Result<(), LookupRequestError> { + // Downscore peer even if lookup is not known + // Only downscore lookup verify errors. RPC errors are downscored in the network handler. + if let Err(LookupFailure::LookupVerifyError(e)) = &response { + // Note: the error is displayed in full debug form on the match below + cx.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); + } - let Some(mut lookup) = self.get_single_lookup::(lookup_id) else { + let response_type = R::response_type(); + let Some(lookup) = self.single_block_lookups.get_mut(&id) else { // We don't have the ability to cancel in-flight RPC requests. So this can happen // if we started this RPC request, and later saw the block/blobs via gossip. - debug!( - self.log, - "Block returned for single block lookup not present"; - "response_type" => ?response_type, - ); - return; + debug!(self.log, "Block returned for single block lookup not present"; "id" => id); + return Ok(()); }; - let expected_block_root = lookup.block_root(); - debug!(self.log, - "Peer returned response for single lookup"; - "peer_id" => %peer_id , - "id" => ?id, - "block_root" => ?expected_block_root, - "response_type" => ?response_type, - ); + let block_root = lookup.block_root(); + let request_state = R::request_state_mut(lookup).get_state_mut(); - match self.handle_verified_response::( - seen_timestamp, - cx, - BlockProcessType::SingleBlock { id: lookup.id }, - response, - &mut lookup, - ) { - Ok(_) => { - self.single_block_lookups.insert(id, lookup); - } - Err(e) => { + match response { + Ok((response, seen_timestamp)) => { debug!(self.log, - "Single lookup request failed"; - "error" => ?e, - "block_root" => ?expected_block_root, + "Received lookup download success"; + "block_root" => %block_root, + "peer_id" => %peer_id, + "response_type" => ?response_type, ); - } - } - - metrics::set_gauge( - &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, - self.single_block_lookups.len() as i64, - ); - } - - /// Consolidates error handling for `single_lookup_response`. An `Err` here should always mean - /// the lookup is dropped. - fn handle_verified_response>( - &self, - seen_timestamp: Duration, - cx: &mut SyncNetworkContext, - process_type: BlockProcessType, - verified_response: R::VerifiedResponseType, - lookup: &mut SingleBlockLookup, - ) -> Result<(), LookupRequestError> { - let id = lookup.id; - let block_root = lookup.block_root(); - let cached_child = lookup.add_response::(verified_response.clone()); - match cached_child { - CachedChild::Ok(block) => { - // If we have an outstanding parent request for this block, delay sending the response until - // all parent blocks have been processed, otherwise we will fail validation with an - // `UnknownParent`. - let delay_send = match lookup.lookup_type { - LookupType::Parent => false, - LookupType::Current => self.has_pending_parent_request(lookup.block_root()), - }; - - if !delay_send { - R::request_state_mut(lookup) - .get_state_mut() - .on_download_success() - .map_err(LookupRequestError::BadState)?; - self.send_block_for_processing( - block_root, - block, - seen_timestamp, - process_type, - cx, - )? - } - } - CachedChild::DownloadIncomplete => { - R::request_state_mut(lookup) - .get_state_mut() - .on_download_success() - .map_err(LookupRequestError::BadState)?; - // If this was the result of a block request, we can't determine if the block peer - // did anything wrong. If we already had both a block and blobs response processed, - // we should penalize the blobs peer because they did not provide all blobs on the - // initial request. - if lookup.both_components_downloaded() { - lookup.penalize_blob_peer(cx); - lookup.blob_request_state.state.on_download_failure(); - } - lookup.request_block_and_blobs(cx)?; - } - CachedChild::NotRequired => { - R::request_state_mut(lookup) - .get_state_mut() - .on_download_success() - .map_err(LookupRequestError::BadState)?; - - R::send_reconstructed_for_processing( - id, - self, + // Register the download peer here. Once we have received some data over the wire we + // attribute it to this peer for scoring latter regardless of how the request was + // done. + request_state.on_download_success(DownloadResult { + value: response, block_root, - R::verified_to_reconstructed(block_root, verified_response), seen_timestamp, - cx, - )? - } - CachedChild::Err(e) => { - warn!(self.log, "Consistency error in cached block"; - "error" => ?e, - "block_root" => ?block_root - ); - lookup.handle_consistency_failure(cx); - lookup.request_block_and_blobs(cx)?; - } - } - Ok(()) - } - - /// Get a parent block lookup by its ID. This method additionally ensures the `req_counter` - /// matches the current `req_counter` for the lookup. This any stale responses from requests - /// that have been retried are ignored. - fn get_parent_lookup>( - &mut self, - id: SingleLookupReqId, - ) -> Option> { - let mut parent_lookup = if let Some(pos) = self - .parent_lookups - .iter() - .position(|request| request.current_parent_request.id == id.id) - { - self.parent_lookups.remove(pos) - } else { - return None; - }; - - if R::request_state_mut(&mut parent_lookup.current_parent_request) - .get_state() - .is_current_req_counter(id.req_counter) - { - Some(parent_lookup) - } else { - self.parent_lookups.push(parent_lookup); - None - } - } - - /// Process a response received from a parent lookup request. - pub fn parent_lookup_response>( - &mut self, - id: SingleLookupReqId, - peer_id: PeerId, - response: R::VerifiedResponseType, - seen_timestamp: Duration, - cx: &mut SyncNetworkContext, - ) { - let Some(mut parent_lookup) = self.get_parent_lookup::(id) else { - debug!(self.log, "Response for a parent lookup request that was not found"; "peer_id" => %peer_id); - return; - }; - - debug!(self.log, - "Peer returned response for parent lookup"; - "peer_id" => %peer_id , - "id" => ?id, - "block_root" => ?parent_lookup.current_parent_request.block_request_state.requested_block_root, - "response_type" => ?R::response_type(), - ); - - match self.parent_lookup_response_inner::( - peer_id, - response, - seen_timestamp, - cx, - &mut parent_lookup, - ) { - Ok(()) => { - self.parent_lookups.push(parent_lookup); + peer_id, + })?; + // continue_request will send for processing as the request state is AwaitingProcessing + lookup.continue_request::(cx) } Err(e) => { - self.handle_parent_request_error(&mut parent_lookup, cx, e); - } - } - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); - } - - /// Consolidates error handling for `parent_lookup_response`. An `Err` here should always mean - /// the lookup is dropped. - fn parent_lookup_response_inner>( - &mut self, - peer_id: PeerId, - response: R::VerifiedResponseType, - seen_timestamp: Duration, - cx: &mut SyncNetworkContext, - parent_lookup: &mut ParentLookup, - ) -> Result<(), RequestError> { - // check if the parent of this block isn't in the failed cache. If it is, this chain should - // be dropped and the peer downscored. - if let Some(parent_root) = R::get_parent_root(&response) { - if self.failed_chains.contains(&parent_root) { - let request_state = R::request_state_mut(&mut parent_lookup.current_parent_request); - request_state.register_failure_downloading(); - debug!( - self.log, - "Parent chain ignored due to past failure"; - "block" => %parent_root, + debug!(self.log, + "Received lookup download failure"; + "block_root" => %block_root, + "peer_id" => %peer_id, + "response_type" => ?response_type, + "error" => %e, ); - // Add the root block to failed chains - self.failed_chains.insert(parent_lookup.chain_hash()); - cx.report_peer( - peer_id, - PeerAction::MidToleranceError, - "bbroot_failed_chains", - ); - return Ok(()); - } - } - - self.handle_verified_response::( - seen_timestamp, - cx, - BlockProcessType::ParentLookup { - chain_hash: parent_lookup.chain_hash(), - }, - response, - &mut parent_lookup.current_parent_request, - )?; - - Ok(()) - } - - /// Handle logging and peer scoring for `RequestError`s during parent lookup requests. - fn handle_parent_request_error( - &mut self, - parent_lookup: &mut ParentLookup, - cx: &SyncNetworkContext, - e: RequestError, - ) { - debug!(self.log, "Failed to request parent"; "error" => e.as_static()); - match e { - RequestError::SendFailed(_) => { - // Probably shutting down, nothing to do here. Drop the request - } - RequestError::ChainTooLong => { - self.failed_chains.insert(parent_lookup.chain_hash()); - // This indicates faulty peers. - for &peer_id in parent_lookup.all_used_peers() { - cx.report_peer(peer_id, PeerAction::LowToleranceError, e.as_static()) - } - } - RequestError::TooManyAttempts { cannot_process } => { - // We only consider the chain failed if we were unable to process it. - // We could have failed because one peer continually failed to send us - // bad blocks. We still allow other peers to send us this chain. Note - // that peers that do this, still get penalised. - if cannot_process { - self.failed_chains.insert(parent_lookup.chain_hash()); - } - // This indicates faulty peers. - for &peer_id in parent_lookup.all_used_peers() { - cx.report_peer(peer_id, PeerAction::LowToleranceError, e.as_static()) - } - } - RequestError::NoPeers => { - // This happens if the peer disconnects while the block is being - // processed. Drop the request without extra penalty - } - RequestError::BadState(..) => { - warn!(self.log, "Failed to request parent"; "error" => e.as_static()); + request_state.on_download_failure()?; + // continue_request will retry a download as the request state is AwaitingDownload + lookup.continue_request::(cx) } } } /* Error responses */ - pub fn peer_disconnected(&mut self, peer_id: &PeerId, cx: &mut SyncNetworkContext) { + pub fn peer_disconnected(&mut self, peer_id: &PeerId) { /* Check disconnection for single lookups */ self.single_block_lookups.retain(|_, req| { let should_drop_lookup = - req.should_drop_lookup_on_disconnected_peer(peer_id, cx, &self.log); + req.should_drop_lookup_on_disconnected_peer(peer_id ); + + if should_drop_lookup { + debug!(self.log, "Dropping single lookup after peer disconnection"; "block_root" => %req.block_root()); + } !should_drop_lookup }); - - /* Check disconnection for parent lookups */ - while let Some(pos) = self - .parent_lookups - .iter_mut() - .position(|req| req.check_peer_disconnected(peer_id).is_err()) - { - let parent_lookup = self.parent_lookups.remove(pos); - debug!(self.log, "Dropping parent lookup after peer disconnected"; &parent_lookup); - self.request_parent(parent_lookup, cx); - } } - /// An RPC error has occurred during a parent lookup. This function handles this case. - pub fn parent_lookup_failed>( - &mut self, - id: SingleLookupReqId, - peer_id: &PeerId, - cx: &mut SyncNetworkContext, - error: LookupFailure, - ) { - // Only downscore lookup verify errors. RPC errors are downscored in the network handler. - if let LookupFailure::LookupVerifyError(e) = &error { - // Downscore peer even if lookup is not known - self.downscore_on_rpc_error(peer_id, e, cx); - } - - let Some(mut parent_lookup) = self.get_parent_lookup::(id) else { - debug!(self.log, - "RPC failure for a block parent lookup request that was not found"; - "peer_id" => %peer_id, - "error" => %error - ); - return; - }; - R::request_state_mut(&mut parent_lookup.current_parent_request) - .register_failure_downloading(); - debug!(self.log, "Parent lookup block request failed"; - "chain_hash" => %parent_lookup.chain_hash(), "id" => ?id, "error" => %error - ); - - self.request_parent(parent_lookup, cx); - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); - } + /* Processing responses */ - /// An RPC error has occurred during a single lookup. This function handles this case.\ - pub fn single_block_lookup_failed>( + pub fn on_processing_result( &mut self, - id: SingleLookupReqId, - peer_id: &PeerId, + process_type: BlockProcessType, + result: BlockProcessingResult, cx: &mut SyncNetworkContext, - error: LookupFailure, ) { - // Only downscore lookup verify errors. RPC errors are downscored in the network handler. - if let LookupFailure::LookupVerifyError(e) = &error { - // Downscore peer even if lookup is not known - self.downscore_on_rpc_error(peer_id, e, cx); - } - - let log = self.log.clone(); - let Some(mut lookup) = self.get_single_lookup::(id) else { - debug!(log, "Error response to dropped lookup"; "error" => %error); - return; - }; - let block_root = lookup.block_root(); - let request_state = R::request_state_mut(&mut lookup); - let response_type = R::response_type(); - trace!(log, - "Single lookup failed"; - "block_root" => ?block_root, - "error" => %error, - "peer_id" => %peer_id, - "response_type" => ?response_type - ); - let id = id.id; - request_state.register_failure_downloading(); - if let Err(e) = lookup.request_block_and_blobs(cx) { - debug!(self.log, - "Single lookup retry failed"; - "error" => ?e, - "block_root" => ?block_root, - ); - } else { - self.single_block_lookups.insert(id, lookup); + if let Err(e) = match process_type { + BlockProcessType::SingleBlock { id } => { + self.on_processing_result_inner::>(id, result, cx) + } + BlockProcessType::SingleBlob { id } => { + self.on_processing_result_inner::>(id, result, cx) + } + } { + self.on_lookup_request_error(process_type.id(), e, "processing_result"); } - - metrics::set_gauge( - &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, - self.single_block_lookups.len() as i64, - ); } - /* Processing responses */ - - pub fn single_block_component_processed>( + pub fn on_processing_result_inner>( &mut self, - target_id: Id, + lookup_id: SingleLookupId, result: BlockProcessingResult, cx: &mut SyncNetworkContext, - ) { - let Some(mut lookup) = self.single_block_lookups.remove(&target_id) else { - debug!(self.log, "Unknown single block lookup"; "target_id" => target_id); - return; + ) -> Result<(), LookupRequestError> { + let Some(lookup) = self.single_block_lookups.get_mut(&lookup_id) else { + debug!(self.log, "Unknown single block lookup"; "id" => lookup_id); + return Ok(()); }; let block_root = lookup.block_root(); - let request_state = R::request_state_mut(&mut lookup); + let request_state = R::request_state_mut(lookup).get_state_mut(); - let peer_id = match request_state.get_state().processing_peer() { - Ok(peer_id) => peer_id, - Err(e) => { - debug!(self.log, "Attempting to process single block lookup in bad state"; "id" => target_id, "response_type" => ?R::response_type(), "error" => e); - return; - } - }; debug!( self.log, - "Block component processed for lookup"; - "response_type" => ?R::response_type(), + "Received lookup processing result"; + "component" => ?R::response_type(), "block_root" => ?block_root, "result" => ?result, - "id" => target_id, ); let action = match result { BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) - | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown { .. }) => { + | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown(_)) => { // Successfully imported - trace!(self.log, "Single block processing succeeded"; "block" => %block_root); - Action::Drop + request_state.on_processing_success()?; + Action::Continue } BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( @@ -759,54 +436,51 @@ impl BlockLookups { )) => { // `on_processing_success` is called here to ensure the request state is updated prior to checking // if both components have been processed. - if R::request_state_mut(&mut lookup) - .get_state_mut() - .on_processing_success() - .is_err() - { - warn!( - self.log, - "Single block processing state incorrect"; - "action" => "dropping single block request" - ); - Action::Drop + request_state.on_processing_success()?; + // If this was the result of a block request, we can't determined if the block peer did anything // wrong. If we already had both a block and blobs response processed, we should penalize the // blobs peer because they did not provide all blobs on the initial request. - } else if lookup.both_components_processed() { - lookup.penalize_blob_peer(cx); - - // Try it again if possible. - lookup.blob_request_state.state.on_processing_failure(); - Action::Retry - } else { - Action::Continue + if lookup.both_components_processed() { + let blob_peer = lookup + .blob_request_state + .state + .on_post_process_validation_failure()?; + cx.report_peer( + blob_peer, + PeerAction::MidToleranceError, + "sent_incomplete_blobs", + ); } + Action::Retry } BlockProcessingResult::Ignored => { // Beacon processor signalled to ignore the block processing result. // This implies that the cpu is overloaded. Drop the request. warn!( self.log, - "Single block processing was ignored, cpu might be overloaded"; - "action" => "dropping single block request" + "Lookup component processing ignored, cpu might be overloaded"; + "component" => ?R::response_type(), ); Action::Drop } BlockProcessingResult::Err(e) => { - let root = lookup.block_root(); - trace!(self.log, "Single block processing failed"; "block" => %root, "error" => %e); match e { BlockError::BeaconChainError(e) => { // Internal error - error!(self.log, "Beacon chain error processing single block"; "block_root" => %root, "error" => ?e); + error!(self.log, "Beacon chain error processing lookup component"; "block_root" => %block_root, "error" => ?e); Action::Drop } BlockError::ParentUnknown(block) => { - let slot = block.slot(); - let parent_root = block.parent_root(); - lookup.add_child_components(block.into()); - Action::ParentUnknown { parent_root, slot } + // Reverts the status of this request to `AwaitingProcessing` holding the + // downloaded data. A future call to `continue_requests` will re-submit it + // once there are no pending parent requests. + // Note: `BlockError::ParentUnknown` is only returned when processing + // blocks, not blobs. + request_state.revert_to_awaiting_processing()?; + Action::ParentUnknown { + parent_root: block.parent_root(), + } } ref e @ BlockError::ExecutionPayloadError(ref epe) if !epe.penalize_peer() => { // These errors indicate that the execution layer is offline @@ -814,35 +488,35 @@ impl BlockLookups { debug!( self.log, "Single block lookup failed. Execution layer is offline / unsynced / misconfigured"; - "root" => %root, + "block_root" => %block_root, "error" => ?e ); Action::Drop } - BlockError::AvailabilityCheck(e) => match e.category() { - AvailabilityCheckErrorCategory::Internal => { - warn!(self.log, "Internal availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); - lookup.block_request_state.state.on_download_failure(); - lookup.blob_request_state.state.on_download_failure(); - Action::Retry - } - AvailabilityCheckErrorCategory::Malicious => { - warn!(self.log, "Availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); - lookup.handle_availability_check_failure(cx); - Action::Retry - } - }, + BlockError::AvailabilityCheck(e) + if e.category() == AvailabilityCheckErrorCategory::Internal => + { + // There errors indicate internal problems and should not downscore the peer + warn!(self.log, "Internal availability check failure"; "block_root" => %block_root, "error" => ?e); + + // Here we choose *not* to call `on_processing_failure` because this could result in a bad + // lookup state transition. This error invalidates both blob and block requests, and we don't know the + // state of both requests. Blobs may have already successfullly processed for example. + // We opt to drop the lookup instead. + Action::Drop + } other => { - warn!(self.log, "Peer sent invalid block in single block lookup"; "root" => %root, "error" => ?other, "peer_id" => %peer_id); - if let Ok(block_peer) = lookup.block_request_state.state.processing_peer() { - cx.report_peer( - block_peer, - PeerAction::MidToleranceError, - "single_block_failure", - ); + debug!(self.log, "Invalid lookup component"; "block_root" => %block_root, "component" => ?R::response_type(), "error" => ?other); + let peer_id = request_state.on_processing_failure()?; + cx.report_peer( + peer_id, + PeerAction::MidToleranceError, + match R::response_type() { + ResponseType::Block => "lookup_block_processing_failure", + ResponseType::Blob => "lookup_blobs_processing_failure", + }, + ); - lookup.block_request_state.state.on_processing_failure(); - } Action::Retry } } @@ -851,466 +525,87 @@ impl BlockLookups { match action { Action::Retry => { - if let Err(e) = lookup.request_block_and_blobs(cx) { - warn!(self.log, "Single block lookup failed"; "block_root" => %block_root, "error" => ?e); - // Failed with too many retries, drop with noop - self.update_metrics(); - } else { - self.single_block_lookups.insert(target_id, lookup); - } + // Trigger download for all components in case `MissingComponents` failed the blob + // request. Also if blobs are `AwaitingProcessing` and need to be progressed + lookup.continue_requests(cx)?; } - Action::ParentUnknown { parent_root, slot } => { - // TODO: Consider including all peers from the lookup, claiming to know this block, not - // just the one that sent this specific block - self.search_parent(slot, block_root, parent_root, peer_id, cx); - self.single_block_lookups.insert(target_id, lookup); + Action::ParentUnknown { parent_root } => { + let peers = lookup.all_available_peers().cloned().collect::>(); + lookup.set_awaiting_parent(parent_root); + debug!(self.log, "Marking lookup as awaiting parent"; "lookup" => %block_root, "parent_root" => %parent_root); + self.search_parent_of_child(parent_root, block_root, &peers, cx); } Action::Drop => { - // drop with noop + // Drop with noop + self.drop_lookup_and_children(lookup_id); self.update_metrics(); } Action::Continue => { - self.single_block_lookups.insert(target_id, lookup); + // Drop this completed lookup only + self.single_block_lookups.remove(&lookup_id); + self.update_metrics(); + debug!(self.log, "Dropping completed lookup"; "block" => %block_root); + // Block imported, continue the requests of pending child blocks + self.continue_child_lookups(block_root, cx); } } + Ok(()) } - pub fn parent_block_processed( - &mut self, - chain_hash: Hash256, - result: BlockProcessingResult, - cx: &mut SyncNetworkContext, - ) { - let index = self - .parent_lookups - .iter() - .enumerate() - .find(|(_, lookup)| lookup.chain_hash() == chain_hash) - .map(|(index, _)| index); + /// Makes progress on the immediate children of `block_root` + pub fn continue_child_lookups(&mut self, block_root: Hash256, cx: &mut SyncNetworkContext) { + let mut failed_lookups = vec![]; // < need to clean failed lookups latter to re-borrow &mut self - let Some(mut parent_lookup) = index.map(|index| self.parent_lookups.remove(index)) else { - return debug!(self.log, "Process response for a parent lookup request that was not found"; "chain_hash" => %chain_hash); - }; - - match &result { - BlockProcessingResult::Ok(status) => match status { - AvailabilityProcessingStatus::Imported(block_root) => { - debug!(self.log, "Parent block processing succeeded"; &parent_lookup, "block_root" => ?block_root) - } - AvailabilityProcessingStatus::MissingComponents(_, block_root) => { - debug!(self.log, "Parent missing parts, triggering single block lookup"; &parent_lookup,"block_root" => ?block_root) + for (id, lookup) in self.single_block_lookups.iter_mut() { + if lookup.awaiting_parent() == Some(block_root) { + lookup.resolve_awaiting_parent(); + debug!(self.log, "Continuing child lookup"; "parent_root" => %block_root, "block_root" => %lookup.block_root()); + if let Err(e) = lookup.continue_requests(cx) { + failed_lookups.push((*id, e)); } - }, - BlockProcessingResult::Err(e) => { - debug!(self.log, "Parent block processing failed"; &parent_lookup, "error" => %e) - } - BlockProcessingResult::Ignored => { - debug!( - self.log, - "Parent block processing job was ignored"; - "action" => "re-requesting block", - &parent_lookup - ); } } - match result { - BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( - _, - block_root, - )) => { - let expected_block_root = parent_lookup.current_parent_request.block_root(); - if block_root != expected_block_root { - warn!( - self.log, - "Parent block processing result/request root mismatch"; - "request" =>?expected_block_root, - "result" => ?block_root - ); - return; - } - - // We only send parent blocks + blobs for processing together. This means a - // `MissingComponents` response here indicates missing blobs. Therefore we always - // register a blob processing failure here. - parent_lookup - .current_parent_request - .blob_request_state - .state - .on_processing_failure(); - match parent_lookup - .current_parent_request - .request_block_and_blobs(cx) - { - Ok(()) => self.parent_lookups.push(parent_lookup), - Err(e) => self.handle_parent_request_error(&mut parent_lookup, cx, e.into()), - } - } - BlockProcessingResult::Err(BlockError::ParentUnknown(block)) => { - parent_lookup.add_unknown_parent_block(block); - self.request_parent(parent_lookup, cx); - } - BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) - | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown(_)) => { - let (chain_hash, blocks, hashes, block_request) = - parent_lookup.parts_for_processing(); - - let blocks = self.add_child_block_to_chain(chain_hash, blocks, cx).into(); - - let process_id = ChainSegmentProcessId::ParentLookup(chain_hash); - - // Check if the beacon processor is available - let Some(beacon_processor) = cx.beacon_processor_if_enabled() else { - return trace!( - self.log, - "Dropping parent chain segment that was ready for processing."; - "chain_hash" => %chain_hash, - ); - }; - - match beacon_processor.send_chain_segment(process_id, blocks) { - Ok(_) => { - self.processing_parent_lookups - .insert(chain_hash, (hashes, block_request)); - } - Err(e) => { - error!( - self.log, - "Failed to send chain segment to processor"; - "error" => ?e - ); - } - } - } - ref e @ BlockProcessingResult::Err(BlockError::ExecutionPayloadError(ref epe)) - if !epe.penalize_peer() => - { - // These errors indicate that the execution layer is offline - // and failed to validate the execution payload. Do not downscore peer. - debug!( - self.log, - "Parent lookup failed. Execution layer is offline"; - "chain_hash" => %chain_hash, - "error" => ?e - ); - } - BlockProcessingResult::Err(outcome) => { - self.handle_parent_block_error(outcome, cx, parent_lookup); - } - BlockProcessingResult::Ignored => { - // Beacon processor signalled to ignore the block processing result. - // This implies that the cpu is overloaded. Drop the request. - warn!( - self.log, - "Parent block processing was ignored, cpu might be overloaded"; - "action" => "dropping parent request" - ); - } + for (id, e) in failed_lookups { + self.on_lookup_request_error(id, e, "continue_child_lookups"); } - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); } - /// Find the child block that spawned the parent lookup request and add it to the chain - /// to send for processing. - fn add_child_block_to_chain( - &mut self, - chain_hash: Hash256, - mut blocks: VecDeque>, - cx: &mut SyncNetworkContext, - ) -> VecDeque> { - // Find the child block that spawned the parent lookup request and add it to the chain - // to send for processing. - if let Some(child_lookup_id) = self - .single_block_lookups - .iter() - .find_map(|(id, lookup)| (lookup.block_root() == chain_hash).then_some(*id)) - { - let Some(child_lookup) = self.single_block_lookups.get_mut(&child_lookup_id) else { - debug!(self.log, "Missing child for parent lookup request"; "child_root" => ?chain_hash); - return blocks; - }; - match child_lookup.get_cached_child_block() { - CachedChild::Ok(rpc_block) => { - // Insert this block at the front. This order is important because we later check - // for linear roots in `filter_chain_segment` - blocks.push_front(rpc_block); - } - CachedChild::DownloadIncomplete => { - trace!(self.log, "Parent lookup chain complete, awaiting child response"; "chain_hash" => ?chain_hash); - } - CachedChild::NotRequired => { - warn!(self.log, "Child not cached for parent lookup"; "chain_hash" => %chain_hash); - } - CachedChild::Err(e) => { - warn!( - self.log, - "Consistency error in child block triggering chain or parent lookups"; - "error" => ?e, - "chain_hash" => ?chain_hash - ); - child_lookup.handle_consistency_failure(cx); - if let Err(e) = child_lookup.request_block_and_blobs(cx) { - debug!(self.log, - "Failed to request block and blobs, dropping lookup"; - "error" => ?e - ); - self.single_block_lookups.remove(&child_lookup_id); - } - } - } - } else { - debug!(self.log, "Missing child for parent lookup request"; "child_root" => ?chain_hash); - }; - blocks - } - - /// Handle the peer scoring, retries, and logging related to a `BlockError` returned from - /// processing a block + blobs for a parent lookup. - fn handle_parent_block_error( - &mut self, - outcome: BlockError<::EthSpec>, - cx: &mut SyncNetworkContext, - mut parent_lookup: ParentLookup, - ) { - // We should always have a block peer. - let block_peer_id = match parent_lookup.block_processing_peer() { - Ok(peer_id) => peer_id, - Err(e) => { - warn!(self.log, "Parent lookup in bad state"; "chain_hash" => %parent_lookup.chain_hash(), "error" => e); - return; - } - }; + /// Drops `dropped_id` lookup and all its children recursively. Lookups awaiting a parent need + /// the parent to make progress to resolve, therefore we must drop them if the parent is + /// dropped. + pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId) { + if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) { + debug!(self.log, "Dropping child lookup"; "id" => ?dropped_id, "block_root" => %dropped_lookup.block_root()); - // We may not have a blob peer, if there were no blobs required for this block. - let blob_peer_id = parent_lookup.blob_processing_peer().ok(); + let child_lookups = self + .single_block_lookups + .iter() + .filter(|(_, lookup)| lookup.awaiting_parent() == Some(dropped_lookup.block_root())) + .map(|(id, _)| *id) + .collect::>(); - // all else we consider the chain a failure and downvote the peer that sent - // us the last block - warn!( - self.log, "Invalid parent chain"; - "score_adjustment" => %PeerAction::MidToleranceError, - "outcome" => ?outcome, - "block_peer_id" => %block_peer_id, - ); - // This currently can be a host of errors. We permit this due to the partial - // ambiguity. - cx.report_peer( - block_peer_id, - PeerAction::MidToleranceError, - "parent_request_err", - ); - // Don't downscore the same peer twice - if let Some(blob_peer_id) = blob_peer_id { - if block_peer_id != blob_peer_id { - debug!( - self.log, "Additionally down-scoring blob peer"; - "score_adjustment" => %PeerAction::MidToleranceError, - "outcome" => ?outcome, - "blob_peer_id" => %blob_peer_id, - ); - cx.report_peer( - blob_peer_id, - PeerAction::MidToleranceError, - "parent_request_err", - ); + for id in child_lookups { + self.drop_lookup_and_children(id); } } - - // Try again if possible - parent_lookup.processing_failed(); - self.request_parent(parent_lookup, cx); } - pub fn parent_chain_processed( + /// Common handler a lookup request error, drop it and update metrics + fn on_lookup_request_error( &mut self, - chain_hash: Hash256, - result: BatchProcessResult, - cx: &mut SyncNetworkContext, + id: SingleLookupId, + error: LookupRequestError, + source: &str, ) { - let Some((_hashes, request)) = self.processing_parent_lookups.remove(&chain_hash) else { - return debug!(self.log, "Chain process response for a parent lookup request that was not found"; "chain_hash" => %chain_hash, "result" => ?result); - }; - - debug!(self.log, "Parent chain processed"; "chain_hash" => %chain_hash, "result" => ?result); - match result { - BatchProcessResult::Success { .. } => { - let Some(id) = self - .single_block_lookups - .iter() - .find_map(|(id, req)| (req.block_root() == chain_hash).then_some(*id)) - else { - warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); - return; - }; - - let Some(lookup) = self.single_block_lookups.get_mut(&id) else { - warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); - return; - }; - - match lookup.get_cached_child_block() { - CachedChild::Ok(rpc_block) => { - // This is the correct block, send it for processing - if self - .send_block_for_processing( - chain_hash, - rpc_block, - timestamp_now(), - BlockProcessType::SingleBlock { id }, - cx, - ) - .is_err() - { - // Remove to avoid inconsistencies - self.single_block_lookups.remove(&id); - } - } - CachedChild::DownloadIncomplete => { - trace!(self.log, "Parent chain complete, awaiting child response"; "chain_hash" => %chain_hash); - } - CachedChild::NotRequired => { - warn!(self.log, "Child not cached for parent lookup"; "chain_hash" => %chain_hash); - } - CachedChild::Err(e) => { - warn!( - self.log, - "Consistency error in child block triggering parent lookup"; - "chain_hash" => %chain_hash, - "error" => ?e - ); - lookup.handle_consistency_failure(cx); - if let Err(e) = lookup.request_block_and_blobs(cx) { - debug!(self.log, - "Failed to request block and blobs, dropping lookup"; - "error" => ?e - ); - self.single_block_lookups.remove(&id); - } - } - } - } - BatchProcessResult::FaultyFailure { - imported_blocks: _, - penalty, - } => { - self.failed_chains.insert(chain_hash); - for peer_source in request.all_used_peers() { - cx.report_peer(*peer_source, penalty, "parent_chain_failure") - } - } - BatchProcessResult::NonFaultyFailure => { - // We might request this chain again if there is need but otherwise, don't try again - } - } - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); + debug!(self.log, "Dropping lookup on request error"; "id" => id, "source" => source, "error" => ?error); + metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[error.as_metric()]); + self.drop_lookup_and_children(id); + self.update_metrics(); } /* Helper functions */ - fn send_block_for_processing( - &self, - block_root: Hash256, - block: RpcBlock, - duration: Duration, - process_type: BlockProcessType, - cx: &SyncNetworkContext, - ) -> Result<(), LookupRequestError> { - match cx.beacon_processor_if_enabled() { - Some(beacon_processor) => { - debug!(self.log, "Sending block for processing"; "block" => ?block_root, "process" => ?process_type); - if let Err(e) = beacon_processor.send_rpc_beacon_block( - block_root, - block, - duration, - process_type, - ) { - error!( - self.log, - "Failed to send sync block to processor"; - "error" => ?e - ); - Err(LookupRequestError::SendFailed( - "beacon processor send failure", - )) - } else { - Ok(()) - } - } - None => { - trace!(self.log, "Dropping block ready for processing. Beacon processor not available"; "block" => %block_root); - Err(LookupRequestError::SendFailed( - "beacon processor unavailable", - )) - } - } - } - - fn send_blobs_for_processing( - &self, - block_root: Hash256, - blobs: FixedBlobSidecarList, - duration: Duration, - process_type: BlockProcessType, - cx: &SyncNetworkContext, - ) -> Result<(), LookupRequestError> { - match cx.beacon_processor_if_enabled() { - Some(beacon_processor) => { - trace!(self.log, "Sending blobs for processing"; "block" => ?block_root, "process_type" => ?process_type); - if let Err(e) = - beacon_processor.send_rpc_blobs(block_root, blobs, duration, process_type) - { - error!( - self.log, - "Failed to send sync blobs to processor"; - "error" => ?e - ); - Err(LookupRequestError::SendFailed( - "beacon processor send failure", - )) - } else { - Ok(()) - } - } - None => { - trace!(self.log, "Dropping blobs ready for processing. Beacon processor not available"; "block_root" => %block_root); - Err(LookupRequestError::SendFailed( - "beacon processor unavailable", - )) - } - } - } - - /// Attempts to request the next unknown parent. This method handles peer scoring and dropping - /// the lookup in the event of failure. - fn request_parent( - &mut self, - mut parent_lookup: ParentLookup, - cx: &mut SyncNetworkContext, - ) { - let response = parent_lookup.request_parent(cx); - - match response { - Err(e) => { - self.handle_parent_request_error(&mut parent_lookup, cx, e); - } - Ok(_) => self.parent_lookups.push(parent_lookup), - } - - // We remove and add back again requests so we want this updated regardless of outcome. - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); - } - /// Drops all the single block requests and returns how many requests were dropped. pub fn drop_single_block_requests(&mut self) -> usize { let requests_to_drop = self.single_block_lookups.len(); @@ -1318,34 +613,10 @@ impl BlockLookups { requests_to_drop } - /// Drops all the parent chain requests and returns how many requests were dropped. - pub fn drop_parent_chain_requests(&mut self) -> usize { - self.parent_lookups.drain(..).len() - } - - pub fn downscore_on_rpc_error( - &self, - peer_id: &PeerId, - error: &LookupVerifyError, - cx: &SyncNetworkContext, - ) { - // Note: logging the report event here with the full error display. The log inside - // `report_peer` only includes a smaller string, like "invalid_data" - let error_str: &'static str = error.into(); - - debug!(self.log, "reporting peer for sync lookup error"; "error" => error_str); - cx.report_peer(*peer_id, PeerAction::LowToleranceError, error_str); - } - pub fn update_metrics(&self) { metrics::set_gauge( &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, self.single_block_lookups.len() as i64, ); - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); } } diff --git a/beacon_node/network/src/sync/block_lookups/parent_chain.rs b/beacon_node/network/src/sync/block_lookups/parent_chain.rs new file mode 100644 index 00000000000..55f2cfe1292 --- /dev/null +++ b/beacon_node/network/src/sync/block_lookups/parent_chain.rs @@ -0,0 +1,198 @@ +use super::single_block_lookup::SingleBlockLookup; +use beacon_chain::BeaconChainTypes; +use std::collections::{HashMap, HashSet}; +use types::Hash256; + +/// Summary of a lookup of which we may not know it's parent_root yet +pub(crate) struct Node { + block_root: Hash256, + parent_root: Option, +} + +impl From<&SingleBlockLookup> for Node { + fn from(value: &SingleBlockLookup) -> Self { + Self { + block_root: value.block_root(), + parent_root: value.awaiting_parent(), + } + } +} + +/// Wrapper around a chain of block roots that have a least one element (tip) +pub(crate) struct NodeChain { + // Parent chain blocks in descending slot order + pub(crate) chain: Vec, + pub(crate) tip: Hash256, +} + +impl NodeChain { + /// Returns the block_root of the oldest ancestor (min slot) of this chain + pub(crate) fn ancestor(&self) -> Hash256 { + self.chain.last().copied().unwrap_or(self.tip) + } + pub(crate) fn len(&self) -> usize { + self.chain.len() + } +} + +/// Given a set of nodes that reference each other, returns a list of chains with unique tips that +/// contain at least two elements. In descending slot order (tip first). +pub(crate) fn compute_parent_chains(nodes: &[Node]) -> Vec { + let mut child_to_parent = HashMap::new(); + let mut parent_to_child = HashMap::>::new(); + for node in nodes { + child_to_parent.insert(node.block_root, node.parent_root); + if let Some(parent_root) = node.parent_root { + parent_to_child + .entry(parent_root) + .or_default() + .push(node.block_root); + } + } + + let mut parent_chains = vec![]; + + // Iterate blocks with no children + for tip in nodes { + let mut block_root = tip.block_root; + if parent_to_child.get(&block_root).is_none() { + let mut chain = vec![]; + + // Resolve chain of blocks + while let Some(parent_root) = child_to_parent.get(&block_root) { + // block_root is a known block that may or may not have a parent root + chain.push(block_root); + if let Some(parent_root) = parent_root { + block_root = *parent_root; + } else { + break; + } + } + + if chain.len() > 1 { + parent_chains.push(NodeChain { + chain, + tip: tip.block_root, + }); + } + } + } + + parent_chains +} + +/// Given a list of node chains, find the oldest node of a specific chain that is not contained in +/// any other chain. +pub(crate) fn find_oldest_fork_ancestor( + parent_chains: Vec, + chain_idx: usize, +) -> Result { + let mut other_blocks = HashSet::new(); + + // Register blocks from other chains + for (i, parent_chain) in parent_chains.iter().enumerate() { + if i != chain_idx { + for block in &parent_chain.chain { + other_blocks.insert(block); + } + } + } + + // Should never happen + let parent_chain = parent_chains + .get(chain_idx) + .ok_or("chain_idx out of bounds")?; + // Find the first block in the target parent chain that is not in other parent chains + // Iterate in ascending slot order + for block in parent_chain.chain.iter().rev() { + if !other_blocks.contains(block) { + return Ok(*block); + } + } + + // No match means that the chain is fully contained within another chain. This should never + // happen, but if that was the case just return the tip + Ok(parent_chain.tip) +} + +#[cfg(test)] +mod tests { + use super::{compute_parent_chains, find_oldest_fork_ancestor, Node}; + use types::Hash256; + + fn h(n: u64) -> Hash256 { + Hash256::from_low_u64_be(n) + } + + fn n(block: u64) -> Node { + Node { + block_root: h(block), + parent_root: None, + } + } + + fn np(parent: u64, block: u64) -> Node { + Node { + block_root: h(block), + parent_root: Some(h(parent)), + } + } + + fn compute_parent_chains_test(nodes: &[Node], expected_chain: Vec>) { + assert_eq!( + compute_parent_chains(nodes) + .iter() + .map(|c| c.chain.clone()) + .collect::>(), + expected_chain + ); + } + + fn find_oldest_fork_ancestor_test(nodes: &[Node], expected: Hash256) { + let chains = compute_parent_chains(nodes); + println!( + "chains {:?}", + chains.iter().map(|c| &c.chain).collect::>() + ); + assert_eq!(find_oldest_fork_ancestor(chains, 0).unwrap(), expected); + } + + #[test] + fn compute_parent_chains_empty_case() { + compute_parent_chains_test(&[], vec![]); + } + + #[test] + fn compute_parent_chains_single_branch() { + compute_parent_chains_test(&[n(0), np(0, 1), np(1, 2)], vec![vec![h(2), h(1), h(0)]]); + } + + #[test] + fn compute_parent_chains_single_branch_with_solo() { + compute_parent_chains_test( + &[n(0), np(0, 1), np(1, 2), np(3, 4)], + vec![vec![h(2), h(1), h(0)]], + ); + } + + #[test] + fn compute_parent_chains_two_forking_branches() { + compute_parent_chains_test( + &[n(0), np(0, 1), np(1, 2), np(1, 3)], + vec![vec![h(2), h(1), h(0)], vec![h(3), h(1), h(0)]], + ); + } + + #[test] + fn compute_parent_chains_two_independent_branches() { + compute_parent_chains_test( + &[n(0), np(0, 1), np(1, 2), n(3), np(3, 4)], + vec![vec![h(2), h(1), h(0)], vec![h(4), h(3)]], + ); + } + + #[test] + fn find_oldest_fork_ancestor_simple_case() { + find_oldest_fork_ancestor_test(&[n(0), np(0, 1), np(1, 2), np(0, 3)], h(1)) + } +} diff --git a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs b/beacon_node/network/src/sync/block_lookups/parent_lookup.rs deleted file mode 100644 index 11eb908953f..00000000000 --- a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs +++ /dev/null @@ -1,227 +0,0 @@ -use super::common::LookupType; -use super::single_block_lookup::{LookupRequestError, SingleBlockLookup}; -use super::{DownloadedBlock, PeerId}; -use crate::sync::{manager::SLOT_IMPORT_TOLERANCE, network_context::SyncNetworkContext}; -use beacon_chain::block_verification_types::AsBlock; -use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::{ChildComponents, DataAvailabilityChecker}; -use beacon_chain::BeaconChainTypes; -use std::collections::VecDeque; -use std::sync::Arc; -use store::Hash256; - -/// How many attempts we try to find a parent of a block before we give up trying. -pub(crate) const PARENT_FAIL_TOLERANCE: u8 = 5; -/// The maximum depth we will search for a parent block. In principle we should have sync'd any -/// canonical chain to its head once the peer connects. A chain should not appear where it's depth -/// is further back than the most recent head slot. -pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2; - -/// Maintains a sequential list of parents to lookup and the lookup's current state. -pub(crate) struct ParentLookup { - /// The root of the block triggering this parent request. - chain_hash: Hash256, - /// The blocks that have currently been downloaded. - downloaded_blocks: Vec>, - /// Request of the last parent. - pub current_parent_request: SingleBlockLookup, -} - -#[derive(Debug, PartialEq, Eq)] -pub(crate) enum RequestError { - SendFailed(&'static str), - ChainTooLong, - /// We witnessed too many failures trying to complete this parent lookup. - TooManyAttempts { - /// We received more failures trying to process the blocks than downloading them - /// from peers. - cannot_process: bool, - }, - NoPeers, - BadState(String), -} - -impl ParentLookup { - pub fn new( - block_root: Hash256, - parent_root: Hash256, - peer_id: PeerId, - da_checker: Arc>, - cx: &mut SyncNetworkContext, - ) -> Self { - let current_parent_request = SingleBlockLookup::new( - parent_root, - Some(ChildComponents::empty(block_root)), - &[peer_id], - da_checker, - cx.next_id(), - LookupType::Parent, - ); - - Self { - chain_hash: block_root, - downloaded_blocks: vec![], - current_parent_request, - } - } - - pub fn contains_block(&self, block_root: &Hash256) -> bool { - self.downloaded_blocks - .iter() - .any(|(root, _d_block)| root == block_root) - } - - pub fn is_for_block(&self, block_root: Hash256) -> bool { - self.current_parent_request.is_for_block(block_root) - } - - /// Attempts to request the next unknown parent. If the request fails, it should be removed. - pub fn request_parent(&mut self, cx: &mut SyncNetworkContext) -> Result<(), RequestError> { - // check to make sure this request hasn't failed - if self.downloaded_blocks.len() + 1 >= PARENT_DEPTH_TOLERANCE { - return Err(RequestError::ChainTooLong); - } - - self.current_parent_request - .request_block_and_blobs(cx) - .map_err(Into::into) - } - - pub fn check_peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), ()> { - self.current_parent_request - .block_request_state - .state - .check_peer_disconnected(peer_id) - .and_then(|()| { - self.current_parent_request - .blob_request_state - .state - .check_peer_disconnected(peer_id) - }) - } - - pub fn add_unknown_parent_block(&mut self, block: RpcBlock) { - let next_parent = block.parent_root(); - // Cache the block. - let current_root = self.current_parent_request.block_root(); - self.downloaded_blocks.push((current_root, block)); - - // Update the parent request. - self.current_parent_request - .update_requested_parent_block(next_parent) - } - - pub fn block_processing_peer(&self) -> Result { - self.current_parent_request - .block_request_state - .state - .processing_peer() - } - - pub fn blob_processing_peer(&self) -> Result { - self.current_parent_request - .blob_request_state - .state - .processing_peer() - } - - /// Consumes the parent request and destructures it into it's parts. - #[allow(clippy::type_complexity)] - pub fn parts_for_processing( - self, - ) -> ( - Hash256, - VecDeque>, - Vec, - SingleBlockLookup, - ) { - let ParentLookup { - chain_hash, - downloaded_blocks, - current_parent_request, - } = self; - let block_count = downloaded_blocks.len(); - let mut blocks = VecDeque::with_capacity(block_count); - let mut hashes = Vec::with_capacity(block_count); - for (hash, block) in downloaded_blocks.into_iter() { - blocks.push_back(block); - hashes.push(hash); - } - (chain_hash, blocks, hashes, current_parent_request) - } - - /// Get the parent lookup's chain hash. - pub fn chain_hash(&self) -> Hash256 { - self.chain_hash - } - - pub fn processing_failed(&mut self) { - self.current_parent_request - .block_request_state - .state - .on_processing_failure(); - self.current_parent_request - .blob_request_state - .state - .on_processing_failure(); - if let Some(components) = self.current_parent_request.child_components.as_mut() { - components.downloaded_block = None; - components.downloaded_blobs = <_>::default(); - } - } - - pub fn add_peer(&mut self, peer: PeerId) { - self.current_parent_request.add_peer(peer) - } - - /// Adds a list of peers to the parent request. - pub fn add_peers(&mut self, peers: &[PeerId]) { - self.current_parent_request.add_peers(peers) - } - - pub fn all_used_peers(&self) -> impl Iterator + '_ { - self.current_parent_request.all_used_peers() - } -} - -impl From for RequestError { - fn from(e: LookupRequestError) -> Self { - use LookupRequestError as E; - match e { - E::TooManyAttempts { cannot_process } => { - RequestError::TooManyAttempts { cannot_process } - } - E::NoPeers => RequestError::NoPeers, - E::SendFailed(msg) => RequestError::SendFailed(msg), - E::BadState(msg) => RequestError::BadState(msg), - } - } -} - -impl slog::KV for ParentLookup { - fn serialize( - &self, - record: &slog::Record, - serializer: &mut dyn slog::Serializer, - ) -> slog::Result { - serializer.emit_arguments("chain_hash", &format_args!("{}", self.chain_hash))?; - slog::Value::serialize(&self.current_parent_request, record, "parent", serializer)?; - serializer.emit_usize("downloaded_blocks", self.downloaded_blocks.len())?; - slog::Result::Ok(()) - } -} - -impl RequestError { - pub fn as_static(&self) -> &'static str { - match self { - RequestError::SendFailed(e) => e, - RequestError::ChainTooLong => "chain_too_long", - RequestError::TooManyAttempts { cannot_process } if *cannot_process => { - "too_many_processing_attempts" - } - RequestError::TooManyAttempts { cannot_process: _ } => "too_many_downloading_attempts", - RequestError::NoPeers => "no_peers", - RequestError::BadState(..) => "bad_state", - } - } -} diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 49ef1dd15bf..76deb236742 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -1,24 +1,19 @@ -use super::common::LookupType; -use super::PeerId; +use super::common::{AwaitingParent, BlockIsProcessed}; +use super::{BlockComponent, PeerId}; use crate::sync::block_lookups::common::RequestState; use crate::sync::block_lookups::Id; use crate::sync::network_context::SyncNetworkContext; -use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::ChildComponents; -use beacon_chain::data_availability_checker::{ - AvailabilityCheckError, DataAvailabilityChecker, MissingBlobs, -}; use beacon_chain::BeaconChainTypes; use itertools::Itertools; -use lighthouse_network::PeerAction; use rand::seq::IteratorRandom; -use slog::{debug, Logger}; use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; +use std::time::Duration; use store::Hash256; use strum::IntoStaticStr; -use types::EthSpec; +use types::blob_sidecar::FixedBlobSidecarList; +use types::{EthSpec, SignedBeaconBlock}; #[derive(Debug, PartialEq, Eq, IntoStaticStr)] pub enum LookupRequestError { @@ -34,38 +29,64 @@ pub enum LookupRequestError { pub struct SingleBlockLookup { pub id: Id, - pub lookup_type: LookupType, - pub block_request_state: BlockRequestState, - pub blob_request_state: BlobRequestState, - pub da_checker: Arc>, - /// Only necessary for requests triggered by an `UnknownBlockParent` or `UnknownBlockParent` - /// because any blocks or blobs without parents won't hit the data availability cache. - pub child_components: Option>, + pub block_request_state: BlockRequestState, + pub blob_request_state: BlobRequestState, + block_root: Hash256, + awaiting_parent: Option, } impl SingleBlockLookup { pub fn new( requested_block_root: Hash256, - child_components: Option>, peers: &[PeerId], - da_checker: Arc>, id: Id, - lookup_type: LookupType, + awaiting_parent: Option, ) -> Self { - let is_deneb = da_checker.is_deneb(); Self { id, - lookup_type, block_request_state: BlockRequestState::new(requested_block_root, peers), - blob_request_state: BlobRequestState::new(requested_block_root, peers, is_deneb), - da_checker, - child_components, + blob_request_state: BlobRequestState::new(requested_block_root, peers), + block_root: requested_block_root, + awaiting_parent, } } /// Get the block root that is being requested. pub fn block_root(&self) -> Hash256 { - self.block_request_state.requested_block_root + self.block_root + } + + pub fn awaiting_parent(&self) -> Option { + self.awaiting_parent + } + + /// Mark this lookup as awaiting a parent lookup from being processed. Meanwhile don't send + /// components for processing. + pub fn set_awaiting_parent(&mut self, parent_root: Hash256) { + self.awaiting_parent = Some(parent_root) + } + + /// Mark this lookup as no longer awaiting a parent lookup. Components can be sent for + /// processing. + pub fn resolve_awaiting_parent(&mut self) { + self.awaiting_parent = None; + } + + /// Maybe insert a verified response into this lookup. Returns true if imported + pub fn add_child_components(&mut self, block_component: BlockComponent) -> bool { + match block_component { + BlockComponent::Block(block) => self + .block_request_state + .state + .insert_verified_response(block), + BlockComponent::Blob(_) => { + // For now ignore single blobs, as the blob request state assumes all blobs are + // attributed to the same peer = the peer serving the remaining blobs. Ignoring this + // block component has a minor effect, causing the node to re-request this blob + // once the parent chain is successfully resolved + false + } + } } /// Check the block root matches the requested block root. @@ -73,16 +94,6 @@ impl SingleBlockLookup { self.block_root() == block_root } - /// Update the requested block, this should only be used in a chain of parent lookups to request - /// the next parent. - pub fn update_requested_parent_block(&mut self, block_root: Hash256) { - self.block_request_state.requested_block_root = block_root; - self.blob_request_state.block_root = block_root; - self.block_request_state.state.state = State::AwaitingDownload; - self.blob_request_state.state.state = State::AwaitingDownload; - self.child_components = Some(ChildComponents::empty(block_root)); - } - /// Get all unique used peers across block and blob requests. pub fn all_used_peers(&self) -> impl Iterator + '_ { self.block_request_state @@ -92,87 +103,44 @@ impl SingleBlockLookup { .unique() } - /// Send the necessary requests for blocks and/or blobs. This will check whether we have - /// downloaded the block and/or blobs already and will not send requests if so. It will also - /// inspect the request state or blocks and blobs to ensure we are not already processing or - /// downloading the block and/or blobs. - pub fn request_block_and_blobs( + /// Get all unique available peers across block and blob requests. + pub fn all_available_peers(&self) -> impl Iterator + '_ { + self.block_request_state + .state + .get_available_peers() + .chain(self.blob_request_state.state.get_available_peers()) + .unique() + } + + pub fn continue_requests( &mut self, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - let block_already_downloaded = self.block_already_downloaded(); - let blobs_already_downloaded = self.blobs_already_downloaded(); - - if !block_already_downloaded { - self.block_request_state - .build_request_and_send(self.id, self.lookup_type, cx)?; - } - if !blobs_already_downloaded { - self.blob_request_state - .build_request_and_send(self.id, self.lookup_type, cx)?; - } + // TODO: Check what's necessary to download, specially for blobs + self.continue_request::>(cx)?; + self.continue_request::>(cx)?; Ok(()) } - /// Returns a `CachedChild`, which is a wrapper around a `RpcBlock` that is either: - /// - /// 1. `NotRequired`: there is no child caching required for this lookup. - /// 2. `DownloadIncomplete`: Child caching is required, but all components are not yet downloaded. - /// 3. `Ok`: The child is required and we have downloaded it. - /// 4. `Err`: The child is required, but has failed consistency checks. - pub fn get_cached_child_block(&self) -> CachedChild { - if let Some(components) = self.child_components.as_ref() { - let Some(block) = components.downloaded_block.as_ref() else { - return CachedChild::DownloadIncomplete; - }; - - if !self.missing_blob_ids().is_empty() { - return CachedChild::DownloadIncomplete; - } - - match RpcBlock::new_from_fixed( - self.block_request_state.requested_block_root, - block.clone(), - components.downloaded_blobs.clone(), - ) { - Ok(rpc_block) => CachedChild::Ok(rpc_block), - Err(e) => CachedChild::Err(e), - } - } else { - CachedChild::NotRequired - } - } - - /// Accepts a verified response, and adds it to the child components if required. This method - /// returns a `CachedChild` which provides a completed block + blob response if all components have been - /// received, or information about whether the child is required and if it has been downloaded. - pub fn add_response>( + pub fn continue_request>( &mut self, - verified_response: R::VerifiedResponseType, - ) -> CachedChild { - if let Some(child_components) = self.child_components.as_mut() { - R::add_to_child_components(verified_response, child_components); - self.get_cached_child_block() - } else { - CachedChild::NotRequired - } - } - - /// Add a child component to the lookup request. Merges with any existing child components. - pub fn add_child_components(&mut self, components: ChildComponents) { - if let Some(ref mut existing_components) = self.child_components { - let ChildComponents { - block_root: _, - downloaded_block, - downloaded_blobs, - } = components; - if let Some(block) = downloaded_block { - existing_components.merge_block(block); - } - existing_components.merge_blobs(downloaded_blobs); - } else { - self.child_components = Some(components); - } + cx: &mut SyncNetworkContext, + ) -> Result<(), LookupRequestError> { + let id = self.id; + let awaiting_parent = self.awaiting_parent.is_some(); + let downloaded_block_expected_blobs = self + .block_request_state + .state + .peek_downloaded_data() + .map(|block| block.num_expected_blobs()); + let block_is_processed = self.block_request_state.state.is_processed(); + R::request_state_mut(self).continue_request( + id, + AwaitingParent(awaiting_parent), + downloaded_block_expected_blobs, + BlockIsProcessed(block_is_processed), + cx, + ) } /// Add all given peers to both block and blob request states. @@ -188,12 +156,6 @@ impl SingleBlockLookup { } } - /// Returns true if the block has already been downloaded. - pub fn both_components_downloaded(&self) -> bool { - self.block_request_state.state.is_downloaded() - && self.blob_request_state.state.is_downloaded() - } - /// Returns true if the block has already been downloaded. pub fn both_components_processed(&self) -> bool { self.block_request_state.state.is_processed() @@ -203,133 +165,43 @@ impl SingleBlockLookup { /// Checks both the block and blob request states to see if the peer is disconnected. /// /// Returns true if the lookup should be dropped. - pub fn should_drop_lookup_on_disconnected_peer( - &mut self, - peer_id: &PeerId, - cx: &mut SyncNetworkContext, - log: &Logger, - ) -> bool { - let block_root = self.block_root(); - let block_peer_disconnected = self - .block_request_state - .state - .check_peer_disconnected(peer_id) - .is_err(); - let blob_peer_disconnected = self - .blob_request_state - .state - .check_peer_disconnected(peer_id) - .is_err(); - - if block_peer_disconnected || blob_peer_disconnected { - if let Err(e) = self.request_block_and_blobs(cx) { - debug!(log, "Single lookup failed on peer disconnection"; "block_root" => ?block_root, "error" => ?e); - return true; - } - } - false - } - - /// Returns `true` if the block has already been downloaded. - pub(crate) fn block_already_downloaded(&self) -> bool { - if let Some(components) = self.child_components.as_ref() { - components.downloaded_block.is_some() - } else { - self.da_checker.has_block(&self.block_root()) - } - } - - /// Updates the `requested_ids` field of the `BlockRequestState` with the most recent picture - /// of which blobs still need to be requested. Returns `true` if there are no more blobs to - /// request. - pub(crate) fn blobs_already_downloaded(&mut self) -> bool { - if matches!(self.blob_request_state.state.state, State::AwaitingDownload) { - self.update_blobs_request(); - } - self.blob_request_state.requested_ids.is_empty() - } - - /// Updates this request with the most recent picture of which blobs still need to be requested. - pub fn update_blobs_request(&mut self) { - self.blob_request_state.requested_ids = self.missing_blob_ids(); - } - - /// If `child_components` is `Some`, we know block components won't hit the data - /// availability cache, so we don't check its processing cache unless `child_components` - /// is `None`. - pub(crate) fn missing_blob_ids(&self) -> MissingBlobs { - let block_root = self.block_root(); - if let Some(components) = self.child_components.as_ref() { - self.da_checker.get_missing_blob_ids( - block_root, - components.downloaded_block.as_ref().map(|b| b.as_ref()), - &components.downloaded_blobs, - ) - } else { - self.da_checker.get_missing_blob_ids_with(block_root) - } - } + pub fn should_drop_lookup_on_disconnected_peer(&mut self, peer_id: &PeerId) -> bool { + self.block_request_state.state.remove_peer(peer_id); + self.blob_request_state.state.remove_peer(peer_id); - /// Penalizes a blob peer if it should have blobs but didn't return them to us. - pub fn penalize_blob_peer(&mut self, cx: &SyncNetworkContext) { - if let Ok(blob_peer) = self.blob_request_state.state.processing_peer() { - cx.report_peer( - blob_peer, - PeerAction::MidToleranceError, - "single_blob_failure", - ); + if self.all_available_peers().count() == 0 { + return true; } - } - /// This failure occurs on download, so register a failure downloading, penalize the peer - /// and clear the blob cache. - pub fn handle_consistency_failure(&mut self, cx: &SyncNetworkContext) { - self.penalize_blob_peer(cx); - if let Some(cached_child) = self.child_components.as_mut() { - cached_child.clear_blobs(); - } - self.blob_request_state.state.on_download_failure() - } - - /// This failure occurs after processing, so register a failure processing, penalize the peer - /// and clear the blob cache. - pub fn handle_availability_check_failure(&mut self, cx: &SyncNetworkContext) { - self.penalize_blob_peer(cx); - if let Some(cached_child) = self.child_components.as_mut() { - cached_child.clear_blobs(); - } - self.blob_request_state.state.on_processing_failure() + // Note: if the peer disconnected happens to have an on-going request associated with this + // lookup we will receive an RPCError and the lookup will fail. No need to manually retry + // now. + false } } /// The state of the blob request component of a `SingleBlockLookup`. -pub struct BlobRequestState { - /// The latest picture of which blobs still need to be requested. This includes information - /// from both block/blobs downloaded in the network layer and any blocks/blobs that exist in - /// the data availability checker. - pub requested_ids: MissingBlobs, +pub struct BlobRequestState { pub block_root: Hash256, - pub state: SingleLookupRequestState, + pub state: SingleLookupRequestState>, } -impl BlobRequestState { - pub fn new(block_root: Hash256, peer_source: &[PeerId], is_deneb: bool) -> Self { - let default_ids = MissingBlobs::new_without_block(block_root, is_deneb); +impl BlobRequestState { + pub fn new(block_root: Hash256, peer_source: &[PeerId]) -> Self { Self { block_root, - requested_ids: default_ids, state: SingleLookupRequestState::new(peer_source), } } } /// The state of the block request component of a `SingleBlockLookup`. -pub struct BlockRequestState { +pub struct BlockRequestState { pub requested_block_root: Hash256, - pub state: SingleLookupRequestState, + pub state: SingleLookupRequestState>>, } -impl BlockRequestState { +impl BlockRequestState { pub fn new(block_root: Hash256, peers: &[PeerId]) -> Self { Self { requested_block_root: block_root, @@ -338,36 +210,28 @@ impl BlockRequestState { } } -/// This is the status of cached components for a lookup if they are required. It provides information -/// about whether we should send a responses immediately for processing, whether we require more -/// responses, or whether all cached components have been received and the reconstructed block -/// should be sent for processing. -pub enum CachedChild { - /// All child components have been received, this is the reconstructed block, including all. - /// It has been checked for consistency between blobs and block, but no consensus checks have - /// been performed and no kzg verification has been performed. - Ok(RpcBlock), - /// All child components have not yet been received. - DownloadIncomplete, - /// Child components should not be cached, send this directly for processing. - NotRequired, - /// There was an error during consistency checks between block and blobs. - Err(AvailabilityCheckError), +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct DownloadResult { + pub value: T, + pub block_root: Hash256, + pub seen_timestamp: Duration, + pub peer_id: PeerId, } #[derive(Debug, PartialEq, Eq)] -pub enum State { +pub enum State { AwaitingDownload, - Downloading { peer_id: PeerId }, - Processing { peer_id: PeerId }, - Processed { peer_id: PeerId }, + Downloading, + AwaitingProcess(DownloadResult), + Processing(DownloadResult), + Processed(PeerId), } /// Object representing the state of a single block or blob lookup request. #[derive(PartialEq, Eq, Debug)] -pub struct SingleLookupRequestState { +pub struct SingleLookupRequestState { /// State of this request. - state: State, + state: State, /// Peers that should have this block or blob. available_peers: HashSet, /// Peers from which we have requested this block. @@ -376,15 +240,9 @@ pub struct SingleLookupRequestState { failed_processing: u8, /// How many times have we attempted to download this block or blob. failed_downloading: u8, - /// Should be incremented everytime this request is retried. The purpose of this is to - /// differentiate retries of the same block/blob request within a lookup. We currently penalize - /// peers and retry requests prior to receiving the stream terminator. This means responses - /// from a prior request may arrive after a new request has been sent, this counter allows - /// us to differentiate these two responses. - req_counter: u32, } -impl SingleLookupRequestState { +impl SingleLookupRequestState { pub fn new(peers: &[PeerId]) -> Self { let mut available_peers = HashSet::default(); for peer in peers.iter().copied() { @@ -397,74 +255,159 @@ impl SingleLookupRequestState { used_peers: HashSet::default(), failed_processing: 0, failed_downloading: 0, - req_counter: 0, } } - pub fn is_current_req_counter(&self, req_counter: u32) -> bool { - self.req_counter == req_counter - } - pub fn is_awaiting_download(&self) -> bool { - matches!(self.state, State::AwaitingDownload) - } - - pub fn is_downloaded(&self) -> bool { match self.state { - State::AwaitingDownload => false, - State::Downloading { .. } => false, - State::Processing { .. } => true, - State::Processed { .. } => true, + State::AwaitingDownload => true, + State::Downloading { .. } + | State::AwaitingProcess { .. } + | State::Processing { .. } + | State::Processed { .. } => false, } } pub fn is_processed(&self) -> bool { match self.state { - State::AwaitingDownload => false, - State::Downloading { .. } => false, - State::Processing { .. } => false, + State::AwaitingDownload + | State::Downloading { .. } + | State::AwaitingProcess { .. } + | State::Processing { .. } => false, State::Processed { .. } => true, } } - pub fn on_download_start(&mut self, peer_id: PeerId) -> u32 { - self.state = State::Downloading { peer_id }; - self.req_counter += 1; - self.req_counter + pub fn peek_downloaded_data(&self) -> Option<&T> { + match &self.state { + State::AwaitingDownload => None, + State::Downloading { .. } => None, + State::AwaitingProcess(result) => Some(&result.value), + State::Processing(result) => Some(&result.value), + State::Processed { .. } => None, + } + } + + /// Switch to `AwaitingProcessing` if the request is in `AwaitingDownload` state, otherwise + /// ignore. + pub fn insert_verified_response(&mut self, result: DownloadResult) -> bool { + if let State::AwaitingDownload = &self.state { + self.state = State::AwaitingProcess(result); + true + } else { + false + } + } + + /// Switch to `Downloading` if the request is in `AwaitingDownload` state, otherwise returns None. + pub fn on_download_start(&mut self) -> Result<(), LookupRequestError> { + match &self.state { + State::AwaitingDownload => { + self.state = State::Downloading; + Ok(()) + } + other => Err(LookupRequestError::BadState(format!( + "Bad state on_download_start expected AwaitingDownload got {other}" + ))), + } } /// Registers a failure in downloading a block. This might be a peer disconnection or a wrong /// block. - pub fn on_download_failure(&mut self) { - self.failed_downloading = self.failed_downloading.saturating_add(1); - self.state = State::AwaitingDownload; + pub fn on_download_failure(&mut self) -> Result<(), LookupRequestError> { + match &self.state { + State::Downloading => { + self.failed_downloading = self.failed_downloading.saturating_add(1); + self.state = State::AwaitingDownload; + Ok(()) + } + other => Err(LookupRequestError::BadState(format!( + "Bad state on_download_failure expected Downloading got {other}" + ))), + } } - pub fn on_download_success(&mut self) -> Result<(), String> { + pub fn on_download_success( + &mut self, + result: DownloadResult, + ) -> Result<(), LookupRequestError> { match &self.state { - State::Downloading { peer_id } => { - self.state = State::Processing { peer_id: *peer_id }; + State::Downloading => { + self.state = State::AwaitingProcess(result); Ok(()) } - other => Err(format!( - "request bad state, expected downloading got {other}" - )), + other => Err(LookupRequestError::BadState(format!( + "Bad state on_download_success expected Downloading got {other}" + ))), } } - /// Registers a failure in processing a block. - pub fn on_processing_failure(&mut self) { - self.failed_processing = self.failed_processing.saturating_add(1); - self.state = State::AwaitingDownload; + /// Switch to `Processing` if the request is in `AwaitingProcess` state, otherwise returns None. + pub fn maybe_start_processing(&mut self) -> Option> { + // For 2 lines replace state with placeholder to gain ownership of `result` + match &self.state { + State::AwaitingProcess(result) => { + let result = result.clone(); + self.state = State::Processing(result.clone()); + Some(result) + } + _ => None, + } } - pub fn on_processing_success(&mut self) -> Result<(), String> { + /// Revert into `AwaitingProcessing`, if the payload if not invalid and can be submitted for + /// processing latter. + pub fn revert_to_awaiting_processing(&mut self) -> Result<(), LookupRequestError> { match &self.state { - State::Processing { peer_id } => { - self.state = State::Processed { peer_id: *peer_id }; + State::Processing(result) => { + self.state = State::AwaitingProcess(result.clone()); Ok(()) } - other => Err(format!("not in processing state: {}", other).to_string()), + other => Err(LookupRequestError::BadState(format!( + "Bad state on revert_to_awaiting_processing expected Processing got {other}" + ))), + } + } + + /// Registers a failure in processing a block. + pub fn on_processing_failure(&mut self) -> Result { + match &self.state { + State::Processing(result) => { + let peer_id = result.peer_id; + self.failed_processing = self.failed_processing.saturating_add(1); + self.state = State::AwaitingDownload; + Ok(peer_id) + } + other => Err(LookupRequestError::BadState(format!( + "Bad state on_processing_failure expected Processing got {other}" + ))), + } + } + + pub fn on_processing_success(&mut self) -> Result { + match &self.state { + State::Processing(result) => { + let peer_id = result.peer_id; + self.state = State::Processed(peer_id); + Ok(peer_id) + } + other => Err(LookupRequestError::BadState(format!( + "Bad state on_processing_success expected Processing got {other}" + ))), + } + } + + pub fn on_post_process_validation_failure(&mut self) -> Result { + match &self.state { + State::Processed(peer_id) => { + let peer_id = *peer_id; + self.failed_processing = self.failed_processing.saturating_add(1); + self.state = State::AwaitingDownload; + Ok(peer_id) + } + other => Err(LookupRequestError::BadState(format!( + "Bad state on_post_process_validation_failure expected Processed got {other}" + ))), } } @@ -483,31 +426,18 @@ impl SingleLookupRequestState { } /// If a peer disconnects, this request could be failed. If so, an error is returned - pub fn check_peer_disconnected(&mut self, dc_peer_id: &PeerId) -> Result<(), ()> { - self.available_peers.remove(dc_peer_id); - if let State::Downloading { peer_id } = &self.state { - if peer_id == dc_peer_id { - // Peer disconnected before providing a block - self.on_download_failure(); - return Err(()); - } - } - Ok(()) - } - - /// Returns the id peer we downloaded from if we have downloaded a verified block, otherwise - /// returns an error. - pub fn processing_peer(&self) -> Result { - match &self.state { - State::Processing { peer_id } | State::Processed { peer_id } => Ok(*peer_id), - other => Err(format!("not in processing state: {}", other).to_string()), - } + pub fn remove_peer(&mut self, disconnected_peer_id: &PeerId) { + self.available_peers.remove(disconnected_peer_id); } pub fn get_used_peers(&self) -> impl Iterator { self.used_peers.iter() } + pub fn get_available_peers(&self) -> impl Iterator { + self.available_peers.iter() + } + /// Selects a random peer from available peers if any, inserts it in used peers and returns it. pub fn use_rand_available_peer(&mut self) -> Option { let peer_id = self @@ -520,65 +450,25 @@ impl SingleLookupRequestState { } } -impl slog::Value for SingleBlockLookup { - fn serialize( - &self, - _record: &slog::Record, - key: slog::Key, - serializer: &mut dyn slog::Serializer, - ) -> slog::Result { - serializer.emit_str("request", key)?; - serializer.emit_arguments("lookup_type", &format_args!("{:?}", self.lookup_type))?; - serializer.emit_arguments("hash", &format_args!("{}", self.block_root()))?; - serializer.emit_arguments( - "blob_ids", - &format_args!("{:?}", self.blob_request_state.requested_ids.indices()), - )?; - serializer.emit_arguments( - "block_request_state.state", - &format_args!("{:?}", self.block_request_state.state), - )?; - serializer.emit_arguments( - "blob_request_state.state", - &format_args!("{:?}", self.blob_request_state.state), - )?; - slog::Result::Ok(()) - } -} - -impl slog::Value for SingleLookupRequestState { - fn serialize( - &self, - record: &slog::Record, - key: slog::Key, - serializer: &mut dyn slog::Serializer, - ) -> slog::Result { - serializer.emit_str("request_state", key)?; - match &self.state { - State::AwaitingDownload => { - "awaiting_download".serialize(record, "state", serializer)? - } - State::Downloading { peer_id } => { - serializer.emit_arguments("downloading_peer", &format_args!("{}", peer_id))? - } - State::Processing { peer_id } => { - serializer.emit_arguments("processing_peer", &format_args!("{}", peer_id))? - } - State::Processed { .. } => "processed".serialize(record, "state", serializer)?, - } - serializer.emit_u8("failed_downloads", self.failed_downloading)?; - serializer.emit_u8("failed_processing", self.failed_processing)?; - slog::Result::Ok(()) - } -} - -impl std::fmt::Display for State { +impl std::fmt::Display for State { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { State::AwaitingDownload => write!(f, "AwaitingDownload"), State::Downloading { .. } => write!(f, "Downloading"), + State::AwaitingProcess { .. } => write!(f, "AwaitingProcessing"), State::Processing { .. } => write!(f, "Processing"), State::Processed { .. } => write!(f, "Processed"), } } } + +impl LookupRequestError { + pub(crate) fn as_metric(&self) -> &'static str { + match self { + LookupRequestError::TooManyAttempts { .. } => "TooManyAttempts", + LookupRequestError::NoPeers => "NoPeers", + LookupRequestError::SendFailed { .. } => "SendFailed", + LookupRequestError::BadState { .. } => "BadState", + } + } +} diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index 8e3b35ee5d3..302a0489c3b 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -1,14 +1,17 @@ use crate::network_beacon_processor::NetworkBeaconProcessor; use crate::service::RequestId; -use crate::sync::manager::{RequestId as SyncRequestId, SingleLookupReqId, SyncManager}; +use crate::sync::manager::{ + BlockProcessType, RequestId as SyncRequestId, SingleLookupReqId, SyncManager, +}; use crate::sync::SyncMessage; use crate::NetworkMessage; use std::sync::Arc; use super::*; -use crate::sync::block_lookups::common::ResponseType; +use crate::sync::block_lookups::common::{ResponseType, PARENT_DEPTH_TOLERANCE}; +use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::builder::Witness; use beacon_chain::eth1_chain::CachingEth1Backend; use beacon_chain::test_utils::{ @@ -24,7 +27,7 @@ use store::MemoryStore; use tokio::sync::mpsc; use types::{ test_utils::{SeedableRng, XorShiftRng}, - BlobSidecar, ForkName, MinimalEthSpec as E, SignedBeaconBlock, + BlobSidecar, ForkName, MinimalEthSpec as E, SignedBeaconBlock, Slot, }; type T = Witness, E, MemoryStore, MemoryStore>; @@ -72,6 +75,7 @@ struct TestRig { } const D: Duration = Duration::new(0, 0); +const PARENT_FAIL_TOLERANCE: u8 = SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS; impl TestRig { fn test_setup() -> Self { @@ -194,11 +198,15 @@ impl TestRig { self.sync_manager.handle_message(sync_message); } + fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { + self.sync_manager.active_single_lookups() + } + fn active_single_lookups_count(&self) -> usize { self.sync_manager.active_single_lookups().len() } - fn active_parent_lookups(&self) -> Vec { + fn active_parent_lookups(&self) -> Vec> { self.sync_manager.active_parent_lookups() } @@ -206,22 +214,74 @@ impl TestRig { self.sync_manager.active_parent_lookups().len() } - fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { - self.sync_manager.failed_chains_contains(chain_hash) + fn assert_single_lookups_count(&self, count: usize) { + assert_eq!( + self.active_single_lookups_count(), + count, + "Unexpected count of single lookups. Current lookups: {:?}", + self.active_single_lookups() + ); } - #[track_caller] - fn assert_parent_lookups_consistency(&self) { - let hashes = self.active_parent_lookups(); - let expected = hashes.len(); + fn assert_parent_lookups_count(&self, count: usize) { assert_eq!( - expected, - hashes - .into_iter() - .collect::>() - .len(), - "duplicated chain hashes in parent queue" - ) + self.active_parent_lookups_count(), + count, + "Unexpected count of parent lookups. Parent lookups: {:?}. Current lookups: {:?}", + self.active_parent_lookups(), + self.active_single_lookups() + ); + } + + fn assert_lookup_is_active(&self, block_root: Hash256) { + let lookups = self.sync_manager.active_single_lookups(); + if !lookups.iter().any(|l| l.1 == block_root) { + panic!("Expected lookup {block_root} to be the only active: {lookups:?}"); + } + } + + fn insert_failed_chain(&mut self, block_root: Hash256) { + self.sync_manager.insert_failed_chain(block_root); + } + + fn assert_not_failed_chain(&mut self, chain_hash: Hash256) { + let failed_chains = self.sync_manager.get_failed_chains(); + if failed_chains.contains(&chain_hash) { + panic!("failed chains contain {chain_hash:?}: {failed_chains:?}"); + } + } + + fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { + self.sync_manager.get_failed_chains().contains(chain_hash) + } + + fn find_single_lookup_for(&self, block_root: Hash256) -> Id { + self.active_single_lookups() + .iter() + .find(|(_, b, _)| b == &block_root) + .unwrap_or_else(|| panic!("no single block lookup found for {block_root}")) + .0 + } + + fn expect_no_active_single_lookups(&self) { + assert!( + self.active_single_lookups().is_empty(), + "expect no single block lookups: {:?}", + self.active_single_lookups() + ); + } + + fn expect_no_active_lookups(&self) { + self.expect_no_active_single_lookups(); + } + + fn expect_lookups(&self, expected_block_roots: &[Hash256]) { + let block_roots = self + .active_single_lookups() + .iter() + .map(|(_, b, _)| *b) + .collect::>(); + assert_eq!(&block_roots, expected_block_roots); } fn new_connected_peer(&mut self) -> PeerId { @@ -233,27 +293,43 @@ impl TestRig { peer_id } - fn parent_chain_processed(&mut self, chain_hash: Hash256, result: BatchProcessResult) { - self.send_sync_message(SyncMessage::BatchProcessed { - sync_type: ChainSegmentProcessId::ParentLookup(chain_hash), - result, - }) + fn parent_chain_processed_success( + &mut self, + chain_hash: Hash256, + blocks: &[Arc>], + ) { + // Send import events for all pending parent blocks + for _ in blocks { + self.parent_block_processed_imported(chain_hash); + } + // Send final import event for the block that triggered the lookup + self.single_block_component_processed_imported(chain_hash); + } + + /// Locate a parent lookup chain with tip hash `chain_hash` + fn find_oldest_parent_lookup(&self, chain_hash: Hash256) -> Hash256 { + let parent_chain = self + .active_parent_lookups() + .into_iter() + .find(|chain| chain.first() == Some(&chain_hash)) + .unwrap_or_else(|| { + panic!( + "No parent chain with chain_hash {chain_hash:?}: Parent lookups {:?} Single lookups {:?}", + self.active_parent_lookups(), + self.active_single_lookups(), + ) + }); + *parent_chain.last().unwrap() } - fn parent_chain_processed_success(&mut self, chain_hash: Hash256) { - self.parent_chain_processed( - chain_hash, - BatchProcessResult::Success { - was_non_empty: true, - }, - ) + fn parent_block_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { + let id = self.find_single_lookup_for(self.find_oldest_parent_lookup(chain_hash)); + self.single_block_component_processed(id, result); } - fn parent_block_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { - self.send_sync_message(SyncMessage::BlockComponentProcessed { - process_type: BlockProcessType::ParentLookup { chain_hash }, - result, - }); + fn parent_blob_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { + let id = self.find_single_lookup_for(self.find_oldest_parent_lookup(chain_hash)); + self.single_blob_component_processed(id, result); } fn parent_block_processed_imported(&mut self, chain_hash: Hash256) { @@ -263,35 +339,24 @@ impl TestRig { ); } - fn single_block_component_processed( - &mut self, - id: SingleLookupReqId, - result: BlockProcessingResult, - ) { + fn single_block_component_processed(&mut self, id: Id, result: BlockProcessingResult) { self.send_sync_message(SyncMessage::BlockComponentProcessed { - process_type: BlockProcessType::SingleBlock { id: id.id }, + process_type: BlockProcessType::SingleBlock { id }, result, }) } - fn single_block_component_processed_imported( - &mut self, - id: SingleLookupReqId, - block_root: Hash256, - ) { + fn single_block_component_processed_imported(&mut self, block_root: Hash256) { + let id = self.find_single_lookup_for(block_root); self.single_block_component_processed( id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(block_root)), ) } - fn single_blob_component_processed( - &mut self, - id: SingleLookupReqId, - result: BlockProcessingResult, - ) { + fn single_blob_component_processed(&mut self, id: Id, result: BlockProcessingResult) { self.send_sync_message(SyncMessage::BlockComponentProcessed { - process_type: BlockProcessType::SingleBlob { id: id.id }, + process_type: BlockProcessType::SingleBlob { id }, result, }) } @@ -302,6 +367,7 @@ impl TestRig { peer_id: PeerId, beacon_block: Option>>, ) { + self.log("parent_lookup_block_response"); self.send_sync_message(SyncMessage::RpcBlock { request_id: SyncRequestId::SingleBlock { id }, peer_id, @@ -316,6 +382,7 @@ impl TestRig { peer_id: PeerId, beacon_block: Option>>, ) { + self.log("single_lookup_block_response"); self.send_sync_message(SyncMessage::RpcBlock { request_id: SyncRequestId::SingleBlock { id }, peer_id, @@ -330,6 +397,10 @@ impl TestRig { peer_id: PeerId, blob_sidecar: Option>>, ) { + self.log(&format!( + "parent_lookup_blob_response {:?}", + blob_sidecar.as_ref().map(|b| b.index) + )); self.send_sync_message(SyncMessage::RpcBlob { request_id: SyncRequestId::SingleBlob { id }, peer_id, @@ -416,11 +487,7 @@ impl TestRig { peer_id: _, request: Request::BlocksByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlock { id }), - } if id.lookup_type == LookupType::Current - && request.block_roots().to_vec().contains(&for_block) => - { - Some(*id) - } + } if request.block_roots().to_vec().contains(&for_block) => Some(*id), _ => None, }) .unwrap_or_else(|e| panic!("Expected block request for {for_block:?}: {e}")) @@ -433,12 +500,11 @@ impl TestRig { peer_id: _, request: Request::BlobsByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlob { id }), - } if id.lookup_type == LookupType::Current - && request - .blob_ids - .to_vec() - .iter() - .any(|r| r.block_root == for_block) => + } if request + .blob_ids + .to_vec() + .iter() + .any(|r| r.block_root == for_block) => { Some(*id) } @@ -454,11 +520,7 @@ impl TestRig { peer_id: _, request: Request::BlocksByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlock { id }), - } if id.lookup_type == LookupType::Parent - && request.block_roots().to_vec().contains(&for_block) => - { - Some(*id) - } + } if request.block_roots().to_vec().contains(&for_block) => Some(*id), _ => None, }) .unwrap_or_else(|e| panic!("Expected block parent request for {for_block:?}: {e}")) @@ -471,12 +533,11 @@ impl TestRig { peer_id: _, request: Request::BlobsByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlob { id }), - } if id.lookup_type == LookupType::Parent - && request - .blob_ids - .to_vec() - .iter() - .all(|r| r.block_root == for_block) => + } if request + .blob_ids + .to_vec() + .iter() + .all(|r| r.block_root == for_block) => { Some(*id) } @@ -544,9 +605,13 @@ impl TestRig { fn expect_parent_chain_process(&mut self) { match self.beacon_processor_rx.try_recv() { Ok(work) => { - assert_eq!(work.work_type(), beacon_processor::CHAIN_SEGMENT); + // Parent chain sends blocks one by one + assert_eq!(work.work_type(), beacon_processor::RPC_BLOCK); } - other => panic!("Expected chain segment process, found {:?}", other), + other => panic!( + "Expected rpc_block from chain segment process, found {:?}", + other + ), } } @@ -560,24 +625,37 @@ impl TestRig { #[track_caller] fn expect_empty_beacon_processor(&mut self) { + match self.beacon_processor_rx.try_recv() { + Err(mpsc::error::TryRecvError::Empty) => {} // ok + Ok(event) => panic!("expected empty beacon processor: {:?}", event), + other => panic!("unexpected err {:?}", other), + } + } + + #[track_caller] + pub fn expect_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) { + let penalty_msg = self + .pop_received_network_event(|ev| match ev { + NetworkMessage::ReportPeer { + peer_id: p_id, msg, .. + } if p_id == &peer_id => Some(msg.to_owned()), + _ => None, + }) + .unwrap_or_else(|_| { + panic!( + "Expected '{expect_penalty_msg}' penalty for peer {peer_id}: {:#?}", + self.network_rx_queue + ) + }); assert_eq!( - self.beacon_processor_rx.try_recv().expect_err("must err"), - mpsc::error::TryRecvError::Empty + penalty_msg, expect_penalty_msg, + "Unexpected penalty msg for {peer_id}" ); } - #[track_caller] - pub fn expect_penalty(&mut self, peer_id: PeerId) { - self.pop_received_network_event(|ev| match ev { - NetworkMessage::ReportPeer { peer_id: p_id, .. } if p_id == &peer_id => Some(()), - _ => None, - }) - .unwrap_or_else(|_| { - panic!( - "Expected peer penalty for {peer_id}: {:#?}", - self.network_rx_queue - ) - }) + pub fn expect_single_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) { + self.expect_penalty(peer_id, expect_penalty_msg); + self.expect_no_penalty_for(peer_id); } pub fn block_with_parent_and_blobs( @@ -595,19 +673,46 @@ impl TestRig { pub fn rand_blockchain(&mut self, depth: usize) -> Vec>> { let mut blocks = Vec::>>::with_capacity(depth); - while blocks.len() < depth { + for slot in 0..depth { let parent = blocks .last() .map(|b| b.canonical_root()) .unwrap_or_else(Hash256::random); let mut block = self.rand_block(); *block.message_mut().parent_root_mut() = parent; + *block.message_mut().slot_mut() = slot.into(); blocks.push(block.into()); } + self.log(&format!( + "Blockchain dump {:#?}", + blocks + .iter() + .map(|b| format!( + "block {} {} parent {}", + b.slot(), + b.canonical_root(), + b.parent_root() + )) + .collect::>() + )); blocks } } +#[test] +fn stable_rng() { + let mut rng = XorShiftRng::from_seed([42; 16]); + let (block, _) = generate_rand_block_and_blobs::(ForkName::Base, NumBlobs::None, &mut rng); + assert_eq!( + block.canonical_root(), + Hash256::from_slice( + &hex::decode("adfd2e9e7a7976e8ccaed6eaf0257ed36a5b476732fee63ff44966602fd099ec") + .unwrap() + ), + "rng produces a consistent value" + ); +} + #[test] fn test_single_block_lookup_happy_path() { let mut rig = TestRig::test_setup(); @@ -630,9 +735,9 @@ fn test_single_block_lookup_happy_path() { // Send the stream termination. Peer should have not been penalized, and the request removed // after processing. rig.single_lookup_block_response(id, peer_id, None); - rig.single_block_component_processed_imported(id, block_root); + rig.single_block_component_processed_imported(block_root); rig.expect_empty_network(); - assert_eq!(rig.active_single_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -648,7 +753,7 @@ fn test_single_block_lookup_empty_response() { // The peer does not have the block. It should be penalized. rig.single_lookup_block_response(id, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "NoResponseReturned"); rig.expect_block_lookup_request(block_hash); // it should be retried } @@ -667,7 +772,7 @@ fn test_single_block_lookup_wrong_response() { // Peer sends something else. It should be penalized. let bad_block = rig.rand_block(); rig.single_lookup_block_response(id, peer_id, Some(bad_block.into())); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); rig.expect_block_lookup_request(block_hash); // should be retried // Send the stream termination. This should not produce an additional penalty. @@ -717,10 +822,10 @@ fn test_single_block_lookup_becomes_parent_request() { // Send the stream termination. Peer should have not been penalized, and the request moved to a // parent request after processing. rig.single_block_component_processed( - id, + id.lookup_id, BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), ); - assert_eq!(rig.active_single_lookups_count(), 1); + assert_eq!(rig.active_single_lookups_count(), 2); // 2 = current + parent rig.expect_parent_request_block_and_blobs(parent_root); rig.expect_empty_network(); assert_eq!(rig.active_parent_lookups_count(), 1); @@ -748,8 +853,8 @@ fn test_parent_lookup_happy_path() { BlockError::BlockIsAlreadyKnown(block_root).into(), ); rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.parent_chain_processed_success(block_root, &[]); + rig.expect_no_active_lookups(); } #[test] @@ -766,7 +871,7 @@ fn test_parent_lookup_wrong_response() { // Peer sends the wrong block, peer should be penalized and the block re-requested. let bad_block = rig.rand_block(); rig.parent_lookup_block_response(id1, peer_id, Some(bad_block.into())); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); let id2 = rig.expect_block_parent_request(parent_root); // Send the stream termination for the first request. This should not produce extra penalties. @@ -780,8 +885,8 @@ fn test_parent_lookup_wrong_response() { // Processing succeeds, now the rest of the chain should be sent for processing. rig.parent_block_processed_imported(block_root); rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.parent_chain_processed_success(block_root, &[]); + rig.expect_no_active_lookups(); } #[test] @@ -797,7 +902,7 @@ fn test_parent_lookup_empty_response() { // Peer sends an empty response, peer should be penalized and the block re-requested. rig.parent_lookup_block_response(id1, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "NoResponseReturned"); let id2 = rig.expect_block_parent_request(parent_root); // Send the right block this time. @@ -806,9 +911,9 @@ fn test_parent_lookup_empty_response() { // Processing succeeds, now the rest of the chain should be sent for processing. rig.parent_block_processed_imported(block_root); - rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + + rig.single_block_component_processed_imported(block_root); + rig.expect_no_active_lookups(); } #[test] @@ -833,8 +938,8 @@ fn test_parent_lookup_rpc_failure() { // Processing succeeds, now the rest of the chain should be sent for processing. rig.parent_block_processed_imported(block_root); rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.parent_chain_processed_success(block_root, &[]); + rig.expect_no_active_lookups(); } #[test] @@ -847,7 +952,7 @@ fn test_parent_lookup_too_many_attempts() { // Trigger the request rig.trigger_unknown_parent_block(peer_id, block.into()); - for i in 1..=parent_lookup::PARENT_FAIL_TOLERANCE { + for i in 1..=PARENT_FAIL_TOLERANCE { let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the first iteration as this test only retries blocks if rig.after_deneb() && i == 1 { @@ -872,11 +977,11 @@ fn test_parent_lookup_too_many_attempts() { // I'm unsure if this is how it should behave? // rig.parent_lookup_block_response(id, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); } } - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -888,7 +993,7 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { // Trigger the request rig.trigger_unknown_parent_block(peer_id, block.into()); - for i in 1..=parent_lookup::PARENT_FAIL_TOLERANCE { + for i in 1..=PARENT_FAIL_TOLERANCE { assert!(!rig.failed_chains_contains(&block_root)); let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the first iteration as this test only retries blocks @@ -902,18 +1007,18 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { // Send a bad block this time. It should be tried again. let bad_block = rig.rand_block(); rig.parent_lookup_block_response(id, peer_id, Some(bad_block.into())); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); } } - assert_eq!(rig.active_parent_lookups_count(), 0); assert!(!rig.failed_chains_contains(&block_root)); assert!(!rig.failed_chains_contains(&parent.canonical_root())); + rig.expect_no_active_lookups(); } #[test] fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { - const PROCESSING_FAILURES: u8 = parent_lookup::PARENT_FAIL_TOLERANCE / 2 + 1; + const PROCESSING_FAILURES: u8 = PARENT_FAIL_TOLERANCE / 2 + 1; let mut rig = TestRig::test_setup(); let (parent, block, parent_root, block_root) = rig.rand_block_and_parent(); let peer_id = rig.new_connected_peer(); @@ -922,7 +1027,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { rig.trigger_unknown_parent_block(peer_id, block.into()); rig.log("Fail downloading the block"); - for i in 0..(parent_lookup::PARENT_FAIL_TOLERANCE - PROCESSING_FAILURES) { + for i in 0..(PARENT_FAIL_TOLERANCE - PROCESSING_FAILURES) { let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the first iteration as this test only retries blocks if rig.after_deneb() && i == 0 { @@ -933,28 +1038,25 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { } rig.log("Now fail processing a block in the parent request"); - for i in 0..PROCESSING_FAILURES { + for _ in 0..PROCESSING_FAILURES { let id = rig.expect_block_parent_request(parent_root); - // Blobs are only requested in the first iteration as this test only retries blocks - if rig.after_deneb() && i != 0 { - let _ = rig.expect_blob_parent_request(parent_root); - } - assert!(!rig.failed_chains_contains(&block_root)); + // Blobs are only requested in the previous first iteration as this test only retries blocks + rig.assert_not_failed_chain(block_root); // send the right parent but fail processing rig.parent_lookup_block_response(id, peer_id, Some(parent.clone().into())); rig.parent_block_processed(block_root, BlockError::InvalidSignature.into()); rig.parent_lookup_block_response(id, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "lookup_block_processing_failure"); } - assert!(rig.failed_chains_contains(&block_root)); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.assert_not_failed_chain(block_root); + rig.expect_no_active_lookups(); } #[test] fn test_parent_lookup_too_deep() { let mut rig = TestRig::test_setup(); - let mut blocks = rig.rand_blockchain(parent_lookup::PARENT_DEPTH_TOLERANCE); + let mut blocks = rig.rand_blockchain(PARENT_DEPTH_TOLERANCE); let peer_id = rig.new_connected_peer(); let trigger_block = blocks.pop().unwrap(); @@ -976,19 +1078,59 @@ fn test_parent_lookup_too_deep() { ) } - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "chain_too_long"); assert!(rig.failed_chains_contains(&chain_hash)); } #[test] -fn test_parent_lookup_disconnection() { +fn test_parent_lookup_disconnection_no_peers_left() { let mut rig = TestRig::test_setup(); let peer_id = rig.new_connected_peer(); let trigger_block = rig.rand_block(); rig.trigger_unknown_parent_block(peer_id, trigger_block.into()); rig.peer_disconnected(peer_id); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.expect_no_active_lookups(); +} + +#[test] +fn test_parent_lookup_disconnection_peer_left() { + let mut rig = TestRig::test_setup(); + let peer_ids = (0..2).map(|_| rig.new_connected_peer()).collect::>(); + let trigger_block = rig.rand_block(); + // lookup should have two peers associated with the same block + for peer_id in peer_ids.iter() { + rig.trigger_unknown_parent_block(*peer_id, trigger_block.clone().into()); + } + // Disconnect the first peer only, which is the one handling the request + rig.peer_disconnected(*peer_ids.first().unwrap()); + rig.assert_parent_lookups_count(1); +} + +#[test] +fn test_skip_creating_failed_parent_lookup() { + let mut rig = TestRig::test_setup(); + let (_, block, parent_root, _) = rig.rand_block_and_parent(); + let peer_id = rig.new_connected_peer(); + rig.insert_failed_chain(parent_root); + rig.trigger_unknown_parent_block(peer_id, block.into()); + // Expect single penalty for peer, despite dropping two lookups + rig.expect_single_penalty(peer_id, "failed_chain"); + // Both current and parent lookup should be rejected + rig.expect_no_active_lookups(); +} + +#[test] +fn test_skip_creating_failed_current_lookup() { + let mut rig = TestRig::test_setup(); + let (_, block, parent_root, block_root) = rig.rand_block_and_parent(); + let peer_id = rig.new_connected_peer(); + rig.insert_failed_chain(block_root); + rig.trigger_unknown_parent_block(peer_id, block.into()); + // Expect single penalty for peer + rig.expect_single_penalty(peer_id, "failed_chain"); + // Only the current lookup should be rejected + rig.expect_lookups(&[parent_root]); } #[test] @@ -1015,9 +1157,9 @@ fn test_single_block_lookup_ignored_response() { // after processing. rig.single_lookup_block_response(id, peer_id, None); // Send an Ignored response, the request should be dropped - rig.single_block_component_processed(id, BlockProcessingResult::Ignored); + rig.single_block_component_processed(id.lookup_id, BlockProcessingResult::Ignored); rig.expect_empty_network(); - assert_eq!(rig.active_single_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -1028,8 +1170,10 @@ fn test_parent_lookup_ignored_response() { let peer_id = rig.new_connected_peer(); // Trigger the request - rig.trigger_unknown_parent_block(peer_id, block.into()); + rig.trigger_unknown_parent_block(peer_id, block.clone().into()); let id = rig.expect_parent_request_block_and_blobs(parent_root); + // Note: single block lookup for current `block` does not trigger any request because it does + // not have blobs, and the block is already cached // Peer sends the right block, it should be sent for processing. Peer should not be penalized. rig.parent_lookup_block_response(id, peer_id, Some(parent.into())); @@ -1039,7 +1183,7 @@ fn test_parent_lookup_ignored_response() { // Return an Ignored result. The request should be dropped rig.parent_block_processed(block_root, BlockProcessingResult::Ignored); rig.expect_empty_network(); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.expect_no_active_lookups(); } /// This is a regression test. @@ -1056,7 +1200,7 @@ fn test_same_chain_race_condition() { let chain_hash = trigger_block.canonical_root(); rig.trigger_unknown_parent_block(peer_id, trigger_block.clone()); - for (i, block) in blocks.into_iter().rev().enumerate() { + for (i, block) in blocks.clone().into_iter().rev().enumerate() { let id = rig.expect_parent_request_block_and_blobs(block.canonical_root()); // the block rig.parent_lookup_block_response(id, peer_id, Some(block.clone())); @@ -1066,36 +1210,40 @@ fn test_same_chain_race_condition() { rig.expect_block_process(ResponseType::Block); // the processing result if i + 2 == depth { - // one block was removed + rig.log(&format!("Block {i} was removed and is already known")); rig.parent_block_processed( chain_hash, BlockError::BlockIsAlreadyKnown(block.canonical_root()).into(), ) } else { + rig.log(&format!("Block {i} ParentUnknown")); rig.parent_block_processed( chain_hash, BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), ) } - rig.assert_parent_lookups_consistency(); } - // Processing succeeds, now the rest of the chain should be sent for processing. - rig.expect_parent_chain_process(); - // Try to get this block again while the chain is being processed. We should not request it again. let peer_id = rig.new_connected_peer(); - rig.trigger_unknown_parent_block(peer_id, trigger_block); - rig.assert_parent_lookups_consistency(); + rig.trigger_unknown_parent_block(peer_id, trigger_block.clone()); + rig.expect_empty_network(); - rig.parent_chain_processed_success(chain_hash); - assert_eq!(rig.active_parent_lookups_count(), 0); + // Processing succeeds, now the rest of the chain should be sent for processing. + for block in blocks.iter().skip(1).chain(&[trigger_block]) { + rig.expect_parent_chain_process(); + rig.single_block_component_processed_imported(block.canonical_root()); + } + rig.expect_no_active_lookups(); } mod deneb_only { use super::*; - use beacon_chain::data_availability_checker::AvailabilityCheckError; + use beacon_chain::{ + block_verification_types::RpcBlock, data_availability_checker::AvailabilityCheckError, + }; use ssz_types::VariableList; + use std::collections::VecDeque; struct DenebTester { rig: TestRig, @@ -1232,6 +1380,11 @@ mod deneb_only { }) } + fn log(self, msg: &str) -> Self { + self.rig.log(msg); + self + } + fn parent_block_response(mut self) -> Self { self.rig.expect_empty_network(); let block = self.parent_block.pop_front().unwrap().clone(); @@ -1242,7 +1395,7 @@ mod deneb_only { Some(block), ); - assert_eq!(self.rig.active_parent_lookups_count(), 1); + self.rig.assert_parent_lookups_count(1); self } @@ -1286,18 +1439,22 @@ mod deneb_only { self.rig.expect_empty_network(); // The request should still be active. - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig + .assert_lookup_is_active(self.block.canonical_root()); self } fn blobs_response(mut self) -> Self { + self.rig + .log(&format!("blobs response {}", self.blobs.len())); for blob in &self.blobs { self.rig.single_lookup_blob_response( self.blob_req_id.expect("blob request id"), self.peer_id, Some(blob.clone()), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig + .assert_lookup_is_active(self.block.canonical_root()); } self.rig.single_lookup_blob_response( self.blob_req_id.expect("blob request id"), @@ -1356,29 +1513,68 @@ mod deneb_only { self } + fn block_missing_components(mut self) -> Self { + self.rig.single_block_component_processed( + self.block_req_id.expect("block request id").lookup_id, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + self.block.slot(), + self.block_root, + )), + ); + self.rig.expect_empty_network(); + self.rig.assert_single_lookups_count(1); + self + } + + fn blob_imported(mut self) -> Self { + self.rig.single_blob_component_processed( + self.blob_req_id.expect("blob request id").lookup_id, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), + ); + self.rig.expect_empty_network(); + self.rig.assert_single_lookups_count(0); + self + } + fn block_imported(mut self) -> Self { // Missing blobs should be the request is not removed, the outstanding blobs request should // mean we do not send a new request. self.rig.single_block_component_processed( - self.block_req_id.expect("block request id"), + self.block_req_id + .or(self.blob_req_id) + .expect("block request id") + .lookup_id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), ); self.rig.expect_empty_network(); - assert_eq!(self.rig.active_single_lookups_count(), 0); + self.rig.assert_single_lookups_count(0); self } fn parent_block_imported(mut self) -> Self { + self.rig.log("parent_block_imported"); self.rig.parent_block_processed( self.block_root, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), ); self.rig.expect_empty_network(); - assert_eq!(self.rig.active_parent_lookups_count(), 0); + self.rig.assert_parent_lookups_count(0); + self + } + + fn parent_blob_imported(mut self) -> Self { + self.rig.log("parent_blob_imported"); + self.rig.parent_blob_processed( + self.block_root, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), + ); + self.rig.expect_empty_network(); + self.rig.assert_parent_lookups_count(0); self } fn parent_block_unknown_parent(mut self) -> Self { + self.rig.log("parent_block_unknown_parent"); let block = self.unknown_parent_block.take().unwrap(); // Now this block is the one we expect requests from self.block = block.clone(); @@ -1396,6 +1592,26 @@ mod deneb_only { self } + fn parent_block_missing_components(mut self) -> Self { + let block = self.unknown_parent_block.clone().unwrap(); + self.rig.parent_block_processed( + self.block_root, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + block.slot(), + block.canonical_root(), + )), + ); + self.rig.parent_blob_processed( + self.block_root, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + block.slot(), + block.canonical_root(), + )), + ); + assert_eq!(self.rig.active_parent_lookups_count(), 1); + self + } + fn invalid_parent_processed(mut self) -> Self { self.rig.parent_block_processed( self.block_root, @@ -1407,50 +1623,51 @@ mod deneb_only { fn invalid_block_processed(mut self) -> Self { self.rig.single_block_component_processed( - self.block_req_id.expect("block request id"), + self.block_req_id.expect("block request id").lookup_id, BlockProcessingResult::Err(BlockError::ProposalSignatureInvalid), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); self } fn invalid_blob_processed(mut self) -> Self { - self.rig.single_block_component_processed( - self.blob_req_id.expect("blob request id"), + self.rig.log("invalid_blob_processed"); + self.rig.single_blob_component_processed( + self.blob_req_id.expect("blob request id").lookup_id, BlockProcessingResult::Err(BlockError::AvailabilityCheck( AvailabilityCheckError::KzgVerificationFailed, )), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); self } fn missing_components_from_block_request(mut self) -> Self { self.rig.single_block_component_processed( - self.block_req_id.expect("block request id"), + self.block_req_id.expect("block request id").lookup_id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( self.slot, self.block_root, )), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); self } fn missing_components_from_blob_request(mut self) -> Self { self.rig.single_blob_component_processed( - self.blob_req_id.expect("blob request id"), + self.blob_req_id.expect("blob request id").lookup_id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( self.slot, self.block_root, )), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); self } - fn expect_penalty(mut self) -> Self { - self.rig.expect_penalty(self.peer_id); + fn expect_penalty(mut self, expect_penalty_msg: &'static str) -> Self { + self.rig.expect_penalty(self.peer_id, expect_penalty_msg); self } fn expect_no_penalty(mut self) -> Self { @@ -1514,6 +1731,10 @@ mod deneb_only { self.rig.expect_block_process(ResponseType::Block); self } + fn expect_no_active_lookups(self) -> Self { + self.rig.expect_no_active_lookups(); + self + } fn search_parent_dup(mut self) -> Self { self.rig .trigger_unknown_parent_block(self.peer_id, self.block.clone()); @@ -1530,8 +1751,9 @@ mod deneb_only { tester .block_response_triggering_process() .blobs_response() + .block_missing_components() // blobs not yet imported .blobs_response_was_valid() - .block_imported(); + .blob_imported(); // now blobs resolve as imported } #[test] @@ -1541,10 +1763,11 @@ mod deneb_only { }; tester - .blobs_response() - .blobs_response_was_valid() + .blobs_response() // hold blobs for processing .block_response_triggering_process() - .block_imported(); + .block_missing_components() // blobs not yet imported + .blobs_response_was_valid() + .blob_imported(); // now blobs resolve as imported } #[test] @@ -1555,7 +1778,7 @@ mod deneb_only { tester .empty_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_block_request() .expect_no_blobs_request() .empty_blobs_response() @@ -1578,7 +1801,7 @@ mod deneb_only { .missing_components_from_block_request() .empty_blobs_response() .missing_components_from_blob_request() - .expect_penalty() + .expect_penalty("sent_incomplete_blobs") .expect_blobs_request() .expect_no_block_request(); } @@ -1591,11 +1814,10 @@ mod deneb_only { tester .blobs_response() - .blobs_response_was_valid() .expect_no_penalty_and_no_requests() - .missing_components_from_blob_request() + // blobs not sent for processing until the block is processed .empty_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_block_request() .expect_no_blobs_request(); } @@ -1609,11 +1831,11 @@ mod deneb_only { tester .block_response_triggering_process() .invalid_block_processed() - .expect_penalty() + .expect_penalty("lookup_block_processing_failure") .expect_block_request() .expect_no_blobs_request() .blobs_response() - .missing_components_from_blob_request() + // blobs not sent for processing until the block is processed .expect_no_penalty_and_no_requests(); } @@ -1628,7 +1850,7 @@ mod deneb_only { .missing_components_from_block_request() .blobs_response() .invalid_blob_processed() - .expect_penalty() + .expect_penalty("lookup_blobs_processing_failure") .expect_blobs_request() .expect_no_block_request(); } @@ -1645,7 +1867,7 @@ mod deneb_only { .invalidate_blobs_too_few() .blobs_response() .missing_components_from_blob_request() - .expect_penalty() + .expect_penalty("sent_incomplete_blobs") .expect_blobs_request() .expect_no_block_request(); } @@ -1660,7 +1882,7 @@ mod deneb_only { .block_response_triggering_process() .invalidate_blobs_too_many() .blobs_response() - .expect_penalty() + .expect_penalty("DuplicateData") .expect_blobs_request() .expect_no_block_request(); } @@ -1673,8 +1895,7 @@ mod deneb_only { tester .invalidate_blobs_too_few() - .blobs_response() - .blobs_response_was_valid() + .blobs_response() // blobs are not sent until the block is processed .expect_no_penalty_and_no_requests() .block_response_triggering_process(); } @@ -1688,7 +1909,7 @@ mod deneb_only { tester .invalidate_blobs_too_many() .blobs_response() - .expect_penalty() + .expect_penalty("DuplicateData") .expect_blobs_request() .expect_no_block_request() .block_response_triggering_process(); @@ -1729,9 +1950,8 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .invalid_parent_processed() - .expect_penalty() + .expect_penalty("lookup_block_processing_failure") .expect_parent_block_request() - .expect_parent_blobs_request() .expect_empty_beacon_processor(); } @@ -1780,7 +2000,7 @@ mod deneb_only { tester .empty_parent_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_parent_block_request() .expect_no_blobs_request() .parent_blob_response() @@ -1802,15 +2022,20 @@ mod deneb_only { tester .blobs_response() + .log(" Return empty blobs for parent, block errors with missing components, downscore") .empty_parent_blobs_response() .expect_no_penalty_and_no_requests() .parent_block_response() - .expect_penalty() + .parent_block_missing_components() + .expect_penalty("sent_incomplete_blobs") + .log("Re-request parent blobs, succeed and import parent") .expect_parent_blobs_request() .parent_blob_response() .expect_block_process() - .parent_block_imported() - .expect_parent_chain_process(); + .parent_blob_imported() + .log("resolve original block trigger blobs request and import") + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1848,9 +2073,9 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .invalid_parent_processed() - .expect_penalty() + .expect_penalty("lookup_block_processing_failure") .expect_parent_block_request() - .expect_parent_blobs_request() + // blobs are not sent until block is processed .expect_empty_beacon_processor(); } @@ -1868,7 +2093,10 @@ mod deneb_only { .expect_block_process() .parent_block_imported() .block_response() - .expect_parent_chain_process(); + .blobs_response() + .expect_parent_chain_process() + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1886,7 +2114,10 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .parent_block_imported() - .expect_parent_chain_process(); + .blobs_response() + .expect_parent_chain_process() + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1899,7 +2130,7 @@ mod deneb_only { tester .empty_parent_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_parent_block_request() .expect_no_blobs_request() .parent_blob_response() @@ -1907,8 +2138,10 @@ mod deneb_only { .parent_block_response() .expect_block_process() .parent_block_imported() + .blobs_response() .block_response() - .expect_parent_chain_process(); + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1921,15 +2154,21 @@ mod deneb_only { tester .block_response() + .log(" Return empty blobs for parent, block errors with missing components, downscore") .empty_parent_blobs_response() .expect_no_penalty_and_no_requests() .parent_block_response() - .expect_penalty() + .parent_block_missing_components() + .expect_penalty("sent_incomplete_blobs") + .log("Re-request parent blobs, succeed and import parent") .expect_parent_blobs_request() .parent_blob_response() .expect_block_process() - .parent_block_imported() - .expect_parent_chain_process(); + .parent_blob_imported() + .log("resolve original block trigger blobs request and import") + .blobs_response() + .block_imported() + .expect_no_active_lookups(); } #[test] diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 15b96c52b10..08fde6dcc8f 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -34,7 +34,6 @@ //! search for the block and subsequently search for parents if needed. use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart}; -use super::block_lookups::common::LookupType; use super::block_lookups::BlockLookups; use super::network_context::{BlockOrBlob, RangeRequestId, RpcEvent, SyncNetworkContext}; use super::peer_sync_info::{remote_sync_type, PeerSyncType}; @@ -42,11 +41,13 @@ use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH}; use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor}; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; -use crate::sync::block_lookups::{BlobRequestState, BlockRequestState}; +use crate::sync::block_lookups::{ + BlobRequestState, BlockComponent, BlockRequestState, DownloadResult, +}; use crate::sync::block_sidecar_coupling::BlocksAndBlobsRequestInfo; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::ChildComponents; +use beacon_chain::validator_monitor::timestamp_now; use beacon_chain::{ AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError, EngineState, }; @@ -56,12 +57,10 @@ use lighthouse_network::types::{NetworkGlobals, SyncState}; use lighthouse_network::SyncInfo; use lighthouse_network::{PeerAction, PeerId}; use slog::{crit, debug, error, info, trace, warn, Logger}; -use std::ops::IndexMut; use std::ops::Sub; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; -use types::blob_sidecar::FixedBlobSidecarList; use types::{BlobSidecar, EthSpec, Hash256, SignedBeaconBlock, Slot}; /// The number of slots ahead of us that is allowed before requesting a long-range (batch) Sync @@ -77,9 +76,8 @@ pub type Id = u32; #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] pub struct SingleLookupReqId { - pub id: Id, - pub req_counter: Id, - pub lookup_type: LookupType, + pub lookup_id: Id, + pub req_id: Id, } /// Id of rpc requests sent by sync to the network. @@ -153,7 +151,14 @@ pub enum SyncMessage { pub enum BlockProcessType { SingleBlock { id: Id }, SingleBlob { id: Id }, - ParentLookup { chain_hash: Hash256 }, +} + +impl BlockProcessType { + pub fn id(&self) -> Id { + match self { + BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => *id, + } + } } #[derive(Debug)] @@ -254,27 +259,33 @@ impl SyncManager { ), range_sync: RangeSync::new(beacon_chain.clone(), log.clone()), backfill_sync: BackFillSync::new(beacon_chain.clone(), network_globals, log.clone()), - block_lookups: BlockLookups::new( - beacon_chain.data_availability_checker.clone(), - log.clone(), - ), + block_lookups: BlockLookups::new(log.clone()), log: log.clone(), } } #[cfg(test)] - pub(crate) fn active_single_lookups(&self) -> Vec { + pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { self.block_lookups.active_single_lookups() } #[cfg(test)] - pub(crate) fn active_parent_lookups(&self) -> Vec { - self.block_lookups.active_parent_lookups() + pub(crate) fn active_parent_lookups(&self) -> Vec> { + self.block_lookups + .active_parent_lookups() + .iter() + .map(|c| c.chain.clone()) + .collect() + } + + #[cfg(test)] + pub(crate) fn get_failed_chains(&mut self) -> Vec { + self.block_lookups.get_failed_chains() } #[cfg(test)] - pub(crate) fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { - self.block_lookups.failed_chains_contains(chain_hash) + pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { + self.block_lookups.insert_failed_chain(block_root); } fn network_globals(&self) -> &NetworkGlobals { @@ -353,8 +364,7 @@ impl SyncManager { fn peer_disconnect(&mut self, peer_id: &PeerId) { self.range_sync.peer_disconnect(&mut self.network, peer_id); - self.block_lookups - .peer_disconnected(peer_id, &mut self.network); + self.block_lookups.peer_disconnected(peer_id); // Regardless of the outcome, we update the sync status. let _ = self .backfill_sync @@ -578,27 +588,30 @@ impl SyncManager { block_root, parent_root, block_slot, - block.into(), + BlockComponent::Block(DownloadResult { + value: block.block_cloned(), + block_root, + seen_timestamp: timestamp_now(), + peer_id, + }), ); } SyncMessage::UnknownParentBlob(peer_id, blob) => { let blob_slot = blob.slot(); let block_root = blob.block_root(); let parent_root = blob.block_parent_root(); - let blob_index = blob.index; - if blob_index >= T::EthSpec::max_blobs_per_block() as u64 { - warn!(self.log, "Peer sent blob with invalid index"; "index" => blob_index, "peer_id" => %peer_id); - return; - } - let mut blobs = FixedBlobSidecarList::default(); - *blobs.index_mut(blob_index as usize) = Some(blob); debug!(self.log, "Received unknown parent blob message"; "block_root" => %block_root, "parent_root" => %parent_root); self.handle_unknown_parent( peer_id, block_root, parent_root, blob_slot, - ChildComponents::new(block_root, None, Some(blobs)), + BlockComponent::Blob(DownloadResult { + value: blob, + block_root, + seen_timestamp: timestamp_now(), + peer_id, + }), ); } SyncMessage::UnknownBlockHashFromAttestation(peer_id, block_root) => { @@ -617,25 +630,9 @@ impl SyncManager { SyncMessage::BlockComponentProcessed { process_type, result, - } => match process_type { - BlockProcessType::SingleBlock { id } => self - .block_lookups - .single_block_component_processed::( - id, - result, - &mut self.network, - ), - BlockProcessType::SingleBlob { id } => self - .block_lookups - .single_block_component_processed::( - id, - result, - &mut self.network, - ), - BlockProcessType::ParentLookup { chain_hash } => self - .block_lookups - .parent_block_processed(chain_hash, result, &mut self.network), - }, + } => self + .block_lookups + .on_processing_result(process_type, result, &mut self.network), SyncMessage::BatchProcessed { sync_type, result } => match sync_type { ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => { self.range_sync.handle_block_process_result( @@ -661,9 +658,6 @@ impl SyncManager { } } } - ChainSegmentProcessId::ParentLookup(chain_hash) => self - .block_lookups - .parent_chain_processed(chain_hash, result, &mut self.network), }, } } @@ -674,23 +668,16 @@ impl SyncManager { block_root: Hash256, parent_root: Hash256, slot: Slot, - child_components: ChildComponents, + block_component: BlockComponent, ) { match self.should_search_for_block(Some(slot), &peer_id) { Ok(_) => { - self.block_lookups.search_parent( - slot, + self.block_lookups.search_child_and_parent( block_root, - parent_root, + block_component, peer_id, &mut self.network, ); - self.block_lookups.search_child_block( - block_root, - child_components, - &[peer_id], - &mut self.network, - ); } Err(reason) => { debug!(self.log, "Ignoring unknown parent request"; "block_root" => %block_root, "parent_root" => %parent_root, "reason" => reason); @@ -702,7 +689,7 @@ impl SyncManager { match self.should_search_for_block(None, &peer_id) { Ok(_) => { self.block_lookups - .search_block(block_root, &[peer_id], &mut self.network); + .search_unknown_block(block_root, &[peer_id], &mut self.network); } Err(reason) => { debug!(self.log, "Ignoring unknown block request"; "block_root" => %block_root, "reason" => reason); @@ -774,11 +761,6 @@ impl SyncManager { let dropped_single_blocks_requests = self.block_lookups.drop_single_block_requests(); - // - Parent lookups: - // Disabled while in this state. We drop current requests and don't search for new - // blocks. - let dropped_parent_chain_requests = self.block_lookups.drop_parent_chain_requests(); - // - Range: // We still send found peers to range so that it can keep track of potential chains // with respect to our current peers. Range will stop processing batches in the @@ -787,10 +769,9 @@ impl SyncManager { // - Backfill: Not affected by ee states, nothing to do. // Some logs. - if dropped_single_blocks_requests > 0 || dropped_parent_chain_requests > 0 { + if dropped_single_blocks_requests > 0 { debug!(self.log, "Execution engine not online. Dropping active requests."; "dropped_single_blocks_requests" => dropped_single_blocks_requests, - "dropped_parent_chain_requests" => dropped_parent_chain_requests, ); } } @@ -829,46 +810,13 @@ impl SyncManager { block: RpcEvent>>, ) { if let Some(resp) = self.network.on_single_block_response(id, block) { - match resp { - Ok((block, seen_timestamp)) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_lookup_response::( - id, - peer_id, - block, - seen_timestamp, - &mut self.network, - ), - LookupType::Parent => self - .block_lookups - .parent_lookup_response::( - id, - peer_id, - block, - seen_timestamp, - &mut self.network, - ), - }, - Err(error) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_block_lookup_failed::( - id, - &peer_id, - &mut self.network, - error, - ), - LookupType::Parent => self - .block_lookups - .parent_lookup_failed::( - id, - &peer_id, - &mut self.network, - error, - ), - }, - } + self.block_lookups + .on_download_response::>( + id.lookup_id, + peer_id, + resp, + &mut self.network, + ) } } @@ -904,47 +852,13 @@ impl SyncManager { blob: RpcEvent>>, ) { if let Some(resp) = self.network.on_single_blob_response(id, blob) { - match resp { - Ok((blobs, seen_timestamp)) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_lookup_response::( - id, - peer_id, - blobs, - seen_timestamp, - &mut self.network, - ), - LookupType::Parent => self - .block_lookups - .parent_lookup_response::( - id, - peer_id, - blobs, - seen_timestamp, - &mut self.network, - ), - }, - - Err(error) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_block_lookup_failed::( - id, - &peer_id, - &mut self.network, - error, - ), - LookupType::Parent => { - self.block_lookups.parent_lookup_failed::( - id, - &peer_id, - &mut self.network, - error, - ) - } - }, - } + self.block_lookups + .on_download_response::>( + id.lookup_id, + peer_id, + resp, + &mut self.network, + ) } } diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index fc91270c1dc..860192db684 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -4,11 +4,12 @@ use self::requests::{ActiveBlobsByRootRequest, ActiveBlocksByRootRequest}; pub use self::requests::{BlobsByRootSingleBlockRequest, BlocksByRootSingleRequest}; use super::block_sidecar_coupling::BlocksAndBlobsRequestInfo; -use super::manager::{Id, RequestId as SyncRequestId}; +use super::manager::{BlockProcessType, Id, RequestId as SyncRequestId}; use super::range_sync::{BatchId, ByRangeRequestType, ChainId}; use crate::network_beacon_processor::NetworkBeaconProcessor; use crate::service::{NetworkMessage, RequestId}; use crate::status::ToStatusMessage; +use crate::sync::block_lookups::SingleLookupId; use crate::sync::manager::SingleLookupReqId; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::validator_monitor::timestamp_now; @@ -18,13 +19,13 @@ use lighthouse_network::rpc::methods::BlobsByRangeRequest; use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError}; use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource, Request}; pub use requests::LookupVerifyError; -use slog::{debug, trace, warn}; +use slog::{debug, error, trace, warn}; use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; use types::blob_sidecar::FixedBlobSidecarList; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; +use types::{BlobSidecar, EthSpec, Hash256, SignedBeaconBlock}; mod requests; @@ -52,7 +53,7 @@ pub enum RpcEvent { RPCError(RPCError), } -pub type RpcProcessingResult = Option>; +pub type RpcProcessingResult = Result<(T, Duration), LookupFailure>; pub enum LookupFailure { RpcError(RPCError), @@ -297,10 +298,15 @@ impl SyncNetworkContext { pub fn block_lookup_request( &mut self, - id: SingleLookupReqId, + lookup_id: SingleLookupId, peer_id: PeerId, request: BlocksByRootSingleRequest, - ) -> Result<(), &'static str> { + ) -> Result { + let id = SingleLookupReqId { + lookup_id, + req_id: self.next_id(), + }; + debug!( self.log, "Sending BlocksByRoot Request"; @@ -319,25 +325,76 @@ impl SyncNetworkContext { self.blocks_by_root_requests .insert(id, ActiveBlocksByRootRequest::new(request)); - Ok(()) + Ok(true) } + /// Request necessary blobs for `block_root`. Requests only the necessary blobs by checking: + /// - If we have a downloaded but not yet processed block + /// - If the da_checker has a pending block + /// - If the da_checker has pending blobs from gossip + /// + /// Returns false if no request was made, because we don't need to fetch (more) blobs. pub fn blob_lookup_request( &mut self, - id: SingleLookupReqId, + lookup_id: SingleLookupId, peer_id: PeerId, - request: BlobsByRootSingleBlockRequest, - ) -> Result<(), &'static str> { + block_root: Hash256, + downloaded_block_expected_blobs: Option, + ) -> Result { + let expected_blobs = downloaded_block_expected_blobs + .or_else(|| { + self.chain + .data_availability_checker + .num_expected_blobs(&block_root) + }) + .unwrap_or_else(|| { + // If we don't about the block being requested, attempt to fetch all blobs + if self + .chain + .data_availability_checker + .da_check_required_for_current_epoch() + { + T::EthSpec::max_blobs_per_block() + } else { + 0 + } + }); + + let imported_blob_indexes = self + .chain + .data_availability_checker + .imported_blob_indexes(&block_root) + .unwrap_or_default(); + // Include only the blob indexes not yet imported (received through gossip) + let indices = (0..expected_blobs as u64) + .filter(|index| !imported_blob_indexes.contains(index)) + .collect::>(); + + if indices.is_empty() { + // No blobs required, do not issue any request + return Ok(false); + } + + let id = SingleLookupReqId { + lookup_id, + req_id: self.next_id(), + }; + debug!( self.log, "Sending BlobsByRoot Request"; "method" => "BlobsByRoot", - "block_root" => ?request.block_root, - "blob_indices" => ?request.indices, + "block_root" => ?block_root, + "blob_indices" => ?indices, "peer" => %peer_id, "id" => ?id ); + let request = BlobsByRootSingleBlockRequest { + block_root, + indices, + }; + self.send_network_msg(NetworkMessage::SendRequest { peer_id, request: Request::BlobsByRoot(request.clone().into_request(&self.chain.spec)), @@ -347,7 +404,7 @@ impl SyncNetworkContext { self.blobs_by_root_requests .insert(id, ActiveBlobsByRootRequest::new(request)); - Ok(()) + Ok(true) } pub fn is_execution_engine_online(&self) -> bool { @@ -458,7 +515,7 @@ impl SyncNetworkContext { &mut self, request_id: SingleLookupReqId, block: RpcEvent>>, - ) -> RpcProcessingResult>> { + ) -> Option>>> { let Entry::Occupied(mut request) = self.blocks_by_root_requests.entry(request_id) else { return None; }; @@ -489,7 +546,7 @@ impl SyncNetworkContext { &mut self, request_id: SingleLookupReqId, blob: RpcEvent>>, - ) -> RpcProcessingResult> { + ) -> Option>> { let Entry::Occupied(mut request) = self.blobs_by_root_requests.entry(request_id) else { return None; }; @@ -520,6 +577,69 @@ impl SyncNetworkContext { } }) } + + pub fn send_block_for_processing( + &self, + block_root: Hash256, + block: RpcBlock, + duration: Duration, + process_type: BlockProcessType, + ) -> Result<(), &'static str> { + match self.beacon_processor_if_enabled() { + Some(beacon_processor) => { + debug!(self.log, "Sending block for processing"; "block" => ?block_root, "process" => ?process_type); + if let Err(e) = beacon_processor.send_rpc_beacon_block( + block_root, + block, + duration, + process_type, + ) { + error!( + self.log, + "Failed to send sync block to processor"; + "error" => ?e + ); + Err("beacon processor send failure") + } else { + Ok(()) + } + } + None => { + trace!(self.log, "Dropping block ready for processing. Beacon processor not available"; "block" => %block_root); + Err("beacon processor unavailable") + } + } + } + + pub fn send_blobs_for_processing( + &self, + block_root: Hash256, + blobs: FixedBlobSidecarList, + duration: Duration, + process_type: BlockProcessType, + ) -> Result<(), &'static str> { + match self.beacon_processor_if_enabled() { + Some(beacon_processor) => { + debug!(self.log, "Sending blobs for processing"; "block" => ?block_root, "process_type" => ?process_type); + if let Err(e) = + beacon_processor.send_rpc_blobs(block_root, blobs, duration, process_type) + { + error!( + self.log, + "Failed to send sync blobs to processor"; + "error" => ?e + ); + Err("beacon processor send failure") + } else { + Ok(()) + } + } + None => { + trace!(self.log, "Dropping blobs ready for processing. Beacon processor not available"; "block_root" => %block_root); + Err("beacon processor unavailable") + } + } + } } fn to_fixed_blob_sidecar_list( diff --git a/common/lru_cache/src/time.rs b/common/lru_cache/src/time.rs index 0b2fd835687..890bf47eb44 100644 --- a/common/lru_cache/src/time.rs +++ b/common/lru_cache/src/time.rs @@ -166,6 +166,12 @@ where self.map.contains(key) } + /// List known keys + pub fn keys(&mut self) -> impl Iterator { + self.update(); + self.map.iter() + } + /// Shrink the mappings to fit the current size. pub fn shrink_to_fit(&mut self) { self.map.shrink_to_fit();