From e0b862bfcbdaaa9d6be7875cb23c63322e153618 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Tue, 23 Apr 2024 01:14:26 +0900 Subject: [PATCH 01/14] Drop lookup type trait for a simple arg --- .../network/src/sync/block_lookups/common.rs | 63 +++---- .../network/src/sync/block_lookups/mod.rs | 40 ++-- .../src/sync/block_lookups/parent_lookup.rs | 7 +- .../sync/block_lookups/single_block_lookup.rs | 172 ++---------------- beacon_node/network/src/sync/manager.rs | 21 +-- 5 files changed, 73 insertions(+), 230 deletions(-) diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 3bd39301b21..7193dd6e216 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -29,34 +29,12 @@ pub enum LookupType { Parent, } -/// This trait helps differentiate `SingleBlockLookup`s from `ParentLookup`s .This is useful in -/// ensuring requests and responses are handled separately and enables us to use different failure -/// tolerances for each, while re-using the same basic request and retry logic. -pub trait Lookup { - const MAX_ATTEMPTS: u8; - fn lookup_type() -> LookupType; - fn max_attempts() -> u8 { - Self::MAX_ATTEMPTS - } -} - -/// A `Lookup` that is a part of a `ParentLookup`. -pub struct Parent; - -impl Lookup for Parent { - const MAX_ATTEMPTS: u8 = PARENT_FAIL_TOLERANCE; - fn lookup_type() -> LookupType { - LookupType::Parent - } -} - -/// A `Lookup` that part of a single block lookup. 
-pub struct Current; - -impl Lookup for Current { - const MAX_ATTEMPTS: u8 = SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS; - fn lookup_type() -> LookupType { - LookupType::Current +impl LookupType { + fn max_attempts(&self) -> u8 { + match self { + LookupType::Current => SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS, + LookupType::Parent => PARENT_FAIL_TOLERANCE, + } } } @@ -68,7 +46,7 @@ impl Lookup for Current { /// The use of the `ResponseType` associated type gives us a degree of type /// safety when handling a block/blob response ensuring we only mutate the correct corresponding /// state. -pub trait RequestState { +pub trait RequestState { /// The type of the request . type RequestType; @@ -81,9 +59,12 @@ pub trait RequestState { /* Request building methods */ /// Construct a new request. - fn build_request(&mut self) -> Result<(PeerId, Self::RequestType), LookupRequestError> { + fn build_request( + &mut self, + lookup_type: LookupType, + ) -> Result<(PeerId, Self::RequestType), LookupRequestError> { // Verify and construct request. - self.too_many_attempts()?; + self.too_many_attempts(lookup_type)?; let peer = self.get_peer()?; let request = self.new_request(); Ok((peer, request)) @@ -93,6 +74,7 @@ pub trait RequestState { fn build_request_and_send( &mut self, id: Id, + lookup_type: LookupType, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { // Check if request is necessary. @@ -101,7 +83,7 @@ pub trait RequestState { } // Construct request. - let (peer_id, request) = self.build_request()?; + let (peer_id, request) = self.build_request(lookup_type)?; // Update request state. let req_counter = self.get_state_mut().on_download_start(peer_id); @@ -110,17 +92,16 @@ pub trait RequestState { let id = SingleLookupReqId { id, req_counter, - lookup_type: L::lookup_type(), + lookup_type, }; Self::make_request(id, peer_id, request, cx) } /// Verify the current request has not exceeded the maximum number of attempts. 
- fn too_many_attempts(&self) -> Result<(), LookupRequestError> { - let max_attempts = L::max_attempts(); + fn too_many_attempts(&self, lookup_type: LookupType) -> Result<(), LookupRequestError> { let request_state = self.get_state(); - if request_state.failed_attempts() >= max_attempts { + if request_state.failed_attempts() >= lookup_type.max_attempts() { let cannot_process = request_state.more_failed_processing_attempts(); Err(LookupRequestError::TooManyAttempts { cannot_process }) } else { @@ -187,7 +168,7 @@ pub trait RequestState { fn response_type() -> ResponseType; /// A getter for the `BlockRequestState` or `BlobRequestState` associated with this trait. - fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self; + fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self; /// A getter for a reference to the `SingleLookupRequestState` associated with this trait. fn get_state(&self) -> &SingleLookupRequestState; @@ -196,7 +177,7 @@ pub trait RequestState { fn get_state_mut(&mut self) -> &mut SingleLookupRequestState; } -impl RequestState for BlockRequestState { +impl RequestState for BlockRequestState { type RequestType = BlocksByRootSingleRequest; type VerifiedResponseType = Arc>; type ReconstructedResponseType = RpcBlock; @@ -253,7 +234,7 @@ impl RequestState for BlockRequestState fn response_type() -> ResponseType { ResponseType::Block } - fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { + fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { &mut request.block_request_state } fn get_state(&self) -> &SingleLookupRequestState { @@ -264,7 +245,7 @@ impl RequestState for BlockRequestState } } -impl RequestState for BlobRequestState { +impl RequestState for BlobRequestState { type RequestType = BlobsByRootSingleBlockRequest; type VerifiedResponseType = FixedBlobSidecarList; type ReconstructedResponseType = FixedBlobSidecarList; @@ -328,7 +309,7 @@ impl RequestState for BlobRequestState ResponseType { 
ResponseType::Blob } - fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { + fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { &mut request.blob_request_state } fn get_state(&self) -> &SingleLookupRequestState { diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index fa2683fb0f0..a2909b49dd1 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -16,9 +16,6 @@ use beacon_chain::data_availability_checker::{ }; use beacon_chain::validator_monitor::timestamp_now; use beacon_chain::{AvailabilityProcessingStatus, BeaconChainTypes, BlockError}; -pub use common::Current; -pub use common::Lookup; -pub use common::Parent; pub use common::RequestState; use fnv::FnvHashMap; use lighthouse_network::{PeerAction, PeerId}; @@ -55,12 +52,12 @@ pub struct BlockLookups { /// Parent chain lookups being downloaded. parent_lookups: SmallVec<[ParentLookup; 3]>, - processing_parent_lookups: HashMap, SingleBlockLookup)>, + processing_parent_lookups: HashMap, SingleBlockLookup)>, /// A cache of failed chain lookups to prevent duplicate searches. failed_chains: LRUTimeCache, - single_block_lookups: FnvHashMap>, + single_block_lookups: FnvHashMap>, pub(crate) da_checker: Arc>, @@ -131,7 +128,7 @@ impl BlockLookups { /// Attempts to trigger the request matching the given `block_root`. pub fn trigger_single_lookup( &mut self, - mut single_block_lookup: SingleBlockLookup, + mut single_block_lookup: SingleBlockLookup, cx: &mut SyncNetworkContext, ) { let block_root = single_block_lookup.block_root(); @@ -147,7 +144,7 @@ impl BlockLookups { } /// Adds a lookup to the `single_block_lookups` map. 
- pub fn add_single_lookup(&mut self, single_block_lookup: SingleBlockLookup) { + pub fn add_single_lookup(&mut self, single_block_lookup: SingleBlockLookup) { self.single_block_lookups .insert(single_block_lookup.id, single_block_lookup); @@ -212,6 +209,7 @@ impl BlockLookups { peers, self.da_checker.clone(), cx.next_id(), + LookupType::Current, ); debug!( @@ -284,10 +282,10 @@ impl BlockLookups { /// Get a single block lookup by its ID. This method additionally ensures the `req_counter` /// matches the current `req_counter` for the lookup. This ensures any stale responses from requests /// that have been retried are ignored. - fn get_single_lookup>( + fn get_single_lookup>( &mut self, id: SingleLookupReqId, - ) -> Option> { + ) -> Option> { let mut lookup = self.single_block_lookups.remove(&id.id)?; let request_state = R::request_state_mut(&mut lookup); @@ -314,7 +312,7 @@ impl BlockLookups { } /// Process a block or blob response received from a single lookup request. - pub fn single_lookup_response>( + pub fn single_lookup_response>( &mut self, lookup_id: SingleLookupReqId, peer_id: PeerId, @@ -345,7 +343,7 @@ impl BlockLookups { "response_type" => ?response_type, ); - match self.handle_verified_response::( + match self.handle_verified_response::( seen_timestamp, cx, BlockProcessType::SingleBlock { id: lookup.id }, @@ -372,13 +370,13 @@ impl BlockLookups { /// Consolidates error handling for `single_lookup_response`. An `Err` here should always mean /// the lookup is dropped. 
- fn handle_verified_response>( + fn handle_verified_response>( &self, seen_timestamp: Duration, cx: &mut SyncNetworkContext, process_type: BlockProcessType, verified_response: R::VerifiedResponseType, - lookup: &mut SingleBlockLookup, + lookup: &mut SingleBlockLookup, ) -> Result<(), LookupRequestError> { let id = lookup.id; let block_root = lookup.block_root(); @@ -389,7 +387,7 @@ impl BlockLookups { // If we have an outstanding parent request for this block, delay sending the response until // all parent blocks have been processed, otherwise we will fail validation with an // `UnknownParent`. - let delay_send = match L::lookup_type() { + let delay_send = match lookup.lookup_type { LookupType::Parent => false, LookupType::Current => self.has_pending_parent_request(lookup.block_root()), }; @@ -453,7 +451,7 @@ impl BlockLookups { /// Get a parent block lookup by its ID. This method additionally ensures the `req_counter` /// matches the current `req_counter` for the lookup. This any stale responses from requests /// that have been retried are ignored. - fn get_parent_lookup>( + fn get_parent_lookup>( &mut self, id: SingleLookupReqId, ) -> Option> { @@ -479,7 +477,7 @@ impl BlockLookups { } /// Process a response received from a parent lookup request. - pub fn parent_lookup_response>( + pub fn parent_lookup_response>( &mut self, id: SingleLookupReqId, peer_id: PeerId, @@ -523,7 +521,7 @@ impl BlockLookups { /// Consolidates error handling for `parent_lookup_response`. An `Err` here should always mean /// the lookup is dropped. - fn parent_lookup_response_inner>( + fn parent_lookup_response_inner>( &mut self, peer_id: PeerId, response: R::VerifiedResponseType, @@ -554,7 +552,7 @@ impl BlockLookups { } } - self.handle_verified_response::( + self.handle_verified_response::( seen_timestamp, cx, BlockProcessType::ParentLookup { @@ -633,7 +631,7 @@ impl BlockLookups { } /// An RPC error has occurred during a parent lookup. This function handles this case. 
- pub fn parent_lookup_failed>( + pub fn parent_lookup_failed>( &mut self, id: SingleLookupReqId, peer_id: &PeerId, @@ -669,7 +667,7 @@ impl BlockLookups { } /// An RPC error has occurred during a single lookup. This function handles this case.\ - pub fn single_block_lookup_failed>( + pub fn single_block_lookup_failed>( &mut self, id: SingleLookupReqId, peer_id: &PeerId, @@ -717,7 +715,7 @@ impl BlockLookups { /* Processing responses */ - pub fn single_block_component_processed>( + pub fn single_block_component_processed>( &mut self, target_id: Id, result: BlockProcessingResult, diff --git a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs b/beacon_node/network/src/sync/block_lookups/parent_lookup.rs index b7a71860bff..11eb908953f 100644 --- a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/parent_lookup.rs @@ -1,6 +1,6 @@ +use super::common::LookupType; use super::single_block_lookup::{LookupRequestError, SingleBlockLookup}; use super::{DownloadedBlock, PeerId}; -use crate::sync::block_lookups::common::Parent; use crate::sync::{manager::SLOT_IMPORT_TOLERANCE, network_context::SyncNetworkContext}; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; @@ -24,7 +24,7 @@ pub(crate) struct ParentLookup { /// The blocks that have currently been downloaded. downloaded_blocks: Vec>, /// Request of the last parent. 
- pub current_parent_request: SingleBlockLookup, + pub current_parent_request: SingleBlockLookup, } #[derive(Debug, PartialEq, Eq)] @@ -55,6 +55,7 @@ impl ParentLookup { &[peer_id], da_checker, cx.next_id(), + LookupType::Parent, ); Self { @@ -132,7 +133,7 @@ impl ParentLookup { Hash256, VecDeque>, Vec, - SingleBlockLookup, + SingleBlockLookup, ) { let ParentLookup { chain_hash, diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 5bb663967d7..077af7c3d19 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -1,5 +1,6 @@ +use super::common::LookupType; use super::PeerId; -use crate::sync::block_lookups::common::{Lookup, RequestState}; +use crate::sync::block_lookups::common::RequestState; use crate::sync::block_lookups::Id; use crate::sync::network_context::SyncNetworkContext; use beacon_chain::block_verification_types::RpcBlock; @@ -14,7 +15,6 @@ use rand::seq::IteratorRandom; use slog::{debug, Logger}; use std::collections::HashSet; use std::fmt::Debug; -use std::marker::PhantomData; use std::sync::Arc; use store::Hash256; use strum::IntoStaticStr; @@ -33,27 +33,30 @@ pub enum LookupRequestError { BadState(String), } -pub struct SingleBlockLookup { +pub struct SingleBlockLookup { pub id: Id, - pub block_request_state: BlockRequestState, - pub blob_request_state: BlobRequestState, + pub lookup_type: LookupType, + pub block_request_state: BlockRequestState, + pub blob_request_state: BlobRequestState, pub da_checker: Arc>, /// Only necessary for requests triggered by an `UnknownBlockParent` or `UnknownBlockParent` /// because any blocks or blobs without parents won't hit the data availability cache. 
pub child_components: Option>, } -impl SingleBlockLookup { +impl SingleBlockLookup { pub fn new( requested_block_root: Hash256, child_components: Option>, peers: &[PeerId], da_checker: Arc>, id: Id, + lookup_type: LookupType, ) -> Self { let is_deneb = da_checker.is_deneb(); Self { id, + lookup_type, block_request_state: BlockRequestState::new(requested_block_root, peers), blob_request_state: BlobRequestState::new(requested_block_root, peers, is_deneb), da_checker, @@ -103,11 +106,11 @@ impl SingleBlockLookup { if !block_already_downloaded { self.block_request_state - .build_request_and_send(self.id, cx)?; + .build_request_and_send(self.id, self.lookup_type, cx)?; } if !blobs_already_downloaded { self.blob_request_state - .build_request_and_send(self.id, cx)?; + .build_request_and_send(self.id, self.lookup_type, cx)?; } Ok(()) } @@ -144,7 +147,7 @@ impl SingleBlockLookup { /// Accepts a verified response, and adds it to the child components if required. This method /// returns a `CachedChild` which provides a completed block + blob response if all components have been /// received, or information about whether the child is required and if it has been downloaded. - pub fn add_response>( + pub fn add_response>( &mut self, verified_response: R::VerifiedResponseType, ) -> CachedChild { @@ -301,7 +304,7 @@ impl SingleBlockLookup { } /// The state of the blob request component of a `SingleBlockLookup`. -pub struct BlobRequestState { +pub struct BlobRequestState { /// The latest picture of which blobs still need to be requested. This includes information /// from both block/blobs downloaded in the network layer and any blocks/blobs that exist in /// the data availability checker. @@ -310,10 +313,9 @@ pub struct BlobRequestState { /// Where we store blobs until we receive the stream terminator. 
pub blob_download_queue: FixedBlobSidecarList, pub state: SingleLookupRequestState, - _phantom: PhantomData, } -impl BlobRequestState { +impl BlobRequestState { pub fn new(block_root: Hash256, peer_source: &[PeerId], is_deneb: bool) -> Self { let default_ids = MissingBlobs::new_without_block(block_root, is_deneb); Self { @@ -321,24 +323,21 @@ impl BlobRequestState { requested_ids: default_ids, blob_download_queue: <_>::default(), state: SingleLookupRequestState::new(peer_source), - _phantom: PhantomData, } } } /// The state of the block request component of a `SingleBlockLookup`. -pub struct BlockRequestState { +pub struct BlockRequestState { pub requested_block_root: Hash256, pub state: SingleLookupRequestState, - _phantom: PhantomData, } -impl BlockRequestState { +impl BlockRequestState { pub fn new(block_root: Hash256, peers: &[PeerId]) -> Self { Self { requested_block_root: block_root, state: SingleLookupRequestState::new(peers), - _phantom: PhantomData, } } } @@ -525,7 +524,7 @@ impl SingleLookupRequestState { } } -impl slog::Value for SingleBlockLookup { +impl slog::Value for SingleBlockLookup { fn serialize( &self, _record: &slog::Record, @@ -533,7 +532,7 @@ impl slog::Value for SingleBlockLookup { serializer: &mut dyn slog::Serializer, ) -> slog::Result { serializer.emit_str("request", key)?; - serializer.emit_arguments("lookup_type", &format_args!("{:?}", L::lookup_type()))?; + serializer.emit_arguments("lookup_type", &format_args!("{:?}", self.lookup_type))?; serializer.emit_arguments("hash", &format_args!("{}", self.block_root()))?; serializer.emit_arguments( "blob_ids", @@ -587,138 +586,3 @@ impl std::fmt::Display for State { } } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::sync::block_lookups::common::LookupType; - use beacon_chain::builder::Witness; - use beacon_chain::eth1_chain::CachingEth1Backend; - use sloggers::null::NullLoggerBuilder; - use sloggers::Build; - use slot_clock::{SlotClock, TestingSlotClock}; - use 
std::time::Duration; - use store::{HotColdDB, MemoryStore, StoreConfig}; - use types::{ - test_utils::{SeedableRng, TestRandom, XorShiftRng}, - ChainSpec, MinimalEthSpec as E, SignedBeaconBlock, Slot, - }; - - fn rand_block() -> SignedBeaconBlock { - let mut rng = XorShiftRng::from_seed([42; 16]); - SignedBeaconBlock::from_block( - types::BeaconBlock::Base(types::BeaconBlockBase { - ..<_>::random_for_test(&mut rng) - }), - types::Signature::random_for_test(&mut rng), - ) - } - type T = Witness, E, MemoryStore, MemoryStore>; - - struct TestLookup1; - - impl Lookup for TestLookup1 { - const MAX_ATTEMPTS: u8 = 3; - - fn lookup_type() -> LookupType { - panic!() - } - } - - struct TestLookup2; - - impl Lookup for TestLookup2 { - const MAX_ATTEMPTS: u8 = 4; - - fn lookup_type() -> LookupType { - panic!() - } - } - - #[test] - fn test_happy_path() { - let peer_id = PeerId::random(); - let block = rand_block(); - let spec = E::default_spec(); - let slot_clock = TestingSlotClock::new( - Slot::new(0), - Duration::from_secs(0), - Duration::from_secs(spec.seconds_per_slot), - ); - let log = NullLoggerBuilder.build().expect("logger should build"); - let store = - HotColdDB::open_ephemeral(StoreConfig::default(), ChainSpec::minimal(), log.clone()) - .expect("store"); - let da_checker = Arc::new( - DataAvailabilityChecker::new(slot_clock, None, store.into(), &log, spec.clone()) - .expect("data availability checker"), - ); - let mut sl = SingleBlockLookup::::new( - block.canonical_root(), - None, - &[peer_id], - da_checker, - 1, - ); - as RequestState>::build_request( - &mut sl.block_request_state, - ) - .unwrap(); - sl.block_request_state.state.state = State::Downloading { peer_id }; - } - - #[test] - fn test_block_lookup_failures() { - let peer_id = PeerId::random(); - let block = rand_block(); - let spec = E::default_spec(); - let slot_clock = TestingSlotClock::new( - Slot::new(0), - Duration::from_secs(0), - Duration::from_secs(spec.seconds_per_slot), - ); - let log = 
NullLoggerBuilder.build().expect("logger should build"); - let store = - HotColdDB::open_ephemeral(StoreConfig::default(), ChainSpec::minimal(), log.clone()) - .expect("store"); - - let da_checker = Arc::new( - DataAvailabilityChecker::new(slot_clock, None, store.into(), &log, spec.clone()) - .expect("data availability checker"), - ); - - let mut sl = SingleBlockLookup::::new( - block.canonical_root(), - None, - &[peer_id], - da_checker, - 1, - ); - for _ in 1..TestLookup2::MAX_ATTEMPTS { - as RequestState>::build_request( - &mut sl.block_request_state, - ) - .unwrap(); - sl.block_request_state.state.on_download_failure(); - } - - // Now we receive the block and send it for processing - as RequestState>::build_request( - &mut sl.block_request_state, - ) - .unwrap(); - sl.block_request_state.state.state = State::Downloading { peer_id }; - - // One processing failure maxes the available attempts - sl.block_request_state.state.on_processing_failure(); - assert_eq!( - as RequestState>::build_request( - &mut sl.block_request_state, - ) - .unwrap_err(), - LookupRequestError::TooManyAttempts { - cannot_process: false - } - ) - } -} diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 23bd1010bfe..9c17c6a1512 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -42,7 +42,6 @@ use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH}; use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor}; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; -use crate::sync::block_lookups::common::{Current, Parent}; use crate::sync::block_lookups::{BlobRequestState, BlockRequestState}; use crate::sync::block_sidecar_coupling::BlocksAndBlobsRequestInfo; use beacon_chain::block_verification_types::AsBlock; @@ -621,14 +620,14 @@ impl SyncManager { } => match process_type { BlockProcessType::SingleBlock { id } => self .block_lookups - 
.single_block_component_processed::>( + .single_block_component_processed::( id, result, &mut self.network, ), BlockProcessType::SingleBlob { id } => self .block_lookups - .single_block_component_processed::>( + .single_block_component_processed::>( id, result, &mut self.network, @@ -834,7 +833,7 @@ impl SyncManager { Ok((block, seen_timestamp)) => match id.lookup_type { LookupType::Current => self .block_lookups - .single_lookup_response::>( + .single_lookup_response::( id, peer_id, block, @@ -843,7 +842,7 @@ impl SyncManager { ), LookupType::Parent => self .block_lookups - .parent_lookup_response::>( + .parent_lookup_response::( id, peer_id, block, @@ -854,7 +853,7 @@ impl SyncManager { Err(error) => match id.lookup_type { LookupType::Current => self .block_lookups - .single_block_lookup_failed::>( + .single_block_lookup_failed::( id, &peer_id, &mut self.network, @@ -862,7 +861,7 @@ impl SyncManager { ), LookupType::Parent => self .block_lookups - .parent_lookup_failed::>( + .parent_lookup_failed::( id, &peer_id, &mut self.network, @@ -909,7 +908,7 @@ impl SyncManager { Ok((blobs, seen_timestamp)) => match id.lookup_type { LookupType::Current => self .block_lookups - .single_lookup_response::>( + .single_lookup_response::>( id, peer_id, blobs, @@ -918,7 +917,7 @@ impl SyncManager { ), LookupType::Parent => self .block_lookups - .parent_lookup_response::>( + .parent_lookup_response::>( id, peer_id, blobs, @@ -930,7 +929,7 @@ impl SyncManager { Err(error) => match id.lookup_type { LookupType::Current => self .block_lookups - .single_block_lookup_failed::>( + .single_block_lookup_failed::>( id, &peer_id, &mut self.network, @@ -938,7 +937,7 @@ impl SyncManager { ), LookupType::Parent => self .block_lookups - .parent_lookup_failed::>( + .parent_lookup_failed::>( id, &peer_id, &mut self.network, From 4c8143ff8ba144ae60578ac502c65b4440b545ff Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Tue, 23 Apr 2024 01:28:53 +0900 
Subject: [PATCH 02/14] Drop reconstructed for processing --- .../network/src/sync/block_lookups/common.rs | 37 +++---------------- .../network/src/sync/block_lookups/mod.rs | 9 +---- 2 files changed, 7 insertions(+), 39 deletions(-) diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 7193dd6e216..43fb0229a70 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -53,9 +53,6 @@ pub trait RequestState { /// The type created after validation. type VerifiedResponseType: Clone; - /// We convert a `VerifiedResponseType` to this type prior to sending it to the beacon processor. - type ReconstructedResponseType; - /* Request building methods */ /// Construct a new request. @@ -141,18 +138,12 @@ pub trait RequestState { components: &mut ChildComponents, ); - /// Convert a verified response to the type we send to the beacon processor. - fn verified_to_reconstructed( - block_root: Hash256, - verified: Self::VerifiedResponseType, - ) -> Self::ReconstructedResponseType; - /// Send the response to the beacon processor. 
- fn send_reconstructed_for_processing( + fn send_for_processing( id: Id, bl: &BlockLookups, block_root: Hash256, - verified: Self::ReconstructedResponseType, + verified: Self::VerifiedResponseType, duration: Duration, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError>; @@ -180,7 +171,6 @@ pub trait RequestState { impl RequestState for BlockRequestState { type RequestType = BlocksByRootSingleRequest; type VerifiedResponseType = Arc>; - type ReconstructedResponseType = RpcBlock; fn new_request(&self) -> Self::RequestType { BlocksByRootSingleRequest(self.requested_block_root) @@ -207,24 +197,17 @@ impl RequestState for BlockRequestState { components.merge_block(verified_response); } - fn verified_to_reconstructed( - block_root: Hash256, - block: Arc>, - ) -> RpcBlock { - RpcBlock::new_without_blobs(Some(block_root), block) - } - - fn send_reconstructed_for_processing( + fn send_for_processing( id: Id, bl: &BlockLookups, block_root: Hash256, - constructed: RpcBlock, + block: Arc>, duration: Duration, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { bl.send_block_for_processing( block_root, - constructed, + RpcBlock::new_without_blobs(Some(block_root), block), duration, BlockProcessType::SingleBlock { id }, cx, @@ -248,7 +231,6 @@ impl RequestState for BlockRequestState { impl RequestState for BlobRequestState { type RequestType = BlobsByRootSingleBlockRequest; type VerifiedResponseType = FixedBlobSidecarList; - type ReconstructedResponseType = FixedBlobSidecarList; fn new_request(&self) -> Self::RequestType { BlobsByRootSingleBlockRequest { @@ -282,14 +264,7 @@ impl RequestState for BlobRequestState { components.merge_blobs(verified_response); } - fn verified_to_reconstructed( - _block_root: Hash256, - blobs: FixedBlobSidecarList, - ) -> FixedBlobSidecarList { - blobs - } - - fn send_reconstructed_for_processing( + fn send_for_processing( id: Id, bl: &BlockLookups, block_root: Hash256, diff --git 
a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index a2909b49dd1..5e369d376da 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -427,14 +427,7 @@ impl BlockLookups { .on_download_success() .map_err(LookupRequestError::BadState)?; - R::send_reconstructed_for_processing( - id, - self, - block_root, - R::verified_to_reconstructed(block_root, verified_response), - seen_timestamp, - cx, - )? + R::send_for_processing(id, self, block_root, verified_response, seen_timestamp, cx)? } CachedChild::Err(e) => { warn!(self.log, "Consistency error in cached block"; From 3b662ded8f044ba51af48dc3ee1ea2184447fd81 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:14:17 +0900 Subject: [PATCH 03/14] Send parent blocks one by one --- .../child_components.rs | 7 + .../network_beacon_processor/sync_methods.rs | 37 -- .../network/src/sync/block_lookups/mod.rs | 369 +++++++++--------- .../src/sync/block_lookups/parent_lookup.rs | 85 ++-- .../sync/block_lookups/single_block_lookup.rs | 45 ++- .../network/src/sync/block_lookups/tests.rs | 265 ++++++++----- beacon_node/network/src/sync/manager.rs | 5 +- 7 files changed, 419 insertions(+), 394 deletions(-) diff --git a/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs b/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs index 184dfc45001..b33e90bbe1d 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs @@ -66,4 +66,11 @@ impl ChildComponents { pub fn clear_blobs(&mut self) { self.downloaded_blobs = FixedBlobSidecarList::default(); } + + pub fn downloaded_blobs_count(&self) -> usize { + self.downloaded_blobs + .iter() + .filter(|blob| blob.is_some()) + .count() + } } diff --git 
a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 887974c6e0b..daa9a2cf197 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -33,8 +33,6 @@ pub enum ChainSegmentProcessId { RangeBatchId(ChainId, Epoch), /// Processing ID for a backfill syncing batch. BackSyncBatchId(Epoch), - /// Processing Id of the parent lookup of a block. - ParentLookup(Hash256), } /// Returned when a chain segment import fails. @@ -396,41 +394,6 @@ impl NetworkBeaconProcessor { } } } - // this is a parent lookup request from the sync manager - ChainSegmentProcessId::ParentLookup(chain_head) => { - debug!( - self.log, "Processing parent lookup"; - "chain_hash" => %chain_head, - "blocks" => downloaded_blocks.len() - ); - // parent blocks are ordered from highest slot to lowest, so we need to process in - // reverse - match self - .process_blocks(downloaded_blocks.iter().rev(), notify_execution_layer) - .await - { - (imported_blocks, Err(e)) => { - debug!(self.log, "Parent lookup failed"; "error" => %e.message); - match e.peer_action { - Some(penalty) => BatchProcessResult::FaultyFailure { - imported_blocks: imported_blocks > 0, - penalty, - }, - None => BatchProcessResult::NonFaultyFailure, - } - } - (imported_blocks, Ok(_)) => { - debug!( - self.log, "Parent lookup processed successfully"; - "chain_hash" => %chain_head, - "imported_blocks" => imported_blocks - ); - BatchProcessResult::Success { - was_non_empty: imported_blocks > 0, - } - } - } - } }; self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result }); diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 5e369d376da..5091536cfcd 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,10 +1,8 @@ use 
self::single_block_lookup::SingleBlockLookup; use super::manager::BlockProcessingResult; use super::network_context::{LookupFailure, LookupVerifyError}; -use super::BatchProcessResult; use super::{manager::BlockProcessType, network_context::SyncNetworkContext}; use crate::metrics; -use crate::network_beacon_processor::ChainSegmentProcessId; use crate::sync::block_lookups::common::LookupType; use crate::sync::block_lookups::parent_lookup::{ParentLookup, RequestError}; use crate::sync::block_lookups::single_block_lookup::{CachedChild, LookupRequestError}; @@ -23,7 +21,6 @@ use lru_cache::LRUTimeCache; pub use single_block_lookup::{BlobRequestState, BlockRequestState}; use slog::{debug, error, trace, warn, Logger}; use smallvec::SmallVec; -use std::collections::{HashMap, VecDeque}; use std::sync::Arc; use std::time::Duration; use store::Hash256; @@ -36,8 +33,6 @@ mod single_block_lookup; #[cfg(test)] mod tests; -pub type DownloadedBlock = (Hash256, RpcBlock); - const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 3; @@ -52,8 +47,6 @@ pub struct BlockLookups { /// Parent chain lookups being downloaded. parent_lookups: SmallVec<[ParentLookup; 3]>, - processing_parent_lookups: HashMap, SingleBlockLookup)>, - /// A cache of failed chain lookups to prevent duplicate searches. 
failed_chains: LRUTimeCache, @@ -69,7 +62,6 @@ impl BlockLookups { pub fn new(da_checker: Arc>, log: Logger) -> Self { Self { parent_lookups: Default::default(), - processing_parent_lookups: Default::default(), failed_chains: LRUTimeCache::new(Duration::from_secs( FAILED_CHAINS_CACHE_EXPIRY_SECONDS, )), @@ -80,8 +72,11 @@ impl BlockLookups { } #[cfg(test)] - pub(crate) fn active_single_lookups(&self) -> Vec { - self.single_block_lookups.keys().cloned().collect() + pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256)> { + self.single_block_lookups + .iter() + .map(|(id, e)| (*id, e.block_root())) + .collect() } #[cfg(test)] @@ -169,6 +164,7 @@ impl BlockLookups { .iter_mut() .find(|(_id, lookup)| lookup.is_for_block(block_root)) { + trace!(self.log, "Adding peer to existing single block lookup"; "block_root" => %block_root); lookup.add_peers(peers); if let Some(components) = child_components { lookup.add_child_components(components); @@ -187,16 +183,6 @@ impl BlockLookups { return; } - if self - .processing_parent_lookups - .values() - .any(|(hashes, _last_parent_request)| hashes.contains(&block_root)) - { - // we are already processing this block, ignore it. - trace!(self.log, "Already processing block in a parent request"; "block_root" => ?block_root); - return; - } - let msg = if child_components.is_some() { "Searching for components of a block with unknown parent" } else { @@ -250,20 +236,6 @@ impl BlockLookups { return; } - if self - .processing_parent_lookups - .iter() - .any(|(chain_hash, (hashes, _peers))| { - chain_hash == &block_root - || hashes.contains(&block_root) - || hashes.contains(&parent_root) - }) - { - // we are already processing this block, ignore it. 
- debug!(self.log, "Already processing parent block"; - "block_root" => ?block_root, "parent_root" => ?parent_root); - return; - } let parent_lookup = ParentLookup::new( block_root, parent_root, @@ -392,7 +364,9 @@ impl BlockLookups { LookupType::Current => self.has_pending_parent_request(lookup.block_root()), }; - if !delay_send { + if delay_send { + debug!(self.log, "Delaying import of response"; "block_root" => %lookup.block_root(), "lookup_type" => ?lookup.lookup_type, "type" => ?R::response_type()); + } else { R::request_state_mut(lookup) .get_state_mut() .on_download_success() @@ -406,7 +380,9 @@ impl BlockLookups { )? } } - CachedChild::DownloadIncomplete => { + CachedChild::DownloadIncomplete(reason) => { + debug!(self.log, "CacheChild download incomplete"; "block_root" => %lookup.block_root(), "lookup_type" => ?lookup.lookup_type, "type" => ?R::response_type(), "reason" => reason); + R::request_state_mut(lookup) .get_state_mut() .on_download_success() @@ -441,6 +417,63 @@ impl BlockLookups { Ok(()) } + /// Consolidates error handling for `single_lookup_response`. An `Err` here should always mean + /// the lookup is dropped. + fn handle_parent_lookup_cached_child( + &self, + seen_timestamp: Duration, + cx: &mut SyncNetworkContext, + process_type: BlockProcessType, + lookup: &mut SingleBlockLookup, + cached_child: CachedChild, + ) -> Result<(), LookupRequestError> { + let block_root = lookup.block_root(); + match cached_child { + CachedChild::Ok(block) => { + // If we have an outstanding parent request for this block, delay sending the response until + // all parent blocks have been processed, otherwise we will fail validation with an + // `UnknownParent`. + let delay_send = match lookup.lookup_type { + LookupType::Parent => false, + LookupType::Current => self.has_pending_parent_request(block_root), + }; + + if !delay_send { + self.send_block_for_processing( + block_root, + block, + seen_timestamp, + process_type, + cx, + )? 
+ } + } + CachedChild::DownloadIncomplete(_) => { + // If this was the result of a block request, we can't determine if the block peer + // did anything wrong. If we already had both a block and blobs response processed, + // we should penalize the blobs peer because they did not provide all blobs on the + // initial request. + if lookup.both_components_downloaded() { + lookup.penalize_blob_peer(cx); + lookup.blob_request_state.state.on_download_failure(); + } + lookup.request_block_and_blobs(cx)?; + } + CachedChild::NotRequired => { + warn!(self.log, "Child not cached for parent lookup"; "block_root" => ?block_root); + } + CachedChild::Err(e) => { + warn!(self.log, "Consistency error in cached block"; + "error" => ?e, + "block_root" => ?block_root, + ); + lookup.handle_consistency_failure(cx); + lookup.request_block_and_blobs(cx)?; + } + } + Ok(()) + } + /// Get a parent block lookup by its ID. This method additionally ensures the `req_counter` /// matches the current `req_counter` for the lookup. This any stale responses from requests /// that have been retried are ignored. @@ -595,7 +628,9 @@ impl BlockLookups { // processed. Drop the request without extra penalty } RequestError::BadState(..) 
=> { - warn!(self.log, "Failed to request parent"; "error" => e.as_static()); + warn!(self.log, "Request parent on bad state"; "error" => e.as_static()); + #[cfg(test)] + panic!("bad state"); + } } } @@ -608,6 +643,10 @@ impl BlockLookups { let should_drop_lookup = req.should_drop_lookup_on_disconnected_peer(peer_id, cx, &self.log); + if should_drop_lookup { + debug!(self.log, "Dropping single lookup after peer disconnection"; "block_root" => %req.block_root()); + } + !should_drop_lookup }); @@ -618,7 +657,7 @@ .position(|req| req.check_peer_disconnected(peer_id).is_err()) { let parent_lookup = self.parent_lookups.remove(pos); - debug!(self.log, "Dropping parent lookup after peer disconnected"; &parent_lookup); + debug!(self.log, "Dropping parent lookup after peer disconnected"; "chain_hash" => %parent_lookup.chain_hash()); self.request_parent(parent_lookup, cx); } } @@ -725,7 +764,15 @@ let peer_id = match request_state.get_state().processing_peer() { Ok(peer_id) => peer_id, Err(e) => { - debug!(self.log, "Attempting to process single block lookup in bad state"; "id" => target_id, "response_type" => ?R::response_type(), "error" => e); + // TODO: This log will fire when processing the child block that triggered a parent + // lookup. If the result is Ok(Imported) or Err(BlockIsAlreadyKnown), there's no + // effect since the lookup is dropped anyway. However, for other variants there may + // be inconsistencies. The problem lies in the lookup transitioning from + // child components into a regular lookup.
+ warn!(self.log, "Attempting to process single block lookup in bad state"; "id" => target_id, "response_type" => ?R::response_type(), "error" => e); + #[cfg(test)] + panic!("bad state"); + #[cfg(not(test))] return; } }; @@ -940,38 +987,35 @@ impl BlockLookups { } } BlockProcessingResult::Err(BlockError::ParentUnknown(block)) => { - parent_lookup.add_unknown_parent_block(block); + parent_lookup.add_unknown_parent_block(block, self.da_checker.clone(), cx); self.request_parent(parent_lookup, cx); } BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown(_)) => { - let (chain_hash, blocks, hashes, block_request) = - parent_lookup.parts_for_processing(); - - let blocks = self.add_child_block_to_chain(chain_hash, blocks, cx).into(); - - let process_id = ChainSegmentProcessId::ParentLookup(chain_hash); - - // Check if the beacon processor is available - let Some(beacon_processor) = cx.beacon_processor_if_enabled() else { - return trace!( - self.log, - "Dropping parent chain segment that was ready for processing."; - "chain_hash" => %chain_hash, - ); - }; - - match beacon_processor.send_chain_segment(process_id, blocks) { - Ok(_) => { - self.processing_parent_lookups - .insert(chain_hash, (hashes, block_request)); - } - Err(e) => { - error!( - self.log, - "Failed to send chain segment to processor"; - "error" => ?e - ); + // Send the next block in the parent chain for processing + let chain_completed = parent_lookup.pop_completed_parent_request(); + + if chain_completed { + debug!(self.log, "Parent lookup chain ancestors imported"; "chain_hash" => %chain_hash); + // MUST recover the single lookup that triggered the chain + self.send_child_of_parent_chain_for_processing(chain_hash, cx); + // Do nothing, drop parent lookup + } else { + debug!(self.log, "Parent lookup chain importing ancestor"; "chain_hash" => %chain_hash); + let cached_child = parent_lookup + .current_parent_request + .get_cached_child_block();
+ match self.handle_parent_lookup_cached_child( + timestamp_now(), + cx, + BlockProcessType::ParentLookup { chain_hash }, + &mut parent_lookup.current_parent_request, + cached_child, + ) { + Ok(()) => self.parent_lookups.push(parent_lookup), + Err(e) => { + self.handle_parent_request_error(&mut parent_lookup, cx, e.into()) + } } } } @@ -998,6 +1042,8 @@ impl BlockLookups { "Parent block processing was ignored, cpu might be overloaded"; "action" => "dropping parent request" ); + // Drop the trigger single block lookup, otherwise it will get stuck + self.drop_single_lookup(chain_hash); } } @@ -1007,60 +1053,6 @@ impl BlockLookups { ); } - /// Find the child block that spawned the parent lookup request and add it to the chain - /// to send for processing. - fn add_child_block_to_chain( - &mut self, - chain_hash: Hash256, - mut blocks: VecDeque>, - cx: &mut SyncNetworkContext, - ) -> VecDeque> { - // Find the child block that spawned the parent lookup request and add it to the chain - // to send for processing. - if let Some(child_lookup_id) = self - .single_block_lookups - .iter() - .find_map(|(id, lookup)| (lookup.block_root() == chain_hash).then_some(*id)) - { - let Some(child_lookup) = self.single_block_lookups.get_mut(&child_lookup_id) else { - debug!(self.log, "Missing child for parent lookup request"; "child_root" => ?chain_hash); - return blocks; - }; - match child_lookup.get_cached_child_block() { - CachedChild::Ok(rpc_block) => { - // Insert this block at the front. 
This order is important because we later check - // for linear roots in `filter_chain_segment` - blocks.push_front(rpc_block); - } - CachedChild::DownloadIncomplete => { - trace!(self.log, "Parent lookup chain complete, awaiting child response"; "chain_hash" => ?chain_hash); - } - CachedChild::NotRequired => { - warn!(self.log, "Child not cached for parent lookup"; "chain_hash" => %chain_hash); - } - CachedChild::Err(e) => { - warn!( - self.log, - "Consistency error in child block triggering chain or parent lookups"; - "error" => ?e, - "chain_hash" => ?chain_hash - ); - child_lookup.handle_consistency_failure(cx); - if let Err(e) = child_lookup.request_block_and_blobs(cx) { - debug!(self.log, - "Failed to request block and blobs, dropping lookup"; - "error" => ?e - ); - self.single_block_lookups.remove(&child_lookup_id); - } - } - } - } else { - debug!(self.log, "Missing child for parent lookup request"; "child_root" => ?chain_hash); - }; - blocks - } - /// Handle the peer scoring, retries, and logging related to a `BlockError` returned from /// processing a block + blobs for a parent lookup. 
fn handle_parent_block_error( @@ -1074,6 +1066,9 @@ impl BlockLookups { Ok(peer_id) => peer_id, Err(e) => { warn!(self.log, "Parent lookup in bad state"; "chain_hash" => %parent_lookup.chain_hash(), "error" => e); + #[cfg(test)] + panic!("bad state"); + #[cfg(not(test))] return; } }; @@ -1118,92 +1113,65 @@ impl BlockLookups { self.request_parent(parent_lookup, cx); } - pub fn parent_chain_processed( + pub fn send_child_of_parent_chain_for_processing( &mut self, chain_hash: Hash256, - result: BatchProcessResult, cx: &mut SyncNetworkContext, ) { - let Some((_hashes, request)) = self.processing_parent_lookups.remove(&chain_hash) else { - return debug!(self.log, "Chain process response for a parent lookup request that was not found"; "chain_hash" => %chain_hash, "result" => ?result); + let Some(id) = self + .single_block_lookups + .iter() + .find_map(|(id, req)| (req.block_root() == chain_hash).then_some(*id)) + else { + warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); + return; }; - debug!(self.log, "Parent chain processed"; "chain_hash" => %chain_hash, "result" => ?result); - match result { - BatchProcessResult::Success { .. 
} => { - let Some(id) = self - .single_block_lookups - .iter() - .find_map(|(id, req)| (req.block_root() == chain_hash).then_some(*id)) - else { - warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); - return; - }; - - let Some(lookup) = self.single_block_lookups.get_mut(&id) else { - warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); - return; - }; + let Some(lookup) = self.single_block_lookups.get_mut(&id) else { + warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); + return; + }; - match lookup.get_cached_child_block() { - CachedChild::Ok(rpc_block) => { - // This is the correct block, send it for processing - if self - .send_block_for_processing( - chain_hash, - rpc_block, - timestamp_now(), - BlockProcessType::SingleBlock { id }, - cx, - ) - .is_err() - { - // Remove to avoid inconsistencies - self.single_block_lookups.remove(&id); - } - } - CachedChild::DownloadIncomplete => { - trace!(self.log, "Parent chain complete, awaiting child response"; "chain_hash" => %chain_hash); - } - CachedChild::NotRequired => { - warn!(self.log, "Child not cached for parent lookup"; "chain_hash" => %chain_hash); - } - CachedChild::Err(e) => { - warn!( - self.log, - "Consistency error in child block triggering parent lookup"; - "chain_hash" => %chain_hash, - "error" => ?e - ); - lookup.handle_consistency_failure(cx); - if let Err(e) = lookup.request_block_and_blobs(cx) { - debug!(self.log, - "Failed to request block and blobs, dropping lookup"; - "error" => ?e - ); - self.single_block_lookups.remove(&id); - } - } + match lookup.get_cached_child_block() { + CachedChild::Ok(rpc_block) => { + // This is the correct block, send it for processing + if self + .send_block_for_processing( + chain_hash, + rpc_block, + timestamp_now(), + BlockProcessType::SingleBlock { id }, + cx, + ) + .is_err() + { + // Remove to avoid inconsistencies + self.single_block_lookups.remove(&id); } } - 
BatchProcessResult::FaultyFailure { - imported_blocks: _, - penalty, - } => { - self.failed_chains.insert(chain_hash); - for peer_source in request.all_used_peers() { - cx.report_peer(*peer_source, penalty, "parent_chain_failure") - } + CachedChild::DownloadIncomplete(reason) => { + debug!(self.log, "Parent chain complete, awaiting child response"; "chain_hash" => %chain_hash, "reason" => reason); } - BatchProcessResult::NonFaultyFailure => { - // We might request this chain again if there is need but otherwise, don't try again + CachedChild::NotRequired => { + warn!(self.log, "Child not cached for parent lookup"; "chain_hash" => %chain_hash); + } + CachedChild::Err(e) => { + warn!( + self.log, + "Consistency error in child block triggering parent lookup"; + "chain_hash" => %chain_hash, + "error" => ?e + ); + lookup.handle_consistency_failure(cx); + if let Err(e) = lookup.request_block_and_blobs(cx) { + debug!(self.log, + "Failed to request block and blobs, dropping lookup"; + "error" => ?e + ); + self.single_block_lookups.remove(&id); + } } } - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); } /* Helper functions */ @@ -1304,6 +1272,17 @@ impl BlockLookups { ); } + pub fn drop_single_lookup(&mut self, block_root: Hash256) { + if let Some(id) = self + .single_block_lookups + .iter() + .find_map(|(id, req)| (req.block_root() == block_root).then_some(*id)) + { + debug!(self.log, "Dropping single block lookup"; "id" => id, "block_root" => %block_root); + self.single_block_lookups.remove(&id); + }; + } + /// Drops all the single block requests and returns how many requests were dropped. 
pub fn drop_single_block_requests(&mut self) -> usize { let requests_to_drop = self.single_block_lookups.len(); diff --git a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs b/beacon_node/network/src/sync/block_lookups/parent_lookup.rs index 11eb908953f..87a04451748 100644 --- a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/parent_lookup.rs @@ -1,12 +1,11 @@ use super::common::LookupType; use super::single_block_lookup::{LookupRequestError, SingleBlockLookup}; -use super::{DownloadedBlock, PeerId}; +use super::PeerId; use crate::sync::{manager::SLOT_IMPORT_TOLERANCE, network_context::SyncNetworkContext}; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::data_availability_checker::{ChildComponents, DataAvailabilityChecker}; use beacon_chain::BeaconChainTypes; -use std::collections::VecDeque; use std::sync::Arc; use store::Hash256; @@ -22,7 +21,7 @@ pub(crate) struct ParentLookup { /// The root of the block triggering this parent request. chain_hash: Hash256, /// The blocks that have currently been downloaded. - downloaded_blocks: Vec>, + parent_requests: Vec>, /// Request of the last parent. pub current_parent_request: SingleBlockLookup, } @@ -60,15 +59,17 @@ impl ParentLookup { Self { chain_hash: block_root, - downloaded_blocks: vec![], + parent_requests: vec![], current_parent_request, } } pub fn contains_block(&self, block_root: &Hash256) -> bool { - self.downloaded_blocks - .iter() - .any(|(root, _d_block)| root == block_root) + &self.current_parent_request.block_root() == block_root + || self + .parent_requests + .iter() + .any(|request| &request.block_root() == block_root) } pub fn is_for_block(&self, block_root: Hash256) -> bool { @@ -78,7 +79,7 @@ impl ParentLookup { /// Attempts to request the next unknown parent. If the request fails, it should be removed. 
pub fn request_parent(&mut self, cx: &mut SyncNetworkContext) -> Result<(), RequestError> { // check to make sure this request hasn't failed - if self.downloaded_blocks.len() + 1 >= PARENT_DEPTH_TOLERANCE { + if self.parent_requests.len() + 1 >= PARENT_DEPTH_TOLERANCE { return Err(RequestError::ChainTooLong); } @@ -100,15 +101,40 @@ impl ParentLookup { }) } - pub fn add_unknown_parent_block(&mut self, block: RpcBlock) { - let next_parent = block.parent_root(); - // Cache the block. - let current_root = self.current_parent_request.block_root(); - self.downloaded_blocks.push((current_root, block)); + pub fn add_unknown_parent_block( + &mut self, + block: RpcBlock, + da_checker: Arc>, + cx: &mut SyncNetworkContext, + ) { + // Create a new empty single block lookup for the parent, copying all peers + let parent_root = block.parent_root(); + let new_parent_request = SingleBlockLookup::new( + parent_root, + Some(ChildComponents::empty(parent_root)), + &self + .current_parent_request + .all_available_peers() + .cloned() + .collect::>(), + da_checker, + cx.next_id(), + LookupType::Parent, + ); - // Update the parent request. - self.current_parent_request - .update_requested_parent_block(next_parent) + // Replace current parent request and store in parent_requests queue + let previous_parent_request = + std::mem::replace(&mut self.current_parent_request, new_parent_request); + self.parent_requests.push(previous_parent_request); + } + + pub fn pop_completed_parent_request(&mut self) -> bool { + if let Some(new_parent_request) = self.parent_requests.pop() { + self.current_parent_request = new_parent_request; + false + } else { + true + } } pub fn block_processing_peer(&self) -> Result { @@ -125,31 +151,6 @@ impl ParentLookup { .processing_peer() } - /// Consumes the parent request and destructures it into it's parts. 
- #[allow(clippy::type_complexity)] - pub fn parts_for_processing( - self, - ) -> ( - Hash256, - VecDeque>, - Vec, - SingleBlockLookup, - ) { - let ParentLookup { - chain_hash, - downloaded_blocks, - current_parent_request, - } = self; - let block_count = downloaded_blocks.len(); - let mut blocks = VecDeque::with_capacity(block_count); - let mut hashes = Vec::with_capacity(block_count); - for (hash, block) in downloaded_blocks.into_iter() { - blocks.push_back(block); - hashes.push(hash); - } - (chain_hash, blocks, hashes, current_parent_request) - } - /// Get the parent lookup's chain hash. pub fn chain_hash(&self) -> Hash256 { self.chain_hash @@ -206,7 +207,7 @@ impl slog::KV for ParentLookup { ) -> slog::Result { serializer.emit_arguments("chain_hash", &format_args!("{}", self.chain_hash))?; slog::Value::serialize(&self.current_parent_request, record, "parent", serializer)?; - serializer.emit_usize("downloaded_blocks", self.downloaded_blocks.len())?; + serializer.emit_usize("downloaded_blocks", self.parent_requests.len())?; slog::Result::Ok(()) } } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 077af7c3d19..c1b4139d826 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -74,16 +74,6 @@ impl SingleBlockLookup { self.block_root() == block_root } - /// Update the requested block, this should only be used in a chain of parent lookups to request - /// the next parent. 
- pub fn update_requested_parent_block(&mut self, block_root: Hash256) { - self.block_request_state.requested_block_root = block_root; - self.blob_request_state.block_root = block_root; - self.block_request_state.state.state = State::AwaitingDownload; - self.blob_request_state.state.state = State::AwaitingDownload; - self.child_components = Some(ChildComponents::empty(block_root)); - } - /// Get all unique used peers across block and blob requests. pub fn all_used_peers(&self) -> impl Iterator + '_ { self.block_request_state @@ -93,6 +83,15 @@ impl SingleBlockLookup { .unique() } + /// Get all unique available peers across block and blob requests. + pub fn all_available_peers(&self) -> impl Iterator + '_ { + self.block_request_state + .state + .get_available_peers() + .chain(self.blob_request_state.state.get_used_peers()) + .unique() + } + /// Send the necessary requests for blocks and/or blobs. This will check whether we have /// downloaded the block and/or blobs already and will not send requests if so. It will also /// inspect the request state or blocks and blobs to ensure we are not already processing or @@ -124,11 +123,17 @@ impl SingleBlockLookup { pub fn get_cached_child_block(&self) -> CachedChild { if let Some(components) = self.child_components.as_ref() { let Some(block) = components.downloaded_block.as_ref() else { - return CachedChild::DownloadIncomplete; + return CachedChild::DownloadIncomplete("missing block".to_owned()); }; - if !self.missing_blob_ids().is_empty() { - return CachedChild::DownloadIncomplete; + // CacheChild should include only block components for an unknown parent, so nothing is + // imported into the da_checker. The only possible contents are here. 
+ let blobs_expected = block.num_expected_blobs(); + let blobs_downloaded = components.downloaded_blobs_count(); + if blobs_expected != blobs_downloaded { + return CachedChild::DownloadIncomplete(format!( + "missing blobs expected {blobs_expected} got {blobs_downloaded}" + )); } match RpcBlock::new_from_fixed( @@ -222,9 +227,15 @@ impl SingleBlockLookup { .check_peer_disconnected(peer_id) .is_err(); + if self.all_available_peers().count() == 0 { + return true; + } + + // If there was an active download request with this peer, send them again with another + // peer. We should receive an RPCError anyway, but this should speed things up? if block_peer_disconnected || blob_peer_disconnected { if let Err(e) = self.request_block_and_blobs(cx) { - debug!(log, "Single lookup failed on peer disconnection"; "block_root" => ?block_root, "error" => ?e); + debug!(log, "Single lookup failed on retry after peer disconnection"; "block_root" => ?block_root, "error" => ?e); return true; } } @@ -352,7 +363,7 @@ pub enum CachedChild { /// been performed and no kzg verification has been performed. Ok(RpcBlock), /// All child components have not yet been received. - DownloadIncomplete, + DownloadIncomplete(String), /// Child components should not be cached, send this directly for processing. NotRequired, /// There was an error during consistency checks between block and blobs. @@ -512,6 +523,10 @@ impl SingleLookupRequestState { self.used_peers.iter() } + pub fn get_available_peers(&self) -> impl Iterator { + self.available_peers.iter() + } + /// Selects a random peer from available peers if any, inserts it in used peers and returns it. 
pub fn use_rand_available_peer(&mut self) -> Option { let peer_id = self diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index 8e3b35ee5d3..4fd8d625385 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -194,6 +194,10 @@ impl TestRig { self.sync_manager.handle_message(sync_message); } + fn active_single_lookups(&self) -> Vec<(Id, Hash256)> { + self.sync_manager.active_single_lookups() + } + fn active_single_lookups_count(&self) -> usize { self.sync_manager.active_single_lookups().len() } @@ -210,6 +214,35 @@ impl TestRig { self.sync_manager.failed_chains_contains(chain_hash) } + fn find_single_lookup_for(&self, block_root: Hash256) -> Id { + self.active_single_lookups() + .iter() + .find(|(_, b)| b == &block_root) + .unwrap_or_else(|| panic!("no single block lookup found for {block_root}")) + .0 + } + + fn expect_no_active_parent_lookups(&self) { + assert_eq!( + self.active_parent_lookups(), + vec![], + "expected no parent lookups" + ); + } + + fn expect_no_active_single_lookups(&self) { + assert!( + self.active_single_lookups().is_empty(), + "expect no single block lookups: {:?}", + self.active_single_lookups() + ); + } + + fn expect_no_active_lookups(&self) { + self.expect_no_active_parent_lookups(); + self.expect_no_active_single_lookups(); + } + #[track_caller] fn assert_parent_lookups_consistency(&self) { let hashes = self.active_parent_lookups(); @@ -233,20 +266,23 @@ impl TestRig { peer_id } - fn parent_chain_processed(&mut self, chain_hash: Hash256, result: BatchProcessResult) { - self.send_sync_message(SyncMessage::BatchProcessed { - sync_type: ChainSegmentProcessId::ParentLookup(chain_hash), - result, - }) - } - - fn parent_chain_processed_success(&mut self, chain_hash: Hash256) { - self.parent_chain_processed( - chain_hash, - BatchProcessResult::Success { - was_non_empty: true, - }, - ) + fn 
parent_chain_processed_success( + &mut self, + chain_hash: Hash256, + blocks: &[Arc>], + ) { + // Send import events for all pending parent blocks + for block in blocks { + self.parent_block_processed_imported(chain_hash); + } + // Send final import event for the block that triggered the lookup + let trigger_lookup = self + .active_single_lookups() + .iter() + .find(|(_, block_root)| block_root == &chain_hash) + .copied() + .unwrap_or_else(|| panic!("There should exist a single block lookup for {chain_hash}")); + self.single_block_component_processed_imported(trigger_lookup.0, chain_hash); } fn parent_block_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { @@ -263,22 +299,14 @@ impl TestRig { ); } - fn single_block_component_processed( - &mut self, - id: SingleLookupReqId, - result: BlockProcessingResult, - ) { + fn single_block_component_processed(&mut self, id: Id, result: BlockProcessingResult) { self.send_sync_message(SyncMessage::BlockComponentProcessed { - process_type: BlockProcessType::SingleBlock { id: id.id }, + process_type: BlockProcessType::SingleBlock { id }, result, }) } - fn single_block_component_processed_imported( - &mut self, - id: SingleLookupReqId, - block_root: Hash256, - ) { + fn single_block_component_processed_imported(&mut self, id: Id, block_root: Hash256) { self.single_block_component_processed( id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(block_root)), @@ -544,9 +572,13 @@ impl TestRig { fn expect_parent_chain_process(&mut self) { match self.beacon_processor_rx.try_recv() { Ok(work) => { - assert_eq!(work.work_type(), beacon_processor::CHAIN_SEGMENT); + // Parent chain sends blocks one by one + assert_eq!(work.work_type(), beacon_processor::RPC_BLOCK); } - other => panic!("Expected chain segment process, found {:?}", other), + other => panic!( + "Expected rpc_block from chain segment process, found {:?}", + other + ), } } @@ -567,17 +599,24 @@ impl TestRig { } #[track_caller] - pub fn 
expect_penalty(&mut self, peer_id: PeerId) { - self.pop_received_network_event(|ev| match ev { - NetworkMessage::ReportPeer { peer_id: p_id, .. } if p_id == &peer_id => Some(()), - _ => None, - }) - .unwrap_or_else(|_| { - panic!( - "Expected peer penalty for {peer_id}: {:#?}", - self.network_rx_queue - ) - }) + pub fn expect_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) { + let penalty_msg = self + .pop_received_network_event(|ev| match ev { + NetworkMessage::ReportPeer { + peer_id: p_id, msg, .. + } if p_id == &peer_id => Some(msg.to_owned()), + _ => None, + }) + .unwrap_or_else(|_| { + panic!( + "Expected peer penalty for {peer_id}: {:#?}", + self.network_rx_queue + ) + }); + assert_eq!( + penalty_msg, expect_penalty_msg, + "Unexpected penalty msg for {peer_id}" + ); } pub fn block_with_parent_and_blobs( @@ -630,9 +669,9 @@ fn test_single_block_lookup_happy_path() { // Send the stream termination. Peer should have not been penalized, and the request removed // after processing. rig.single_lookup_block_response(id, peer_id, None); - rig.single_block_component_processed_imported(id, block_root); + rig.single_block_component_processed_imported(id.id, block_root); rig.expect_empty_network(); - assert_eq!(rig.active_single_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -648,7 +687,7 @@ fn test_single_block_lookup_empty_response() { // The peer does not have the block. It should be penalized. rig.single_lookup_block_response(id, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "NoResponseReturned"); rig.expect_block_lookup_request(block_hash); // it should be retried } @@ -667,7 +706,7 @@ fn test_single_block_lookup_wrong_response() { // Peer sends something else. It should be penalized. 
let bad_block = rig.rand_block(); rig.single_lookup_block_response(id, peer_id, Some(bad_block.into())); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); rig.expect_block_lookup_request(block_hash); // should be retried // Send the stream termination. This should not produce an additional penalty. @@ -717,7 +756,7 @@ fn test_single_block_lookup_becomes_parent_request() { // Send the stream termination. Peer should have not been penalized, and the request moved to a // parent request after processing. rig.single_block_component_processed( - id, + id.id, BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), ); assert_eq!(rig.active_single_lookups_count(), 1); @@ -748,8 +787,8 @@ fn test_parent_lookup_happy_path() { BlockError::BlockIsAlreadyKnown(block_root).into(), ); rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.parent_chain_processed_success(block_root, &[]); + rig.expect_no_active_lookups(); } #[test] @@ -766,7 +805,7 @@ fn test_parent_lookup_wrong_response() { // Peer sends the wrong block, peer should be penalized and the block re-requested. let bad_block = rig.rand_block(); rig.parent_lookup_block_response(id1, peer_id, Some(bad_block.into())); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); let id2 = rig.expect_block_parent_request(parent_root); // Send the stream termination for the first request. This should not produce extra penalties. @@ -780,8 +819,8 @@ fn test_parent_lookup_wrong_response() { // Processing succeeds, now the rest of the chain should be sent for processing. 
rig.parent_block_processed_imported(block_root); rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.parent_chain_processed_success(block_root, &[]); + rig.expect_no_active_lookups(); } #[test] @@ -797,7 +836,7 @@ fn test_parent_lookup_empty_response() { // Peer sends an empty response, peer should be penalized and the block re-requested. rig.parent_lookup_block_response(id1, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "NoResponseReturned"); let id2 = rig.expect_block_parent_request(parent_root); // Send the right block this time. @@ -806,9 +845,10 @@ fn test_parent_lookup_empty_response() { // Processing succeeds, now the rest of the chain should be sent for processing. rig.parent_block_processed_imported(block_root); - rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + + let id = rig.find_single_lookup_for(block_root); + rig.single_block_component_processed_imported(id, block_root); + rig.expect_no_active_lookups(); } #[test] @@ -833,8 +873,8 @@ fn test_parent_lookup_rpc_failure() { // Processing succeeds, now the rest of the chain should be sent for processing. rig.parent_block_processed_imported(block_root); rig.expect_parent_chain_process(); - rig.parent_chain_processed_success(block_root); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.parent_chain_processed_success(block_root, &[]); + rig.expect_no_active_lookups(); } #[test] @@ -872,11 +912,11 @@ fn test_parent_lookup_too_many_attempts() { // I'm unsure if this is how it should behave? 
// rig.parent_lookup_block_response(id, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); } } - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -902,13 +942,13 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { // Send a bad block this time. It should be tried again. let bad_block = rig.rand_block(); rig.parent_lookup_block_response(id, peer_id, Some(bad_block.into())); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "UnrequestedBlockRoot"); } } - assert_eq!(rig.active_parent_lookups_count(), 0); assert!(!rig.failed_chains_contains(&block_root)); assert!(!rig.failed_chains_contains(&parent.canonical_root())); + rig.expect_no_active_lookups(); } #[test] @@ -944,11 +984,11 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { rig.parent_lookup_block_response(id, peer_id, Some(parent.clone().into())); rig.parent_block_processed(block_root, BlockError::InvalidSignature.into()); rig.parent_lookup_block_response(id, peer_id, None); - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, "parent_request_err"); } assert!(rig.failed_chains_contains(&block_root)); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -976,7 +1016,7 @@ fn test_parent_lookup_too_deep() { ) } - rig.expect_penalty(peer_id); + rig.expect_penalty(peer_id, ""); assert!(rig.failed_chains_contains(&chain_hash)); } @@ -988,7 +1028,7 @@ fn test_parent_lookup_disconnection() { rig.trigger_unknown_parent_block(peer_id, trigger_block.into()); rig.peer_disconnected(peer_id); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -1015,9 +1055,9 @@ fn test_single_block_lookup_ignored_response() { // after processing. 
rig.single_lookup_block_response(id, peer_id, None); // Send an Ignored response, the request should be dropped - rig.single_block_component_processed(id, BlockProcessingResult::Ignored); + rig.single_block_component_processed(id.id, BlockProcessingResult::Ignored); rig.expect_empty_network(); - assert_eq!(rig.active_single_lookups_count(), 0); + rig.expect_no_active_lookups(); } #[test] @@ -1028,8 +1068,10 @@ fn test_parent_lookup_ignored_response() { let peer_id = rig.new_connected_peer(); // Trigger the request - rig.trigger_unknown_parent_block(peer_id, block.into()); + rig.trigger_unknown_parent_block(peer_id, block.clone().into()); let id = rig.expect_parent_request_block_and_blobs(parent_root); + // Note: single block lookup for current `block` does not trigger any request because it does + // not has blobs, and the block is already cached // Peer sends the right block, it should be sent for processing. Peer should not be penalized. rig.parent_lookup_block_response(id, peer_id, Some(parent.into())); @@ -1039,7 +1081,7 @@ fn test_parent_lookup_ignored_response() { // Return an Ignored result. The request should be dropped rig.parent_block_processed(block_root, BlockProcessingResult::Ignored); rig.expect_empty_network(); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.expect_no_active_lookups(); } /// This is a regression test. 
@@ -1056,7 +1098,7 @@ fn test_same_chain_race_condition() { let chain_hash = trigger_block.canonical_root(); rig.trigger_unknown_parent_block(peer_id, trigger_block.clone()); - for (i, block) in blocks.into_iter().rev().enumerate() { + for (i, block) in blocks.clone().into_iter().rev().enumerate() { let id = rig.expect_parent_request_block_and_blobs(block.canonical_root()); // the block rig.parent_lookup_block_response(id, peer_id, Some(block.clone())); @@ -1088,14 +1130,15 @@ fn test_same_chain_race_condition() { rig.trigger_unknown_parent_block(peer_id, trigger_block); rig.assert_parent_lookups_consistency(); - rig.parent_chain_processed_success(chain_hash); - assert_eq!(rig.active_parent_lookups_count(), 0); + rig.parent_chain_processed_success(chain_hash, &blocks); + rig.expect_no_active_lookups(); } mod deneb_only { use super::*; use beacon_chain::data_availability_checker::AvailabilityCheckError; use ssz_types::VariableList; + use std::collections::VecDeque; struct DenebTester { rig: TestRig, @@ -1233,6 +1276,7 @@ mod deneb_only { } fn parent_block_response(mut self) -> Self { + self.rig.log("parent_block_response"); self.rig.expect_empty_network(); let block = self.parent_block.pop_front().unwrap().clone(); let _ = self.unknown_parent_block.insert(block.clone()); @@ -1247,6 +1291,7 @@ mod deneb_only { } fn parent_blob_response(mut self) -> Self { + self.rig.log("parent_blob_response"); let blobs = self.parent_blobs.pop_front().unwrap(); let _ = self.unknown_parent_blobs.insert(blobs.clone()); for blob in &blobs { @@ -1276,6 +1321,7 @@ mod deneb_only { } fn block_response(mut self) -> Self { + self.rig.log("block_response"); // The peer provides the correct block, should not be penalized. Now the block should be sent // for processing. 
self.rig.single_lookup_block_response( @@ -1291,6 +1337,8 @@ mod deneb_only { } fn blobs_response(mut self) -> Self { + self.rig + .log(&format!("blobs response {}", self.blobs.len())); for blob in &self.blobs { self.rig.single_lookup_blob_response( self.blob_req_id.expect("blob request id"), @@ -1360,7 +1408,7 @@ mod deneb_only { // Missing blobs should be the request is not removed, the outstanding blobs request should // mean we do not send a new request. self.rig.single_block_component_processed( - self.block_req_id.expect("block request id"), + self.block_req_id.expect("block request id").id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), ); self.rig.expect_empty_network(); @@ -1369,6 +1417,7 @@ mod deneb_only { } fn parent_block_imported(mut self) -> Self { + self.rig.log("parent_block_imported"); self.rig.parent_block_processed( self.block_root, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), @@ -1407,7 +1456,7 @@ mod deneb_only { fn invalid_block_processed(mut self) -> Self { self.rig.single_block_component_processed( - self.block_req_id.expect("block request id"), + self.block_req_id.expect("block request id").id, BlockProcessingResult::Err(BlockError::ProposalSignatureInvalid), ); assert_eq!(self.rig.active_single_lookups_count(), 1); @@ -1416,7 +1465,7 @@ mod deneb_only { fn invalid_blob_processed(mut self) -> Self { self.rig.single_block_component_processed( - self.blob_req_id.expect("blob request id"), + self.blob_req_id.expect("blob request id").id, BlockProcessingResult::Err(BlockError::AvailabilityCheck( AvailabilityCheckError::KzgVerificationFailed, )), @@ -1427,7 +1476,7 @@ mod deneb_only { fn missing_components_from_block_request(mut self) -> Self { self.rig.single_block_component_processed( - self.block_req_id.expect("block request id"), + self.block_req_id.expect("block request id").id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( self.slot, 
self.block_root, @@ -1449,8 +1498,8 @@ mod deneb_only { self } - fn expect_penalty(mut self) -> Self { - self.rig.expect_penalty(self.peer_id); + fn expect_penalty(mut self, expect_penalty_msg: &'static str) -> Self { + self.rig.expect_penalty(self.peer_id, expect_penalty_msg); self } fn expect_no_penalty(mut self) -> Self { @@ -1514,6 +1563,10 @@ mod deneb_only { self.rig.expect_block_process(ResponseType::Block); self } + fn expect_no_active_lookups(self) -> Self { + self.rig.expect_no_active_lookups(); + self + } fn search_parent_dup(mut self) -> Self { self.rig .trigger_unknown_parent_block(self.peer_id, self.block.clone()); @@ -1555,7 +1608,7 @@ mod deneb_only { tester .empty_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_block_request() .expect_no_blobs_request() .empty_blobs_response() @@ -1578,7 +1631,7 @@ mod deneb_only { .missing_components_from_block_request() .empty_blobs_response() .missing_components_from_blob_request() - .expect_penalty() + .expect_penalty("single_blob_failure") .expect_blobs_request() .expect_no_block_request(); } @@ -1595,7 +1648,7 @@ mod deneb_only { .expect_no_penalty_and_no_requests() .missing_components_from_blob_request() .empty_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_block_request() .expect_no_blobs_request(); } @@ -1609,7 +1662,7 @@ mod deneb_only { tester .block_response_triggering_process() .invalid_block_processed() - .expect_penalty() + .expect_penalty("single_block_failure") .expect_block_request() .expect_no_blobs_request() .blobs_response() @@ -1628,7 +1681,7 @@ mod deneb_only { .missing_components_from_block_request() .blobs_response() .invalid_blob_processed() - .expect_penalty() + .expect_penalty("single_blob_failure") .expect_blobs_request() .expect_no_block_request(); } @@ -1645,7 +1698,7 @@ mod deneb_only { .invalidate_blobs_too_few() .blobs_response() .missing_components_from_blob_request() - .expect_penalty() + 
.expect_penalty("single_blob_failure") .expect_blobs_request() .expect_no_block_request(); } @@ -1660,7 +1713,7 @@ mod deneb_only { .block_response_triggering_process() .invalidate_blobs_too_many() .blobs_response() - .expect_penalty() + .expect_penalty("DuplicateData") .expect_blobs_request() .expect_no_block_request(); } @@ -1688,7 +1741,7 @@ mod deneb_only { tester .invalidate_blobs_too_many() .blobs_response() - .expect_penalty() + .expect_penalty("DuplicateData") .expect_blobs_request() .expect_no_block_request() .block_response_triggering_process(); @@ -1729,7 +1782,7 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .invalid_parent_processed() - .expect_penalty() + .expect_penalty("parent_request_err") .expect_parent_block_request() .expect_parent_blobs_request() .expect_empty_beacon_processor(); @@ -1780,7 +1833,7 @@ mod deneb_only { tester .empty_parent_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_parent_block_request() .expect_no_blobs_request() .parent_blob_response() @@ -1805,7 +1858,7 @@ mod deneb_only { .empty_parent_blobs_response() .expect_no_penalty_and_no_requests() .parent_block_response() - .expect_penalty() + .expect_penalty("single_blob_failure") .expect_parent_blobs_request() .parent_blob_response() .expect_block_process() @@ -1848,7 +1901,7 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .invalid_parent_processed() - .expect_penalty() + .expect_penalty("parent_request_err") .expect_parent_block_request() .expect_parent_blobs_request() .expect_empty_beacon_processor(); @@ -1868,7 +1921,10 @@ mod deneb_only { .expect_block_process() .parent_block_imported() .block_response() - .expect_parent_chain_process(); + .blobs_response() + .expect_parent_chain_process() + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1886,7 +1942,10 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .parent_block_imported() - .expect_parent_chain_process(); + 
.blobs_response() + .expect_parent_chain_process() + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1899,7 +1958,7 @@ mod deneb_only { tester .empty_parent_block_response() - .expect_penalty() + .expect_penalty("NoResponseReturned") .expect_parent_block_request() .expect_no_blobs_request() .parent_blob_response() @@ -1907,8 +1966,10 @@ mod deneb_only { .parent_block_response() .expect_block_process() .parent_block_imported() + .blobs_response() .block_response() - .expect_parent_chain_process(); + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1920,16 +1981,18 @@ mod deneb_only { }; tester - .block_response() - .empty_parent_blobs_response() - .expect_no_penalty_and_no_requests() - .parent_block_response() - .expect_penalty() - .expect_parent_blobs_request() - .parent_blob_response() - .expect_block_process() - .parent_block_imported() - .expect_parent_chain_process(); + .block_response() // reply with current block + .empty_parent_blobs_response() // replies empty blobs to parent block + .expect_no_penalty_and_no_requests() // no penalty because parent block is unknown + .parent_block_response() // reply with parent block + .expect_penalty("single_blob_failure") // parent block has data, so penalize parent blob peer + .expect_parent_blobs_request() // re-request parent blobs + .parent_blob_response() // good response now + .expect_block_process() // send parent block for import + .parent_block_imported() // parent block imported + .blobs_response() + .block_imported() // resolve original block trigger blobs request and import + .expect_no_active_lookups(); } #[test] diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 9c17c6a1512..73b6bcf3f23 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -263,7 +263,7 @@ impl SyncManager { } #[cfg(test)] - pub(crate) fn active_single_lookups(&self) -> Vec { + pub(crate) fn 
active_single_lookups(&self) -> Vec<(Id, Hash256)> { self.block_lookups.active_single_lookups() } @@ -661,9 +661,6 @@ impl SyncManager { } } } - ChainSegmentProcessId::ParentLookup(chain_hash) => self - .block_lookups - .parent_chain_processed(chain_hash, result, &mut self.network), }, } } From 611c37a1c13bc879bb2db2875d89bc6ecc74df69 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Wed, 24 Apr 2024 08:52:06 +0900 Subject: [PATCH 04/14] Merge current and parent lookups --- .../src/data_availability_checker.rs | 2 - .../child_components.rs | 76 - .../src/data_availability_checker/error.rs | 1 + beacon_node/beacon_chain/src/lib.rs | 6 +- .../network/src/sync/block_lookups/common.rs | 218 +-- .../network/src/sync/block_lookups/mod.rs | 1350 ++++------------- .../src/sync/block_lookups/parent_lookup.rs | 228 --- .../sync/block_lookups/single_block_lookup.rs | 394 ++--- .../network/src/sync/block_lookups/tests.rs | 139 +- beacon_node/network/src/sync/manager.rs | 170 +-- .../network/src/sync/network_context.rs | 90 +- 11 files changed, 730 insertions(+), 1944 deletions(-) delete mode 100644 beacon_node/beacon_chain/src/data_availability_checker/child_components.rs delete mode 100644 beacon_node/network/src/sync/block_lookups/parent_lookup.rs diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 3ef105c6d34..674c54b9f59 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -2,7 +2,6 @@ use crate::blob_verification::{verify_kzg_for_blob_list, GossipVerifiedBlob, Kzg use crate::block_verification_types::{ AvailabilityPendingExecutedBlock, AvailableExecutedBlock, RpcBlock, }; -pub use crate::data_availability_checker::child_components::ChildComponents; use crate::data_availability_checker::overflow_lru_cache::OverflowLRUCache; use crate::{BeaconChain, BeaconChainTypes, 
BeaconStore}; use kzg::Kzg; @@ -18,7 +17,6 @@ use task_executor::TaskExecutor; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{BlobSidecarList, ChainSpec, Epoch, EthSpec, Hash256, SignedBeaconBlock}; -mod child_components; mod error; mod overflow_lru_cache; mod state_lru_cache; diff --git a/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs b/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs deleted file mode 100644 index b33e90bbe1d..00000000000 --- a/beacon_node/beacon_chain/src/data_availability_checker/child_components.rs +++ /dev/null @@ -1,76 +0,0 @@ -use crate::block_verification_types::RpcBlock; -use bls::Hash256; -use std::sync::Arc; -use types::blob_sidecar::FixedBlobSidecarList; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; - -/// For requests triggered by an `UnknownBlockParent` or `UnknownBlobParent`, this struct -/// is used to cache components as they are sent to the network service. We can't use the -/// data availability cache currently because any blocks or blobs without parents -/// won't pass validation and therefore won't make it into the cache. 
-pub struct ChildComponents { - pub block_root: Hash256, - pub downloaded_block: Option>>, - pub downloaded_blobs: FixedBlobSidecarList, -} - -impl From> for ChildComponents { - fn from(value: RpcBlock) -> Self { - let (block_root, block, blobs) = value.deconstruct(); - let fixed_blobs = blobs.map(|blobs| { - FixedBlobSidecarList::from(blobs.into_iter().map(Some).collect::>()) - }); - Self::new(block_root, Some(block), fixed_blobs) - } -} - -impl ChildComponents { - pub fn empty(block_root: Hash256) -> Self { - Self { - block_root, - downloaded_block: None, - downloaded_blobs: <_>::default(), - } - } - pub fn new( - block_root: Hash256, - block: Option>>, - blobs: Option>, - ) -> Self { - let mut cache = Self::empty(block_root); - if let Some(block) = block { - cache.merge_block(block); - } - if let Some(blobs) = blobs { - cache.merge_blobs(blobs); - } - cache - } - - pub fn merge_block(&mut self, block: Arc>) { - self.downloaded_block = Some(block); - } - - pub fn merge_blob(&mut self, blob: Arc>) { - if let Some(blob_ref) = self.downloaded_blobs.get_mut(blob.index as usize) { - *blob_ref = Some(blob); - } - } - - pub fn merge_blobs(&mut self, blobs: FixedBlobSidecarList) { - for blob in blobs.iter().flatten() { - self.merge_blob(blob.clone()); - } - } - - pub fn clear_blobs(&mut self) { - self.downloaded_blobs = FixedBlobSidecarList::default(); - } - - pub fn downloaded_blobs_count(&self) -> usize { - self.downloaded_blobs - .iter() - .filter(|blob| blob.is_some()) - .count() - } -} diff --git a/beacon_node/beacon_chain/src/data_availability_checker/error.rs b/beacon_node/beacon_chain/src/data_availability_checker/error.rs index 0804fe3b9ab..d5e2b6c82d7 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/error.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/error.rs @@ -21,6 +21,7 @@ pub enum Error { RebuildingStateCaches(BeaconStateError), } +#[derive(PartialEq, Eq)] pub enum ErrorCategory { /// Internal Errors (not caused by 
peers) Internal, diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index 71c96d0fd55..a091b0894ab 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -74,9 +74,9 @@ pub use self::historical_blocks::HistoricalBlockError; pub use attestation_verification::Error as AttestationError; pub use beacon_fork_choice_store::{BeaconForkChoiceStore, Error as ForkChoiceStoreError}; pub use block_verification::{ - get_block_root, BlockError, ExecutionPayloadError, ExecutionPendingBlock, GossipVerifiedBlock, - IntoExecutionPendingBlock, IntoGossipVerifiedBlockContents, PayloadVerificationOutcome, - PayloadVerificationStatus, + get_block_root, BlobError, BlockError, ExecutionPayloadError, ExecutionPendingBlock, + GossipVerifiedBlock, IntoExecutionPendingBlock, IntoGossipVerifiedBlockContents, + PayloadVerificationOutcome, PayloadVerificationStatus, }; pub use block_verification_types::AvailabilityPendingExecutedBlock; pub use block_verification_types::ExecutedBlock; diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 43fb0229a70..5ea62f1ae28 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -1,42 +1,34 @@ -use crate::sync::block_lookups::parent_lookup::PARENT_FAIL_TOLERANCE; use crate::sync::block_lookups::single_block_lookup::{ LookupRequestError, SingleBlockLookup, SingleLookupRequestState, }; use crate::sync::block_lookups::{ - BlobRequestState, BlockLookups, BlockRequestState, PeerId, SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS, + BlobRequestState, BlockRequestState, PeerId, SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS, }; -use crate::sync::manager::{BlockProcessType, Id, SingleLookupReqId}; +use crate::sync::manager::{BlockProcessType, Id, SLOT_IMPORT_TOLERANCE}; use crate::sync::network_context::{ BlobsByRootSingleBlockRequest, BlocksByRootSingleRequest, 
SyncNetworkContext, }; use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::ChildComponents; use beacon_chain::BeaconChainTypes; use std::sync::Arc; -use std::time::Duration; use types::blob_sidecar::FixedBlobSidecarList; use types::{Hash256, SignedBeaconBlock}; +use super::single_block_lookup::DownloadResult; +use super::SingleLookupId; + #[derive(Debug, Copy, Clone)] pub enum ResponseType { Block, Blob, } -#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] -pub enum LookupType { - Current, - Parent, -} - -impl LookupType { - fn max_attempts(&self) -> u8 { - match self { - LookupType::Current => SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS, - LookupType::Parent => PARENT_FAIL_TOLERANCE, - } - } -} +/// How many attempts we try to find a parent of a block before we give up trying. +pub(crate) const PARENT_FAIL_TOLERANCE: u8 = 5; +/// The maximum depth we will search for a parent block. In principle we should have sync'd any +/// canonical chain to its head once the peer connects. A chain should not appear where it's depth +/// is further back than the most recent head slot. +pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2; /// This trait unifies common single block lookup functionality across blocks and blobs. This /// includes making requests, verifying responses, and handling processing results. A @@ -53,75 +45,35 @@ pub trait RequestState { /// The type created after validation. type VerifiedResponseType: Clone; - /* Request building methods */ - - /// Construct a new request. - fn build_request( - &mut self, - lookup_type: LookupType, - ) -> Result<(PeerId, Self::RequestType), LookupRequestError> { - // Verify and construct request. - self.too_many_attempts(lookup_type)?; - let peer = self.get_peer()?; - let request = self.new_request(); - Ok((peer, request)) - } - - /// Construct a new request and send it. 
- fn build_request_and_send( + fn continue_request( &mut self, id: Id, - lookup_type: LookupType, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - // Check if request is necessary. - if !self.get_state().is_awaiting_download() { - return Ok(()); + if let Some(peer_id) = Self::get_state_mut(self).maybe_start_download()? { + // Verify the current request has not exceeded the maximum number of attempts. + let request_state = self.get_state(); + // TODO: Okay to use `SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS` for both current and parent + // lookups now? It not trivial to identify what is a "parent lookup" now. + if request_state.failed_attempts() >= SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS { + let cannot_process = request_state.more_failed_processing_attempts(); + return Err(LookupRequestError::TooManyAttempts { cannot_process }); + } + + // Make request + return self.make_request(id, peer_id, cx); } - - // Construct request. - let (peer_id, request) = self.build_request(lookup_type)?; - - // Update request state. - let req_counter = self.get_state_mut().on_download_start(peer_id); - - // Make request - let id = SingleLookupReqId { - id, - req_counter, - lookup_type, - }; - Self::make_request(id, peer_id, request, cx) - } - - /// Verify the current request has not exceeded the maximum number of attempts. - fn too_many_attempts(&self, lookup_type: LookupType) -> Result<(), LookupRequestError> { - let request_state = self.get_state(); - - if request_state.failed_attempts() >= lookup_type.max_attempts() { - let cannot_process = request_state.more_failed_processing_attempts(); - Err(LookupRequestError::TooManyAttempts { cannot_process }) - } else { - Ok(()) + if let Some(result) = Self::get_state_mut(self).maybe_start_processing() { + return Self::send_for_processing(id, result, cx); } + Ok(()) } - /// Get the next peer to request. Draws from the set of peers we think should have both the - /// block and blob first. 
If that fails, we draw from the set of peers that may have either. - fn get_peer(&mut self) -> Result { - self.get_state_mut() - .use_rand_available_peer() - .ok_or(LookupRequestError::NoPeers) - } - - /// Initialize `Self::RequestType`. - fn new_request(&self) -> Self::RequestType; - /// Send the request to the network service. fn make_request( - id: SingleLookupReqId, + &self, + id: Id, peer_id: PeerId, - request: Self::RequestType, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError>; @@ -131,28 +83,13 @@ pub trait RequestState { /// the blob parent if we don't end up getting any blobs in the response. fn get_parent_root(verified_response: &Self::VerifiedResponseType) -> Option; - /// Caches the verified response in the lookup if necessary. This is only necessary for lookups - /// triggered by `UnknownParent` errors. - fn add_to_child_components( - verified_response: Self::VerifiedResponseType, - components: &mut ChildComponents, - ); - /// Send the response to the beacon processor. fn send_for_processing( id: Id, - bl: &BlockLookups, - block_root: Hash256, - verified: Self::VerifiedResponseType, - duration: Duration, + result: DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError>; - /// Register a failure to process the block or blob. - fn register_failure_downloading(&mut self) { - self.get_state_mut().on_download_failure() - } - /* Utility methods */ /// Returns the `ResponseType` associated with this trait implementation. Useful in logging. @@ -162,56 +99,46 @@ pub trait RequestState { fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self; /// A getter for a reference to the `SingleLookupRequestState` associated with this trait. - fn get_state(&self) -> &SingleLookupRequestState; + fn get_state(&self) -> &SingleLookupRequestState; /// A getter for a mutable reference to the SingleLookupRequestState associated with this trait. 
- fn get_state_mut(&mut self) -> &mut SingleLookupRequestState; + fn get_state_mut(&mut self) -> &mut SingleLookupRequestState; } -impl RequestState for BlockRequestState { +impl RequestState for BlockRequestState { type RequestType = BlocksByRootSingleRequest; type VerifiedResponseType = Arc>; - fn new_request(&self) -> Self::RequestType { - BlocksByRootSingleRequest(self.requested_block_root) - } - fn make_request( - id: SingleLookupReqId, + &self, + id: SingleLookupId, peer_id: PeerId, - request: Self::RequestType, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - cx.block_lookup_request(id, peer_id, request) - .map_err(LookupRequestError::SendFailed) + cx.block_lookup_request( + id, + peer_id, + BlocksByRootSingleRequest(self.requested_block_root), + ) + .map_err(LookupRequestError::SendFailed) } fn get_parent_root(verified_response: &Arc>) -> Option { Some(verified_response.parent_root()) } - fn add_to_child_components( - verified_response: Arc>, - components: &mut ChildComponents, - ) { - components.merge_block(verified_response); - } - fn send_for_processing( - id: Id, - bl: &BlockLookups, - block_root: Hash256, - block: Arc>, - duration: Duration, + id: SingleLookupId, + (block, block_root, seen_timestamp): DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { - bl.send_block_for_processing( + cx.send_block_for_processing( block_root, RpcBlock::new_without_blobs(Some(block_root), block), - duration, + seen_timestamp, BlockProcessType::SingleBlock { id }, - cx, ) + .map_err(LookupRequestError::SendFailed) } fn response_type() -> ResponseType { @@ -220,10 +147,10 @@ impl RequestState for BlockRequestState { fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { &mut request.block_request_state } - fn get_state(&self) -> &SingleLookupRequestState { + fn get_state(&self) -> &SingleLookupRequestState { &self.state } - fn get_state_mut(&mut self) -> &mut SingleLookupRequestState { + fn get_state_mut(&mut 
self) -> &mut SingleLookupRequestState { &mut self.state } } @@ -232,21 +159,26 @@ impl RequestState for BlobRequestState { type RequestType = BlobsByRootSingleBlockRequest; type VerifiedResponseType = FixedBlobSidecarList; - fn new_request(&self) -> Self::RequestType { - BlobsByRootSingleBlockRequest { - block_root: self.block_root, - indices: self.requested_ids.indices(), - } - } - fn make_request( - id: SingleLookupReqId, + &self, + id: Id, peer_id: PeerId, - request: Self::RequestType, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - cx.blob_lookup_request(id, peer_id, request) - .map_err(LookupRequestError::SendFailed) + // TODO: Use cx to figure out which blobs are still to be downloaded + // - Check against the current cached block in the blocks response the required num of blobs + // - Check against da checker if there's a blob how many we need + // - Check against da checker if there are some blobs already downloaded + + cx.blob_lookup_request( + id, + peer_id, + BlobsByRootSingleBlockRequest { + block_root: self.block_root, + indices: self.requested_ids.indices(), + }, + ) + .map_err(LookupRequestError::SendFailed) } fn get_parent_root(verified_response: &FixedBlobSidecarList) -> Option { @@ -257,28 +189,18 @@ impl RequestState for BlobRequestState { .next() } - fn add_to_child_components( - verified_response: FixedBlobSidecarList, - components: &mut ChildComponents, - ) { - components.merge_blobs(verified_response); - } - fn send_for_processing( id: Id, - bl: &BlockLookups, - block_root: Hash256, - verified: FixedBlobSidecarList, - duration: Duration, + (verified, block_root, seen_timestamp): DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { - bl.send_blobs_for_processing( + cx.send_blobs_for_processing( block_root, verified, - duration, + seen_timestamp, BlockProcessType::SingleBlob { id }, - cx, ) + .map_err(LookupRequestError::SendFailed) } fn response_type() -> ResponseType { @@ -287,10 +209,10 @@ impl 
RequestState for BlobRequestState { fn request_state_mut(request: &mut SingleBlockLookup) -> &mut Self { &mut request.blob_request_state } - fn get_state(&self) -> &SingleLookupRequestState { + fn get_state(&self) -> &SingleLookupRequestState { &self.state } - fn get_state_mut(&mut self) -> &mut SingleLookupRequestState { + fn get_state_mut(&mut self) -> &mut SingleLookupRequestState { &mut self.state } } diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 5091536cfcd..0897b04e8d0 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,18 +1,15 @@ -use self::single_block_lookup::SingleBlockLookup; -use super::manager::BlockProcessingResult; -use super::network_context::{LookupFailure, LookupVerifyError}; -use super::{manager::BlockProcessType, network_context::SyncNetworkContext}; +use self::single_block_lookup::{DownloadResult, LookupRequestError, SingleBlockLookup}; +use super::manager::{BlockProcessType, BlockProcessingResult}; +use super::network_context::LookupVerifyError; +use super::network_context::{RpcProcessingResult, SyncNetworkContext}; use crate::metrics; -use crate::sync::block_lookups::common::LookupType; -use crate::sync::block_lookups::parent_lookup::{ParentLookup, RequestError}; -use crate::sync::block_lookups::single_block_lookup::{CachedChild, LookupRequestError}; -use crate::sync::manager::{Id, SingleLookupReqId}; -use beacon_chain::block_verification_types::{AsBlock, RpcBlock}; -pub use beacon_chain::data_availability_checker::ChildComponents; +use crate::sync::block_lookups::common::PARENT_DEPTH_TOLERANCE; +use crate::sync::manager::Id; +use crate::sync::network_context::LookupFailure; +use beacon_chain::block_verification_types::AsBlock; use beacon_chain::data_availability_checker::{ AvailabilityCheckErrorCategory, DataAvailabilityChecker, }; -use beacon_chain::validator_monitor::timestamp_now; use 
beacon_chain::{AvailabilityProcessingStatus, BeaconChainTypes, BlockError}; pub use common::RequestState; use fnv::FnvHashMap; @@ -20,15 +17,13 @@ use lighthouse_network::{PeerAction, PeerId}; use lru_cache::LRUTimeCache; pub use single_block_lookup::{BlobRequestState, BlockRequestState}; use slog::{debug, error, trace, warn, Logger}; -use smallvec::SmallVec; +use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use store::Hash256; -use types::blob_sidecar::FixedBlobSidecarList; -use types::Slot; +use types::{BlobSidecar, EthSpec, SignedBeaconBlock, Slot}; pub mod common; -mod parent_lookup; mod single_block_lookup; #[cfg(test)] mod tests; @@ -36,6 +31,22 @@ mod tests; const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 3; +pub enum BlockComponent { + Block(DownloadResult>>), + Blob(DownloadResult>>), +} + +impl BlockComponent { + fn parent_root(&self) -> Hash256 { + match self { + BlockComponent::Block(block) => block.0.parent_root(), + BlockComponent::Blob(blob) => blob.0.block_parent_root(), + } + } +} + +pub type SingleLookupId = u32; + enum Action { Retry, ParentUnknown { parent_root: Hash256, slot: Slot }, @@ -44,13 +55,10 @@ enum Action { } pub struct BlockLookups { - /// Parent chain lookups being downloaded. - parent_lookups: SmallVec<[ParentLookup; 3]>, - /// A cache of failed chain lookups to prevent duplicate searches. 
failed_chains: LRUTimeCache, - single_block_lookups: FnvHashMap>, + single_block_lookups: FnvHashMap>, pub(crate) da_checker: Arc>, @@ -61,7 +69,6 @@ pub struct BlockLookups { impl BlockLookups { pub fn new(da_checker: Arc>, log: Logger) -> Self { Self { - parent_lookups: Default::default(), failed_chains: LRUTimeCache::new(Duration::from_secs( FAILED_CHAINS_CACHE_EXPIRY_SECONDS, )), @@ -72,19 +79,57 @@ impl BlockLookups { } #[cfg(test)] - pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256)> { + pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { self.single_block_lookups .iter() - .map(|(id, e)| (*id, e.block_root())) + .map(|(id, e)| (*id, e.block_root(), e.parent_root())) .collect() } - #[cfg(test)] - pub(crate) fn active_parent_lookups(&self) -> Vec { - self.parent_lookups - .iter() - .map(|r| r.chain_hash()) - .collect::>() + /// Returns a vec of all parent lookup chains by tip, in descending slot order (tip first) + pub(crate) fn active_parent_lookups(&self) -> Vec> { + let mut child_to_parent = HashMap::new(); + let mut parent_to_child = HashMap::>::new(); + for lookup in self.single_block_lookups.values() { + let block_root = lookup.block_root(); + let parent_root = lookup.parent_root(); + child_to_parent.insert(block_root, parent_root); + if let Some(parent_root) = parent_root { + parent_to_child + .entry(parent_root) + .or_default() + .push(block_root); + } + } + + let mut parent_chains = vec![]; + + // Iterate blocks which no child + for lookup in self.single_block_lookups.values() { + let mut block_root = lookup.block_root(); + if parent_to_child.get(&block_root).is_none() { + let mut chain = vec![]; + + // Resolve chain of blocks + loop { + if let Some(parent_root) = child_to_parent.get(&block_root) { + // block_root is a known block that may or may not have a parent root + chain.push(block_root); + if let Some(parent_root) = parent_root { + block_root = *parent_root; + continue; + } + } + break; + } + + if 
chain.len() > 1 { + parent_chains.push(chain); + } + } + } + + parent_chains } #[cfg(test)] @@ -98,66 +143,79 @@ impl BlockLookups { pub fn search_block( &mut self, block_root: Hash256, + block_component: Option<(PeerId, BlockComponent)>, peer_source: &[PeerId], cx: &mut SyncNetworkContext, ) { - self.new_current_lookup(block_root, None, peer_source, cx) + self.new_current_lookup(block_root, block_component, peer_source, cx) } - /// Creates a lookup for the block with the given `block_root`, while caching other block - /// components we've already received. The block components are cached here because we haven't - /// imported its parent and therefore can't fully validate it and store it in the data - /// availability cache. - /// - /// The request is immediately triggered. - pub fn search_child_block( + /// A block or blob triggers the search of a parent. + /// Check if this new lookup extends a bad chain: + /// - `block_root_to_search` is a failed chain + /// - Extending `child_block_root_trigger` would exceed the max depth + pub fn search_parent_of_child( &mut self, - block_root: Hash256, - child_components: ChildComponents, - peer_source: &[PeerId], + block_root_to_search: Hash256, + child_block_root_trigger: Hash256, + peers: &[PeerId], cx: &mut SyncNetworkContext, ) { - self.new_current_lookup(block_root, Some(child_components), peer_source, cx) - } + for parent_chain in self.active_parent_lookups() { + if parent_chain.last() == Some(&child_block_root_trigger) + && parent_chain.len() >= PARENT_DEPTH_TOLERANCE + { + debug!(self.log, "Parent lookup chain too long"; "block_root" => ?block_root_to_search); + + // Searching for this parent would extend a parent chain over the max + // Insert the tip only to failed chains + let chain_hash = parent_chain.first().expect("has at least one element"); + self.failed_chains.insert(*chain_hash); + + // Drop all lookups descending from the child of the too long parent chain + if let Some((lookup_id, lookup)) = self + 
.single_block_lookups + .iter() + .find(|(_, l)| l.block_root() == child_block_root_trigger) + { + for &peer_id in lookup.all_used_peers() { + cx.report_peer(peer_id, PeerAction::LowToleranceError, "chain_too_long"); + } - /// Attempts to trigger the request matching the given `block_root`. - pub fn trigger_single_lookup( - &mut self, - mut single_block_lookup: SingleBlockLookup, - cx: &mut SyncNetworkContext, - ) { - let block_root = single_block_lookup.block_root(); - match single_block_lookup.request_block_and_blobs(cx) { - Ok(()) => self.add_single_lookup(single_block_lookup), - Err(e) => { - debug!(self.log, "Single block lookup failed"; - "error" => ?e, - "block_root" => ?block_root, - ); + self.drop_lookup_and_childs(*lookup_id); + } + + return; } } - } - - /// Adds a lookup to the `single_block_lookups` map. - pub fn add_single_lookup(&mut self, single_block_lookup: SingleBlockLookup) { - self.single_block_lookups - .insert(single_block_lookup.id, single_block_lookup); - metrics::set_gauge( - &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, - self.single_block_lookups.len() as i64, - ); + // TODO: implement parent chain checks + self.new_current_lookup(block_root_to_search, None, peers, cx) } /// Searches for a single block hash. If the blocks parent is unknown, a chain of blocks is /// constructed. - pub fn new_current_lookup( + fn new_current_lookup( &mut self, block_root: Hash256, - child_components: Option>, + block_component: Option<(PeerId, BlockComponent)>, peers: &[PeerId], cx: &mut SyncNetworkContext, ) { + // If this block or it's parent is part of a known failed chain, ignore it. + if self.failed_chains.contains(&block_root) { + debug!(self.log, "Block is from a past failed chain. Dropping"; "block_root" => ?block_root); + return; + } + + // TODO: Should check if parent root is a failed chain too? 
+ if let Some(parent_root) = block_component.as_ref().map(|(_, b)| b.parent_root()) { + if self.failed_chains.contains(&parent_root) { + debug!(self.log, "Parent of block is from a past failed chain. Dropping"; "parent_root" => ?parent_root, "block_root" => ?block_root); + return; + } + } + // Do not re-request a block that is already being requested if let Some((_, lookup)) = self .single_block_lookups @@ -166,472 +224,125 @@ impl BlockLookups { { trace!(self.log, "Adding peer to existing single block lookup"; "block_root" => %block_root); lookup.add_peers(peers); - if let Some(components) = child_components { - lookup.add_child_components(components); + if let Some((peer_id, block_component)) = block_component { + lookup.add_child_components(peer_id, block_component); } return; } - if let Some(parent_lookup) = self.parent_lookups.iter_mut().find(|parent_req| { - parent_req.is_for_block(block_root) || parent_req.contains_block(&block_root) - }) { - parent_lookup.add_peers(peers); - - // If the block was already downloaded, or is being downloaded in this moment, do not - // request it. - trace!(self.log, "Already searching for block in a parent lookup request"; "block_root" => ?block_root); - return; - } - - let msg = if child_components.is_some() { + let msg = if block_component.is_some() { "Searching for components of a block with unknown parent" } else { "Searching for block components" }; - - let lookup = SingleBlockLookup::new( - block_root, - child_components, - peers, - self.da_checker.clone(), - cx.next_id(), - LookupType::Current, - ); - debug!( self.log, "{}", msg; "peer_ids" => ?peers, "block" => ?block_root, ); - self.trigger_single_lookup(lookup, cx); - } - /// If a block is attempted to be processed but we do not know its parent, this function is - /// called in order to find the block's parent. 
- pub fn search_parent( - &mut self, - slot: Slot, - block_root: Hash256, - parent_root: Hash256, - peer_id: PeerId, - cx: &mut SyncNetworkContext, - ) { - // If this block or it's parent is part of a known failed chain, ignore it. - if self.failed_chains.contains(&parent_root) || self.failed_chains.contains(&block_root) { - debug!(self.log, "Block is from a past failed chain. Dropping"; - "block_root" => ?block_root, "block_slot" => slot); - return; - } + let mut lookup = + SingleBlockLookup::new(block_root, peers, self.da_checker.clone(), cx.next_id()); - // Make sure this block is not already downloaded, and that neither it or its parent is - // being searched for. - if let Some(parent_lookup) = self.parent_lookups.iter_mut().find(|parent_req| { - parent_req.contains_block(&parent_root) || parent_req.is_for_block(parent_root) - }) { - parent_lookup.add_peer(peer_id); - // we are already searching for this block, ignore it - debug!(self.log, "Already searching for parent block"; - "block_root" => ?block_root, "parent_root" => ?parent_root); - return; + // Add block components to the new request + if let Some((peer_id, block_component)) = block_component { + lookup.add_child_components(peer_id, block_component); } - let parent_lookup = ParentLookup::new( - block_root, - parent_root, - peer_id, - self.da_checker.clone(), - cx, - ); - - debug!(self.log, "Created new parent lookup"; "block_root" => ?block_root, "parent_root" => ?parent_root); - - self.request_parent(parent_lookup, cx); - } - - /* Lookup responses */ - - /// Get a single block lookup by its ID. This method additionally ensures the `req_counter` - /// matches the current `req_counter` for the lookup. This ensures any stale responses from requests - /// that have been retried are ignored. 
- fn get_single_lookup>( - &mut self, - id: SingleLookupReqId, - ) -> Option> { - let mut lookup = self.single_block_lookups.remove(&id.id)?; + let block_root = lookup.block_root(); + match lookup.continue_requests(cx) { + Ok(()) => { + self.single_block_lookups.insert(lookup.id, lookup); + self.update_metrics(); - let request_state = R::request_state_mut(&mut lookup); - if request_state - .get_state() - .is_current_req_counter(id.req_counter) - { - Some(lookup) - } else { - // We don't want to drop the lookup, just ignore the old response. - self.single_block_lookups.insert(id.id, lookup); - None + metrics::set_gauge( + &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, + self.single_block_lookups.len() as i64, + ); + } + Err(e) => { + debug!(self.log, "Single block lookup failed"; + "error" => ?e, + "block_root" => ?block_root, + ); + } } } - /// Checks whether a single block lookup is waiting for a parent lookup to complete. This is - /// necessary because we want to make sure all parents are processed before sending a child - /// for processing, otherwise the block will fail validation and will be returned to the network - /// layer with an `UnknownParent` error. - pub fn has_pending_parent_request(&self, block_root: Hash256) -> bool { - self.parent_lookups - .iter() - .any(|parent_lookup| parent_lookup.chain_hash() == block_root) - } + /* Lookup responses */ /// Process a block or blob response received from a single lookup request. - pub fn single_lookup_response>( + pub fn on_download_response>( &mut self, - lookup_id: SingleLookupReqId, + id: SingleLookupId, peer_id: PeerId, - response: R::VerifiedResponseType, - seen_timestamp: Duration, + response: RpcProcessingResult, cx: &mut SyncNetworkContext, ) { - let id = lookup_id.id; - let response_type = R::response_type(); + // Downscore peer even if lookup is not known + // Only downscore lookup verify errors. RPC errors are downscored in the network handler. 
+ if let Err(LookupFailure::LookupVerifyError(e)) = &response { + // Note: the error is displayed in full debug form on the match below + cx.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); + } - let Some(mut lookup) = self.get_single_lookup::(lookup_id) else { + let response_type = R::response_type(); + let Some(mut lookup) = self.single_block_lookups.get_mut(&id) else { // We don't have the ability to cancel in-flight RPC requests. So this can happen // if we started this RPC request, and later saw the block/blobs via gossip. - debug!( - self.log, - "Block returned for single block lookup not present"; - "response_type" => ?response_type, - ); + debug!(self.log, "Block returned for single block lookup not present"; "id" => id); return; }; - let expected_block_root = lookup.block_root(); - debug!(self.log, - "Peer returned response for single lookup"; - "peer_id" => %peer_id , - "id" => ?id, - "block_root" => ?expected_block_root, - "response_type" => ?response_type, - ); + let block_root = lookup.block_root(); + let request_state = R::request_state_mut(&mut lookup); - match self.handle_verified_response::( - seen_timestamp, - cx, - BlockProcessType::SingleBlock { id: lookup.id }, - response, - &mut lookup, - ) { - Ok(_) => { - self.single_block_lookups.insert(id, lookup); - } - Err(e) => { + let result = match response { + Ok((response, seen_timestamp)) => { debug!(self.log, - "Single lookup request failed"; - "error" => ?e, - "block_root" => ?expected_block_root, + "Block lookup download success"; + "block_root" => %block_root, + "peer_id" => %peer_id, + "response_type" => ?response_type, ); - } - } - - metrics::set_gauge( - &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, - self.single_block_lookups.len() as i64, - ); - } - /// Consolidates error handling for `single_lookup_response`. An `Err` here should always mean - /// the lookup is dropped. 
- fn handle_verified_response>( - &self, - seen_timestamp: Duration, - cx: &mut SyncNetworkContext, - process_type: BlockProcessType, - verified_response: R::VerifiedResponseType, - lookup: &mut SingleBlockLookup, - ) -> Result<(), LookupRequestError> { - let id = lookup.id; - let block_root = lookup.block_root(); - - let cached_child = lookup.add_response::(verified_response.clone()); - match cached_child { - CachedChild::Ok(block) => { - // If we have an outstanding parent request for this block, delay sending the response until - // all parent blocks have been processed, otherwise we will fail validation with an - // `UnknownParent`. - let delay_send = match lookup.lookup_type { - LookupType::Parent => false, - LookupType::Current => self.has_pending_parent_request(lookup.block_root()), - }; - - if delay_send { - debug!(self.log, "Delaying import of response"; "block_root" => %lookup.block_root(), "lookup_type" => ?lookup.lookup_type, "type" => ?R::response_type()); + if let Err(e) = request_state.get_state_mut().on_download_success(( + response, + block_root, + seen_timestamp, + )) { + Err(e) } else { - R::request_state_mut(lookup) - .get_state_mut() - .on_download_success() - .map_err(LookupRequestError::BadState)?; - self.send_block_for_processing( - block_root, - block, - seen_timestamp, - process_type, - cx, - )? - } - } - CachedChild::DownloadIncomplete(reason) => { - debug!(self.log, "CacheChild download incomplete"; "block_root" => %lookup.block_root(), "lookup_type" => ?lookup.lookup_type, "type" => ?R::response_type(), "reason" => reason); - - R::request_state_mut(lookup) - .get_state_mut() - .on_download_success() - .map_err(LookupRequestError::BadState)?; - // If this was the result of a block request, we can't determine if the block peer - // did anything wrong. If we already had both a block and blobs response processed, - // we should penalize the blobs peer because they did not provide all blobs on the - // initial request. 
- if lookup.both_components_downloaded() { - lookup.penalize_blob_peer(cx); - lookup.blob_request_state.state.on_download_failure(); - } - lookup.request_block_and_blobs(cx)?; - } - CachedChild::NotRequired => { - R::request_state_mut(lookup) - .get_state_mut() - .on_download_success() - .map_err(LookupRequestError::BadState)?; + // TOOD: May choose to delay blobs for sending if we know that their parent is unknown. + // However, da_checker does not ever error with unknown parent. Plus we should not request + // blobs for blocks that are not rooted on a valid chain, as an attacker can trigger us into + // fetching garbage. - R::send_for_processing(id, self, block_root, verified_response, seen_timestamp, cx)? - } - CachedChild::Err(e) => { - warn!(self.log, "Consistency error in cached block"; - "error" => ?e, - "block_root" => ?block_root - ); - lookup.handle_consistency_failure(cx); - lookup.request_block_and_blobs(cx)?; - } - } - Ok(()) - } - - /// Consolidates error handling for `single_lookup_response`. An `Err` here should always mean - /// the lookup is dropped. - fn handle_parent_lookup_cached_child( - &self, - seen_timestamp: Duration, - cx: &mut SyncNetworkContext, - process_type: BlockProcessType, - lookup: &mut SingleBlockLookup, - cached_child: CachedChild, - ) -> Result<(), LookupRequestError> { - let block_root = lookup.block_root(); - match cached_child { - CachedChild::Ok(block) => { - // If we have an outstanding parent request for this block, delay sending the response until - // all parent blocks have been processed, otherwise we will fail validation with an - // `UnknownParent`. - let delay_send = match lookup.lookup_type { - LookupType::Parent => false, - LookupType::Current => self.has_pending_parent_request(block_root), - }; - - if !delay_send { - self.send_block_for_processing( - block_root, - block, - seen_timestamp, - process_type, - cx, - )? 
+ request_state.continue_request(id, cx) } } - CachedChild::DownloadIncomplete(_) => { - // If this was the result of a block request, we can't determine if the block peer - // did anything wrong. If we already had both a block and blobs response processed, - // we should penalize the blobs peer because they did not provide all blobs on the - // initial request. - if lookup.both_components_downloaded() { - lookup.penalize_blob_peer(cx); - lookup.blob_request_state.state.on_download_failure(); - } - lookup.request_block_and_blobs(cx)?; - } - CachedChild::NotRequired => { - warn!(self.log, "Child not cached for parent lookup"; "block_root" => ?block_root); - } - CachedChild::Err(e) => { - warn!(self.log, "Consistency error in cached block"; - "error" => ?e, - "block_root" => ?block_root, - ); - lookup.handle_consistency_failure(cx); - lookup.request_block_and_blobs(cx)?; - } - } - Ok(()) - } - - /// Get a parent block lookup by its ID. This method additionally ensures the `req_counter` - /// matches the current `req_counter` for the lookup. This any stale responses from requests - /// that have been retried are ignored. - fn get_parent_lookup>( - &mut self, - id: SingleLookupReqId, - ) -> Option> { - let mut parent_lookup = if let Some(pos) = self - .parent_lookups - .iter() - .position(|request| request.current_parent_request.id == id.id) - { - self.parent_lookups.remove(pos) - } else { - return None; - }; - - if R::request_state_mut(&mut parent_lookup.current_parent_request) - .get_state() - .is_current_req_counter(id.req_counter) - { - Some(parent_lookup) - } else { - self.parent_lookups.push(parent_lookup); - None - } - } - - /// Process a response received from a parent lookup request. 
- pub fn parent_lookup_response>( - &mut self, - id: SingleLookupReqId, - peer_id: PeerId, - response: R::VerifiedResponseType, - seen_timestamp: Duration, - cx: &mut SyncNetworkContext, - ) { - let Some(mut parent_lookup) = self.get_parent_lookup::(id) else { - debug!(self.log, "Response for a parent lookup request that was not found"; "peer_id" => %peer_id); - return; - }; - - debug!(self.log, - "Peer returned response for parent lookup"; - "peer_id" => %peer_id , - "id" => ?id, - "block_root" => ?parent_lookup.current_parent_request.block_request_state.requested_block_root, - "response_type" => ?R::response_type(), - ); - - match self.parent_lookup_response_inner::( - peer_id, - response, - seen_timestamp, - cx, - &mut parent_lookup, - ) { - Ok(()) => { - self.parent_lookups.push(parent_lookup); - } Err(e) => { - self.handle_parent_request_error(&mut parent_lookup, cx, e); - } - } - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); - } - - /// Consolidates error handling for `parent_lookup_response`. An `Err` here should always mean - /// the lookup is dropped. - fn parent_lookup_response_inner>( - &mut self, - peer_id: PeerId, - response: R::VerifiedResponseType, - seen_timestamp: Duration, - cx: &mut SyncNetworkContext, - parent_lookup: &mut ParentLookup, - ) -> Result<(), RequestError> { - // check if the parent of this block isn't in the failed cache. If it is, this chain should - // be dropped and the peer downscored. 
- if let Some(parent_root) = R::get_parent_root(&response) { - if self.failed_chains.contains(&parent_root) { - let request_state = R::request_state_mut(&mut parent_lookup.current_parent_request); - request_state.register_failure_downloading(); - debug!( - self.log, - "Parent chain ignored due to past failure"; - "block" => %parent_root, + debug!(self.log, + "Block lookup download failure"; + "block_root" => %block_root, + "peer_id" => %peer_id, + "response_type" => ?response_type, + "error" => %e, ); - // Add the root block to failed chains - self.failed_chains.insert(parent_lookup.chain_hash()); - cx.report_peer( - peer_id, - PeerAction::MidToleranceError, - "bbroot_failed_chains", - ); - return Ok(()); - } - } - - self.handle_verified_response::( - seen_timestamp, - cx, - BlockProcessType::ParentLookup { - chain_hash: parent_lookup.chain_hash(), - }, - response, - &mut parent_lookup.current_parent_request, - )?; - - Ok(()) - } - - /// Handle logging and peer scoring for `RequestError`s during parent lookup requests. - fn handle_parent_request_error( - &mut self, - parent_lookup: &mut ParentLookup, - cx: &SyncNetworkContext, - e: RequestError, - ) { - debug!(self.log, "Failed to request parent"; "error" => e.as_static()); - match e { - RequestError::SendFailed(_) => { - // Probably shutting down, nothing to do here. Drop the request - } - RequestError::ChainTooLong => { - self.failed_chains.insert(parent_lookup.chain_hash()); - // This indicates faulty peers. - for &peer_id in parent_lookup.all_used_peers() { - cx.report_peer(peer_id, PeerAction::LowToleranceError, e.as_static()) - } - } - RequestError::TooManyAttempts { cannot_process } => { - // We only consider the chain failed if we were unable to process it. - // We could have failed because one peer continually failed to send us - // bad blocks. We still allow other peers to send us this chain. Note - // that peers that do this, still get penalised. 
- if cannot_process { - self.failed_chains.insert(parent_lookup.chain_hash()); - } - // This indicates faulty peers. - for &peer_id in parent_lookup.all_used_peers() { - cx.report_peer(peer_id, PeerAction::LowToleranceError, e.as_static()) + if let Err(e) = request_state.get_state_mut().on_download_failure() { + Err(e) + } else { + request_state.continue_request(id, cx) } } - RequestError::NoPeers => { - // This happens if the peer disconnects while the block is being - // processed. Drop the request without extra penalty - } - RequestError::BadState(..) => { - warn!(self.log, "Request parent on bad state"; "error" => e.as_static()); - #[cfg(test)] - panic!("bad state"); - } + }; + + if let Err(e) = result { + debug!(self.log, "Dropping single lookup"; "id" => id, "err" => ?e); + self.drop_lookup_and_childs(id); + self.update_metrics(); } } @@ -649,148 +360,63 @@ impl BlockLookups { !should_drop_lookup }); - - /* Check disconnection for parent lookups */ - while let Some(pos) = self - .parent_lookups - .iter_mut() - .position(|req| req.check_peer_disconnected(peer_id).is_err()) - { - let parent_lookup = self.parent_lookups.remove(pos); - debug!(self.log, "Dropping parent lookup after peer disconnected"; "chain_hash" => %parent_lookup.chain_hash()); - self.request_parent(parent_lookup, cx); - } } - /// An RPC error has occurred during a parent lookup. This function handles this case. - pub fn parent_lookup_failed>( - &mut self, - id: SingleLookupReqId, - peer_id: &PeerId, - cx: &mut SyncNetworkContext, - error: LookupFailure, - ) { - // Only downscore lookup verify errors. RPC errors are downscored in the network handler. 
- if let LookupFailure::LookupVerifyError(e) = &error { - // Downscore peer even if lookup is not known - self.downscore_on_rpc_error(peer_id, e, cx); - } - - let Some(mut parent_lookup) = self.get_parent_lookup::(id) else { - debug!(self.log, - "RPC failure for a block parent lookup request that was not found"; - "peer_id" => %peer_id, - "error" => %error - ); - return; - }; - R::request_state_mut(&mut parent_lookup.current_parent_request) - .register_failure_downloading(); - debug!(self.log, "Parent lookup block request failed"; - "chain_hash" => %parent_lookup.chain_hash(), "id" => ?id, "error" => %error - ); - - self.request_parent(parent_lookup, cx); - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); - } + /* Processing responses */ - /// An RPC error has occurred during a single lookup. This function handles this case.\ - pub fn single_block_lookup_failed>( + pub fn on_processing_result( &mut self, - id: SingleLookupReqId, - peer_id: &PeerId, + process_type: BlockProcessType, + result: BlockProcessingResult, cx: &mut SyncNetworkContext, - error: LookupFailure, ) { - // Only downscore lookup verify errors. RPC errors are downscored in the network handler. 
- if let LookupFailure::LookupVerifyError(e) = &error { - // Downscore peer even if lookup is not known - self.downscore_on_rpc_error(peer_id, e, cx); + if let Err(e) = match process_type { + BlockProcessType::SingleBlock { id } => { + self.on_processing_result_inner::>(id, result, cx) + } + BlockProcessType::SingleBlob { id } => { + self.on_processing_result_inner::>(id, result, cx) + } + } { + let id = match process_type { + BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => id, + }; + debug!(self.log, "Dropping lookup on request error"; "id" => id, "error" => ?e); + self.drop_lookup_and_childs(id); } - - let log = self.log.clone(); - let Some(mut lookup) = self.get_single_lookup::(id) else { - debug!(log, "Error response to dropped lookup"; "error" => %error); - return; - }; - let block_root = lookup.block_root(); - let request_state = R::request_state_mut(&mut lookup); - let response_type = R::response_type(); - trace!(log, - "Single lookup failed"; - "block_root" => ?block_root, - "error" => %error, - "peer_id" => %peer_id, - "response_type" => ?response_type - ); - let id = id.id; - request_state.register_failure_downloading(); - if let Err(e) = lookup.request_block_and_blobs(cx) { - debug!(self.log, - "Single lookup retry failed"; - "error" => ?e, - "block_root" => ?block_root, - ); - } else { - self.single_block_lookups.insert(id, lookup); - } - - metrics::set_gauge( - &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, - self.single_block_lookups.len() as i64, - ); } - /* Processing responses */ - - pub fn single_block_component_processed>( + pub fn on_processing_result_inner>( &mut self, - target_id: Id, + lookup_id: SingleLookupId, result: BlockProcessingResult, cx: &mut SyncNetworkContext, - ) { - let Some(mut lookup) = self.single_block_lookups.remove(&target_id) else { - debug!(self.log, "Unknown single block lookup"; "target_id" => target_id); - return; + ) -> Result<(), LookupRequestError> { + let Some(mut lookup) = 
self.single_block_lookups.get_mut(&lookup_id) else { + debug!(self.log, "Unknown single block lookup"; "target_id" => lookup_id); + return Ok(()); }; let block_root = lookup.block_root(); - let request_state = R::request_state_mut(&mut lookup); + let request_state = R::request_state_mut(&mut lookup).get_state_mut(); - let peer_id = match request_state.get_state().processing_peer() { - Ok(peer_id) => peer_id, - Err(e) => { - // TODO: This log will fire when processing the child block that triggered a parent - // lookup. If the result is Ok(Imported) or Err(BlockIsAlreadyKnown), there's no - // effect since the lookup is dropped anyway. However, for other variants there may - // be inconsistencies. The problem lies on the lookup transitioning from a child - // components into a regular lookup. - warn!(self.log, "Attempting to process single block lookup in bad state"; "id" => target_id, "response_type" => ?R::response_type(), "error" => e); - #[cfg(test)] - panic!("bad state"); - #[cfg(not(test))] - return; - } - }; debug!( self.log, "Block component processed for lookup"; "response_type" => ?R::response_type(), "block_root" => ?block_root, "result" => ?result, - "id" => target_id, + "id" => lookup_id, ); let action = match result { BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) - | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown { .. }) => { + | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown(_)) => { // Successfully imported + // TODO: Potentially import child blocks trace!(self.log, "Single block processing succeeded"; "block" => %block_root); - Action::Drop + request_state.on_processing_success()?; + Action::Continue } BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( @@ -799,29 +425,21 @@ impl BlockLookups { )) => { // `on_processing_success` is called here to ensure the request state is updated prior to checking // if both components have been processed. 
- if R::request_state_mut(&mut lookup) - .get_state_mut() - .on_processing_success() - .is_err() - { - warn!( - self.log, - "Single block processing state incorrect"; - "action" => "dropping single block request" - ); - Action::Drop + request_state.on_processing_success()?; + // If this was the result of a block request, we can't determined if the block peer did anything // wrong. If we already had both a block and blobs response processed, we should penalize the // blobs peer because they did not provide all blobs on the initial request. - } else if lookup.both_components_processed() { - lookup.penalize_blob_peer(cx); - - // Try it again if possible. - lookup.blob_request_state.state.on_processing_failure(); - Action::Retry - } else { - Action::Continue + if lookup.both_components_processed() { + if let Ok(blob_peer) = lookup.blob_request_state.state.on_processing_failure() { + cx.report_peer( + blob_peer, + PeerAction::MidToleranceError, + "sent_incomplete_blobs", + ); + } } + Action::Retry } BlockProcessingResult::Ignored => { // Beacon processor signalled to ignore the block processing result. @@ -834,18 +452,23 @@ impl BlockLookups { Action::Drop } BlockProcessingResult::Err(e) => { - let root = lookup.block_root(); - trace!(self.log, "Single block processing failed"; "block" => %root, "error" => %e); + trace!(self.log, "Single block processing failed"; "block_root" => %block_root, "error" => %e); match e { BlockError::BeaconChainError(e) => { // Internal error - error!(self.log, "Beacon chain error processing single block"; "block_root" => %root, "error" => ?e); + error!(self.log, "Beacon chain error processing single block"; "block_root" => %block_root, "error" => ?e); Action::Drop } BlockError::ParentUnknown(block) => { let slot = block.slot(); let parent_root = block.parent_root(); - lookup.add_child_components(block.into()); + + // Reverts the status of this request to `AwaitingProcessing` holding the + // downloaded data. 
A future call to `continue_requests` will re-submit it + // once there are no pending parent requests. + // Note: `BlockError::ParentUnknown` is only returned when processing + // blocks, not blobs. + request_state.into_awaiting_processing()?; Action::ParentUnknown { parent_root, slot } } ref e @ BlockError::ExecutionPayloadError(ref epe) if !epe.penalize_peer() => { @@ -854,35 +477,37 @@ impl BlockLookups { debug!( self.log, "Single block lookup failed. Execution layer is offline / unsynced / misconfigured"; - "root" => %root, + "block_root" => %block_root, "error" => ?e ); Action::Drop } - BlockError::AvailabilityCheck(e) => match e.category() { - AvailabilityCheckErrorCategory::Internal => { - warn!(self.log, "Internal availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); - lookup.block_request_state.state.on_download_failure(); - lookup.blob_request_state.state.on_download_failure(); - Action::Retry - } - AvailabilityCheckErrorCategory::Malicious => { - warn!(self.log, "Availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); - lookup.handle_availability_check_failure(cx); - Action::Retry - } - }, + BlockError::AvailabilityCheck(e) + if e.category() == AvailabilityCheckErrorCategory::Internal => + { + // There errors indicate internal problems and should not downscore the peer + warn!(self.log, "Internal availability check failure"; "block_root" => %block_root, "error" => ?e); + // TODO: This lines represent an improper transition of download states, + // which can log errors in the future. 
If an error here causes the request + // to transition into a bad state, a future network message will cause + // the request to be dropped + // + // lookup.block_request_state.state.on_download_failure(); + // lookup.blob_request_state.state.on_download_failure(); + Action::Drop + } other => { - warn!(self.log, "Peer sent invalid block in single block lookup"; "root" => %root, "error" => ?other, "peer_id" => %peer_id); - if let Ok(block_peer) = lookup.block_request_state.state.processing_peer() { - cx.report_peer( - block_peer, - PeerAction::MidToleranceError, - "single_block_failure", - ); - - lookup.block_request_state.state.on_processing_failure(); - } + warn!(self.log, "Invalid block in single block lookup"; "block_root" => %block_root, "error" => ?other); + let peer_id = request_state.on_processing_failure()?; + // TODO: Why is the original code downscoring the block peer regardless of + // type of request? Sending a blob for verification can result in an error + // attributable to the block peer? 
+ cx.report_peer( + peer_id, + PeerAction::MidToleranceError, + "single_block_failure", + ); + Action::Retry } } @@ -891,398 +516,71 @@ impl BlockLookups { match action { Action::Retry => { - if let Err(e) = lookup.request_block_and_blobs(cx) { - warn!(self.log, "Single block lookup failed"; "block_root" => %block_root, "error" => ?e); - // Failed with too many retries, drop with noop - self.update_metrics(); - } else { - self.single_block_lookups.insert(target_id, lookup); - } + // Trigger download + lookup.continue_requests(cx)?; } - Action::ParentUnknown { parent_root, slot } => { - // TODO: Consider including all peers from the lookup, claiming to know this block, not - // just the one that sent this specific block - self.search_parent(slot, block_root, parent_root, peer_id, cx); - self.single_block_lookups.insert(target_id, lookup); + Action::ParentUnknown { + parent_root, + slot: _, + } => { + let peers = lookup.all_available_peers().cloned().collect::>(); + self.search_parent_of_child(parent_root, block_root, &peers, cx); } Action::Drop => { - // drop with noop + // Drop with noop + self.drop_lookup_and_childs(lookup_id); self.update_metrics(); } Action::Continue => { - self.single_block_lookups.insert(target_id, lookup); + // Block imported, continue the requests of pending child blocks + self.continue_child_lookups(block_root, cx); } } + Ok(()) } - pub fn parent_block_processed( - &mut self, - chain_hash: Hash256, - result: BlockProcessingResult, - cx: &mut SyncNetworkContext, - ) { - let index = self - .parent_lookups - .iter() - .enumerate() - .find(|(_, lookup)| lookup.chain_hash() == chain_hash) - .map(|(index, _)| index); - - let Some(mut parent_lookup) = index.map(|index| self.parent_lookups.remove(index)) else { - return debug!(self.log, "Process response for a parent lookup request that was not found"; "chain_hash" => %chain_hash); - }; - - match &result { - BlockProcessingResult::Ok(status) => match status { - 
AvailabilityProcessingStatus::Imported(block_root) => { - debug!(self.log, "Parent block processing succeeded"; &parent_lookup, "block_root" => ?block_root) + pub fn continue_child_lookups(&mut self, block_root: Hash256, cx: &mut SyncNetworkContext) { + let mut failed_lookups = vec![]; + for (id, lookup) in self.single_block_lookups.iter_mut() { + if lookup.parent_root() == Some(block_root) { + // Continue lookup + debug!(self.log, "Continuing child lookup"; "block_root" => %lookup.block_root()); + if let Err(e) = lookup.continue_requests(cx) { + debug!(self.log, "Error continuing lookup"; "id" => id, "error" => ?e); + failed_lookups.push(*id); } - AvailabilityProcessingStatus::MissingComponents(_, block_root) => { - debug!(self.log, "Parent missing parts, triggering single block lookup"; &parent_lookup,"block_root" => ?block_root) - } - }, - BlockProcessingResult::Err(e) => { - debug!(self.log, "Parent block processing failed"; &parent_lookup, "error" => %e) - } - BlockProcessingResult::Ignored => { - debug!( - self.log, - "Parent block processing job was ignored"; - "action" => "re-requesting block", - &parent_lookup - ); } } - match result { - BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( - _, - block_root, - )) => { - let expected_block_root = parent_lookup.current_parent_request.block_root(); - if block_root != expected_block_root { - warn!( - self.log, - "Parent block processing result/request root mismatch"; - "request" =>?expected_block_root, - "result" => ?block_root - ); - return; - } - - // We only send parent blocks + blobs for processing together. This means a - // `MissingComponents` response here indicates missing blobs. Therefore we always - // register a blob processing failure here. 
- parent_lookup - .current_parent_request - .blob_request_state - .state - .on_processing_failure(); - match parent_lookup - .current_parent_request - .request_block_and_blobs(cx) - { - Ok(()) => self.parent_lookups.push(parent_lookup), - Err(e) => self.handle_parent_request_error(&mut parent_lookup, cx, e.into()), - } - } - BlockProcessingResult::Err(BlockError::ParentUnknown(block)) => { - parent_lookup.add_unknown_parent_block(block, self.da_checker.clone(), cx); - self.request_parent(parent_lookup, cx); - } - BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) - | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown(_)) => { - // Send the next block in the parent chain with - let chain_completed = parent_lookup.pop_completed_parent_request(); - - if chain_completed { - debug!(self.log, "Parent lookup chain ancestors imported"; "chain_hash" => %chain_hash); - // MUST recover the single lookup that triggered the chain - self.send_child_of_parent_chain_for_processing(chain_hash, cx); - // Do nothing, drop parent lookup - } else { - debug!(self.log, "Parent lookup chain importing ancestor"; "chain_hash" => %chain_hash); - let cached_child = parent_lookup - .current_parent_request - .get_cached_child_block(); - match self.handle_parent_lookup_cached_child( - timestamp_now(), - cx, - BlockProcessType::ParentLookup { chain_hash }, - &mut parent_lookup.current_parent_request, - cached_child, - ) { - Ok(()) => self.parent_lookups.push(parent_lookup), - Err(e) => { - self.handle_parent_request_error(&mut parent_lookup, cx, e.into()) - } - } - } - } - ref e @ BlockProcessingResult::Err(BlockError::ExecutionPayloadError(ref epe)) - if !epe.penalize_peer() => - { - // These errors indicate that the execution layer is offline - // and failed to validate the execution payload. Do not downscore peer. - debug!( - self.log, - "Parent lookup failed. 
Execution layer is offline"; - "chain_hash" => %chain_hash, - "error" => ?e - ); - } - BlockProcessingResult::Err(outcome) => { - self.handle_parent_block_error(outcome, cx, parent_lookup); - } - BlockProcessingResult::Ignored => { - // Beacon processor signalled to ignore the block processing result. - // This implies that the cpu is overloaded. Drop the request. - warn!( - self.log, - "Parent block processing was ignored, cpu might be overloaded"; - "action" => "dropping parent request" - ); - // Drop the trigger single block lookup, otherwise it will get stuck - self.drop_single_lookup(chain_hash); - } - } - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); - } - - /// Handle the peer scoring, retries, and logging related to a `BlockError` returned from - /// processing a block + blobs for a parent lookup. - fn handle_parent_block_error( - &mut self, - outcome: BlockError<::EthSpec>, - cx: &mut SyncNetworkContext, - mut parent_lookup: ParentLookup, - ) { - // We should always have a block peer. - let block_peer_id = match parent_lookup.block_processing_peer() { - Ok(peer_id) => peer_id, - Err(e) => { - warn!(self.log, "Parent lookup in bad state"; "chain_hash" => %parent_lookup.chain_hash(), "error" => e); - #[cfg(test)] - panic!("bad state"); - #[cfg(not(test))] - return; - } - }; - - // We may not have a blob peer, if there were no blobs required for this block. - let blob_peer_id = parent_lookup.blob_processing_peer().ok(); - - // all else we consider the chain a failure and downvote the peer that sent - // us the last block - warn!( - self.log, "Invalid parent chain"; - "score_adjustment" => %PeerAction::MidToleranceError, - "outcome" => ?outcome, - "block_peer_id" => %block_peer_id, - ); - // This currently can be a host of errors. We permit this due to the partial - // ambiguity. 
- cx.report_peer( - block_peer_id, - PeerAction::MidToleranceError, - "parent_request_err", - ); - // Don't downscore the same peer twice - if let Some(blob_peer_id) = blob_peer_id { - if block_peer_id != blob_peer_id { - debug!( - self.log, "Additionally down-scoring blob peer"; - "score_adjustment" => %PeerAction::MidToleranceError, - "outcome" => ?outcome, - "blob_peer_id" => %blob_peer_id, - ); - cx.report_peer( - blob_peer_id, - PeerAction::MidToleranceError, - "parent_request_err", - ); - } + for id in failed_lookups { + self.drop_lookup_and_childs(id); } - - // Try again if possible - parent_lookup.processing_failed(); - self.request_parent(parent_lookup, cx); } - pub fn send_child_of_parent_chain_for_processing( - &mut self, - chain_hash: Hash256, - cx: &mut SyncNetworkContext, - ) { - let Some(id) = self - .single_block_lookups - .iter() - .find_map(|(id, req)| (req.block_root() == chain_hash).then_some(*id)) - else { - warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); - return; - }; + pub fn drop_lookup_and_childs(&mut self, dropped_id: SingleLookupId) { + if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) { + debug!(self.log, "Dropping child lookup"; "id" => ?dropped_id, "block_root" => %dropped_lookup.block_root()); - let Some(lookup) = self.single_block_lookups.get_mut(&id) else { - warn!(self.log, "No id found for single block lookup"; "chain_hash" => %chain_hash); - return; - }; + let child_lookup_ids = self + .single_block_lookups + .iter() + .filter_map(|(id, lookup)| { + if lookup.parent_root() == Some(dropped_lookup.block_root()) { + Some(*id) + } else { + None + } + }) + .collect::>(); - match lookup.get_cached_child_block() { - CachedChild::Ok(rpc_block) => { - // This is the correct block, send it for processing - if self - .send_block_for_processing( - chain_hash, - rpc_block, - timestamp_now(), - BlockProcessType::SingleBlock { id }, - cx, - ) - .is_err() - { - // Remove to avoid 
inconsistencies - self.single_block_lookups.remove(&id); - } - } - CachedChild::DownloadIncomplete(reason) => { - debug!(self.log, "Parent chain complete, awaiting child response"; "chain_hash" => %chain_hash, "reason" => reason); - } - CachedChild::NotRequired => { - warn!(self.log, "Child not cached for parent lookup"; "chain_hash" => %chain_hash); - } - CachedChild::Err(e) => { - warn!( - self.log, - "Consistency error in child block triggering parent lookup"; - "chain_hash" => %chain_hash, - "error" => ?e - ); - lookup.handle_consistency_failure(cx); - if let Err(e) = lookup.request_block_and_blobs(cx) { - debug!(self.log, - "Failed to request block and blobs, dropping lookup"; - "error" => ?e - ); - self.single_block_lookups.remove(&id); - } + for id in child_lookup_ids { + self.drop_lookup_and_childs(id); } } } /* Helper functions */ - fn send_block_for_processing( - &self, - block_root: Hash256, - block: RpcBlock, - duration: Duration, - process_type: BlockProcessType, - cx: &SyncNetworkContext, - ) -> Result<(), LookupRequestError> { - match cx.beacon_processor_if_enabled() { - Some(beacon_processor) => { - debug!(self.log, "Sending block for processing"; "block" => ?block_root, "process" => ?process_type); - if let Err(e) = beacon_processor.send_rpc_beacon_block( - block_root, - block, - duration, - process_type, - ) { - error!( - self.log, - "Failed to send sync block to processor"; - "error" => ?e - ); - Err(LookupRequestError::SendFailed( - "beacon processor send failure", - )) - } else { - Ok(()) - } - } - None => { - trace!(self.log, "Dropping block ready for processing. 
Beacon processor not available"; "block" => %block_root); - Err(LookupRequestError::SendFailed( - "beacon processor unavailable", - )) - } - } - } - - fn send_blobs_for_processing( - &self, - block_root: Hash256, - blobs: FixedBlobSidecarList, - duration: Duration, - process_type: BlockProcessType, - cx: &SyncNetworkContext, - ) -> Result<(), LookupRequestError> { - match cx.beacon_processor_if_enabled() { - Some(beacon_processor) => { - trace!(self.log, "Sending blobs for processing"; "block" => ?block_root, "process_type" => ?process_type); - if let Err(e) = - beacon_processor.send_rpc_blobs(block_root, blobs, duration, process_type) - { - error!( - self.log, - "Failed to send sync blobs to processor"; - "error" => ?e - ); - Err(LookupRequestError::SendFailed( - "beacon processor send failure", - )) - } else { - Ok(()) - } - } - None => { - trace!(self.log, "Dropping blobs ready for processing. Beacon processor not available"; "block_root" => %block_root); - Err(LookupRequestError::SendFailed( - "beacon processor unavailable", - )) - } - } - } - - /// Attempts to request the next unknown parent. This method handles peer scoring and dropping - /// the lookup in the event of failure. - fn request_parent( - &mut self, - mut parent_lookup: ParentLookup, - cx: &mut SyncNetworkContext, - ) { - let response = parent_lookup.request_parent(cx); - - match response { - Err(e) => { - self.handle_parent_request_error(&mut parent_lookup, cx, e); - } - Ok(_) => self.parent_lookups.push(parent_lookup), - } - - // We remove and add back again requests so we want this updated regardless of outcome. 
- metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); - } - - pub fn drop_single_lookup(&mut self, block_root: Hash256) { - if let Some(id) = self - .single_block_lookups - .iter() - .find_map(|(id, req)| (req.block_root() == block_root).then_some(*id)) - { - debug!(self.log, "Dropping single block lookup"; "id" => id, "block_root" => %block_root); - self.single_block_lookups.remove(&id); - }; - } - /// Drops all the single block requests and returns how many requests were dropped. pub fn drop_single_block_requests(&mut self) -> usize { let requests_to_drop = self.single_block_lookups.len(); @@ -1290,34 +588,10 @@ impl BlockLookups { requests_to_drop } - /// Drops all the parent chain requests and returns how many requests were dropped. - pub fn drop_parent_chain_requests(&mut self) -> usize { - self.parent_lookups.drain(..).len() - } - - pub fn downscore_on_rpc_error( - &self, - peer_id: &PeerId, - error: &LookupVerifyError, - cx: &SyncNetworkContext, - ) { - // Note: logging the report event here with the full error display. 
The log inside - // `report_peer` only includes a smaller string, like "invalid_data" - let error_str: &'static str = error.into(); - - debug!(self.log, "reporting peer for sync lookup error"; "error" => error_str); - cx.report_peer(*peer_id, PeerAction::LowToleranceError, error_str); - } - pub fn update_metrics(&self) { metrics::set_gauge( &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, self.single_block_lookups.len() as i64, ); - - metrics::set_gauge( - &metrics::SYNC_PARENT_BLOCK_LOOKUPS, - self.parent_lookups.len() as i64, - ); } } diff --git a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs b/beacon_node/network/src/sync/block_lookups/parent_lookup.rs deleted file mode 100644 index 87a04451748..00000000000 --- a/beacon_node/network/src/sync/block_lookups/parent_lookup.rs +++ /dev/null @@ -1,228 +0,0 @@ -use super::common::LookupType; -use super::single_block_lookup::{LookupRequestError, SingleBlockLookup}; -use super::PeerId; -use crate::sync::{manager::SLOT_IMPORT_TOLERANCE, network_context::SyncNetworkContext}; -use beacon_chain::block_verification_types::AsBlock; -use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::{ChildComponents, DataAvailabilityChecker}; -use beacon_chain::BeaconChainTypes; -use std::sync::Arc; -use store::Hash256; - -/// How many attempts we try to find a parent of a block before we give up trying. -pub(crate) const PARENT_FAIL_TOLERANCE: u8 = 5; -/// The maximum depth we will search for a parent block. In principle we should have sync'd any -/// canonical chain to its head once the peer connects. A chain should not appear where it's depth -/// is further back than the most recent head slot. -pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2; - -/// Maintains a sequential list of parents to lookup and the lookup's current state. -pub(crate) struct ParentLookup { - /// The root of the block triggering this parent request. 
- chain_hash: Hash256, - /// The blocks that have currently been downloaded. - parent_requests: Vec>, - /// Request of the last parent. - pub current_parent_request: SingleBlockLookup, -} - -#[derive(Debug, PartialEq, Eq)] -pub(crate) enum RequestError { - SendFailed(&'static str), - ChainTooLong, - /// We witnessed too many failures trying to complete this parent lookup. - TooManyAttempts { - /// We received more failures trying to process the blocks than downloading them - /// from peers. - cannot_process: bool, - }, - NoPeers, - BadState(String), -} - -impl ParentLookup { - pub fn new( - block_root: Hash256, - parent_root: Hash256, - peer_id: PeerId, - da_checker: Arc>, - cx: &mut SyncNetworkContext, - ) -> Self { - let current_parent_request = SingleBlockLookup::new( - parent_root, - Some(ChildComponents::empty(block_root)), - &[peer_id], - da_checker, - cx.next_id(), - LookupType::Parent, - ); - - Self { - chain_hash: block_root, - parent_requests: vec![], - current_parent_request, - } - } - - pub fn contains_block(&self, block_root: &Hash256) -> bool { - &self.current_parent_request.block_root() == block_root - || self - .parent_requests - .iter() - .any(|request| &request.block_root() == block_root) - } - - pub fn is_for_block(&self, block_root: Hash256) -> bool { - self.current_parent_request.is_for_block(block_root) - } - - /// Attempts to request the next unknown parent. If the request fails, it should be removed. 
- pub fn request_parent(&mut self, cx: &mut SyncNetworkContext) -> Result<(), RequestError> { - // check to make sure this request hasn't failed - if self.parent_requests.len() + 1 >= PARENT_DEPTH_TOLERANCE { - return Err(RequestError::ChainTooLong); - } - - self.current_parent_request - .request_block_and_blobs(cx) - .map_err(Into::into) - } - - pub fn check_peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), ()> { - self.current_parent_request - .block_request_state - .state - .check_peer_disconnected(peer_id) - .and_then(|()| { - self.current_parent_request - .blob_request_state - .state - .check_peer_disconnected(peer_id) - }) - } - - pub fn add_unknown_parent_block( - &mut self, - block: RpcBlock, - da_checker: Arc>, - cx: &mut SyncNetworkContext, - ) { - // Create a new empty single block lookup for the parent, copying all peers - let parent_root = block.parent_root(); - let new_parent_request = SingleBlockLookup::new( - parent_root, - Some(ChildComponents::empty(parent_root)), - &self - .current_parent_request - .all_available_peers() - .cloned() - .collect::>(), - da_checker, - cx.next_id(), - LookupType::Parent, - ); - - // Replace current parent request and store in parent_requests queue - let previous_parent_request = - std::mem::replace(&mut self.current_parent_request, new_parent_request); - self.parent_requests.push(previous_parent_request); - } - - pub fn pop_completed_parent_request(&mut self) -> bool { - if let Some(new_parent_request) = self.parent_requests.pop() { - self.current_parent_request = new_parent_request; - false - } else { - true - } - } - - pub fn block_processing_peer(&self) -> Result { - self.current_parent_request - .block_request_state - .state - .processing_peer() - } - - pub fn blob_processing_peer(&self) -> Result { - self.current_parent_request - .blob_request_state - .state - .processing_peer() - } - - /// Get the parent lookup's chain hash. 
- pub fn chain_hash(&self) -> Hash256 { - self.chain_hash - } - - pub fn processing_failed(&mut self) { - self.current_parent_request - .block_request_state - .state - .on_processing_failure(); - self.current_parent_request - .blob_request_state - .state - .on_processing_failure(); - if let Some(components) = self.current_parent_request.child_components.as_mut() { - components.downloaded_block = None; - components.downloaded_blobs = <_>::default(); - } - } - - pub fn add_peer(&mut self, peer: PeerId) { - self.current_parent_request.add_peer(peer) - } - - /// Adds a list of peers to the parent request. - pub fn add_peers(&mut self, peers: &[PeerId]) { - self.current_parent_request.add_peers(peers) - } - - pub fn all_used_peers(&self) -> impl Iterator + '_ { - self.current_parent_request.all_used_peers() - } -} - -impl From for RequestError { - fn from(e: LookupRequestError) -> Self { - use LookupRequestError as E; - match e { - E::TooManyAttempts { cannot_process } => { - RequestError::TooManyAttempts { cannot_process } - } - E::NoPeers => RequestError::NoPeers, - E::SendFailed(msg) => RequestError::SendFailed(msg), - E::BadState(msg) => RequestError::BadState(msg), - } - } -} - -impl slog::KV for ParentLookup { - fn serialize( - &self, - record: &slog::Record, - serializer: &mut dyn slog::Serializer, - ) -> slog::Result { - serializer.emit_arguments("chain_hash", &format_args!("{}", self.chain_hash))?; - slog::Value::serialize(&self.current_parent_request, record, "parent", serializer)?; - serializer.emit_usize("downloaded_blocks", self.parent_requests.len())?; - slog::Result::Ok(()) - } -} - -impl RequestError { - pub fn as_static(&self) -> &'static str { - match self { - RequestError::SendFailed(e) => e, - RequestError::ChainTooLong => "chain_too_long", - RequestError::TooManyAttempts { cannot_process } if *cannot_process => { - "too_many_processing_attempts" - } - RequestError::TooManyAttempts { cannot_process: _ } => "too_many_downloading_attempts", - 
RequestError::NoPeers => "no_peers", - RequestError::BadState(..) => "bad_state", - } - } -} diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index c1b4139d826..d10311f6400 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -1,25 +1,20 @@ -use super::common::LookupType; -use super::PeerId; +use super::{BlockComponent, PeerId}; use crate::sync::block_lookups::common::RequestState; use crate::sync::block_lookups::Id; use crate::sync::network_context::SyncNetworkContext; -use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::ChildComponents; -use beacon_chain::data_availability_checker::{ - AvailabilityCheckError, DataAvailabilityChecker, MissingBlobs, -}; +use beacon_chain::data_availability_checker::{DataAvailabilityChecker, MissingBlobs}; use beacon_chain::BeaconChainTypes; use itertools::Itertools; -use lighthouse_network::PeerAction; use rand::seq::IteratorRandom; use slog::{debug, Logger}; use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; +use std::time::Duration; use store::Hash256; use strum::IntoStaticStr; use types::blob_sidecar::FixedBlobSidecarList; -use types::EthSpec; +use types::{EthSpec, SignedBeaconBlock}; #[derive(Debug, PartialEq, Eq, IntoStaticStr)] pub enum LookupRequestError { @@ -35,38 +30,59 @@ pub enum LookupRequestError { pub struct SingleBlockLookup { pub id: Id, - pub lookup_type: LookupType, - pub block_request_state: BlockRequestState, + pub block_request_state: BlockRequestState, pub blob_request_state: BlobRequestState, pub da_checker: Arc>, - /// Only necessary for requests triggered by an `UnknownBlockParent` or `UnknownBlockParent` - /// because any blocks or blobs without parents won't hit the data availability cache. 
- pub child_components: Option>, + block_root: Hash256, + parent_root: Option, } impl SingleBlockLookup { pub fn new( requested_block_root: Hash256, - child_components: Option>, peers: &[PeerId], da_checker: Arc>, id: Id, - lookup_type: LookupType, ) -> Self { let is_deneb = da_checker.is_deneb(); + Self { id, - lookup_type, block_request_state: BlockRequestState::new(requested_block_root, peers), blob_request_state: BlobRequestState::new(requested_block_root, peers, is_deneb), da_checker, - child_components, + block_root: requested_block_root, + parent_root: todo!(), } } /// Get the block root that is being requested. pub fn block_root(&self) -> Hash256 { - self.block_request_state.requested_block_root + self.block_root + } + + pub fn parent_root(&self) -> Option { + self.parent_root + } + + pub fn add_child_components( + &mut self, + peer_id: PeerId, + block_component: BlockComponent, + ) { + match block_component { + BlockComponent::Block(block) => { + self.block_request_state + .state + .insert_verified_response(peer_id, block); + } + BlockComponent::Blob(blob) => { + // For now ignore single blobs, as the blob request state assumes all blobs are + // attributed to the same peer = the peer serving the remaining blobs. Ignoring this + // block component has a minor effect, causing the node to re-request this blob + // once the parent chain is successfully resolved + } + } } /// Check the block root matches the requested block root. @@ -92,95 +108,16 @@ impl SingleBlockLookup { .unique() } - /// Send the necessary requests for blocks and/or blobs. This will check whether we have - /// downloaded the block and/or blobs already and will not send requests if so. It will also - /// inspect the request state or blocks and blobs to ensure we are not already processing or - /// downloading the block and/or blobs. 
- pub fn request_block_and_blobs( + pub fn continue_requests( &mut self, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - let block_already_downloaded = self.block_already_downloaded(); - let blobs_already_downloaded = self.blobs_already_downloaded(); - - if !block_already_downloaded { - self.block_request_state - .build_request_and_send(self.id, self.lookup_type, cx)?; - } - if !blobs_already_downloaded { - self.blob_request_state - .build_request_and_send(self.id, self.lookup_type, cx)?; - } + // TODO: Check what's necessary to download, specially for blobs + self.block_request_state.continue_request(self.id, cx)?; + self.blob_request_state.continue_request(self.id, cx)?; Ok(()) } - /// Returns a `CachedChild`, which is a wrapper around a `RpcBlock` that is either: - /// - /// 1. `NotRequired`: there is no child caching required for this lookup. - /// 2. `DownloadIncomplete`: Child caching is required, but all components are not yet downloaded. - /// 3. `Ok`: The child is required and we have downloaded it. - /// 4. `Err`: The child is required, but has failed consistency checks. - pub fn get_cached_child_block(&self) -> CachedChild { - if let Some(components) = self.child_components.as_ref() { - let Some(block) = components.downloaded_block.as_ref() else { - return CachedChild::DownloadIncomplete("missing block".to_owned()); - }; - - // CacheChild should include only block components for an unknown parent, so nothing is - // imported into the da_checker. The only possible contents are here. 
- let blobs_expected = block.num_expected_blobs(); - let blobs_downloaded = components.downloaded_blobs_count(); - if blobs_expected != blobs_downloaded { - return CachedChild::DownloadIncomplete(format!( - "missing blobs expected {blobs_expected} got {blobs_downloaded}" - )); - } - - match RpcBlock::new_from_fixed( - self.block_request_state.requested_block_root, - block.clone(), - components.downloaded_blobs.clone(), - ) { - Ok(rpc_block) => CachedChild::Ok(rpc_block), - Err(e) => CachedChild::Err(e), - } - } else { - CachedChild::NotRequired - } - } - - /// Accepts a verified response, and adds it to the child components if required. This method - /// returns a `CachedChild` which provides a completed block + blob response if all components have been - /// received, or information about whether the child is required and if it has been downloaded. - pub fn add_response>( - &mut self, - verified_response: R::VerifiedResponseType, - ) -> CachedChild { - if let Some(child_components) = self.child_components.as_mut() { - R::add_to_child_components(verified_response, child_components); - self.get_cached_child_block() - } else { - CachedChild::NotRequired - } - } - - /// Add a child component to the lookup request. Merges with any existing child components. - pub fn add_child_components(&mut self, components: ChildComponents) { - if let Some(ref mut existing_components) = self.child_components { - let ChildComponents { - block_root: _, - downloaded_block, - downloaded_blobs, - } = components; - if let Some(block) = downloaded_block { - existing_components.merge_block(block); - } - existing_components.merge_blobs(downloaded_blobs); - } else { - self.child_components = Some(components); - } - } - /// Add all given peers to both block and blob request states. pub fn add_peer(&mut self, peer_id: PeerId) { self.block_request_state.state.add_peer(&peer_id); @@ -194,12 +131,6 @@ impl SingleBlockLookup { } } - /// Returns true if the block has already been downloaded. 
- pub fn both_components_downloaded(&self) -> bool { - self.block_request_state.state.is_downloaded() - && self.blob_request_state.state.is_downloaded() - } - /// Returns true if the block has already been downloaded. pub fn both_components_processed(&self) -> bool { self.block_request_state.state.is_processed() @@ -234,7 +165,7 @@ impl SingleBlockLookup { // If there was an active download request with this peer, send them again with another // peer. We should receive an RPCError anyway, but this should speed things up? if block_peer_disconnected || blob_peer_disconnected { - if let Err(e) = self.request_block_and_blobs(cx) { + if let Err(e) = self.continue_requests(cx) { debug!(log, "Single lookup failed on retry after peer disconnection"; "block_root" => ?block_root, "error" => ?e); return true; } @@ -242,75 +173,14 @@ impl SingleBlockLookup { false } - /// Returns `true` if the block has already been downloaded. - pub(crate) fn block_already_downloaded(&self) -> bool { - if let Some(components) = self.child_components.as_ref() { - components.downloaded_block.is_some() - } else { - self.da_checker.has_block(&self.block_root()) - } - } - - /// Updates the `requested_ids` field of the `BlockRequestState` with the most recent picture - /// of which blobs still need to be requested. Returns `true` if there are no more blobs to - /// request. - pub(crate) fn blobs_already_downloaded(&mut self) -> bool { - if matches!(self.blob_request_state.state.state, State::AwaitingDownload) { - self.update_blobs_request(); - } - self.blob_request_state.requested_ids.is_empty() - } - - /// Updates this request with the most recent picture of which blobs still need to be requested. - pub fn update_blobs_request(&mut self) { - self.blob_request_state.requested_ids = self.missing_blob_ids(); - } - /// If `child_components` is `Some`, we know block components won't hit the data /// availability cache, so we don't check its processing cache unless `child_components` /// is `None`. 
pub(crate) fn missing_blob_ids(&self) -> MissingBlobs { - let block_root = self.block_root(); - if let Some(components) = self.child_components.as_ref() { - self.da_checker.get_missing_blob_ids( - block_root, - components.downloaded_block.as_ref().map(|b| b.as_ref()), - &components.downloaded_blobs, - ) - } else { - self.da_checker.get_missing_blob_ids_with(block_root) - } - } - - /// Penalizes a blob peer if it should have blobs but didn't return them to us. - pub fn penalize_blob_peer(&mut self, cx: &SyncNetworkContext) { - if let Ok(blob_peer) = self.blob_request_state.state.processing_peer() { - cx.report_peer( - blob_peer, - PeerAction::MidToleranceError, - "single_blob_failure", - ); - } - } - - /// This failure occurs on download, so register a failure downloading, penalize the peer - /// and clear the blob cache. - pub fn handle_consistency_failure(&mut self, cx: &SyncNetworkContext) { - self.penalize_blob_peer(cx); - if let Some(cached_child) = self.child_components.as_mut() { - cached_child.clear_blobs(); - } - self.blob_request_state.state.on_download_failure() - } - - /// This failure occurs after processing, so register a failure processing, penalize the peer - /// and clear the blob cache. - pub fn handle_availability_check_failure(&mut self, cx: &SyncNetworkContext) { - self.penalize_blob_peer(cx); - if let Some(cached_child) = self.child_components.as_mut() { - cached_child.clear_blobs(); - } - self.blob_request_state.state.on_processing_failure() + let _block_root = self.block_root(); + // TODO: Check against currently downloaded blobs, and the blobs in the da_checker to figure + // out what is remaining to be downloaded + todo!(); } } @@ -321,9 +191,7 @@ pub struct BlobRequestState { /// the data availability checker. pub requested_ids: MissingBlobs, pub block_root: Hash256, - /// Where we store blobs until we receive the stream terminator. 
- pub blob_download_queue: FixedBlobSidecarList, - pub state: SingleLookupRequestState, + pub state: SingleLookupRequestState>, } impl BlobRequestState { @@ -332,19 +200,18 @@ impl BlobRequestState { Self { block_root, requested_ids: default_ids, - blob_download_queue: <_>::default(), state: SingleLookupRequestState::new(peer_source), } } } /// The state of the block request component of a `SingleBlockLookup`. -pub struct BlockRequestState { +pub struct BlockRequestState { pub requested_block_root: Hash256, - pub state: SingleLookupRequestState, + pub state: SingleLookupRequestState>>, } -impl BlockRequestState { +impl BlockRequestState { pub fn new(block_root: Hash256, peers: &[PeerId]) -> Self { Self { requested_block_root: block_root, @@ -353,36 +220,22 @@ impl BlockRequestState { } } -/// This is the status of cached components for a lookup if they are required. It provides information -/// about whether we should send a responses immediately for processing, whether we require more -/// responses, or whether all cached components have been received and the reconstructed block -/// should be sent for processing. -pub enum CachedChild { - /// All child components have been received, this is the reconstructed block, including all. - /// It has been checked for consistency between blobs and block, but no consensus checks have - /// been performed and no kzg verification has been performed. - Ok(RpcBlock), - /// All child components have not yet been received. - DownloadIncomplete(String), - /// Child components should not be cached, send this directly for processing. - NotRequired, - /// There was an error during consistency checks between block and blobs. 
- Err(AvailabilityCheckError), -} +pub type DownloadResult = (T, Hash256, Duration); #[derive(Debug, PartialEq, Eq)] -pub enum State { +pub enum State { AwaitingDownload, Downloading { peer_id: PeerId }, - Processing { peer_id: PeerId }, + AwaitingProcess(PeerId, DownloadResult), + Processing(PeerId, DownloadResult), Processed { peer_id: PeerId }, } /// Object representing the state of a single block or blob lookup request. #[derive(PartialEq, Eq, Debug)] -pub struct SingleLookupRequestState { +pub struct SingleLookupRequestState { /// State of this request. - state: State, + state: State, /// Peers that should have this block or blob. available_peers: HashSet, /// Peers from which we have requested this block. @@ -391,15 +244,9 @@ pub struct SingleLookupRequestState { failed_processing: u8, /// How many times have we attempted to download this block or blob. failed_downloading: u8, - /// Should be incremented everytime this request is retried. The purpose of this is to - /// differentiate retries of the same block/blob request within a lookup. We currently penalize - /// peers and retry requests prior to receiving the stream terminator. This means responses - /// from a prior request may arrive after a new request has been sent, this counter allows - /// us to differentiate these two responses. 
- req_counter: u32, } -impl SingleLookupRequestState { +impl SingleLookupRequestState { pub fn new(peers: &[PeerId]) -> Self { let mut available_peers = HashSet::default(); for peer in peers.iter().copied() { @@ -412,74 +259,123 @@ impl SingleLookupRequestState { used_peers: HashSet::default(), failed_processing: 0, failed_downloading: 0, - req_counter: 0, } } - pub fn is_current_req_counter(&self, req_counter: u32) -> bool { - self.req_counter == req_counter - } - - pub fn is_awaiting_download(&self) -> bool { - matches!(self.state, State::AwaitingDownload) - } - - pub fn is_downloaded(&self) -> bool { + pub fn is_processed(&self) -> bool { match self.state { State::AwaitingDownload => false, State::Downloading { .. } => false, - State::Processing { .. } => true, + State::AwaitingProcess { .. } => false, + State::Processing { .. } => false, State::Processed { .. } => true, } } - pub fn is_processed(&self) -> bool { - match self.state { - State::AwaitingDownload => false, - State::Downloading { .. } => false, - State::Processing { .. } => false, - State::Processed { .. } => true, + pub fn insert_verified_response(&mut self, peer_id: PeerId, result: DownloadResult) { + match &self.state { + State::AwaitingDownload => { + self.state = State::AwaitingProcess(peer_id, result); + } + _ => {} } } - pub fn on_download_start(&mut self, peer_id: PeerId) -> u32 { - self.state = State::Downloading { peer_id }; - self.req_counter += 1; - self.req_counter + /// Switch to `Downloading` if the request is in `AwaitingDownload` state, otherwise returns None. + pub fn maybe_start_download(&mut self) -> Result, LookupRequestError> { + Ok(match &self.state { + State::AwaitingDownload => { + let peer_id = self + .use_rand_available_peer() + .ok_or(LookupRequestError::NoPeers)?; + self.state = State::Downloading { peer_id }; + Some(peer_id) + } + _ => None, + }) } /// Registers a failure in downloading a block. This might be a peer disconnection or a wrong /// block. 
- pub fn on_download_failure(&mut self) { - self.failed_downloading = self.failed_downloading.saturating_add(1); - self.state = State::AwaitingDownload; + pub fn on_download_failure(&mut self) -> Result<(), LookupRequestError> { + match &self.state { + State::Downloading { .. } => { + self.failed_downloading = self.failed_downloading.saturating_add(1); + self.state = State::AwaitingDownload; + Ok(()) + } + other => Err(LookupRequestError::BadState(format!( + "request bad state, expected Downloading got {other}" + ))), + } } - pub fn on_download_success(&mut self) -> Result<(), String> { + pub fn on_download_success( + &mut self, + result: DownloadResult, + ) -> Result<(), LookupRequestError> { match &self.state { State::Downloading { peer_id } => { - self.state = State::Processing { peer_id: *peer_id }; + self.state = State::AwaitingProcess(*peer_id, result); + Ok(()) + } + other => Err(LookupRequestError::BadState(format!( + "request bad state, expected Downloading got {other}" + ))), + } + } + + /// Switch to `Processing` if the request is in `AwaitingProcess` state, otherwise returns None. + pub fn maybe_start_processing(&mut self) -> Option> { + // For 2 lines replace state with placeholder to gain ownership of `result` + match &self.state { + State::AwaitingProcess(peer_id, result) => { + let result = result.clone(); + self.state = State::Processing(*peer_id, result.clone()); + Some(result) + } + _ => None, + } + } + + /// Revert into `AwaitingProcessing`, if the payload if not invalid and can be submitted for + /// processing latter. 
+ pub fn into_awaiting_processing(&mut self) -> Result<(), LookupRequestError> { + match &self.state { + State::Processing(peer_id, result) => { + self.state = State::AwaitingProcess(*peer_id, result.clone()); Ok(()) } - other => Err(format!( - "request bad state, expected downloading got {other}" - )), + other => Err(LookupRequestError::BadState(format!( + "request bad state, expected Processing got {other}" + ))), } } /// Registers a failure in processing a block. - pub fn on_processing_failure(&mut self) { - self.failed_processing = self.failed_processing.saturating_add(1); - self.state = State::AwaitingDownload; + pub fn on_processing_failure(&mut self) -> Result { + match &self.state { + State::Processing(peer_id, _) => { + let peer_id = *peer_id; + self.failed_processing = self.failed_processing.saturating_add(1); + self.state = State::AwaitingDownload; + Ok(peer_id) + } + other => Err(LookupRequestError::BadState(format!( + "request bad state, expected Processing got {other}" + ))), + } } - pub fn on_processing_success(&mut self) -> Result<(), String> { + pub fn on_processing_success(&mut self) -> Result<(), LookupRequestError> { match &self.state { - State::Processing { peer_id } => { + State::Processing(peer_id, _) => { self.state = State::Processed { peer_id: *peer_id }; Ok(()) } - other => Err(format!("not in processing state: {}", other).to_string()), + other => Err(LookupRequestError::BadState(format!( + "request bad state, expected Processing got {other}" + ))), } } @@ -514,7 +410,7 @@ impl SingleLookupRequestState { /// returns an error. 
pub fn processing_peer(&self) -> Result { match &self.state { - State::Processing { peer_id } | State::Processed { peer_id } => Ok(*peer_id), + State::Processing(peer_id, _) | State::Processed { peer_id } => Ok(*peer_id), other => Err(format!("not in processing state: {}", other).to_string()), } } @@ -547,7 +443,6 @@ impl slog::Value for SingleBlockLookup { serializer: &mut dyn slog::Serializer, ) -> slog::Result { serializer.emit_str("request", key)?; - serializer.emit_arguments("lookup_type", &format_args!("{:?}", self.lookup_type))?; serializer.emit_arguments("hash", &format_args!("{}", self.block_root()))?; serializer.emit_arguments( "blob_ids", @@ -565,7 +460,7 @@ impl slog::Value for SingleBlockLookup { } } -impl slog::Value for SingleLookupRequestState { +impl slog::Value for SingleLookupRequestState { fn serialize( &self, record: &slog::Record, @@ -580,7 +475,9 @@ impl slog::Value for SingleLookupRequestState { State::Downloading { peer_id } => { serializer.emit_arguments("downloading_peer", &format_args!("{}", peer_id))? } - State::Processing { peer_id } => { + State::AwaitingProcess(peer_id, _) => serializer + .emit_arguments("awaiting_processing_peer", &format_args!("{}", peer_id))?, + State::Processing(peer_id, _) => { serializer.emit_arguments("processing_peer", &format_args!("{}", peer_id))? } State::Processed { .. } => "processed".serialize(record, "state", serializer)?, @@ -591,11 +488,12 @@ impl slog::Value for SingleLookupRequestState { } } -impl std::fmt::Display for State { +impl std::fmt::Display for State { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { State::AwaitingDownload => write!(f, "AwaitingDownload"), State::Downloading { .. } => write!(f, "Downloading"), + State::AwaitingProcess { .. } => write!(f, "AwaitingProcessing"), State::Processing { .. } => write!(f, "Processing"), State::Processed { .. 
} => write!(f, "Processed"), } diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index 4fd8d625385..6a09213b0b5 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -1,14 +1,19 @@ use crate::network_beacon_processor::NetworkBeaconProcessor; use crate::service::RequestId; -use crate::sync::manager::{RequestId as SyncRequestId, SingleLookupReqId, SyncManager}; +use crate::sync::manager::{ + BlockProcessType, RequestId as SyncRequestId, SingleLookupReqId, SyncManager, +}; use crate::sync::SyncMessage; use crate::NetworkMessage; use std::sync::Arc; use super::*; -use crate::sync::block_lookups::common::ResponseType; +use crate::sync::block_lookups::common::{ + ResponseType, PARENT_DEPTH_TOLERANCE, PARENT_FAIL_TOLERANCE, +}; +use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::builder::Witness; use beacon_chain::eth1_chain::CachingEth1Backend; use beacon_chain::test_utils::{ @@ -194,7 +199,7 @@ impl TestRig { self.sync_manager.handle_message(sync_message); } - fn active_single_lookups(&self) -> Vec<(Id, Hash256)> { + fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { self.sync_manager.active_single_lookups() } @@ -202,7 +207,7 @@ impl TestRig { self.sync_manager.active_single_lookups().len() } - fn active_parent_lookups(&self) -> Vec { + fn active_parent_lookups(&self) -> Vec> { self.sync_manager.active_parent_lookups() } @@ -210,6 +215,16 @@ impl TestRig { self.sync_manager.active_parent_lookups().len() } + fn assert_parent_lookups_count(&self, count: usize) { + assert_eq!( + self.active_parent_lookups_count(), + count, + "Unexpected count of parent lookups. Parent lookups: {:?}. 
Current lookups: {:?}", + self.active_parent_lookups(), + self.active_single_lookups() + ); + } + fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { self.sync_manager.failed_chains_contains(chain_hash) } @@ -217,16 +232,16 @@ impl TestRig { fn find_single_lookup_for(&self, block_root: Hash256) -> Id { self.active_single_lookups() .iter() - .find(|(_, b)| b == &block_root) + .find(|(_, b, _)| b == &block_root) .unwrap_or_else(|| panic!("no single block lookup found for {block_root}")) .0 } fn expect_no_active_parent_lookups(&self) { - assert_eq!( - self.active_parent_lookups(), - vec![], - "expected no parent lookups" + assert!( + self.active_parent_lookups().is_empty(), + "expected no parent lookups: {:?}", + self.active_parent_lookups() ); } @@ -272,24 +287,34 @@ impl TestRig { blocks: &[Arc>], ) { // Send import events for all pending parent blocks - for block in blocks { + for _ in blocks { self.parent_block_processed_imported(chain_hash); } // Send final import event for the block that triggered the lookup let trigger_lookup = self .active_single_lookups() .iter() - .find(|(_, block_root)| block_root == &chain_hash) + .find(|(_, block_root, _)| block_root == &chain_hash) .copied() .unwrap_or_else(|| panic!("There should exist a single block lookup for {chain_hash}")); self.single_block_component_processed_imported(trigger_lookup.0, chain_hash); } fn parent_block_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { - self.send_sync_message(SyncMessage::BlockComponentProcessed { - process_type: BlockProcessType::ParentLookup { chain_hash }, - result, - }); + // Locate a parent lookup chain with tip hash `chain_hash` + let parent_chain = self + .active_parent_lookups() + .into_iter() + .find(|chain| chain.first() == Some(&chain_hash)) + .unwrap_or_else(|| { + panic!( + "No parent chain with chain_hash {chain_hash:?}: {:?}", + self.active_parent_lookups() + ) + }); + + let id = 
self.find_single_lookup_for(*parent_chain.last().unwrap()); + self.single_block_component_processed(id, result); } fn parent_block_processed_imported(&mut self, chain_hash: Hash256) { @@ -319,7 +344,7 @@ impl TestRig { result: BlockProcessingResult, ) { self.send_sync_message(SyncMessage::BlockComponentProcessed { - process_type: BlockProcessType::SingleBlob { id: id.id }, + process_type: BlockProcessType::SingleBlob { id: id.lookup_id }, result, }) } @@ -330,6 +355,7 @@ impl TestRig { peer_id: PeerId, beacon_block: Option>>, ) { + self.log("parent_lookup_block_response"); self.send_sync_message(SyncMessage::RpcBlock { request_id: SyncRequestId::SingleBlock { id }, peer_id, @@ -344,6 +370,7 @@ impl TestRig { peer_id: PeerId, beacon_block: Option>>, ) { + self.log("single_lookup_block_response"); self.send_sync_message(SyncMessage::RpcBlock { request_id: SyncRequestId::SingleBlock { id }, peer_id, @@ -358,6 +385,7 @@ impl TestRig { peer_id: PeerId, blob_sidecar: Option>>, ) { + self.log("parent_lookup_blob_response"); self.send_sync_message(SyncMessage::RpcBlob { request_id: SyncRequestId::SingleBlob { id }, peer_id, @@ -444,11 +472,7 @@ impl TestRig { peer_id: _, request: Request::BlocksByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlock { id }), - } if id.lookup_type == LookupType::Current - && request.block_roots().to_vec().contains(&for_block) => - { - Some(*id) - } + } if request.block_roots().to_vec().contains(&for_block) => Some(*id), _ => None, }) .unwrap_or_else(|e| panic!("Expected block request for {for_block:?}: {e}")) @@ -461,12 +485,11 @@ impl TestRig { peer_id: _, request: Request::BlobsByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlob { id }), - } if id.lookup_type == LookupType::Current - && request - .blob_ids - .to_vec() - .iter() - .any(|r| r.block_root == for_block) => + } if request + .blob_ids + .to_vec() + .iter() + .any(|r| r.block_root == for_block) => { Some(*id) } @@ -482,11 +505,7 @@ impl TestRig 
{ peer_id: _, request: Request::BlocksByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlock { id }), - } if id.lookup_type == LookupType::Parent - && request.block_roots().to_vec().contains(&for_block) => - { - Some(*id) - } + } if request.block_roots().to_vec().contains(&for_block) => Some(*id), _ => None, }) .unwrap_or_else(|e| panic!("Expected block parent request for {for_block:?}: {e}")) @@ -499,12 +518,11 @@ impl TestRig { peer_id: _, request: Request::BlobsByRoot(request), request_id: RequestId::Sync(SyncRequestId::SingleBlob { id }), - } if id.lookup_type == LookupType::Parent - && request - .blob_ids - .to_vec() - .iter() - .all(|r| r.block_root == for_block) => + } if request + .blob_ids + .to_vec() + .iter() + .all(|r| r.block_root == for_block) => { Some(*id) } @@ -669,7 +687,7 @@ fn test_single_block_lookup_happy_path() { // Send the stream termination. Peer should have not been penalized, and the request removed // after processing. rig.single_lookup_block_response(id, peer_id, None); - rig.single_block_component_processed_imported(id.id, block_root); + rig.single_block_component_processed_imported(id.lookup_id, block_root); rig.expect_empty_network(); rig.expect_no_active_lookups(); } @@ -756,7 +774,7 @@ fn test_single_block_lookup_becomes_parent_request() { // Send the stream termination. Peer should have not been penalized, and the request moved to a // parent request after processing. 
rig.single_block_component_processed( - id.id, + id.lookup_id, BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), ); assert_eq!(rig.active_single_lookups_count(), 1); @@ -887,7 +905,7 @@ fn test_parent_lookup_too_many_attempts() { // Trigger the request rig.trigger_unknown_parent_block(peer_id, block.into()); - for i in 1..=parent_lookup::PARENT_FAIL_TOLERANCE { + for i in 1..=PARENT_FAIL_TOLERANCE { let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the first iteration as this test only retries blocks if rig.after_deneb() && i == 1 { @@ -928,7 +946,7 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { // Trigger the request rig.trigger_unknown_parent_block(peer_id, block.into()); - for i in 1..=parent_lookup::PARENT_FAIL_TOLERANCE { + for i in 1..=PARENT_FAIL_TOLERANCE { assert!(!rig.failed_chains_contains(&block_root)); let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the first iteration as this test only retries blocks @@ -953,7 +971,7 @@ fn test_parent_lookup_too_many_download_attempts_no_blacklist() { #[test] fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { - const PROCESSING_FAILURES: u8 = parent_lookup::PARENT_FAIL_TOLERANCE / 2 + 1; + const PROCESSING_FAILURES: u8 = PARENT_FAIL_TOLERANCE / 2 + 1; let mut rig = TestRig::test_setup(); let (parent, block, parent_root, block_root) = rig.rand_block_and_parent(); let peer_id = rig.new_connected_peer(); @@ -962,7 +980,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { rig.trigger_unknown_parent_block(peer_id, block.into()); rig.log("Fail downloading the block"); - for i in 0..(parent_lookup::PARENT_FAIL_TOLERANCE - PROCESSING_FAILURES) { + for i in 0..(PARENT_FAIL_TOLERANCE - PROCESSING_FAILURES) { let id = rig.expect_block_parent_request(parent_root); // Blobs are only requested in the first iteration as this test only retries blocks if rig.after_deneb() && 
i == 0 { @@ -994,7 +1012,7 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { #[test] fn test_parent_lookup_too_deep() { let mut rig = TestRig::test_setup(); - let mut blocks = rig.rand_blockchain(parent_lookup::PARENT_DEPTH_TOLERANCE); + let mut blocks = rig.rand_blockchain(PARENT_DEPTH_TOLERANCE); let peer_id = rig.new_connected_peer(); let trigger_block = blocks.pop().unwrap(); @@ -1055,7 +1073,7 @@ fn test_single_block_lookup_ignored_response() { // after processing. rig.single_lookup_block_response(id, peer_id, None); // Send an Ignored response, the request should be dropped - rig.single_block_component_processed(id.id, BlockProcessingResult::Ignored); + rig.single_block_component_processed(id.lookup_id, BlockProcessingResult::Ignored); rig.expect_empty_network(); rig.expect_no_active_lookups(); } @@ -1108,12 +1126,13 @@ fn test_same_chain_race_condition() { rig.expect_block_process(ResponseType::Block); // the processing result if i + 2 == depth { - // one block was removed + rig.log(&format!("Block {i} was removed and is already known")); rig.parent_block_processed( chain_hash, BlockError::BlockIsAlreadyKnown(block.canonical_root()).into(), ) } else { + rig.log(&format!("Block {i} ParentUnknown")); rig.parent_block_processed( chain_hash, BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), @@ -1134,9 +1153,18 @@ fn test_same_chain_race_condition() { rig.expect_no_active_lookups(); } +#[test] +fn test_penalize_wrong_peer_with_cached_child() { + // peer A sends blob with malicious data as unknown parent + // peer B serves parent and rest of blocks + // All components are sent as RpcBlock, penalizing peer B +} + mod deneb_only { use super::*; - use beacon_chain::data_availability_checker::AvailabilityCheckError; + use beacon_chain::{ + block_verification_types::RpcBlock, data_availability_checker::AvailabilityCheckError, + }; use ssz_types::VariableList; use std::collections::VecDeque; @@ -1276,7 +1304,6 @@ mod 
deneb_only { } fn parent_block_response(mut self) -> Self { - self.rig.log("parent_block_response"); self.rig.expect_empty_network(); let block = self.parent_block.pop_front().unwrap().clone(); let _ = self.unknown_parent_block.insert(block.clone()); @@ -1286,12 +1313,11 @@ mod deneb_only { Some(block), ); - assert_eq!(self.rig.active_parent_lookups_count(), 1); + self.rig.assert_parent_lookups_count(1); self } fn parent_blob_response(mut self) -> Self { - self.rig.log("parent_blob_response"); let blobs = self.parent_blobs.pop_front().unwrap(); let _ = self.unknown_parent_blobs.insert(blobs.clone()); for blob in &blobs { @@ -1321,7 +1347,6 @@ mod deneb_only { } fn block_response(mut self) -> Self { - self.rig.log("block_response"); // The peer provides the correct block, should not be penalized. Now the block should be sent // for processing. self.rig.single_lookup_block_response( @@ -1408,7 +1433,7 @@ mod deneb_only { // Missing blobs should be the request is not removed, the outstanding blobs request should // mean we do not send a new request. 
self.rig.single_block_component_processed( - self.block_req_id.expect("block request id").id, + self.block_req_id.expect("block request id").lookup_id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), ); self.rig.expect_empty_network(); @@ -1423,7 +1448,7 @@ mod deneb_only { BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), ); self.rig.expect_empty_network(); - assert_eq!(self.rig.active_parent_lookups_count(), 0); + self.rig.assert_parent_lookups_count(0); self } @@ -1456,7 +1481,7 @@ mod deneb_only { fn invalid_block_processed(mut self) -> Self { self.rig.single_block_component_processed( - self.block_req_id.expect("block request id").id, + self.block_req_id.expect("block request id").lookup_id, BlockProcessingResult::Err(BlockError::ProposalSignatureInvalid), ); assert_eq!(self.rig.active_single_lookups_count(), 1); @@ -1465,7 +1490,7 @@ mod deneb_only { fn invalid_blob_processed(mut self) -> Self { self.rig.single_block_component_processed( - self.blob_req_id.expect("blob request id").id, + self.blob_req_id.expect("blob request id").lookup_id, BlockProcessingResult::Err(BlockError::AvailabilityCheck( AvailabilityCheckError::KzgVerificationFailed, )), @@ -1476,7 +1501,7 @@ mod deneb_only { fn missing_components_from_block_request(mut self) -> Self { self.rig.single_block_component_processed( - self.block_req_id.expect("block request id").id, + self.block_req_id.expect("block request id").lookup_id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( self.slot, self.block_root, diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 73b6bcf3f23..0b30f2f3819 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -34,7 +34,6 @@ //! search for the block and subsequently search for parents if needed. 
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart}; -use super::block_lookups::common::LookupType; use super::block_lookups::BlockLookups; use super::network_context::{BlockOrBlob, RangeRequestId, RpcEvent, SyncNetworkContext}; use super::peer_sync_info::{remote_sync_type, PeerSyncType}; @@ -42,13 +41,13 @@ use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH}; use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor}; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; -use crate::sync::block_lookups::{BlobRequestState, BlockRequestState}; +use crate::sync::block_lookups::{BlobRequestState, BlockComponent, BlockRequestState}; use crate::sync::block_sidecar_coupling::BlocksAndBlobsRequestInfo; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; -use beacon_chain::data_availability_checker::ChildComponents; +use beacon_chain::validator_monitor::timestamp_now; use beacon_chain::{ - AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError, EngineState, + AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlobError, BlockError, EngineState, }; use futures::StreamExt; use lighthouse_network::rpc::RPCError; @@ -56,12 +55,10 @@ use lighthouse_network::types::{NetworkGlobals, SyncState}; use lighthouse_network::SyncInfo; use lighthouse_network::{PeerAction, PeerId}; use slog::{crit, debug, error, info, trace, warn, Logger}; -use std::ops::IndexMut; use std::ops::Sub; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; -use types::blob_sidecar::FixedBlobSidecarList; use types::{BlobSidecar, EthSpec, Hash256, SignedBeaconBlock, Slot}; /// The number of slots ahead of us that is allowed before requesting a long-range (batch) Sync @@ -77,9 +74,8 @@ pub type Id = u32; #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] pub struct SingleLookupReqId { - pub id: Id, - pub req_counter: Id, - pub lookup_type: 
LookupType, + pub lookup_id: Id, + pub req_id: Id, } /// Id of rpc requests sent by sync to the network. @@ -153,7 +149,6 @@ pub enum SyncMessage { pub enum BlockProcessType { SingleBlock { id: Id }, SingleBlob { id: Id }, - ParentLookup { chain_hash: Hash256 }, } #[derive(Debug)] @@ -263,12 +258,12 @@ impl SyncManager { } #[cfg(test)] - pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256)> { + pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { self.block_lookups.active_single_lookups() } #[cfg(test)] - pub(crate) fn active_parent_lookups(&self) -> Vec { + pub(crate) fn active_parent_lookups(&self) -> Vec> { self.block_lookups.active_parent_lookups() } @@ -578,27 +573,20 @@ impl SyncManager { block_root, parent_root, block_slot, - block.into(), + BlockComponent::Block((block.block_cloned(), block_root, timestamp_now())), ); } SyncMessage::UnknownParentBlob(peer_id, blob) => { let blob_slot = blob.slot(); let block_root = blob.block_root(); let parent_root = blob.block_parent_root(); - let blob_index = blob.index; - if blob_index >= T::EthSpec::max_blobs_per_block() as u64 { - warn!(self.log, "Peer sent blob with invalid index"; "index" => blob_index, "peer_id" => %peer_id); - return; - } - let mut blobs = FixedBlobSidecarList::default(); - *blobs.index_mut(blob_index as usize) = Some(blob); debug!(self.log, "Received unknown parent blob message"; "block_root" => %block_root, "parent_root" => %parent_root); self.handle_unknown_parent( peer_id, block_root, parent_root, blob_slot, - ChildComponents::new(block_root, None, Some(blobs)), + BlockComponent::Blob((blob, block_root, timestamp_now())), ); } SyncMessage::UnknownBlockHashFromAttestation(peer_id, block_root) => { @@ -617,25 +605,9 @@ impl SyncManager { SyncMessage::BlockComponentProcessed { process_type, result, - } => match process_type { - BlockProcessType::SingleBlock { id } => self - .block_lookups - .single_block_component_processed::( - id, - result, - &mut self.network, 
- ), - BlockProcessType::SingleBlob { id } => self - .block_lookups - .single_block_component_processed::>( - id, - result, - &mut self.network, - ), - BlockProcessType::ParentLookup { chain_hash } => self - .block_lookups - .parent_block_processed(chain_hash, result, &mut self.network), - }, + } => self + .block_lookups + .on_processing_result(process_type, result, &mut self.network), SyncMessage::BatchProcessed { sync_type, result } => match sync_type { ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => { self.range_sync.handle_block_process_result( @@ -671,20 +643,19 @@ impl SyncManager { block_root: Hash256, parent_root: Hash256, slot: Slot, - child_components: ChildComponents, + unknown_parent_trigger: BlockComponent, ) { match self.should_search_for_block(Some(slot), &peer_id) { Ok(_) => { - self.block_lookups.search_parent( - slot, - block_root, + self.block_lookups.search_parent_of_child( parent_root, - peer_id, + block_root, + &[peer_id], &mut self.network, ); - self.block_lookups.search_child_block( + self.block_lookups.search_block( block_root, - child_components, + Some((peer_id, unknown_parent_trigger)), &[peer_id], &mut self.network, ); @@ -699,7 +670,7 @@ impl SyncManager { match self.should_search_for_block(None, &peer_id) { Ok(_) => { self.block_lookups - .search_block(block_root, &[peer_id], &mut self.network); + .search_block(block_root, None, &[peer_id], &mut self.network); } Err(reason) => { debug!(self.log, "Ignoring unknown block request"; "block_root" => %block_root, "reason" => reason); @@ -771,11 +742,6 @@ impl SyncManager { let dropped_single_blocks_requests = self.block_lookups.drop_single_block_requests(); - // - Parent lookups: - // Disabled while in this state. We drop current requests and don't search for new - // blocks. 
- let dropped_parent_chain_requests = self.block_lookups.drop_parent_chain_requests(); - // - Range: // We still send found peers to range so that it can keep track of potential chains // with respect to our current peers. Range will stop processing batches in the @@ -784,10 +750,9 @@ impl SyncManager { // - Backfill: Not affected by ee states, nothing to do. // Some logs. - if dropped_single_blocks_requests > 0 || dropped_parent_chain_requests > 0 { + if dropped_single_blocks_requests > 0 { debug!(self.log, "Execution engine not online. Dropping active requests."; "dropped_single_blocks_requests" => dropped_single_blocks_requests, - "dropped_parent_chain_requests" => dropped_parent_chain_requests, ); } } @@ -826,46 +791,13 @@ impl SyncManager { block: RpcEvent>>, ) { if let Some(resp) = self.network.on_single_block_response(id, block) { - match resp { - Ok((block, seen_timestamp)) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_lookup_response::( - id, - peer_id, - block, - seen_timestamp, - &mut self.network, - ), - LookupType::Parent => self - .block_lookups - .parent_lookup_response::( - id, - peer_id, - block, - seen_timestamp, - &mut self.network, - ), - }, - Err(error) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_block_lookup_failed::( - id, - &peer_id, - &mut self.network, - error, - ), - LookupType::Parent => self - .block_lookups - .parent_lookup_failed::( - id, - &peer_id, - &mut self.network, - error, - ), - }, - } + self.block_lookups + .on_download_response::>( + id.lookup_id, + peer_id, + resp, + &mut self.network, + ) } } @@ -901,47 +833,13 @@ impl SyncManager { blob: RpcEvent>>, ) { if let Some(resp) = self.network.on_single_blob_response(id, blob) { - match resp { - Ok((blobs, seen_timestamp)) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_lookup_response::>( - id, - peer_id, - blobs, - seen_timestamp, - &mut self.network, - ), - 
LookupType::Parent => self - .block_lookups - .parent_lookup_response::>( - id, - peer_id, - blobs, - seen_timestamp, - &mut self.network, - ), - }, - - Err(error) => match id.lookup_type { - LookupType::Current => self - .block_lookups - .single_block_lookup_failed::>( - id, - &peer_id, - &mut self.network, - error, - ), - LookupType::Parent => self - .block_lookups - .parent_lookup_failed::>( - id, - &peer_id, - &mut self.network, - error, - ), - }, - } + self.block_lookups + .on_download_response::>( + id.lookup_id, + peer_id, + resp, + &mut self.network, + ) } } diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index fc91270c1dc..2f6724aac4e 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -4,11 +4,12 @@ use self::requests::{ActiveBlobsByRootRequest, ActiveBlocksByRootRequest}; pub use self::requests::{BlobsByRootSingleBlockRequest, BlocksByRootSingleRequest}; use super::block_sidecar_coupling::BlocksAndBlobsRequestInfo; -use super::manager::{Id, RequestId as SyncRequestId}; +use super::manager::{BlockProcessType, Id, RequestId as SyncRequestId}; use super::range_sync::{BatchId, ByRangeRequestType, ChainId}; use crate::network_beacon_processor::NetworkBeaconProcessor; use crate::service::{NetworkMessage, RequestId}; use crate::status::ToStatusMessage; +use crate::sync::block_lookups::SingleLookupId; use crate::sync::manager::SingleLookupReqId; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::validator_monitor::timestamp_now; @@ -18,13 +19,13 @@ use lighthouse_network::rpc::methods::BlobsByRangeRequest; use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError}; use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource, Request}; pub use requests::LookupVerifyError; -use slog::{debug, trace, warn}; +use slog::{debug, error, trace, warn}; use 
std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; use types::blob_sidecar::FixedBlobSidecarList; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; +use types::{BlobSidecar, EthSpec, Hash256, SignedBeaconBlock}; mod requests; @@ -52,7 +53,7 @@ pub enum RpcEvent { RPCError(RPCError), } -pub type RpcProcessingResult = Option>; +pub type RpcProcessingResult = Result<(T, Duration), LookupFailure>; pub enum LookupFailure { RpcError(RPCError), @@ -297,10 +298,15 @@ impl SyncNetworkContext { pub fn block_lookup_request( &mut self, - id: SingleLookupReqId, + lookup_id: SingleLookupId, peer_id: PeerId, request: BlocksByRootSingleRequest, ) -> Result<(), &'static str> { + let id = SingleLookupReqId { + lookup_id, + req_id: self.next_id(), + }; + debug!( self.log, "Sending BlocksByRoot Request"; @@ -324,10 +330,15 @@ impl SyncNetworkContext { pub fn blob_lookup_request( &mut self, - id: SingleLookupReqId, + lookup_id: SingleLookupId, peer_id: PeerId, request: BlobsByRootSingleBlockRequest, ) -> Result<(), &'static str> { + let id = SingleLookupReqId { + lookup_id, + req_id: self.next_id(), + }; + debug!( self.log, "Sending BlobsByRoot Request"; @@ -458,7 +469,7 @@ impl SyncNetworkContext { &mut self, request_id: SingleLookupReqId, block: RpcEvent>>, - ) -> RpcProcessingResult>> { + ) -> Option>>> { let Entry::Occupied(mut request) = self.blocks_by_root_requests.entry(request_id) else { return None; }; @@ -489,7 +500,7 @@ impl SyncNetworkContext { &mut self, request_id: SingleLookupReqId, blob: RpcEvent>>, - ) -> RpcProcessingResult> { + ) -> Option>> { let Entry::Occupied(mut request) = self.blobs_by_root_requests.entry(request_id) else { return None; }; @@ -520,6 +531,69 @@ impl SyncNetworkContext { } }) } + + pub fn send_block_for_processing( + &self, + block_root: Hash256, + block: RpcBlock, + duration: Duration, + process_type: BlockProcessType, + ) -> Result<(), &'static str> { + match 
self.beacon_processor_if_enabled() { + Some(beacon_processor) => { + debug!(self.log, "Sending block for processing"; "block" => ?block_root, "process" => ?process_type); + if let Err(e) = beacon_processor.send_rpc_beacon_block( + block_root, + block, + duration, + process_type, + ) { + error!( + self.log, + "Failed to send sync block to processor"; + "error" => ?e + ); + Err("beacon processor send failure") + } else { + Ok(()) + } + } + None => { + trace!(self.log, "Dropping block ready for processing. Beacon processor not available"; "block" => %block_root); + Err("beacon processor unavailable") + } + } + } + + pub fn send_blobs_for_processing( + &self, + block_root: Hash256, + blobs: FixedBlobSidecarList, + duration: Duration, + process_type: BlockProcessType, + ) -> Result<(), &'static str> { + match self.beacon_processor_if_enabled() { + Some(beacon_processor) => { + trace!(self.log, "Sending blobs for processing"; "block" => ?block_root, "process_type" => ?process_type); + if let Err(e) = + beacon_processor.send_rpc_blobs(block_root, blobs, duration, process_type) + { + error!( + self.log, + "Failed to send sync blobs to processor"; + "error" => ?e + ); + Err("beacon processor send failure") + } else { + Ok(()) + } + } + None => { + trace!(self.log, "Dropping blobs ready for processing. 
Beacon processor not available"; "block_root" => %block_root); + Err("beacon processor unavailable") + } + } + } } fn to_fixed_blob_sidecar_list( From 5d29618b72f73e2a58a5731590bbb6a337101bb9 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 25 Apr 2024 21:59:22 +0900 Subject: [PATCH 05/14] Merge current and parent lookups clean up todos --- .../src/data_availability_checker.rs | 145 ++-------- .../overflow_lru_cache.rs | 2 +- beacon_node/beacon_chain/src/lib.rs | 6 +- .../network/src/sync/block_lookups/common.rs | 59 ++-- .../network/src/sync/block_lookups/mod.rs | 177 ++++++------ .../sync/block_lookups/single_block_lookup.rs | 272 +++++++----------- .../network/src/sync/block_lookups/tests.rs | 7 +- beacon_node/network/src/sync/manager.rs | 29 +- .../network/src/sync/network_context.rs | 60 +++- 9 files changed, 339 insertions(+), 418 deletions(-) diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 674c54b9f59..e568aed0af5 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -5,10 +5,8 @@ use crate::block_verification_types::{ use crate::data_availability_checker::overflow_lru_cache::OverflowLRUCache; use crate::{BeaconChain, BeaconChainTypes, BeaconStore}; use kzg::Kzg; -use slasher::test_utils::E; use slog::{debug, error, Logger}; use slot_clock::SlotClock; -use ssz_types::FixedVector; use std::fmt; use std::fmt::Debug; use std::num::NonZeroUsize; @@ -91,68 +89,27 @@ impl DataAvailabilityChecker { self.availability_cache.has_block(block_root) } - pub fn get_missing_blob_ids_with(&self, block_root: Hash256) -> MissingBlobs { + /// Return the required blobs `block_root` expects if the block is currenlty in the cache. 
+ pub fn num_expected_blobs(&self, block_root: &Hash256) -> Option { self.availability_cache - .with_pending_components(&block_root, |pending_components| match pending_components { - Some(pending_components) => self.get_missing_blob_ids( - block_root, - pending_components - .get_cached_block() - .as_ref() - .map(|b| b.as_block()), - &pending_components.verified_blobs, - ), - None => MissingBlobs::new_without_block(block_root, self.is_deneb()), + .peek_pending_components(block_root, |components| { + components.and_then(|components| components.num_expected_blobs()) }) } - /// If there's no block, all possible ids will be returned that don't exist in the given blobs. - /// If there no blobs, all possible ids will be returned. - pub fn get_missing_blob_ids( - &self, - block_root: Hash256, - block: Option<&SignedBeaconBlock>, - blobs: &FixedVector, ::MaxBlobsPerBlock>, - ) -> MissingBlobs { - let Some(current_slot) = self.slot_clock.now_or_genesis() else { - error!( - self.log, - "Failed to read slot clock when checking for missing blob ids" - ); - return MissingBlobs::BlobsNotRequired; - }; - - let current_epoch = current_slot.epoch(T::EthSpec::slots_per_epoch()); - - if self.da_check_required_for_epoch(current_epoch) { - match block { - Some(cached_block) => { - let block_commitments_len = cached_block - .message() - .body() - .blob_kzg_commitments() - .map(|v| v.len()) - .unwrap_or(0); - let blob_ids = blobs + /// Return the set of imported blob indexes for `block_root`. Returns None if there is no block + /// component for `block_root`. 
+ pub fn imported_blob_indexes(&self, block_root: &Hash256) -> Option> { + self.availability_cache + .peek_pending_components(block_root, |components| { + components.map(|components| { + components + .get_cached_blobs() .iter() - .take(block_commitments_len) - .enumerate() - .filter_map(|(index, blob_commitment_opt)| { - blob_commitment_opt.is_none().then_some(BlobIdentifier { - block_root, - index: index as u64, - }) - }) - .collect(); - MissingBlobs::KnownMissing(blob_ids) - } - None => { - MissingBlobs::PossibleMissing(BlobIdentifier::get_all_blob_ids::(block_root)) - } - } - } else { - MissingBlobs::BlobsNotRequired - } + .filter_map(|blob| blob.as_ref().map(|blob| blob.blob_index())) + .collect::>() + }) + }) } /// Get a blob from the availability cache. @@ -338,6 +295,18 @@ impl DataAvailabilityChecker { .map_or(false, |da_epoch| block_epoch >= da_epoch) } + pub fn da_check_required_for_current_epoch(&self) -> bool { + let Some(current_slot) = self.slot_clock.now_or_genesis() else { + error!( + self.log, + "Failed to read slot clock when checking for missing blob ids" + ); + return false; + }; + + self.da_check_required_for_epoch(current_slot.epoch(T::EthSpec::slots_per_epoch())) + } + /// Returns `true` if the current epoch is greater than or equal to the `Deneb` epoch. pub fn is_deneb(&self) -> bool { self.slot_clock.now().map_or(false, |slot| { @@ -523,61 +492,3 @@ impl MaybeAvailableBlock { } } } - -#[derive(Debug, Clone)] -pub enum MissingBlobs { - /// We know for certain these blobs are missing. - KnownMissing(Vec), - /// We think these blobs might be missing. - PossibleMissing(Vec), - /// Blobs are not required. 
- BlobsNotRequired, -} - -impl MissingBlobs { - pub fn new_without_block(block_root: Hash256, is_deneb: bool) -> Self { - if is_deneb { - MissingBlobs::PossibleMissing(BlobIdentifier::get_all_blob_ids::(block_root)) - } else { - MissingBlobs::BlobsNotRequired - } - } - pub fn is_empty(&self) -> bool { - match self { - MissingBlobs::KnownMissing(v) => v.is_empty(), - MissingBlobs::PossibleMissing(v) => v.is_empty(), - MissingBlobs::BlobsNotRequired => true, - } - } - pub fn contains(&self, blob_id: &BlobIdentifier) -> bool { - match self { - MissingBlobs::KnownMissing(v) => v.contains(blob_id), - MissingBlobs::PossibleMissing(v) => v.contains(blob_id), - MissingBlobs::BlobsNotRequired => false, - } - } - pub fn remove(&mut self, blob_id: &BlobIdentifier) { - match self { - MissingBlobs::KnownMissing(v) => v.retain(|id| id != blob_id), - MissingBlobs::PossibleMissing(v) => v.retain(|id| id != blob_id), - MissingBlobs::BlobsNotRequired => {} - } - } - pub fn indices(&self) -> Vec { - match self { - MissingBlobs::KnownMissing(v) => v.iter().map(|id| id.index).collect(), - MissingBlobs::PossibleMissing(v) => v.iter().map(|id| id.index).collect(), - MissingBlobs::BlobsNotRequired => vec![], - } - } -} - -impl Into> for MissingBlobs { - fn into(self) -> Vec { - match self { - MissingBlobs::KnownMissing(v) => v, - MissingBlobs::PossibleMissing(v) => v, - MissingBlobs::BlobsNotRequired => vec![], - } - } -} diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index edd981e6ddb..4a261c41414 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -562,7 +562,7 @@ impl OverflowLRUCache { } } - pub fn with_pending_components>) -> R>( + pub fn peek_pending_components>) -> R>( &self, block_root: &Hash256, f: F, diff --git 
a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index a091b0894ab..71c96d0fd55 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -74,9 +74,9 @@ pub use self::historical_blocks::HistoricalBlockError; pub use attestation_verification::Error as AttestationError; pub use beacon_fork_choice_store::{BeaconForkChoiceStore, Error as ForkChoiceStoreError}; pub use block_verification::{ - get_block_root, BlobError, BlockError, ExecutionPayloadError, ExecutionPendingBlock, - GossipVerifiedBlock, IntoExecutionPendingBlock, IntoGossipVerifiedBlockContents, - PayloadVerificationOutcome, PayloadVerificationStatus, + get_block_root, BlockError, ExecutionPayloadError, ExecutionPendingBlock, GossipVerifiedBlock, + IntoExecutionPendingBlock, IntoGossipVerifiedBlockContents, PayloadVerificationOutcome, + PayloadVerificationStatus, }; pub use block_verification_types::AvailabilityPendingExecutedBlock; pub use block_verification_types::ExecutedBlock; diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 5ea62f1ae28..1b56bbd0c5f 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -23,8 +23,6 @@ pub enum ResponseType { Blob, } -/// How many attempts we try to find a parent of a block before we give up trying. -pub(crate) const PARENT_FAIL_TOLERANCE: u8 = 5; /// The maximum depth we will search for a parent block. In principle we should have sync'd any /// canonical chain to its head once the peer connects. A chain should not appear where it's depth /// is further back than the most recent head slot. @@ -45,27 +43,46 @@ pub trait RequestState { /// The type created after validation. 
type VerifiedResponseType: Clone; + /// Potentially makes progress on this request if it's in a progress-able state fn continue_request( &mut self, id: Id, + awaiting_parent: bool, + downloaded_block_expected_blobs: Option, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - if let Some(peer_id) = Self::get_state_mut(self).maybe_start_download()? { + // Attempt to progress awaiting downloads + if self.get_state().is_awaiting_download() { // Verify the current request has not exceeded the maximum number of attempts. - let request_state = self.get_state(); // TODO: Okay to use `SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS` for both current and parent // lookups now? It not trivial to identify what is a "parent lookup" now. + let request_state = self.get_state(); if request_state.failed_attempts() >= SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS { let cannot_process = request_state.more_failed_processing_attempts(); return Err(LookupRequestError::TooManyAttempts { cannot_process }); } - // Make request - return self.make_request(id, peer_id, cx); - } - if let Some(result) = Self::get_state_mut(self).maybe_start_processing() { - return Self::send_for_processing(id, result, cx); + let peer_id = self + .get_state_mut() + .use_rand_available_peer() + .ok_or(LookupRequestError::NoPeers)?; + + // make_request returns true only if a request was made + if self.make_request(id, peer_id, downloaded_block_expected_blobs, cx)? { + self.get_state_mut().on_download_start()?; + } + + // Otherwise, attempt to progress awaiting processing + // If this request is awaiting a parent lookup to be processed, do not send for processing. + // The request will be rejected with unknown parent error. + } else if !awaiting_parent { + // maybe_start_processing returns Some if state == AwaitingProcess. This pattern is + // useful to conditionally access the result data. 
+ if let Some(result) = self.get_state_mut().maybe_start_processing() { + return Self::send_for_processing(id, result, cx); + } } + Ok(()) } @@ -74,8 +91,9 @@ pub trait RequestState { &self, id: Id, peer_id: PeerId, + downloaded_block_expected_blobs: Option, cx: &mut SyncNetworkContext, - ) -> Result<(), LookupRequestError>; + ) -> Result; /* Response handling methods */ @@ -113,8 +131,9 @@ impl RequestState for BlockRequestState { &self, id: SingleLookupId, peer_id: PeerId, + _: Option, cx: &mut SyncNetworkContext, - ) -> Result<(), LookupRequestError> { + ) -> Result { cx.block_lookup_request( id, peer_id, @@ -129,7 +148,7 @@ impl RequestState for BlockRequestState { fn send_for_processing( id: SingleLookupId, - (block, block_root, seen_timestamp): DownloadResult, + (block, block_root, seen_timestamp, _): DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { cx.send_block_for_processing( @@ -163,20 +182,14 @@ impl RequestState for BlobRequestState { &self, id: Id, peer_id: PeerId, + downloaded_block_expected_blobs: Option, cx: &mut SyncNetworkContext, - ) -> Result<(), LookupRequestError> { - // TODO: Use cx to figure out which blobs are still to be downloaded - // - Check against the current cached block in the blocks response the required num of blobs - // - Check against da checker if there's a blob how many we need - // - Check against da checker if there are some blobs already downloaded - + ) -> Result { cx.blob_lookup_request( id, peer_id, - BlobsByRootSingleBlockRequest { - block_root: self.block_root, - indices: self.requested_ids.indices(), - }, + self.block_root, + downloaded_block_expected_blobs, ) .map_err(LookupRequestError::SendFailed) } @@ -191,7 +204,7 @@ impl RequestState for BlobRequestState { fn send_for_processing( id: Id, - (verified, block_root, seen_timestamp): DownloadResult, + (verified, block_root, seen_timestamp, _): DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { 
cx.send_blobs_for_processing( diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 0897b04e8d0..094bebf0c4f 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,15 +1,12 @@ use self::single_block_lookup::{DownloadResult, LookupRequestError, SingleBlockLookup}; use super::manager::{BlockProcessType, BlockProcessingResult}; -use super::network_context::LookupVerifyError; use super::network_context::{RpcProcessingResult, SyncNetworkContext}; use crate::metrics; use crate::sync::block_lookups::common::PARENT_DEPTH_TOLERANCE; use crate::sync::manager::Id; use crate::sync::network_context::LookupFailure; use beacon_chain::block_verification_types::AsBlock; -use beacon_chain::data_availability_checker::{ - AvailabilityCheckErrorCategory, DataAvailabilityChecker, -}; +use beacon_chain::data_availability_checker::AvailabilityCheckErrorCategory; use beacon_chain::{AvailabilityProcessingStatus, BeaconChainTypes, BlockError}; pub use common::RequestState; use fnv::FnvHashMap; @@ -21,7 +18,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use store::Hash256; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock, Slot}; +use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; pub mod common; mod single_block_lookup; @@ -49,7 +46,7 @@ pub type SingleLookupId = u32; enum Action { Retry, - ParentUnknown { parent_root: Hash256, slot: Slot }, + ParentUnknown { parent_root: Hash256 }, Drop, Continue, } @@ -60,20 +57,17 @@ pub struct BlockLookups { single_block_lookups: FnvHashMap>, - pub(crate) da_checker: Arc>, - /// The logger for the import manager. 
log: Logger, } impl BlockLookups { - pub fn new(da_checker: Arc>, log: Logger) -> Self { + pub fn new(log: Logger) -> Self { Self { failed_chains: LRUTimeCache::new(Duration::from_secs( FAILED_CHAINS_CACHE_EXPIRY_SECONDS, )), single_block_lookups: Default::default(), - da_checker, log, } } @@ -82,7 +76,7 @@ impl BlockLookups { pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { self.single_block_lookups .iter() - .map(|(id, e)| (*id, e.block_root(), e.parent_root())) + .map(|(id, e)| (*id, e.block_root(), e.awaiting_parent())) .collect() } @@ -92,7 +86,7 @@ impl BlockLookups { let mut parent_to_child = HashMap::>::new(); for lookup in self.single_block_lookups.values() { let block_root = lookup.block_root(); - let parent_root = lookup.parent_root(); + let parent_root = lookup.awaiting_parent(); child_to_parent.insert(block_root, parent_root); if let Some(parent_root) = parent_root { parent_to_child @@ -140,20 +134,37 @@ impl BlockLookups { /* Lookup requests */ /// Creates a lookup for the block with the given `block_root` and immediately triggers it. - pub fn search_block( + pub fn search_child_of_parent( + &mut self, + block_root: Hash256, + block_component: BlockComponent, + peer_source: PeerId, + cx: &mut SyncNetworkContext, + ) { + let awaiting_parent = block_component.parent_root(); + self.new_current_lookup( + block_root, + Some(block_component), + Some(awaiting_parent), + &[peer_source], + cx, + ) + } + + /// Seach a block that we don't known its parent root. + pub fn search_unknown_block( &mut self, block_root: Hash256, - block_component: Option<(PeerId, BlockComponent)>, peer_source: &[PeerId], cx: &mut SyncNetworkContext, ) { - self.new_current_lookup(block_root, block_component, peer_source, cx) + self.new_current_lookup(block_root, None, None, peer_source, cx) } /// A block or blob triggers the search of a parent. 
/// Check if this new lookup extends a bad chain: - /// - `block_root_to_search` is a failed chain /// - Extending `child_block_root_trigger` would exceed the max depth + /// - `block_root_to_search` is a failed chain pub fn search_parent_of_child( &mut self, block_root_to_search: Hash256, @@ -189,8 +200,8 @@ impl BlockLookups { } } - // TODO: implement parent chain checks - self.new_current_lookup(block_root_to_search, None, peers, cx) + // `block_root_to_search` is a failed chain check happens inside new_current_lookup + self.new_current_lookup(block_root_to_search, None, None, peers, cx) } /// Searches for a single block hash. If the blocks parent is unknown, a chain of blocks is @@ -198,7 +209,8 @@ impl BlockLookups { fn new_current_lookup( &mut self, block_root: Hash256, - block_component: Option<(PeerId, BlockComponent)>, + block_component: Option>, + awaiting_parent: Option, peers: &[PeerId], cx: &mut SyncNetworkContext, ) { @@ -208,8 +220,8 @@ impl BlockLookups { return; } - // TODO: Should check if parent root is a failed chain too? - if let Some(parent_root) = block_component.as_ref().map(|(_, b)| b.parent_root()) { + // TODO: Is checking if parent root is a failed chain necessary? + if let Some(parent_root) = block_component.as_ref().map(|b| b.parent_root()) { if self.failed_chains.contains(&parent_root) { debug!(self.log, "Parent of block is from a past failed chain. 
Dropping"; "parent_root" => ?parent_root, "block_root" => ?block_root); return; @@ -224,8 +236,8 @@ impl BlockLookups { { trace!(self.log, "Adding peer to existing single block lookup"; "block_root" => %block_root); lookup.add_peers(peers); - if let Some((peer_id, block_component)) = block_component { - lookup.add_child_components(peer_id, block_component); + if let Some(block_component) = block_component { + lookup.add_child_components(block_component); } return; } @@ -242,12 +254,13 @@ impl BlockLookups { "block" => ?block_root, ); - let mut lookup = - SingleBlockLookup::new(block_root, peers, self.da_checker.clone(), cx.next_id()); + // If we know that this lookup has unknown parent (is awaiting a parent lookup to resolve), + // signal here to hold processing downloaded data. + let mut lookup = SingleBlockLookup::new(block_root, peers, cx.next_id(), awaiting_parent); // Add block components to the new request - if let Some((peer_id, block_component)) = block_component { - lookup.add_child_components(peer_id, block_component); + if let Some(block_component) = block_component { + lookup.add_child_components(block_component); } let block_root = lookup.block_root(); @@ -280,6 +293,21 @@ impl BlockLookups { response: RpcProcessingResult, cx: &mut SyncNetworkContext, ) { + if let Err(e) = self.on_download_response_inner::(id, peer_id, response, cx) { + debug!(self.log, "Dropping single lookup"; "id" => id, "err" => ?e); + self.drop_lookup_and_childs(id); + self.update_metrics(); + } + } + + /// Process a block or blob response received from a single lookup request. + pub fn on_download_response_inner>( + &mut self, + id: SingleLookupId, + peer_id: PeerId, + response: RpcProcessingResult, + cx: &mut SyncNetworkContext, + ) -> Result<(), LookupRequestError> { // Downscore peer even if lookup is not known // Only downscore lookup verify errors. RPC errors are downscored in the network handler. 
if let Err(LookupFailure::LookupVerifyError(e)) = &response { @@ -288,17 +316,17 @@ impl BlockLookups { } let response_type = R::response_type(); - let Some(mut lookup) = self.single_block_lookups.get_mut(&id) else { + let Some(lookup) = self.single_block_lookups.get_mut(&id) else { // We don't have the ability to cancel in-flight RPC requests. So this can happen // if we started this RPC request, and later saw the block/blobs via gossip. debug!(self.log, "Block returned for single block lookup not present"; "id" => id); - return; + return Ok(()); }; let block_root = lookup.block_root(); - let request_state = R::request_state_mut(&mut lookup); + let request_state = R::request_state_mut(lookup).get_state_mut(); - let result = match response { + match response { Ok((response, seen_timestamp)) => { debug!(self.log, "Block lookup download success"; @@ -307,20 +335,17 @@ impl BlockLookups { "response_type" => ?response_type, ); - if let Err(e) = request_state.get_state_mut().on_download_success(( + // Register the download peer here. Once we have received some data over the wire we + // attribute it to this peer for scoring latter regardless of how the request was + // done. + request_state.on_download_success(( response, block_root, seen_timestamp, - )) { - Err(e) - } else { - // TOOD: May choose to delay blobs for sending if we know that their parent is unknown. - // However, da_checker does not ever error with unknown parent. Plus we should not request - // blobs for blocks that are not rooted on a valid chain, as an attacker can trigger us into - // fetching garbage. 
- - request_state.continue_request(id, cx) - } + peer_id, + ))?; + // continue_request will send for processing as the request state is AwaitingProcessing + lookup.continue_request::(cx) } Err(e) => { debug!(self.log, @@ -331,28 +356,20 @@ impl BlockLookups { "error" => %e, ); - if let Err(e) = request_state.get_state_mut().on_download_failure() { - Err(e) - } else { - request_state.continue_request(id, cx) - } + request_state.on_download_failure()?; + // continue_request will retry a download as the request state is AwaitingDownload + lookup.continue_request::(cx) } - }; - - if let Err(e) = result { - debug!(self.log, "Dropping single lookup"; "id" => id, "err" => ?e); - self.drop_lookup_and_childs(id); - self.update_metrics(); } } /* Error responses */ - pub fn peer_disconnected(&mut self, peer_id: &PeerId, cx: &mut SyncNetworkContext) { + pub fn peer_disconnected(&mut self, peer_id: &PeerId) { /* Check disconnection for single lookups */ self.single_block_lookups.retain(|_, req| { let should_drop_lookup = - req.should_drop_lookup_on_disconnected_peer(peer_id, cx, &self.log); + req.should_drop_lookup_on_disconnected_peer(peer_id ); if should_drop_lookup { debug!(self.log, "Dropping single lookup after peer disconnection"; "block_root" => %req.block_root()); @@ -383,6 +400,7 @@ impl BlockLookups { }; debug!(self.log, "Dropping lookup on request error"; "id" => id, "error" => ?e); self.drop_lookup_and_childs(id); + self.update_metrics(); } } @@ -392,13 +410,13 @@ impl BlockLookups { result: BlockProcessingResult, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { - let Some(mut lookup) = self.single_block_lookups.get_mut(&lookup_id) else { + let Some(lookup) = self.single_block_lookups.get_mut(&lookup_id) else { debug!(self.log, "Unknown single block lookup"; "target_id" => lookup_id); return Ok(()); }; let block_root = lookup.block_root(); - let request_state = R::request_state_mut(&mut lookup).get_state_mut(); + let request_state = 
R::request_state_mut(lookup).get_state_mut(); debug!( self.log, @@ -413,7 +431,6 @@ impl BlockLookups { BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown(_)) => { // Successfully imported - // TODO: Potentially import child blocks trace!(self.log, "Single block processing succeeded"; "block" => %block_root); request_state.on_processing_success()?; Action::Continue @@ -460,16 +477,15 @@ impl BlockLookups { Action::Drop } BlockError::ParentUnknown(block) => { - let slot = block.slot(); - let parent_root = block.parent_root(); - // Reverts the status of this request to `AwaitingProcessing` holding the // downloaded data. A future call to `continue_requests` will re-submit it // once there are no pending parent requests. // Note: `BlockError::ParentUnknown` is only returned when processing // blocks, not blobs. - request_state.into_awaiting_processing()?; - Action::ParentUnknown { parent_root, slot } + request_state.revert_to_awaiting_processing()?; + Action::ParentUnknown { + parent_root: block.parent_root(), + } } ref e @ BlockError::ExecutionPayloadError(ref epe) if !epe.penalize_peer() => { // These errors indicate that the execution layer is offline @@ -516,14 +532,16 @@ impl BlockLookups { match action { Action::Retry => { - // Trigger download + // Trigger download for all components in case `MissingComponents` failed the blob + // request. + // TODO: `continue_requests` must know when to not move a request forward: + // - If parent unknown do not request data? 
+ // - If parent unknown error hold off for processing lookup.continue_requests(cx)?; } - Action::ParentUnknown { - parent_root, - slot: _, - } => { + Action::ParentUnknown { parent_root } => { let peers = lookup.all_available_peers().cloned().collect::>(); + lookup.set_awaiting_parent(parent_root); self.search_parent_of_child(parent_root, block_root, &peers, cx); } Action::Drop => { @@ -532,6 +550,9 @@ impl BlockLookups { self.update_metrics(); } Action::Continue => { + // Drop this completed lookup only + self.single_block_lookups.remove(&lookup_id); + self.update_metrics(); // Block imported, continue the requests of pending child blocks self.continue_child_lookups(block_root, cx); } @@ -540,11 +561,12 @@ impl BlockLookups { } pub fn continue_child_lookups(&mut self, block_root: Hash256, cx: &mut SyncNetworkContext) { - let mut failed_lookups = vec![]; + let mut failed_lookups = vec![]; // < need to clean failed lookups latter to re-borrow &mut self + for (id, lookup) in self.single_block_lookups.iter_mut() { - if lookup.parent_root() == Some(block_root) { + if lookup.resolve_awaiting_parent() == Some(block_root) { // Continue lookup - debug!(self.log, "Continuing child lookup"; "block_root" => %lookup.block_root()); + debug!(self.log, "Continuing child lookup"; "parent_root" => %block_root, "block_root" => %lookup.block_root()); if let Err(e) = lookup.continue_requests(cx) { debug!(self.log, "Error continuing lookup"; "id" => id, "error" => ?e); failed_lookups.push(*id); @@ -561,19 +583,14 @@ impl BlockLookups { if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) { debug!(self.log, "Dropping child lookup"; "id" => ?dropped_id, "block_root" => %dropped_lookup.block_root()); - let child_lookup_ids = self + let child_lookups = self .single_block_lookups .iter() - .filter_map(|(id, lookup)| { - if lookup.parent_root() == Some(dropped_lookup.block_root()) { - Some(*id) - } else { - None - } - }) + .filter(|(_, lookup)| 
lookup.awaiting_parent() == Some(dropped_lookup.block_root())) + .map(|(id, _)| *id) .collect::>(); - for id in child_lookup_ids { + for id in child_lookups { self.drop_lookup_and_childs(id); } } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index d10311f6400..8e51b508eb5 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -2,11 +2,9 @@ use super::{BlockComponent, PeerId}; use crate::sync::block_lookups::common::RequestState; use crate::sync::block_lookups::Id; use crate::sync::network_context::SyncNetworkContext; -use beacon_chain::data_availability_checker::{DataAvailabilityChecker, MissingBlobs}; use beacon_chain::BeaconChainTypes; use itertools::Itertools; use rand::seq::IteratorRandom; -use slog::{debug, Logger}; use std::collections::HashSet; use std::fmt::Debug; use std::sync::Arc; @@ -32,27 +30,23 @@ pub struct SingleBlockLookup { pub id: Id, pub block_request_state: BlockRequestState, pub blob_request_state: BlobRequestState, - pub da_checker: Arc>, block_root: Hash256, - parent_root: Option, + awaiting_parent: Option, } impl SingleBlockLookup { pub fn new( requested_block_root: Hash256, peers: &[PeerId], - da_checker: Arc>, id: Id, + awaiting_parent: Option, ) -> Self { - let is_deneb = da_checker.is_deneb(); - Self { id, block_request_state: BlockRequestState::new(requested_block_root, peers), - blob_request_state: BlobRequestState::new(requested_block_root, peers, is_deneb), - da_checker, + blob_request_state: BlobRequestState::new(requested_block_root, peers), block_root: requested_block_root, - parent_root: todo!(), + awaiting_parent, } } @@ -61,22 +55,30 @@ impl SingleBlockLookup { self.block_root } - pub fn parent_root(&self) -> Option { - self.parent_root + pub fn awaiting_parent(&self) -> Option { + self.awaiting_parent } - pub fn 
add_child_components( - &mut self, - peer_id: PeerId, - block_component: BlockComponent, - ) { + /// Mark this lookup as awaiting a parent lookup from being processed. Meanwhile don't send + /// components for processing. + pub fn set_awaiting_parent(&mut self, parent_root: Hash256) { + self.awaiting_parent = Some(parent_root) + } + + /// Mark this lookup as no longer awaiting a parent lookup. Components can be sent for + /// processing. + pub fn resolve_awaiting_parent(&mut self) -> Option { + self.awaiting_parent.take() + } + + pub fn add_child_components(&mut self, block_component: BlockComponent) { match block_component { BlockComponent::Block(block) => { self.block_request_state .state - .insert_verified_response(peer_id, block); + .insert_verified_response(block); } - BlockComponent::Blob(blob) => { + BlockComponent::Blob(_) => { // For now ignore single blobs, as the blob request state assumes all blobs are // attributed to the same peer = the peer serving the remaining blobs. Ignoring this // block component has a minor effect, causing the node to re-request this blob @@ -113,11 +115,30 @@ impl SingleBlockLookup { cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { // TODO: Check what's necessary to download, specially for blobs - self.block_request_state.continue_request(self.id, cx)?; - self.blob_request_state.continue_request(self.id, cx)?; + self.continue_request::>(cx)?; + self.continue_request::>(cx)?; Ok(()) } + pub fn continue_request>( + &mut self, + cx: &mut SyncNetworkContext, + ) -> Result<(), LookupRequestError> { + let id = self.id; + let awaiting_parent = self.awaiting_parent.is_some(); + let downloaded_block_expected_blobs = self + .block_request_state + .state + .peek_downloaded_data() + .map(|block| block.num_expected_blobs()); + R::request_state_mut(self).continue_request( + id, + awaiting_parent, + downloaded_block_expected_blobs, + cx, + ) + } + /// Add all given peers to both block and blob request states. 
pub fn add_peer(&mut self, peer_id: PeerId) { self.block_request_state.state.add_peer(&peer_id); @@ -140,66 +161,31 @@ impl SingleBlockLookup { /// Checks both the block and blob request states to see if the peer is disconnected. /// /// Returns true if the lookup should be dropped. - pub fn should_drop_lookup_on_disconnected_peer( - &mut self, - peer_id: &PeerId, - cx: &mut SyncNetworkContext, - log: &Logger, - ) -> bool { - let block_root = self.block_root(); - let block_peer_disconnected = self - .block_request_state - .state - .check_peer_disconnected(peer_id) - .is_err(); - let blob_peer_disconnected = self - .blob_request_state - .state - .check_peer_disconnected(peer_id) - .is_err(); + pub fn should_drop_lookup_on_disconnected_peer(&mut self, peer_id: &PeerId) -> bool { + self.block_request_state.state.remove_peer(peer_id); + self.blob_request_state.state.remove_peer(peer_id); if self.all_available_peers().count() == 0 { return true; } - // If there was an active download request with this peer, send them again with another - // peer. We should receive an RPCError anyway, but this should speed things up? - if block_peer_disconnected || blob_peer_disconnected { - if let Err(e) = self.continue_requests(cx) { - debug!(log, "Single lookup failed on retry after peer disconnection"; "block_root" => ?block_root, "error" => ?e); - return true; - } - } + // Note: if the peer disconnected happens to have an on-going request associated with this + // lookup we will receive an RPCError and the lookup will fail. No need to manually retry + // now. false } - - /// If `child_components` is `Some`, we know block components won't hit the data - /// availability cache, so we don't check its processing cache unless `child_components` - /// is `None`. 
- pub(crate) fn missing_blob_ids(&self) -> MissingBlobs { - let _block_root = self.block_root(); - // TODO: Check against currently downloaded blobs, and the blobs in the da_checker to figure - // out what is remaining to be downloaded - todo!(); - } } /// The state of the blob request component of a `SingleBlockLookup`. pub struct BlobRequestState { - /// The latest picture of which blobs still need to be requested. This includes information - /// from both block/blobs downloaded in the network layer and any blocks/blobs that exist in - /// the data availability checker. - pub requested_ids: MissingBlobs, pub block_root: Hash256, pub state: SingleLookupRequestState>, } impl BlobRequestState { - pub fn new(block_root: Hash256, peer_source: &[PeerId], is_deneb: bool) -> Self { - let default_ids = MissingBlobs::new_without_block(block_root, is_deneb); + pub fn new(block_root: Hash256, peer_source: &[PeerId]) -> Self { Self { block_root, - requested_ids: default_ids, state: SingleLookupRequestState::new(peer_source), } } @@ -220,14 +206,14 @@ impl BlockRequestState { } } -pub type DownloadResult = (T, Hash256, Duration); +pub type DownloadResult = (T, Hash256, Duration, PeerId); #[derive(Debug, PartialEq, Eq)] pub enum State { AwaitingDownload, - Downloading { peer_id: PeerId }, - AwaitingProcess(PeerId, DownloadResult), - Processing(PeerId, DownloadResult), + Downloading, + AwaitingProcess(DownloadResult), + Processing(DownloadResult), Processed { peer_id: PeerId }, } @@ -262,44 +248,62 @@ impl SingleLookupRequestState { } } + pub fn is_awaiting_download(&self) -> bool { + match self.state { + State::AwaitingDownload => true, + State::Downloading { .. } + | State::AwaitingProcess { .. } + | State::Processing { .. } + | State::Processed { .. } => false, + } + } + pub fn is_processed(&self) -> bool { match self.state { - State::AwaitingDownload => false, - State::Downloading { .. } => false, - State::AwaitingProcess { .. } => false, - State::Processing { .. 
} => false, + State::AwaitingDownload + | State::Downloading { .. } + | State::AwaitingProcess { .. } + | State::Processing { .. } => false, State::Processed { .. } => true, } } - pub fn insert_verified_response(&mut self, peer_id: PeerId, result: DownloadResult) { + pub fn peek_downloaded_data(&self) -> Option<&T> { match &self.state { - State::AwaitingDownload => { - self.state = State::AwaitingProcess(peer_id, result); - } - _ => {} + State::AwaitingDownload => None, + State::Downloading { .. } => None, + State::AwaitingProcess(result) => Some(&result.0), + State::Processing(result) => Some(&result.0), + State::Processed { .. } => None, + } + } + + /// Switch to `AwaitingProcessing` if the request is in `AwaitingDownload` state, otherwise + /// ignore. + pub fn insert_verified_response(&mut self, result: DownloadResult) { + if let State::AwaitingDownload = &self.state { + self.state = State::AwaitingProcess(result); } } /// Switch to `Downloading` if the request is in `AwaitingDownload` state, otherwise returns None. - pub fn maybe_start_download(&mut self) -> Result, LookupRequestError> { - Ok(match &self.state { + pub fn on_download_start(&mut self) -> Result<(), LookupRequestError> { + match &self.state { State::AwaitingDownload => { - let peer_id = self - .use_rand_available_peer() - .ok_or(LookupRequestError::NoPeers)?; - self.state = State::Downloading { peer_id }; - Some(peer_id) + self.state = State::Downloading; + Ok(()) } - _ => None, - }) + other => Err(LookupRequestError::BadState(format!( + "request bad state, expected AwaitingDownload got {other}" + ))), + } } /// Registers a failure in downloading a block. This might be a peer disconnection or a wrong /// block. pub fn on_download_failure(&mut self) -> Result<(), LookupRequestError> { match &self.state { - State::Downloading { .. 
} => { + State::Downloading => { self.failed_downloading = self.failed_downloading.saturating_add(1); self.state = State::AwaitingDownload; Ok(()) @@ -315,8 +319,8 @@ impl SingleLookupRequestState { result: DownloadResult, ) -> Result<(), LookupRequestError> { match &self.state { - State::Downloading { peer_id } => { - self.state = State::AwaitingProcess(*peer_id, result); + State::Downloading => { + self.state = State::AwaitingProcess(result); Ok(()) } other => Err(LookupRequestError::BadState(format!( @@ -329,9 +333,9 @@ impl SingleLookupRequestState { pub fn maybe_start_processing(&mut self) -> Option> { // For 2 lines replace state with placeholder to gain ownership of `result` match &self.state { - State::AwaitingProcess(peer_id, result) => { + State::AwaitingProcess(result) => { let result = result.clone(); - self.state = State::Processing(*peer_id, result.clone()); + self.state = State::Processing(result.clone()); Some(result) } _ => None, @@ -340,10 +344,10 @@ impl SingleLookupRequestState { /// Revert into `AwaitingProcessing`, if the payload if not invalid and can be submitted for /// processing latter. - pub fn into_awaiting_processing(&mut self) -> Result<(), LookupRequestError> { + pub fn revert_to_awaiting_processing(&mut self) -> Result<(), LookupRequestError> { match &self.state { - State::Processing(peer_id, result) => { - self.state = State::AwaitingProcess(*peer_id, result.clone()); + State::Processing(result) => { + self.state = State::AwaitingProcess(result.clone()); Ok(()) } other => Err(LookupRequestError::BadState(format!( @@ -355,8 +359,8 @@ impl SingleLookupRequestState { /// Registers a failure in processing a block. 
pub fn on_processing_failure(&mut self) -> Result { match &self.state { - State::Processing(peer_id, _) => { - let peer_id = *peer_id; + State::Processing(result) => { + let peer_id = result.3; self.failed_processing = self.failed_processing.saturating_add(1); self.state = State::AwaitingDownload; Ok(peer_id) @@ -369,8 +373,8 @@ impl SingleLookupRequestState { pub fn on_processing_success(&mut self) -> Result<(), LookupRequestError> { match &self.state { - State::Processing(peer_id, _) => { - self.state = State::Processed { peer_id: *peer_id }; + State::Processing(result) => { + self.state = State::Processed { peer_id: result.3 }; Ok(()) } other => Err(LookupRequestError::BadState(format!( @@ -394,25 +398,8 @@ impl SingleLookupRequestState { } /// If a peer disconnects, this request could be failed. If so, an error is returned - pub fn check_peer_disconnected(&mut self, dc_peer_id: &PeerId) -> Result<(), ()> { - self.available_peers.remove(dc_peer_id); - if let State::Downloading { peer_id } = &self.state { - if peer_id == dc_peer_id { - // Peer disconnected before providing a block - self.on_download_failure(); - return Err(()); - } - } - Ok(()) - } - - /// Returns the id peer we downloaded from if we have downloaded a verified block, otherwise - /// returns an error. 
- pub fn processing_peer(&self) -> Result { - match &self.state { - State::Processing(peer_id, _) | State::Processed { peer_id } => Ok(*peer_id), - other => Err(format!("not in processing state: {}", other).to_string()), - } + pub fn remove_peer(&mut self, disconnected_peer_id: &PeerId) { + self.available_peers.remove(disconnected_peer_id); } pub fn get_used_peers(&self) -> impl Iterator { @@ -435,59 +422,6 @@ impl SingleLookupRequestState { } } -impl slog::Value for SingleBlockLookup { - fn serialize( - &self, - _record: &slog::Record, - key: slog::Key, - serializer: &mut dyn slog::Serializer, - ) -> slog::Result { - serializer.emit_str("request", key)?; - serializer.emit_arguments("hash", &format_args!("{}", self.block_root()))?; - serializer.emit_arguments( - "blob_ids", - &format_args!("{:?}", self.blob_request_state.requested_ids.indices()), - )?; - serializer.emit_arguments( - "block_request_state.state", - &format_args!("{:?}", self.block_request_state.state), - )?; - serializer.emit_arguments( - "blob_request_state.state", - &format_args!("{:?}", self.blob_request_state.state), - )?; - slog::Result::Ok(()) - } -} - -impl slog::Value for SingleLookupRequestState { - fn serialize( - &self, - record: &slog::Record, - key: slog::Key, - serializer: &mut dyn slog::Serializer, - ) -> slog::Result { - serializer.emit_str("request_state", key)?; - match &self.state { - State::AwaitingDownload => { - "awaiting_download".serialize(record, "state", serializer)? - } - State::Downloading { peer_id } => { - serializer.emit_arguments("downloading_peer", &format_args!("{}", peer_id))? - } - State::AwaitingProcess(peer_id, _) => serializer - .emit_arguments("awaiting_processing_peer", &format_args!("{}", peer_id))?, - State::Processing(peer_id, _) => { - serializer.emit_arguments("processing_peer", &format_args!("{}", peer_id))? - } - State::Processed { .. 
} => "processed".serialize(record, "state", serializer)?, - } - serializer.emit_u8("failed_downloads", self.failed_downloading)?; - serializer.emit_u8("failed_processing", self.failed_processing)?; - slog::Result::Ok(()) - } -} - impl std::fmt::Display for State { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index 6a09213b0b5..57a6e7412e3 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -10,9 +10,7 @@ use std::sync::Arc; use super::*; -use crate::sync::block_lookups::common::{ - ResponseType, PARENT_DEPTH_TOLERANCE, PARENT_FAIL_TOLERANCE, -}; +use crate::sync::block_lookups::common::{ResponseType, PARENT_DEPTH_TOLERANCE}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::builder::Witness; use beacon_chain::eth1_chain::CachingEth1Backend; @@ -29,7 +27,7 @@ use store::MemoryStore; use tokio::sync::mpsc; use types::{ test_utils::{SeedableRng, XorShiftRng}, - BlobSidecar, ForkName, MinimalEthSpec as E, SignedBeaconBlock, + BlobSidecar, ForkName, MinimalEthSpec as E, SignedBeaconBlock, Slot, }; type T = Witness, E, MemoryStore, MemoryStore>; @@ -77,6 +75,7 @@ struct TestRig { } const D: Duration = Duration::new(0, 0); +const PARENT_FAIL_TOLERANCE: u8 = SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS; impl TestRig { fn test_setup() -> Self { diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 0b30f2f3819..fed5cbe090a 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -47,7 +47,7 @@ use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::validator_monitor::timestamp_now; use beacon_chain::{ - AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlobError, 
BlockError, EngineState, + AvailabilityProcessingStatus, BeaconChain, BeaconChainTypes, BlockError, EngineState, }; use futures::StreamExt; use lighthouse_network::rpc::RPCError; @@ -249,10 +249,7 @@ impl SyncManager { ), range_sync: RangeSync::new(beacon_chain.clone(), log.clone()), backfill_sync: BackFillSync::new(beacon_chain.clone(), network_globals, log.clone()), - block_lookups: BlockLookups::new( - beacon_chain.data_availability_checker.clone(), - log.clone(), - ), + block_lookups: BlockLookups::new(log.clone()), log: log.clone(), } } @@ -348,8 +345,7 @@ impl SyncManager { fn peer_disconnect(&mut self, peer_id: &PeerId) { self.range_sync.peer_disconnect(&mut self.network, peer_id); - self.block_lookups - .peer_disconnected(peer_id, &mut self.network); + self.block_lookups.peer_disconnected(peer_id); // Regardless of the outcome, we update the sync status. let _ = self .backfill_sync @@ -573,7 +569,12 @@ impl SyncManager { block_root, parent_root, block_slot, - BlockComponent::Block((block.block_cloned(), block_root, timestamp_now())), + BlockComponent::Block(( + block.block_cloned(), + block_root, + timestamp_now(), + peer_id, + )), ); } SyncMessage::UnknownParentBlob(peer_id, blob) => { @@ -586,7 +587,7 @@ impl SyncManager { block_root, parent_root, blob_slot, - BlockComponent::Blob((blob, block_root, timestamp_now())), + BlockComponent::Blob((blob, block_root, timestamp_now(), peer_id)), ); } SyncMessage::UnknownBlockHashFromAttestation(peer_id, block_root) => { @@ -643,7 +644,7 @@ impl SyncManager { block_root: Hash256, parent_root: Hash256, slot: Slot, - unknown_parent_trigger: BlockComponent, + block_component: BlockComponent, ) { match self.should_search_for_block(Some(slot), &peer_id) { Ok(_) => { @@ -653,10 +654,10 @@ impl SyncManager { &[peer_id], &mut self.network, ); - self.block_lookups.search_block( + self.block_lookups.search_child_of_parent( block_root, - Some((peer_id, unknown_parent_trigger)), - &[peer_id], + block_component, + peer_id, &mut 
self.network, ); } @@ -670,7 +671,7 @@ impl SyncManager { match self.should_search_for_block(None, &peer_id) { Ok(_) => { self.block_lookups - .search_block(block_root, None, &[peer_id], &mut self.network); + .search_unknown_block(block_root, &[peer_id], &mut self.network); } Err(reason) => { debug!(self.log, "Ignoring unknown block request"; "block_root" => %block_root, "reason" => reason); diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 2f6724aac4e..80481d10ca6 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -301,7 +301,7 @@ impl SyncNetworkContext { lookup_id: SingleLookupId, peer_id: PeerId, request: BlocksByRootSingleRequest, - ) -> Result<(), &'static str> { + ) -> Result { let id = SingleLookupReqId { lookup_id, req_id: self.next_id(), @@ -325,15 +325,56 @@ impl SyncNetworkContext { self.blocks_by_root_requests .insert(id, ActiveBlocksByRootRequest::new(request)); - Ok(()) + Ok(true) } + /// Request necessary blobs for `block_root`. Requests only the necessary blobs by checking: + /// - If we have a downloaded but not yet processed block + /// - If the da_checker has a pending block + /// - If the da_checker has pending blobs from gossip + /// + /// Returns false if no request was made, because we don't need to fetch (more) blobs. 
pub fn blob_lookup_request( &mut self, lookup_id: SingleLookupId, peer_id: PeerId, - request: BlobsByRootSingleBlockRequest, - ) -> Result<(), &'static str> { + block_root: Hash256, + downloaded_block_expected_blobs: Option, + ) -> Result { + let expected_blobs = downloaded_block_expected_blobs + .or_else(|| { + self.chain + .data_availability_checker + .num_expected_blobs(&block_root) + }) + .unwrap_or_else(|| { + // If we don't about the block being requested, attempt to fetch all blobs + if self + .chain + .data_availability_checker + .da_check_required_for_current_epoch() + { + T::EthSpec::max_blobs_per_block() + } else { + 0 + } + }); + + let imported_blob_indexes = self + .chain + .data_availability_checker + .imported_blob_indexes(&block_root) + .unwrap_or_default(); + // Include only the blob indexes not yet imported (received through gossip) + let indices = (0..expected_blobs as u64) + .filter(|index| !imported_blob_indexes.contains(index)) + .collect::>(); + + if indices.is_empty() { + // No blobs required, do not issue any request + return Ok(false); + } + let id = SingleLookupReqId { lookup_id, req_id: self.next_id(), @@ -343,12 +384,17 @@ impl SyncNetworkContext { self.log, "Sending BlobsByRoot Request"; "method" => "BlobsByRoot", - "block_root" => ?request.block_root, - "blob_indices" => ?request.indices, + "block_root" => ?block_root, + "blob_indices" => ?indices, "peer" => %peer_id, "id" => ?id ); + let request = BlobsByRootSingleBlockRequest { + block_root, + indices, + }; + self.send_network_msg(NetworkMessage::SendRequest { peer_id, request: Request::BlobsByRoot(request.clone().into_request(&self.chain.spec)), @@ -358,7 +404,7 @@ impl SyncNetworkContext { self.blobs_by_root_requests .insert(id, ActiveBlobsByRootRequest::new(request)); - Ok(()) + Ok(true) } pub fn is_execution_engine_online(&self) -> bool { From bad87b1357e6b0436ac8765ed5049e81fc751a59 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 
26 Apr 2024 10:32:23 +0900 Subject: [PATCH 06/14] Merge current and parent lookups tests --- beacon_node/beacon_chain/src/beacon_chain.rs | 3 + .../network/src/sync/block_lookups/common.rs | 8 +- .../network/src/sync/block_lookups/mod.rs | 252 ++++++------ .../src/sync/block_lookups/parent_chain.rs | 197 +++++++++ .../sync/block_lookups/single_block_lookup.rs | 43 +- .../network/src/sync/block_lookups/tests.rs | 385 +++++++++++++----- beacon_node/network/src/sync/manager.rs | 38 +- .../network/src/sync/network_context.rs | 2 +- common/lru_cache/src/time.rs | 6 + .../src/test_utils/test_random/secret_key.rs | 2 + .../src/test_utils/test_random/signature.rs | 11 +- 11 files changed, 682 insertions(+), 265 deletions(-) create mode 100644 beacon_node/network/src/sync/block_lookups/parent_chain.rs diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index b3790024f81..069ed18cf90 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2918,6 +2918,9 @@ impl BeaconChain { .fork_choice_read_lock() .contains_block(&block_root) { + // TODO: Should also check for: + // - Parent block is known + // - Slot is not in the future return Err(BlockError::BlockIsAlreadyKnown(block_root)); } diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 1b56bbd0c5f..d7d3c9a061f 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -49,6 +49,7 @@ pub trait RequestState { id: Id, awaiting_parent: bool, downloaded_block_expected_blobs: Option, + block_is_processed: bool, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { // Attempt to progress awaiting downloads @@ -75,7 +76,12 @@ pub trait RequestState { // Otherwise, attempt to progress awaiting processing // If this request is awaiting a parent lookup to be processed, do not 
send for processing. // The request will be rejected with unknown parent error. - } else if !awaiting_parent { + } else if !awaiting_parent && + // TODO: Blob processing / import does not check for unknown parent. As a temporary fix + // and to emulate the behaviour before this PR, hold blobs for processing until the + // block has been processed i.e. it has a known parent. + (block_is_processed || matches!(Self::response_type(), ResponseType::Block)) + { // maybe_start_processing returns Some if state == AwaitingProcess. This pattern is // useful to conditionally access the result data. if let Some(result) = self.get_state_mut().maybe_start_processing() { diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 094bebf0c4f..99724837034 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,8 +1,10 @@ +use self::parent_chain::{compute_parent_chains, NodeChain}; use self::single_block_lookup::{DownloadResult, LookupRequestError, SingleBlockLookup}; use super::manager::{BlockProcessType, BlockProcessingResult}; use super::network_context::{RpcProcessingResult, SyncNetworkContext}; use crate::metrics; -use crate::sync::block_lookups::common::PARENT_DEPTH_TOLERANCE; +use crate::sync::block_lookups::common::{ResponseType, PARENT_DEPTH_TOLERANCE}; +use crate::sync::block_lookups::parent_chain::find_oldest_fork_ancestor; use crate::sync::manager::Id; use crate::sync::network_context::LookupFailure; use beacon_chain::block_verification_types::AsBlock; @@ -14,19 +16,19 @@ use lighthouse_network::{PeerAction, PeerId}; use lru_cache::LRUTimeCache; pub use single_block_lookup::{BlobRequestState, BlockRequestState}; use slog::{debug, error, trace, warn, Logger}; -use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use store::Hash256; use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; pub mod common; +pub mod 
parent_chain; mod single_block_lookup; #[cfg(test)] mod tests; const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; -pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 3; +pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 4; pub enum BlockComponent { Block(DownloadResult>>), @@ -55,6 +57,7 @@ pub struct BlockLookups { /// A cache of failed chain lookups to prevent duplicate searches. failed_chains: LRUTimeCache, + // TODO: Why not index lookups by block_root? single_block_lookups: FnvHashMap>, /// The logger for the import manager. @@ -72,6 +75,16 @@ impl BlockLookups { } } + #[cfg(test)] + pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { + self.failed_chains.insert(block_root); + } + + #[cfg(test)] + pub(crate) fn get_failed_chains(&mut self) -> Vec { + self.failed_chains.keys().cloned().collect() + } + #[cfg(test)] pub(crate) fn active_single_lookups(&self) -> Vec<(Id, Hash256, Option)> { self.single_block_lookups @@ -81,122 +94,104 @@ impl BlockLookups { } /// Returns a vec of all parent lookup chains by tip, in descending slot order (tip first) - pub(crate) fn active_parent_lookups(&self) -> Vec> { - let mut child_to_parent = HashMap::new(); - let mut parent_to_child = HashMap::>::new(); - for lookup in self.single_block_lookups.values() { - let block_root = lookup.block_root(); - let parent_root = lookup.awaiting_parent(); - child_to_parent.insert(block_root, parent_root); - if let Some(parent_root) = parent_root { - parent_to_child - .entry(parent_root) - .or_default() - .push(block_root); - } - } - - let mut parent_chains = vec![]; - - // Iterate blocks which no child - for lookup in self.single_block_lookups.values() { - let mut block_root = lookup.block_root(); - if parent_to_child.get(&block_root).is_none() { - let mut chain = vec![]; - - // Resolve chain of blocks - loop { - if let Some(parent_root) = child_to_parent.get(&block_root) { - // block_root is a known block that may or may not have a parent root - chain.push(block_root); - if 
let Some(parent_root) = parent_root { - block_root = *parent_root; - continue; - } - } - break; - } - - if chain.len() > 1 { - parent_chains.push(chain); - } - } - } - - parent_chains - } - - #[cfg(test)] - pub(crate) fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { - self.failed_chains.contains(chain_hash) + pub(crate) fn active_parent_lookups(&self) -> Vec { + compute_parent_chains( + &self + .single_block_lookups + .values() + .map(|lookup| lookup.into()) + .collect::>(), + ) } /* Lookup requests */ /// Creates a lookup for the block with the given `block_root` and immediately triggers it. - pub fn search_child_of_parent( + /// Returns true if the lookup is created or already exists + pub fn search_child_and_parent( &mut self, block_root: Hash256, block_component: BlockComponent, - peer_source: PeerId, + peer_id: PeerId, cx: &mut SyncNetworkContext, ) { - let awaiting_parent = block_component.parent_root(); - self.new_current_lookup( - block_root, - Some(block_component), - Some(awaiting_parent), - &[peer_source], - cx, - ) + let parent_root = block_component.parent_root(); + + let parent_lookup_exists = + self.search_parent_of_child(parent_root, block_root, &[peer_id], cx); + // Only create the child lookup if the parent exists + if parent_lookup_exists { + // `search_parent_of_child` ensures that parent root is not a failed chain + self.new_current_lookup( + block_root, + Some(block_component), + Some(parent_root), + &[peer_id], + cx, + ); + } } /// Seach a block that we don't known its parent root. + /// Returns true if the lookup is created or already exists pub fn search_unknown_block( &mut self, block_root: Hash256, peer_source: &[PeerId], cx: &mut SyncNetworkContext, ) { - self.new_current_lookup(block_root, None, None, peer_source, cx) + self.new_current_lookup(block_root, None, None, peer_source, cx); } /// A block or blob triggers the search of a parent. 
/// Check if this new lookup extends a bad chain: /// - Extending `child_block_root_trigger` would exceed the max depth /// - `block_root_to_search` is a failed chain + /// Returns true if the lookup is created or already exists pub fn search_parent_of_child( &mut self, block_root_to_search: Hash256, child_block_root_trigger: Hash256, peers: &[PeerId], cx: &mut SyncNetworkContext, - ) { - for parent_chain in self.active_parent_lookups() { - if parent_chain.last() == Some(&child_block_root_trigger) + ) -> bool { + let parent_chains = self.active_parent_lookups(); + + for (chain_idx, parent_chain) in parent_chains.iter().enumerate() { + if parent_chain.ancestor() == child_block_root_trigger && parent_chain.len() >= PARENT_DEPTH_TOLERANCE { debug!(self.log, "Parent lookup chain too long"; "block_root" => ?block_root_to_search); // Searching for this parent would extend a parent chain over the max // Insert the tip only to failed chains - let chain_hash = parent_chain.first().expect("has at least one element"); - self.failed_chains.insert(*chain_hash); - - // Drop all lookups descending from the child of the too long parent chain - if let Some((lookup_id, lookup)) = self - .single_block_lookups - .iter() - .find(|(_, l)| l.block_root() == child_block_root_trigger) - { - for &peer_id in lookup.all_used_peers() { - cx.report_peer(peer_id, PeerAction::LowToleranceError, "chain_too_long"); + self.failed_chains.insert(parent_chain.tip); + + // Note: Drop only the chain that's too long until it merges with another chain + // that's not too long. Consider this attack: there's a chain of valid unknown + // blocks A -> B. A malicious peer builds `PARENT_DEPTH_TOLERANCE` garbage + // blocks on top of A forming A -> C. The malicious peer forces us to fetch C + // from it, which will result in parent A hitting the chain_too_long error. Then + // the valid chain A -> B is dropped too. 
+ if let Ok(block_to_drop) = find_oldest_fork_ancestor(parent_chains, chain_idx) { + // Drop all lookups descending from the child of the too long parent chain + if let Some((lookup_id, lookup)) = self + .single_block_lookups + .iter() + .find(|(_, l)| l.block_root() == block_to_drop) + { + for &peer_id in lookup.all_used_peers() { + cx.report_peer( + peer_id, + PeerAction::LowToleranceError, + "chain_too_long", + ); + } + self.drop_lookup_and_childs(*lookup_id); } - - self.drop_lookup_and_childs(*lookup_id); } - return; + return false; } } @@ -206,6 +201,7 @@ impl BlockLookups { /// Searches for a single block hash. If the blocks parent is unknown, a chain of blocks is /// constructed. + /// Returns true if the lookup is created or already exists fn new_current_lookup( &mut self, block_root: Hash256, @@ -213,19 +209,14 @@ impl BlockLookups { awaiting_parent: Option, peers: &[PeerId], cx: &mut SyncNetworkContext, - ) { + ) -> bool { // If this block or it's parent is part of a known failed chain, ignore it. if self.failed_chains.contains(&block_root) { debug!(self.log, "Block is from a past failed chain. Dropping"; "block_root" => ?block_root); - return; - } - - // TODO: Is checking if parent root is a failed chain necessary? - if let Some(parent_root) = block_component.as_ref().map(|b| b.parent_root()) { - if self.failed_chains.contains(&parent_root) { - debug!(self.log, "Parent of block is from a past failed chain. 
Dropping"; "parent_root" => ?parent_root, "block_root" => ?block_root); - return; + for peer_id in peers { + cx.report_peer(*peer_id, PeerAction::MidToleranceError, "failed_chain"); } + return false; } // Do not re-request a block that is already being requested @@ -239,7 +230,18 @@ impl BlockLookups { if let Some(block_component) = block_component { lookup.add_child_components(block_component); } - return; + return true; + } + + // Ensure that awaiting parent exists, otherwise this lookup won't be able to make progress + if let Some(awaiting_parent) = awaiting_parent { + if !self + .single_block_lookups + .iter() + .any(|(_, lookup)| lookup.is_for_block(awaiting_parent)) + { + return false; + } } let msg = if block_component.is_some() { @@ -263,22 +265,15 @@ impl BlockLookups { lookup.add_child_components(block_component); } - let block_root = lookup.block_root(); match lookup.continue_requests(cx) { Ok(()) => { self.single_block_lookups.insert(lookup.id, lookup); self.update_metrics(); - - metrics::set_gauge( - &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, - self.single_block_lookups.len() as i64, - ); + true } Err(e) => { - debug!(self.log, "Single block lookup failed"; - "error" => ?e, - "block_root" => ?block_root, - ); + debug!(self.log, "Single block lookup failed"; "block_root" => ?block_root, "error" => ?e); + false } } } @@ -329,7 +324,7 @@ impl BlockLookups { match response { Ok((response, seen_timestamp)) => { debug!(self.log, - "Block lookup download success"; + "Received lookup download success"; "block_root" => %block_root, "peer_id" => %peer_id, "response_type" => ?response_type, @@ -349,7 +344,7 @@ impl BlockLookups { } Err(e) => { debug!(self.log, - "Block lookup download failure"; + "Received lookup download failure"; "block_root" => %block_root, "peer_id" => %peer_id, "response_type" => ?response_type, @@ -398,7 +393,7 @@ impl BlockLookups { let id = match process_type { BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => id, }; 
- debug!(self.log, "Dropping lookup on request error"; "id" => id, "error" => ?e); + debug!(self.log, "Dropping lookup on request error"; "component" => process_type.component(), "id" => process_type.id(), "error" => ?e); self.drop_lookup_and_childs(id); self.update_metrics(); } @@ -420,18 +415,16 @@ impl BlockLookups { debug!( self.log, - "Block component processed for lookup"; - "response_type" => ?R::response_type(), + "Received lookup processing result"; + "component" => ?R::response_type(), "block_root" => ?block_root, "result" => ?result, - "id" => lookup_id, ); let action = match result { BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown(_)) => { // Successfully imported - trace!(self.log, "Single block processing succeeded"; "block" => %block_root); request_state.on_processing_success()?; Action::Continue } @@ -448,13 +441,15 @@ impl BlockLookups { // wrong. If we already had both a block and blobs response processed, we should penalize the // blobs peer because they did not provide all blobs on the initial request. if lookup.both_components_processed() { - if let Ok(blob_peer) = lookup.blob_request_state.state.on_processing_failure() { - cx.report_peer( - blob_peer, - PeerAction::MidToleranceError, - "sent_incomplete_blobs", - ); - } + let blob_peer = lookup + .blob_request_state + .state + .on_post_process_validation_failure()?; + cx.report_peer( + blob_peer, + PeerAction::MidToleranceError, + "sent_incomplete_blobs", + ); } Action::Retry } @@ -463,17 +458,16 @@ impl BlockLookups { // This implies that the cpu is overloaded. Drop the request. 
warn!( self.log, - "Single block processing was ignored, cpu might be overloaded"; - "action" => "dropping single block request" + "Lookup component processing ignored, cpu might be overloaded"; + "component" => ?R::response_type(), ); Action::Drop } BlockProcessingResult::Err(e) => { - trace!(self.log, "Single block processing failed"; "block_root" => %block_root, "error" => %e); match e { BlockError::BeaconChainError(e) => { // Internal error - error!(self.log, "Beacon chain error processing single block"; "block_root" => %block_root, "error" => ?e); + error!(self.log, "Beacon chain error processing lookup component"; "block_root" => %block_root, "error" => ?e); Action::Drop } BlockError::ParentUnknown(block) => { @@ -513,7 +507,7 @@ impl BlockLookups { Action::Drop } other => { - warn!(self.log, "Invalid block in single block lookup"; "block_root" => %block_root, "error" => ?other); + debug!(self.log, "Invalid lookup component"; "block_root" => %block_root, "component" => ?R::response_type(), "error" => ?other); let peer_id = request_state.on_processing_failure()?; // TODO: Why is the original code downscoring the block peer regardless of // type of request? Sending a blob for verification can result in an error @@ -521,7 +515,10 @@ impl BlockLookups { cx.report_peer( peer_id, PeerAction::MidToleranceError, - "single_block_failure", + match R::response_type() { + ResponseType::Block => "lookup_block_processing_failure", + ResponseType::Blob => "lookup_blobs_processing_failure", + }, ); Action::Retry @@ -533,15 +530,13 @@ impl BlockLookups { match action { Action::Retry => { // Trigger download for all components in case `MissingComponents` failed the blob - // request. - // TODO: `continue_requests` must know when to not move a request forward: - // - If parent unknown do not request data? - // - If parent unknown error hold off for processing + // request. 
Also if blobs are `AwaitingProcessing` and need to be progressed lookup.continue_requests(cx)?; } Action::ParentUnknown { parent_root } => { let peers = lookup.all_available_peers().cloned().collect::>(); lookup.set_awaiting_parent(parent_root); + debug!(self.log, "Marking lookup as awaiting parent"; "lookup" => %block_root, "parent_root" => %parent_root); self.search_parent_of_child(parent_root, block_root, &peers, cx); } Action::Drop => { @@ -553,6 +548,7 @@ impl BlockLookups { // Drop this completed lookup only self.single_block_lookups.remove(&lookup_id); self.update_metrics(); + debug!(self.log, "Dropping completed lookup"; "block" => %block_root); // Block imported, continue the requests of pending child blocks self.continue_child_lookups(block_root, cx); } @@ -560,13 +556,14 @@ impl BlockLookups { Ok(()) } + /// Makes progress on the immediate children of `block_root` pub fn continue_child_lookups(&mut self, block_root: Hash256, cx: &mut SyncNetworkContext) { let mut failed_lookups = vec![]; // < need to clean failed lookups latter to re-borrow &mut self for (id, lookup) in self.single_block_lookups.iter_mut() { - if lookup.resolve_awaiting_parent() == Some(block_root) { - // Continue lookup - debug!(self.log, "Continuing child lookup"; "parent_root" => %block_root, "block_root" => %lookup.block_root()); + if lookup.awaiting_parent() == Some(block_root) { + lookup.resolve_awaiting_parent(); + debug!(self.log, "Continuing child lookup"; "parent_root" => %block_root, "block_root" => %lookup.block_root()); if let Err(e) = lookup.continue_requests(cx) { debug!(self.log, "Error continuing lookup"; "id" => id, "error" => ?e); failed_lookups.push(*id); @@ -579,6 +576,9 @@ impl BlockLookups { } } + /// Drops `dropped_id` lookup and all its children recursively. Lookups awaiting a parent need + /// the parent to make progress to resolve, therefore we must drop them is the parent is + /// dropped. 
pub fn drop_lookup_and_childs(&mut self, dropped_id: SingleLookupId) { if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) { debug!(self.log, "Dropping child lookup"; "id" => ?dropped_id, "block_root" => %dropped_lookup.block_root()); diff --git a/beacon_node/network/src/sync/block_lookups/parent_chain.rs b/beacon_node/network/src/sync/block_lookups/parent_chain.rs new file mode 100644 index 00000000000..0571e6d0dbb --- /dev/null +++ b/beacon_node/network/src/sync/block_lookups/parent_chain.rs @@ -0,0 +1,197 @@ +use std::collections::{HashMap, HashSet}; + +use beacon_chain::BeaconChainTypes; +use types::Hash256; + +use super::single_block_lookup::SingleBlockLookup; + +pub(crate) struct Node { + block_root: Hash256, + parent_root: Option, +} + +impl From<&SingleBlockLookup> for Node { + fn from(value: &SingleBlockLookup) -> Self { + Self { + block_root: value.block_root(), + parent_root: value.awaiting_parent(), + } + } +} + +pub(crate) struct NodeChain { + // Parent chain blocks in descending slot order + pub(crate) chain: Vec, + pub(crate) tip: Hash256, +} + +impl NodeChain { + pub(crate) fn ancestor(&self) -> Hash256 { + self.chain.last().copied().unwrap_or(self.tip) + } + pub(crate) fn len(&self) -> usize { + self.chain.len() + } +} + +/// Given a set of nodes that reference each other, returns a list of chains with unique tips that +/// contain at least two elements. In descending slot order (tip first). 
+pub(crate) fn compute_parent_chains(nodes: &[Node]) -> Vec { + let mut child_to_parent = HashMap::new(); + let mut parent_to_child = HashMap::>::new(); + for node in nodes { + child_to_parent.insert(node.block_root, node.parent_root); + if let Some(parent_root) = node.parent_root { + parent_to_child + .entry(parent_root) + .or_default() + .push(node.block_root); + } + } + + let mut parent_chains = vec![]; + + // Iterate blocks which no child + for tip in nodes { + let mut block_root = tip.block_root; + if parent_to_child.get(&block_root).is_none() { + let mut chain = vec![]; + + // Resolve chain of blocks + loop { + if let Some(parent_root) = child_to_parent.get(&block_root) { + // block_root is a known block that may or may not have a parent root + chain.push(block_root); + if let Some(parent_root) = parent_root { + block_root = *parent_root; + continue; + } + } + break; + } + + if chain.len() > 1 { + parent_chains.push(NodeChain { + chain, + tip: tip.block_root, + }); + } + } + } + + parent_chains +} + +/// Given a list of node chains, find the oldest node of a specific chain that is not contained in +/// any other chain. +pub(crate) fn find_oldest_fork_ancestor( + parent_chains: Vec, + chain_idx: usize, +) -> Result { + let mut other_blocks = HashSet::new(); + + // Register blocks from other chains + for (i, parent_chain) in parent_chains.iter().enumerate() { + if i != chain_idx { + for block in &parent_chain.chain { + other_blocks.insert(block); + } + } + } + + // Should never happen + let parent_chain = parent_chains.get(chain_idx).ok_or("chain_idx off bounds")?; + // Find the first block in the target parent chain that is not in other parent chains + // Iterate in ascending slot order + for block in parent_chain.chain.iter().rev() { + if !other_blocks.contains(block) { + return Ok(*block); + } + } + + // If no match means that the chain is fully contained within another chain. 
This should never + // happen, but if that was the case just return the tip + Ok(parent_chain.tip) +} + +#[cfg(test)] +mod tests { + use super::{compute_parent_chains, find_oldest_fork_ancestor, Node}; + use types::Hash256; + + fn h(n: u64) -> Hash256 { + Hash256::from_low_u64_be(n) + } + + fn n(block: u64) -> Node { + Node { + block_root: h(block), + parent_root: None, + } + } + + fn np(parent: u64, block: u64) -> Node { + Node { + block_root: h(block), + parent_root: Some(h(parent)), + } + } + + fn compute_parent_chains_test(nodes: &[Node], expected_chain: Vec>) { + assert_eq!( + compute_parent_chains(nodes) + .iter() + .map(|c| c.chain.clone()) + .collect::>(), + expected_chain + ); + } + + fn find_oldest_fork_ancestor_test(nodes: &[Node], expected: Hash256) { + let chains = compute_parent_chains(nodes); + println!( + "chains {:?}", + chains.iter().map(|c| &c.chain).collect::>() + ); + assert_eq!(find_oldest_fork_ancestor(chains, 0).unwrap(), expected); + } + + #[test] + fn compute_parent_chains_empty_case() { + compute_parent_chains_test(&[], vec![]); + } + + #[test] + fn compute_parent_chains_single_branch() { + compute_parent_chains_test(&[n(0), np(0, 1), np(1, 2)], vec![vec![h(2), h(1), h(0)]]); + } + + #[test] + fn compute_parent_chains_single_branch_with_solo() { + compute_parent_chains_test( + &[n(0), np(0, 1), np(1, 2), np(3, 4)], + vec![vec![h(2), h(1), h(0)]], + ); + } + + #[test] + fn compute_parent_chains_two_forking_branches() { + compute_parent_chains_test( + &[n(0), np(0, 1), np(1, 2), np(1, 3)], + vec![vec![h(2), h(1), h(0)], vec![h(3), h(1), h(0)]], + ); + } + + #[test] + fn compute_parent_chains_two_independent_branches() { + compute_parent_chains_test( + &[n(0), np(0, 1), np(1, 2), n(3), np(3, 4)], + vec![vec![h(2), h(1), h(0)], vec![h(4), h(3)]], + ); + } + + #[test] + fn find_oldest_fork_ancestor_simple_case() { + find_oldest_fork_ancestor_test(&[n(0), np(0, 1), np(1, 2), np(0, 3)], h(1)) + } +} diff --git 
a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 8e51b508eb5..9349e24c69c 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -67,8 +67,8 @@ impl SingleBlockLookup { /// Mark this lookup as no longer awaiting a parent lookup. Components can be sent for /// processing. - pub fn resolve_awaiting_parent(&mut self) -> Option { - self.awaiting_parent.take() + pub fn resolve_awaiting_parent(&mut self) { + self.awaiting_parent = None; } pub fn add_child_components(&mut self, block_component: BlockComponent) { @@ -106,7 +106,7 @@ impl SingleBlockLookup { self.block_request_state .state .get_available_peers() - .chain(self.blob_request_state.state.get_used_peers()) + .chain(self.blob_request_state.state.get_available_peers()) .unique() } @@ -131,10 +131,12 @@ impl SingleBlockLookup { .state .peek_downloaded_data() .map(|block| block.num_expected_blobs()); + let block_is_processed = self.block_request_state.state.is_processed(); R::request_state_mut(self).continue_request( id, awaiting_parent, downloaded_block_expected_blobs, + block_is_processed, cx, ) } @@ -214,7 +216,7 @@ pub enum State { Downloading, AwaitingProcess(DownloadResult), Processing(DownloadResult), - Processed { peer_id: PeerId }, + Processed(PeerId), } /// Object representing the state of a single block or blob lookup request. 
@@ -294,7 +296,7 @@ impl SingleLookupRequestState { Ok(()) } other => Err(LookupRequestError::BadState(format!( - "request bad state, expected AwaitingDownload got {other}" + "Bad state on_download_start expected AwaitingDownload got {other}" ))), } } @@ -309,7 +311,7 @@ impl SingleLookupRequestState { Ok(()) } other => Err(LookupRequestError::BadState(format!( - "request bad state, expected Downloading got {other}" + "Bad state on_download_failure expected Downloading got {other}" ))), } } @@ -324,7 +326,7 @@ impl SingleLookupRequestState { Ok(()) } other => Err(LookupRequestError::BadState(format!( - "request bad state, expected Downloading got {other}" + "Bad state on_download_success expected Downloading got {other}" ))), } } @@ -351,7 +353,7 @@ impl SingleLookupRequestState { Ok(()) } other => Err(LookupRequestError::BadState(format!( - "request bad state, expected Processing got {other}" + "Bad state on revert_to_awaiting_processing expected Processing got {other}" ))), } } @@ -366,19 +368,34 @@ impl SingleLookupRequestState { Ok(peer_id) } other => Err(LookupRequestError::BadState(format!( - "request bad state, expected Processing got {other}" + "Bad state on_processing_failure expected Processing got {other}" ))), } } - pub fn on_processing_success(&mut self) -> Result<(), LookupRequestError> { + pub fn on_processing_success(&mut self) -> Result { match &self.state { State::Processing(result) => { - self.state = State::Processed { peer_id: result.3 }; - Ok(()) + let peer_id = result.3; + self.state = State::Processed(peer_id); + Ok(peer_id) + } + other => Err(LookupRequestError::BadState(format!( + "Bad state on_processing_success expected Processing got {other}" + ))), + } + } + + pub fn on_post_process_validation_failure(&mut self) -> Result { + match &self.state { + State::Processed(peer_id) => { + let peer_id = *peer_id; + self.failed_processing = self.failed_processing.saturating_add(1); + self.state = State::AwaitingDownload; + Ok(peer_id) } other => 
Err(LookupRequestError::BadState(format!( - "request bad state, expected Processing got {other}" + "Bad state on_post_process_validation_failure expected Processed got {other}" ))), } } diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index 57a6e7412e3..ff02a19a554 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -214,6 +214,15 @@ impl TestRig { self.sync_manager.active_parent_lookups().len() } + fn assert_single_lookups_count(&self, count: usize) { + assert_eq!( + self.active_single_lookups_count(), + count, + "Unexpected count of single lookups. Current lookups: {:?}", + self.active_single_lookups() + ); + } + fn assert_parent_lookups_count(&self, count: usize) { assert_eq!( self.active_parent_lookups_count(), @@ -224,8 +233,26 @@ impl TestRig { ); } + fn assert_lookup_is_active(&self, block_root: Hash256) { + let lookups = self.sync_manager.active_single_lookups(); + if !lookups.iter().any(|l| l.1 == block_root) { + panic!("Expected lookup {block_root} to be the only active: {lookups:?}"); + } + } + + fn insert_failed_chain(&mut self, block_root: Hash256) { + self.sync_manager.insert_failed_chain(block_root); + } + + fn assert_not_failed_chain(&mut self, chain_hash: Hash256) { + let failed_chains = self.sync_manager.get_failed_chains(); + if failed_chains.contains(&chain_hash) { + panic!("failed chains contain {chain_hash:?}: {failed_chains:?}"); + } + } + fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { - self.sync_manager.failed_chains_contains(chain_hash) + self.sync_manager.get_failed_chains().contains(chain_hash) } fn find_single_lookup_for(&self, block_root: Hash256) -> Id { @@ -236,14 +263,6 @@ impl TestRig { .0 } - fn expect_no_active_parent_lookups(&self) { - assert!( - self.active_parent_lookups().is_empty(), - "expected no parent lookups: {:?}", - self.active_parent_lookups() - ); - } - 
fn expect_no_active_single_lookups(&self) { assert!( self.active_single_lookups().is_empty(), @@ -253,22 +272,16 @@ impl TestRig { } fn expect_no_active_lookups(&self) { - self.expect_no_active_parent_lookups(); self.expect_no_active_single_lookups(); } - #[track_caller] - fn assert_parent_lookups_consistency(&self) { - let hashes = self.active_parent_lookups(); - let expected = hashes.len(); - assert_eq!( - expected, - hashes - .into_iter() - .collect::>() - .len(), - "duplicated chain hashes in parent queue" - ) + fn expect_lookups(&self, expected_block_roots: &[Hash256]) { + let block_roots = self + .active_single_lookups() + .iter() + .map(|(_, b, _)| *b) + .collect::>(); + assert_eq!(&block_roots, expected_block_roots); } fn new_connected_peer(&mut self) -> PeerId { @@ -290,32 +303,35 @@ impl TestRig { self.parent_block_processed_imported(chain_hash); } // Send final import event for the block that triggered the lookup - let trigger_lookup = self - .active_single_lookups() - .iter() - .find(|(_, block_root, _)| block_root == &chain_hash) - .copied() - .unwrap_or_else(|| panic!("There should exist a single block lookup for {chain_hash}")); - self.single_block_component_processed_imported(trigger_lookup.0, chain_hash); + self.single_block_component_processed_imported(chain_hash); } - fn parent_block_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { - // Locate a parent lookup chain with tip hash `chain_hash` + /// Locate a parent lookup chain with tip hash `chain_hash` + fn find_oldest_parent_lookup(&self, chain_hash: Hash256) -> Hash256 { let parent_chain = self .active_parent_lookups() .into_iter() .find(|chain| chain.first() == Some(&chain_hash)) .unwrap_or_else(|| { panic!( - "No parent chain with chain_hash {chain_hash:?}: {:?}", - self.active_parent_lookups() + "No parent chain with chain_hash {chain_hash:?}: Parent lookups {:?} Single lookups {:?}", + self.active_parent_lookups(), + self.active_single_lookups(), ) }); + 
*parent_chain.last().unwrap() + } - let id = self.find_single_lookup_for(*parent_chain.last().unwrap()); + fn parent_block_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { + let id = self.find_single_lookup_for(self.find_oldest_parent_lookup(chain_hash)); self.single_block_component_processed(id, result); } + fn parent_blob_processed(&mut self, chain_hash: Hash256, result: BlockProcessingResult) { + let id = self.find_single_lookup_for(self.find_oldest_parent_lookup(chain_hash)); + self.single_blob_component_processed(id, result); + } + fn parent_block_processed_imported(&mut self, chain_hash: Hash256) { self.parent_block_processed( chain_hash, @@ -330,20 +346,17 @@ impl TestRig { }) } - fn single_block_component_processed_imported(&mut self, id: Id, block_root: Hash256) { + fn single_block_component_processed_imported(&mut self, block_root: Hash256) { + let id = self.find_single_lookup_for(block_root); self.single_block_component_processed( id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(block_root)), ) } - fn single_blob_component_processed( - &mut self, - id: SingleLookupReqId, - result: BlockProcessingResult, - ) { + fn single_blob_component_processed(&mut self, id: Id, result: BlockProcessingResult) { self.send_sync_message(SyncMessage::BlockComponentProcessed { - process_type: BlockProcessType::SingleBlob { id: id.lookup_id }, + process_type: BlockProcessType::SingleBlob { id }, result, }) } @@ -384,7 +397,10 @@ impl TestRig { peer_id: PeerId, blob_sidecar: Option>>, ) { - self.log("parent_lookup_blob_response"); + self.log(&format!( + "parent_lookup_blob_response {:?}", + blob_sidecar.as_ref().map(|b| b.index) + )); self.send_sync_message(SyncMessage::RpcBlob { request_id: SyncRequestId::SingleBlob { id }, peer_id, @@ -609,10 +625,11 @@ impl TestRig { #[track_caller] fn expect_empty_beacon_processor(&mut self) { - assert_eq!( - self.beacon_processor_rx.try_recv().expect_err("must err"), - 
mpsc::error::TryRecvError::Empty - ); + match self.beacon_processor_rx.try_recv() { + Err(mpsc::error::TryRecvError::Empty) => {} // ok + Ok(event) => panic!("expected empty beacon processor: {:?}", event), + other => panic!("unexpected err {:?}", other), + } } #[track_caller] @@ -626,7 +643,7 @@ impl TestRig { }) .unwrap_or_else(|_| { panic!( - "Expected peer penalty for {peer_id}: {:#?}", + "Expected '{expect_penalty_msg}' penalty for peer {peer_id}: {:#?}", self.network_rx_queue ) }); @@ -636,6 +653,11 @@ impl TestRig { ); } + pub fn expect_single_penalty(&mut self, peer_id: PeerId, expect_penalty_msg: &'static str) { + self.expect_penalty(peer_id, expect_penalty_msg); + self.expect_no_penalty_for(peer_id); + } + pub fn block_with_parent_and_blobs( &mut self, parent_root: Hash256, @@ -651,19 +673,47 @@ impl TestRig { pub fn rand_blockchain(&mut self, depth: usize) -> Vec>> { let mut blocks = Vec::>>::with_capacity(depth); - while blocks.len() < depth { + for slot in 0..depth { let parent = blocks .last() .map(|b| b.canonical_root()) .unwrap_or_else(Hash256::random); let mut block = self.rand_block(); *block.message_mut().parent_root_mut() = parent; + *block.message_mut().slot_mut() = slot.into(); blocks.push(block.into()); } + self.log(&format!( + "Blockchain dump {:#?}", + blocks + .iter() + .map(|b| format!( + "block {} {} parent {}", + b.slot(), + b.canonical_root(), + b.parent_root() + )) + .collect::>() + )); blocks } } +#[test] +fn stable_rng() { + let mut rng = XorShiftRng::from_seed([42; 16]); + let (block, _) = generate_rand_block_and_blobs::(ForkName::Base, NumBlobs::None, &mut rng); + // TODO: Make rand block generation stable + assert_ne!( + block.canonical_root(), + Hash256::from_slice( + &hex::decode("9cfcfc321759d8a2c38d6541a966da5e88fe8729ed5a5ab37013781ff097b0d6") + .unwrap() + ), + "rng produces a consistent value" + ); +} + #[test] fn test_single_block_lookup_happy_path() { let mut rig = TestRig::test_setup(); @@ -686,7 +736,7 @@ fn 
test_single_block_lookup_happy_path() { // Send the stream termination. Peer should have not been penalized, and the request removed // after processing. rig.single_lookup_block_response(id, peer_id, None); - rig.single_block_component_processed_imported(id.lookup_id, block_root); + rig.single_block_component_processed_imported(block_root); rig.expect_empty_network(); rig.expect_no_active_lookups(); } @@ -776,7 +826,7 @@ fn test_single_block_lookup_becomes_parent_request() { id.lookup_id, BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), ); - assert_eq!(rig.active_single_lookups_count(), 1); + assert_eq!(rig.active_single_lookups_count(), 2); // 2 = current + parent rig.expect_parent_request_block_and_blobs(parent_root); rig.expect_empty_network(); assert_eq!(rig.active_parent_lookups_count(), 1); @@ -863,8 +913,7 @@ fn test_parent_lookup_empty_response() { // Processing succeeds, now the rest of the chain should be sent for processing. rig.parent_block_processed_imported(block_root); - let id = rig.find_single_lookup_for(block_root); - rig.single_block_component_processed_imported(id, block_root); + rig.single_block_component_processed_imported(block_root); rig.expect_no_active_lookups(); } @@ -990,21 +1039,18 @@ fn test_parent_lookup_too_many_processing_attempts_must_blacklist() { } rig.log("Now fail processing a block in the parent request"); - for i in 0..PROCESSING_FAILURES { + for _ in 0..PROCESSING_FAILURES { let id = rig.expect_block_parent_request(parent_root); - // Blobs are only requested in the first iteration as this test only retries blocks - if rig.after_deneb() && i != 0 { - let _ = rig.expect_blob_parent_request(parent_root); - } - assert!(!rig.failed_chains_contains(&block_root)); + // Blobs are only requested in the previous first iteration as this test only retries blocks + rig.assert_not_failed_chain(block_root); // send the right parent but fail processing rig.parent_lookup_block_response(id, peer_id, 
Some(parent.clone().into())); rig.parent_block_processed(block_root, BlockError::InvalidSignature.into()); rig.parent_lookup_block_response(id, peer_id, None); - rig.expect_penalty(peer_id, "parent_request_err"); + rig.expect_penalty(peer_id, "lookup_block_processing_failure"); } - assert!(rig.failed_chains_contains(&block_root)); + rig.assert_not_failed_chain(block_root); rig.expect_no_active_lookups(); } @@ -1033,12 +1079,12 @@ fn test_parent_lookup_too_deep() { ) } - rig.expect_penalty(peer_id, ""); + rig.expect_penalty(peer_id, "chain_too_long"); assert!(rig.failed_chains_contains(&chain_hash)); } #[test] -fn test_parent_lookup_disconnection() { +fn test_parent_lookup_disconnection_no_peers_left() { let mut rig = TestRig::test_setup(); let peer_id = rig.new_connected_peer(); let trigger_block = rig.rand_block(); @@ -1048,6 +1094,46 @@ fn test_parent_lookup_disconnection() { rig.expect_no_active_lookups(); } +#[test] +fn test_parent_lookup_disconnection_peer_left() { + let mut rig = TestRig::test_setup(); + let peer_ids = (0..2).map(|_| rig.new_connected_peer()).collect::>(); + let trigger_block = rig.rand_block(); + // lookup should have two peers associated with the same block + for peer_id in peer_ids.iter() { + rig.trigger_unknown_parent_block(*peer_id, trigger_block.clone().into()); + } + // Disconnect the first peer only, which is the one handling the request + rig.peer_disconnected(*peer_ids.first().unwrap()); + rig.assert_parent_lookups_count(1); +} + +#[test] +fn test_skip_creating_failed_parent_lookup() { + let mut rig = TestRig::test_setup(); + let (_, block, parent_root, _) = rig.rand_block_and_parent(); + let peer_id = rig.new_connected_peer(); + rig.insert_failed_chain(parent_root); + rig.trigger_unknown_parent_block(peer_id, block.into()); + // Expect single penalty for peer, despite dropping two lookups + rig.expect_single_penalty(peer_id, "failed_chain"); + // Both current and parent lookup should be rejected + rig.expect_no_active_lookups(); +} 
+ +#[test] +fn test_skip_creating_failed_current_lookup() { + let mut rig = TestRig::test_setup(); + let (_, block, parent_root, block_root) = rig.rand_block_and_parent(); + let peer_id = rig.new_connected_peer(); + rig.insert_failed_chain(block_root); + rig.trigger_unknown_parent_block(peer_id, block.into()); + // Expect single penalty for peer + rig.expect_single_penalty(peer_id, "failed_chain"); + // Only the current lookup should be rejected + rig.expect_lookups(&[parent_root]); +} + #[test] fn test_single_block_lookup_ignored_response() { let mut rig = TestRig::test_setup(); @@ -1137,18 +1223,18 @@ fn test_same_chain_race_condition() { BlockError::ParentUnknown(RpcBlock::new_without_blobs(None, block)).into(), ) } - rig.assert_parent_lookups_consistency(); } - // Processing succeeds, now the rest of the chain should be sent for processing. - rig.expect_parent_chain_process(); - // Try to get this block again while the chain is being processed. We should not request it again. let peer_id = rig.new_connected_peer(); - rig.trigger_unknown_parent_block(peer_id, trigger_block); - rig.assert_parent_lookups_consistency(); + rig.trigger_unknown_parent_block(peer_id, trigger_block.clone()); + rig.expect_empty_network(); - rig.parent_chain_processed_success(chain_hash, &blocks); + // Processing succeeds, now the rest of the chain should be sent for processing. 
+ for block in blocks.iter().skip(1).chain(&[trigger_block]) { + rig.expect_parent_chain_process(); + rig.single_block_component_processed_imported(block.canonical_root()); + } rig.expect_no_active_lookups(); } @@ -1302,6 +1388,21 @@ mod deneb_only { }) } + fn log(self, msg: &str) -> Self { + self.rig.log(msg); + self + } + + // TODO: Eventually deprecate this function + fn set_block_id_for_import(mut self) -> Self { + let lookup_id = self.rig.find_single_lookup_for(self.block_root); + self.block_req_id = Some(SingleLookupReqId { + lookup_id, + req_id: 0, + }); + self + } + fn parent_block_response(mut self) -> Self { self.rig.expect_empty_network(); let block = self.parent_block.pop_front().unwrap().clone(); @@ -1356,7 +1457,8 @@ mod deneb_only { self.rig.expect_empty_network(); // The request should still be active. - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig + .assert_lookup_is_active(self.block.canonical_root()); self } @@ -1369,7 +1471,8 @@ mod deneb_only { self.peer_id, Some(blob.clone()), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig + .assert_lookup_is_active(self.block.canonical_root()); } self.rig.single_lookup_blob_response( self.blob_req_id.expect("blob request id"), @@ -1428,6 +1531,29 @@ mod deneb_only { self } + fn block_missing_components(mut self) -> Self { + self.rig.single_block_component_processed( + self.block_req_id.expect("block request id").lookup_id, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + self.block.slot(), + self.block_root, + )), + ); + self.rig.expect_empty_network(); + self.rig.assert_single_lookups_count(1); + self + } + + fn blob_imported(mut self) -> Self { + self.rig.single_blob_component_processed( + self.blob_req_id.expect("blob request id").lookup_id, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), + ); + self.rig.expect_empty_network(); + self.rig.assert_single_lookups_count(0); + self + } + fn 
block_imported(mut self) -> Self { // Missing blobs should be the request is not removed, the outstanding blobs request should // mean we do not send a new request. @@ -1436,7 +1562,7 @@ mod deneb_only { BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), ); self.rig.expect_empty_network(); - assert_eq!(self.rig.active_single_lookups_count(), 0); + self.rig.assert_single_lookups_count(0); self } @@ -1451,7 +1577,19 @@ mod deneb_only { self } + fn parent_blob_imported(mut self) -> Self { + self.rig.log("parent_blob_imported"); + self.rig.parent_blob_processed( + self.block_root, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)), + ); + self.rig.expect_empty_network(); + self.rig.assert_parent_lookups_count(0); + self + } + fn parent_block_unknown_parent(mut self) -> Self { + self.rig.log("parent_block_unknown_parent"); let block = self.unknown_parent_block.take().unwrap(); // Now this block is the one we expect requests from self.block = block.clone(); @@ -1469,6 +1607,26 @@ mod deneb_only { self } + fn parent_block_missing_components(mut self) -> Self { + let block = self.unknown_parent_block.clone().unwrap(); + self.rig.parent_block_processed( + self.block_root, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + block.slot(), + block.canonical_root(), + )), + ); + self.rig.parent_blob_processed( + self.block_root, + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + block.slot(), + block.canonical_root(), + )), + ); + assert_eq!(self.rig.active_parent_lookups_count(), 1); + self + } + fn invalid_parent_processed(mut self) -> Self { self.rig.parent_block_processed( self.block_root, @@ -1483,18 +1641,19 @@ mod deneb_only { self.block_req_id.expect("block request id").lookup_id, BlockProcessingResult::Err(BlockError::ProposalSignatureInvalid), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); 
self } fn invalid_blob_processed(mut self) -> Self { - self.rig.single_block_component_processed( + self.rig.log("invalid_blob_processed"); + self.rig.single_blob_component_processed( self.blob_req_id.expect("blob request id").lookup_id, BlockProcessingResult::Err(BlockError::AvailabilityCheck( AvailabilityCheckError::KzgVerificationFailed, )), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); self } @@ -1506,19 +1665,19 @@ mod deneb_only { self.block_root, )), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); self } fn missing_components_from_blob_request(mut self) -> Self { self.rig.single_blob_component_processed( - self.blob_req_id.expect("blob request id"), + self.blob_req_id.expect("blob request id").lookup_id, BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( self.slot, self.block_root, )), ); - assert_eq!(self.rig.active_single_lookups_count(), 1); + self.rig.assert_single_lookups_count(1); self } @@ -1607,8 +1766,9 @@ mod deneb_only { tester .block_response_triggering_process() .blobs_response() + .block_missing_components() // blobs not yet imported .blobs_response_was_valid() - .block_imported(); + .blob_imported(); // now blobs resolve as imported } #[test] @@ -1618,10 +1778,11 @@ mod deneb_only { }; tester - .blobs_response() - .blobs_response_was_valid() + .blobs_response() // hold blobs for processing .block_response_triggering_process() - .block_imported(); + .block_missing_components() // blobs not yet imported + .blobs_response_was_valid() + .blob_imported(); // now blobs resolve as imported } #[test] @@ -1655,7 +1816,7 @@ mod deneb_only { .missing_components_from_block_request() .empty_blobs_response() .missing_components_from_blob_request() - .expect_penalty("single_blob_failure") + .expect_penalty("sent_incomplete_blobs") .expect_blobs_request() .expect_no_block_request(); } @@ -1668,9 +1829,8 @@ mod deneb_only { 
tester .blobs_response() - .blobs_response_was_valid() .expect_no_penalty_and_no_requests() - .missing_components_from_blob_request() + // blobs not sent for processing until the block is processed .empty_block_response() .expect_penalty("NoResponseReturned") .expect_block_request() @@ -1686,11 +1846,11 @@ mod deneb_only { tester .block_response_triggering_process() .invalid_block_processed() - .expect_penalty("single_block_failure") + .expect_penalty("lookup_block_processing_failure") .expect_block_request() .expect_no_blobs_request() .blobs_response() - .missing_components_from_blob_request() + // blobs not sent for processing until the block is processed .expect_no_penalty_and_no_requests(); } @@ -1705,7 +1865,7 @@ mod deneb_only { .missing_components_from_block_request() .blobs_response() .invalid_blob_processed() - .expect_penalty("single_blob_failure") + .expect_penalty("lookup_blobs_processing_failure") .expect_blobs_request() .expect_no_block_request(); } @@ -1722,7 +1882,7 @@ mod deneb_only { .invalidate_blobs_too_few() .blobs_response() .missing_components_from_blob_request() - .expect_penalty("single_blob_failure") + .expect_penalty("sent_incomplete_blobs") .expect_blobs_request() .expect_no_block_request(); } @@ -1750,8 +1910,7 @@ mod deneb_only { tester .invalidate_blobs_too_few() - .blobs_response() - .blobs_response_was_valid() + .blobs_response() // blobs are not sent until the block is processed .expect_no_penalty_and_no_requests() .block_response_triggering_process(); } @@ -1806,9 +1965,8 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .invalid_parent_processed() - .expect_penalty("parent_request_err") + .expect_penalty("lookup_block_processing_failure") .expect_parent_block_request() - .expect_parent_blobs_request() .expect_empty_beacon_processor(); } @@ -1879,15 +2037,22 @@ mod deneb_only { tester .blobs_response() + .log(" Return empty blobs for parent, block errors with missing components, downscore") 
.empty_parent_blobs_response() .expect_no_penalty_and_no_requests() .parent_block_response() - .expect_penalty("single_blob_failure") + .parent_block_missing_components() + .expect_penalty("sent_incomplete_blobs") + .log("Re-request parent blobs, succeed and import parent") .expect_parent_blobs_request() .parent_blob_response() .expect_block_process() - .parent_block_imported() - .expect_parent_chain_process(); + .parent_blob_imported() + .log("resolve original block trigger blobs request and import") + .blobs_response() + .set_block_id_for_import() + .block_imported() + .expect_no_active_lookups(); } #[test] @@ -1925,9 +2090,9 @@ mod deneb_only { .parent_blob_response() .expect_block_process() .invalid_parent_processed() - .expect_penalty("parent_request_err") + .expect_penalty("lookup_block_processing_failure") .expect_parent_block_request() - .expect_parent_blobs_request() + // blobs are not sent until block is processed .expect_empty_beacon_processor(); } @@ -2005,17 +2170,21 @@ mod deneb_only { }; tester - .block_response() // reply with current block - .empty_parent_blobs_response() // replies empty blobs to parent block - .expect_no_penalty_and_no_requests() // no penalty because parent block is unknown - .parent_block_response() // reply with parent block - .expect_penalty("single_blob_failure") // parent block has data, so penalize parent blob peer - .expect_parent_blobs_request() // re-request parent blobs - .parent_blob_response() // good response now - .expect_block_process() // send parent block for import - .parent_block_imported() // parent block imported + .block_response() + .log(" Return empty blobs for parent, block errors with missing components, downscore") + .empty_parent_blobs_response() + .expect_no_penalty_and_no_requests() + .parent_block_response() + .parent_block_missing_components() + .expect_penalty("sent_incomplete_blobs") + .log("Re-request parent blobs, succeed and import parent") + .expect_parent_blobs_request() + 
.parent_blob_response() + .expect_block_process() + .parent_blob_imported() + .log("resolve original block trigger blobs request and import") .blobs_response() - .block_imported() // resolve original block trigger blobs request and import + .block_imported() .expect_no_active_lookups(); } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index fed5cbe090a..f7c9dd783c8 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -151,6 +151,21 @@ pub enum BlockProcessType { SingleBlob { id: Id }, } +impl BlockProcessType { + pub fn component(&self) -> &'static str { + match self { + BlockProcessType::SingleBlock { .. } => "block", + BlockProcessType::SingleBlob { .. } => "blob", + } + } + + pub fn id(&self) -> Id { + match self { + BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => *id, + } + } +} + #[derive(Debug)] pub enum BlockProcessingResult { Ok(AvailabilityProcessingStatus), @@ -261,12 +276,21 @@ impl SyncManager { #[cfg(test)] pub(crate) fn active_parent_lookups(&self) -> Vec> { - self.block_lookups.active_parent_lookups() + self.block_lookups + .active_parent_lookups() + .iter() + .map(|c| c.chain.clone()) + .collect() } #[cfg(test)] - pub(crate) fn failed_chains_contains(&mut self, chain_hash: &Hash256) -> bool { - self.block_lookups.failed_chains_contains(chain_hash) + pub(crate) fn get_failed_chains(&mut self) -> Vec { + self.block_lookups.get_failed_chains() + } + + #[cfg(test)] + pub(crate) fn insert_failed_chain(&mut self, block_root: Hash256) { + self.block_lookups.insert_failed_chain(block_root); } fn network_globals(&self) -> &NetworkGlobals { @@ -648,13 +672,7 @@ impl SyncManager { ) { match self.should_search_for_block(Some(slot), &peer_id) { Ok(_) => { - self.block_lookups.search_parent_of_child( - parent_root, - block_root, - &[peer_id], - &mut self.network, - ); - self.block_lookups.search_child_of_parent( + 
self.block_lookups.search_child_and_parent( block_root, block_component, peer_id, diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index 80481d10ca6..860192db684 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -620,7 +620,7 @@ impl SyncNetworkContext { ) -> Result<(), &'static str> { match self.beacon_processor_if_enabled() { Some(beacon_processor) => { - trace!(self.log, "Sending blobs for processing"; "block" => ?block_root, "process_type" => ?process_type); + debug!(self.log, "Sending blobs for processing"; "block" => ?block_root, "process_type" => ?process_type); if let Err(e) = beacon_processor.send_rpc_blobs(block_root, blobs, duration, process_type) { diff --git a/common/lru_cache/src/time.rs b/common/lru_cache/src/time.rs index 0b2fd835687..890bf47eb44 100644 --- a/common/lru_cache/src/time.rs +++ b/common/lru_cache/src/time.rs @@ -166,6 +166,12 @@ where self.map.contains(key) } + /// List known keys + pub fn keys(&mut self) -> impl Iterator { + self.update(); + self.map.iter() + } + /// Shrink the mappings to fit the current size. pub fn shrink_to_fit(&mut self) { self.map.shrink_to_fit(); diff --git a/consensus/types/src/test_utils/test_random/secret_key.rs b/consensus/types/src/test_utils/test_random/secret_key.rs index 3f3f6ed5184..da1614aa24e 100644 --- a/consensus/types/src/test_utils/test_random/secret_key.rs +++ b/consensus/types/src/test_utils/test_random/secret_key.rs @@ -2,6 +2,8 @@ use super::*; impl TestRandom for SecretKey { fn random_for_test(_rng: &mut impl RngCore) -> Self { + // TODO: Not deterministic generation. Using `SecretKey::deserialize` results in + // `BlstError(BLST_BAD_ENCODING)`, need to debug with blst source on what encoding expects. 
SecretKey::random() } } diff --git a/consensus/types/src/test_utils/test_random/signature.rs b/consensus/types/src/test_utils/test_random/signature.rs index 5b952296b61..8bc0d711103 100644 --- a/consensus/types/src/test_utils/test_random/signature.rs +++ b/consensus/types/src/test_utils/test_random/signature.rs @@ -1,11 +1,10 @@ use super::*; impl TestRandom for Signature { - fn random_for_test(rng: &mut impl RngCore) -> Self { - let secret_key = SecretKey::random_for_test(rng); - let mut message = vec![0; 32]; - rng.fill_bytes(&mut message); - - secret_key.sign(Hash256::from_slice(&message)) + fn random_for_test(_rng: &mut impl RngCore) -> Self { + // TODO: `SecretKey::random_for_test` does not return a deterministic signature. Since this + // signature will not pass verification we could just return the generator point or the + // generator point multiplied by a random scalar if we want disctint signatures. + Signature::infinity().expect("infinity signature is valid") } } From 4bde25b56bc0561e1385d226ae7b8f66ed7cd1cb Mon Sep 17 00:00:00 2001 From: realbigsean Date: Mon, 29 Apr 2024 21:29:55 -0400 Subject: [PATCH 07/14] fix compile after merge --- beacon_node/network/src/sync/block_lookups/common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 8a78a10ddc8..d7d3c9a061f 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -180,7 +180,7 @@ impl RequestState for BlockRequestState { } } -impl RequestState for BlobRequestState { +impl RequestState for BlobRequestState { type RequestType = BlobsByRootSingleBlockRequest; type VerifiedResponseType = FixedBlobSidecarList; From 9d185d162558ddcbf72cb3ac3bdbeb5495b6bc1a Mon Sep 17 00:00:00 2001 From: realbigsean Date: Tue, 30 Apr 2024 01:59:05 -0400 Subject: [PATCH 08/14] #5655 pr review (#26) * fix compile after merge * 
remove todos, fix typos etc * fix compile * stable rng * delete TODO and unfilled out test * make download result a struct * enums instead of bools as params * fix comment --- beacon_node/beacon_chain/src/beacon_chain.rs | 3 - .../src/network_beacon_processor/tests.rs | 4 +- .../network/src/sync/block_lookups/common.rs | 62 ++++++++++--------- .../network/src/sync/block_lookups/mod.rs | 45 ++++++-------- .../src/sync/block_lookups/parent_chain.rs | 14 +++-- .../sync/block_lookups/single_block_lookup.rs | 29 ++++++--- .../network/src/sync/block_lookups/tests.rs | 15 +---- beacon_node/network/src/sync/manager.rs | 19 ++++-- 8 files changed, 97 insertions(+), 94 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 746aa371ffe..9c7ded313b6 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2898,9 +2898,6 @@ impl BeaconChain { .fork_choice_read_lock() .contains_block(&block_root) { - // TODO: Should also check for: - // - Parent block is known - // - Slot is not in the future return Err(BlockError::BlockIsAlreadyKnown(block_root)); } diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index dd58eb83555..4ba4c4ddd1d 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -311,9 +311,7 @@ impl TestRig { block_root, RpcBlock::new_without_blobs(Some(block_root), self.next_block.clone()), std::time::Duration::default(), - BlockProcessType::ParentLookup { - chain_hash: Hash256::random(), - }, + BlockProcessType::SingleBlock { id: 0 }, ) .unwrap(); } diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index d7d3c9a061f..1dfe4310324 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ 
b/beacon_node/network/src/sync/block_lookups/common.rs @@ -12,7 +12,7 @@ use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::BeaconChainTypes; use std::sync::Arc; use types::blob_sidecar::FixedBlobSidecarList; -use types::{Hash256, SignedBeaconBlock}; +use types::SignedBeaconBlock; use super::single_block_lookup::DownloadResult; use super::SingleLookupId; @@ -28,6 +28,16 @@ pub enum ResponseType { /// is further back than the most recent head slot. pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2; +pub enum AwaitingParent { + True, + False, +} + +pub enum BlockIsProcessed { + True, + False, +} + /// This trait unifies common single block lookup functionality across blocks and blobs. This /// includes making requests, verifying responses, and handling processing results. A /// `SingleBlockLookup` includes both a `BlockRequestState` and a `BlobRequestState`, this trait is @@ -47,16 +57,14 @@ pub trait RequestState { fn continue_request( &mut self, id: Id, - awaiting_parent: bool, + awaiting_parent: AwaitingParent, downloaded_block_expected_blobs: Option, - block_is_processed: bool, + block_is_processed: BlockIsProcessed, cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { // Attempt to progress awaiting downloads if self.get_state().is_awaiting_download() { // Verify the current request has not exceeded the maximum number of attempts. - // TODO: Okay to use `SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS` for both current and parent - // lookups now? It not trivial to identify what is a "parent lookup" now. let request_state = self.get_state(); if request_state.failed_attempts() >= SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS { let cannot_process = request_state.more_failed_processing_attempts(); @@ -76,11 +84,9 @@ pub trait RequestState { // Otherwise, attempt to progress awaiting processing // If this request is awaiting a parent lookup to be processed, do not send for processing. 
// The request will be rejected with unknown parent error. - } else if !awaiting_parent && - // TODO: Blob processing / import does not check for unknown parent. As a temporary fix - // and to emulate the behaviour before this PR, hold blobs for processing until the - // block has been processed i.e. it has a known parent. - (block_is_processed || matches!(Self::response_type(), ResponseType::Block)) + } else if matches!(awaiting_parent, AwaitingParent::False) + && (matches!(block_is_processed, BlockIsProcessed::True) + || matches!(Self::response_type(), ResponseType::Block)) { // maybe_start_processing returns Some if state == AwaitingProcess. This pattern is // useful to conditionally access the result data. @@ -103,10 +109,6 @@ pub trait RequestState { /* Response handling methods */ - /// A getter for the parent root of the response. Returns an `Option` because we won't know - /// the blob parent if we don't end up getting any blobs in the response. - fn get_parent_root(verified_response: &Self::VerifiedResponseType) -> Option; - /// Send the response to the beacon processor. 
fn send_for_processing( id: Id, @@ -148,18 +150,20 @@ impl RequestState for BlockRequestState { .map_err(LookupRequestError::SendFailed) } - fn get_parent_root(verified_response: &Arc>) -> Option { - Some(verified_response.parent_root()) - } - fn send_for_processing( id: SingleLookupId, - (block, block_root, seen_timestamp, _): DownloadResult, + download_result: DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { + let DownloadResult { + value, + block_root, + seen_timestamp, + peer_id: _, + } = download_result; cx.send_block_for_processing( block_root, - RpcBlock::new_without_blobs(Some(block_root), block), + RpcBlock::new_without_blobs(Some(block_root), value), seen_timestamp, BlockProcessType::SingleBlock { id }, ) @@ -200,22 +204,20 @@ impl RequestState for BlobRequestState { .map_err(LookupRequestError::SendFailed) } - fn get_parent_root(verified_response: &FixedBlobSidecarList) -> Option { - verified_response - .into_iter() - .filter_map(|blob| blob.as_ref()) - .map(|blob| blob.block_parent_root()) - .next() - } - fn send_for_processing( id: Id, - (verified, block_root, seen_timestamp, _): DownloadResult, + download_result: DownloadResult, cx: &SyncNetworkContext, ) -> Result<(), LookupRequestError> { + let DownloadResult { + value, + block_root, + seen_timestamp, + peer_id: _, + } = download_result; cx.send_blobs_for_processing( block_root, - verified, + value, seen_timestamp, BlockProcessType::SingleBlob { id }, ) diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 99724837034..3549b938d6c 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,5 +1,6 @@ use self::parent_chain::{compute_parent_chains, NodeChain}; -use self::single_block_lookup::{DownloadResult, LookupRequestError, SingleBlockLookup}; +pub use self::single_block_lookup::DownloadResult; +use 
self::single_block_lookup::{LookupRequestError, SingleBlockLookup};
 use super::manager::{BlockProcessType, BlockProcessingResult};
 use super::network_context::{RpcProcessingResult, SyncNetworkContext};
 use crate::metrics;
@@ -38,8 +39,8 @@ pub enum BlockComponent {
 impl BlockComponent {
     fn parent_root(&self) -> Hash256 {
         match self {
-            BlockComponent::Block(block) => block.0.parent_root(),
-            BlockComponent::Blob(blob) => blob.0.block_parent_root(),
+            BlockComponent::Block(block) => block.value.parent_root(),
+            BlockComponent::Blob(blob) => blob.value.block_parent_root(),
         }
     }
 }
@@ -106,8 +107,8 @@ impl BlockLookups {

     /* Lookup requests */

-    /// Creates a lookup for the block with the given `block_root` and immediately triggers it.
-    /// Returns true if the lookup is created or already exists
+    /// Creates a parent lookup for the block with the given `block_root` and immediately triggers it.
+    /// If a parent lookup exists or is triggered, a current lookup will be created.
     pub fn search_child_and_parent(
         &mut self,
         block_root: Hash256,
@@ -132,7 +133,7 @@ impl BlockLookups {
         }
     }

-    /// Seach a block that we don't known its parent root.
+    /// Search a block whose parent root is unknown.
     /// Returns true if the lookup is created or already exists
     pub fn search_unknown_block(
         &mut self,
@@ -187,7 +188,7 @@ impl BlockLookups {
                 "chain_too_long",
             );
         }
-        self.drop_lookup_and_childs(*lookup_id);
+        self.drop_lookup_and_children(*lookup_id);
     }
 }
@@ -290,7 +291,7 @@ impl BlockLookups {
     ) {
         if let Err(e) = self.on_download_response_inner::(id, peer_id, response, cx) {
             debug!(self.log, "Dropping single lookup"; "id" => id, "err" => ?e);
-            self.drop_lookup_and_childs(id);
+            self.drop_lookup_and_children(id);
             self.update_metrics();
         }
     }
@@ -333,12 +334,12 @@ impl BlockLookups {
         // Register the download peer here. Once we have received some data over the wire we
         // attribute it to this peer for scoring latter regardless of how the request was
         // done.
- request_state.on_download_success(( - response, + request_state.on_download_success(DownloadResult { + value: response, block_root, seen_timestamp, peer_id, - ))?; + })?; // continue_request will send for processing as the request state is AwaitingProcessing lookup.continue_request::(cx) } @@ -394,7 +395,7 @@ impl BlockLookups { BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => id, }; debug!(self.log, "Dropping lookup on request error"; "component" => process_type.component(), "id" => process_type.id(), "error" => ?e); - self.drop_lookup_and_childs(id); + self.drop_lookup_and_children(id); self.update_metrics(); } } @@ -497,21 +498,11 @@ impl BlockLookups { { // There errors indicate internal problems and should not downscore the peer warn!(self.log, "Internal availability check failure"; "block_root" => %block_root, "error" => ?e); - // TODO: This lines represent an improper transition of download states, - // which can log errors in the future. If an error here causes the request - // to transition into a bad state, a future network message will cause - // the request to be dropped - // - // lookup.block_request_state.state.on_download_failure(); - // lookup.blob_request_state.state.on_download_failure(); Action::Drop } other => { debug!(self.log, "Invalid lookup component"; "block_root" => %block_root, "component" => ?R::response_type(), "error" => ?other); let peer_id = request_state.on_processing_failure()?; - // TODO: Why is the original code downscoring the block peer regardless of - // type of request? Sending a blob for verification can result in an error - // attributable to the block peer? 
cx.report_peer( peer_id, PeerAction::MidToleranceError, @@ -541,7 +532,7 @@ impl BlockLookups { } Action::Drop => { // Drop with noop - self.drop_lookup_and_childs(lookup_id); + self.drop_lookup_and_children(lookup_id); self.update_metrics(); } Action::Continue => { @@ -572,14 +563,14 @@ impl BlockLookups { } for id in failed_lookups { - self.drop_lookup_and_childs(id); + self.drop_lookup_and_children(id); } } /// Drops `dropped_id` lookup and all its children recursively. Lookups awaiting a parent need - /// the parent to make progress to resolve, therefore we must drop them is the parent is + /// the parent to make progress to resolve, therefore we must drop them if the parent is /// dropped. - pub fn drop_lookup_and_childs(&mut self, dropped_id: SingleLookupId) { + pub fn drop_lookup_and_children(&mut self, dropped_id: SingleLookupId) { if let Some(dropped_lookup) = self.single_block_lookups.remove(&dropped_id) { debug!(self.log, "Dropping child lookup"; "id" => ?dropped_id, "block_root" => %dropped_lookup.block_root()); @@ -591,7 +582,7 @@ impl BlockLookups { .collect::>(); for id in child_lookups { - self.drop_lookup_and_childs(id); + self.drop_lookup_and_children(id); } } } diff --git a/beacon_node/network/src/sync/block_lookups/parent_chain.rs b/beacon_node/network/src/sync/block_lookups/parent_chain.rs index 0571e6d0dbb..01a39a69713 100644 --- a/beacon_node/network/src/sync/block_lookups/parent_chain.rs +++ b/beacon_node/network/src/sync/block_lookups/parent_chain.rs @@ -51,23 +51,23 @@ pub(crate) fn compute_parent_chains(nodes: &[Node]) -> Vec { let mut parent_chains = vec![]; - // Iterate blocks which no child + // Iterate blocks with no children for tip in nodes { let mut block_root = tip.block_root; if parent_to_child.get(&block_root).is_none() { let mut chain = vec![]; // Resolve chain of blocks - loop { + 'inner: loop { if let Some(parent_root) = child_to_parent.get(&block_root) { // block_root is a known block that may or may not have a parent root 
chain.push(block_root); if let Some(parent_root) = parent_root { block_root = *parent_root; - continue; + continue 'inner; } } - break; + break 'inner; } if chain.len() > 1 { @@ -100,7 +100,9 @@ pub(crate) fn find_oldest_fork_ancestor( } // Should never happen - let parent_chain = parent_chains.get(chain_idx).ok_or("chain_idx off bounds")?; + let parent_chain = parent_chains + .get(chain_idx) + .ok_or("chain_idx out of bounds")?; // Find the first block in the target parent chain that is not in other parent chains // Iterate in ascending slot order for block in parent_chain.chain.iter().rev() { @@ -109,7 +111,7 @@ pub(crate) fn find_oldest_fork_ancestor( } } - // If no match means that the chain is fully contained within another chain. This should never + // No match means that the chain is fully contained within another chain. This should never // happen, but if that was the case just return the tip Ok(parent_chain.tip) } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 9349e24c69c..19b3f5326d0 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -1,3 +1,4 @@ +use super::common::{AwaitingParent, BlockIsProcessed}; use super::{BlockComponent, PeerId}; use crate::sync::block_lookups::common::RequestState; use crate::sync::block_lookups::Id; @@ -125,13 +126,21 @@ impl SingleBlockLookup { cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { let id = self.id; - let awaiting_parent = self.awaiting_parent.is_some(); + let awaiting_parent = if self.awaiting_parent.is_some() { + AwaitingParent::True + } else { + AwaitingParent::False + }; let downloaded_block_expected_blobs = self .block_request_state .state .peek_downloaded_data() .map(|block| block.num_expected_blobs()); - let block_is_processed = self.block_request_state.state.is_processed(); + let 
block_is_processed = if self.block_request_state.state.is_processed() { + BlockIsProcessed::True + } else { + BlockIsProcessed::False + }; R::request_state_mut(self).continue_request( id, awaiting_parent, @@ -208,7 +217,13 @@ impl BlockRequestState { } } -pub type DownloadResult = (T, Hash256, Duration, PeerId); +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct DownloadResult { + pub value: T, + pub block_root: Hash256, + pub seen_timestamp: Duration, + pub peer_id: PeerId, +} #[derive(Debug, PartialEq, Eq)] pub enum State { @@ -274,8 +289,8 @@ impl SingleLookupRequestState { match &self.state { State::AwaitingDownload => None, State::Downloading { .. } => None, - State::AwaitingProcess(result) => Some(&result.0), - State::Processing(result) => Some(&result.0), + State::AwaitingProcess(result) => Some(&result.value), + State::Processing(result) => Some(&result.value), State::Processed { .. } => None, } } @@ -362,7 +377,7 @@ impl SingleLookupRequestState { pub fn on_processing_failure(&mut self) -> Result { match &self.state { State::Processing(result) => { - let peer_id = result.3; + let peer_id = result.peer_id; self.failed_processing = self.failed_processing.saturating_add(1); self.state = State::AwaitingDownload; Ok(peer_id) @@ -376,7 +391,7 @@ impl SingleLookupRequestState { pub fn on_processing_success(&mut self) -> Result { match &self.state { State::Processing(result) => { - let peer_id = result.3; + let peer_id = result.peer_id; self.state = State::Processed(peer_id); Ok(peer_id) } diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index ff02a19a554..699f581eebe 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -703,11 +703,10 @@ impl TestRig { fn stable_rng() { let mut rng = XorShiftRng::from_seed([42; 16]); let (block, _) = generate_rand_block_and_blobs::(ForkName::Base, NumBlobs::None, &mut rng); - // TODO: 
Make rand block generation stable - assert_ne!( + assert_eq!( block.canonical_root(), Hash256::from_slice( - &hex::decode("9cfcfc321759d8a2c38d6541a966da5e88fe8729ed5a5ab37013781ff097b0d6") + &hex::decode("adfd2e9e7a7976e8ccaed6eaf0257ed36a5b476732fee63ff44966602fd099ec") .unwrap() ), "rng produces a consistent value" @@ -1174,7 +1173,7 @@ fn test_parent_lookup_ignored_response() { rig.trigger_unknown_parent_block(peer_id, block.clone().into()); let id = rig.expect_parent_request_block_and_blobs(parent_root); // Note: single block lookup for current `block` does not trigger any request because it does - // not has blobs, and the block is already cached + // not have blobs, and the block is already cached // Peer sends the right block, it should be sent for processing. Peer should not be penalized. rig.parent_lookup_block_response(id, peer_id, Some(parent.into())); @@ -1238,13 +1237,6 @@ fn test_same_chain_race_condition() { rig.expect_no_active_lookups(); } -#[test] -fn test_penalize_wrong_peer_with_cached_child() { - // peer A sends blob with malicious data as unknown parent - // peer B serves parent and rest of blocks - // All components are sent as RpcBlock, penalizing peer B -} - mod deneb_only { use super::*; use beacon_chain::{ @@ -1393,7 +1385,6 @@ mod deneb_only { self } - // TODO: Eventually deprecate this function fn set_block_id_for_import(mut self) -> Self { let lookup_id = self.rig.find_single_lookup_for(self.block_root); self.block_req_id = Some(SingleLookupReqId { diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index f7c9dd783c8..7d512733c6c 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -41,7 +41,9 @@ use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH}; use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor}; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; -use 
crate::sync::block_lookups::{BlobRequestState, BlockComponent, BlockRequestState}; +use crate::sync::block_lookups::{ + BlobRequestState, BlockComponent, BlockRequestState, DownloadResult, +}; use crate::sync::block_sidecar_coupling::BlocksAndBlobsRequestInfo; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; @@ -593,12 +595,12 @@ impl SyncManager { block_root, parent_root, block_slot, - BlockComponent::Block(( - block.block_cloned(), + BlockComponent::Block(DownloadResult { + value: block.block_cloned(), block_root, - timestamp_now(), + seen_timestamp: timestamp_now(), peer_id, - )), + }), ); } SyncMessage::UnknownParentBlob(peer_id, blob) => { @@ -611,7 +613,12 @@ impl SyncManager { block_root, parent_root, blob_slot, - BlockComponent::Blob((blob, block_root, timestamp_now(), peer_id)), + BlockComponent::Blob(DownloadResult { + value: blob, + block_root, + seen_timestamp: timestamp_now(), + peer_id, + }), ); } SyncMessage::UnknownBlockHashFromAttestation(peer_id, block_root) => { From 0203843202f2c451609c0a1afa54fee23bc560ed Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:14:30 +0900 Subject: [PATCH 09/14] Various fixes --- .../network/src/sync/block_lookups/common.rs | 18 +++++-------- .../src/sync/block_lookups/parent_chain.rs | 25 +++++++++---------- .../sync/block_lookups/single_block_lookup.rs | 16 +++--------- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index 1dfe4310324..dc82000ef1a 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -28,15 +28,10 @@ pub enum ResponseType { /// is further back than the most recent head slot. 
pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2; -pub enum AwaitingParent { - True, - False, -} - -pub enum BlockIsProcessed { - True, - False, -} +/// Wrapper around bool to prevent mixing this argument with `BlockIsProcessed` +pub(crate) struct AwaitingParent(pub bool); +/// Wrapper around bool to prevent mixing this argument with `AwaitingParent` +pub(crate) struct BlockIsProcessed(pub bool); /// This trait unifies common single block lookup functionality across blocks and blobs. This /// includes making requests, verifying responses, and handling processing results. A @@ -84,9 +79,8 @@ pub trait RequestState { // Otherwise, attempt to progress awaiting processing // If this request is awaiting a parent lookup to be processed, do not send for processing. // The request will be rejected with unknown parent error. - } else if matches!(awaiting_parent, AwaitingParent::False) - && (matches!(block_is_processed, BlockIsProcessed::True) - || matches!(Self::response_type(), ResponseType::Block)) + } else if !awaiting_parent.0 + && (block_is_processed.0 || matches!(Self::response_type(), ResponseType::Block)) { // maybe_start_processing returns Some if state == AwaitingProcess. This pattern is // useful to conditionally access the result data. 
diff --git a/beacon_node/network/src/sync/block_lookups/parent_chain.rs b/beacon_node/network/src/sync/block_lookups/parent_chain.rs
index 01a39a69713..55f2cfe1292 100644
--- a/beacon_node/network/src/sync/block_lookups/parent_chain.rs
+++ b/beacon_node/network/src/sync/block_lookups/parent_chain.rs
@@ -1,10 +1,9 @@
-use std::collections::{HashMap, HashSet};
-
+use super::single_block_lookup::SingleBlockLookup;
 use beacon_chain::BeaconChainTypes;
+use std::collections::{HashMap, HashSet};
 use types::Hash256;

-use super::single_block_lookup::SingleBlockLookup;
-
+/// Summary of a lookup of which we may not know its parent_root yet
 pub(crate) struct Node {
     block_root: Hash256,
     parent_root: Option,
@@ -19,6 +18,7 @@ impl From<&SingleBlockLookup> for Node {
     }
 }

+/// Wrapper around a chain of block roots that have at least one element (tip)
 pub(crate) struct NodeChain {
     // Parent chain blocks in descending slot order
     pub(crate) chain: Vec,
@@ -26,6 +26,7 @@ pub(crate) struct NodeChain {
 }

 impl NodeChain {
+    /// Returns the block_root of the oldest ancestor (min slot) of this chain
     pub(crate) fn ancestor(&self) -> Hash256 {
         self.chain.last().copied().unwrap_or(self.tip)
     }
@@ -58,16 +59,14 @@ pub(crate) fn compute_parent_chains(nodes: &[Node]) -> Vec {
             let mut chain = vec![];
             // Resolve chain of blocks
-            'inner: loop {
-                if let Some(parent_root) = child_to_parent.get(&block_root) {
-                    // block_root is a known block that may or may not have a parent root
-                    chain.push(block_root);
-                    if let Some(parent_root) = parent_root {
-                        block_root = *parent_root;
-                        continue 'inner;
-                    }
+            while let Some(parent_root) = child_to_parent.get(&block_root) {
+                // block_root is a known block that may or may not have a parent root
+                chain.push(block_root);
+                if let Some(parent_root) = parent_root {
+                    block_root = *parent_root;
+                } else {
+                    break;
                 }
-                break 'inner;
             }

             if chain.len() > 1 {
diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs 
b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index 19b3f5326d0..a08a6e9d2b4 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -126,26 +126,18 @@ impl SingleBlockLookup { cx: &mut SyncNetworkContext, ) -> Result<(), LookupRequestError> { let id = self.id; - let awaiting_parent = if self.awaiting_parent.is_some() { - AwaitingParent::True - } else { - AwaitingParent::False - }; + let awaiting_parent = self.awaiting_parent.is_some(); let downloaded_block_expected_blobs = self .block_request_state .state .peek_downloaded_data() .map(|block| block.num_expected_blobs()); - let block_is_processed = if self.block_request_state.state.is_processed() { - BlockIsProcessed::True - } else { - BlockIsProcessed::False - }; + let block_is_processed = self.block_request_state.state.is_processed(); R::request_state_mut(self).continue_request( id, - awaiting_parent, + AwaitingParent(awaiting_parent), downloaded_block_expected_blobs, - block_is_processed, + BlockIsProcessed(block_is_processed), cx, ) } From e8fc4825afcb5baa40f48ba3200921b1471f4df6 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Tue, 30 Apr 2024 20:43:43 +0900 Subject: [PATCH 10/14] Track ignored child components --- .../network/src/sync/block_lookups/mod.rs | 12 +++++++++++- .../sync/block_lookups/single_block_lookup.rs | 18 +++++++++++------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 3549b938d6c..bf47bff3365 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -43,6 +43,12 @@ impl BlockComponent { BlockComponent::Blob(blob) => blob.value.block_parent_root(), } } + fn get_type(&self) -> &'static str { + match self { + BlockComponent::Block(_) => "block", + 
BlockComponent::Blob(_) => "blob", + } + } } pub type SingleLookupId = u32; @@ -229,7 +235,11 @@ impl BlockLookups { trace!(self.log, "Adding peer to existing single block lookup"; "block_root" => %block_root); lookup.add_peers(peers); if let Some(block_component) = block_component { - lookup.add_child_components(block_component); + let component_type = block_component.get_type(); + let imported = lookup.add_child_components(block_component); + if !imported { + debug!(self.log, "Lookup child component ignored"; "block_root" => %block_root, "type" => component_type); + } } return true; } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index a08a6e9d2b4..1a1bd570c6d 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -72,18 +72,19 @@ impl SingleBlockLookup { self.awaiting_parent = None; } - pub fn add_child_components(&mut self, block_component: BlockComponent) { + /// Maybe insert a verified response into this lookup. Returns true if imported + pub fn add_child_components(&mut self, block_component: BlockComponent) -> bool { match block_component { - BlockComponent::Block(block) => { - self.block_request_state - .state - .insert_verified_response(block); - } + BlockComponent::Block(block) => self + .block_request_state + .state + .insert_verified_response(block), BlockComponent::Blob(_) => { // For now ignore single blobs, as the blob request state assumes all blobs are // attributed to the same peer = the peer serving the remaining blobs. Ignoring this // block component has a minor effect, causing the node to re-request this blob // once the parent chain is successfully resolved + false } } } @@ -289,9 +290,12 @@ impl SingleLookupRequestState { /// Switch to `AwaitingProcessing` if the request is in `AwaitingDownload` state, otherwise /// ignore. 
- pub fn insert_verified_response(&mut self, result: DownloadResult) { + pub fn insert_verified_response(&mut self, result: DownloadResult) -> bool { if let State::AwaitingDownload = &self.state { self.state = State::AwaitingProcess(result); + true + } else { + false } } From 38eaa8b7593c8a45bcce84b2304de52e14e9a539 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Tue, 30 Apr 2024 21:08:26 +0900 Subject: [PATCH 11/14] Track dropped lookup reason as metric --- beacon_node/network/src/metrics.rs | 5 ++ .../network/src/sync/block_lookups/mod.rs | 50 ++++++++++--------- .../sync/block_lookups/single_block_lookup.rs | 11 ++++ beacon_node/network/src/sync/manager.rs | 7 --- 4 files changed, 42 insertions(+), 31 deletions(-) diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index d3804fbed8d..8df22a3d00e 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -244,6 +244,11 @@ lazy_static! 
{ "sync_parent_block_lookups", "Number of parent block lookups underway" ); + pub static ref SYNC_LOOKUP_DROPPED: Result = try_create_int_counter_vec( + "sync_lookups_dropped_total", + "Total count of sync lookups dropped by reason", + &["reason"] + ); /* * Block Delay Metrics diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index bf47bff3365..d5b1a80592d 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -276,16 +276,13 @@ impl BlockLookups { lookup.add_child_components(block_component); } - match lookup.continue_requests(cx) { - Ok(()) => { - self.single_block_lookups.insert(lookup.id, lookup); - self.update_metrics(); - true - } - Err(e) => { - debug!(self.log, "Single block lookup failed"; "block_root" => ?block_root, "error" => ?e); - false - } + if let Err(e) = lookup.continue_requests(cx) { + self.on_lookup_request_error(lookup.id, e, "new_current_lookup"); + false + } else { + self.single_block_lookups.insert(lookup.id, lookup); + self.update_metrics(); + true } } @@ -300,9 +297,7 @@ impl BlockLookups { cx: &mut SyncNetworkContext, ) { if let Err(e) = self.on_download_response_inner::(id, peer_id, response, cx) { - debug!(self.log, "Dropping single lookup"; "id" => id, "err" => ?e); - self.drop_lookup_and_children(id); - self.update_metrics(); + self.on_lookup_request_error(id, e, "download_response"); } } @@ -401,12 +396,7 @@ impl BlockLookups { self.on_processing_result_inner::>(id, result, cx) } } { - let id = match process_type { - BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => id, - }; - debug!(self.log, "Dropping lookup on request error"; "component" => process_type.component(), "id" => process_type.id(), "error" => ?e); - self.drop_lookup_and_children(id); - self.update_metrics(); + self.on_lookup_request_error(process_type.id(), e, "processing_result"); } } @@ -417,7 +407,7 @@ impl 
BlockLookups {
         cx: &mut SyncNetworkContext,
     ) -> Result<(), LookupRequestError> {
         let Some(lookup) = self.single_block_lookups.get_mut(&lookup_id) else {
-            debug!(self.log, "Unknown single block lookup"; "target_id" => lookup_id);
+            debug!(self.log, "Unknown single block lookup"; "id" => lookup_id);
             return Ok(());
         };
@@ -566,14 +556,13 @@ impl BlockLookups {
                 lookup.resolve_awaiting_parent();
                 debug!(self.log, "Continuing child lookup"; "parent_root" => %block_root, "block_root" => %lookup.block_root());
                 if let Err(e) = lookup.continue_requests(cx) {
-                    debug!(self.log, "Error continuing lookup"; "id" => id, "error" => ?e);
-                    failed_lookups.push(*id);
+                    failed_lookups.push((*id, e));
                 }
             }
         }

-        for id in failed_lookups {
-            self.drop_lookup_and_children(id);
+        for (id, e) in failed_lookups {
+            self.on_lookup_request_error(id, e, "continue_child_lookups");
         }
     }
@@ -597,6 +586,19 @@ impl BlockLookups {
         }
     }

+    /// Common handler for a lookup request error, drop it and update metrics
+    fn on_lookup_request_error(
+        &mut self,
+        id: SingleLookupId,
+        error: LookupRequestError,
+        source: &str,
+    ) {
+        debug!(self.log, "Dropping lookup on request error"; "id" => id, "source" => source, "error" => ?error);
+        metrics::inc_counter_vec(&metrics::SYNC_LOOKUP_DROPPED, &[error.as_metric()]);
+        self.drop_lookup_and_children(id);
+        self.update_metrics();
+    }
+
     /* Helper functions */

     /// Drops all the single block requests and returns how many requests were dropped.
diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs
index 1a1bd570c6d..76deb236742 100644
--- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs
+++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs
@@ -461,3 +461,14 @@ impl std::fmt::Display for State {
         }
     }
 }
+
+impl LookupRequestError {
+    pub(crate) fn as_metric(&self) -> &'static str {
+        match self {
+            LookupRequestError::TooManyAttempts { ..
} => "TooManyAttempts", + LookupRequestError::NoPeers => "NoPeers", + LookupRequestError::SendFailed { .. } => "SendFailed", + LookupRequestError::BadState { .. } => "BadState", + } + } +} diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 7d512733c6c..08fde6dcc8f 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -154,13 +154,6 @@ pub enum BlockProcessType { } impl BlockProcessType { - pub fn component(&self) -> &'static str { - match self { - BlockProcessType::SingleBlock { .. } => "block", - BlockProcessType::SingleBlob { .. } => "blob", - } - } - pub fn id(&self) -> Id { match self { BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => *id, From 4726cbd9819dc5b0484d23d82514637baa2ab4ef Mon Sep 17 00:00:00 2001 From: realbigsean Date: Tue, 30 Apr 2024 10:42:56 -0400 Subject: [PATCH 12/14] fix test --- .../network/src/sync/block_lookups/tests.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index 699f581eebe..302a0489c3b 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -1385,15 +1385,6 @@ mod deneb_only { self } - fn set_block_id_for_import(mut self) -> Self { - let lookup_id = self.rig.find_single_lookup_for(self.block_root); - self.block_req_id = Some(SingleLookupReqId { - lookup_id, - req_id: 0, - }); - self - } - fn parent_block_response(mut self) -> Self { self.rig.expect_empty_network(); let block = self.parent_block.pop_front().unwrap().clone(); @@ -1549,7 +1540,10 @@ mod deneb_only { // Missing blobs should be the request is not removed, the outstanding blobs request should // mean we do not send a new request. 
self.rig.single_block_component_processed(
-            self.block_req_id.expect("block request id").lookup_id,
+            self.block_req_id
+                .or(self.blob_req_id)
+                .expect("block request id")
+                .lookup_id,
             BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(self.block_root)),
         );
         self.rig.expect_empty_network();
@@ -2040,8 +2034,6 @@ mod deneb_only {
             .expect_block_process()
             .parent_blob_imported()
             .log("resolve original block trigger blobs request and import")
-            .blobs_response()
-            .set_block_id_for_import()
             .block_imported()
             .expect_no_active_lookups();
     }

From 3544fbde94bba56a8560b5223a027803383589b2 Mon Sep 17 00:00:00 2001
From: realbigsean
Date: Tue, 30 Apr 2024 11:08:58 -0400
Subject: [PATCH 13/14] add comment describing behavior of avail check error

---
 beacon_node/network/src/sync/block_lookups/mod.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs
index d5b1a80592d..a0c7c33bb0f 100644
--- a/beacon_node/network/src/sync/block_lookups/mod.rs
+++ b/beacon_node/network/src/sync/block_lookups/mod.rs
@@ -498,6 +498,11 @@ impl BlockLookups {
                 {
                     // There errors indicate internal problems and should not downscore the peer
                     warn!(self.log, "Internal availability check failure"; "block_root" => %block_root, "error" => ?e);
+
+                    // Here we choose *not* to call `on_processing_failure` because this could result in a bad
+                    // lookup state transition. This error invalidates both blob and block requests, and we don't know the
+                    // state of both requests. Blobs may have already successfully processed for example.
+                    // We opt to drop the lookup instead.
Action::Drop } other => { From 49a9e396e5da415ea2764eab484b6f273d75e533 Mon Sep 17 00:00:00 2001 From: realbigsean Date: Tue, 30 Apr 2024 11:13:03 -0400 Subject: [PATCH 14/14] =?UTF-8?q?=C2=A0update=20ordering?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../beacon_chain/src/blob_verification.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 263b9f9e013..fdf8ee2b971 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -571,6 +571,14 @@ pub fn validate_blob_sidecar_for_gossip( }); } + // Kzg verification for gossip blob sidecar + let kzg = chain + .kzg + .as_ref() + .ok_or(GossipBlobError::KzgNotInitialized)?; + let kzg_verified_blob = KzgVerifiedBlob::new(blob_sidecar.clone(), kzg, seen_timestamp) + .map_err(GossipBlobError::KzgError)?; + chain .observed_slashable .write() @@ -605,14 +613,6 @@ pub fn validate_blob_sidecar_for_gossip( }); } - // Kzg verification for gossip blob sidecar - let kzg = chain - .kzg - .as_ref() - .ok_or(GossipBlobError::KzgNotInitialized)?; - let kzg_verified_blob = KzgVerifiedBlob::new(blob_sidecar, kzg, seen_timestamp) - .map_err(GossipBlobError::KzgError)?; - Ok(GossipVerifiedBlob { block_root, blob: kzg_verified_blob,