diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs index 81c8f662ee9..fb813723920 100644 --- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs @@ -134,13 +134,37 @@ impl NetworkBeaconProcessor { request_id: PeerRequestId, request: BlocksByRootRequest, ) { + self.terminate_response_stream( + peer_id, + request_id, + self.clone() + .handle_blocks_by_root_request_inner(executor, peer_id, request_id, request) + .await, + Response::BlocksByRoot, + ); + } + + /// Handle a `BlocksByRoot` request from the peer. + pub async fn handle_blocks_by_root_request_inner( + self: Arc, + executor: TaskExecutor, + peer_id: PeerId, + request_id: PeerRequestId, + request: BlocksByRootRequest, + ) -> Result<(), (RPCResponseErrorCode, &'static str)> { let requested_blocks = request.block_roots().len(); let mut block_stream = match self .chain .get_blocks_checking_caches(request.block_roots().to_vec(), &executor) { Ok(block_stream) => block_stream, - Err(e) => return error!(self.log, "Error getting block stream"; "error" => ?e), + Err(e) => { + error!(self.log, "Error getting block stream"; "error" => ?e); + return Err(( + RPCResponseErrorCode::ServerError, + "Error getting block stream", + )); + } }; // Fetching blocks is async because it may have to hit the execution layer for payloads. let mut send_block_count = 0; @@ -169,13 +193,10 @@ impl NetworkBeaconProcessor { "block_root" => ?root, "reason" => "execution layer not synced", ); - // send the stream terminator - return self.send_error_response( - peer_id, + return Err(( RPCResponseErrorCode::ResourceUnavailable, - "Execution layer not synced".into(), - request_id, - ); + "Execution layer not synced", + )); } Err(e) => { debug!( @@ -196,8 +217,7 @@ impl NetworkBeaconProcessor { "returned" => %send_block_count ); - // send stream termination - self.send_response(peer_id, Response::BlocksByRoot(None), request_id); + Ok(()) } /// Handle a `BlobsByRoot` request from the peer. @@ -380,6 +400,24 @@ impl NetworkBeaconProcessor { request_id: PeerRequestId, req: BlocksByRangeRequest, ) { + self.terminate_response_stream( + peer_id, + request_id, + self.clone() + .handle_blocks_by_range_request_inner(executor, peer_id, request_id, req) + .await, + Response::BlocksByRange, + ); + } + + /// Handle a `BlocksByRange` request from the peer. + pub async fn handle_blocks_by_range_request_inner( + self: Arc, + executor: TaskExecutor, + peer_id: PeerId, + request_id: PeerRequestId, + req: BlocksByRangeRequest, + ) -> Result<(), (RPCResponseErrorCode, &'static str)> { debug!(self.log, "Received BlocksByRange Request"; "peer_id" => %peer_id, "count" => req.count(), @@ -401,12 +439,10 @@ impl NetworkBeaconProcessor { } }); if *req.count() > max_request_size { - return self.send_error_response( - peer_id, + return Err(( RPCResponseErrorCode::InvalidRequest, - format!("Request exceeded max size {max_request_size}"), - request_id, - ); + "Request exceeded max size", + )); } let forwards_block_root_iter = match self @@ -424,25 +460,15 @@ impl NetworkBeaconProcessor { "requested_slot" => slot, "oldest_known_slot" => oldest_block_slot ); - return self.send_error_response( - peer_id, - RPCResponseErrorCode::ResourceUnavailable, - "Backfilling".into(), - request_id, - ); + return Err((RPCResponseErrorCode::ResourceUnavailable, "Backfilling")); } Err(e) => { - self.send_error_response( - peer_id, - RPCResponseErrorCode::ServerError, - "Database error".into(), - request_id, - ); - return error!(self.log, "Unable to obtain root iter"; + error!(self.log, "Unable to obtain root iter"; "request" => ?req, "peer" => %peer_id, "error" => ?e ); + return Err((RPCResponseErrorCode::ServerError, "Database error")); } }; @@ -468,11 +494,12 @@ impl NetworkBeaconProcessor { let block_roots = match maybe_block_roots { Ok(block_roots) => block_roots, Err(e) => { - return error!(self.log, "Error during iteration over blocks"; + error!(self.log, "Error during iteration over blocks"; "request" => ?req, "peer" => %peer_id, "error" => ?e - ) + ); + return Err((RPCResponseErrorCode::ServerError, "Iteration error")); } }; @@ -481,7 +508,10 @@ impl NetworkBeaconProcessor { let mut block_stream = match self.chain.get_blocks(block_roots, &executor) { Ok(block_stream) => block_stream, - Err(e) => return error!(self.log, "Error getting block stream"; "error" => ?e), + Err(e) => { + error!(self.log, "Error getting block stream"; "error" => ?e); + return Err((RPCResponseErrorCode::ServerError, "Iterator error")); + } }; // Fetching blocks is async because it may have to hit the execution layer for payloads. @@ -511,12 +541,7 @@ impl NetworkBeaconProcessor { "peer" => %peer_id, "request_root" => ?root ); - return self.send_error_response( - peer_id, - RPCResponseErrorCode::ServerError, - "Database inconsistency".into(), - request_id, - ); + return Err((RPCResponseErrorCode::ServerError, "Database inconsistency")); } Err(BeaconChainError::BlockHashMissingFromExecutionLayer(_)) => { debug!( @@ -526,12 +551,10 @@ impl NetworkBeaconProcessor { "reason" => "execution layer not synced", ); // send the stream terminator - return self.send_error_response( - peer_id, + return Err(( RPCResponseErrorCode::ResourceUnavailable, - "Execution layer not synced".into(), - request_id, - ); + "Execution layer not synced", + )); } Err(e) => { if matches!( @@ -556,12 +579,7 @@ impl NetworkBeaconProcessor { } // send the stream terminator - return self.send_error_response( - peer_id, - RPCResponseErrorCode::ServerError, - "Failed fetching blocks".into(), - request_id, - ); + return Err((RPCResponseErrorCode::ServerError, "Failed fetching blocks")); } } } @@ -594,12 +612,7 @@ impl NetworkBeaconProcessor { ); } - // send the stream terminator - self.send_network_message(NetworkMessage::SendResponse { - peer_id, - response: Response::BlocksByRange(None), - id: request_id, - }); + Ok(()) } /// Handle a `BlobsByRange` request from the peer. diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 86109a38ac2..998bcc855a5 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -44,6 +44,13 @@ pub type DownloadedBlock = (Hash256, RpcBlock); const FAILED_CHAINS_CACHE_EXPIRY_SECONDS: u64 = 60; pub const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 3; +enum Action { + Retry, + ParentUnknown { parent_root: Hash256, slot: Slot }, + Drop, + Continue, +} + pub struct BlockLookups { /// Parent chain lookups being downloaded. parent_lookups: SmallVec<[ParentLookup; 3]>, @@ -715,7 +722,7 @@ impl BlockLookups { return; }; - let root = lookup.block_root(); + let block_root = lookup.block_root(); let request_state = R::request_state_mut(&mut lookup); let peer_id = match request_state.get_state().processing_peer() { @@ -729,28 +736,49 @@ impl BlockLookups { self.log, "Block component processed for lookup"; "response_type" => ?R::response_type(), - "block_root" => ?root, + "block_root" => ?block_root, "result" => ?result, "id" => target_id, ); - match result { - BlockProcessingResult::Ok(status) => match status { - AvailabilityProcessingStatus::Imported(root) => { - trace!(self.log, "Single block processing succeeded"; "block" => %root); - } - AvailabilityProcessingStatus::MissingComponents(_, _block_root) => { - match self.handle_missing_components::(cx, &mut lookup) { - Ok(()) => { - self.single_block_lookups.insert(target_id, lookup); - } - Err(e) => { - // Drop with an additional error. - warn!(self.log, "Single block lookup failed"; "block" => %root, "error" => ?e); - } - } + let action = match result { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(_)) + | BlockProcessingResult::Err(BlockError::BlockIsAlreadyKnown { .. }) => { + // Successfully imported + trace!(self.log, "Single block processing succeeded"; "block" => %block_root); + Action::Drop + } + + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + _, + _block_root, + )) => { + // `on_processing_success` is called here to ensure the request state is updated prior to checking + // if both components have been processed. + if R::request_state_mut(&mut lookup) + .get_state_mut() + .on_processing_success() + .is_err() + { + warn!( + self.log, + "Single block processing state incorrect"; + "action" => "dropping single block request" + ); + Action::Drop + // If this was the result of a block request, we can't determined if the block peer did anything + // wrong. If we already had both a block and blobs response processed, we should penalize the + // blobs peer because they did not provide all blobs on the initial request. + } else if lookup.both_components_processed() { + lookup.penalize_blob_peer(cx); + + // Try it again if possible. + lookup.blob_request_state.state.on_processing_failure(); + Action::Retry + } else { + Action::Continue } - }, + } BlockProcessingResult::Ignored => { // Beacon processor signalled to ignore the block processing result. // This implies that the cpu is overloaded. Drop the request. @@ -759,117 +787,88 @@ impl BlockLookups { "Single block processing was ignored, cpu might be overloaded"; "action" => "dropping single block request" ); + Action::Drop } BlockProcessingResult::Err(e) => { - match self.handle_single_lookup_block_error(cx, lookup, peer_id, e) { - Ok(Some(lookup)) => { - self.single_block_lookups.insert(target_id, lookup); + let root = lookup.block_root(); + trace!(self.log, "Single block processing failed"; "block" => %root, "error" => %e); + match e { + BlockError::BeaconChainError(e) => { + // Internal error + error!(self.log, "Beacon chain error processing single block"; "block_root" => %root, "error" => ?e); + Action::Drop } - Ok(None) => { - // Drop without an additional error. + BlockError::ParentUnknown(block) => { + let slot = block.slot(); + let parent_root = block.parent_root(); + lookup.add_child_components(block.into()); + Action::ParentUnknown { parent_root, slot } } - Err(e) => { - // Drop with an additional error. - warn!(self.log, "Single block lookup failed"; "block" => %root, "error" => ?e); + ref e @ BlockError::ExecutionPayloadError(ref epe) if !epe.penalize_peer() => { + // These errors indicate that the execution layer is offline + // and failed to validate the execution payload. Do not downscore peer. + debug!( + self.log, + "Single block lookup failed. Execution layer is offline / unsynced / misconfigured"; + "root" => %root, + "error" => ?e + ); + Action::Drop + } + BlockError::AvailabilityCheck(e) => match e.category() { + AvailabilityCheckErrorCategory::Internal => { + warn!(self.log, "Internal availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); + lookup.block_request_state.state.on_download_failure(); + lookup.blob_request_state.state.on_download_failure(); + Action::Retry + } + AvailabilityCheckErrorCategory::Malicious => { + warn!(self.log, "Availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); + lookup.handle_availability_check_failure(cx); + Action::Retry + } + }, + other => { + warn!(self.log, "Peer sent invalid block in single block lookup"; "root" => %root, "error" => ?other, "peer_id" => %peer_id); + if let Ok(block_peer) = lookup.block_request_state.state.processing_peer() { + cx.report_peer( + block_peer, + PeerAction::MidToleranceError, + "single_block_failure", + ); + + lookup.block_request_state.state.on_processing_failure(); + } + Action::Retry } } } }; - } - /// Handles a `MissingComponents` block processing error. Handles peer scoring and retries. - /// - /// If this was the result of a block request, we can't determined if the block peer did anything - /// wrong. If we already had both a block and blobs response processed, we should penalize the - /// blobs peer because they did not provide all blobs on the initial request. - fn handle_missing_components>( - &self, - cx: &mut SyncNetworkContext, - lookup: &mut SingleBlockLookup, - ) -> Result<(), LookupRequestError> { - R::request_state_mut(lookup) - .get_state_mut() - .on_processing_success() - .map_err(LookupRequestError::BadState)?; - - if lookup.both_components_processed() { - lookup.penalize_blob_peer(cx); - - // Try it again if possible. - lookup.blob_request_state.state.on_processing_failure(); - lookup.request_block_and_blobs(cx)?; - } - Ok(()) - } - - /// Handles peer scoring and retries related to a `BlockError` in response to a single block - /// or blob lookup processing result. - fn handle_single_lookup_block_error( - &mut self, - cx: &mut SyncNetworkContext, - mut lookup: SingleBlockLookup, - peer_id: PeerId, - e: BlockError, - ) -> Result>, LookupRequestError> { - let root = lookup.block_root(); - trace!(self.log, "Single block processing failed"; "block" => %root, "error" => %e); - match e { - BlockError::BlockIsAlreadyKnown(_) => { - // No error here - return Ok(None); - } - BlockError::BeaconChainError(e) => { - // Internal error - error!(self.log, "Beacon chain error processing single block"; "block_root" => %root, "error" => ?e); - return Ok(None); + match action { + Action::Retry => { + if let Err(e) = lookup.request_block_and_blobs(cx) { + warn!(self.log, "Single block lookup failed"; "block_root" => %block_root, "error" => ?e); + // Failed with too many retries, drop with noop + self.update_metrics(); + } else { + self.single_block_lookups.insert(target_id, lookup); + } } - BlockError::ParentUnknown(block) => { - let slot = block.slot(); - let parent_root = block.parent_root(); - lookup.add_child_components(block.into()); - lookup.request_block_and_blobs(cx)?; - self.search_parent(slot, root, parent_root, peer_id, cx); + Action::ParentUnknown { parent_root, slot } => { + // TODO: Consider including all peers from the lookup, claiming to know this block, not + // just the one that sent this specific block + self.search_parent(slot, block_root, parent_root, peer_id, cx); + self.single_block_lookups.insert(target_id, lookup); } - ref e @ BlockError::ExecutionPayloadError(ref epe) if !epe.penalize_peer() => { - // These errors indicate that the execution layer is offline - // and failed to validate the execution payload. Do not downscore peer. - debug!( - self.log, - "Single block lookup failed. Execution layer is offline / unsynced / misconfigured"; - "root" => %root, - "error" => ?e - ); - return Ok(None); + Action::Drop => { + // drop with noop + self.update_metrics(); } - BlockError::AvailabilityCheck(e) => match e.category() { - AvailabilityCheckErrorCategory::Internal => { - warn!(self.log, "Internal availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); - lookup.block_request_state.state.on_download_failure(); - lookup.blob_request_state.state.on_download_failure(); - lookup.request_block_and_blobs(cx)? - } - AvailabilityCheckErrorCategory::Malicious => { - warn!(self.log, "Availability check failure"; "root" => %root, "peer_id" => %peer_id, "error" => ?e); - lookup.handle_availability_check_failure(cx); - lookup.request_block_and_blobs(cx)? - } - }, - other => { - warn!(self.log, "Peer sent invalid block in single block lookup"; "root" => %root, "error" => ?other, "peer_id" => %peer_id); - if let Ok(block_peer) = lookup.block_request_state.state.processing_peer() { - cx.report_peer( - block_peer, - PeerAction::MidToleranceError, - "single_block_failure", - ); - - // Try it again if possible. - lookup.block_request_state.state.on_processing_failure(); - lookup.request_block_and_blobs(cx)? - } + Action::Continue => { + self.single_block_lookups.insert(target_id, lookup); } } - Ok(Some(lookup)) } pub fn parent_block_processed( @@ -1349,4 +1348,16 @@ impl BlockLookups { cx.report_peer(*peer_id, action, error.into()); } } + + pub fn update_metrics(&self) { + metrics::set_gauge( + &metrics::SYNC_SINGLE_BLOCK_LOOKUPS, + self.single_block_lookups.len() as i64, + ); + + metrics::set_gauge( + &metrics::SYNC_PARENT_BLOCK_LOOKUPS, + self.parent_lookups.len() as i64, + ); + } }