From ba891e1fed8379f1410f50ce56eccc05f25daac2 Mon Sep 17 00:00:00 2001
From: realbigsean
Date: Wed, 25 Oct 2023 15:30:17 +0000
Subject: [PATCH] deneb related logging improvements (#4859)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. Add commitments to logs and update the `Display` implementation of
`KzgCommitment` so that it is truncated, similar to block roots.

I've been finding it difficult to debug scenarios involving multiple blobs for
the same `(index, block_root)`. Logging the commitment will help with this: we
can match it against what exists in the block.

Example output:

```
Oct 20 21:13:36.700 DEBG Successfully verified gossip blob commitment: 0xa3c1…1cd8, index: 0, root: 0xf31e…f9de, slot: 154568
Oct 20 21:13:36.785 DEBG Successfully verified gossip block commitments: [0xa3c1…1cd8, 0x8655…02ff, 0x8d6a…955a, 0x84ac…3a1b, 0x9752…629b, 0xb9fc…20fb], root: 0xf31eeb732702e429e89057b15e1c0c631e8452e09e03cb1924353f536ef4f9de, slot: 154568, graffiti: teku/besu, service: beacon
```

Example output for a block with no blobs (this will show up pre-Deneb):

```
426734:Oct 20 21:15:24.113 DEBG Successfully verified gossip block, commitments: [], root: 0x619db1360ba0e8d44ae2a0f2450ebca47e167191feecffcfac0e8d7b6c39623c, slot: 154577, graffiti: teku/nethermind, service: beacon, module: beacon_chain::beacon_chain:2765
```

2. Remove `strum::IntoStaticStr` from `AvailabilityCheckError`. `IntoStaticStr`
ends up dropping the information inside the enum, so the KZG commitments in
this error are lost, making it more difficult to debug:

```
AvailabilityCheckError::KzgCommitmentMismatch {
    blob_commitment: KzgCommitment,
    block_commitment: KzgCommitment,
},
```

which is output as just `AvailabilityCheckError`.

3. Add some additional miscellaneous sync logs that I found useful when
debugging https://github.com/sigp/lighthouse/pull/4869.

4. Downgrade "Block returned for single block lookup not present" to debug,
because I don't think we can fix the scenario that causes it unless we can
cancel in-flight RPC requests.

Co-authored-by: realbigsean
---
 beacon_node/beacon_chain/src/beacon_chain.rs  |  2 ++
 .../src/block_verification_types.rs           |  7 ++++
 .../src/data_availability_checker/error.rs    |  3 +-
 .../gossip_methods.rs                         | 18 ++++++----
 .../network_beacon_processor/rpc_methods.rs   | 13 ++++++--
 .../network_beacon_processor/sync_methods.rs  | 32 ++++++++++++++++--
 .../network/src/sync/block_lookups/mod.rs     |  4 ++-
 .../src/sync/block_sidecar_coupling.rs        | 12 +++----
 beacon_node/network/src/sync/manager.rs       | 13 +++++---
 .../network/src/sync/network_context.rs       | 33 +++++++++++++++++--
 .../network/src/sync/range_sync/chain.rs      | 24 +++++++++++++-
 consensus/types/src/beacon_block_body.rs      |  8 +++++
 consensus/types/src/signed_beacon_block.rs    | 10 ++++++
 crypto/kzg/src/kzg_commitment.rs              | 33 +++++++++++++++++--
 14 files changed, 181 insertions(+), 31 deletions(-)

diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs
index e5c0499c9ad..2640d537475 100644
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@@ -2761,12 +2761,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
 
         match GossipVerifiedBlock::new(block, &chain) {
             Ok(verified) => {
+                let commitments_formatted = verified.block.commitments_formatted();
                 debug!(
                     chain.log,
                     "Successfully verified gossip block";
                     "graffiti" => graffiti_string,
                     "slot" => slot,
                     "root" => ?verified.block_root(),
+                    "commitments" => commitments_formatted,
                 );
 
                 Ok(verified)
diff --git a/beacon_node/beacon_chain/src/block_verification_types.rs b/beacon_node/beacon_chain/src/block_verification_types.rs
index 3dfd45b007a..d236e94f939 100644
--- a/beacon_node/beacon_chain/src/block_verification_types.rs
+++ b/beacon_node/beacon_chain/src/block_verification_types.rs
@@ -38,6 +38,13 @@ impl<E: EthSpec> RpcBlock<E> {
     pub fn block_root(&self) -> Hash256 {
         self.block_root
     }
+
+    pub fn as_block(&self) -> &SignedBeaconBlock<E> {
+        match &self.block {
+            RpcBlockInner::Block(block) => block,
+            RpcBlockInner::BlockAndBlobs(block, _) => block,
+        }
+    }
 }
 
 /// Note: This variant is intentionally private because we want to safely construct the
diff --git a/beacon_node/beacon_chain/src/data_availability_checker/error.rs b/beacon_node/beacon_chain/src/data_availability_checker/error.rs
index 5415d1f9588..b2979f2bf04 100644
--- a/beacon_node/beacon_chain/src/data_availability_checker/error.rs
+++ b/beacon_node/beacon_chain/src/data_availability_checker/error.rs
@@ -1,8 +1,7 @@
 use kzg::{Error as KzgError, KzgCommitment};
-use strum::IntoStaticStr;
 use types::{BeaconStateError, Hash256};
 
-#[derive(Debug, IntoStaticStr)]
+#[derive(Debug)]
 pub enum Error {
     Kzg(KzgError),
     KzgNotInitialized,
diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs
index 9fe64d159f2..82daf74efe0 100644
--- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs
+++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs
@@ -613,6 +613,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
         let slot = signed_blob.message.slot;
         let root = signed_blob.message.block_root;
         let index = signed_blob.message.index;
+        let commitment = signed_blob.message.kzg_commitment;
         let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock);
         // Log metrics to track delay from other nodes on the network.
         metrics::observe_duration(&metrics::BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME, delay);
@@ -633,6 +634,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
                 "proposer_index" => gossip_verified_blob.proposer_index(),
                 "slot" => gossip_verified_blob.slot(),
                 "delay" => ?delay,
+                "commitment" => %gossip_verified_blob.kzg_commitment(),
             );
         }
 
@@ -641,7 +643,8 @@
             "Successfully verified gossip blob";
             "slot" => %slot,
             "root" => %root,
-            "index" => %index
+            "index" => %index,
+            "commitment" => %gossip_verified_blob.kzg_commitment(),
         );
 
         self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
@@ -668,7 +671,8 @@
                     "Unknown parent hash for blob";
                     "action" => "requesting parent",
                     "block_root" => %blob.block_root,
-                    "parent_root" => %blob.block_parent_root
+                    "parent_root" => %blob.block_parent_root,
+                    "commitment" => %commitment,
                 );
                 self.send_sync_message(SyncMessage::UnknownParentBlob(peer_id, blob));
             }
@@ -683,7 +687,8 @@
                     "error" => ?err,
                     "slot" => %slot,
                     "root" => %root,
-                    "index" => %index
+                    "index" => %index,
+                    "commitment" => %commitment,
                 );
                 // Prevent recurring behaviour by penalizing the peer slightly.
                 self.gossip_penalize_peer(
@@ -707,7 +712,8 @@
                     "error" => ?err,
                     "slot" => %slot,
                     "root" => %root,
-                    "index" => %index
+                    "index" => %index,
+                    "commitment" => %commitment,
                 );
                 // Prevent recurring behaviour by penalizing the peer slightly.
                 self.gossip_penalize_peer(
@@ -743,13 +749,13 @@
             .should_delay_lookup(blob_slot);
 
         match self.chain.process_gossip_blob(verified_blob).await {
-            Ok(AvailabilityProcessingStatus::Imported(hash)) => {
+            Ok(AvailabilityProcessingStatus::Imported(block_root)) => {
                 // Note: Reusing block imported metric here
                 metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_IMPORTED_TOTAL);
                 info!(
                     self.log,
                     "Gossipsub blob processed, imported fully available block";
-                    "hash" => %hash
+                    "block_root" => %block_root
                 );
                 self.chain.recompute_head_at_current_slot().await;
             }
diff --git a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs
index bc35c059c84..6d569112f39 100644
--- a/beacon_node/network/src/network_beacon_processor/rpc_methods.rs
+++ b/beacon_node/network/src/network_beacon_processor/rpc_methods.rs
@@ -224,7 +224,15 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
         request_id: PeerRequestId,
         request: BlobsByRootRequest,
     ) {
-        let requested_blobs = request.blob_ids.len();
+        let Some(requested_root) = request.blob_ids.get(0).map(|id| id.block_root) else {
+            // No blob ids requested.
+            return;
+        };
+        let requested_indices = request
+            .blob_ids
+            .iter()
+            .map(|id| id.index)
+            .collect::<Vec<_>>();
         let mut send_blob_count = 0;
         let send_response = true;
 
@@ -277,7 +285,8 @@
                 self.log,
                 "Received BlobsByRoot Request";
                 "peer" => %peer_id,
-                "requested" => requested_blobs,
+                "request_root" => %requested_root,
+                "request_indices" => ?requested_indices,
                 "returned" => send_blob_count
             );
 
diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs
index 9ecc21c9e84..0e74cb08c80 100644
--- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs
+++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs
@@ -24,7 +24,9 @@
 use slot_clock::SlotClock;
 use std::sync::Arc;
 use std::time::Duration;
 use std::time::{SystemTime, UNIX_EPOCH};
+use store::KzgCommitment;
 use tokio::sync::mpsc;
+use types::beacon_block_body::format_kzg_commitments;
 use types::blob_sidecar::FixedBlobSidecarList;
 use types::{Epoch, Hash256, Slot};
@@ -212,6 +214,16 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
         let slot = block.slot();
         let parent_root = block.message().parent_root();
+        let commitments_formatted = block.as_block().commitments_formatted();
+
+        debug!(
+            self.log,
+            "Processing RPC block";
+            "block_root" => ?block_root,
+            "proposer" => block.message().proposer_index(),
+            "slot" => block.slot(),
+            "commitments" => commitments_formatted,
+        );
 
         let result = self
             .chain
@@ -288,10 +300,15 @@
             return;
         };
 
-        let indices: Vec<_> = blobs
+        let (indices, commitments): (Vec<u64>, Vec<KzgCommitment>) = blobs
             .iter()
-            .filter_map(|blob_opt| blob_opt.as_ref().map(|blob| blob.index))
-            .collect();
+            .filter_map(|blob_opt| {
+                blob_opt
+                    .as_ref()
+                    .map(|blob| (blob.index, blob.kzg_commitment))
+            })
+            .unzip();
+        let commitments = format_kzg_commitments(&commitments);
 
         debug!(
             self.log,
@@ -299,6 +316,7 @@
             "indices" => ?indices,
             "block_root" => %block_root,
             "slot" => %slot,
+            "commitments" => commitments,
         );
 
         if let Ok(current_slot) = self.chain.slot() {
@@ -332,6 +350,14 @@
                     "slot" => %slot,
                 );
             }
+            Err(BlockError::BlockIsAlreadyKnown) => {
+                debug!(
+                    self.log,
+                    "Blobs have already been imported";
+                    "block_hash" => %block_root,
+                    "slot" => %slot,
+                );
+            }
             Err(e) => {
                 warn!(
                     self.log,
diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs
index 3f1ecd129b6..92cb115f40d 100644
--- a/beacon_node/network/src/sync/block_lookups/mod.rs
+++ b/beacon_node/network/src/sync/block_lookups/mod.rs
@@ -330,7 +330,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
 
         let Some(lookup) = self.get_single_lookup::<R>(lookup_id) else {
             if response.is_some() {
-                warn!(
+                // We don't have the ability to cancel in-flight RPC requests. So this can happen
+                // if we started this RPC request, and later saw the block/blobs via gossip.
+                debug!(
                     self.log,
                     "Block returned for single block lookup not present";
                     "response_type" => ?response_type,
diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs
index 100e84a6bbc..819ea8e30bd 100644
--- a/beacon_node/network/src/sync/block_sidecar_coupling.rs
+++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs
@@ -30,7 +30,7 @@ impl<T: EthSpec> BlocksAndBlobsRequestInfo<T> {
         }
     }
 
-    pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, &'static str> {
+    pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, String> {
         let BlocksAndBlobsRequestInfo {
             accumulated_blocks,
             accumulated_sidecars,
@@ -50,28 +50,28 @@
                     .unwrap_or(false);
                 pair_next_blob
             } {
-                blob_list.push(blob_iter.next().ok_or("Missing next blob")?);
+                blob_list.push(blob_iter.next().ok_or("Missing next blob".to_string())?);
             }
 
             let mut blobs_buffer = vec![None; T::max_blobs_per_block()];
             for blob in blob_list {
                 let blob_index = blob.index as usize;
                 let Some(blob_opt) = blobs_buffer.get_mut(blob_index) else {
-                    return Err("Invalid blob index");
+                    return Err("Invalid blob index".to_string());
                 };
                 if blob_opt.is_some() {
-                    return Err("Repeat blob index");
+                    return Err("Repeat blob index".to_string());
                 } else {
                     *blob_opt = Some(blob);
                 }
             }
             let blobs = VariableList::from(blobs_buffer.into_iter().flatten().collect::<Vec<_>>());
-            responses.push(RpcBlock::new(None, block, Some(blobs))?)
+            responses.push(RpcBlock::new(None, block, Some(blobs)).map_err(|e| format!("{e:?}"))?)
         }
 
         // if accumulated sidecars is not empty, throw an error.
         if blob_iter.next().is_some() {
-            return Err("Received sidecars that don't pair well");
+            return Err("Received sidecars that don't pair well".to_string());
         }
 
         Ok(responses)
diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs
index ccd765f9bc5..90fa169292b 100644
--- a/beacon_node/network/src/sync/manager.rs
+++ b/beacon_node/network/src/sync/manager.rs
@@ -1034,8 +1034,11 @@ impl<T: BeaconChainTypes> SyncManager<T> {
                     // inform range that the request needs to be treated as failed
                     // With time we will want to downgrade this log
                     warn!(
-                        self.log, "Blocks and blobs request for range received invalid data";
-                        "peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
+                        self.log,
+                        "Blocks and blobs request for range received invalid data";
+                        "peer_id" => %peer_id,
+                        "batch_id" => resp.batch_id,
+                        "error" => e.clone()
                     );
                     let id = RequestId::RangeBlockAndBlobs { id };
                     self.network.report_peer(
@@ -1043,7 +1046,7 @@
                         PeerAction::MidToleranceError,
                         "block_blob_faulty_batch",
                     );
-                    self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
+                    self.inject_error(peer_id, id, RPCError::InvalidData(e))
                 }
             }
         }
@@ -1091,7 +1094,7 @@
                     // With time we will want to downgrade this log
                     warn!(
                         self.log, "Blocks and blobs request for backfill received invalid data";
-                        "peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
+                        "peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e.clone()
                     );
                     let id = RequestId::BackFillBlockAndBlobs { id };
                     self.network.report_peer(
@@ -1099,7 +1102,7 @@
                         PeerAction::MidToleranceError,
                         "block_blob_faulty_backfill_batch",
                     );
-                    self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
+                    self.inject_error(peer_id, id, RPCError::InvalidData(e))
                 }
             }
         }
diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs
index f7779cb76d1..ab18c0abd0a 100644
--- a/beacon_node/network/src/sync/network_context.rs
+++ b/beacon_node/network/src/sync/network_context.rs
@@ -23,7 +23,7 @@ use types::{BlobSidecar, EthSpec, SignedBeaconBlock};
 
 pub struct BlocksAndBlobsByRangeResponse<T: EthSpec> {
     pub batch_id: BatchId,
-    pub responses: Result<Vec<RpcBlock<T>>, &'static str>,
+    pub responses: Result<Vec<RpcBlock<T>>, String>,
 }
 
 pub struct BlocksAndBlobsByRangeRequest<T: EthSpec> {
@@ -338,12 +338,26 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
         request_id: Id,
         batch_type: ByRangeRequestType,
     ) -> Option<(ChainId, BatchId)> {
-        match batch_type {
+        let req = match batch_type {
             ByRangeRequestType::BlocksAndBlobs => self
                 .range_blocks_and_blobs_requests
                 .remove(&request_id)
                 .map(|req| (req.chain_id, req.batch_id)),
             ByRangeRequestType::Blocks => self.range_requests.remove(&request_id),
+        };
+        if let Some(req) = req {
+            debug!(
+                self.log,
+                "Range sync request failed";
+                "request_id" => request_id,
+                "batch_type" => ?batch_type,
+                "chain_id" => ?req.0,
+                "batch_id" => ?req.1
+            );
+            Some(req)
+        } else {
+            debug!(self.log, "Range sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
+            None
         }
     }
 
@@ -352,12 +366,25 @@
         request_id: Id,
         batch_type: ByRangeRequestType,
     ) -> Option<BatchId> {
-        match batch_type {
+        let batch_id = match batch_type {
             ByRangeRequestType::BlocksAndBlobs => self
                 .backfill_blocks_and_blobs_requests
                 .remove(&request_id)
                 .map(|(batch_id, _info)| batch_id),
             ByRangeRequestType::Blocks => self.backfill_requests.remove(&request_id),
+        };
+        if let Some(batch_id) = batch_id {
+            debug!(
+                self.log,
+                "Backfill sync request failed";
+                "request_id" => request_id,
+                "batch_type" => ?batch_type,
+                "batch_id" => ?batch_id
+            );
+            Some(batch_id)
+        } else {
+            debug!(self.log, "Backfill sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
+            None
         }
     }
 
diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs
index 4d399b5cb99..9386d29e0fe 100644
--- a/beacon_node/network/src/sync/range_sync/chain.rs
+++ b/beacon_node/network/src/sync/range_sync/chain.rs
@@ -827,9 +827,24 @@
             // sending an error /timeout) if the peer is removed from the chain for other
            // reasons. Check that this block belongs to the expected peer
             if !batch.is_expecting_block(peer_id, &request_id) {
+                debug!(
+                    self.log,
+                    "Batch not expecting block";
+                    "batch_epoch" => batch_id,
+                    "batch_state" => ?batch.state(),
+                    "peer_id" => %peer_id,
+                    "request_id" => %request_id
+                );
                 return Ok(KeepChain);
             }
-            debug!(self.log, "Batch failed. RPC Error"; "batch_epoch" => batch_id);
+            debug!(
+                self.log,
+                "Batch failed. RPC Error";
+                "batch_epoch" => batch_id,
+                "batch_state" => ?batch.state(),
+                "peer_id" => %peer_id,
+                "request_id" => %request_id
+            );
             if let Some(active_requests) = self.peers.get_mut(peer_id) {
                 active_requests.remove(&batch_id);
             }
@@ -841,6 +856,13 @@
             }
             self.retry_batch_download(network, batch_id)
         } else {
+            debug!(
+                self.log,
+                "Batch not found";
+                "batch_epoch" => batch_id,
+                "peer_id" => %peer_id,
+                "request_id" => %request_id
+            );
             // this could be an error for an old batch, removed when the chain advances
             Ok(KeepChain)
         }
diff --git a/consensus/types/src/beacon_block_body.rs b/consensus/types/src/beacon_block_body.rs
index ba609387409..2f7c6891e4c 100644
--- a/consensus/types/src/beacon_block_body.rs
+++ b/consensus/types/src/beacon_block_body.rs
@@ -508,6 +508,14 @@
     }
 }
 
+/// Util method helpful for logging.
+pub fn format_kzg_commitments(commitments: &[KzgCommitment]) -> String {
+    let commitment_strings: Vec<String> = commitments.iter().map(|x| x.to_string()).collect();
+    let commitments_joined = commitment_strings.join(", ");
+    let surrounded_commitments = format!("[{}]", commitments_joined);
+    surrounded_commitments
+}
+
 #[cfg(test)]
 mod tests {
     mod base {
diff --git a/consensus/types/src/signed_beacon_block.rs b/consensus/types/src/signed_beacon_block.rs
index 11009457fda..24b5e27c8c3 100644
--- a/consensus/types/src/signed_beacon_block.rs
+++ b/consensus/types/src/signed_beacon_block.rs
@@ -1,3 +1,4 @@
+use crate::beacon_block_body::format_kzg_commitments;
 use crate::*;
 use bls::Signature;
 use derivative::Derivative;
@@ -256,6 +257,15 @@ impl<E: EthSpec, Payload: AbstractExecPayload<E>> SignedBeaconBlock<E, Payload>
             .map(|c| c.len())
             .unwrap_or(0)
     }
+
+    /// Used for displaying commitments in logs.
+    pub fn commitments_formatted(&self) -> String {
+        let Ok(commitments) = self.message().body().blob_kzg_commitments() else {
+            return "[]".to_string();
+        };
+
+        format_kzg_commitments(commitments.as_ref())
+    }
 }
 
 // We can convert pre-Bellatrix blocks without payloads into blocks with payloads.
diff --git a/crypto/kzg/src/kzg_commitment.rs b/crypto/kzg/src/kzg_commitment.rs
index 04ecce0b30f..cfab09f63e7 100644
--- a/crypto/kzg/src/kzg_commitment.rs
+++ b/crypto/kzg/src/kzg_commitment.rs
@@ -36,7 +36,15 @@ impl From<KzgCommitment> for c_kzg::Bytes48 {
 
 impl Display for KzgCommitment {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", serde_utils::hex::encode(self.0))
+        write!(f, "0x")?;
+        for i in &self.0[0..2] {
+            write!(f, "{:02x}", i)?;
+        }
+        write!(f, "…")?;
+        for i in &self.0[BYTES_PER_COMMITMENT - 2..BYTES_PER_COMMITMENT] {
+            write!(f, "{:02x}", i)?;
+        }
+        Ok(())
     }
 }
 
@@ -63,7 +71,7 @@ impl Serialize for KzgCommitment {
     where
         S: Serializer,
     {
-        serializer.serialize_str(&self.to_string())
+        serializer.serialize_str(&format!("{:?}", self))
     }
 }
 
@@ -113,3 +121,24 @@ impl arbitrary::Arbitrary<'_> for KzgCommitment {
         Ok(KzgCommitment(bytes))
     }
 }
+
+#[test]
+fn kzg_commitment_display() {
+    let display_commitment_str = "0x53fa…adac";
+    let display_commitment = KzgCommitment::from_str(
+        "0x53fa09af35d1d1a9e76f65e16112a9064ce30d1e4e2df98583f0f5dc2e7dd13a4f421a9c89f518fafd952df76f23adac",
+    )
+    .unwrap()
+    .to_string();
+
+    assert_eq!(display_commitment, display_commitment_str);
+}
+
+#[test]
+fn kzg_commitment_debug() {
+    let debug_commitment_str =
+        "0x53fa09af35d1d1a9e76f65e16112a9064ce30d1e4e2df98583f0f5dc2e7dd13a4f421a9c89f518fafd952df76f23adac";
+    let debug_commitment = KzgCommitment::from_str(debug_commitment_str).unwrap();
+
+    assert_eq!(format!("{debug_commitment:?}"), debug_commitment_str);
+}