Skip to content

Commit

Permalink
deneb related logging improvements (#4859)
Browse files Browse the repository at this point in the history
1. Add commitments to logs and update the `Display` implementation of `KzgCommitment` to become truncated similarly to block root.

I've been finding it difficult to debug scenarios involving multiple blobs for the same `(index, block_root)`. Logging the commitment will help with this: we can match it against what exists in the block.

Example output:

```
Oct 20 21:13:36.700 DEBG Successfully verified gossip blob       commitment: 0xa3c1…1cd8, index: 0, root: 0xf31e…f9de, slot: 154568
Oct 20 21:13:36.785 DEBG Successfully verified gossip block      commitments: [0xa3c1…1cd8, 0x8655…02ff, 0x8d6a…955a, 0x84ac…3a1b, 0x9752…629b, 0xb9fc…20fb], root: 0xf31eeb732702e429e89057b15e1c0c631e8452e09e03cb1924353f536ef4f9de, slot: 154568, graffiti: teku/besu, service: beacon
```

Example output in a block with no blobs (this will show up pre-deneb):
```
426734:Oct 20 21:15:24.113 DEBG Successfully verified gossip block, commitments: [], root: 0x619db1360ba0e8d44ae2a0f2450ebca47e167191feecffcfac0e8d7b6c39623c, slot: 154577, graffiti: teku/nethermind, service: beacon, module: beacon_chain::beacon_chain:2765
```

2. Remove `strum::IntoStaticStr` from `AvailabilityCheckError`. This is because `IntoStaticStr` ends up dropping the information carried inside the enum's variants. For example, the kzg commitments in this error are dropped, making it more difficult to debug:
```
AvailabilityCheckError::KzgCommitmentMismatch {
        blob_commitment: KzgCommitment,
        block_commitment: KzgCommitment,
    },
```
which is output as just `AvailabilityCheckError`

3. Some additional misc sync logs I found useful in debugging #4869

4. This downgrades "Block returned for single block lookup not present" to debug, because I don't think we can fix the scenario that causes this unless we can cancel in-flight RPC requests.

Co-authored-by: realbigsean <seananderson33@gmail.com>
  • Loading branch information
realbigsean and realbigsean committed Oct 25, 2023
1 parent d4f26ee commit ba891e1
Show file tree
Hide file tree
Showing 14 changed files with 181 additions and 31 deletions.
2 changes: 2 additions & 0 deletions beacon_node/beacon_chain/src/beacon_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2761,12 +2761,14 @@ impl<T: BeaconChainTypes> BeaconChain<T> {

match GossipVerifiedBlock::new(block, &chain) {
Ok(verified) => {
let commitments_formatted = verified.block.commitments_formatted();
debug!(
chain.log,
"Successfully verified gossip block";
"graffiti" => graffiti_string,
"slot" => slot,
"root" => ?verified.block_root(),
"commitments" => commitments_formatted,
);

Ok(verified)
Expand Down
7 changes: 7 additions & 0 deletions beacon_node/beacon_chain/src/block_verification_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ impl<E: EthSpec> RpcBlock<E> {
/// Returns the block root that was supplied when this `RpcBlock` was
/// constructed (stored alongside the block, so no hashing is performed here).
pub fn block_root(&self) -> Hash256 {
self.block_root
}

/// Returns a reference to the inner `SignedBeaconBlock`, regardless of
/// whether this `RpcBlock` also carries blobs (any accompanying blobs are
/// simply not exposed by this accessor).
pub fn as_block(&self) -> &SignedBeaconBlock<E> {
match &self.block {
RpcBlockInner::Block(block) => block,
RpcBlockInner::BlockAndBlobs(block, _) => block,
}
}
}

/// Note: This variant is intentionally private because we want to safely construct the
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use kzg::{Error as KzgError, KzgCommitment};
use strum::IntoStaticStr;
use types::{BeaconStateError, Hash256};

#[derive(Debug, IntoStaticStr)]
#[derive(Debug)]
pub enum Error {
Kzg(KzgError),
KzgNotInitialized,
Expand Down
18 changes: 12 additions & 6 deletions beacon_node/network/src/network_beacon_processor/gossip_methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
let slot = signed_blob.message.slot;
let root = signed_blob.message.block_root;
let index = signed_blob.message.index;
let commitment = signed_blob.message.kzg_commitment;
let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock);
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(&metrics::BEACON_BLOB_GOSSIP_SLOT_START_DELAY_TIME, delay);
Expand All @@ -633,6 +634,7 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"proposer_index" => gossip_verified_blob.proposer_index(),
"slot" => gossip_verified_blob.slot(),
"delay" => ?delay,
"commitment" => %gossip_verified_blob.kzg_commitment(),
);
}

Expand All @@ -641,7 +643,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"Successfully verified gossip blob";
"slot" => %slot,
"root" => %root,
"index" => %index
"index" => %index,
"commitment" => %gossip_verified_blob.kzg_commitment(),
);

self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
Expand All @@ -668,7 +671,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"Unknown parent hash for blob";
"action" => "requesting parent",
"block_root" => %blob.block_root,
"parent_root" => %blob.block_parent_root
"parent_root" => %blob.block_parent_root,
"commitment" => %commitment,
);
self.send_sync_message(SyncMessage::UnknownParentBlob(peer_id, blob));
}
Expand All @@ -683,7 +687,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"error" => ?err,
"slot" => %slot,
"root" => %root,
"index" => %index
"index" => %index,
"commitment" => %commitment,
);
// Prevent recurring behaviour by penalizing the peer slightly.
self.gossip_penalize_peer(
Expand All @@ -707,7 +712,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"error" => ?err,
"slot" => %slot,
"root" => %root,
"index" => %index
"index" => %index,
"commitment" => %commitment,
);
// Prevent recurring behaviour by penalizing the peer slightly.
self.gossip_penalize_peer(
Expand Down Expand Up @@ -743,13 +749,13 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
.should_delay_lookup(blob_slot);

match self.chain.process_gossip_blob(verified_blob).await {
Ok(AvailabilityProcessingStatus::Imported(hash)) => {
Ok(AvailabilityProcessingStatus::Imported(block_root)) => {
// Note: Reusing block imported metric here
metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_IMPORTED_TOTAL);
info!(
self.log,
"Gossipsub blob processed, imported fully available block";
"hash" => %hash
"block_root" => %block_root
);
self.chain.recompute_head_at_current_slot().await;
}
Expand Down
13 changes: 11 additions & 2 deletions beacon_node/network/src/network_beacon_processor/rpc_methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,15 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
request_id: PeerRequestId,
request: BlobsByRootRequest,
) {
let requested_blobs = request.blob_ids.len();
let Some(requested_root) = request.blob_ids.get(0).map(|id| id.block_root) else {
// No blob ids requested.
return;
};
let requested_indices = request
.blob_ids
.iter()
.map(|id| id.index)
.collect::<Vec<_>>();
let mut send_blob_count = 0;
let send_response = true;

Expand Down Expand Up @@ -277,7 +285,8 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
self.log,
"Received BlobsByRoot Request";
"peer" => %peer_id,
"requested" => requested_blobs,
"request_root" => %requested_root,
"request_indices" => ?requested_indices,
"returned" => send_blob_count
);

Expand Down
32 changes: 29 additions & 3 deletions beacon_node/network/src/network_beacon_processor/sync_methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ use slot_clock::SlotClock;
use std::sync::Arc;
use std::time::Duration;
use std::time::{SystemTime, UNIX_EPOCH};
use store::KzgCommitment;
use tokio::sync::mpsc;
use types::beacon_block_body::format_kzg_commitments;
use types::blob_sidecar::FixedBlobSidecarList;
use types::{Epoch, Hash256, Slot};

Expand Down Expand Up @@ -212,6 +214,16 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {

let slot = block.slot();
let parent_root = block.message().parent_root();
let commitments_formatted = block.as_block().commitments_formatted();

debug!(
self.log,
"Processing RPC block";
"block_root" => ?block_root,
"proposer" => block.message().proposer_index(),
"slot" => block.slot(),
"commitments" => commitments_formatted,
);

let result = self
.chain
Expand Down Expand Up @@ -288,17 +300,23 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
return;
};

let indices: Vec<_> = blobs
let (indices, commitments): (Vec<u64>, Vec<KzgCommitment>) = blobs
.iter()
.filter_map(|blob_opt| blob_opt.as_ref().map(|blob| blob.index))
.collect();
.filter_map(|blob_opt| {
blob_opt
.as_ref()
.map(|blob| (blob.index, blob.kzg_commitment))
})
.unzip();
let commitments = format_kzg_commitments(&commitments);

debug!(
self.log,
"RPC blobs received";
"indices" => ?indices,
"block_root" => %block_root,
"slot" => %slot,
"commitments" => commitments,
);

if let Ok(current_slot) = self.chain.slot() {
Expand Down Expand Up @@ -332,6 +350,14 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
"slot" => %slot,
);
}
Err(BlockError::BlockIsAlreadyKnown) => {
debug!(
self.log,
"Blobs have already been imported";
"block_hash" => %block_root,
"slot" => %slot,
);
}
Err(e) => {
warn!(
self.log,
Expand Down
4 changes: 3 additions & 1 deletion beacon_node/network/src/sync/block_lookups/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {

let Some(lookup) = self.get_single_lookup::<R>(lookup_id) else {
if response.is_some() {
warn!(
// We don't have the ability to cancel in-flight RPC requests. So this can happen
// if we started this RPC request, and later saw the block/blobs via gossip.
debug!(
self.log,
"Block returned for single block lookup not present";
"response_type" => ?response_type,
Expand Down
12 changes: 6 additions & 6 deletions beacon_node/network/src/sync/block_sidecar_coupling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ impl<T: EthSpec> BlocksAndBlobsRequestInfo<T> {
}
}

pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, &'static str> {
pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, String> {
let BlocksAndBlobsRequestInfo {
accumulated_blocks,
accumulated_sidecars,
Expand All @@ -50,28 +50,28 @@ impl<T: EthSpec> BlocksAndBlobsRequestInfo<T> {
.unwrap_or(false);
pair_next_blob
} {
blob_list.push(blob_iter.next().ok_or("Missing next blob")?);
blob_list.push(blob_iter.next().ok_or("Missing next blob".to_string())?);
}

let mut blobs_buffer = vec![None; T::max_blobs_per_block()];
for blob in blob_list {
let blob_index = blob.index as usize;
let Some(blob_opt) = blobs_buffer.get_mut(blob_index) else {
return Err("Invalid blob index");
return Err("Invalid blob index".to_string());
};
if blob_opt.is_some() {
return Err("Repeat blob index");
return Err("Repeat blob index".to_string());
} else {
*blob_opt = Some(blob);
}
}
let blobs = VariableList::from(blobs_buffer.into_iter().flatten().collect::<Vec<_>>());
responses.push(RpcBlock::new(None, block, Some(blobs))?)
responses.push(RpcBlock::new(None, block, Some(blobs)).map_err(|e| format!("{e:?}"))?)
}

// if accumulated sidecars is not empty, throw an error.
if blob_iter.next().is_some() {
return Err("Received sidecars that don't pair well");
return Err("Received sidecars that don't pair well".to_string());
}

Ok(responses)
Expand Down
13 changes: 8 additions & 5 deletions beacon_node/network/src/sync/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1034,16 +1034,19 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// inform range that the request needs to be treated as failed
// With time we will want to downgrade this log
warn!(
self.log, "Blocks and blobs request for range received invalid data";
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
self.log,
"Blocks and blobs request for range received invalid data";
"peer_id" => %peer_id,
"batch_id" => resp.batch_id,
"error" => e.clone()
);
let id = RequestId::RangeBlockAndBlobs { id };
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"block_blob_faulty_batch",
);
self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
self.inject_error(peer_id, id, RPCError::InvalidData(e))
}
}
}
Expand Down Expand Up @@ -1091,15 +1094,15 @@ impl<T: BeaconChainTypes> SyncManager<T> {
// With time we will want to downgrade this log
warn!(
self.log, "Blocks and blobs request for backfill received invalid data";
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e.clone()
);
let id = RequestId::BackFillBlockAndBlobs { id };
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"block_blob_faulty_backfill_batch",
);
self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
self.inject_error(peer_id, id, RPCError::InvalidData(e))
}
}
}
Expand Down
33 changes: 30 additions & 3 deletions beacon_node/network/src/sync/network_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use types::{BlobSidecar, EthSpec, SignedBeaconBlock};

pub struct BlocksAndBlobsByRangeResponse<T: EthSpec> {
pub batch_id: BatchId,
pub responses: Result<Vec<RpcBlock<T>>, &'static str>,
pub responses: Result<Vec<RpcBlock<T>>, String>,
}

pub struct BlocksAndBlobsByRangeRequest<T: EthSpec> {
Expand Down Expand Up @@ -338,12 +338,26 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
request_id: Id,
batch_type: ByRangeRequestType,
) -> Option<(ChainId, BatchId)> {
match batch_type {
let req = match batch_type {
ByRangeRequestType::BlocksAndBlobs => self
.range_blocks_and_blobs_requests
.remove(&request_id)
.map(|req| (req.chain_id, req.batch_id)),
ByRangeRequestType::Blocks => self.range_requests.remove(&request_id),
};
if let Some(req) = req {
debug!(
self.log,
"Range sync request failed";
"request_id" => request_id,
"batch_type" => ?batch_type,
"chain_id" => ?req.0,
"batch_id" => ?req.1
);
Some(req)
} else {
debug!(self.log, "Range sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
None
}
}

Expand All @@ -352,12 +366,25 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
request_id: Id,
batch_type: ByRangeRequestType,
) -> Option<BatchId> {
match batch_type {
let batch_id = match batch_type {
ByRangeRequestType::BlocksAndBlobs => self
.backfill_blocks_and_blobs_requests
.remove(&request_id)
.map(|(batch_id, _info)| batch_id),
ByRangeRequestType::Blocks => self.backfill_requests.remove(&request_id),
};
if let Some(batch_id) = batch_id {
debug!(
self.log,
"Backfill sync request failed";
"request_id" => request_id,
"batch_type" => ?batch_type,
"batch_id" => ?batch_id
);
Some(batch_id)
} else {
debug!(self.log, "Backfill sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
None
}
}

Expand Down
Loading

0 comments on commit ba891e1

Please sign in to comment.