From a3b1ef31294ffce70278408131d82d69e92fe9e6 Mon Sep 17 00:00:00 2001 From: Age Manning Date: Mon, 29 Jul 2024 15:52:07 +1000 Subject: [PATCH] Visualisation logging for sync batch states (#6034) * Add visualization for batch states * Replace icons with emojis * Reviewers comments * Change empty emoji and improve docs comments * Fix lints * Move to letters rather than emojis * Replace 'V' with 'v'. Cargo update * Merge latest unstable * Improve docs around visualisation * Merge branch 'unstable' into sync-batch-state --- Cargo.lock | 8 +-- .../network/src/sync/range_sync/batch.rs | 21 +++++++ .../network/src/sync/range_sync/chain.rs | 60 ++++++++++++++++--- 3 files changed, 78 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 605cb4d2a58..818b6adc55d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1021,9 +1021,9 @@ dependencies = [ [[package]] name = "blst" -version = "0.3.12" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62dc83a094a71d43eeadd254b1ec2d24cb6a0bb6cadce00df51f0db594711a32" +checksum = "4378725facc195f1a538864863f6de233b500a8862747e7f165078a419d5e874" dependencies = [ "cc", "glob", @@ -5751,9 +5751,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.1" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" dependencies = [ "memchr", ] diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 49e3ac3a817..7f9629740bb 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -463,6 +463,11 @@ impl BatchInfo { } } } + + // Visualizes the state of this batch using state::visualize() + pub fn visualize(&self) -> char { + self.state.visualize() + } } /// Represents a peer's attempt and providing the result for this batch. @@ -539,3 +544,19 @@ impl std::fmt::Debug for BatchState { } } } + +impl BatchState { + /// Creates a character representation/visualization for the batch state to display in logs for quicker and + /// easier recognition + fn visualize(&self) -> char { + match self { + BatchState::Downloading(..) => 'D', + BatchState::Processing(_) => 'P', + BatchState::AwaitingValidation(_) => 'v', + BatchState::AwaitingDownload => 'd', + BatchState::Failed => 'F', + BatchState::AwaitingProcessing(..) => 'p', + BatchState::Poisoned => 'X', + } + } +} diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index d63b2f95d80..556b4194dd8 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -277,7 +277,7 @@ impl SyncingChain { let awaiting_batches = batch_id .saturating_sub(self.optimistic_start.unwrap_or(self.processing_target)) / EPOCHS_PER_BATCH; - debug!(self.log, "Completed batch received"; "epoch" => batch_id, "blocks" => received, "awaiting_batches" => awaiting_batches); + debug!(self.log, "Batch downloaded"; "epoch" => batch_id, "blocks" => received, "batch_state" => self.visualize_batch_state(), "awaiting_batches" => awaiting_batches); // pre-emptively request more blocks from peers whilst we process current blocks, self.request_batches(network)?; @@ -460,6 +460,7 @@ impl SyncingChain { ) -> ProcessingResult { // the first two cases are possible if the chain advances while waiting for a processing // result + let batch_state = self.visualize_batch_state(); let batch = match &self.current_processing_batch { Some(processing_id) if *processing_id != batch_id => { debug!(self.log, "Unexpected batch result"; @@ -492,7 +493,7 @@ impl SyncingChain { // Log the process result and the batch for debugging purposes. debug!(self.log, "Batch processing result"; "result" => ?result, &batch, - "batch_epoch" => batch_id, "client" => %network.client_type(&peer)); + "batch_epoch" => batch_id, "client" => %network.client_type(&peer), "batch_state" => batch_state); // We consider three cases. Batch was successfully processed, Batch failed processing due // to a faulty peer, or batch failed processing but the peer can't be deemed faulty. @@ -859,6 +860,7 @@ impl SyncingChain { peer_id: &PeerId, request_id: Id, ) -> ProcessingResult { + let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { // A batch could be retried without the peer failing the request (disconnecting/ // sending an error /timeout) if the peer is removed from the chain for other @@ -870,7 +872,8 @@ impl SyncingChain { "batch_epoch" => batch_id, "batch_state" => ?batch.state(), "peer_id" => %peer_id, - "request_id" => %request_id + "request_id" => %request_id, + "batch_state" => batch_state ); return Ok(KeepChain); } @@ -880,7 +883,8 @@ impl SyncingChain { "batch_epoch" => batch_id, "batch_state" => ?batch.state(), "peer_id" => %peer_id, - "request_id" => %request_id + "request_id" => %request_id, + "batch_state" => batch_state ); if let Some(active_requests) = self.peers.get_mut(peer_id) { active_requests.remove(&batch_id); @@ -898,7 +902,8 @@ impl SyncingChain { "Batch not found"; "batch_epoch" => batch_id, "peer_id" => %peer_id, - "request_id" => %request_id + "request_id" => %request_id, + "batch_state" => batch_state ); // this could be an error for an old batch, removed when the chain advances Ok(KeepChain) @@ -948,6 +953,7 @@ impl SyncingChain { batch_id: BatchId, peer: PeerId, ) -> ProcessingResult { + let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { let (request, batch_type) = batch.to_blocks_by_range_request(); match network.blocks_and_blobs_by_range_request( @@ -967,9 +973,9 @@ impl SyncingChain { .map(|epoch| epoch == batch_id) .unwrap_or(false) { - debug!(self.log, "Requesting optimistic batch"; "epoch" => batch_id, &batch); + debug!(self.log, "Requesting optimistic batch"; "epoch" => batch_id, &batch, "batch_state" => batch_state); } else { - debug!(self.log, "Requesting batch"; "epoch" => batch_id, &batch); + debug!(self.log, "Requesting batch"; "epoch" => batch_id, &batch, "batch_state" => batch_state); } // register the batch for this peer return self @@ -1130,6 +1136,46 @@ impl SyncingChain { } } } + + /// Creates a string visualization of the current state of the chain, to make it easier for debugging and understanding + /// where sync is up to from glancing at the logs. + /// + /// This produces a string of the form: [D,E,E,E,E] + /// to indicate the current buffer state of the chain. The symbols are defined on each of the + /// batch states. See [BatchState::visualize] for symbol definitions. + fn visualize_batch_state(&self) -> String { + let mut visualization_string = String::with_capacity((BATCH_BUFFER_SIZE * 3) as usize); + + // Start of the block + visualization_string.push('['); + + for mut batch_index in 0..BATCH_BUFFER_SIZE { + if let Some(batch) = self + .batches + .get(&(self.processing_target + batch_index as u64 * EPOCHS_PER_BATCH)) + { + visualization_string.push(batch.visualize()); + if batch_index != BATCH_BUFFER_SIZE { + // Add a comma in between elements + visualization_string.push(','); + } + } else { + // No batch exists, it is on our list to be downloaded + // Fill in the rest of the gaps + while batch_index < BATCH_BUFFER_SIZE { + visualization_string.push('E'); + // Add a comma between the empty batches + if batch_index < BATCH_BUFFER_SIZE.saturating_sub(1) { + visualization_string.push(',') + } + batch_index += 1; + } + break; + } + } + visualization_string.push(']'); + visualization_string + } } impl slog::KV for &mut SyncingChain {