From 1fcada8a328be02987e8135bde0bd37f0eae59b0 Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Thu, 10 Aug 2023 00:10:09 +0000 Subject: [PATCH] Improve transport connection errors (#4540) ## Issue Addressed #4538 ## Proposed Changes add newtype wrapper around DialError that extracts error messages and logs them in a more readable format ## Additional Info I was able to test Transport Dial Errors in the situation where a libp2p instance attempts to ping a nonexistent peer. That error message should look something like `A transport level error has ocurred: Connection refused (os error 61)` AgeManning mentioned we should try fetching only the most inner error (in situations where theres a nested error). I took a stab at implementing that For non transport DialErrors, I wrote out the error messages explicitly (as per the docs). Could potentially clean things up here if thats not necessary Co-authored-by: Age Manning --- .../lighthouse_network/src/discovery/mod.rs | 4 +- beacon_node/lighthouse_network/src/lib.rs | 41 +++++++++++++++++++ .../src/peer_manager/network_behaviour.rs | 4 +- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/beacon_node/lighthouse_network/src/discovery/mod.rs b/beacon_node/lighthouse_network/src/discovery/mod.rs index 0f8ddc53c1b..82a371d8a20 100644 --- a/beacon_node/lighthouse_network/src/discovery/mod.rs +++ b/beacon_node/lighthouse_network/src/discovery/mod.rs @@ -7,9 +7,9 @@ pub(crate) mod enr; pub mod enr_ext; // Allow external use of the lighthouse ENR builder -use crate::metrics; use crate::service::TARGET_SUBNET_PEERS; use crate::{error, Enr, NetworkConfig, NetworkGlobals, Subnet, SubnetDiscovery}; +use crate::{metrics, ClearDialError}; use discv5::{enr::NodeId, Discv5, Discv5Event}; pub use enr::{ build_enr, create_enr_builder_from_config, load_enr_from_disk, use_or_load_enr, CombinedKey, @@ -1111,7 +1111,7 @@ impl Discovery { | DialError::Transport(_) | DialError::WrongPeerId { .. } => { // set peer as disconnected in discovery DHT - debug!(self.log, "Marking peer disconnected in DHT"; "peer_id" => %peer_id); + debug!(self.log, "Marking peer disconnected in DHT"; "peer_id" => %peer_id, "error" => %ClearDialError(error)); self.disconnect_peer(&peer_id); } DialError::DialPeerConditionFalse(_) | DialError::Aborted => {} diff --git a/beacon_node/lighthouse_network/src/lib.rs b/beacon_node/lighthouse_network/src/lib.rs index 3d539af3b28..7467fb7f067 100644 --- a/beacon_node/lighthouse_network/src/lib.rs +++ b/beacon_node/lighthouse_network/src/lib.rs @@ -17,6 +17,7 @@ pub mod rpc; pub mod types; pub use config::gossip_max_size; +use libp2p::swarm::DialError; pub use listen_addr::*; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; @@ -63,6 +64,46 @@ impl<'de> Deserialize<'de> for PeerIdSerialized { } } +// A wrapper struct that prints a dial error nicely. +struct ClearDialError<'a>(&'a DialError); + +impl<'a> ClearDialError<'a> { + fn most_inner_error(err: &(dyn std::error::Error)) -> &(dyn std::error::Error) { + let mut current = err; + while let Some(source) = current.source() { + current = source; + } + current + } +} + +impl<'a> std::fmt::Display for ClearDialError<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + match &self.0 { + DialError::Transport(errors) => { + for (_, transport_error) in errors { + match transport_error { + libp2p::TransportError::MultiaddrNotSupported(multiaddr_error) => { + write!(f, "Multiaddr not supported: {multiaddr_error}")?; + } + libp2p::TransportError::Other(other_error) => { + let inner_error = ClearDialError::most_inner_error(other_error); + write!(f, "Transport error: {inner_error}")?; + } + } + } + Ok(()) + } + DialError::LocalPeerId { .. } => write!(f, "The peer being dialed is the local peer."), + DialError::NoAddresses => write!(f, "No addresses for the peer to dial."), + DialError::DialPeerConditionFalse(_) => write!(f, "PeerCondition evaluation failed."), + DialError::Aborted => write!(f, "Connection aborted."), + DialError::WrongPeerId { .. } => write!(f, "Wrong peer id."), + DialError::Denied { cause } => write!(f, "Connection denied: {:?}", cause), + } + } +} + pub use crate::types::{ error, Enr, EnrSyncCommitteeBitfield, GossipTopic, NetworkGlobals, PubsubMessage, Subnet, SubnetDiscovery, diff --git a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs index ce374bb9ab4..70f421681a3 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs @@ -12,9 +12,9 @@ use libp2p::swarm::{ConnectionId, NetworkBehaviour, PollParameters, ToSwarm}; use slog::{debug, error}; use types::EthSpec; -use crate::metrics; use crate::rpc::GoodbyeReason; use crate::types::SyncState; +use crate::{metrics, ClearDialError}; use super::peerdb::BanResult; use super::{ConnectingType, PeerManager, PeerManagerEvent, ReportSource}; @@ -132,7 +132,7 @@ impl NetworkBehaviour for PeerManager { error, connection_id: _, }) => { - debug!(self.log, "Failed to dial peer"; "peer_id"=> ?peer_id, "error" => %error); + debug!(self.log, "Failed to dial peer"; "peer_id"=> ?peer_id, "error" => %ClearDialError(error)); self.on_dial_failure(peer_id); } FromSwarm::ExternalAddrConfirmed(_) => {