From 061c82f89c660d9f457559d7136766d6132fbb0a Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 25 Nov 2024 23:37:18 +0400 Subject: [PATCH] Send validator telemetry to the private overlay (#1325) * Send validator telemetry to the private overlay * Improve rotating neighbours in overlays --- create-hardfork/create-hardfork.cpp | 3 + overlay/overlay-manager.cpp | 15 +++- overlay/overlay-manager.h | 1 + overlay/overlay-peers.cpp | 4 +- overlay/overlay.cpp | 7 +- overlay/overlay.hpp | 1 + tdutils/td/utils/port/Stat.cpp | 41 +++++++++++ tdutils/td/utils/port/Stat.h | 2 + test/test-ton-collator.cpp | 3 + tl/generate/scheme/ton_api.tl | 4 ++ tl/generate/scheme/ton_api.tlo | Bin 97736 -> 98356 bytes validator-engine/validator-engine.cpp | 13 ++++ validator-engine/validator-engine.hpp | 4 ++ validator/CMakeLists.txt | 2 + validator/full-node-private-overlay.cpp | 66 ++++++++++++++++- validator/full-node-private-overlay.hpp | 9 +++ validator/full-node.cpp | 33 +++++++++ validator/full-node.h | 2 + validator/full-node.hpp | 8 +++ validator/impl/validator-set.cpp | 8 +-- validator/impl/validator-set.hpp | 3 +- validator/interfaces/validator-manager.h | 1 + validator/interfaces/validator-set.h | 1 + validator/manager-disk.hpp | 2 + validator/manager-hardfork.hpp | 2 + validator/manager.cpp | 42 +++++++++++ validator/manager.hpp | 8 +++ validator/validator-telemetry.cpp | 87 +++++++++++++++++++++++ validator/validator-telemetry.hpp | 66 +++++++++++++++++ validator/validator.h | 1 + 30 files changed, 426 insertions(+), 13 deletions(-) create mode 100644 validator/validator-telemetry.cpp create mode 100644 validator/validator-telemetry.hpp diff --git a/create-hardfork/create-hardfork.cpp b/create-hardfork/create-hardfork.cpp index 501ce3b93..a24f3f8e0 100644 --- a/create-hardfork/create-hardfork.cpp +++ b/create-hardfork/create-hardfork.cpp @@ -277,6 +277,9 @@ class HardforkCreator : public td::actor::Actor { void new_key_block(ton::validator::BlockHandle handle) override { } + void send_validator_telemetry(ton::PublicKeyHash key, + ton::tl_object_ptr telemetry) override { + } }; td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::install_callback, diff --git a/overlay/overlay-manager.cpp b/overlay/overlay-manager.cpp index ff6ad2c99..f24c6cbc2 100644 --- a/overlay/overlay-manager.cpp +++ b/overlay/overlay-manager.cpp @@ -68,6 +68,9 @@ void OverlayManager::register_overlay(adnl::AdnlNodeIdShort local_id, OverlayIdS } overlays_[local_id][overlay_id] = OverlayDescription{std::move(overlay), std::move(cert)}; + if (!with_db_) { + return; + } auto P = td::PromiseCreator::lambda([id = overlays_[local_id][overlay_id].overlay.get()](td::Result R) { R.ensure(); @@ -417,13 +420,19 @@ OverlayManager::OverlayManager(std::string db_root, td::actor::ActorId kv = - std::make_shared(td::RocksDb::open(PSTRING() << db_root_ << "/overlays").move_as_ok()); - db_ = DbType{std::move(kv)}; + if (!db_root_.empty()) { + with_db_ = true; + std::shared_ptr kv = + std::make_shared(td::RocksDb::open(PSTRING() << db_root_ << "/overlays").move_as_ok()); + db_ = DbType{std::move(kv)}; + } } void OverlayManager::save_to_db(adnl::AdnlNodeIdShort local_id, OverlayIdShort overlay_id, std::vector nodes) { + if (!with_db_) { + return; + } std::vector> nodes_vec; for (auto &n : nodes) { nodes_vec.push_back(n.tl()); diff --git a/overlay/overlay-manager.h b/overlay/overlay-manager.h index 12206e048..68b033a3b 100644 --- a/overlay/overlay-manager.h +++ b/overlay/overlay-manager.h @@ -131,6 +131,7 @@ class OverlayManager : public Overlays { td::actor::ActorId dht_node_; using DbType = td::KeyValueAsync; + bool with_db_ = false; DbType db_; class AdnlCallback : public adnl::Adnl::Callback { diff --git a/overlay/overlay-peers.cpp b/overlay/overlay-peers.cpp index 00b508fd4..7def4a2d3 100644 --- a/overlay/overlay-peers.cpp +++ b/overlay/overlay-peers.cpp @@ -213,7 +213,7 @@ void OverlayImpl::add_peer(OverlayNode node) { peer_list_.peers_.insert(id, OverlayPeer(std::move(node))); del_some_peers(); auto X = peer_list_.peers_.get(id); - if (X != nullptr && peer_list_.neighbours_.size() < max_neighbours() && + if (X != nullptr && !X->is_neighbour() && peer_list_.neighbours_.size() < max_neighbours() && !(X->get_node()->flags() & OverlayMemberFlags::DoNotReceiveBroadcasts) && X->get_id() != local_id_) { peer_list_.neighbours_.push_back(X->get_id()); X->set_neighbour(true); @@ -440,7 +440,7 @@ void OverlayImpl::update_neighbours(td::uint32 nodes_to_change) { VLOG(OVERLAY_INFO) << this << ": adding new neighbour " << X->get_id(); peer_list_.neighbours_.push_back(X->get_id()); X->set_neighbour(true); - } else { + } else if (X->is_alive()) { CHECK(nodes_to_change > 0); auto i = td::Random::fast(0, static_cast(peer_list_.neighbours_.size()) - 1); auto Y = peer_list_.peers_.get(peer_list_.neighbours_[i]); diff --git a/overlay/overlay.cpp b/overlay/overlay.cpp index c9da6f5c3..429c6a9c8 100644 --- a/overlay/overlay.cpp +++ b/overlay/overlay.cpp @@ -347,7 +347,12 @@ void OverlayImpl::alarm() { update_db_at_ = td::Timestamp::in(60.0); } - update_neighbours(0); + if (update_neighbours_at_.is_in_past()) { + update_neighbours(2); + update_neighbours_at_ = td::Timestamp::in(td::Random::fast(30.0, 120.0)); + } else { + update_neighbours(0); + } alarm_timestamp() = td::Timestamp::in(1.0); } else { update_neighbours(0); diff --git a/overlay/overlay.hpp b/overlay/overlay.hpp index ba0fae46d..41a04dec2 100644 --- a/overlay/overlay.hpp +++ b/overlay/overlay.hpp @@ -391,6 +391,7 @@ class OverlayImpl : public Overlay { td::Timestamp next_dht_store_query_ = td::Timestamp::in(1.0); td::Timestamp update_db_at_; td::Timestamp update_throughput_at_; + td::Timestamp update_neighbours_at_; td::Timestamp last_throughput_update_; std::unique_ptr callback_; diff --git a/tdutils/td/utils/port/Stat.cpp b/tdutils/td/utils/port/Stat.cpp index 816d622e4..73b006084 100644 --- a/tdutils/td/utils/port/Stat.cpp +++ b/tdutils/td/utils/port/Stat.cpp @@ -472,4 +472,45 @@ Result get_total_mem_stat() { #endif } +Result get_cpu_cores() { +#if TD_LINUX + uint32 result = 0; + TRY_RESULT(fd, FileFd::open("/proc/cpuinfo", FileFd::Read)); + SCOPE_EXIT { + fd.close(); + }; + std::string data; + char buf[10000]; + while (true) { + TRY_RESULT(size, fd.read(MutableSlice{buf, sizeof(buf) - 1})); + if (size == 0) { + break; + } + buf[size] = '\0'; + data += buf; + } + size_t i = 0; + while (i < data.size()) { + const char *line_begin = data.data() + i; + while (i < data.size() && data[i] != '\n') { + ++i; + } + auto line_end = data.data() + i; + ++i; + Slice line{line_begin, line_end}; + size_t j = 0; + while (j < line.size() && line[j] != ' ' && line[j] != '\t' && line[j] != ':') { + ++j; + } + Slice name = line.substr(0, j); + if (name == "processor") { + ++result; + } + } + return result; +#else + return Status::Error("Not supported"); +#endif +} + } // namespace td diff --git a/tdutils/td/utils/port/Stat.h b/tdutils/td/utils/port/Stat.h index ab97be0f1..82e1832a0 100644 --- a/tdutils/td/utils/port/Stat.h +++ b/tdutils/td/utils/port/Stat.h @@ -70,4 +70,6 @@ struct TotalMemStat { }; Result get_total_mem_stat() TD_WARN_UNUSED_RESULT; +Result get_cpu_cores() TD_WARN_UNUSED_RESULT; + } // namespace td diff --git a/test/test-ton-collator.cpp b/test/test-ton-collator.cpp index 7c13870e5..78e0e6039 100644 --- a/test/test-ton-collator.cpp +++ b/test/test-ton-collator.cpp @@ -378,6 +378,9 @@ class TestNode : public td::actor::Actor { void new_key_block(ton::validator::BlockHandle handle) override { } + void send_validator_telemetry(ton::PublicKeyHash key, + ton::tl_object_ptr telemetry) override { + } }; td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::install_callback, diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index 9ef47d95e..f7bbfd868 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -580,6 +580,10 @@ validator.group workchain:int shard:long catchain_seqno:int config_hash:int256 m validator.groupEx workchain:int shard:long vertical_seqno:int catchain_seqno:int config_hash:int256 members:(vector validator.groupMember) = validator.Group; validator.groupNew workchain:int shard:long vertical_seqno:int last_key_block_seqno:int catchain_seqno:int config_hash:int256 members:(vector validator.groupMember) = validator.Group; +validator.telemetry flags:# timestamp:double adnl_id:int256 + node_version:string os_version:string node_started_at:int + ram_size:long cpu_cores:int node_threads:int = validator.Telemetry; + ---functions--- diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index bc46d1b16c9b737a44a09b3a2d363dc13ebf2847..0cd394bd57976c4ec31bcba7ed03e0c3ab5293a2 100644 GIT binary patch delta 308 zcmX@{n{`VA8}Fmp`c@23AhnVAzmY^?IdiOVSz=CRN@7WVkzPn@PHJvyNm1qG39^#n z8uJgxV3WIJsIr;IIERIC(dJ6?%l8%03@L#b0usM@Ovs0GGGl?xWXmY!$r-B^Sb6gD zQ&J}@Zdc~!$}f&DOD!tS%+H(Lzgkj&A0!iBT#{H+lA02qSTcFS3n`|=-02^R8D%Da zNMz+mE+~yp&M!)xeo&E7nip(Pd`U)8YGO+9{} : dht_nodes_[default_dht_node_].get(), overlay_manager_.get(), validator_manager_.get(), full_node_client_.get(), db_root_); load_custom_overlays_config(); + if (!validator_telemetry_filename_.empty()) { + td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::set_validator_telemetry_filename, + validator_telemetry_filename_); + } } for (auto &v : config_.validators) { @@ -4331,6 +4335,15 @@ int main(int argc, char *argv[]) { acts.push_back( [&x]() { td::actor::send_closure(x, &ValidatorEngine::set_fast_state_serializer_enabled, true); }); }); + p.add_option( + '\0', "collect-validator-telemetry", + "store validator telemetry from private block overlay to a given file (json format)", + [&](td::Slice s) { + acts.push_back( + [&x, s = s.str()]() { + td::actor::send_closure(x, &ValidatorEngine::set_validator_telemetry_filename, s); + }); + }); auto S = p.run(argc, argv); if (S.is_error()) { LOG(ERROR) << "failed to parse options: " << S.move_as_error(); diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index 50cd5a323..5beef477e 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -222,6 +222,7 @@ class ValidatorEngine : public td::actor::Actor { ton::BlockSeqno truncate_seqno_{0}; std::string session_logs_file_; bool fast_state_serializer_enabled_ = false; + std::string validator_telemetry_filename_; std::set unsafe_catchains_; std::map> unsafe_catchain_rotations_; @@ -310,6 +311,9 @@ class ValidatorEngine : public td::actor::Actor { void set_fast_state_serializer_enabled(bool value) { fast_state_serializer_enabled_ = value; } + void set_validator_telemetry_filename(std::string value) { + validator_telemetry_filename_ = std::move(value); + } void start_up() override; ValidatorEngine() { } diff --git a/validator/CMakeLists.txt b/validator/CMakeLists.txt index d5ba00e39..b7f3787c4 100644 --- a/validator/CMakeLists.txt +++ b/validator/CMakeLists.txt @@ -56,6 +56,7 @@ set(VALIDATOR_HEADERS import-db-slice.hpp queue-size-counter.hpp + validator-telemetry.hpp manager-disk.h manager-disk.hpp @@ -82,6 +83,7 @@ set(VALIDATOR_SOURCE validator-group.cpp validator-options.cpp queue-size-counter.cpp + validator-telemetry.cpp downloaders/wait-block-data.cpp downloaders/wait-block-state.cpp diff --git a/validator/full-node-private-overlay.cpp b/validator/full-node-private-overlay.cpp index e5ea1f0be..1acfbd4ed 100644 --- a/validator/full-node-private-overlay.cpp +++ b/validator/full-node-private-overlay.cpp @@ -19,6 +19,9 @@ #include "common/delay.h" #include "common/checksum.h" #include "full-node-serializer.hpp" +#include "auto/tl/ton_api_json.h" +#include "td/utils/JsonBuilder.h" +#include "tl/tl_json.h" namespace ton::validator::fullnode { @@ -85,15 +88,52 @@ void FullNodePrivateBlockOverlay::process_block_candidate_broadcast(PublicKeyHas validator_set_hash, std::move(data)); } +void FullNodePrivateBlockOverlay::process_telemetry_broadcast( + PublicKeyHash src, const tl_object_ptr& telemetry) { + if (telemetry->adnl_id_ != src.bits256_value()) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": adnl_id mismatch"; + return; + } + auto now = (td::int32)td::Clocks::system(); + if (telemetry->timestamp_ < now - 60) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": too old (" + << now - telemetry->timestamp_ << "s ago)"; + return; + } + if (telemetry->timestamp_ > now + 60) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": too new (" + << telemetry->timestamp_ - now << "s in the future)"; + return; + } + VLOG(FULL_NODE_DEBUG) << "Got telemetry broadcast from " << src; + auto s = td::json_encode(td::ToJson(*telemetry), false); + std::erase_if(s, [](char c) { + return c == '\n' || c == '\r'; + }); + telemetry_file_ << s << "\n"; + telemetry_file_.flush(); + if (telemetry_file_.fail()) { + VLOG(FULL_NODE_WARNING) << "Failed to write telemetry to file"; + } +} + void FullNodePrivateBlockOverlay::receive_broadcast(PublicKeyHash src, td::BufferSlice broadcast) { if (adnl::AdnlNodeIdShort{src} == local_id_) { return; } auto B = fetch_tl_object(std::move(broadcast), true); if (B.is_error()) { + if (collect_telemetry_ && src != local_id_.pubkey_hash()) { + auto R = fetch_tl_prefix(broadcast, true); + if (R.is_ok()) { + process_telemetry_broadcast(src, R.ok()); + } + } return; } - ton_api::downcast_call(*B.move_as_ok(), [src, Self = this](auto &obj) { Self->process_broadcast(src, obj); }); + ton_api::downcast_call(*B.move_as_ok(), [src, Self = this](auto& obj) { + Self->process_broadcast(src, obj); + }); } void FullNodePrivateBlockOverlay::send_shard_block_info(BlockIdExt block_id, CatchainSeqno cc_seqno, @@ -144,6 +184,30 @@ void FullNodePrivateBlockOverlay::send_broadcast(BlockBroadcast broadcast) { local_id_.pubkey_hash(), overlay::Overlays::BroadcastFlagAnySender(), B.move_as_ok()); } +void FullNodePrivateBlockOverlay::send_validator_telemetry(tl_object_ptr telemetry) { + process_telemetry_broadcast(local_id_.pubkey_hash(), telemetry); + auto data = serialize_tl_object(telemetry, true); + if (data.size() <= overlay::Overlays::max_simple_broadcast_size()) { + td::actor::send_closure(overlays_, &overlay::Overlays::send_broadcast_ex, local_id_, overlay_id_, + local_id_.pubkey_hash(), 0, std::move(data)); + } else { + td::actor::send_closure(overlays_, &overlay::Overlays::send_broadcast_fec_ex, local_id_, overlay_id_, + local_id_.pubkey_hash(), 0, std::move(data)); + } +} + +void FullNodePrivateBlockOverlay::collect_validator_telemetry(std::string filename) { + if (collect_telemetry_) { + telemetry_file_.close(); + } + collect_telemetry_ = true; + LOG(FULL_NODE_WARNING) << "Collecting validator telemetry to " << filename << " (local id: " << local_id_ << ")"; + telemetry_file_.open(filename, std::ios_base::app); + if (!telemetry_file_.is_open()) { + LOG(WARNING) << "Cannot open file " << filename << " for validator telemetry"; + } +} + void FullNodePrivateBlockOverlay::start_up() { std::sort(nodes_.begin(), nodes_.end()); nodes_.erase(std::unique(nodes_.begin(), nodes_.end()), nodes_.end()); diff --git a/validator/full-node-private-overlay.hpp b/validator/full-node-private-overlay.hpp index a0022fa03..1e29f8c93 100644 --- a/validator/full-node-private-overlay.hpp +++ b/validator/full-node-private-overlay.hpp @@ -17,6 +17,7 @@ #pragma once #include "full-node.h" +#include namespace ton::validator::fullnode { @@ -32,6 +33,8 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { void process_broadcast(PublicKeyHash src, ton_api::tonNode_newBlockCandidateBroadcastCompressed &query); void process_block_candidate_broadcast(PublicKeyHash src, ton_api::tonNode_Broadcast &query); + void process_telemetry_broadcast(PublicKeyHash src, const tl_object_ptr& telemetry); + template void process_broadcast(PublicKeyHash, T &) { VLOG(FULL_NODE_WARNING) << "dropping unknown broadcast"; @@ -42,6 +45,9 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { void send_block_candidate(BlockIdExt block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, td::BufferSlice data); void send_broadcast(BlockBroadcast broadcast); + void send_validator_telemetry(tl_object_ptr telemetry); + + void collect_validator_telemetry(std::string filename); void set_config(FullNodeConfig config) { config_ = std::move(config); @@ -91,6 +97,9 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { void try_init(); void init(); + + bool collect_telemetry_ = false; + std::ofstream telemetry_file_; }; class FullNodeCustomOverlay : public td::actor::Actor { diff --git a/validator/full-node.cpp b/validator/full-node.cpp index bb6da2c8b..350606ca9 100644 --- a/validator/full-node.cpp +++ b/validator/full-node.cpp @@ -64,6 +64,7 @@ void FullNodeImpl::del_permanent_key(PublicKeyHash key, td::Promise pr } local_keys_.erase(key); private_block_overlays_.erase(key); + update_validator_telemetry_collector(); for (auto &p : custom_overlays_) { update_custom_overlay(p.second); } @@ -438,6 +439,15 @@ void FullNodeImpl::new_key_block(BlockHandle handle) { } } +void FullNodeImpl::send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) { + auto it = private_block_overlays_.find(key); + if (it == private_block_overlays_.end()) { + VLOG(FULL_NODE_INFO) << "Cannot send validator telemetry for " << key << " : no private block overlay"; + return; + } + td::actor::send_closure(it->second, &FullNodePrivateBlockOverlay::send_validator_telemetry, std::move(telemetry)); +} + void FullNodeImpl::process_block_broadcast(BlockBroadcast broadcast) { send_block_broadcast_to_custom_overlays(broadcast); td::actor::send_closure(validator_manager_, &ValidatorManagerInterface::prevalidate_block, std::move(broadcast), @@ -460,6 +470,24 @@ void FullNodeImpl::process_block_candidate_broadcast(BlockIdExt block_id, Catcha std::move(data)); } +void FullNodeImpl::set_validator_telemetry_filename(std::string value) { + validator_telemetry_filename_ = std::move(value); + update_validator_telemetry_collector(); +} + +void FullNodeImpl::update_validator_telemetry_collector() { + if (validator_telemetry_filename_.empty() || private_block_overlays_.empty()) { + validator_telemetry_collector_key_ = PublicKeyHash::zero(); + return; + } + if (!private_block_overlays_.contains(validator_telemetry_collector_key_)) { + auto it = private_block_overlays_.begin(); + validator_telemetry_collector_key_ = it->first; + td::actor::send_closure(it->second, &FullNodePrivateBlockOverlay::collect_validator_telemetry, + validator_telemetry_filename_); + } +} + void FullNodeImpl::start_up() { add_shard(ShardIdFull{masterchainId}); if (local_id_.is_zero()) { @@ -536,6 +564,9 @@ void FullNodeImpl::start_up() { void new_key_block(BlockHandle handle) override { td::actor::send_closure(id_, &FullNodeImpl::new_key_block, std::move(handle)); } + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override { + td::actor::send_closure(id_, &FullNodeImpl::send_validator_telemetry, key, std::move(telemetry)); + } Callback(td::actor::ActorId id) : id_(id) { } @@ -555,6 +586,7 @@ void FullNodeImpl::update_private_overlays() { } private_block_overlays_.clear(); + update_validator_telemetry_collector(); if (local_keys_.empty()) { return; } @@ -573,6 +605,7 @@ void FullNodeImpl::create_private_block_overlay(PublicKeyHash key) { private_block_overlays_[key] = td::actor::create_actor( "BlocksPrivateOverlay", current_validators_[key], std::move(nodes), zero_state_file_hash_, config_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, actor_id(this)); + update_validator_telemetry_collector(); } } diff --git a/validator/full-node.h b/validator/full-node.h index 621cdac0d..67e090c63 100644 --- a/validator/full-node.h +++ b/validator/full-node.h @@ -90,6 +90,8 @@ class FullNode : public td::actor::Actor { virtual void process_block_candidate_broadcast(BlockIdExt block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, td::BufferSlice data) = 0; + virtual void set_validator_telemetry_filename(std::string value) = 0; + static constexpr td::uint32 max_block_size() { return 4 << 20; } diff --git a/validator/full-node.hpp b/validator/full-node.hpp index 584be5ee2..bf530f29e 100644 --- a/validator/full-node.hpp +++ b/validator/full-node.hpp @@ -84,11 +84,14 @@ class FullNodeImpl : public FullNode { void got_key_block_config(td::Ref config); void new_key_block(BlockHandle handle); + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry); void process_block_broadcast(BlockBroadcast broadcast) override; void process_block_candidate_broadcast(BlockIdExt block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, td::BufferSlice data) override; + void set_validator_telemetry_filename(std::string value) override; + void start_up() override; FullNodeImpl(PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, @@ -143,6 +146,11 @@ class FullNodeImpl : public FullNode { void send_block_broadcast_to_custom_overlays(const BlockBroadcast& broadcast); void send_block_candidate_broadcast_to_custom_overlays(const BlockIdExt& block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, const td::BufferSlice& data); + + std::string validator_telemetry_filename_; + PublicKeyHash validator_telemetry_collector_key_ = PublicKeyHash::zero(); + + void update_validator_telemetry_collector(); }; } // namespace fullnode diff --git a/validator/impl/validator-set.cpp b/validator/impl/validator-set.cpp index 629337cf8..d4b0d647b 100644 --- a/validator/impl/validator-set.cpp +++ b/validator/impl/validator-set.cpp @@ -28,14 +28,14 @@ namespace ton { namespace validator { using td::Ref; -const ValidatorDescr *ValidatorSetQ::find_validator(const NodeIdShort &id) const { +const ValidatorDescr *ValidatorSetQ::get_validator(const NodeIdShort &id) const { auto it = std::lower_bound(ids_map_.begin(), ids_map_.end(), id, [](const auto &p, const auto &x) { return p.first < x; }); return it < ids_map_.end() && it->first == id ? &ids_[it->second] : nullptr; } bool ValidatorSetQ::is_validator(NodeIdShort id) const { - return find_validator(id); + return get_validator(id); } td::Result ValidatorSetQ::check_signatures(RootHash root_hash, FileHash file_hash, @@ -53,7 +53,7 @@ td::Result ValidatorSetQ::check_signatures(RootHash root_hash, } nodes.insert(sig.node); - auto vdescr = find_validator(sig.node); + auto vdescr = get_validator(sig.node); if (!vdescr) { return td::Status::Error(ErrorCode::protoviolation, "unknown node to sign"); } @@ -84,7 +84,7 @@ td::Result ValidatorSetQ::check_approve_signatures(RootHash roo } nodes.insert(sig.node); - auto vdescr = find_validator(sig.node); + auto vdescr = get_validator(sig.node); if (!vdescr) { return td::Status::Error(ErrorCode::protoviolation, "unknown node to sign"); } diff --git a/validator/impl/validator-set.hpp b/validator/impl/validator-set.hpp index 3141f36c5..951ca4b71 100644 --- a/validator/impl/validator-set.hpp +++ b/validator/impl/validator-set.hpp @@ -32,6 +32,7 @@ namespace validator { class ValidatorSetQ : public ValidatorSet { public: + const ValidatorDescr* get_validator(const NodeIdShort& id) const override; bool is_validator(NodeIdShort id) const override; CatchainSeqno get_catchain_seqno() const override { return cc_seqno_; @@ -62,8 +63,6 @@ class ValidatorSetQ : public ValidatorSet { ValidatorWeight total_weight_; std::vector ids_; std::vector> ids_map_; - - const ValidatorDescr* find_validator(const NodeIdShort& id) const; }; class ValidatorSetCompute { diff --git a/validator/interfaces/validator-manager.h b/validator/interfaces/validator-manager.h index ce0c27e1f..78257fa07 100644 --- a/validator/interfaces/validator-manager.h +++ b/validator/interfaces/validator-manager.h @@ -147,6 +147,7 @@ class ValidatorManager : public ValidatorManagerInterface { virtual void send_ihr_message(td::Ref message) = 0; virtual void send_top_shard_block_description(td::Ref desc) = 0; virtual void send_block_broadcast(BlockBroadcast broadcast, int mode) = 0; + virtual void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) = 0; virtual void update_shard_client_state(BlockIdExt masterchain_block_id, td::Promise promise) = 0; virtual void get_shard_client_state(bool from_db, td::Promise promise) = 0; diff --git a/validator/interfaces/validator-set.h b/validator/interfaces/validator-set.h index b71c0bfea..ad7fb9b55 100644 --- a/validator/interfaces/validator-set.h +++ b/validator/interfaces/validator-set.h @@ -30,6 +30,7 @@ namespace validator { class ValidatorSet : public td::CntObject { public: virtual ~ValidatorSet() = default; + virtual const ValidatorDescr* get_validator(const NodeIdShort& id) const = 0; virtual bool is_validator(NodeIdShort id) const = 0; virtual CatchainSeqno get_catchain_seqno() const = 0; virtual td::uint32 get_validator_set_hash() const = 0; diff --git a/validator/manager-disk.hpp b/validator/manager-disk.hpp index 5287a3876..48b72d3b0 100644 --- a/validator/manager-disk.hpp +++ b/validator/manager-disk.hpp @@ -263,6 +263,8 @@ class ValidatorManagerImpl : public ValidatorManager { void send_top_shard_block_description(td::Ref desc) override; void send_block_broadcast(BlockBroadcast broadcast, int mode) override { } + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override { + } void update_shard_client_state(BlockIdExt masterchain_block_id, td::Promise promise) override; void get_shard_client_state(bool from_db, td::Promise promise) override; diff --git a/validator/manager-hardfork.hpp b/validator/manager-hardfork.hpp index 2e703faf7..bb0dab5fd 100644 --- a/validator/manager-hardfork.hpp +++ b/validator/manager-hardfork.hpp @@ -332,6 +332,8 @@ class ValidatorManagerImpl : public ValidatorManager { } void send_block_broadcast(BlockBroadcast broadcast, int mode) override { } + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override { + } void update_shard_client_state(BlockIdExt masterchain_block_id, td::Promise promise) override { UNREACHABLE(); diff --git a/validator/manager.cpp b/validator/manager.cpp index 67cd60a79..76f8a845b 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -1624,6 +1624,11 @@ void ValidatorManagerImpl::send_block_broadcast(BlockBroadcast broadcast, int mo callback_->send_broadcast(std::move(broadcast), mode); } +void ValidatorManagerImpl::send_validator_telemetry(PublicKeyHash key, + tl_object_ptr telemetry) { + callback_->send_validator_telemetry(key, std::move(telemetry)); +} + void ValidatorManagerImpl::start_up() { db_ = create_db_actor(actor_id(this), db_root_, opts_); actor_stats_ = td::actor::create_actor("actor_stats"); @@ -1724,6 +1729,7 @@ void ValidatorManagerImpl::started(ValidatorManagerInitResult R) { if (opts_->nonfinal_ls_queries_enabled()) { candidates_buffer_ = td::actor::create_actor("candidates-buffer", actor_id(this)); } + init_validator_telemetry(); } void ValidatorManagerImpl::read_gc_list(std::vector list) { @@ -1930,6 +1936,7 @@ void ValidatorManagerImpl::new_masterchain_block() { td::actor::send_closure(serializer_, &AsyncStateSerializer::update_last_known_key_block_ts, last_key_block_handle_->unix_time()); } + init_validator_telemetry(); } update_shards(); @@ -3286,6 +3293,41 @@ void ValidatorManagerImpl::CheckedExtMsgCounter::before_query() { } } +void ValidatorManagerImpl::init_validator_telemetry() { + if (last_masterchain_state_.is_null()) { + return; + } + td::Ref validator_set = last_masterchain_state_->get_total_validator_set(0); + if (validator_set.is_null()) { + validator_telemetry_.clear(); + return; + } + std::set processed; + for (auto& key : temp_keys_) { + if (const ValidatorDescr* desc = validator_set->get_validator(key.bits256_value())) { + processed.insert(key); + adnl::AdnlNodeIdShort adnl_id; + if (desc->addr.is_zero()) { + adnl_id = adnl::AdnlNodeIdShort{ValidatorFullId{desc->key}.compute_short_id()}; + } else { + adnl_id = adnl::AdnlNodeIdShort{desc->addr}; + } + auto& telemetry = validator_telemetry_[key]; + if (telemetry.empty()) { + telemetry = td::actor::create_actor( + "telemetry", key, adnl_id, opts_->zero_block_id().file_hash, actor_id(this)); + } + } + } + for (auto it = validator_telemetry_.begin(); it != validator_telemetry_.end();) { + if (processed.contains(it->first)) { + ++it; + } else { + it = validator_telemetry_.erase(it); + } + } +} + } // namespace validator } // namespace ton diff --git a/validator/manager.hpp b/validator/manager.hpp index 7410f75f1..e8768e1bf 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -34,6 +34,7 @@ #include "rldp/rldp.h" #include "token-manager.h" #include "queue-size-counter.hpp" +#include "validator-telemetry.hpp" #include "impl/candidates-buffer.hpp" #include @@ -339,6 +340,7 @@ class ValidatorManagerImpl : public ValidatorManager { } void add_temp_key(PublicKeyHash key, td::Promise promise) override { temp_keys_.insert(key); + init_validator_telemetry(); promise.set_value(td::Unit()); } void del_permanent_key(PublicKeyHash key, td::Promise promise) override { @@ -347,6 +349,7 @@ class ValidatorManagerImpl : public ValidatorManager { } void del_temp_key(PublicKeyHash key, td::Promise promise) override { temp_keys_.erase(key); + init_validator_telemetry(); promise.set_value(td::Unit()); } @@ -501,6 +504,7 @@ class ValidatorManagerImpl : public ValidatorManager { void send_ihr_message(td::Ref message) override; void send_top_shard_block_description(td::Ref desc) override; void send_block_broadcast(BlockBroadcast broadcast, int mode) override; + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override; void update_shard_client_state(BlockIdExt masterchain_block_id, td::Promise promise) override; void get_shard_client_state(bool from_db, td::Promise promise) override; @@ -732,6 +736,10 @@ class ValidatorManagerImpl : public ValidatorManager { CollationStats stats) override; void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) override; RecordedBlockStats &new_block_stats_record(BlockIdExt block_id); + + std::map> validator_telemetry_; + + void init_validator_telemetry(); }; } // namespace validator diff --git a/validator/validator-telemetry.cpp b/validator/validator-telemetry.cpp new file mode 100644 index 000000000..403dd6f9f --- /dev/null +++ b/validator/validator-telemetry.cpp @@ -0,0 +1,87 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "validator-telemetry.hpp" +#include "git.h" +#include "td/utils/Random.h" +#include "td/utils/port/uname.h" +#include "interfaces/validator-manager.h" + +namespace ton::validator { + +void ValidatorTelemetry::start_up() { + node_version_ = PSTRING() << "validator-engine, Commit: " << GitMetadata::CommitSHA1() + << ", Date: " << GitMetadata::CommitDate(); + + os_version_ = td::get_operating_system_version().str(); + + auto r_total_mem_stat = td::get_total_mem_stat(); + if (r_total_mem_stat.is_error()) { + LOG(WARNING) << "Cannot get RAM size: " << r_total_mem_stat.move_as_error(); + } else { + ram_size_ = r_total_mem_stat.ok().total_ram; + } + + auto r_cpu_cores = td::get_cpu_cores(); + if (r_cpu_cores.is_error()) { + LOG(WARNING) << "Cannot get CPU info: " << r_cpu_cores.move_as_error(); + } else { + cpu_cores_ = r_cpu_cores.move_as_ok(); + } + + LOG(DEBUG) << "Initializing validator telemetry, key = " << key_ << ", adnl_id = " << local_id_; + alarm_timestamp().relax(send_telemetry_at_ = td::Timestamp::in(td::Random::fast(30.0, 60.0))); +} + +void ValidatorTelemetry::alarm() { + if (send_telemetry_at_.is_in_past()) { + send_telemetry_at_ = td::Timestamp::never(); + send_telemetry(); + } + alarm_timestamp().relax(send_telemetry_at_); +} + +void ValidatorTelemetry::send_telemetry() { + send_telemetry_at_ = td::Timestamp::in(PERIOD); + + auto telemetry = create_tl_object(); + telemetry->flags_ = 0; + telemetry->timestamp_ = td::Clocks::system(); + telemetry->adnl_id_ = local_id_.bits256_value(); + telemetry->node_version_ = node_version_; + telemetry->os_version_ = os_version_; + telemetry->node_started_at_ = adnl::Adnl::adnl_start_time(); + telemetry->ram_size_ = ram_size_; + telemetry->cpu_cores_ = cpu_cores_; + telemetry->node_threads_ = (td::int32)td::actor::SchedulerContext::get() + ->scheduler_group() + ->schedulers.at(td::actor::SchedulerContext::get()->get_scheduler_id().value()) + .cpu_threads_count; + + LOG(DEBUG) << "Sending validator telemetry for adnl id " << local_id_; + td::actor::send_closure(manager_, &ValidatorManager::send_validator_telemetry, key_, std::move(telemetry)); +} + +} // namespace ton::validator diff --git a/validator/validator-telemetry.hpp b/validator/validator-telemetry.hpp new file mode 100644 index 000000000..73908bdd1 --- /dev/null +++ b/validator/validator-telemetry.hpp @@ -0,0 +1,66 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#pragma once +#include "overlay.h" +#include "td/actor/actor.h" +#include "adnl/adnl.h" +#include "interfaces/shard.h" + +namespace ton::validator { +class ValidatorManager; + +class ValidatorTelemetry : public td::actor::Actor { +public: + ValidatorTelemetry(PublicKeyHash key, adnl::AdnlNodeIdShort local_id, td::Bits256 zero_state_file_hash, + td::actor::ActorId manager) + : key_(key) + , local_id_(local_id) + , zero_state_file_hash_(zero_state_file_hash) + , manager_(std::move(manager)) { + } + + void start_up() override; + void alarm() override; + +private: + PublicKeyHash key_; + adnl::AdnlNodeIdShort local_id_; + td::Bits256 zero_state_file_hash_; + td::actor::ActorId manager_; + + std::string node_version_; + std::string os_version_; + td::uint32 cpu_cores_ = 0; + td::uint64 ram_size_ = 0; + + td::Timestamp send_telemetry_at_ = td::Timestamp::never(); + + void send_telemetry(); + + static constexpr double PERIOD = 600.0; + static constexpr td::uint32 MAX_SIZE = 8192; +}; +} // namespace ton::validator \ No newline at end of file diff --git a/validator/validator.h b/validator/validator.h index 9dbaa185f..77a62540f 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -191,6 +191,7 @@ class ValidatorManagerInterface : public td::actor::Actor { td::Promise promise) = 0; virtual void new_key_block(BlockHandle handle) = 0; + virtual void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) = 0; }; virtual ~ValidatorManagerInterface() = default;