Skip to content

Commit

Permalink
Improve collator node pings and collation manager stats
Browse files Browse the repository at this point in the history
  • Loading branch information
SpyCheese committed Nov 28, 2024
1 parent 5fae8db commit 923f1cd
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 7 deletions.
2 changes: 1 addition & 1 deletion tl/generate/scheme/ton_api.tl
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ engine.validator.perfTimerStats stats:(vector engine.validator.PerfTimerStatsByN
engine.validator.shardOutQueueSize size:long = engine.validator.ShardOutQueueSize;

engine.validator.collationManagerStats.shard shard_id:tonNode.shardId self_collate:Bool select_mode:string active:Bool collators:(vector int256) = engine.validator.collationManagerStats.Shard;
engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double = engine.validator.collationManagerStats.Collator;
engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double last_ping_ago:double last_ping_status:string = engine.validator.collationManagerStats.Collator;
engine.validator.collationManagerStats.localId adnl_id:int256 shards:(vector engine.validator.collationManagerStats.shard)
collators:(vector engine.validator.collationManagerStats.collator) = engine.validator.collationManagerStats.LocalId;
engine.validator.collationManagerStats local_ids:(vector engine.validator.collationManagerStats.localId) = engine.validator.CollationManagerStats;
Expand Down
Binary file modified tl/generate/scheme/ton_api.tlo
Binary file not shown.
20 changes: 18 additions & 2 deletions validator-engine-console/validator-engine-console-query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1775,8 +1775,24 @@ td::Status GetCollationManagerStatsQuery::receive(td::BufferSlice data) {
if (collator == nullptr) {
return td::Status::Error("collator not found");
}
td::TerminalIO::out() << " " << id << " alive=" << (int)collator->alive_
<< " ping_in=" << collator->ping_in_ << "\n";
td::StringBuilder sb;
sb << " " << id << "\n";
sb << " alive=" << (int)collator->alive_;
if (collator->active_) {
sb << " ping_in=" << td::StringBuilder::FixedDouble(std::max(collator->ping_in_, 0.0), 3);
}
sb << " last_ping_ago=";
if (collator->last_ping_ago_ < 0.0) {
sb << "never";
} else {
std::string status = collator->last_ping_status_;
std::erase_if(status, [](char c) { return c < (char)32; });
if (status.size() > 128) {
status.resize(128);
}
sb << td::StringBuilder::FixedDouble(collator->last_ping_ago_, 3) << ": " << status;
}
td::TerminalIO::out() << sb.as_cslice() << "\n";
}
}
}
Expand Down
11 changes: 8 additions & 3 deletions validator/collation-manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ void CollationManager::get_stats(
} else {
obj->ping_in_ = -1.0;
}
obj->last_ping_ago_ = collator.last_ping_at ? td::Time::now() - collator.last_ping_at.at() : -1.0;
obj->last_ping_status_ = collator.last_ping_status.is_ok() ? "OK" : collator.last_ping_status.message().str();
stats->collators_.push_back(std::move(obj));
}
promise.set_value(std::move(stats));
Expand Down Expand Up @@ -323,7 +325,7 @@ void CollationManager::alarm() {
td::actor::send_closure(SelfId, &CollationManager::got_pong, id, std::move(R));
};
LOG(DEBUG) << "sending ping to " << id;
td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "collatorping", std::move(P),
td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "ping", std::move(P),
td::Timestamp::in(2.0), std::move(query));
} else {
alarm_timestamp().relax(collator.ping_at);
Expand All @@ -340,20 +342,23 @@ void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result<td::BufferS
collator.sent_ping = false;

auto r_pong = [&]() -> td::Result<tl_object_ptr<ton_api::collatorNode_pong>> {
TRY_RESULT_PREFIX(data, std::move(R), "rldp query error: ");
TRY_RESULT(data, std::move(R));
auto r_error = fetch_tl_object<ton_api::collatorNode_error>(data, true);
if (r_error.is_ok()) {
auto error = r_error.move_as_ok();
return td::Status::Error(error->code_, error->message_);
}
return fetch_tl_object<ton_api::collatorNode_pong>(data, true);
}();
collator.last_ping_at = td::Timestamp::now();
if (r_pong.is_error()) {
LOG(DEBUG) << "pong from " << id << " : " << r_pong.move_as_error();
LOG(DEBUG) << "pong from " << id << " : " << r_pong.error();
collator.alive = false;
collator.last_ping_status = r_pong.move_as_error();
} else {
LOG(DEBUG) << "pong from " << id << " : OK";
collator.alive = true;
collator.last_ping_status = td::Status::OK();
}
collator.ping_at = td::Timestamp::in(td::Random::fast(10.0, 20.0));
if (collator.active_cnt && !collator.sent_ping) {
Expand Down
2 changes: 2 additions & 0 deletions validator/collation-manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ class CollationManager : public td::actor::Actor {
td::Timestamp ping_at = td::Timestamp::now();
bool sent_ping = false;
size_t active_cnt = 0;
td::Timestamp last_ping_at = td::Timestamp::never();
td::Status last_ping_status = td::Status::Error("not pinged");
};
std::map<adnl::AdnlNodeIdShort, CollatorInfo> collators_;

Expand Down
24 changes: 23 additions & 1 deletion validator/collator-node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ void CollatorNode::new_masterchain_block_notification(td::Ref<MasterchainState>
}
}

// Records the most recent shard client block handle. check_out_of_sync() reads the stored handle's unix time
// to decide whether this node is sufficiently up to date.
void CollatorNode::update_shard_client_handle(BlockHandle shard_client_handle) {
  // The argument is already a private by-value copy, so hand it over to the member instead of copying it again.
  shard_client_handle_ = std::move(shard_client_handle);
}

void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev,
CatchainSeqno cc_seqno) {
if (!can_collate_shard(shard)) {
Expand Down Expand Up @@ -225,7 +229,12 @@ void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector<Bl
}
++cache_it;
}
generate_block(shard, cc_seqno, info.prev, {}, td::Timestamp::in(10.0), [](td::Result<BlockCandidate>) {});
auto S = check_out_of_sync();
if (S.is_ok()) {
generate_block(shard, cc_seqno, info.prev, {}, td::Timestamp::in(10.0), [](td::Result<BlockCandidate>) {});
} else {
LOG(DEBUG) << "not generating block automatically: " << S;
}
}
return;
}
Expand Down Expand Up @@ -535,9 +544,22 @@ void CollatorNode::process_result(std::shared_ptr<CacheEntry> cache_entry, td::R
cache_entry->promises.clear();
}

// Reports whether this node's view of the chain is recent enough.
// Returns an error when the last masterchain state or the shard client handle has not been set yet, or when the
// unix time of either one is more than 60 seconds behind the system clock. Returns OK otherwise.
td::Status CollatorNode::check_out_of_sync() {
  const bool inited = !last_masterchain_state_.is_null() && shard_client_handle_;
  if (!inited) {
    return td::Status::Error("not inited");
  }

  const auto now = (UnixTime)td::Clocks::system();
  const auto mc_time = last_masterchain_state_->get_unix_time();
  const auto shard_client_time = shard_client_handle_->unix_time();

  // Each source counts as fresh unless its timestamp is strictly older than (now - 60).
  const bool mc_is_fresh = !(mc_time < now - 60);
  const bool shard_client_is_fresh = !(shard_client_time < now - 60);
  if (mc_is_fresh && shard_client_is_fresh) {
    return td::Status::OK();
  }

  // The message reports the lag of both sources, even if only one of them is stale.
  return td::Status::Error(PSTRING() << "out of sync: mc " << now - mc_time << "s ago, shardclient "
                                     << now - shard_client_time << "s ago");
}

// Handles a ping query received from `src`: logs it at DEBUG level and answers through `promise`.
// On success the reply is a serialized collatorNode_pong built with argument 0.
// `ping` is not read by this function.
void CollatorNode::process_ping(adnl::AdnlNodeIdShort src, ton_api::collatorNode_ping& ping,
td::Promise<td::BufferSlice> promise) {
LOG(DEBUG) << "got ping from " << src;
// NOTE(review): TRY_STATUS_PROMISE is defined outside this view; presumably it fails `promise` with the
// status from check_out_of_sync() and returns early when that status is an error — confirm.
TRY_STATUS_PROMISE(promise, check_out_of_sync());
promise.set_result(create_serialize_tl_object<ton_api::collatorNode_pong>(0));
}

Expand Down
4 changes: 4 additions & 0 deletions validator/collator-node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class CollatorNode : public td::actor::Actor {
void del_shard(ShardIdFull shard);

void new_masterchain_block_notification(td::Ref<MasterchainState> state);
void update_shard_client_handle(BlockHandle shard_client_handle);
void update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev, CatchainSeqno cc_seqno);

void update_options(td::Ref<ValidatorManagerOptions> opts) {
Expand Down Expand Up @@ -84,6 +85,7 @@ class CollatorNode : public td::actor::Actor {
std::map<std::pair<ShardIdFull, CatchainSeqno>, FutureValidatorGroup> future_validator_groups_;

td::Ref<MasterchainState> last_masterchain_state_;
BlockHandle shard_client_handle_;

td::Result<FutureValidatorGroup*> get_future_validator_group(ShardIdFull shard, CatchainSeqno cc_seqno);

Expand All @@ -92,6 +94,8 @@ class CollatorNode : public td::actor::Actor {
td::Promise<BlockCandidate> promise);
void process_result(std::shared_ptr<CacheEntry> cache_entry, td::Result<BlockCandidate> R);

td::Status check_out_of_sync();

public:
static tl_object_ptr<ton_api::collatorNode_Candidate> serialize_candidate(const BlockCandidate& block, bool compress);
static td::Result<BlockCandidate> deserialize_candidate(tl_object_ptr<ton_api::collatorNode_Candidate> f,
Expand Down
10 changes: 10 additions & 0 deletions validator/manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2754,6 +2754,9 @@ void ValidatorManagerImpl::update_shard_client_block_handle(BlockHandle handle,
last_liteserver_state_ = std::move(state);
}
}
for (auto &c : collator_nodes_) {
td::actor::send_closure(c.second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_);
}
shard_client_update(seqno);
promise.set_value(td::Unit());
}
Expand Down Expand Up @@ -3509,6 +3512,13 @@ void ValidatorManagerImpl::add_collator(adnl::AdnlNodeIdShort id, ShardIdFull sh
if (it == collator_nodes_.end()) {
it = collator_nodes_.emplace(id, Collator()).first;
it->second.actor = td::actor::create_actor<CollatorNode>("collatornode", id, opts_, actor_id(this), adnl_, rldp_);
if (last_masterchain_state_.not_null()) {
td::actor::send_closure(it->second.actor, &CollatorNode::new_masterchain_block_notification,
last_masterchain_state_);
}
if (shard_client_handle_) {
td::actor::send_closure(it->second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_);
}
}
if (!it->second.shards.insert(shard).second) {
return;
Expand Down

0 comments on commit 923f1cd

Please sign in to comment.