From 08e9e29dff7d70cec8ee65a7208f0a5780691b5b Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Sat, 5 Aug 2023 10:47:10 +0300 Subject: [PATCH 1/2] subscriber: fix off-by-one in awaitHeight() Block count is literally a block counter, while block height is the height written in the block. For block zero the height is zero, while we have one block in the chain and therefore the count is one. The persistent store operates with heights, therefore the comparison should be adjusted by one. Signed-off-by: Roman Khimov --- pkg/morph/subscriber/subscriber.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/morph/subscriber/subscriber.go b/pkg/morph/subscriber/subscriber.go index e9b3bca21d..21f91d2fb9 100644 --- a/pkg/morph/subscriber/subscriber.go +++ b/pkg/morph/subscriber/subscriber.go @@ -332,7 +332,7 @@ func awaitHeight(cli *client.Client, startFrom uint32) error { return fmt.Errorf("could not get block height: %w", err) } - if height < startFrom { + if height < startFrom+1 { return fmt.Errorf("RPC block counter %d didn't reach expected height %d", height, startFrom) } From e3de08e8242ff80afb38d4d35a4f05c18456633f Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Sun, 6 Aug 2023 22:32:07 +0300 Subject: [PATCH 2/2] ir: retry mainnet RPC connection if the node is outdated We can have a set of RPC nodes configured and we will switch between them at runtime, but if the first node is outdated then the IR node will _always_ fail to start until that RPC node catches up. We can get better behaviour by switching to other RPC nodes in this phase. This can't be done as easily for the sidechain, since in most cases we have an internal node and it needs to go through the chain completely. Refs. #2426. 
Signed-off-by: Roman Khimov --- CHANGELOG.md | 1 + pkg/innerring/innerring.go | 9 +++++++-- pkg/morph/subscriber/subscriber.go | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8b6134b9d..d1ee418e78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Changelog for NeoFS Node - Double voting for validators on IR startup (#2365) - Skip unexpected notary events on notary request parsing step (#2315) - Session inactivity on object PUT request relay (#2460) +- Missing connection retries on IR node startup when the first configured mainnet RPC node is not in sync (#2474) ### Removed - Deprecated `morph.rpc_endpoint` SN and `morph.endpoint.client` IR config sections (#2400) diff --git a/pkg/innerring/innerring.go b/pkg/innerring/innerring.go index e468997259..6195cfd3ce 100644 --- a/pkg/innerring/innerring.go +++ b/pkg/innerring/innerring.go @@ -479,8 +479,13 @@ func New(ctx context.Context, log *zap.Logger, cfg *viper.Viper, errChan chan<- return nil, err } - // create mainnet listener - server.mainnetListener, err = createListener(ctx, server.mainnetClient, mainnetChain) + // create mainnet listener, retry with a different node if current one is not up to date + for { + server.mainnetListener, err = createListener(ctx, server.mainnetClient, mainnetChain) + if !errors.Is(err, subscriber.ErrStaleNode) || !server.mainnetClient.SwitchRPC() { + break + } + } if err != nil { return nil, err } diff --git a/pkg/morph/subscriber/subscriber.go b/pkg/morph/subscriber/subscriber.go index 21f91d2fb9..4163aaf01c 100644 --- a/pkg/morph/subscriber/subscriber.go +++ b/pkg/morph/subscriber/subscriber.go @@ -68,6 +68,9 @@ func (s *subscriber) NotificationChannels() NotificationChannels { } var ( + // ErrStaleNode is returned from [New] when StartFromBlock requirement + // specified in [Params] is not satisfied by the given node. 
+ ErrStaleNode = errors.New("RPC node is not yet up to date") errNilParams = errors.New("chain/subscriber: config was not provided to the constructor") errNilLogger = errors.New("chain/subscriber: logger was not provided to the constructor") @@ -333,7 +336,7 @@ func awaitHeight(cli *client.Client, startFrom uint32) error { } if height < startFrom+1 { - return fmt.Errorf("RPC block counter %d didn't reach expected height %d", height, startFrom) + return fmt.Errorf("%w: expected %d height, got %d count", ErrStaleNode, startFrom, height) } return nil