-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BCI-2525: check all responses on transaction submission #11599
Changes from 8 commits
41cf0be
7bdce25
7274949
6c5915a
86104a2
ed88f18
6768c7e
fc41b06
e2767df
b27afc9
5db54d7
169a90b
6b0a7af
16dd2b1
70c7c2c
ab5fa6c
279f5f0
96f131e
d5d6ce2
d4268af
d43dc50
5ed9ab7
6369972
8256c5d
e376873
9a85e97
46f0fd3
0ee678f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -84,6 +84,7 @@ type multiNode[ | |
services.StateMachine | ||
nodes []Node[CHAIN_ID, HEAD, RPC_CLIENT] | ||
sendonlys []SendOnlyNode[CHAIN_ID, RPC_CLIENT] | ||
allNodes []SendOnlyNode[CHAIN_ID, RPC_CLIENT] | ||
chainID CHAIN_ID | ||
chainType config.ChainType | ||
lggr logger.SugaredLogger | ||
|
@@ -131,6 +132,11 @@ func NewMultiNode[ | |
) MultiNode[CHAIN_ID, SEQ, ADDR, BLOCK_HASH, TX, TX_HASH, EVENT, EVENT_OPS, TX_RECEIPT, FEE, HEAD, RPC_CLIENT] { | ||
nodeSelector := newNodeSelector(selectionMode, nodes) | ||
|
||
var all []SendOnlyNode[CHAIN_ID, RPC_CLIENT] | ||
for _, n := range nodes { | ||
all = append(all, n) | ||
} | ||
all = append(all, sendonlys...) | ||
// Prometheus' default interval is 15s, set this to under 7.5s to avoid | ||
// aliasing (see: https://en.wikipedia.org/wiki/Nyquist_frequency) | ||
const reportInterval = 6500 * time.Millisecond | ||
|
@@ -148,6 +154,7 @@ func NewMultiNode[ | |
chainFamily: chainFamily, | ||
sendOnlyErrorParser: sendOnlyErrorParser, | ||
reportInterval: reportInterval, | ||
allNodes: all, | ||
} | ||
|
||
c.lggr.Debugf("The MultiNode is configured to use NodeSelectionMode: %s", selectionMode) | ||
|
@@ -546,21 +553,19 @@ func (c *multiNode[CHAIN_ID, SEQ, ADDR, BLOCK_HASH, TX, TX_HASH, EVENT, EVENT_OP | |
} | ||
|
||
func (c *multiNode[CHAIN_ID, SEQ, ADDR, BLOCK_HASH, TX, TX_HASH, EVENT, EVENT_OPS, TX_RECEIPT, FEE, HEAD, RPC_CLIENT]) SendTransaction(ctx context.Context, tx TX) error { | ||
main, nodeError := c.selectNode() | ||
var all []SendOnlyNode[CHAIN_ID, RPC_CLIENT] | ||
for _, n := range c.nodes { | ||
all = append(all, n) | ||
if len(c.allNodes) == 0 { | ||
return ErroringNodeError | ||
} | ||
all = append(all, c.sendonlys...) | ||
for _, n := range all { | ||
if n == main { | ||
// main node is used at the end for the return value | ||
continue | ||
} | ||
// Parallel send to all other nodes with ignored return value | ||
// Async - we do not want to block the main thread with secondary nodes | ||
// in case they are unreliable/slow. | ||
// It is purely a "best effort" send. | ||
result := make(chan error, 1) | ||
// Even if we fail to select a main node, try sending the tx and notify the caller of failure. | ||
// It gives us a chance that tx will be applied while we are trying to recover. | ||
main, nodeError := c.selectNode() | ||
for _, n := range c.allNodes { | ||
// Parallel send to all nodes. | ||
// Release the caller on the success of any node or on the error from the main. | ||
// This way, we: | ||
// * prefer potentially the healthiest node to report the error; | ||
// * improve performance for cases when the main node is degraded and would eventually return time out. | ||
// Resource is not unbounded because the default context has a timeout. | ||
ok := c.IfNotStopped(func() { | ||
// Must wrap inside IfNotStopped to avoid waitgroup racing with Close | ||
|
@@ -569,21 +574,38 @@ func (c *multiNode[CHAIN_ID, SEQ, ADDR, BLOCK_HASH, TX, TX_HASH, EVENT, EVENT_OP | |
defer c.wg.Done() | ||
|
||
txErr := n.RPC().SendTransaction(ctx, tx) | ||
c.lggr.Debugw("Sendonly node sent transaction", "name", n.String(), "tx", tx, "err", txErr) | ||
sendOnlyError := c.sendOnlyErrorParser(txErr) | ||
if sendOnlyError != Successful { | ||
c.lggr.Debugw("Node sent transaction", "name", n.String(), "tx", tx, "err", txErr) | ||
isSuccess := c.sendOnlyErrorParser(txErr) == Successful | ||
if !isSuccess { | ||
c.lggr.Warnw("RPC returned error", "name", n.String(), "tx", tx, "err", txErr) | ||
} | ||
|
||
if isSuccess || n == main { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really like that we still will return error as soon as main node says so, without waiting for other nodes. |
||
select { | ||
case result <- txErr: | ||
default: | ||
} | ||
} | ||
|
||
}(n) | ||
}) | ||
if !ok { | ||
c.lggr.Debug("Cannot send transaction on sendonly node; MultiNode is stopped", "node", n.String()) | ||
c.lggr.Debugw("Cannot send transaction on node; MultiNode is stopped", "node", n.String()) | ||
return fmt.Errorf("MultiNode is stopped: %w", context.Canceled) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice catch! |
||
} | ||
} | ||
|
||
if nodeError != nil { | ||
return nodeError | ||
} | ||
return main.RPC().SendTransaction(ctx, tx) | ||
|
||
select { | ||
case err := <-result: | ||
return err | ||
case <-ctx.Done(): | ||
return ctx.Err() | ||
} | ||
|
||
} | ||
|
||
func (c *multiNode[CHAIN_ID, SEQ, ADDR, BLOCK_HASH, TX, TX_HASH, EVENT, EVENT_OPS, TX_RECEIPT, FEE, HEAD, RPC_CLIENT]) SequenceAt(ctx context.Context, account ADDR, blockNumber *big.Int) (s SEQ, err error) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: The name sendOnlyErrorParser is misleading.
Can you please rename it to sendTxErrorParser.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's important to keep the
sendOnly
prefix as it emphasizes the distinction between the standard ClassifySendError function and the ClassifySendOnlyError function.