diff --git a/.changeset/mean-dots-move.md b/.changeset/mean-dots-move.md new file mode 100644 index 00000000000..1169d8379e9 --- /dev/null +++ b/.changeset/mean-dots-move.md @@ -0,0 +1,5 @@ +--- +"chainlink": patch +--- + +Add config var Mercury.Transmitter.TransmitConcurrency #added diff --git a/core/config/docs/core.toml b/core/config/docs/core.toml index e0fc76f449c..edd1494e4f0 100644 --- a/core/config/docs/core.toml +++ b/core/config/docs/core.toml @@ -689,6 +689,10 @@ TransmitQueueMaxSize = 10_000 # Default # when sending a message to the mercury server, before aborting and considering # the transmission to be failed. TransmitTimeout = "5s" # Default +# TransmitConcurrency is the max number of concurrent transmits to each server. +# +# Only has effect with LLO jobs. +TransmitConcurrency = 100 # Default # Telemetry holds OTEL settings. # This data includes open telemetry metrics, traces, & logs. diff --git a/core/config/mercury_config.go b/core/config/mercury_config.go index d1b4b142e20..2e58ff0ee9d 100644 --- a/core/config/mercury_config.go +++ b/core/config/mercury_config.go @@ -20,6 +20,7 @@ type MercuryTLS interface { type MercuryTransmitter interface { TransmitQueueMaxSize() uint32 TransmitTimeout() commonconfig.Duration + TransmitConcurrency() uint32 } type Mercury interface { diff --git a/core/config/toml/types.go b/core/config/toml/types.go index d9302b81fb0..610d18b6b4d 100644 --- a/core/config/toml/types.go +++ b/core/config/toml/types.go @@ -1330,6 +1330,7 @@ func (m *MercuryTLS) ValidateConfig() (err error) { type MercuryTransmitter struct { TransmitQueueMaxSize *uint32 TransmitTimeout *commonconfig.Duration + TransmitConcurrency *uint32 } func (m *MercuryTransmitter) setFrom(f *MercuryTransmitter) { @@ -1339,6 +1340,9 @@ func (m *MercuryTransmitter) setFrom(f *MercuryTransmitter) { if v := f.TransmitTimeout; v != nil { m.TransmitTimeout = v } + if v := f.TransmitConcurrency; v != nil { + m.TransmitConcurrency = v + } } type Mercury struct { diff --git a/core/services/chainlink/config_mercury.go b/core/services/chainlink/config_mercury.go index bc4aed6fb07..0e56105406b 100644 --- a/core/services/chainlink/config_mercury.go +++ b/core/services/chainlink/config_mercury.go @@ -50,6 +50,10 @@ func (m *mercuryTransmitterConfig) TransmitTimeout() commonconfig.Duration { return *m.c.TransmitTimeout } +func (m *mercuryTransmitterConfig) TransmitConcurrency() uint32 { + return *m.c.TransmitConcurrency +} + type mercuryConfig struct { c toml.Mercury s toml.MercurySecrets diff --git a/core/services/chainlink/config_test.go b/core/services/chainlink/config_test.go index 76b80672dbb..e04d6d7e25b 100644 --- a/core/services/chainlink/config_test.go +++ b/core/services/chainlink/config_test.go @@ -838,6 +838,7 @@ func TestConfig_Marshal(t *testing.T) { Transmitter: toml.MercuryTransmitter{ TransmitQueueMaxSize: ptr(uint32(123)), TransmitTimeout: commoncfg.MustNewDuration(234 * time.Second), + TransmitConcurrency: ptr(uint32(456)), }, VerboseLogging: ptr(true), } @@ -1348,6 +1349,7 @@ CertFile = '/path/to/cert.pem' [Mercury.Transmitter] TransmitQueueMaxSize = 123 TransmitTimeout = '3m54s' +TransmitConcurrency = 456 `}, {"full", full, fullTOML}, {"multi-chain", multiChain, multiChainTOML}, diff --git a/core/services/chainlink/testdata/config-empty-effective.toml b/core/services/chainlink/testdata/config-empty-effective.toml index cd51afac5f8..0f26b02ab6f 100644 --- a/core/services/chainlink/testdata/config-empty-effective.toml +++ b/core/services/chainlink/testdata/config-empty-effective.toml @@ -237,6 +237,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/core/services/chainlink/testdata/config-full.toml b/core/services/chainlink/testdata/config-full.toml index c6a5302a459..3191ce576ed 100644 --- a/core/services/chainlink/testdata/config-full.toml +++ b/core/services/chainlink/testdata/config-full.toml @@ -247,6 +247,7 @@ CertFile = '/path/to/cert.pem' [Mercury.Transmitter] TransmitQueueMaxSize = 123 TransmitTimeout = '3m54s' +TransmitConcurrency = 456 [Capabilities] [Capabilities.Peering] diff --git a/core/services/chainlink/testdata/config-multi-chain-effective.toml b/core/services/chainlink/testdata/config-multi-chain-effective.toml index e8da8142181..5f52c06ca1f 100644 --- a/core/services/chainlink/testdata/config-multi-chain-effective.toml +++ b/core/services/chainlink/testdata/config-multi-chain-effective.toml @@ -237,6 +237,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/core/services/llo/mercurytransmitter/transmitter.go b/core/services/llo/mercurytransmitter/transmitter.go index c8c826a343d..c105f4a6787 100644 --- a/core/services/llo/mercurytransmitter/transmitter.go +++ b/core/services/llo/mercurytransmitter/transmitter.go @@ -29,14 +29,6 @@ import ( const ( // Mercury server error codes DuplicateReport = 2 - - // TransmitConcurrency is the number of concurrent transmits to each - // server. Number of goroutines per server will be roughly 2x this because - // each server has a delete queue and a transmit queue. - // - // This could be reduced by implementing transmit batching, see: https://smartcontract-it.atlassian.net/browse/MERC-6635 - // TODO: Should this be a configuration variable (e.g. set to 1 or 2 for testing?) - TransmitConcurrency = 100 ) var ( @@ -105,6 +97,7 @@ var _ Transmitter = (*transmitter)(nil) type Config interface { TransmitQueueMaxSize() uint32 TransmitTimeout() commonconfig.Duration + TransmitConcurrency() uint32 } type transmitter struct { @@ -172,10 +165,17 @@ func (mt *transmitter) Start(ctx context.Context) (err error) { return err } s.q.Init(transmissions) - // starting pm after loading from it is fine because it simply spawns some garbage collection/prune goroutines + // starting pm after loading from it is fine because it simply + // spawns some garbage collection/prune goroutines startClosers = append(startClosers, s.c, s.q, s.pm) - nThreads := int(TransmitConcurrency) + // Number of goroutines per server will be roughly + // 2*nServers*TransmitConcurrency because each server has a + // delete queue and a transmit queue. + // + // This could potentially be reduced by implementing transmit batching, + // see: https://smartcontract-it.atlassian.net/browse/MERC-6635 + nThreads := int(mt.cfg.TransmitConcurrency()) mt.wg.Add(2 * nThreads) for i := 0; i < nThreads; i++ { go s.runDeleteQueueLoop(mt.stopCh, mt.wg) diff --git a/core/services/llo/mercurytransmitter/transmitter_test.go b/core/services/llo/mercurytransmitter/transmitter_test.go index b1364643ddc..7477e848b78 100644 --- a/core/services/llo/mercurytransmitter/transmitter_test.go +++ b/core/services/llo/mercurytransmitter/transmitter_test.go @@ -33,6 +33,10 @@ func (m mockCfg) TransmitTimeout() commonconfig.Duration { return *commonconfig.MustNewDuration(1 * time.Hour) } +func (m mockCfg) TransmitConcurrency() uint32 { + return 5 +} + func Test_Transmitter_Transmit(t *testing.T) { lggr := logger.TestLogger(t) db := pgtest.NewSqlxDB(t) diff --git a/core/services/ocr2/plugins/llo/helpers_test.go b/core/services/ocr2/plugins/llo/helpers_test.go index 0ca6eeb60cb..9cd8742ffa8 100644 --- a/core/services/ocr2/plugins/llo/helpers_test.go +++ b/core/services/ocr2/plugins/llo/helpers_test.go @@ -185,6 +185,7 @@ func setupNode( // [Mercury] c.Mercury.VerboseLogging = ptr(true) + c.Mercury.Transmitter.TransmitConcurrency = ptr(uint32(5)) // Avoid a ridiculous number of goroutines }) lggr, observedLogs := logger.TestLoggerObserved(t, zapcore.DebugLevel) diff --git a/core/web/resolver/testdata/config-empty-effective.toml b/core/web/resolver/testdata/config-empty-effective.toml index cd51afac5f8..0f26b02ab6f 100644 --- a/core/web/resolver/testdata/config-empty-effective.toml +++ b/core/web/resolver/testdata/config-empty-effective.toml @@ -237,6 +237,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/core/web/resolver/testdata/config-full.toml b/core/web/resolver/testdata/config-full.toml index bfb0dcb9961..113d319e3c5 100644 --- a/core/web/resolver/testdata/config-full.toml +++ b/core/web/resolver/testdata/config-full.toml @@ -247,6 +247,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 123 TransmitTimeout = '3m54s' +TransmitConcurrency = 456 [Capabilities] [Capabilities.Peering] diff --git a/core/web/resolver/testdata/config-multi-chain-effective.toml b/core/web/resolver/testdata/config-multi-chain-effective.toml index 074cb82482b..cb2884fde8f 100644 --- a/core/web/resolver/testdata/config-multi-chain-effective.toml +++ b/core/web/resolver/testdata/config-multi-chain-effective.toml @@ -237,6 +237,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/docs/CONFIG.md b/docs/CONFIG.md index ff918468c07..20965d816ec 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -1875,6 +1875,7 @@ CertFile is the path to a PEM file of trusted root certificate authority certifi [Mercury.Transmitter] TransmitQueueMaxSize = 10_000 # Default TransmitTimeout = "5s" # Default +TransmitConcurrency = 100 # Default ``` Mercury.Transmitter controls settings for the mercury transmitter @@ -1897,6 +1898,14 @@ TransmitTimeout controls how long the transmitter will wait for a response when sending a message to the mercury server, before aborting and considering the transmission to be failed. +### TransmitConcurrency +```toml +TransmitConcurrency = 100 # Default +``` +TransmitConcurrency is the max number of concurrent transmits to each server. + +Only has effect with LLO jobs. + ## Telemetry ```toml [Telemetry] diff --git a/testdata/scripts/config/merge_raw_configs.txtar b/testdata/scripts/config/merge_raw_configs.txtar index b3d50f22b36..bf0da942eea 100644 --- a/testdata/scripts/config/merge_raw_configs.txtar +++ b/testdata/scripts/config/merge_raw_configs.txtar @@ -384,6 +384,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/default.txtar b/testdata/scripts/node/validate/default.txtar index 5e8b847ceda..51edf69d599 100644 --- a/testdata/scripts/node/validate/default.txtar +++ b/testdata/scripts/node/validate/default.txtar @@ -249,6 +249,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/defaults-override.txtar b/testdata/scripts/node/validate/defaults-override.txtar index bf8bece28bf..19bae4bec1a 100644 --- a/testdata/scripts/node/validate/defaults-override.txtar +++ b/testdata/scripts/node/validate/defaults-override.txtar @@ -310,6 +310,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/disk-based-logging-disabled.txtar b/testdata/scripts/node/validate/disk-based-logging-disabled.txtar index 2e72ed7e9bb..ddd01a4c1e4 100644 --- a/testdata/scripts/node/validate/disk-based-logging-disabled.txtar +++ b/testdata/scripts/node/validate/disk-based-logging-disabled.txtar @@ -293,6 +293,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar b/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar index 7b27328f7a6..0f40ad6a208 100644 --- a/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar +++ b/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar @@ -293,6 +293,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/disk-based-logging.txtar b/testdata/scripts/node/validate/disk-based-logging.txtar index 83d23546175..dd7455ca3a8 100644 --- a/testdata/scripts/node/validate/disk-based-logging.txtar +++ b/testdata/scripts/node/validate/disk-based-logging.txtar @@ -293,6 +293,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/invalid-ocr-p2p.txtar b/testdata/scripts/node/validate/invalid-ocr-p2p.txtar index 3fccffc4e69..1ffe2ab718c 100644 --- a/testdata/scripts/node/validate/invalid-ocr-p2p.txtar +++ b/testdata/scripts/node/validate/invalid-ocr-p2p.txtar @@ -278,6 +278,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/invalid.txtar b/testdata/scripts/node/validate/invalid.txtar index 5ea0aa289a8..52edd2b8065 100644 --- a/testdata/scripts/node/validate/invalid.txtar +++ b/testdata/scripts/node/validate/invalid.txtar @@ -283,6 +283,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/valid.txtar b/testdata/scripts/node/validate/valid.txtar index 26641c0ef76..623459ce253 100644 --- a/testdata/scripts/node/validate/valid.txtar +++ b/testdata/scripts/node/validate/valid.txtar @@ -290,6 +290,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering] diff --git a/testdata/scripts/node/validate/warnings.txtar b/testdata/scripts/node/validate/warnings.txtar index 51b3e897741..5452c49f122 100644 --- a/testdata/scripts/node/validate/warnings.txtar +++ b/testdata/scripts/node/validate/warnings.txtar @@ -272,6 +272,7 @@ CertFile = '' [Mercury.Transmitter] TransmitQueueMaxSize = 10000 TransmitTimeout = '5s' +TransmitConcurrency = 100 [Capabilities] [Capabilities.Peering]