Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Add new PoET config "poet-request-timeout" and use it instead of GracePeriod for timeouts #4996

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@

See [RELEASE](./RELEASE.md) for workflow instructions.

## UNRELEASED

### Upgrade information

A new config `poet-request-timeout` has been added, that defines the timeout for requesting PoET proofs.
It defaults to 9 minutes so there is enough time to retry if the request fails.

### Highlights

### Features

### Improvements

## v1.1.5

### Upgrade information
Expand Down
22 changes: 10 additions & 12 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type PoetConfig struct {
PhaseShift time.Duration `mapstructure:"phase-shift"`
CycleGap time.Duration `mapstructure:"cycle-gap"`
GracePeriod time.Duration `mapstructure:"grace-period"`
RequestTimeout time.Duration `mapstructure:"poet-request-timeout"`
RequestRetryDelay time.Duration `mapstructure:"retry-delay"`
MaxRequestRetries int `mapstructure:"retry-max"`
}
Expand All @@ -50,7 +51,7 @@ const (
defaultPoetRetryInterval = 5 * time.Second

// Jitter added to the wait time before building a nipost challenge.
// It's expressed as % of poet grace period which translates to:
// It is expressed as % of poet grace period which translates to:
// mainnet (grace period 1h) -> 36s
// systest (grace period 10s) -> 0.1s
maxNipostChallengeBuildJitter = 1.0
Expand Down Expand Up @@ -436,24 +437,21 @@ func (b *Builder) buildNIPostChallenge(ctx context.Context) (*types.NIPostChalle
until := time.Until(b.poetRoundStart(current))
if until <= 0 {
metrics.PublishLateWindowLatency.Observe(-until.Seconds())
return nil, fmt.Errorf("%w: builder doesn't have time to submit in epoch %d. poet round already started %v ago",
ErrATXChallengeExpired, current, -until)
return nil, fmt.Errorf("%w: builder cannot to submit in epoch %d. poet round already started %v ago", ErrATXChallengeExpired, current, -until)
}
metrics.PublishOntimeWindowLatency.Observe(until.Seconds())
wait := timeToWaitToBuildNipostChallenge(until, b.poetCfg.GracePeriod)
if wait >= 0 {
wait := buildNipostChallengeStartDeadline(b.poetRoundStart(current), b.poetCfg.GracePeriod)
if time.Until(wait) > 0 {
b.log.WithContext(ctx).With().Debug("waiting for fresh atxs",
log.Duration("till poet round", until),
log.Uint32("current epoch", current.Uint32()),
log.Duration("wait", wait),
log.Duration("wait", time.Until(wait)),
)
if wait > 0 {
events.EmitPoetWaitRound(current, current+1, wait)
}
events.EmitPoetWaitRound(current, current+1, time.Until(wait))
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(wait):
case <-time.After(time.Until(wait)):
}
}

Expand Down Expand Up @@ -726,7 +724,7 @@ func SignAndFinalizeAtx(signer *signing.EdSigner, atx *types.ActivationTx) error
return atx.Initialize()
}

func timeToWaitToBuildNipostChallenge(untilRoundStart, gracePeriod time.Duration) time.Duration {
func buildNipostChallengeStartDeadline(roundStart time.Time, gracePeriod time.Duration) time.Time {
jitter := randomDurationInRange(time.Duration(0), gracePeriod*maxNipostChallengeBuildJitter/100.0)
return untilRoundStart + jitter - gracePeriod
return roundStart.Add(jitter).Add(-gracePeriod)
}
16 changes: 8 additions & 8 deletions activation/activation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1278,28 +1278,28 @@ func TestWaitingToBuildNipostChallengeWithJitter(t *testing.T) {
// ───▲─────|──────|─────────|----> time
// │ └jitter| └round start
// now
wait := timeToWaitToBuildNipostChallenge(2*time.Hour, time.Hour)
require.Greater(t, wait, time.Hour)
require.LessOrEqual(t, wait, time.Hour+time.Second*36)
deadline := buildNipostChallengeStartDeadline(time.Now().Add(2*time.Hour), time.Hour)
require.Greater(t, deadline, time.Now().Add(time.Hour))
require.LessOrEqual(t, deadline, time.Now().Add(time.Hour+time.Second*36))
})
t.Run("after grace period, within max jitter value", func(t *testing.T) {
// ┌──grace period──┐
// │ │
// ─────────|──▲────|────────|----> time
// └ji│tter| └round start
// now
wait := timeToWaitToBuildNipostChallenge(time.Hour-time.Second*10, time.Hour)
require.GreaterOrEqual(t, wait, -time.Second*10)
deadline := buildNipostChallengeStartDeadline(time.Now().Add(time.Hour-time.Second*10), time.Hour)
require.GreaterOrEqual(t, deadline, time.Now().Add(-time.Second*10))
// jitter is 1% = 36s for 1h grace period
require.LessOrEqual(t, wait, time.Second*(36-10))
require.LessOrEqual(t, deadline, time.Now().Add(time.Second*(36-10)))
})
t.Run("after jitter max value", func(t *testing.T) {
// ┌──grace period──┐
// │ │
// ─────────|──────|──▲──────|----> time
// └jitter| │ └round start
// now
wait := timeToWaitToBuildNipostChallenge(time.Hour-time.Second*37, time.Hour)
require.Less(t, wait, time.Duration(0))
deadline := buildNipostChallengeStartDeadline(time.Now().Add(time.Hour-time.Second*37), time.Hour)
require.Less(t, deadline, time.Now())
})
}
22 changes: 10 additions & 12 deletions activation/nipost.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,15 +322,17 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.NIPos
}

// Submit the challenge to a single PoET.
func (nb *NIPostBuilder) submitPoetChallenge(ctx context.Context, poet PoetProvingServiceClient, prefix, challenge []byte, signature types.EdSignature, nodeID types.NodeID) (*types.PoetRequest, error) {
poetServiceID, err := poet.PoetServiceID(ctx)
func (nb *NIPostBuilder) submitPoetChallenge(ctx context.Context, client PoetProvingServiceClient, prefix, challenge []byte, signature types.EdSignature, nodeID types.NodeID) (*types.PoetRequest, error) {
poetServiceID, err := client.PoetServiceID(ctx)
if err != nil {
return nil, &PoetSvcUnstableError{msg: "failed to get PoET service ID", source: err}
}
logger := nb.log.WithContext(ctx).WithFields(log.String("poet_id", hex.EncodeToString(poetServiceID.ServiceID)))

logger.Debug("querying for poet pow parameters")
powParams, err := poet.PowParams(ctx)
powCtx, cancel := context.WithTimeout(ctx, nb.poetCfg.RequestTimeout)
defer cancel()
powParams, err := client.PowParams(powCtx)
if err != nil {
return nil, &PoetSvcUnstableError{msg: "failed to get PoW params", source: err}
}
Expand All @@ -345,7 +347,9 @@ func (nb *NIPostBuilder) submitPoetChallenge(ctx context.Context, poet PoetProvi

logger.Debug("submitting challenge to poet proving service")

round, err := poet.Submit(ctx, prefix, challenge, signature, nodeID, PoetPoW{
submitCtx, cancel := context.WithTimeout(ctx, nb.poetCfg.RequestTimeout)
defer cancel()
round, err := client.Submit(submitCtx, prefix, challenge, signature, nodeID, PoetPoW{
Nonce: nonce,
Params: *powParams,
})
Expand Down Expand Up @@ -482,16 +486,10 @@ func (nb *NIPostBuilder) getBestProof(ctx context.Context, challenge types.Hash3
case <-time.After(time.Until(waitDeadline)):
}

// TODO(mafa): this should be renamed from GracePeriod to something like RequestTimeout
// and should be much shorter (e.g. 10 seconds instead of 1 hour on mainnet)
getProofsCtx, cancel := context.WithTimeout(ctx, nb.poetCfg.GracePeriod)
getProofsCtx, cancel := context.WithTimeout(ctx, nb.poetCfg.RequestTimeout)
defer cancel()

proof, members, err := client.Proof(getProofsCtx, round)
switch {
case errors.Is(err, context.Canceled):
return fmt.Errorf("querying proof: %w", ctx.Err())
case err != nil:
if err != nil {
logger.With().Warning("failed to get proof from poet", log.Err(err))
return nil
}
Expand Down
2 changes: 2 additions & 0 deletions activation/nipost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ func buildNIPost(tb testing.TB, postProvider *testPostManager, nipostChallenge t
PhaseShift: epoch / 5,
CycleGap: epoch / 10,
GracePeriod: epoch / 10,
RequestTimeout: epoch / 20,
RequestRetryDelay: epoch / 100,
MaxRequestRetries: 10,
}
Expand Down Expand Up @@ -244,6 +245,7 @@ func TestNewNIPostBuilderNotInitialized(t *testing.T) {
PhaseShift: epoch / 5,
CycleGap: epoch / 10,
GracePeriod: epoch / 10,
RequestTimeout: epoch / 20,
RequestRetryDelay: epoch / 100,
MaxRequestRetries: 10,
}
Expand Down
4 changes: 3 additions & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,9 @@ func AddCommands(cmd *cobra.Command) {
cmd.PersistentFlags().DurationVar(&cfg.POET.CycleGap, "cycle-gap",
cfg.POET.CycleGap, "cycle gap of poet server")
cmd.PersistentFlags().DurationVar(&cfg.POET.GracePeriod, "grace-period",
cfg.POET.GracePeriod, "propagation time for ATXs in the network")
cfg.POET.GracePeriod, "time before PoET round starts when the node builds and submits a challenge")
cmd.PersistentFlags().DurationVar(&cfg.POET.RequestTimeout, "poet-request-timeout",
cfg.POET.RequestTimeout, "timeout for poet requests")

/**======================== bootstrap data updater Flags ========================== **/
cmd.PersistentFlags().StringVar(&cfg.Bootstrap.URL, "bootstrap-url",
Expand Down
1 change: 1 addition & 0 deletions config/mainnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ func MainnetConfig() Config {
PhaseShift: 240 * time.Hour,
CycleGap: 12 * time.Hour,
GracePeriod: 1 * time.Hour,
RequestTimeout: 1100 * time.Second, // RequestRetryDelay * 2 * MaxRequestRetries*(MaxRequestRetries+1)/2
RequestRetryDelay: 10 * time.Second,
MaxRequestRetries: 10,
},
Expand Down
3 changes: 3 additions & 0 deletions config/presets/fastnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ func fastnet() config.Config {
conf.POET.GracePeriod = 10 * time.Second
conf.POET.CycleGap = 30 * time.Second
conf.POET.PhaseShift = 30 * time.Second
conf.POET.RequestTimeout = 12 * time.Second // RequestRetryDelay * 2 * MaxRequestRetries*(MaxRequestRetries+1)/2
conf.POET.RequestRetryDelay = 1 * time.Second
conf.POET.MaxRequestRetries = 3

return conf
}
5 changes: 4 additions & 1 deletion config/presets/standalone.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,12 @@ func standalone() config.Config {
conf.Beacon.VotesLimit = 100

conf.PoETServers = []string{"http://0.0.0.0:10010"}
conf.POET.GracePeriod = 5 * time.Second
conf.POET.GracePeriod = 12 * time.Second
conf.POET.CycleGap = 30 * time.Second
conf.POET.PhaseShift = 30 * time.Second
conf.POET.RequestTimeout = 12 * time.Second // RequestRetryDelay * 2 * MaxRequestRetries*(MaxRequestRetries+1)/2
conf.POET.RequestRetryDelay = 1 * time.Second
conf.POET.MaxRequestRetries = 3

conf.P2P.DisableNatPort = true

Expand Down
3 changes: 2 additions & 1 deletion node/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ func TestAdminEvents(t *testing.T) {
grpc.WithBlock(),
)
require.NoError(t, err)
t.Cleanup(func() { conn.Close() })
t.Cleanup(func() { assert.NoError(t, conn.Close()) })
client := pb.NewAdminServiceClient(conn)

tctx, cancel := context.WithTimeout(ctx, 2*time.Minute)
Expand Down Expand Up @@ -1144,6 +1144,7 @@ func TestAdminEvents(t *testing.T) {
require.NoError(t, err, "stream %d", i)
require.IsType(t, ev, msg.Details, "stream %d", i)
}
require.NoError(t, stream.CloseSend())
}
}

Expand Down