Skip to content

Commit

Permalink
bug: account for etcd "leader changed" errors
Browse files Browse the repository at this point in the history
when validating whether a session is valid we were failing if the etcd
leader changed. this case happens quite often when joining the second
node to an embedded cluster installation.

this commit adds a retry mechanism for this specific scenario. we make up
to 8 attempts with exponential backoff; the total wait spans up to ~5 seconds.
  • Loading branch information
ricardomaraschini committed Nov 13, 2024
1 parent 55b35f4 commit 4061607
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
1 change: 1 addition & 0 deletions pkg/handlers/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func RequireValidSessionQuietMiddleware(kotsStore store.Store) mux.MiddlewareFun
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
sess, err := requireValidSession(kotsStore, w, r)
if err != nil {
logger.Errorf("failed validating session: %s", err)
return
}

Expand Down
22 changes: 20 additions & 2 deletions pkg/handlers/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"github.com/replicatedhq/kots/pkg/util"
kuberneteserrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"
)

type authorization struct {
Expand Down Expand Up @@ -92,12 +94,28 @@ func requireValidSession(kotsStore store.Store, w http.ResponseWriter, r *http.R
return nil, err
}

passwordUpdatedAt, err := kotsStore.GetPasswordUpdatedAt()
if err != nil {
// XXX: we have noticed that when joining a second controller to an
// embedded cluster installation the etcd leader usually changes.
// GetPasswordUpdatedAt() function reads a secret from the cluster
// and if it attempts to do so while the leader is changing we receive
// an error back. here we retry for this specific etcd error.
var passwordUpdatedAt *time.Time
if err = retry.OnError(
// this amounts to a maximum of ~5 seconds.
wait.Backoff{Steps: 8, Duration: 40 * time.Millisecond, Factor: 2},
func(err error) bool {
return strings.Contains(err.Error(), "leader changed")
},
func() (err error) {
passwordUpdatedAt, err = kotsStore.GetPasswordUpdatedAt()
return
},
); err != nil {
response := types.ErrorResponse{Error: util.StrPointer("failed to validate session with current password")}
JSON(w, http.StatusUnauthorized, response)
return nil, err
}

if passwordUpdatedAt != nil && passwordUpdatedAt.After(sess.IssuedAt) {
if err := kotsStore.DeleteSession(sess.ID); err != nil {
logger.Error(errors.Wrapf(err, "password was updated after session created. failed to delete invalid session %s", sess.ID))
Expand Down

0 comments on commit 4061607

Please sign in to comment.