Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Address race condition disrupting graceful shutdown process #2864

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 12 additions & 30 deletions internal/cmd/envoy/shutdown_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ const (
ShutdownManagerHealthCheckPath = "/healthz"
// ShutdownManagerReadyPath is the path used to indicate shutdown readiness.
ShutdownManagerReadyPath = "/shutdown/ready"
// ShutdownActiveFile is the file used to indicate shutdown readiness.
ShutdownActiveFile = "/tmp/shutdown-active"
// ShutdownReadyFile is the file used to indicate shutdown readiness.
ShutdownReadyFile = "/tmp/shutdown-ready"
)

// ShutdownManager serves shutdown manager process for Envoy proxies.
Expand All @@ -44,7 +44,7 @@ func ShutdownManager(readyTimeout time.Duration) error {
handler := http.NewServeMux()
handler.HandleFunc(ShutdownManagerHealthCheckPath, func(_ http.ResponseWriter, _ *http.Request) {})
handler.HandleFunc(ShutdownManagerReadyPath, func(w http.ResponseWriter, _ *http.Request) {
shutdownReadyHandler(w, readyTimeout, ShutdownActiveFile)
shutdownReadyHandler(w, readyTimeout, ShutdownReadyFile)
})

// Setup HTTP server
Expand Down Expand Up @@ -85,25 +85,13 @@ func ShutdownManager(readyTimeout time.Duration) error {
}

// shutdownReadyHandler handles the endpoint used by a preStop hook on the Envoy
// container to block until ready to terminate. When the graceful drain process
// has begun a file will be written to indicate shutdown is in progress. This
// handler will poll for the file and block until it is removed.
func shutdownReadyHandler(w http.ResponseWriter, readyTimeout time.Duration, activeFile string) {
// container to block until ready to terminate. After the graceful drain process
// has completed a file will be written to indicate shutdown readiness.
func shutdownReadyHandler(w http.ResponseWriter, readyTimeout time.Duration, readyFile string) {
var startTime = time.Now()

logger.Info("received shutdown ready request")

// Since we cannot establish order between preStop hooks, wait a bit giving the exec
// preStop hook a chance to touch the signaling file before starting to poll for it.
time.Sleep(2 * time.Second)

// Exit early if active file failed to show up
if _, err := os.Stat(activeFile); os.IsNotExist(err) {
logger.Info("shutdown active file not found")
w.WriteHeader(http.StatusInternalServerError)
return
}

// Poll for shutdown readiness
for {
// Check if ready timeout is exceeded
Expand All @@ -114,15 +102,15 @@ func shutdownReadyHandler(w http.ResponseWriter, readyTimeout time.Duration, act
return
}

_, err := os.Stat(activeFile)
_, err := os.Stat(readyFile)
switch {
case os.IsNotExist(err):
logger.Info("shutdown readiness detected")
return
time.Sleep(1 * time.Second)
case err != nil:
logger.Error(err, "error checking for shutdown readiness")
case err == nil:
time.Sleep(1 * time.Second)
logger.Info("shutdown readiness detected")
return
}
}
}
Expand All @@ -139,12 +127,6 @@ func Shutdown(drainTimeout time.Duration, minDrainDuration time.Duration, exitAt
logger = logging.FileLogger("/proc/1/fd/1", "shutdown-manager", v1alpha1.LogLevelInfo)
}

// Signal to shutdownReadyHandler that drain process is started
if _, err := os.Create(ShutdownActiveFile); err != nil {
logger.Error(err, "error creating shutdown active file")
return err
}

logger.Info(fmt.Sprintf("initiating graceful drain with %.0f second minimum drain period and %.0f second timeout",
minDrainDuration.Seconds(), drainTimeout.Seconds()))

Expand Down Expand Up @@ -185,8 +167,8 @@ func Shutdown(drainTimeout time.Duration, minDrainDuration time.Duration, exitAt
}

// Signal to shutdownReadyHandler that drain process is complete
if err := os.Remove(ShutdownActiveFile); err != nil {
logger.Error(err, "error removing shutdown active file")
if _, err := os.Create(ShutdownReadyFile); err != nil {
logger.Error(err, "error creating shutdown ready file")
return err
}

Expand Down