From 7ee078755740fee24ecba1da1f5ce2c9435b3aab Mon Sep 17 00:00:00 2001 From: David Alger Date: Sun, 10 Mar 2024 14:44:32 -0500 Subject: [PATCH] fix: Address race condition disrupting graceful shutdown process Signed-off-by: David Alger --- internal/cmd/envoy/shutdown_manager.go | 42 ++++++++------------------ 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/internal/cmd/envoy/shutdown_manager.go b/internal/cmd/envoy/shutdown_manager.go index 1f0a199a4eb..0f9ee77d814 100644 --- a/internal/cmd/envoy/shutdown_manager.go +++ b/internal/cmd/envoy/shutdown_manager.go @@ -34,8 +34,8 @@ const ( ShutdownManagerHealthCheckPath = "/healthz" // ShutdownManagerReadyPath is the path used to indicate shutdown readiness. ShutdownManagerReadyPath = "/shutdown/ready" - // ShutdownActiveFile is the file used to indicate shutdown readiness. - ShutdownActiveFile = "/tmp/shutdown-active" + // ShutdownReadyFile is the file used to indicate shutdown readiness. + ShutdownReadyFile = "/tmp/shutdown-ready" ) // ShutdownManager serves shutdown manager process for Envoy proxies. @@ -44,7 +44,7 @@ func ShutdownManager(readyTimeout time.Duration) error { handler := http.NewServeMux() handler.HandleFunc(ShutdownManagerHealthCheckPath, func(_ http.ResponseWriter, _ *http.Request) {}) handler.HandleFunc(ShutdownManagerReadyPath, func(w http.ResponseWriter, _ *http.Request) { - shutdownReadyHandler(w, readyTimeout, ShutdownActiveFile) + shutdownReadyHandler(w, readyTimeout, ShutdownReadyFile) }) // Setup HTTP server @@ -85,25 +85,13 @@ func ShutdownManager(readyTimeout time.Duration) error { } // shutdownReadyHandler handles the endpoint used by a preStop hook on the Envoy -// container to block until ready to terminate. When the graceful drain process -// has begun a file will be written to indicate shutdown is in progress. This -// handler will poll for the file and block until it is removed. -func shutdownReadyHandler(w http.ResponseWriter, readyTimeout time.Duration, activeFile string) { +// container to block until ready to terminate. After the graceful drain process +// has completed a file will be written to indicate shutdown readiness. +func shutdownReadyHandler(w http.ResponseWriter, readyTimeout time.Duration, readyFile string) { var startTime = time.Now() logger.Info("received shutdown ready request") - // Since we cannot establish order between preStop hooks, wait a bit giving the exec - // preStop hook a chance to touch the signaling file before starting to poll for it. - time.Sleep(2 * time.Second) - - // Exit early if active file failed to show up - if _, err := os.Stat(activeFile); os.IsNotExist(err) { - logger.Info("shutdown active file not found") - w.WriteHeader(http.StatusInternalServerError) - return - } - // Poll for shutdown readiness for { // Check if ready timeout is exceeded @@ -114,15 +102,15 @@ func shutdownReadyHandler(w http.ResponseWriter, readyTimeout time.Duration, act return } - _, err := os.Stat(activeFile) + _, err := os.Stat(readyFile) switch { case os.IsNotExist(err): - logger.Info("shutdown readiness detected") - return + time.Sleep(1 * time.Second) case err != nil: logger.Error(err, "error checking for shutdown readiness") case err == nil: - time.Sleep(1 * time.Second) + logger.Info("shutdown readiness detected") + return } } } @@ -139,12 +127,6 @@ func Shutdown(drainTimeout time.Duration, minDrainDuration time.Duration, exitAt logger = logging.FileLogger("/proc/1/fd/1", "shutdown-manager", v1alpha1.LogLevelInfo) } - // Signal to shutdownReadyHandler that drain process is started - if _, err := os.Create(ShutdownActiveFile); err != nil { - logger.Error(err, "error creating shutdown active file") - return err - } - logger.Info(fmt.Sprintf("initiating graceful drain with %.0f second minimum drain period and %.0f second timeout", minDrainDuration.Seconds(), drainTimeout.Seconds())) @@ -185,8 +167,8 @@ func Shutdown(drainTimeout time.Duration, minDrainDuration time.Duration, exitAt } // Signal to shutdownReadyHandler that drain process is complete - if err := os.Remove(ShutdownActiveFile); err != nil { - logger.Error(err, "error removing shutdown active file") + if _, err := os.Create(ShutdownReadyFile); err != nil { + logger.Error(err, "error creating shutdown ready file") return err }