From 2bc4307816ff1f3ee38a36f45b978e5af3b3c7e2 Mon Sep 17 00:00:00 2001 From: Michael Zampani Date: Sat, 21 Mar 2026 15:16:53 -0700 Subject: [PATCH] fix(cmd/docker): prevent race between force-exit goroutine and plugin wait When a plugin ignores context cancellation and the user sends 3 SIGINTs, the CLI kills the plugin with SIGKILL. Previously the signal goroutine called os.Exit(1) directly; a race existed where plugincmd.Run() could return first (plugin was SIGKILL'd, so ws.ExitStatus() = -1) and the main goroutine would call os.Exit(-1) = exit code 255 before the goroutine reached os.Exit(1). Fix by moving exit-code ownership to the main goroutine. The signal goroutine closes forceExitCh before calling Kill(), guaranteeing the channel is closed before plugincmd.Run() returns (the plugin can only die after Kill() delivers SIGKILL; Run() only returns after the process is reaped). The main goroutine checks forceExitCh after Run() returns and performs the print + os.Exit(1) itself. Also return from the signal goroutine after the force-kill to prevent further loop iterations from calling close(forceExitCh) a second time (which would panic), in case additional signals arrive while the kill is in flight. Fixes a flaky failure in TestPluginSocketCommunication/detached/ the_main_CLI_exits_after_3_signals where exit code 255 was observed instead of 1 on loaded CI runners (RC Docker on Alpine). Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Michael Zampani --- cmd/docker/docker.go | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/cmd/docker/docker.go b/cmd/docker/docker.go index 4936f2b28e48..67e74cc65b80 100644 --- a/cmd/docker/docker.go +++ b/cmd/docker/docker.go @@ -344,6 +344,12 @@ func tryPluginRun(ctx context.Context, dockerCli command.Cli, cmd *cobra.Command // notify the plugin via the PluginServer (or signal) as appropriate. const exitLimit = 2 + // forceExitCh is closed by the signal goroutine just before it SIGKILLs + // the plugin. The main goroutine checks this after plugincmd.Run() returns + // and owns the final os.Exit(1) call, keeping exit-code ownership in one + // place and avoiding a race between two concurrent os.Exit calls. + forceExitCh := make(chan struct{}) + tryTerminatePlugin := func(force bool) { // If stdin is a TTY, the kernel will forward // signals to the subprocess because the shared @@ -368,12 +374,12 @@ func tryPluginRun(ctx context.Context, dockerCli command.Cli, cmd *cobra.Command // force the process to terminate if it hasn't already if force { + // Close forceExitCh before Kill so the channel is guaranteed + // to be closed by the time plugincmd.Run() returns: the plugin + // can only exit after Kill() delivers SIGKILL, and Run() only + // returns after the process is reaped. + close(forceExitCh) _ = plugincmd.Process.Kill() - _, _ = fmt.Fprint(dockerCli.Err(), "got 3 SIGTERM/SIGINTs, forcefully exiting\n") - - // Restore terminal in case it was in raw mode. - restoreTerminal(dockerCli) - os.Exit(1) } } @@ -397,10 +403,28 @@ func tryPluginRun(ctx context.Context, dockerCli command.Cli, cmd *cobra.Command force = true } tryTerminatePlugin(force) + if force { + // Plugin has been killed; return to prevent further + // loop iterations from calling close(forceExitCh) again. + return + } } }() if err := plugincmd.Run(); err != nil { + select { + case <-forceExitCh: + // We force-killed the plugin after 3 signals. Print the message + // and exit here so that exit-code ownership stays in the main + // goroutine and we avoid a race with any concurrent os.Exit call. + // Note: the deferred srv.Close() is already called by tryTerminatePlugin + // before forceExitCh is closed, so skipping it here is safe. + _, _ = fmt.Fprint(dockerCli.Err(), "got 3 SIGTERM/SIGINTs, forcefully exiting\n") + restoreTerminal(dockerCli) + os.Exit(1) //nolint:gocritic // exitAfterDefer: srv.Close() already called above + default: + } + statusCode := 1 exitErr, ok := err.(*exec.ExitError) if !ok {