From f1847ccd62607a1c70af6314c1a5942e20c8f9d7 Mon Sep 17 00:00:00 2001 From: David Bennett Date: Thu, 25 Jun 2026 15:47:06 -0700 Subject: [PATCH 1/3] CLI: Fix hang with exec -i with self-exiting command and open stdin --- src/windows/common/relay.cpp | 2 +- src/windows/wslc/services/ConsoleService.cpp | 59 ++++++++++++++----- .../wslc/e2e/WSLCE2EContainerExecTests.cpp | 57 ++++++++++++++++++ 3 files changed, 103 insertions(+), 15 deletions(-) diff --git a/src/windows/common/relay.cpp b/src/windows/common/relay.cpp index 93d9ef7c52..9b65283dbc 100644 --- a/src/windows/common/relay.cpp +++ b/src/windows/common/relay.cpp @@ -133,7 +133,7 @@ wsl::windows::common::relay::InterruptableRead( if (!ReadFile(InputHandle, Buffer.data(), gsl::narrow_cast(Buffer.size()), &bytesRead, Overlapped)) { auto lastError = GetLastError(); - if ((lastError == ERROR_HANDLE_EOF) || (lastError == ERROR_BROKEN_PIPE)) + if ((lastError == ERROR_HANDLE_EOF) || (lastError == ERROR_BROKEN_PIPE) || (lastError == ERROR_OPERATION_ABORTED)) { return 0; } diff --git a/src/windows/wslc/services/ConsoleService.cpp b/src/windows/wslc/services/ConsoleService.cpp index e471ebad4b..6875a20dab 100644 --- a/src/windows/wslc/services/ConsoleService.cpp +++ b/src/windows/wslc/services/ConsoleService.cpp @@ -24,6 +24,49 @@ using wsl::windows::common::io::ReadConsoleHandle; using wsl::windows::common::io::ReadHandle; using wsl::windows::common::io::RelayHandle; +namespace { + + // Interrupts and joins the stdin-relay worker thread at teardown. + // + // A worker thread is only created when stdin is not a character device (any non-FILE_TYPE_CHAR handle, + // e.g. a redirected pipe); the console (FILE_TYPE_CHAR) and detach paths create none, so this no-ops + // (not joinable) for them. When that handle is a non-overlapped (synchronous) pipe the worker blocks + // in a ReadFile() that neither the exit event nor CancelIoEx() can interrupt, so once the relay's + // scope-exit runs (normally after io.Run() returns) the join() below would hang until stdin is closed + // -- the bug this guards against. The exit event is signaled first (enough for handles that support + // overlapped IO), then CancelSynchronousIo() aborts the synchronous read the event cannot reach. + void InterruptAndJoinInputThread(std::thread& inputThread, wil::unique_event& exitEvent) + { + if (!inputThread.joinable()) + { + return; + } + + if (exitEvent) + { + exitEvent.SetEvent(); + } + + // CancelSynchronousIo() only aborts I/O in flight on the target thread at the instant it is called, + // so a single call can miss the worker while it is between read iterations. Cancel immediately to + // unblock a worker already parked in ReadFile, then retry every 50ms until the thread exits; + // WaitForSingleObject is both the termination test and the backoff. ERROR_NOT_FOUND means nothing + // was in flight to cancel, which is expected, so only unexpected failures are logged. + const auto threadHandle = static_cast(inputThread.native_handle()); + do + { + if (!CancelSynchronousIo(threadHandle)) + { + const auto lastError = GetLastError(); + LOG_HR_IF(HRESULT_FROM_WIN32(lastError), lastError != ERROR_NOT_FOUND); + } + } while (WaitForSingleObject(threadHandle, 50) == WAIT_TIMEOUT); + + inputThread.join(); + } + +} // namespace + bool ConsoleService::RelayInteractiveTty(wsl::windows::common::ConsoleState& Console, ClientRunningWSLCProcess& Process, HANDLE Tty, bool TriggerRefresh) { // Configure the console for interactive usage. @@ -43,13 +86,7 @@ bool ConsoleService::RelayInteractiveTty(wsl::windows::common::ConsoleState& Con wil::unique_event exitEvent; std::thread inputThread; - auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { - if (inputThread.joinable()) - { - exitEvent.SetEvent(); - inputThread.join(); - } - }); + auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { InterruptAndJoinInputThread(inputThread, exitEvent); }); bool detached = false; MultiHandleWait io; @@ -99,13 +136,7 @@ void ConsoleService::RelayNonTtyProcess(wil::unique_handle&& Stdin, wil::unique_ wil::unique_event exitEvent; std::thread inputThread; - auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { - if (inputThread.joinable()) - { - exitEvent.SetEvent(); - inputThread.join(); - } - }); + auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { InterruptAndJoinInputThread(inputThread, exitEvent); }); if (Stdin.is_valid()) { diff --git a/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp b/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp index 64bba794a8..24d68ea5bb 100644 --- a/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp +++ b/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp @@ -154,6 +154,63 @@ class WSLCE2EContainerExecTests session.VerifyNoErrors(); } + WSLC_TEST_METHOD(WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand) + { + // Regression test for a stdin deadlock. When stdin is a synchronous (non-overlapped) anonymous pipe, the client + // relays it on a worker thread parked in a blocking ReadFile() that the exit event cannot interrupt. With + // `echo hello` (which exits without reading stdin), teardown's join() on that worker blocks until stdin closes, + // so wslc hangs. The test exercises this by running `exec -i echo hello` and requiring it to exit while the + // client keeps stdin open. + // + // If this regresses, look at the client-side stdin relay teardown: ConsoleService::InterruptAndJoinInputThread + // (the CancelSynchronousIo retry loop that unblocks the worker) and relay.cpp's InterruptableRead (which maps + // the resulting ERROR_OPERATION_ABORTED to EOF). + VerifyContainerIsNotListed(WslcContainerName); + auto result = RunWslc(std::format(L"container run -id --name {} {}", WslcContainerName, DebianImage.NameAndTag())); + result.Verify({.Stderr = L"", .ExitCode = 0}); + auto containerId = result.GetStdoutOneLine(); + + // RunWslcInteractive wires wslc's stdin to the read end of a synchronous (non-overlapped) anonymous pipe, which + // is what triggers the blocking-ReadFile relay path under test. + auto session = RunWslcInteractive(std::format(L"container exec -i {} echo hello", containerId)); + + // The command's output must arrive without the client closing stdin first. + session.ExpectStdout("hello\n"); + + // Long timeout: this only bounds the failure (hang) path, so it is generous to avoid false positives under CI load. + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + // Closing stdin after the process has already exited must remain a clean no-op with no errors emitted. + session.CloseStdin(); + session.VerifyNoErrors(); + } + + WSLC_TEST_METHOD(WSLCE2E_Container_Exec_InteractiveTTY_SelfExitingCommand) + { + // TTY counterpart to WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand (see that test for the full + // explanation of the deadlock). The -t flag routes wslc through ConsoleService::RelayInteractiveTty, whose + // stdin worker teardown is a separate scope-exit from the non-TTY path, so a regression could be introduced + // in one path and not the other. This test guards the TTY call site. + VerifyContainerIsNotListed(WslcContainerName); + auto result = RunWslc(std::format(L"container run -id --name {} {}", WslcContainerName, DebianImage.NameAndTag())); + result.Verify({.Stderr = L"", .ExitCode = 0}); + auto containerId = result.GetStdoutOneLine(); + + // -t sets the TTY flag; the harness still wires stdin as a synchronous pipe, so wslc takes the vulnerable + // RelayInteractiveTty else-branch (its input handle is a pipe, not a console). + auto session = RunWslcInteractive(std::format(L"container exec -it {} echo hello", containerId)); + + // The TTY translates the trailing LF to CRLF, so the exact output is "hello\r\n". + session.ExpectStdout("hello\r\n"); + + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + session.CloseStdin(); + session.VerifyNoErrors(); + } + WSLC_TEST_METHOD(WSLCE2E_Container_Exec_PseudoConsole_TerminalSize) { VerifyContainerIsNotListed(WslcContainerName); From ab18cf6bd3d3967ca0a6d37ccff736af71ef70e7 Mon Sep 17 00:00:00 2001 From: David Bennett Date: Fri, 26 Jun 2026 12:15:51 -0700 Subject: [PATCH 2/3] PR feedback, add run tests --- src/windows/wslc/services/ConsoleService.cpp | 40 +++++++++--------- .../wslc/e2e/WSLCE2EContainerExecTests.cpp | 2 +- .../wslc/e2e/WSLCE2EContainerRunTests.cpp | 42 +++++++++++++++++++ 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/src/windows/wslc/services/ConsoleService.cpp b/src/windows/wslc/services/ConsoleService.cpp index 6875a20dab..774980ac7e 100644 --- a/src/windows/wslc/services/ConsoleService.cpp +++ b/src/windows/wslc/services/ConsoleService.cpp @@ -28,13 +28,10 @@ namespace { // Interrupts and joins the stdin-relay worker thread at teardown. // - // A worker thread is only created when stdin is not a character device (any non-FILE_TYPE_CHAR handle, - // e.g. a redirected pipe); the console (FILE_TYPE_CHAR) and detach paths create none, so this no-ops - // (not joinable) for them. When that handle is a non-overlapped (synchronous) pipe the worker blocks - // in a ReadFile() that neither the exit event nor CancelIoEx() can interrupt, so once the relay's - // scope-exit runs (normally after io.Run() returns) the join() below would hang until stdin is closed - // -- the bug this guards against. The exit event is signaled first (enough for handles that support - // overlapped IO), then CancelSynchronousIo() aborts the synchronous read the event cannot reach. + // The worker only exists when stdin is not a character device (any non-FILE_TYPE_CHAR handle, e.g. a + // redirected pipe), so this no-ops (not joinable) for a console. When stdin is a non-overlapped + // (synchronous) pipe the worker blocks in a ReadFile() that neither the exit event nor CancelIoEx() can + // interrupt, so the join() below would hang until stdin is closed -- the bug this guards against. void InterruptAndJoinInputThread(std::thread& inputThread, wil::unique_event& exitEvent) { if (!inputThread.joinable()) @@ -42,25 +39,28 @@ namespace { return; } - if (exitEvent) - { - exitEvent.SetEvent(); - } + FAIL_FAST_IF(!exitEvent); + exitEvent.SetEvent(); - // CancelSynchronousIo() only aborts I/O in flight on the target thread at the instant it is called, - // so a single call can miss the worker while it is between read iterations. Cancel immediately to - // unblock a worker already parked in ReadFile, then retry every 50ms until the thread exits; - // WaitForSingleObject is both the termination test and the backoff. ERROR_NOT_FOUND means nothing - // was in flight to cancel, which is expected, so only unexpected failures are logged. + // Overlapped IO will get terminated by SetEvent(). Synchronous IO will not, so we need to cancel it. const auto threadHandle = static_cast(inputThread.native_handle()); - do + DWORD waitResult = WAIT_TIMEOUT; + while (waitResult == WAIT_TIMEOUT) { if (!CancelSynchronousIo(threadHandle)) { - const auto lastError = GetLastError(); - LOG_HR_IF(HRESULT_FROM_WIN32(lastError), lastError != ERROR_NOT_FOUND); + // ERROR_NOT_FOUND just means nothing was in flight to cancel (expected); any other error + // means the cancel cannot make progress and would otherwise spin this loop, so throw. + const auto cancelError = GetLastError(); + THROW_WIN32_IF(cancelError, cancelError != ERROR_NOT_FOUND); } - } while (WaitForSingleObject(threadHandle, 50) == WAIT_TIMEOUT); + + waitResult = WaitForSingleObject(threadHandle, 50); + } + + // The loop only breaks on a non-timeout result; anything but WAIT_OBJECT_0 (e.g. WAIT_FAILED) is an + // error -- surface it instead of join()ing on a thread we can't confirm has exited. + THROW_LAST_ERROR_IF(waitResult != WAIT_OBJECT_0); inputThread.join(); } diff --git a/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp b/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp index 24d68ea5bb..81a02e1ae9 100644 --- a/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp +++ b/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp @@ -162,7 +162,7 @@ class WSLCE2EContainerExecTests // so wslc hangs. The test exercises this by running `exec -i echo hello` and requiring it to exit while the // client keeps stdin open. // - // If this regresses, look at the client-side stdin relay teardown: ConsoleService::InterruptAndJoinInputThread + // If this regresses, look at the client-side stdin relay teardown: InterruptAndJoinInputThread // (the CancelSynchronousIo retry loop that unblocks the worker) and relay.cpp's InterruptableRead (which maps // the resulting ERROR_OPERATION_ABORTED to EOF). VerifyContainerIsNotListed(WslcContainerName); diff --git a/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp b/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp index 03f19ecda5..e188d524d9 100644 --- a/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp +++ b/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp @@ -715,6 +715,48 @@ class WSLCE2EContainerRunTests session.VerifyNoErrors(); } + WSLC_TEST_METHOD(WSLCE2E_Container_Run_InteractiveNoTTY_SelfExitingCommand) + { + // Same stdin-relay teardown deadlock as WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand (see it + // for the root cause), but via `container run -i`. run and exec share the client relay (both route through + // AttachToCurrentConsole), so the hang is not exec-specific. The test requires run to exit with the client + // still holding stdin open; RunWslcInteractive supplies stdin as a synchronous (non-overlapped) pipe, the + // case that triggers the bug. + VerifyContainerIsNotListed(WslcContainerName); + + auto session = + RunWslcInteractive(std::format(L"container run -i --name {} {} echo hello", WslcContainerName, DebianImage.NameAndTag())); + + session.ExpectStdout("hello\n"); + + // Generous timeout: it only bounds the failure (hang) path. + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + // Closing stdin after exit must stay a clean no-op. + session.CloseStdin(); + session.VerifyNoErrors(); + } + + WSLC_TEST_METHOD(WSLCE2E_Container_Run_InteractiveTTY_SelfExitingCommand) + { + // TTY counterpart of the above: `-t` routes through ConsoleService::RelayInteractiveTty, a separate + // stdin-worker teardown from the non-TTY path, so it could regress independently. Guards the TTY run path. + VerifyContainerIsNotListed(WslcContainerName); + + auto session = + RunWslcInteractive(std::format(L"container run -it --name {} {} echo hello", WslcContainerName, DebianImage.NameAndTag())); + + // The TTY translates the trailing LF to CRLF. + session.ExpectStdout("hello\r\n"); + + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + session.CloseStdin(); + session.VerifyNoErrors(); + } + WSLC_TEST_METHOD(WSLCE2E_Container_Run_PseudoConsole_TerminalSize) { VerifyContainerIsNotListed(WslcContainerName); From 68896b54ca3f16d322c44c014cb4ee549b3e08f4 Mon Sep 17 00:00:00 2001 From: David Bennett Date: Fri, 26 Jun 2026 12:19:03 -0700 Subject: [PATCH 3/3] Change failfast to assert --- src/windows/wslc/services/ConsoleService.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/windows/wslc/services/ConsoleService.cpp b/src/windows/wslc/services/ConsoleService.cpp index 774980ac7e..74f66494e6 100644 --- a/src/windows/wslc/services/ConsoleService.cpp +++ b/src/windows/wslc/services/ConsoleService.cpp @@ -39,7 +39,7 @@ namespace { return; } - FAIL_FAST_IF(!exitEvent); + WI_ASSERT(exitEvent); exitEvent.SetEvent(); // Overlapped IO will get terminated by SetEvent(). Synchronous IO will not, so we need to cancel it.