diff --git a/src/windows/common/relay.cpp b/src/windows/common/relay.cpp index 93d9ef7c5..9b65283db 100644 --- a/src/windows/common/relay.cpp +++ b/src/windows/common/relay.cpp @@ -133,7 +133,7 @@ wsl::windows::common::relay::InterruptableRead( if (!ReadFile(InputHandle, Buffer.data(), gsl::narrow_cast(Buffer.size()), &bytesRead, Overlapped)) { auto lastError = GetLastError(); - if ((lastError == ERROR_HANDLE_EOF) || (lastError == ERROR_BROKEN_PIPE)) + if ((lastError == ERROR_HANDLE_EOF) || (lastError == ERROR_BROKEN_PIPE) || (lastError == ERROR_OPERATION_ABORTED)) { return 0; } diff --git a/src/windows/wslc/services/ConsoleService.cpp b/src/windows/wslc/services/ConsoleService.cpp index e471ebad4..74f66494e 100644 --- a/src/windows/wslc/services/ConsoleService.cpp +++ b/src/windows/wslc/services/ConsoleService.cpp @@ -24,6 +24,49 @@ using wsl::windows::common::io::ReadConsoleHandle; using wsl::windows::common::io::ReadHandle; using wsl::windows::common::io::RelayHandle; +namespace { + + // Interrupts and joins the stdin-relay worker thread at teardown. + // + // The worker only exists when stdin is not a character device (any non-FILE_TYPE_CHAR handle, e.g. a + // redirected pipe), so this no-ops (not joinable) for a console. When stdin is a non-overlapped + // (synchronous) pipe the worker blocks in a ReadFile() that neither the exit event nor CancelIoEx() can + // interrupt, so the join() below would hang until stdin is closed -- the bug this guards against. + void InterruptAndJoinInputThread(std::thread& inputThread, wil::unique_event& exitEvent) + { + if (!inputThread.joinable()) + { + return; + } + + WI_ASSERT(exitEvent); + exitEvent.SetEvent(); + + // Overlapped IO will get terminated by SetEvent(). Synchronous IO will not, so we need to cancel it. + const auto threadHandle = static_cast(inputThread.native_handle()); + DWORD waitResult = WAIT_TIMEOUT; + while (waitResult == WAIT_TIMEOUT) + { + if (!CancelSynchronousIo(threadHandle)) + { + // ERROR_NOT_FOUND just means nothing was in flight to cancel (expected); any other error + // means the cancel cannot make progress and would otherwise spin this loop, so throw. + const auto cancelError = GetLastError(); + THROW_WIN32_IF(cancelError, cancelError != ERROR_NOT_FOUND); + } + + waitResult = WaitForSingleObject(threadHandle, 50); + } + + // The loop only breaks on a non-timeout result; anything but WAIT_OBJECT_0 (e.g. WAIT_FAILED) is an + // error -- surface it instead of join()ing on a thread we can't confirm has exited. + THROW_LAST_ERROR_IF(waitResult != WAIT_OBJECT_0); + + inputThread.join(); + } + +} // namespace + bool ConsoleService::RelayInteractiveTty(wsl::windows::common::ConsoleState& Console, ClientRunningWSLCProcess& Process, HANDLE Tty, bool TriggerRefresh) { // Configure the console for interactive usage. @@ -43,13 +86,7 @@ bool ConsoleService::RelayInteractiveTty(wsl::windows::common::ConsoleState& Con wil::unique_event exitEvent; std::thread inputThread; - auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { - if (inputThread.joinable()) - { - exitEvent.SetEvent(); - inputThread.join(); - } - }); + auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { InterruptAndJoinInputThread(inputThread, exitEvent); }); bool detached = false; MultiHandleWait io; @@ -99,13 +136,7 @@ void ConsoleService::RelayNonTtyProcess(wil::unique_handle&& Stdin, wil::unique_ wil::unique_event exitEvent; std::thread inputThread; - auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { - if (inputThread.joinable()) - { - exitEvent.SetEvent(); - inputThread.join(); - } - }); + auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { InterruptAndJoinInputThread(inputThread, exitEvent); }); if (Stdin.is_valid()) { diff --git a/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp b/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp index 64bba794a..81a02e1ae 100644 --- a/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp +++ b/test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp @@ -154,6 +154,63 @@ class WSLCE2EContainerExecTests session.VerifyNoErrors(); } + WSLC_TEST_METHOD(WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand) + { + // Regression test for a stdin deadlock. When stdin is a synchronous (non-overlapped) anonymous pipe, the client + // relays it on a worker thread parked in a blocking ReadFile() that the exit event cannot interrupt. With + // `echo hello` (which exits without reading stdin), teardown's join() on that worker blocks until stdin closes, + // so wslc hangs. The test exercises this by running `exec -i echo hello` and requiring it to exit while the + // client keeps stdin open. + // + // If this regresses, look at the client-side stdin relay teardown: InterruptAndJoinInputThread + // (the CancelSynchronousIo retry loop that unblocks the worker) and relay.cpp's InterruptableRead (which maps + // the resulting ERROR_OPERATION_ABORTED to EOF). + VerifyContainerIsNotListed(WslcContainerName); + auto result = RunWslc(std::format(L"container run -id --name {} {}", WslcContainerName, DebianImage.NameAndTag())); + result.Verify({.Stderr = L"", .ExitCode = 0}); + auto containerId = result.GetStdoutOneLine(); + + // RunWslcInteractive wires wslc's stdin to the read end of a synchronous (non-overlapped) anonymous pipe, which + // is what triggers the blocking-ReadFile relay path under test. + auto session = RunWslcInteractive(std::format(L"container exec -i {} echo hello", containerId)); + + // The command's output must arrive without the client closing stdin first. + session.ExpectStdout("hello\n"); + + // Long timeout: this only bounds the failure (hang) path, so it is generous to avoid false positives under CI load. + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + // Closing stdin after the process has already exited must remain a clean no-op with no errors emitted. + session.CloseStdin(); + session.VerifyNoErrors(); + } + + WSLC_TEST_METHOD(WSLCE2E_Container_Exec_InteractiveTTY_SelfExitingCommand) + { + // TTY counterpart to WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand (see that test for the full + // explanation of the deadlock). The -t flag routes wslc through ConsoleService::RelayInteractiveTty, whose + // stdin worker teardown is a separate scope-exit from the non-TTY path, so a regression could be introduced + // in one path and not the other. This test guards the TTY call site. + VerifyContainerIsNotListed(WslcContainerName); + auto result = RunWslc(std::format(L"container run -id --name {} {}", WslcContainerName, DebianImage.NameAndTag())); + result.Verify({.Stderr = L"", .ExitCode = 0}); + auto containerId = result.GetStdoutOneLine(); + + // -t sets the TTY flag; the harness still wires stdin as a synchronous pipe, so wslc takes the vulnerable + // RelayInteractiveTty else-branch (its input handle is a pipe, not a console). + auto session = RunWslcInteractive(std::format(L"container exec -it {} echo hello", containerId)); + + // The TTY translates the trailing LF to CRLF, so the exact output is "hello\r\n". + session.ExpectStdout("hello\r\n"); + + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + session.CloseStdin(); + session.VerifyNoErrors(); + } + WSLC_TEST_METHOD(WSLCE2E_Container_Exec_PseudoConsole_TerminalSize) { VerifyContainerIsNotListed(WslcContainerName); diff --git a/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp b/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp index 03f19ecda..e188d524d 100644 --- a/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp +++ b/test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp @@ -715,6 +715,48 @@ class WSLCE2EContainerRunTests session.VerifyNoErrors(); } + WSLC_TEST_METHOD(WSLCE2E_Container_Run_InteractiveNoTTY_SelfExitingCommand) + { + // Same stdin-relay teardown deadlock as WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand (see it + // for the root cause), but via `container run -i`. run and exec share the client relay (both route through + // AttachToCurrentConsole), so the hang is not exec-specific. The test requires run to exit with the client + // still holding stdin open; RunWslcInteractive supplies stdin as a synchronous (non-overlapped) pipe, the + // case that triggers the bug. + VerifyContainerIsNotListed(WslcContainerName); + + auto session = + RunWslcInteractive(std::format(L"container run -i --name {} {} echo hello", WslcContainerName, DebianImage.NameAndTag())); + + session.ExpectStdout("hello\n"); + + // Generous timeout: it only bounds the failure (hang) path. + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + // Closing stdin after exit must stay a clean no-op. + session.CloseStdin(); + session.VerifyNoErrors(); + } + + WSLC_TEST_METHOD(WSLCE2E_Container_Run_InteractiveTTY_SelfExitingCommand) + { + // TTY counterpart of the above: `-t` routes through ConsoleService::RelayInteractiveTty, a separate + // stdin-worker teardown from the non-TTY path, so it could regress independently. Guards the TTY run path. + VerifyContainerIsNotListed(WslcContainerName); + + auto session = + RunWslcInteractive(std::format(L"container run -it --name {} {} echo hello", WslcContainerName, DebianImage.NameAndTag())); + + // The TTY translates the trailing LF to CRLF. + session.ExpectStdout("hello\r\n"); + + auto exitCode = session.Wait(120000); + VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin"); + + session.CloseStdin(); + session.VerifyNoErrors(); + } + WSLC_TEST_METHOD(WSLCE2E_Container_Run_PseudoConsole_TerminalSize) { VerifyContainerIsNotListed(WslcContainerName);