Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/windows/common/relay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ wsl::windows::common::relay::InterruptableRead(
if (!ReadFile(InputHandle, Buffer.data(), gsl::narrow_cast<DWORD>(Buffer.size()), &bytesRead, Overlapped))
{
auto lastError = GetLastError();
if ((lastError == ERROR_HANDLE_EOF) || (lastError == ERROR_BROKEN_PIPE))
if ((lastError == ERROR_HANDLE_EOF) || (lastError == ERROR_BROKEN_PIPE) || (lastError == ERROR_OPERATION_ABORTED))
Comment thread
dkbennett marked this conversation as resolved.
{
return 0;
}
Expand Down
59 changes: 45 additions & 14 deletions src/windows/wslc/services/ConsoleService.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,49 @@ using wsl::windows::common::io::ReadConsoleHandle;
using wsl::windows::common::io::ReadHandle;
using wsl::windows::common::io::RelayHandle;

namespace {

// Interrupts and joins the stdin-relay worker thread at teardown.
//
// The worker only exists when stdin is not a character device (any non-FILE_TYPE_CHAR handle, e.g. a
// redirected pipe), so this no-ops (not joinable) for a console. When stdin is a non-overlapped
// (synchronous) pipe the worker blocks in a ReadFile() that neither the exit event nor CancelIoEx() can
// interrupt, so the join() below would hang until stdin is closed -- the bug this guards against.
void InterruptAndJoinInputThread(std::thread& inputThread, wil::unique_event& exitEvent)
{
if (!inputThread.joinable())
{
return;
}

WI_ASSERT(exitEvent);
exitEvent.SetEvent();

// Overlapped IO will get terminated by SetEvent(). Synchronous IO will not, so we need to cancel it.
const auto threadHandle = static_cast<HANDLE>(inputThread.native_handle());
DWORD waitResult = WAIT_TIMEOUT;
while (waitResult == WAIT_TIMEOUT)
{
if (!CancelSynchronousIo(threadHandle))
{
// ERROR_NOT_FOUND just means nothing was in flight to cancel (expected); any other error
// means the cancel cannot make progress and would otherwise spin this loop, so throw.
const auto cancelError = GetLastError();
THROW_WIN32_IF(cancelError, cancelError != ERROR_NOT_FOUND);
}

waitResult = WaitForSingleObject(threadHandle, 50);
}

// The loop only breaks on a non-timeout result; anything but WAIT_OBJECT_0 (e.g. WAIT_FAILED) is an
// error -- surface it instead of join()ing on a thread we can't confirm has exited.
THROW_LAST_ERROR_IF(waitResult != WAIT_OBJECT_0);

inputThread.join();
}
Comment on lines +46 to +66

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only real thing to do here is change to a failfast but its going to crash either way. It will not "mask the original failure" because the scope exit log will log the failure first, so this is really not a "high" priority issue, it's just a discussion about whether we want a failfast crash or an abort() crash w/ a log message.


} // namespace

bool ConsoleService::RelayInteractiveTty(wsl::windows::common::ConsoleState& Console, ClientRunningWSLCProcess& Process, HANDLE Tty, bool TriggerRefresh)
{
// Configure the console for interactive usage.
Expand All @@ -43,13 +86,7 @@ bool ConsoleService::RelayInteractiveTty(wsl::windows::common::ConsoleState& Con
wil::unique_event exitEvent;
std::thread inputThread;

auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() {
if (inputThread.joinable())
{
exitEvent.SetEvent();
inputThread.join();
}
});
auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { InterruptAndJoinInputThread(inputThread, exitEvent); });

bool detached = false;
MultiHandleWait io;
Expand Down Expand Up @@ -99,13 +136,7 @@ void ConsoleService::RelayNonTtyProcess(wil::unique_handle&& Stdin, wil::unique_
wil::unique_event exitEvent;
std::thread inputThread;

auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() {
if (inputThread.joinable())
{
exitEvent.SetEvent();
inputThread.join();
}
});
auto joinThread = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { InterruptAndJoinInputThread(inputThread, exitEvent); });

if (Stdin.is_valid())
{
Expand Down
57 changes: 57 additions & 0 deletions test/windows/wslc/e2e/WSLCE2EContainerExecTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,63 @@ class WSLCE2EContainerExecTests
session.VerifyNoErrors();
}

WSLC_TEST_METHOD(WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand)
{
// Regression test for a stdin deadlock. When stdin is a synchronous (non-overlapped) anonymous pipe, the client
// relays it on a worker thread parked in a blocking ReadFile() that the exit event cannot interrupt. With
// `echo hello` (which exits without reading stdin), teardown's join() on that worker blocks until stdin closes,
// so wslc hangs. The test exercises this by running `exec -i echo hello` and requiring it to exit while the
// client keeps stdin open.
//
// If this regresses, look at the client-side stdin relay teardown: InterruptAndJoinInputThread
// (the CancelSynchronousIo retry loop that unblocks the worker) and relay.cpp's InterruptableRead (which maps
// the resulting ERROR_OPERATION_ABORTED to EOF).
VerifyContainerIsNotListed(WslcContainerName);
auto result = RunWslc(std::format(L"container run -id --name {} {}", WslcContainerName, DebianImage.NameAndTag()));
result.Verify({.Stderr = L"", .ExitCode = 0});
auto containerId = result.GetStdoutOneLine();

// RunWslcInteractive wires wslc's stdin to the read end of a synchronous (non-overlapped) anonymous pipe, which
// is what triggers the blocking-ReadFile relay path under test.
auto session = RunWslcInteractive(std::format(L"container exec -i {} echo hello", containerId));

// The command's output must arrive without the client closing stdin first.
session.ExpectStdout("hello\n");

// Long timeout: this only bounds the failure (hang) path, so it is generous to avoid false positives under CI load.
auto exitCode = session.Wait(120000);
VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin");

// Closing stdin after the process has already exited must remain a clean no-op with no errors emitted.
session.CloseStdin();
session.VerifyNoErrors();
}

WSLC_TEST_METHOD(WSLCE2E_Container_Exec_InteractiveTTY_SelfExitingCommand)
{
// TTY counterpart to WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand (see that test for the full
// explanation of the deadlock). The -t flag routes wslc through ConsoleService::RelayInteractiveTty, whose
// stdin worker teardown is a separate scope-exit from the non-TTY path, so a regression could be introduced
// in one path and not the other. This test guards the TTY call site.
VerifyContainerIsNotListed(WslcContainerName);
auto result = RunWslc(std::format(L"container run -id --name {} {}", WslcContainerName, DebianImage.NameAndTag()));
result.Verify({.Stderr = L"", .ExitCode = 0});
auto containerId = result.GetStdoutOneLine();

// -t sets the TTY flag; the harness still wires stdin as a synchronous pipe, so wslc takes the vulnerable
// RelayInteractiveTty else-branch (its input handle is a pipe, not a console).
auto session = RunWslcInteractive(std::format(L"container exec -it {} echo hello", containerId));

// The TTY translates the trailing LF to CRLF, so the exact output is "hello\r\n".
session.ExpectStdout("hello\r\n");

auto exitCode = session.Wait(120000);
VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin");

session.CloseStdin();
session.VerifyNoErrors();
}

WSLC_TEST_METHOD(WSLCE2E_Container_Exec_PseudoConsole_TerminalSize)
{
VerifyContainerIsNotListed(WslcContainerName);
Expand Down
42 changes: 42 additions & 0 deletions test/windows/wslc/e2e/WSLCE2EContainerRunTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,48 @@ class WSLCE2EContainerRunTests
session.VerifyNoErrors();
}

WSLC_TEST_METHOD(WSLCE2E_Container_Run_InteractiveNoTTY_SelfExitingCommand)
{
// Same stdin-relay teardown deadlock as WSLCE2E_Container_Exec_InteractiveNoTTY_SelfExitingCommand (see it
// for the root cause), but via `container run -i`. run and exec share the client relay (both route through
// AttachToCurrentConsole), so the hang is not exec-specific. The test requires run to exit with the client
// still holding stdin open; RunWslcInteractive supplies stdin as a synchronous (non-overlapped) pipe, the
// case that triggers the bug.
VerifyContainerIsNotListed(WslcContainerName);

auto session =
RunWslcInteractive(std::format(L"container run -i --name {} {} echo hello", WslcContainerName, DebianImage.NameAndTag()));

session.ExpectStdout("hello\n");

// Generous timeout: it only bounds the failure (hang) path.
auto exitCode = session.Wait(120000);
VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin");

// Closing stdin after exit must stay a clean no-op.
session.CloseStdin();
session.VerifyNoErrors();
}

WSLC_TEST_METHOD(WSLCE2E_Container_Run_InteractiveTTY_SelfExitingCommand)
{
// TTY counterpart of the above: `-t` routes through ConsoleService::RelayInteractiveTty, a separate
// stdin-worker teardown from the non-TTY path, so it could regress independently. Guards the TTY run path.
VerifyContainerIsNotListed(WslcContainerName);

auto session =
RunWslcInteractive(std::format(L"container run -it --name {} {} echo hello", WslcContainerName, DebianImage.NameAndTag()));

// The TTY translates the trailing LF to CRLF.
session.ExpectStdout("hello\r\n");

auto exitCode = session.Wait(120000);
VERIFY_ARE_EQUAL(0, exitCode, L"echo should exit with code 0 without the client closing stdin");

session.CloseStdin();
session.VerifyNoErrors();
}

WSLC_TEST_METHOD(WSLCE2E_Container_Run_PseudoConsole_TerminalSize)
{
VerifyContainerIsNotListed(WslcContainerName);
Expand Down