diff --git a/src/windows/service/exe/HcsVirtualMachine.cpp b/src/windows/service/exe/HcsVirtualMachine.cpp index 7d44c17c6..e5b5cfe6d 100644 --- a/src/windows/service/exe/HcsVirtualMachine.cpp +++ b/src/windows/service/exe/HcsVirtualMachine.cpp @@ -351,7 +351,9 @@ HcsVirtualMachine::HcsVirtualMachine(_In_ const WSLCSessionSettings* Settings) HcsVirtualMachine::~HcsVirtualMachine() { - std::lock_guard lock(m_lock); + // Do not hold m_lock: waiting on m_vmExitEvent and closing the compute system below both block + // on in-flight HCS exit/crash callbacks, which may themselves need m_lock. OnExit() is lock-free, + // and closing the compute system drains all callbacks, so the rest of teardown needs no lock. // Wait up to 5 seconds for the VM to terminate gracefully. bool forceTerminate = false; diff --git a/src/windows/service/exe/WSLCSessionManager.cpp b/src/windows/service/exe/WSLCSessionManager.cpp index f13ff8269..721432f41 100644 --- a/src/windows/service/exe/WSLCSessionManager.cpp +++ b/src/windows/service/exe/WSLCSessionManager.cpp @@ -277,8 +277,7 @@ void WSLCSessionManagerImpl::CreateSession( g_pluginManager, sessionId, creatorPid, std::wstring(resolvedDisplayName), wil::shared_handle(sharedToken), std::vector(storedSid)); // Create the VM factory in the SYSTEM service (privileged). The per-user session - // uses it to create the VM. Funneling VM creation through a factory lets the session - // own when VMs are created, rather than having one handed to it up front. + // uses it to create VMs on demand and recreate them after idle-termination. auto vmFactory = Microsoft::WRL::Make(Settings); // Launch per-user COM server factory and add it to a fresh per-session job object for crash cleanup. diff --git a/src/windows/service/inc/wslc.idl b/src/windows/service/inc/wslc.idl index 147b44ce4..775caa18c 100644 --- a/src/windows/service/inc/wslc.idl +++ b/src/windows/service/inc/wslc.idl @@ -608,6 +608,14 @@ interface IWSLCSession : IUnknown // Container management. 
HRESULT CreateContainer([in] const WSLCContainerOptions* Options, [in, unique] IWarningCallback* WarningCallback, [out] IWSLCContainer** Container); HRESULT OpenContainer([in, ref] LPCSTR Id, [out] IWSLCContainer** Container); + + // Keeps the VM alive for the duration of a client-side container operation. The CLI performs + // each mutation as two round-trips (OpenContainer followed by the operation) and may stream + // output afterwards. With on-demand VM idle-termination the VM could otherwise tear down + // between those calls, disconnecting the container wrapper and failing the second call with + // RPC_E_DISCONNECTED. The client holds the returned token for the whole operation; releasing + // it (or the client exiting) lets the VM idle-terminate again. + HRESULT BeginContainerOperation([out] IUnknown** Operation); HRESULT ListContainers([in, unique] const WSLCListContainersOptions* Options,[out, size_is(, *Count)] WSLCContainerEntry** Containers,[out] ULONG* Count, [out, size_is(, *PortsCount)] WSLCContainerPortMapping** Ports, [out] ULONG* PortsCount); HRESULT PruneContainers([in, unique, size_is(FiltersCount)] const WSLCFilter* Filters, [in] ULONG FiltersCount, [out] WSLCPruneContainersResults* Result); diff --git a/src/windows/wslc/services/ContainerService.cpp b/src/windows/wslc/services/ContainerService.cpp index 529c1039f..5fed3cc8a 100644 --- a/src/windows/wslc/services/ContainerService.cpp +++ b/src/windows/wslc/services/ContainerService.cpp @@ -291,6 +291,7 @@ std::wstring ContainerService::FormatRelativeTime(ULONGLONG timestamp) int ContainerService::Attach(Session& session, const std::string& id) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); @@ -452,6 +453,7 @@ CreateContainerResult ContainerService::Create(Session& session, const std::stri int ContainerService::Start(Session& session, const std::string& id, bool attach) { + auto operation = 
session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); WSLCContainerStartFlags flags = attach ? WSLCContainerStartFlagsAttach : WSLCContainerStartFlagsNone; @@ -482,6 +484,7 @@ int ContainerService::Start(Session& session, const std::string& id, bool attach void ContainerService::Stop(Session& session, const std::string& id, StopContainerOptions options) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); THROW_IF_FAILED_EXCEPT(container->Stop(options.Signal, options.Timeout), WSLC_E_CONTAINER_NOT_RUNNING); @@ -489,6 +492,7 @@ void ContainerService::Stop(Session& session, const std::string& id, StopContain void ContainerService::Kill(Session& session, const std::string& id, WSLCSignal signal) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); THROW_IF_FAILED(container->Kill(signal)); @@ -496,6 +500,7 @@ void ContainerService::Kill(Session& session, const std::string& id, WSLCSignal void ContainerService::Delete(Session& session, const std::string& id, bool force) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); THROW_IF_FAILED(container->Delete(force ? 
WSLCDeleteFlagsForce : WSLCDeleteFlagsNone)); @@ -550,6 +555,7 @@ std::vector ContainerService::List( int ContainerService::Exec(Session& session, const std::string& id, ContainerOptions options) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); @@ -581,6 +587,7 @@ int ContainerService::Exec(Session& session, const std::string& id, ContainerOpt InspectContainer ContainerService::Inspect(Session& session, const std::string& id) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); wil::unique_cotaskmem_ansistring output; @@ -610,6 +617,7 @@ void ContainerService::Export(Session& session, const std::string& id, HANDLE ou void ContainerService::Logs(Session& session, const std::string& id, bool follow, bool timestamps, ULONGLONG since, ULONGLONG until, ULONGLONG tail) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); @@ -637,6 +645,7 @@ void ContainerService::Logs(Session& session, const std::string& id, bool follow wsl::windows::common::docker_schema::ContainerStats ContainerService::Stats(Session& session, const std::string& id) { + auto operation = session.BeginContainerOperation(); wil::com_ptr container; THROW_IF_FAILED(session.Get()->OpenContainer(id.c_str(), &container)); wil::unique_cotaskmem_ansistring output; diff --git a/src/windows/wslc/services/SessionModel.h b/src/windows/wslc/services/SessionModel.h index ab6b5824e..491df6fc3 100644 --- a/src/windows/wslc/services/SessionModel.h +++ b/src/windows/wslc/services/SessionModel.h @@ -22,7 +22,8 @@ struct Session NON_COPYABLE(Session); DEFAULT_MOVABLE(Session); - explicit Session(wil::com_ptr session) : m_session(std::move(session)) + explicit Session(wil::com_ptr session, wil::com_ptr warningCallback = {}) : + 
m_session(std::move(session)), m_warningCallback(std::move(warningCallback)) { } @@ -31,8 +32,23 @@ struct Session return m_session.get(); } + // Acquires an activity token that keeps the VM alive for the duration of a client-side + // container operation (resolve + operate, plus any streamed output). Hold the returned + // pointer for the whole operation; releasing it lets the VM idle-terminate again. + [[nodiscard]] wil::com_ptr BeginContainerOperation() const + { + wil::com_ptr operation; + THROW_IF_FAILED(m_session->BeginContainerOperation(&operation)); + return operation; + } + private: wil::com_ptr m_session; + + // Kept alive for the lifetime of the session model (i.e. the whole CLI command) so the service + // can deliver warnings emitted by lazy/background work — such as resource recovery on the first + // VM start — back to this CLI invocation, even though no single COM call carries the callback. + wil::com_ptr m_warningCallback; }; } // namespace wsl::windows::wslc::models \ No newline at end of file diff --git a/src/windows/wslc/services/SessionService.cpp b/src/windows/wslc/services/SessionService.cpp index 86ebcebbe..ff8008a22 100644 --- a/src/windows/wslc/services/SessionService.cpp +++ b/src/windows/wslc/services/SessionService.cpp @@ -61,7 +61,10 @@ Session SessionService::OpenOrCreateDefaultSession() auto warningCallback = Microsoft::WRL::Make(); THROW_IF_FAILED(manager->CreateSession(nullptr, WSLCSessionFlagsNone, warningCallback.Get(), &session)); wsl::windows::common::security::ConfigureForCOMImpersonation(session.get()); - return Session(std::move(session)); + + // Hold the warning callback for the session's lifetime so warnings emitted by a lazy VM start + // (e.g. resource recovery) are still delivered to this CLI invocation. 
+ return Session(std::move(session), wil::com_ptr(warningCallback.Get())); } int SessionService::Attach(const Session& session) diff --git a/src/windows/wslcsession/IORelay.cpp b/src/windows/wslcsession/IORelay.cpp index 6677bca87..09f5e05cd 100644 --- a/src/windows/wslcsession/IORelay.cpp +++ b/src/windows/wslcsession/IORelay.cpp @@ -68,11 +68,20 @@ void IORelay::Stop() } } +bool IORelay::IsRelayThread() const noexcept +{ + return m_thread.get_id() == std::this_thread::get_id(); +} + void IORelay::Run() try { common::wslutil::SetThreadDescription(L"IORelay"); + // Handle callbacks dispatched from this thread (e.g. unexpected VM exit) can tear the VM down, + // releasing cross-process COM proxies, so join the process MTA to avoid RPC_E_WRONG_THREAD. + const auto coInit = wil::CoInitializeEx(COINIT_MULTITHREADED); + windows::common::io::MultiHandleWait io; // N.B. All the IO must happen on the thread. diff --git a/src/windows/wslcsession/IORelay.h b/src/windows/wslcsession/IORelay.h index 879d3fee1..844c16dee 100644 --- a/src/windows/wslcsession/IORelay.h +++ b/src/windows/wslcsession/IORelay.h @@ -30,6 +30,12 @@ class IORelay void Stop(); + // Returns true if the calling thread is the IORelay's own worker thread (i.e. the call + // is being made from a handle callback). Destroying the IORelay from this thread would + // join the thread with itself and call std::terminate(), so callers that may run on the + // relay thread must check this before destroying the object. 
+ bool IsRelayThread() const noexcept; + private: void Start(); void Run(); diff --git a/src/windows/wslcsession/WSLCContainer.cpp b/src/windows/wslcsession/WSLCContainer.cpp index 4da373ed4..ab24146e0 100644 --- a/src/windows/wslcsession/WSLCContainer.cpp +++ b/src/windows/wslcsession/WSLCContainer.cpp @@ -562,6 +562,13 @@ WSLCContainerImpl::WSLCContainerImpl( m_initProcessFlags(InitProcessFlags), m_containerFlags(ContainerFlags) { + // Acquire the activity hold up front for a container recovered in the running state, so it keeps + // the VM alive even before any client opens its wrapper. A merely-created (never-started) + // container does not pin the VM: its metadata survives teardown and the VM restarts on next use. + if (m_state == WslcContainerStateRunning) + { + m_activityHold = ActivityRef(m_wslcSession.IdleStateShared()); + } } WSLCContainerImpl::~WSLCContainerImpl() @@ -1266,6 +1273,12 @@ void WSLCContainerImpl::Exec(const WSLCProcessOptions* Options, const WSLCProces } while (!control->GetExitEvent().wait(100)); auto process = wil::MakeOrThrow(std::move(control), std::move(io), Options->Flags); + + // The exec'd process wrapper is handed to the client and is not retained internally, so its + // lifetime tracks the client's proxy. Bind a keep-alive token to it so the idle worker does + // not tear the VM down (killing the process) while the client still holds the proxy. + process->SetKeepAliveToken(m_wslcSession.CreateActivityToken()); + THROW_IF_FAILED(process.CopyTo(__uuidof(IWSLCProcess), (void**)Process)); } CATCH_AND_THROW_DOCKER_USER_ERROR("Failed to exec process in container %hs", m_id.c_str()); @@ -1878,11 +1891,21 @@ std::unique_ptr WSLCContainerImpl::Open( { auto inspectData = DockerClient.InspectContainer(dockerContainer.Id); auto state = DockerStateToWSLCState(dockerContainer.State); - const auto& timestamp = (state == WslcContainerStateRunning) ? 
inspectData.State.StartedAt : inspectData.State.FinishedAt; - if (!timestamp.empty()) + if (state == WslcContainerStateCreated) { - container->m_stateChangedAt = ParseDockerTimestamp(timestamp); + // A created-but-never-started container has no StartedAt/FinishedAt; its state last + // changed when it was created. + container->m_stateChangedAt = static_cast(dockerContainer.Created); + } + else + { + const auto& timestamp = (state == WslcContainerStateRunning) ? inspectData.State.StartedAt : inspectData.State.FinishedAt; + + if (!timestamp.empty()) + { + container->m_stateChangedAt = ParseDockerTimestamp(timestamp); + } } } catch (...) @@ -2159,6 +2182,25 @@ __requires_lock_held(m_lock) void WSLCContainerImpl::Transition(WSLCContainerSta m_state = State; m_stateChangedAt = stateChangedAt.value_or(static_cast(std::time(nullptr))); + + // Keep the VM alive while this container is Running and release the hold once it leaves that + // state, even when no client holds the wrapper (e.g. a detached `run -d` container). Dropping + // the hold on the transition out of Running is what lets an otherwise-idle VM be torn down; a + // Created or Exited container does not pin the VM, since its metadata survives teardown. 
+ UpdateActivityHoldLockHeld(); +} + +__requires_lock_held(m_lock) void WSLCContainerImpl::UpdateActivityHoldLockHeld() noexcept +{ + const bool active = (m_state == WslcContainerStateRunning); + if (active && !m_activityHold) + { + m_activityHold = ActivityRef(m_wslcSession.IdleStateShared()); + } + else if (!active && m_activityHold) + { + m_activityHold.reset(); + } } WSLCContainer::WSLCContainer(WSLCContainerImpl* impl, WSLCSession& session, std::function&& OnDeleted) : @@ -2167,6 +2209,7 @@ WSLCContainer::WSLCContainer(WSLCContainerImpl* impl, WSLCSession& session, std: } HRESULT WSLCContainer::Attach(LPCSTR DetachKeys, WSLCHandle* Stdin, WSLCHandle* Stdout, WSLCHandle* Stderr) +try { WSLCExecutionContext context(&m_session); @@ -2178,8 +2221,10 @@ HRESULT WSLCContainer::Attach(LPCSTR DetachKeys, WSLCHandle* Stdin, WSLCHandle* *Stdout = {}; *Stderr = {}; + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Attach, DetachKeys, Stdin, Stdout, Stderr); } +CATCH_RETURN(); HRESULT WSLCContainer::GetState(WSLCContainerState* Result) { @@ -2237,6 +2282,7 @@ HRESULT WSLCContainer::GetInitProcess(IWSLCProcess** Process) } HRESULT WSLCContainer::Exec(const WSLCProcessOptions* Options, const WSLCProcessStartOptions* StartOptions, IWSLCProcess** Process) +try { WSLCExecutionContext context(&m_session); @@ -2245,22 +2291,35 @@ HRESULT WSLCContainer::Exec(const WSLCProcessOptions* Options, const WSLCProcess RETURN_HR_IF_MSG(E_INVALIDARG, WI_IsAnyFlagSet(Options->Flags, ~WSLCProcessFlagsValid), "Invalid flags: 0x%x", Options->Flags); *Process = nullptr; + + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Exec, Options, StartOptions, Process); } +CATCH_RETURN(); HRESULT WSLCContainer::Stop(_In_ WSLCSignal Signal, _In_ LONG TimeoutSeconds) +try { WSLCExecutionContext context(&m_session); + // Hold a VM lease for the whole operation: --rm containers self-delete during Stop, which + // disconnects the wrapper and drops 
activity. Without the lease, the idle worker can fire + // during the post-stop destroy wait (up to 60s) and tear the VM down mid-call. + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Stop, Signal, TimeoutSeconds, false); } +CATCH_RETURN(); HRESULT WSLCContainer::Kill(_In_ WSLCSignal Signal) +try { WSLCExecutionContext context(&m_session); + // Hold a VM lease for the same reason as Stop(): --rm can self-delete and drop activity. + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Stop, Signal, {}, true); } +CATCH_RETURN(); HRESULT WSLCContainer::Start(WSLCContainerStartFlags Flags, const WSLCProcessStartOptions* StartOptions, IWarningCallback* WarningCallback) try @@ -2269,11 +2328,13 @@ try THROW_HR_IF_MSG(E_INVALIDARG, WI_IsAnyFlagSet(Flags, ~WSLCContainerStartFlagsValid), "Invalid flags: 0x%x", Flags); + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Start, Flags, StartOptions); } CATCH_RETURN(); HRESULT WSLCContainer::Inspect(LPSTR* Output) +try { WSLCExecutionContext context(&m_session); @@ -2281,8 +2342,10 @@ HRESULT WSLCContainer::Inspect(LPSTR* Output) *Output = nullptr; + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Inspect, Output); } +CATCH_RETURN(); HRESULT WSLCContainer::Stats(LPSTR* Output) try @@ -2292,6 +2355,8 @@ try RETURN_HR_IF(E_POINTER, Output == nullptr); *Output = nullptr; + + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Stats, Output); } CATCH_RETURN(); @@ -2304,6 +2369,11 @@ try THROW_HR_IF_MSG(E_INVALIDARG, WI_IsAnyFlagSet(Flags, ~WSLCDeleteFlagsValid), "Invalid flags: 0x%x", Flags); // Special case for Delete(): If deletion is successful, notify the WSLCSession that the container has been deleted. + // Hold a VM lease across the whole operation: deleting a container makes it inactive and + // can trigger an idle teardown. 
Without the lease the idle worker could take the session + // lock exclusively and clear m_containers (destroying this container) concurrently, racing + // the delete and inverting the container->session lock order. + auto vmLease = m_session.AcquireVmLease(); auto [lock, impl] = LockImpl(); impl->Delete(Flags); @@ -2329,11 +2399,14 @@ try CATCH_LOG(); HRESULT WSLCContainer::Export(WSLCHandle TarHandle) +try { WSLCExecutionContext context(&m_session); + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Export, TarHandle); } +CATCH_RETURN(); HRESULT WSLCContainer::Logs(WSLCLogsFlags Flags, WSLCHandle* Stdout, WSLCHandle* Stderr, ULONGLONG Since, ULONGLONG Until, ULONGLONG Tail) try @@ -2346,6 +2419,7 @@ try *Stdout = {}; *Stderr = {}; + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::Logs, Flags, Stdout, Stderr, Since, Until, Tail); } CATCH_RETURN(); @@ -2523,6 +2597,8 @@ HRESULT WSLCContainer::ConnectToNetwork(const WSLCNetworkConnectionOptions* Opti try { COMServiceExecutionContext context; + + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::ConnectToNetwork, Options); } CATCH_RETURN(); @@ -2531,6 +2607,8 @@ HRESULT WSLCContainer::DisconnectFromNetwork(LPCSTR NetworkName) try { COMServiceExecutionContext context; + + auto vmLease = m_session.AcquireVmLease(); return CallImpl(&WSLCContainerImpl::DisconnectFromNetwork, NetworkName); } CATCH_RETURN(); diff --git a/src/windows/wslcsession/WSLCContainer.h b/src/windows/wslcsession/WSLCContainer.h index 10fd57fd3..5032dd6de 100644 --- a/src/windows/wslcsession/WSLCContainer.h +++ b/src/windows/wslcsession/WSLCContainer.h @@ -16,6 +16,7 @@ Module Name: #include "ServiceProcessLauncher.h" #include "WSLCSession.h" +#include "WSLCIdleState.h" #include "DockerEventTracker.h" #include "DockerHTTPClient.h" #include "WSLCProcessControl.h" @@ -176,6 +177,10 @@ class WSLCContainerImpl void MapPorts(); void UnmapPorts(); + // Acquires or 
releases the activity hold so it is held exactly while the container is Running, + // keeping the session's VM alive across idle teardown. + __requires_lock_held(m_lock) void UpdateActivityHoldLockHeld() noexcept; + __requires_shared_lock_held(m_lock) std::string InspectLockHeld() const; mutable wil::srwlock m_lock; @@ -220,6 +225,11 @@ class WSLCContainerImpl DockerEventTracker::EventTrackingReference m_containerEvents; IORelay& m_ioRelay; std::string m_networkMode; + + // Held (non-empty) exactly while the container is Running so the session's VM stays alive even + // when no client holds the wrapper (e.g. a detached `run -d` container). Maintained by + // UpdateActivityHoldLockHeld(); released automatically when the container is destroyed. + ActivityRef m_activityHold; }; class DECLSPEC_UUID("B1F1C4E3-C225-4CAE-AD8A-34C004DE1AE4") WSLCContainer diff --git a/src/windows/wslcsession/WSLCExecutionContext.h b/src/windows/wslcsession/WSLCExecutionContext.h index 5d75bfed1..b3abec5a0 100644 --- a/src/windows/wslcsession/WSLCExecutionContext.h +++ b/src/windows/wslcsession/WSLCExecutionContext.h @@ -27,7 +27,19 @@ class WSLCExecutionContext : public wsl::windows::common::COMServiceExecutionCon protected: bool CollectUserWarning(const std::wstring& warning) override { - if (m_warningCallback != nullptr) + IWarningCallback* callback = m_warningCallback; + + // When the operation carries no explicit callback, fall back to the callback supplied when + // the session was created/entered. This routes warnings emitted outside a callback-bearing + // operation (e.g. resource recovery during the lazy VM start) back to the session creator. 
+ wil::com_ptr sessionCallback; + if (callback == nullptr && m_session != nullptr) + { + sessionCallback = m_session->AcquireWarningCallback(); + callback = sessionCallback.get(); + } + + if (callback != nullptr) { std::unique_ptr comCallback; if (m_session != nullptr) @@ -35,7 +47,7 @@ class WSLCExecutionContext : public wsl::windows::common::COMServiceExecutionCon comCallback = std::make_unique(m_session->RegisterUserCOMCallback()); } - auto hr = m_warningCallback->OnWarning(warning.c_str()); + auto hr = callback->OnWarning(warning.c_str()); if (SUCCEEDED(hr) || hr == RPC_E_CALL_CANCELED || hr == HRESULT_FROM_WIN32(ERROR_CANCELLED)) { return true; diff --git a/src/windows/wslcsession/WSLCIdleState.h b/src/windows/wslcsession/WSLCIdleState.h new file mode 100644 index 000000000..00ecb9feb --- /dev/null +++ b/src/windows/wslcsession/WSLCIdleState.h @@ -0,0 +1,224 @@ +/*++ + +Copyright (c) Microsoft. All rights reserved. + +Module Name: + + WSLCIdleState.h + +Abstract: + + Shared idle-termination state for WSLC session VM lifecycle. + +--*/ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace wsl::windows::service::wslc { + +// Shared idle-termination state for a WSLC session. +// +// A single activity refcount is the only source of truth for "the VM is needed". Everything that +// requires the VM holds a reference for as long as it needs it: +// * in-flight operations (WSLCSession::VmLease), +// * running containers themselves (WSLCContainerImpl's ActivityRef), +// * client-held process wrappers (WSLCProcess keep-alive token), +// * multi-round-trip CLI operations (WSLCSession::BeginContainerOperation). +// +// When the count drops to zero a threadpool timer is armed for the idle grace period; if it +// elapses without new activity the session-supplied OnIdle callback tears the VM down. Any new +// activity before it fires cancels the timer. 
+// +// Held via shared_ptr so activity holders (container/process wrappers, operation tokens) can +// outlive the owning session and release activity without dereferencing it. The session clears the +// callback and drains the timer in Disarm() during teardown, after which a late release simply +// decrements the count and never re-enters the destroyed session. +class IdleState +{ +public: + IdleState() = default; + + IdleState(const IdleState&) = delete; + IdleState& operator=(const IdleState&) = delete; + + // Installs the idle-teardown callback and grace period and creates the timer. Called once by + // the owning session after construction. OnIdle runs on a threadpool thread. + void Initialize(std::chrono::milliseconds GracePeriod, std::function OnIdle) + { + auto lock = m_lock.lock_exclusive(); + m_gracePeriod = GracePeriod; + m_onIdle = std::move(OnIdle); + m_timer.reset(CreateThreadpoolTimer(&IdleState::TimerCallback, this, nullptr)); + THROW_LAST_ERROR_IF(!m_timer); + } + + // Permanently disables idle teardown: clears the callback so no further arm has any effect, and + // drains any pending/running timer callback. Must be called by the session (with its own lock + // released) during teardown, before the session object is destroyed, so no callback can + // reference it afterwards. + void Disarm() noexcept + { + PTP_TIMER timer = nullptr; + { + auto lock = m_lock.lock_exclusive(); + m_onIdle = nullptr; + timer = m_timer.get(); + if (timer != nullptr) + { + SetThreadpoolTimer(timer, nullptr, 0, 0); + } + } + + // Drain any in-flight callback outside the lock; it may take the session lock. + if (timer != nullptr) + { + WaitForThreadpoolTimerCallbacks(timer, TRUE); + } + } + + // Records the start of an activity; cancels any pending idle teardown on the 0->1 transition. 
+ void AddActivity() noexcept + { + auto lock = m_lock.lock_exclusive(); + if (m_activityCount.fetch_add(1) == 0) + { + CancelLockHeld(); + } + } + + // Records the end of an activity; arms the idle timer on the 1->0 transition. + void ReleaseActivity() noexcept + { + auto lock = m_lock.lock_exclusive(); + const int previous = m_activityCount.fetch_sub(1); + FAIL_FAST_IF(previous <= 0); // Underflow is a fatal bug, not a recoverable condition. + if (previous == 1) + { + ArmLockHeld(); + } + } + + int ActivityCount() const noexcept + { + return m_activityCount.load(); + } + +private: + static void CALLBACK TimerCallback(PTP_CALLBACK_INSTANCE, PVOID Context, PTP_TIMER) noexcept + { + auto* self = static_cast(Context); + + std::function onIdle; + { + auto lock = self->m_lock.lock_exclusive(); + + // Activity resumed (count != 0) or teardown raced us (callback cleared): nothing to do. + if (self->m_activityCount.load() != 0 || !self->m_onIdle) + { + return; + } + + // Copy and invoke outside the lock: OnIdle takes the session lock, and holding this + // lock across that would invert the session-lock -> idle-lock ordering. + onIdle = self->m_onIdle; + } + + onIdle(); + } + + void ArmLockHeld() noexcept + { + if (!m_timer || !m_onIdle) + { + return; + } + + // Relative due time is expressed as a negative count of 100ns intervals. 
+ const int64_t relative = -static_cast(m_gracePeriod.count()) * 10000; + FILETIME due{}; + due.dwLowDateTime = static_cast(relative & 0xFFFFFFFF); + due.dwHighDateTime = static_cast((relative >> 32) & 0xFFFFFFFF); + SetThreadpoolTimer(m_timer.get(), &due, 0, 0); + } + + void CancelLockHeld() noexcept + { + if (m_timer) + { + SetThreadpoolTimer(m_timer.get(), nullptr, 0, 0); + } + } + + std::atomic m_activityCount{0}; + wil::srwlock m_lock; + + _Guarded_by_(m_lock) std::function m_onIdle; + _Guarded_by_(m_lock) std::chrono::milliseconds m_gracePeriod { 0 }; + _Guarded_by_(m_lock) wil::unique_threadpool_timer m_timer; +}; + +// RAII activity hold on an IdleState: increments on construction and decrements on destruction or +// reset(). Movable, non-copyable. Used by running containers to keep the VM alive without +// a client reference. Holds the IdleState via shared_ptr so it is safe even if it outlives the +// owning session. +class ActivityRef +{ +public: + ActivityRef() = default; + + explicit ActivityRef(std::shared_ptr State) noexcept : m_state(std::move(State)) + { + if (m_state) + { + m_state->AddActivity(); + } + } + + ActivityRef(ActivityRef&& Other) noexcept : m_state(std::exchange(Other.m_state, nullptr)) + { + } + + ActivityRef& operator=(ActivityRef&& Other) noexcept + { + if (this != &Other) + { + reset(); + m_state = std::exchange(Other.m_state, nullptr); + } + + return *this; + } + + ActivityRef(const ActivityRef&) = delete; + ActivityRef& operator=(const ActivityRef&) = delete; + + ~ActivityRef() + { + reset(); + } + + void reset() noexcept + { + if (m_state) + { + m_state->ReleaseActivity(); + m_state.reset(); + } + } + + explicit operator bool() const noexcept + { + return m_state != nullptr; + } + +private: + std::shared_ptr m_state; +}; + +} // namespace wsl::windows::service::wslc diff --git a/src/windows/wslcsession/WSLCProcess.h b/src/windows/wslcsession/WSLCProcess.h index 1a79525dc..9ce606a37 100644 --- 
a/src/windows/wslcsession/WSLCProcess.h +++ b/src/windows/wslcsession/WSLCProcess.h @@ -45,9 +45,19 @@ class DECLSPEC_UUID("AFBEA6D6-D8A4-4F81-8FED-F947EB74B33B") WSLCProcess HANDLE GetExitEvent(); int GetPid() const; + // Attaches an opaque keep-alive token whose lifetime is bound to this process object. A + // root-namespace process is not tracked as a container, so it relies on this token to hold an + // activity reference on the owning session for as long as the client keeps the process alive, + // preventing the idle worker from tearing the VM down (and killing the process) underneath it. + void SetKeepAliveToken(Microsoft::WRL::ComPtr&& Token) noexcept + { + m_keepAliveToken = std::move(Token); + } + private: WSLCProcessFlags m_flags; std::shared_ptr m_control; std::unique_ptr m_io; + Microsoft::WRL::ComPtr m_keepAliveToken; }; } // namespace wsl::windows::service::wslc \ No newline at end of file diff --git a/src/windows/wslcsession/WSLCProcessControl.cpp b/src/windows/wslcsession/WSLCProcessControl.cpp index 0c0e61eac..8c8da1837 100644 --- a/src/windows/wslcsession/WSLCProcessControl.cpp +++ b/src/windows/wslcsession/WSLCProcessControl.cpp @@ -101,7 +101,15 @@ void DockerContainerProcessControl::OnContainerReleased() noexcept // Signal the exit event to prevent callers from being blocked on it. if (!m_exitEvent.is_signaled()) { - m_exitedCode = 128 + WSLCSignalSIGKILL; + // If the container already produced a real exit code (recorded by SetExitCode but not yet + // signaled — e.g. an --rm container whose init-exit signal is deferred to the Destroy + // event), preserve it. Only synthesize SIGKILL when the container is released without ever + // having produced an exit code (an abrupt teardown of a still-running container). 
+ if (!m_exitedCode.has_value()) + { + m_exitedCode = 128 + WSLCSignalSIGKILL; + } + m_exitEvent.SetEvent(); } } diff --git a/src/windows/wslcsession/WSLCSession.cpp b/src/windows/wslcsession/WSLCSession.cpp index eefba3048..6471db9a4 100644 --- a/src/windows/wslcsession/WSLCSession.cpp +++ b/src/windows/wslcsession/WSLCSession.cpp @@ -42,8 +42,41 @@ constexpr auto c_storageVhdFilename = L"storage.vhdx"; constexpr DWORD c_processTerminateTimeoutMs = 30 * 1000; constexpr DWORD c_processKillTimeoutMs = 10 * 1000; +// Grace period to keep an otherwise-idle VM running before tearing it down. This avoids +// thrashing the VM (repeated teardown/recreate) when containers are created and destroyed, +// or operations issued, in quick succession. The clock restarts whenever the VM is observed +// to be non-idle, so a full grace period of continuous idleness is required before teardown. +constexpr auto c_vmIdleGracePeriod = std::chrono::seconds(30); + namespace { +// Validates the target path for a NEW session (one with no existing storage VHD): if the path +// already exists it must be an empty directory, so session storage is never mixed with unrelated +// user files. A non-existent path is fine (it will be created). Enforced eagerly at session +// creation and again when the storage VHD is lazily created. +void ValidateNewSessionStorageDirectory(const std::filesystem::path& StoragePath) +{ + // status's error_code distinguishes "doesn't exist yet" (OK, we'll create it) from other I/O errors. 
+ std::error_code ec; + const auto status = std::filesystem::status(StoragePath, ec); + if (ec && ec.value() != ERROR_FILE_NOT_FOUND && ec.value() != ERROR_PATH_NOT_FOUND) + { + THROW_IF_WIN32_ERROR_MSG(ec.value(), "status failed for %ls", StoragePath.c_str()); + } + + if (!std::filesystem::exists(status)) + { + return; + } + + THROW_HR_WITH_USER_ERROR_IF( + E_INVALIDARG, Localization::MessageWslcSessionStorageMustBeDirectory(StoragePath.c_str()), !std::filesystem::is_directory(status)); + + const bool empty = std::filesystem::is_empty(StoragePath, ec); + THROW_IF_WIN32_ERROR_MSG(ec.value(), "is_empty failed for %ls", StoragePath.c_str()); + THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessageWslcSessionStorageMustBeEmpty(StoragePath.c_str()), !empty); +} + // Group policy: WSLContainerRegistryAllowlist restricts which container-image // registries can be pulled from or pushed to. The check is enforced here at the // service boundary so it covers ALL callers (wslc.exe CLI, the WslcSDK C API, and @@ -331,7 +364,7 @@ HRESULT WSLCSession::Initialize( try { RETURN_HR_IF(E_POINTER, Settings == nullptr || VmFactory == nullptr); - RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_ALREADY_INITIALIZED), m_virtualMachine.has_value()); + RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_ALREADY_INITIALIZED), m_vmFactoryGitCookie != 0); THROW_HR_IF_MSG( E_INVALIDARG, WI_IsAnyFlagSet(Settings->FeatureFlags, ~WSLCFeatureFlagsValid), "Invalid feature flags: 0x%x", Settings->FeatureFlags); @@ -342,9 +375,32 @@ try Settings->StorageFlags); // Set up a warning context for the duration of initialization so that non-fatal - // failures (e.g., container/volume/network recovery) are streamed to the CLI. + // failures are streamed to the CLI. WSLCExecutionContext warningContext(this, WarningCallback); + // The VM (and storage VHD) is created lazily on the first operation. 
Validate the storage + // configuration eagerly here so misconfiguration is reported at session creation rather than + // surfacing later on the first VM-starting operation. + if (Settings->StoragePath != nullptr) + { + const std::filesystem::path storagePath{Settings->StoragePath}; + THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessagePathNotAbsolute(Settings->StoragePath), !storagePath.is_absolute()); + + if (WI_IsFlagSet(Settings->StorageFlags, WSLCSessionStorageFlagsNoCreate)) + { + // The storage VHD must already exist (ConfigureStorage will not create it). + THROW_HR_WITH_USER_ERROR_IF( + HRESULT_FROM_WIN32(ERROR_PATH_NOT_FOUND), + Localization::MessageWslcSessionStorageNotFound(Settings->StoragePath), + !std::filesystem::exists(storagePath / c_storageVhdFilename)); + } + else if (!std::filesystem::exists(storagePath / c_storageVhdFilename)) + { + // New session: the target path (if it exists) must be an empty directory. + ValidateNewSessionStorageDirectory(storagePath); + } + } + // N.B. No locking is required because Initialize() is always called before the session is returned to the caller. m_id = Settings->SessionId; m_displayName = Settings->DisplayName ? Settings->DisplayName : L""; @@ -352,8 +408,24 @@ try m_featureFlags = Settings->FeatureFlags; m_pluginNotifier = PluginNotifier; - // Get user token for the current process + // Park the VM factory in the Global Interface Table. It is supplied here (on the call that + // creates the session) but used on demand from other threads/apartments; storing the raw + // proxy and calling it later would raise RPC_E_WRONG_THREAD. + m_git = wil::CoCreateInstance(CLSID_StdGlobalInterfaceTable, CLSCTX_INPROC_SERVER); + THROW_IF_FAILED(m_git->RegisterInterfaceInGlobal(VmFactory, __uuidof(IWSLCVirtualMachineFactory), &m_vmFactoryGitCookie)); + + // Park the warning callback too. 
The VM (and resource recovery) is created lazily on the + // first operation, which may not carry its own warning callback, so recovery warnings are + // routed back to this callback via AcquireWarningCallback()/WSLCExecutionContext. + if (WarningCallback != nullptr) + { + THROW_IF_FAILED(m_git->RegisterInterfaceInGlobal(WarningCallback, __uuidof(IWarningCallback), &m_warningCallbackGitCookie)); + } + + // Persist a deep copy of the settings (and the creating user's SID) required to + // (re)create the VM on demand. const auto tokenInfo = wil::get_token_information(GetCurrentProcessToken()); + PersistSettings(*Settings, tokenInfo->User.Sid); WSL_LOG( "SessionInitialized", @@ -361,61 +433,430 @@ try TraceLoggingValue(m_displayName.c_str(), "DisplayName"), TraceLoggingValue(m_creatorProcessName.c_str(), "CreatorProcess")); - // Create the VM through the factory. The VM produces crash events; the session multiplexes - // them out to any registered ICrashDumpCallback subscribers via OnCrashDumpWritten. + // The VM is created lazily on the first operation that requires it (see EnsureVmRunning) and + // torn down once the session has been continuously idle (activity count zero) for the grace + // period. Wire up the idle-teardown timer; IdleState arms it whenever the activity count drops + // to zero and cancels it when activity resumes, so no dedicated worker thread is needed. + m_idleState->Initialize(c_vmIdleGracePeriod, [this]() { OnIdleTimer(); }); + + return S_OK; +} +CATCH_RETURN() + +void WSLCSession::PersistSettings(const WSLCSessionInitSettings& Settings, PSID UserSid) +{ + m_settings = Settings; + + // Repoint the string fields at storage owned by the session so they outlive the caller's buffers. 
+ m_settings.DisplayName = m_displayName.c_str(); + + if (Settings.CreatorProcessName != nullptr) + { + m_settingsCreatorProcessName = Settings.CreatorProcessName; + m_settings.CreatorProcessName = m_settingsCreatorProcessName->c_str(); + } + else + { + m_settings.CreatorProcessName = nullptr; + } + + if (Settings.StoragePath != nullptr) + { + m_settingsStoragePath = Settings.StoragePath; + m_settings.StoragePath = m_settingsStoragePath->c_str(); + } + else + { + m_settings.StoragePath = nullptr; + } + + if (Settings.RootVhdTypeOverride != nullptr) + { + m_settingsRootVhdTypeOverride = Settings.RootVhdTypeOverride; + m_settings.RootVhdTypeOverride = m_settingsRootVhdTypeOverride->c_str(); + } + else + { + m_settings.RootVhdTypeOverride = nullptr; + } + + if (UserSid != nullptr) + { + const auto length = GetLengthSid(UserSid); + const auto* bytes = reinterpret_cast(UserSid); + m_userSid.assign(bytes, bytes + length); + } + else + { + m_userSid.clear(); + } +} + +bool WSLCSession::IdleTerminationEnabled() const noexcept +{ + // Only tear the VM down when there is persistent storage to recover from. A tmpfs-backed + // session would lose all image/container state on teardown, so its VM is kept alive once started. + return m_settings.StoragePath != nullptr; +} + +void WSLCSession::EnsureVmRunning() +{ + if (m_vmState.load() == VmState::Running) + { + return; + } + + auto lock = m_lock.lock_exclusive(); + + // Do not (re)start the VM once the session is terminating or has terminated. This also + // bounds VmLease's retry loop: a lease that races with Terminate() fails here instead of + // restarting a VM that is being permanently torn down. 
+ THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), m_terminating.load() || m_sessionTerminatedEvent.is_signaled()); + + if (m_vmState.load() == VmState::Running) + { + return; + } + + StartVmLockHeld(); +} + +bool WSLCSession::TryClaimExpectedStop() noexcept +{ + auto expected = VmExitDisposition::Active; + return m_vmExitDisposition.compare_exchange_strong(expected, VmExitDisposition::StopRequested); +} + +bool WSLCSession::TryClaimSpontaneousExit() noexcept +{ + auto expected = VmExitDisposition::Active; + return m_vmExitDisposition.compare_exchange_strong(expected, VmExitDisposition::ExitClaimed); +} + +void WSLCSession::StartVmLockHeld() +{ + WI_ASSERT(m_vmState.load() != VmState::Running); + + WSL_LOG("WslcVmStarting", TraceLoggingValue(m_id, "SessionId")); + + m_vmState.store(VmState::Starting); + m_vmExitDisposition.store(VmExitDisposition::Active); + + // Tear back down if bring-up fails partway. The VM may have exited on its own during bring-up, + // so claim the stop first and only tear down if we win it (TryClaimExpectedStop()); otherwise + // OnVmExited() owns the teardown and we just release the lock to let its Terminate() finish. + auto startCleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { + if (TryClaimExpectedStop()) + { + TearDownVmLockHeld(); + m_vmState.store(VmState::None); + } + else + { + WSL_LOG("WslcVmExitedDuringStart", TraceLoggingValue(m_id, "SessionId")); + } + }); + + // Create a fresh IO relay for this VM instance. The previous one (if any) was stopped + // during teardown and cannot be restarted. + m_ioRelay.emplace(); + + // Create the VM via the factory. Re-fetch the factory from the GIT so we call it through a + // proxy marshalled into this thread's apartment (see m_git). The VM produces crash events; + // the session multiplexes them out to any registered ICrashDumpCallback subscribers via + // OnCrashDumpWritten. 
+ wil::com_ptr vmFactory; + THROW_IF_FAILED(m_git->GetInterfaceFromGlobal(m_vmFactoryGitCookie, __uuidof(IWSLCVirtualMachineFactory), vmFactory.put_void())); + wil::com_ptr vm; - THROW_IF_FAILED(VmFactory->CreateVirtualMachine(&vm)); + THROW_IF_FAILED(vmFactory->CreateVirtualMachine(&vm)); m_virtualMachine.emplace( vm.get(), - Settings, + &m_settings, m_sessionTerminatingEvent.get(), std::bind(&WSLCSession::OnCrashDumpWritten, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5)); - - // Make sure that everything is destroyed correctly if an exception is thrown. - auto errorCleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [&]() { LOG_IF_FAILED(Terminate()); }); - m_virtualMachine->Initialize(); // Get an event from the service that is signaled when the VM exits. + m_vmExitedEvent.reset(); THROW_IF_FAILED(vm->GetTerminationEvent(&m_vmExitedEvent)); // Configure storage. - ConfigureStorage(*Settings, tokenInfo->User.Sid); + ConfigureStorage(m_settings, m_userSid.empty() ? nullptr : reinterpret_cast(m_userSid.data())); // Mirror the host's trusted root CAs into the VM before dockerd starts. InstallTrustedRootCertificates(); - // Launch containerd first + // Launch containerd first, then dockerd with the external containerd socket. StartContainerd(); - // Launch dockerd with external containerd socket + // Reset the readiness event before (re)starting dockerd so a stale signal from a prior + // VM instance is not observed. + m_dockerdReadyEvent.ResetEvent(); StartDockerd(); // Wait for dockerd to be ready before starting the event tracker. 
THROW_WIN32_IF_MSG( - ERROR_TIMEOUT, !m_dockerdReadyEvent.wait(Settings->BootTimeoutMs), "Timed out waiting for dockerd to start"); + ERROR_TIMEOUT, !m_dockerdReadyEvent.wait(m_settings.BootTimeoutMs), "Timed out waiting for dockerd to start"); auto [_, __, channel] = m_virtualMachine->Fork(WSLC_FORK::Thread); m_dockerClient.emplace(std::move(channel), m_virtualMachine->TerminatingEvent(), m_virtualMachine->VmId(), 10 * 1000); // Start the event tracker. - m_eventTracker.emplace(m_dockerClient.value(), *this, m_ioRelay); + m_eventTracker.emplace(m_dockerClient.value(), *this, *m_ioRelay); m_volumes.emplace(m_dockerClient.value(), m_virtualMachine.value(), m_eventTracker.value(), m_storageVhdPath.parent_path()); // Monitor for unexpected VM exit. - m_ioRelay.AddHandle(std::make_unique(m_vmExitedEvent.get(), std::bind(&WSLCSession::OnVmExited, this))); + m_ioRelay->AddHandle(std::make_unique(m_vmExitedEvent.get(), std::bind(&WSLCSession::OnVmExited, this))); // Recover any existing resources from storage. RecoverExistingNetworks(); RecoverExistingContainers(); - errorCleanup.release(); - return S_OK; + m_vmState.store(VmState::Running); + startCleanup.release(); + + WSL_LOG("WslcVmStarted", TraceLoggingValue(m_id, "SessionId")); +} + +void WSLCSession::StopVmLockHeld() +{ + if (m_vmState.load() != VmState::Running) + { + return; + } + + WSL_LOG("WslcVmIdleStop", TraceLoggingValue(m_id, "SessionId")); + + // N.B. The caller has claimed StopRequested (via TryClaimExpectedStop), so VM/dockerd/containerd + // exit callbacks firing from the relay thread during teardown are treated as expected, not as a + // crash. 
+ m_vmState.store(VmState::Stopping); + + TearDownVmLockHeld(); + + m_vmState.store(VmState::None); +} + +void WSLCSession::TearDownVmLockHeld(bool CaptureTerminationReason) +{ + std::lock_guard containersLock(m_containersLock); + std::lock_guard networksLock(m_networksLock); + + m_containers.clear(); + m_volumes.reset(); + m_networks.clear(); + + // Stop the IO relay. + // This stops: + // - container state monitoring. + // - container init process relays + // - execs relays + // - container logs relays + if (m_ioRelay) + { + m_ioRelay->Stop(); + } + + { + std::lock_guard allocatedPortsLock(m_allocatedPortsLock); + m_allocatedPorts.clear(); + } + + m_eventTracker.reset(); + m_dockerClient.reset(); + + if (CaptureTerminationReason) + { + // Default: an explicit/graceful teardown is a shutdown (the VM is still alive and we are + // bringing it down). Overridden below if the VM exited on its own and recorded a cause. + m_terminationReason = WSLCVirtualMachineTerminationReasonShutdown; + } + + // Check if the VM has already exited (e.g., killed externally). + // If so, skip operations that require a live VM to avoid unnecessary waits. + // N.B. m_vmExitedEvent may be uninitialized if teardown runs before GetTerminationEvent() succeeds. + if (m_vmExitedEvent && m_vmExitedEvent.is_signaled()) + { + WSL_LOG("SkippingGracefulShutdown_VmDead", TraceLoggingValue(m_id, "SessionId")); + + // The VM exited on its own, so it recorded the cause. + if (CaptureTerminationReason && m_virtualMachine) + { + wil::unique_cotaskmem_string details; + LOG_IF_FAILED(m_virtualMachine->GetTerminationReason(&m_terminationReason, &details)); + m_terminationDetails = details ? details.get() : L""; + } + } + else if (m_virtualMachine) + { + m_virtualMachine->OnSessionTerminated(); + + // Stop dockerd first, then containerd (dockerd is a client of containerd). + // N.B. dockerd waits a couple seconds if there are any outstanding HTTP request sockets opened. 
+ if (m_dockerdProcess.has_value()) + { + auto dockerdExitCode = StopProcess(m_dockerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); + WSL_LOG("DockerdExit", TraceLoggingValue(dockerdExitCode, "code")); + } + + if (m_containerdProcess.has_value()) + { + auto containerdExitCode = StopProcess(m_containerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); + WSL_LOG("ContainerdExit", TraceLoggingValue(containerdExitCode, "code")); + } + + // N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running. + try + { + m_virtualMachine->Unmount(c_containerdStorage); + } + CATCH_LOG(); + } + + m_dockerdProcess.reset(); + m_containerdProcess.reset(); + m_virtualMachine.reset(); + + // Destroy the relay unless we're on its own thread (~IORelay joins the thread, which would + // deadlock). On unexpected-VM-exit path (runs on relay thread), leave it for ~WSLCSession. + if (!m_ioRelay || !m_ioRelay->IsRelayThread()) + { + m_ioRelay.reset(); + m_vmExitedEvent.reset(); + } + + // Delete the ephemeral swap VHD now that the VM is gone. + if (!m_swapVhdPath.empty()) + { + LOG_IF_WIN32_BOOL_FALSE(DeleteFileW(m_swapVhdPath.c_str())); + m_swapVhdPath.clear(); + } +} + +void WSLCSession::OnIdleTimer() +try +{ + // Idle teardown releases cross-process COM proxies (the VM and its VM-scoped state), so this + // threadpool callback must join the process MTA; otherwise those Release/calls fail with + // RPC_E_WRONG_THREAD. The function-try-block keeps this (and everything below) under CATCH_LOG: + // the threadpool callback that invokes us is noexcept, so an escaping throw would terminate. + const auto coInit = wil::CoInitializeEx(COINIT_MULTITHREADED); + + if (m_terminating.load() || !IdleTerminationEnabled()) + { + return; + } + + // Non-blocking acquire: a blocking exclusive would queue behind in-flight operations, and + // SRW locks favor waiting writers, stalling all new ops. 
If the lock is held, an operation + // is in flight; it holds an activity reference and will re-arm the timer (via the 1->0 + // transition) when it releases, so there is nothing to do here. + auto lock = m_lock.try_lock_exclusive(); + if (!lock) + { + return; + } + + // Re-check every teardown precondition under the lock. The activity count is the single + // source of truth for "the VM is needed"; a 0->1 transition since the timer fired (cancel + // raced the callback) is caught here. + if (m_terminating.load() || m_vmState.load() != VmState::Running || m_idleState->ActivityCount() != 0) + { + return; + } + + // Claim the stop. If we lose, OnVmExited() owns a spontaneous-exit teardown and is spinning for + // this lock, so release it and let that run instead of joining the relay ourselves. + if (!TryClaimExpectedStop()) + { + return; + } + + // Restore Active on completion (or early exit) so the next StartVmLockHeld starts clean; only + // clear our own claim. + auto dispositionCleanup = wil::scope_exit([this]() { + auto stopRequested = VmExitDisposition::StopRequested; + m_vmExitDisposition.compare_exchange_strong(stopRequested, VmExitDisposition::Active); + }); + + StopVmLockHeld(); +} +CATCH_LOG(); + +WSLCSession::VmLease WSLCSession::AcquireVmLease() +{ + return VmLease(*this); +} + +WSLCSession::VmLease::VmLease(WSLCSession& Session) : m_session(&Session) +{ + // Record an in-flight operation before bringing the VM up so idle teardown cannot tear it down + // between EnsureVmRunning() and acquiring the shared lock. AddActivity cancels any pending idle + // timer. + m_session->m_idleState->AddActivity(); + + auto countCleanup = wil::scope_exit([this]() { + m_session->m_idleState->ReleaseActivity(); + m_session = nullptr; + }); + + // Activity increment may race with idle teardown. Retry until we hold the lock with VM running. 
+ for (;;) + { + m_session->EnsureVmRunning(); + + m_lock = m_session->m_lock.lock_shared(); + + if (m_session->m_vmState.load() == VmState::Running) + { + break; + } + + m_lock.reset(); + } + + countCleanup.release(); +} + +WSLCSession::VmLease::VmLease(VmLease&& Other) noexcept : + m_session(std::exchange(Other.m_session, nullptr)), m_lock(std::move(Other.m_lock)) +{ +} + +WSLCSession::VmLease& WSLCSession::VmLease::operator=(VmLease&& Other) noexcept +{ + if (this != &Other) + { + if (m_session != nullptr) + { + // Release the shared lock before the activity reference so that, if this was the last + // activity, idle teardown can immediately take the exclusive lock. + m_lock.reset(); + m_session->m_idleState->ReleaseActivity(); + } + + m_session = std::exchange(Other.m_session, nullptr); + m_lock = std::move(Other.m_lock); + } + + return *this; +} + +WSLCSession::VmLease::~VmLease() +{ + if (m_session != nullptr) + { + // Release the shared lock before the activity reference so that, if this was the last + // activity, idle teardown can immediately take the exclusive lock. ReleaseActivity arms the + // idle timer on the 1->0 transition. + m_lock.reset(); + m_session->m_idleState->ReleaseActivity(); + } } -CATCH_RETURN() WSLCSession::~WSLCSession() { @@ -483,23 +924,7 @@ void WSLCSession::ConfigureStorage(const WSLCSessionInitSettings& Settings, PSID WI_IsFlagSet(Settings.StorageFlags, WSLCSessionStorageFlagsNoCreate)); // Reject any non-empty existing path so we don't mix user files with session storage. - // status's error_code distinguishes "doesn't exist yet" (OK, we'll create it) from other I/O errors. 
- std::error_code ec; - const auto status = std::filesystem::status(storagePath, ec); - if (ec && ec.value() != ERROR_FILE_NOT_FOUND && ec.value() != ERROR_PATH_NOT_FOUND) - { - THROW_IF_WIN32_ERROR_MSG(ec.value(), "status failed for %ls", storagePath.c_str()); - } - - if (std::filesystem::exists(status)) - { - THROW_HR_WITH_USER_ERROR_IF( - E_INVALIDARG, Localization::MessageWslcSessionStorageMustBeDirectory(storagePath.c_str()), !std::filesystem::is_directory(status)); - - const bool empty = std::filesystem::is_empty(storagePath, ec); - THROW_IF_WIN32_ERROR_MSG(ec.value(), "is_empty failed for %ls", storagePath.c_str()); - THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessageWslcSessionStorageMustBeEmpty(storagePath.c_str()), !empty); - } + ValidateNewSessionStorageDirectory(storagePath); // If the VHD wasn't found, create it. WSL_LOG("CreateStorageVhd", TraceLoggingValue(m_storageVhdPath.c_str(), "StorageVhdPath")); @@ -567,7 +992,7 @@ CATCH_RETURN(); void WSLCSession::OnDockerdExited() { - if (!m_sessionTerminatingEvent.is_signaled()) + if (!m_sessionTerminatingEvent.is_signaled() && m_vmExitDisposition.load() != VmExitDisposition::StopRequested) { WSL_LOG("UnexpectedDockerdExit", TraceLoggingValue(m_displayName.c_str(), "Name")); } @@ -575,7 +1000,7 @@ void WSLCSession::OnDockerdExited() void WSLCSession::OnContainerdExited() { - if (!m_sessionTerminatingEvent.is_signaled()) + if (!m_sessionTerminatingEvent.is_signaled() && m_vmExitDisposition.load() != VmExitDisposition::StopRequested) { WSL_LOG("UnexpectedContainerdExit", TraceLoggingValue(m_displayName.c_str(), "Name")); } @@ -583,6 +1008,14 @@ void WSLCSession::OnContainerdExited() void WSLCSession::OnVmExited() { + // A spontaneous exit we must permanently terminate, unless an expected stop already claimed it, + // in which case the exit was wanted and we decline. 
+ if (!TryClaimSpontaneousExit()) + { + WSL_LOG("WslcVmExitedDuringStop", TraceLoggingValue(m_id, "SessionId")); + return; + } + WSL_LOG( "VmExited", TraceLoggingLevel(WINEVENT_LEVEL_WARNING), @@ -625,13 +1058,13 @@ ServiceRunningProcess WSLCSession::StartProcess( auto process = launcher.Launch(*m_virtualMachine); - m_ioRelay.AddHandle(std::make_unique( + m_ioRelay->AddHandle(std::make_unique( process.GetStdHandle(1), [this, LogSource](const auto& data) { OnProcessLog(data, LogSource); }, false)); - m_ioRelay.AddHandle(std::make_unique( + m_ioRelay->AddHandle(std::make_unique( process.GetStdHandle(2), [this, LogSource](const auto& data) { OnProcessLog(data, LogSource); }, false)); - m_ioRelay.AddHandle(std::make_unique(process.GetExitEvent(), std::move(ExitCallback))); + m_ioRelay->AddHandle(std::make_unique(process.GetExitEvent(), std::move(ExitCallback))); return process; } @@ -839,7 +1272,7 @@ try auto [repo, tagOrDigest] = wslutil::ParseImage(Image); EnforceRegistryAllowlist(repo); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); if (!tagOrDigest.has_value()) @@ -897,7 +1330,7 @@ try comCall = RegisterUserCOMCallback(); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); @@ -1222,7 +1655,7 @@ try WSLCExecutionContext context(this, WarningCallback); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1258,7 +1691,7 @@ try tag = tagOrDigest.value(); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1392,7 +1825,7 @@ try RETURN_HR_IF_NULL(E_POINTER, ImageNameOrID); RETURN_HR_IF(E_INVALIDARG, strlen(ImageNameOrID) > WSLC_MAX_IMAGE_NAME_LENGTH); - auto lock = 
m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1425,7 +1858,7 @@ try names.emplace_back(ImageNames->Values[i]); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1500,7 +1933,7 @@ try filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Options->Filters, Options->FiltersCount); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1609,7 +2042,7 @@ try *DeletedImages = nullptr; *Count = 0; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1683,7 +2116,7 @@ try RETURN_HR_IF_NULL(E_POINTER, Options->Tag); RETURN_HR_IF(E_INVALIDARG, strlen(Options->Repo) + strlen(Options->Tag) + 1 > WSLC_MAX_IMAGE_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); @@ -1720,7 +2153,7 @@ try auto [repo, tagOrDigest] = wslutil::ParseImage(Image); EnforceRegistryAllowlist(repo); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); auto requestContext = m_dockerClient->PushImage(repo, tagOrDigest, RegistryAuthenticationInformation); @@ -1741,7 +2174,7 @@ try *Output = nullptr; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); *Output = wil::make_unique_ansistring(InspectImageLockHeld(ImageNameOrId).c_str()).release(); @@ -1789,7 +2222,7 @@ try *IdentityToken = nullptr; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); 
THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); wil::unique_cotaskmem_ansistring token; @@ -1821,7 +2254,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); docker_schema::PruneImageResult pruneResult; @@ -1882,7 +2315,7 @@ try "Invalid process flags: 0x%x", containerOptions->InitProcessOptions.Flags); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); auto result = wil::ResultFromException([&]() { CreateContainerImpl(containerOptions, Container); }); @@ -1960,7 +2393,7 @@ void WSLCSession::CreateContainerImpl(const WSLCContainerOptions* containerOptio std::bind(&WSLCSession::OnContainerDeleted, this, std::placeholders::_1), m_eventTracker.value(), m_dockerClient.value(), - m_ioRelay); + *m_ioRelay); // Key the map by Docker's container ID, which is set in the WSLCContainerImpl constructor and stable for its lifetime. auto [it, inserted] = m_containers.emplace(container->ID(), std::move(container)); @@ -1993,7 +2426,7 @@ try ValidateName(Id, WSLC_MAX_CONTAINER_NAME_LENGTH); // Look for an exact ID match first. - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); std::lock_guard containersLock{m_containersLock}; // Purge containers that were auto-deleted via OnEvent (--rm). @@ -2032,6 +2465,73 @@ try } CATCH_RETURN(); +namespace { + + // Activity token holds an activity reference to prevent idle VM teardown while client holds it. + // Implements IFastRundown so crashed clients reclaim stub promptly instead of slow default rundown. + class ContainerOperation + : public Microsoft::WRL::RuntimeClass, IUnknown, IFastRundown> + { + public: + // Adopts an activity reference from CreateActivityToken; callback releases it. 
+ void Initialize(std::function&& onRelease) noexcept + { + m_onRelease = std::move(onRelease); + } + + ~ContainerOperation() override + { + if (m_onRelease) + { + m_onRelease(); + } + } + + private: + std::function m_onRelease; + }; + +} // namespace + +Microsoft::WRL::ComPtr WSLCSession::CreateActivityToken() +{ + // Record the in-flight activity up front so the VM cannot idle-terminate before the caller + // takes ownership of the returned token. + m_idleState->AddActivity(); + auto countCleanup = wil::scope_exit([this]() { m_idleState->ReleaseActivity(); }); + + auto operation = Microsoft::WRL::Make(); + THROW_IF_NULL_ALLOC(operation.Get()); + + // Capture shared idle state so token can outlive session and release activity without keeping session alive. + std::shared_ptr idleState = m_idleState; + operation->Initialize([idleState = std::move(idleState)]() { idleState->ReleaseActivity(); }); + + // The token now owns the activity-count reference and will release it on destruction. + countCleanup.release(); + + Microsoft::WRL::ComPtr token; + THROW_IF_FAILED(operation.As(&token)); + return token; +} + +HRESULT WSLCSession::BeginContainerOperation(IUnknown** Operation) +try +{ + WSLCExecutionContext context(this); + + RETURN_HR_IF_NULL(E_POINTER, Operation); + *Operation = nullptr; + + // Record the in-flight operation up front so the VM cannot idle-terminate before the client + // resolves the container and issues the operation (and streams any output). 
+ auto token = CreateActivityToken(); + + RETURN_IF_FAILED(token.CopyTo(Operation)); + return S_OK; +} +CATCH_RETURN(); + HRESULT WSLCSession::ListContainers( const WSLCListContainersOptions* Options, WSLCContainerEntry** Containers, ULONG* Count, WSLCContainerPortMapping** Ports, ULONG* PortsCount) try @@ -2066,7 +2566,7 @@ try filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Options->Filters, Options->FiltersCount); } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); std::vector dockerContainers; @@ -2145,7 +2645,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); RETURN_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient.has_value()); std::lock_guard containersLock{m_containersLock}; @@ -2216,10 +2716,18 @@ try *Errno = -1; // Make sure not to return 0 if something fails. } - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); auto process = m_virtualMachine->CreateLinuxProcess(Executable, *Options, TtyRows, TtyColumns, Errno); + + // The VmLease above is released when this call returns, but the process keeps running in the + // VM and the client holds the returned proxy. A root-namespace process is not tracked as a + // container, so attach an activity token bound to the process's lifetime; this keeps the VM + // alive for as long as the client holds the process, preventing the idle worker from tearing + // the VM down and killing the process out from under the client. 
+ process->SetKeepAliveToken(CreateActivityToken()); + THROW_IF_FAILED(process.CopyTo(Process)); return S_OK; @@ -2242,7 +2750,7 @@ try THROW_HR_WITH_USER_ERROR_IF(E_INVALIDARG, Localization::MessagePathNotAbsolute(Path), !std::filesystem::path(Path).is_absolute()); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); // Attach the disk to the VM (AttachDisk() performs the access check for the VHD file). @@ -2270,7 +2778,7 @@ try auto driverOpts = wslutil::ParseKeyValuePairs(Options->DriverOpts, Options->DriverOptsCount); auto labels = wslutil::ParseKeyValuePairs(Options->Labels, Options->LabelsCount, WSLCVolumeMetadataLabel); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); if (Options->Name != nullptr && Options->Name[0] != '\0') @@ -2290,7 +2798,7 @@ try RETURN_HR_IF_NULL(E_POINTER, Name); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); m_volumes->DeleteVolume(Name); @@ -2311,7 +2819,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); auto volumeList = m_volumes->ListVolumes(std::move(filters)); @@ -2343,7 +2851,7 @@ try std::string name = Name; ValidateName(name.c_str(), WSLC_MAX_VOLUME_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_volumes); std::string json = m_volumes->InspectVolume(name); @@ -2368,7 +2876,7 @@ try auto filters = wsl::windows::common::wslutil::ParseKeyMultiValuePairs(Filters, FiltersCount); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), 
!m_volumes); WSLCVolumes::PruneVolumesResult pruneResult; @@ -2458,7 +2966,7 @@ try THROW_HR_WITH_USER_ERROR_IF( E_INVALIDARG, Localization::MessageWslcGatewayRequiresSubnet(), driverOpts.contains("Gateway") && !driverOpts.contains("Subnet")); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); @@ -2559,7 +3067,7 @@ try std::string name = Name; ValidateName(name.c_str(), WSLC_MAX_NETWORK_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); @@ -2599,7 +3107,7 @@ try *Networks = nullptr; *Count = 0; - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); std::lock_guard networksLock(m_networksLock); if (m_networks.empty()) @@ -2638,7 +3146,7 @@ try std::string name = Name; ValidateName(name.c_str(), WSLC_MAX_NETWORK_NAME_LENGTH); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); std::lock_guard networksLock(m_networksLock); auto it = m_networks.find(name); @@ -2689,7 +3197,7 @@ try // Scope the prune to WSLC-managed networks. filters["label"].push_back(WSLCNetworkManagedLabel); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_dockerClient); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); @@ -2822,97 +3330,56 @@ try // Acquire an exclusive lock to ensure that no operation is running. WI_VERIFY(sessionLock); - std::lock_guard containersLock(m_containersLock); - std::lock_guard networksLock(m_networksLock); + // Tear down the VM (if running) and all VM-scoped state, capturing the termination reason. + // This mirrors the soft teardown used for idle shutdown, but here it is permanent. 
+ TearDownVmLockHeld(/* CaptureTerminationReason */ true); - m_containers.clear(); - m_volumes.reset(); - m_networks.clear(); + m_vmState.store(VmState::None); - // Stop the IO relay. - // This stops: - // - container state monitoring. - // - container init process relays - // - execs relays - // - container logs relays - m_ioRelay.Stop(); + // Signal completion last so any observer of the terminated event sees a fully torn-down + // session and a populated termination reason. + m_sessionTerminatedEvent.SetEvent(); - { - std::lock_guard allocatedPortsLock(m_allocatedPortsLock); - m_allocatedPorts.clear(); - } + // Release the exclusive lock before disarming the idle timer. If a timer callback is currently + // blocked acquiring the exclusive lock (about to evaluate idle teardown), it must be able to + // obtain it, observe m_terminating, and return — otherwise Disarm()'s wait for in-flight + // callbacks below would deadlock. + sessionLock.reset(); - m_eventTracker.reset(); - m_dockerClient.reset(); + // Permanently disable idle teardown and drain any in-flight timer callback so it cannot + // reference this session after it is destroyed. + m_idleState->Disarm(); - // Check if the VM has already exited (e.g., killed externally). - // If so, skip operations that require a live VM to avoid unnecessary waits. - // N.B. m_vmExitedEvent may be uninitialized if Terminate() is called from the - // Initialize() error path before GetTerminationEvent() succeeds. - if (m_vmExitedEvent && m_vmExitedEvent.is_signaled()) + // Idle teardown is disabled and no operation can run past termination, so the parked VM + // factory can no longer be re-fetched; revoke it from the GIT. + if (m_vmFactoryGitCookie != 0) { - WSL_LOG("SkippingGracefulShutdown_VmDead", TraceLoggingValue(m_id, "SessionId")); - - // The VM exited on its own, so it recorded the cause. 
- if (m_virtualMachine) - { - wil::unique_cotaskmem_string details; - LOG_IF_FAILED(m_virtualMachine->GetTerminationReason(&m_terminationReason, &details)); - m_terminationDetails = details ? details.get() : L""; - } + LOG_IF_FAILED(m_git->RevokeInterfaceFromGlobal(m_vmFactoryGitCookie)); + m_vmFactoryGitCookie = 0; } - else - { - // The VM is still alive, so this is a graceful shutdown initiated by us. - m_terminationReason = WSLCVirtualMachineTerminationReasonShutdown; - - if (m_virtualMachine) - { - m_virtualMachine->OnSessionTerminated(); - // Stop dockerd first, then containerd (dockerd is a client of containerd). - // N.B. dockerd waits a couple seconds if there are any outstanding HTTP request sockets opened. - if (m_dockerdProcess.has_value()) - { - auto dockerdExitCode = StopProcess(m_dockerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); - WSL_LOG("DockerdExit", TraceLoggingValue(dockerdExitCode, "code")); - } - - if (m_containerdProcess.has_value()) - { - auto containerdExitCode = StopProcess(m_containerdProcess.value(), c_processTerminateTimeoutMs, c_processKillTimeoutMs); - WSL_LOG("ContainerdExit", TraceLoggingValue(containerdExitCode, "code")); - } - - // N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running. - if (m_storageMounted) - { - try - { - m_virtualMachine->Unmount(c_containerdStorage); - m_storageMounted = false; - } - CATCH_LOG(); - } - } + if (m_warningCallbackGitCookie != 0) + { + LOG_IF_FAILED(m_git->RevokeInterfaceFromGlobal(m_warningCallbackGitCookie)); + m_warningCallbackGitCookie = 0; } - m_dockerdProcess.reset(); - m_containerdProcess.reset(); - m_virtualMachine.reset(); + return S_OK; +} +CATCH_RETURN(); - // Delete the ephemeral swap VHD now that the VM is gone. 
- if (!m_swapVhdPath.empty()) +wil::com_ptr WSLCSession::AcquireWarningCallback() const +{ + wil::com_ptr callback; + if (m_warningCallbackGitCookie != 0) { - LOG_IF_WIN32_BOOL_FALSE(DeleteFileW(m_swapVhdPath.c_str())); - m_swapVhdPath.clear(); + // Best-effort: the creating client's proxy may already be gone (e.g. the CLI exited before + // a later VM restart), in which case the warning falls through to the default sink. + LOG_IF_FAILED(m_git->GetInterfaceFromGlobal(m_warningCallbackGitCookie, __uuidof(IWarningCallback), callback.put_void())); } - m_sessionTerminatedEvent.SetEvent(); - - return S_OK; + return callback; } -CATCH_RETURN(); HRESULT WSLCSession::RegisterCrashDumpCallback(_In_ ICrashDumpCallback* Callback, _Out_ IUnknown** Subscription) try @@ -2977,7 +3444,7 @@ try RETURN_HR_IF_NULL(E_POINTER, WindowsPath); RETURN_HR_IF_NULL(E_POINTER, LinuxPath); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); return m_virtualMachine->MountWindowsFolder(WindowsPath, LinuxPath, ReadOnly); @@ -2991,7 +3458,7 @@ try RETURN_HR_IF_NULL(E_POINTER, LinuxPath); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); return m_virtualMachine->UnmountWindowsFolder(LinuxPath); @@ -3003,7 +3470,7 @@ try { WSLCExecutionContext context(this); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); std::lock_guard allocatedPortsLock(m_allocatedPortsLock); @@ -3049,7 +3516,7 @@ try { WSLCExecutionContext context(this); - auto lock = m_lock.lock_shared(); + auto lock = AcquireVmLease(); THROW_HR_IF(HRESULT_FROM_WIN32(ERROR_INVALID_STATE), !m_virtualMachine); std::lock_guard allocatedPortsLock(m_allocatedPortsLock); @@ -3370,7 +3837,11 @@ void WSLCSession::CancelUserCOMCallbacks() void WSLCSession::OnContainerDeleted(const 
WSLCContainerImpl* Container) { - auto lock = m_lock.lock_shared(); + // N.B. Invoked only from WSLCContainer::Delete, which already holds a VmLease (the shared + // session lock). The lease prevents a concurrent idle teardown from clearing m_containers, + // so this only needs m_containersLock. It must NOT re-acquire the shared session lock here: + // doing so while the idle worker is queued for the exclusive lock would deadlock (recursive + // shared acquire behind a pending writer). std::lock_guard containersLock(m_containersLock); WI_VERIFY(m_containers.erase(Container->ID()) == 1); @@ -3438,7 +3909,7 @@ void WSLCSession::RecoverExistingContainers() std::bind(&WSLCSession::OnContainerDeleted, this, std::placeholders::_1), m_eventTracker.value(), m_dockerClient.value(), - m_ioRelay); + *m_ioRelay); auto [it, inserted] = m_containers.emplace(container->ID(), std::move(container)); WI_ASSERT(inserted); diff --git a/src/windows/wslcsession/WSLCSession.h b/src/windows/wslcsession/WSLCSession.h index b9f36e2f9..f0f1294f6 100644 --- a/src/windows/wslcsession/WSLCSession.h +++ b/src/windows/wslcsession/WSLCSession.h @@ -18,12 +18,15 @@ Module Name: #include "WSLCCompat.h" #include "WSLCVirtualMachine.h" #include "WSLCContainer.h" +#include "WSLCIdleState.h" #include "WSLCVolumes.h" #include "WSLCNetworkMetadata.h" #include "DockerEventTracker.h" #include "DockerHTTPClient.h" #include "IORelay.h" +#include #include +#include #include namespace wsl::windows::service::wslc { @@ -71,11 +74,16 @@ class UserCOMCallback // // WSLCSession - Implements IWSLCSession for container management. // Runs in a per-user COM server process for security isolation. -// The SYSTEM service creates the VM and passes IWSLCVirtualMachine to Initialize(). +// The SYSTEM service passes an IWSLCVirtualMachineFactory to Initialize(); the VM is created +// lazily on first use and may be torn down when idle and recreated on demand. 
// class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession : public Microsoft::WRL::RuntimeClass, IWSLCSession, IWSLCCompatSession, IFastRundown, ISupportErrorInfo> { + // WSLCContainer::Delete acquires a VmLease to keep the VM alive (and block idle + // teardown) for the duration of a container deletion. + friend class WSLCContainer; + public: WSLCSession() = default; @@ -140,6 +148,7 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession // Container management. IFACEMETHOD(CreateContainer)(_In_ const WSLCContainerOptions* Options, _In_opt_ IWarningCallback* WarningCallback, _Out_ IWSLCContainer** Container) override; IFACEMETHOD(OpenContainer)(_In_ LPCSTR Id, _In_ IWSLCContainer** Container) override; + IFACEMETHOD(BeginContainerOperation)(_Outptr_ IUnknown** Operation) override; IFACEMETHOD(ListContainers)( _In_opt_ const WSLCListContainersOptions* Options, _Out_ WSLCContainerEntry** Containers, @@ -244,6 +253,13 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession UserCOMCallback RegisterUserCOMCallback(); void UnregisterUserCOMCallback(DWORD ThreadId); + // Returns the warning callback supplied when the session was created/entered, re-marshalled + // into the calling apartment. Used as a fallback by WSLCExecutionContext so that warnings + // emitted by operations that carry no explicit callback (e.g. resource recovery during the + // lazy VM start) still reach the session creator. Returns null if no callback was supplied + // or the creating client's proxy is no longer reachable. + wil::com_ptr AcquireWarningCallback() const; + HANDLE SessionTerminatingEvent() const noexcept { return m_sessionTerminatingEvent.get(); @@ -256,9 +272,94 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession bool WaitForEventOrSessionTerminating(HANDLE Event, std::chrono::milliseconds Timeout) const; + // Shared idle-termination state. Exposed so VM-scoped objects (e.g. 
running containers via + // WSLCContainerImpl's ActivityRef) can hold an activity reference for their lifetime without + // keeping the session object itself alive. + std::shared_ptr IdleStateShared() const noexcept + { + return m_idleState; + } + + // Creates an opaque activity token that holds a reference on this session's activity count for + // its lifetime, deferring idle teardown of the VM until every outstanding token is released. + // Used both for transient client operations (BeginContainerOperation) and to keep the VM alive + // for the lifetime of a process whose wrapper a client may keep (root-namespace and exec'd + // processes). + Microsoft::WRL::ComPtr CreateActivityToken(); + private: ULONG m_id = 0; + // VM lifecycle state for on-demand creation / idle termination. + enum class VmState + { + None, + Starting, + Running, + Stopping, + }; + + // Single-owner arbitration for a VM exit: exactly one side tears a given VM instance down, + // resolved atomically. An expected stop (idle teardown or bring-up cleanup, on a lock-holding + // thread) and a spontaneous exit (OnVmExited, lock-free on the relay thread) each try to claim + // it; the loser declines. This avoids both a missed teardown and a deadlock (a lock-holder + // joining the relay thread while OnVmExited spins for the lock in Terminate()). A fresh VM + // starts Active. Claim via TryClaim*(), not by touching the atomic directly. + enum class VmExitDisposition + { + Active, // Running normally; a VM exit is unexpected and triggers a permanent Terminate(). + StopRequested, // An expected (soft) stop is in progress; OnVmExited treats the exit as expected. + ExitClaimed, // OnVmExited owns the permanent teardown of a spontaneous exit. + }; + + // Claims an expected (soft) stop of the current VM from a lock-holding thread. On success a + // racing OnVmExited() declines, so the caller may tear down (joining the relay thread is safe). 
+ // On failure OnVmExited() already owns a spontaneous-exit teardown and is spinning for the lock + // in Terminate(); the caller must not tear down or it deadlocks joining the relay. + [[nodiscard]] bool TryClaimExpectedStop() noexcept; + + // Claims the teardown of a spontaneous VM exit from OnVmExited(). Fails if an expected stop is + // already in progress, in which case the exit was wanted and the caller declines. + [[nodiscard]] bool TryClaimSpontaneousExit() noexcept; + + _Requires_exclusive_lock_held_(m_lock) + void StartVmLockHeld(); + _Requires_exclusive_lock_held_(m_lock) + void StopVmLockHeld(); + _Requires_exclusive_lock_held_(m_lock) + void TearDownVmLockHeld(bool CaptureTerminationReason = false); + void EnsureVmRunning(); + + // Idle-teardown callback invoked by IdleState's timer once the VM has been continuously idle + // (activity count zero) for the grace period. Runs on a threadpool thread. + void OnIdleTimer(); + bool IdleTerminationEnabled() const noexcept; + void PersistSettings(const WSLCSessionInitSettings& Settings, PSID UserSid); + + // RAII lease taken at the top of every VM-requiring operation. On construction it + // ensures the VM is running (lazily restarting it if it was idle-terminated) and records + // an in-flight operation so idle teardown is deferred; it then holds the shared session + // lock for the operation's duration. On destruction it releases the lock and triggers an + // idle check. 
+ class VmLease + { + public: + VmLease() = default; + explicit VmLease(WSLCSession& Session); + VmLease(VmLease&& Other) noexcept; + VmLease& operator=(VmLease&& Other) noexcept; + ~VmLease(); + + VmLease(const VmLease&) = delete; + VmLease& operator=(const VmLease&) = delete; + + private: + WSLCSession* m_session{}; + wil::rwlock_release_shared_scope_exit m_lock; + }; + + [[nodiscard]] VmLease AcquireVmLease(); + __requires_lock_held(m_userHandlesLock) void CancelUserHandleIO(); __requires_lock_held(m_userCOMCallbacksLock) void CancelUserCOMCallbacks(); @@ -297,6 +398,19 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession void StreamImageOperation(DockerHTTPClient::HTTPRequestContext& requestContext, LPCSTR Image, LPCSTR OperationName, IProgressCallback* ProgressCallback); std::optional m_dockerClient; + + // The VM factory is a cross-process proxy supplied by the SYSTEM service at Initialize() time + // but first used later (on demand) from a different thread/apartment. A directly stored proxy + // would fail with RPC_E_WRONG_THREAD, so it is parked in the process Global Interface Table and + // re-fetched (re-marshalled into the calling apartment) each time a VM is created. + wil::com_ptr m_git; + DWORD m_vmFactoryGitCookie{}; + + // The warning callback supplied at Initialize() is parked in the GIT for the same reason as + // the VM factory: it is used later, on demand, from other threads/apartments (a directly + // stored proxy would fail with RPC_E_WRONG_THREAD). Zero if no callback was supplied. 
+ DWORD m_warningCallbackGitCookie{}; + std::optional m_virtualMachine; std::optional m_eventTracker; wil::unique_event m_dockerdReadyEvent{wil::EventOptions::ManualReset}; @@ -322,7 +436,26 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession WSLCVirtualMachineTerminationReason m_terminationReason{WSLCVirtualMachineTerminationReasonUnknown}; std::wstring m_terminationDetails; wil::srwlock m_lock; - IORelay m_ioRelay; + std::optional m_ioRelay; + + // VM lifecycle / idle-termination state. + std::atomic m_vmState{VmState::None}; + std::atomic m_vmExitDisposition{VmExitDisposition::Active}; + // In-flight activity count, idle timer and teardown callback, decoupled from this object's + // lifetime (see IdleState in WSLCIdleState.h) so activity tokens and container COM wrappers can + // safely manage activity without keeping the session alive. See WSLCContainerImpl's ActivityRef + // (m_activityHold), WSLCSession::VmLease and CreateActivityToken(). + std::shared_ptr m_idleState{std::make_shared()}; + + // Persisted settings required to (re)create the VM on demand. The string fields point + // into the owned storage members below (or m_displayName) so they remain valid for the + // lifetime of the session. + WSLCSessionInitSettings m_settings{}; + std::optional m_settingsCreatorProcessName; + std::optional m_settingsStoragePath; + std::optional m_settingsRootVhdTypeOverride; + std::vector m_userSid; + std::optional m_containerdProcess; std::optional m_dockerdProcess; WSLCFeatureFlags m_featureFlags{}; diff --git a/test/windows/WSLCTests.cpp b/test/windows/WSLCTests.cpp index 8baad20ab..0d6016782 100644 --- a/test/windows/WSLCTests.cpp +++ b/test/windows/WSLCTests.cpp @@ -11148,6 +11148,10 @@ class WSLCTests auto settings = GetDefaultSessionSettings(c_sessionName); auto session = CreateSession(settings); + // Session creation is lazy, so start the VM by launching a process before killing it. 
+ WSLCProcessLauncher launcher("/bin/sleep", {"/bin/sleep", "60"}); + auto process = launcher.Launch(*session); + KillVmByOwner(c_sessionName); WaitForSessionTermination(session.get()); @@ -11239,6 +11243,10 @@ class WSLCTests VERIFY_SUCCEEDED(sessionManager2->CreateSession(&settings2, WSLCSessionFlagsNone, warningCallback.Get(), &session2)); wsl::windows::common::security::ConfigureForCOMImpersonation(session2.get()); + // The VM (and container recovery) starts lazily on the first operation. Trigger it so + // recovery runs and its warning is delivered to the session's warning callback. + VERIFY_IS_TRUE(WSLCProcessLauncher("/bin/sh", {"/bin/sh", "-c", "exit 0"}).Launch(*session2).GetExitEvent().wait(30000)); + // Verify the warning matches the expected localized message for the corrupt container. auto warnings = warningCallback->GetWarnings(); auto expectedWarning = std::format( @@ -11307,6 +11315,10 @@ class WSLCTests VERIFY_SUCCEEDED(sessionManager->CreateSession(&settings, WSLCSessionFlagsNone, warningCallback.Get(), &session)); wsl::windows::common::security::ConfigureForCOMImpersonation(session.get()); + // The VM (and volume recovery) starts lazily on the first operation. Trigger it so + // recovery runs and its warning is delivered to the session's warning callback. + VERIFY_IS_TRUE(WSLCProcessLauncher("/bin/sh", {"/bin/sh", "-c", "exit 0"}).Launch(*session).GetExitEvent().wait(30000)); + // Verify the warning matches the expected localized message for the missing volume. auto warnings = warningCallback->GetWarnings(); auto expectedWarning = @@ -11372,6 +11384,10 @@ class WSLCTests VERIFY_SUCCEEDED(sessionManager->CreateSession(&settings, WSLCSessionFlagsNone, warningCallback.Get(), &session)); wsl::windows::common::security::ConfigureForCOMImpersonation(session.get()); + // The VM (and guest volume recovery) starts lazily on the first operation. Trigger it so + // recovery runs and its warning is delivered to the session's warning callback. 
+ VERIFY_IS_TRUE(WSLCProcessLauncher("/bin/sh", {"/bin/sh", "-c", "exit 0"}).Launch(*session).GetExitEvent().wait(30000)); + auto warnings = warningCallback->GetWarnings(); auto expectedWarning = std::format(L"wsl: {}\n", wsl::shared::Localization::MessageWslcFailedToRecoverVolume(c_volumeName)); diff --git a/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp b/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp index c04c4b767..c512452cf 100644 --- a/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp +++ b/test/windows/wslc/e2e/WSLCE2ESessionEnterTests.cpp @@ -109,9 +109,14 @@ class WSLCE2ESessionEnterTests WSLC_TEST_METHOD(WSLCE2E_SessionEnter_StoragePathNotFound) { auto result = RunWslc(L"system session enter does-not-exist"); - const auto expectedPath = std::filesystem::absolute(L"does-not-exist").wstring(); + + // The CLI resolves the storage argument to an absolute path (see EnterSession task) and the + // service validates it eagerly at session creation, reporting the friendly "No WSLC session + // found in ''" message rather than a bare system error. + const auto storagePath = std::filesystem::absolute(L"does-not-exist").wstring(); result.Verify({ - .Stderr = std::format(L"No WSLC session found in '{}'\r\nError code: ERROR_PATH_NOT_FOUND\r\n", expectedPath), + .Stderr = wsl::shared::Localization::MessageWslcSessionStorageNotFound(storagePath) + + L"\r\nError code: ERROR_PATH_NOT_FOUND\r\n", .ExitCode = 1, }); }