diff --git a/src/windows/common/CMakeLists.txt b/src/windows/common/CMakeLists.txt index 6dbe4c0b1..cd889d59b 100644 --- a/src/windows/common/CMakeLists.txt +++ b/src/windows/common/CMakeLists.txt @@ -42,6 +42,7 @@ set(SOURCES WslCoreNetworkingSupport.cpp WslInstall.cpp WslSecurity.cpp + SlowOperationWatcher.cpp WslTelemetry.cpp VirtioNetworking.cpp wslutil.cpp @@ -119,6 +120,7 @@ set(HEADERS WslCoreNetworkingSupport.h WslInstall.h WslSecurity.h + SlowOperationWatcher.h WslTelemetry.h VirtioNetworking.h wslutil.h diff --git a/src/windows/common/SlowOperationWatcher.cpp b/src/windows/common/SlowOperationWatcher.cpp new file mode 100644 index 000000000..e08a51591 --- /dev/null +++ b/src/windows/common/SlowOperationWatcher.cpp @@ -0,0 +1,76 @@ +/*++ + +Copyright (c) Microsoft. All rights reserved. + +Module Name: + + SlowOperationWatcher.cpp + +Abstract: + + See header for contract. A single-shot threadpool timer is armed for SlowThreshold + in the constructor. If it fires, the callback emits one `SlowOperation` telemetry + event with the phase name and captured std::source_location. The timer is owned by + wil::unique_threadpool_timer, whose destroyer cancels pending callbacks and blocks + for any in-flight callback before closing, so OnTimerFired cannot dereference + `*this` after destruction. + +--*/ + +#include "precomp.h" +#include "SlowOperationWatcher.h" + +namespace { +FILETIME RelativeFileTime(std::chrono::milliseconds Relative) noexcept +{ + // Negative FILETIME means "relative to now", in 100ns units. Matches the pattern used + // elsewhere in the service (see Lifetime.cpp). + return wil::filetime::from_int64(-wil::filetime_duration::one_millisecond * Relative.count()); +} + +// std::source_location::file_name() returns the path as the compiler saw it, which on +// MSVC is an absolute build-agent path. Strip to the basename so telemetry groups the +// same file across different build environments without leaking machine-specific paths. +// The substring is taken from the same null-terminated char array, so the returned view's +// data() is safe to pass to C APIs that expect a null-terminated string. +constexpr std::string_view Basename(std::string_view Path) noexcept +{ + const auto pos = Path.find_last_of("\\/"); + return pos == std::string_view::npos ? Path : Path.substr(pos + 1); +} + +static_assert(Basename("/foo/bar/test.cpp") == "test.cpp"); +static_assert(Basename("C:\\src\\test.cpp") == "test.cpp"); +static_assert(Basename("no_separator.cpp") == "no_separator.cpp"); +} // namespace + +SlowOperationWatcher::SlowOperationWatcher(_In_z_ const char* Name, std::chrono::milliseconds SlowThreshold, std::source_location Location) : + m_name(Name), m_slowThreshold(SlowThreshold), m_location(Location) +{ + m_timer.reset(CreateThreadpoolTimer(OnTimerFired, this, nullptr)); + THROW_IF_NULL_ALLOC(m_timer.get()); + + FILETIME due = RelativeFileTime(m_slowThreshold); + SetThreadpoolTimer(m_timer.get(), &due, 0, 0); +} + +void SlowOperationWatcher::Reset() noexcept +{ + m_timer.reset(); +} + +void CALLBACK SlowOperationWatcher::OnTimerFired(PTP_CALLBACK_INSTANCE, PVOID Context, PTP_TIMER) noexcept +try +{ + auto* self = static_cast(Context); + + WSL_LOG_TELEMETRY( + "SlowOperation", + PDT_ProductAndServicePerformance, + TraceLoggingValue(self->m_name, "name"), + TraceLoggingInt64(self->m_slowThreshold.count(), "thresholdMs"), + TraceLoggingValue(Basename(self->m_location.file_name()).data(), "file"), + TraceLoggingValue(self->m_location.function_name(), "function"), + TraceLoggingUInt32(self->m_location.line(), "line")); +} +CATCH_LOG() diff --git a/src/windows/common/SlowOperationWatcher.h b/src/windows/common/SlowOperationWatcher.h new file mode 100644 index 000000000..55b86c637 --- /dev/null +++ b/src/windows/common/SlowOperationWatcher.h @@ -0,0 +1,81 @@ +/*++ + +Copyright (c) Microsoft. All rights reserved. + +Module Name: + + SlowOperationWatcher.h + +Abstract: + + RAII guard that watches a scoped operation. A threadpool timer is armed in the + constructor for `SlowThreshold` (10 s default). On the fast path (scope exits + before the threshold) the watcher's destructor cancels and drains the timer and + nothing is emitted. If the threshold is reached first -- including while the + scope is still running or hung indefinitely -- the timer callback fires once, + emitting a single `SlowOperation` telemetry event carrying the phase name and + the call site captured via std::source_location, so the backend can attribute + where time is spent. + + Usage: + + SlowOperationWatcher slow{"WaitForMiniInitConnect"}; + m_miniInitChannel = wsl::shared::SocketChannel{AcceptConnection(timeout), ...}; + + If the scope needs to outlive the watched operation (for example to keep a + pointer into an internal receive buffer alive without a nested block), call + Reset() to disarm the watcher early: + + SlowOperationWatcher slow{"WaitForCreateInstanceResult"}; + const auto& result = channel.ReceiveMessage<...>(...); + slow.Reset(); + // result remains valid and usable here + +--*/ + +#pragma once + +#include +#include +#include +#include + +class SlowOperationWatcher +{ +public: + // Name is restricted to a string-literal reference (const char (&)[N]) to guarantee + // static storage duration: the raw pointer is dereferenced later from a threadpool + // callback, so accepting a `const char*` would make UAF via a temporary (e.g. + // std::string::c_str()) easy. Keep Name a short CamelCase phase identifier that the + // backend query can switch on (e.g. "WaitForMiniInitConnect"). + template + explicit SlowOperationWatcher( + const char (&Name)[N], + std::chrono::milliseconds SlowThreshold = std::chrono::seconds{10}, + std::source_location Location = std::source_location::current()) : + SlowOperationWatcher(static_cast(Name), SlowThreshold, Location) + { + } + + ~SlowOperationWatcher() noexcept = default; + + // Disarm the watcher early. After Reset() returns, the threshold callback is + // guaranteed not to fire. Relies on wil::unique_threadpool_timer's destroyer to + // cancel pending callbacks and drain any in-flight one. + void Reset() noexcept; + + SlowOperationWatcher(const SlowOperationWatcher&) = delete; + SlowOperationWatcher& operator=(const SlowOperationWatcher&) = delete; + SlowOperationWatcher(SlowOperationWatcher&&) = delete; + SlowOperationWatcher& operator=(SlowOperationWatcher&&) = delete; + +private: + explicit SlowOperationWatcher(_In_z_ const char* Name, std::chrono::milliseconds SlowThreshold, std::source_location Location); + + static void CALLBACK OnTimerFired(PTP_CALLBACK_INSTANCE, PVOID Context, PTP_TIMER) noexcept; + + const char* const m_name; + const std::chrono::milliseconds m_slowThreshold; + const std::source_location m_location; + wil::unique_threadpool_timer m_timer; +}; diff --git a/src/windows/common/precomp.h b/src/windows/common/precomp.h index 08a8140c1..3f9cb2d9d 100644 --- a/src/windows/common/precomp.h +++ b/src/windows/common/precomp.h @@ -134,6 +134,7 @@ Module Name: // Telemetry Header #include "WslTelemetry.h" +#include "SlowOperationWatcher.h" // LxCore headers #include diff --git a/src/windows/service/exe/PluginManager.cpp b/src/windows/service/exe/PluginManager.cpp index e4d23f226..29c39b9e0 100644 --- a/src/windows/service/exe/PluginManager.cpp +++ b/src/windows/service/exe/PluginManager.cpp @@ -181,6 +181,7 @@ void PluginManager::OnVmStarted(const WSLSessionInformation* Session, const WSLV WSL_LOG( "PluginOnVmStartedCall", TraceLoggingValue(e.name.c_str(), "Plugin"), TraceLoggingValue(Session->UserSid, "Sid")); + SlowOperationWatcher slowOperation{"PluginOnVmStarted"}; ThrowIfPluginError(e.hooks.OnVMStarted(Session, Settings), Session->SessionId, e.name.c_str()); } } @@ -217,6 +218,7 @@ void PluginManager::OnDistributionStarted(const WSLSessionInformation* Session, TraceLoggingValue(Session->UserSid, "Sid"), TraceLoggingValue(Distribution->Id, "DistributionId")); + SlowOperationWatcher slowOperation{"PluginOnDistributionStarted"}; ThrowIfPluginError(e.hooks.OnDistributionStarted(Session, Distribution), Session->SessionId, e.name.c_str()); } } diff --git a/src/windows/service/exe/WslCoreInstance.cpp b/src/windows/service/exe/WslCoreInstance.cpp index 98b7379c4..3bd416f7f 100644 --- a/src/windows/service/exe/WslCoreInstance.cpp +++ b/src/windows/service/exe/WslCoreInstance.cpp @@ -44,8 +44,12 @@ WslCoreInstance::WslCoreInstance( m_initChannel = std::make_shared(InitSocket.release(), m_runtimeId, m_socketTimeout); // Read a message from the init daemon. This will let us know if anything failed during startup. + // The watcher is disarmed as soon as the receive returns so its reported duration reflects + // only the wait, not the rest of the constructor. gsl::span span; + SlowOperationWatcher slowOperation{"WaitForCreateInstanceResult"}; const auto& result = m_initChannel->GetChannel().ReceiveMessage(&span, m_socketTimeout); + slowOperation.Reset(); if (result.WarningsOffset != 0) { for (const auto& e : wsl::shared::string::Split(wsl::shared::string::FromSpan(span, result.WarningsOffset), '\n')) @@ -373,6 +377,7 @@ void WslCoreInstance::Initialize() // If drive mounting is supported, ensure that DrvFs has been initialized. if (WI_IsFlagSet(m_configuration.Flags, LXSS_DISTRO_FLAGS_ENABLE_DRIVE_MOUNTING)) { + SlowOperationWatcher slowOperation{"WaitForDrvFsInit"}; drvfsMount = m_initializeDrvFs(m_userToken.get()); } @@ -394,8 +399,12 @@ void WslCoreInstance::Initialize() transaction.Send(gsl::span(config)); // Init replies with information about the distribution. + // The watcher is disarmed as soon as the receive returns so its reported duration reflects + // only the wait, not the subsequent interop-server launch. gsl::span span; + SlowOperationWatcher slowOperation{"WaitForInitConfigResponse"}; const auto& response = transaction.Receive(&span); + slowOperation.Reset(); m_defaultUid = response.DefaultUid; m_plan9Port = response.Plan9Port; m_distributionInfo.PidNamespace = response.PidNamespace; diff --git a/src/windows/service/exe/WslCoreVm.cpp b/src/windows/service/exe/WslCoreVm.cpp index 72551f377..907adf151 100644 --- a/src/windows/service/exe/WslCoreVm.cpp +++ b/src/windows/service/exe/WslCoreVm.cpp @@ -321,7 +321,10 @@ void WslCoreVm::Initialize(const GUID& VmId, const wil::shared_handle& UserToken // Create the utility VM and store the runtime ID. std::wstring json = GenerateConfigJson(); - m_system = wsl::windows::common::hcs::CreateComputeSystem(m_machineId.c_str(), json.c_str()); + { + SlowOperationWatcher slowOperation{"HcsCreateSystem"}; + m_system = wsl::windows::common::hcs::CreateComputeSystem(m_machineId.c_str(), json.c_str()); + } m_runtimeId = wsl::windows::common::hcs::GetRuntimeId(m_system.get()); WI_ASSERT(IsEqualGUID(VmId, m_runtimeId)); @@ -346,6 +349,7 @@ void WslCoreVm::Initialize(const GUID& VmId, const wil::shared_handle& UserToken // Start the utility VM. try { + SlowOperationWatcher slowOperation{"HcsStartSystem"}; wsl::windows::common::hcs::StartComputeSystem(m_system.get(), json.c_str()); } catch (...) @@ -436,13 +440,20 @@ void WslCoreVm::Initialize(const GUID& VmId, const wil::shared_handle& UserToken } // Accept a connection from mini_init with a receive timeout so the service does not get stuck waiting for a response from the VM. - m_miniInitChannel = wsl::shared::SocketChannel{AcceptConnection(m_vmConfig.KernelBootTimeout), "mini_init", m_terminatingEvent.get()}; + { + SlowOperationWatcher slowOperation{"WaitForMiniInitConnect"}; + m_miniInitChannel = + wsl::shared::SocketChannel{AcceptConnection(m_vmConfig.KernelBootTimeout), "mini_init", m_terminatingEvent.get()}; + } // Accept the connection from the Linux guest for notifications. m_notifyChannel = AcceptConnection(m_vmConfig.KernelBootTimeout); // Receive and parse the guest kernel version - ReadGuestCapabilities(); + { + SlowOperationWatcher slowOperation{"ReadGuestCapabilities"}; + ReadGuestCapabilities(); + } // Mount the system distro. // N.B. If using SCSI, the system distro is added during VM creation. @@ -551,7 +562,10 @@ void WslCoreVm::Initialize(const GUID& VmId, const wil::shared_handle& UserToken wsl::windows::common::hcs::unique_hcn_network natNetwork; if (m_vmConfig.NetworkingMode == NetworkingMode::Nat) { - natNetwork = wsl::core::NatNetworking::CreateNetwork(m_vmConfig); + { + SlowOperationWatcher slowOperation{"CreateNatNetwork"}; + natNetwork = wsl::core::NatNetworking::CreateNetwork(m_vmConfig); + } if (!natNetwork) { EMIT_USER_WARNING(wsl::shared::Localization::MessageNetworkInitializationFailedFallback2( @@ -1177,7 +1191,9 @@ std::shared_ptr WslCoreVm::CreateInstance( { // Add the VHD to the machine. auto lock = m_lock.lock_exclusive(); + SlowOperationWatcher slowOperation{"AttachDistroVhd"}; const auto lun = AttachDiskLockHeld(Configuration.VhdFilePath.c_str(), DiskType::VHD, MountFlags::None, {}, false, m_userToken.get()); + slowOperation.Reset(); // Launch the init daemon and create the instance. int flags = LxMiniInitMessageFlagNone; @@ -1241,7 +1257,9 @@ std::shared_ptr WslCoreVm::CreateInstanceInternal( WI_ClearFlagIf(localConfig.Flags, LXSS_DISTRO_FLAGS_ENABLE_DRIVE_MOUNTING, !m_vmConfig.EnableHostFileSystemAccess); // Establish a communication channel with the init daemon. + SlowOperationWatcher slowOperation{"WaitForInitDaemonConnect"}; auto initSocket = AcceptConnection(ReceiveTimeout); + slowOperation.Reset(); // If the system distro is enabled, establish a communication channel with its init daemon. wil::unique_socket systemDistroSocket;