From 20cc67a218998ed56295d4f4f32f4c8a89f83251 Mon Sep 17 00:00:00 2001 From: Jan Krivanek Date: Thu, 21 May 2026 07:36:49 +0200 Subject: [PATCH 1/3] Stabilize TaskNodesDieAfterBuild: bump WaitForExit timeout to 15s The TaskHost child process is expected to terminate shortly after the build completes, but on slow CI agents (Helix Linux/macOS in particular) the existing 3-second budget has proven insufficient and produces flaky test failures (~3 distinct branches in the past week). Bump the wait to 15 seconds and include the captured pid + HasExited state in the assertion message so future failures are easier to diagnose. Fixes #43 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs b/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs index a11f0472571..6541e7c451d 100644 --- a/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs +++ b/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs @@ -87,7 +87,14 @@ public void TaskNodesDieAfterBuild(bool taskHostFactorySpecified, bool envVariab try { Process taskHostNode = Process.GetProcessById(pid); - taskHostNode.WaitForExit(3000).ShouldBeTrue("The process with taskHostNode is still running."); + + // The task host should exit shortly after the build completes. Use a generous + // timeout because slow CI agents have been observed to take up to ~10s for the + // child process to drain stdio and exit (issue #43). + const int TaskHostExitTimeoutMs = 15000; + bool exited = taskHostNode.WaitForExit(TaskHostExitTimeoutMs); + exited.ShouldBeTrue( + $"TaskHost (pid={pid}) was still running after {TaskHostExitTimeoutMs}ms. HasExited={taskHostNode.HasExited}"); } // We expect the TaskHostNode to exit quickly. If it exits before Process.GetProcessById, it will throw an ArgumentException. From 0117981d3316a65707ef41dcd77af8823e5f6637 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Thu, 21 May 2026 08:11:39 +0200 Subject: [PATCH 2/3] Add elapsed-time + process-identity telemetry to TaskNodesDieAfterBuild Reviewer (expert-reviewer on PR #44) noted that a bare WaitForExit timeout bump papers over a PID-reuse race: between build-end and GetProcessById, the OS may have recycled the pid to an unrelated process. We capture ProcessName and StartTime up front so a future failure shows the identity rather than looking like the task host hung. The Stopwatch around WaitForExit is mandated by the flaky-test-survey skill workflow: every timeout bump must log actual elapsed ms so a follow-up PR can tune the timeout back down to a tight-but-safe value. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../BackEnd/TaskHostFactory_Tests.cs | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs b/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs index 6541e7c451d..2df2805e822 100644 --- a/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs +++ b/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs @@ -88,13 +88,30 @@ public void TaskNodesDieAfterBuild(bool taskHostFactorySpecified, bool envVariab { Process taskHostNode = Process.GetProcessById(pid); + // Capture identity up-front so a PID-reuse race (the OS recycled this + // pid to an unrelated process between build-end and GetProcessById) is + // visible in the failure diagnostic rather than looking like the task + // host hung. See jankratochvilcz/msbuild#43. + string capturedName = SafeGetProcessField(() => taskHostNode.ProcessName); + string capturedStart = SafeGetProcessField(() => taskHostNode.StartTime.ToString("O", CultureInfo.InvariantCulture)); + // The task host should exit shortly after the build completes. Use a generous // timeout because slow CI agents have been observed to take up to ~10s for the // child process to drain stdio and exit (issue #43). + // TELEMETRY: elapsedMs is logged so a future iteration can tune this back down + // to a tight-but-safe value. If observed elapsed never approaches the timeout, + // shrink TaskHostExitTimeoutMs in a follow-up PR. const int TaskHostExitTimeoutMs = 15000; + Stopwatch sw = Stopwatch.StartNew(); bool exited = taskHostNode.WaitForExit(TaskHostExitTimeoutMs); + sw.Stop(); + _output.WriteLine( + $"TaskHostFactory wait: pid={pid} processName={capturedName} startTime={capturedStart} " + + $"exited={exited} elapsedMs={sw.ElapsedMilliseconds} timeoutMs={TaskHostExitTimeoutMs}"); + exited.ShouldBeTrue( - $"TaskHost (pid={pid}) was still running after {TaskHostExitTimeoutMs}ms. HasExited={taskHostNode.HasExited}"); + $"TaskHost (pid={pid}, name={capturedName}, started={capturedStart}) was still running after {TaskHostExitTimeoutMs}ms. " + + $"elapsedMs={sw.ElapsedMilliseconds} HasExited={taskHostNode.HasExited}"); } // We expect the TaskHostNode to exit quickly. If it exits before Process.GetProcessById, it will throw an ArgumentException. @@ -155,6 +172,21 @@ public void TaskNodesDieAfterBuild(bool taskHostFactorySpecified, bool envVariab } } + // Some Process fields (ProcessName, StartTime) can throw if the process + // has already exited or access is denied. We capture them best-effort for + // diagnostic output; never let a diagnostic read fail the test. + private static string SafeGetProcessField(Func read) + { + try + { + return read(); + } + catch (Exception ex) + { + return $""; + } + } + /// /// Verifies that transient (TaskHostFactory) and sidecar (AssemblyTaskFactory) task hosts /// can coexist in the same build and operate independently. From 9b0b907963b618af8ca00e7a9b53913726be5037 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Thu, 21 May 2026 09:00:16 +0200 Subject: [PATCH 3/3] Drop fork-specific issue refs from source comments Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs b/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs index 2df2805e822..a2e9e19e42d 100644 --- a/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs +++ b/src/Build.UnitTests/BackEnd/TaskHostFactory_Tests.cs @@ -91,13 +91,13 @@ public void TaskNodesDieAfterBuild(bool taskHostFactorySpecified, bool envVariab // Capture identity up-front so a PID-reuse race (the OS recycled this // pid to an unrelated process between build-end and GetProcessById) is // visible in the failure diagnostic rather than looking like the task - // host hung. See jankratochvilcz/msbuild#43. + // host hung. string capturedName = SafeGetProcessField(() => taskHostNode.ProcessName); string capturedStart = SafeGetProcessField(() => taskHostNode.StartTime.ToString("O", CultureInfo.InvariantCulture)); // The task host should exit shortly after the build completes. Use a generous // timeout because slow CI agents have been observed to take up to ~10s for the - // child process to drain stdio and exit (issue #43). + // child process to drain stdio and exit. // TELEMETRY: elapsedMs is logged so a future iteration can tune this back down // to a tight-but-safe value. If observed elapsed never approaches the timeout, // shrink TaskHostExitTimeoutMs in a follow-up PR. @@ -109,9 +109,12 @@ public void TaskNodesDieAfterBuild(bool taskHostFactorySpecified, bool envVariab $"TaskHostFactory wait: pid={pid} processName={capturedName} startTime={capturedStart} " + $"exited={exited} elapsedMs={sw.ElapsedMilliseconds} timeoutMs={TaskHostExitTimeoutMs}"); + // Wrap HasExited in SafeGetProcessField — Process.HasExited can throw on + // access-denied / transient handle failures, and the message is evaluated + // eagerly even when the assertion passes. exited.ShouldBeTrue( $"TaskHost (pid={pid}, name={capturedName}, started={capturedStart}) was still running after {TaskHostExitTimeoutMs}ms. " + - $"elapsedMs={sw.ElapsedMilliseconds} HasExited={taskHostNode.HasExited}"); + $"elapsedMs={sw.ElapsedMilliseconds} HasExited={SafeGetProcessField(() => taskHostNode.HasExited.ToString())}"); } // We expect the TaskHostNode to exit quickly. If it exits before Process.GetProcessById, it will throw an ArgumentException.