diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs
index fe799684..1faa690c 100644
--- a/PolyPilot.Tests/MultiAgentRegressionTests.cs
+++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs
@@ -1987,6 +1987,34 @@ public async Task CancellationToken_PropagatedToWorkerTasks()
         Assert.True(workerCancelled);
     }
 
+    /// <summary>
+    /// INV-O14: The re-resume loop must NOT skip IsProcessing siblings. Their
+    /// CopilotSession is tied to the old client (which was disposed), so the event
+    /// stream is permanently dead. The loop must force-complete them so the orchestrator
+    /// retries immediately rather than waiting 2–5 min for the watchdog.
+    /// </summary>
+    [Fact]
+    public void ReconnectLoop_IsProcessingSiblings_ForceCompletedNotSkipped()
+    {
+        var source = File.ReadAllText(Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs"));
+
+        // Find the Task.Run sibling re-resume block (ordinal search: exact code-text match, independent of machine culture)
+        var taskRunIdx = source.IndexOf("Re-resume all OTHER non-codespace sessions", System.StringComparison.Ordinal);
+        Assert.True(taskRunIdx >= 0, "Re-resume loop must exist in SendPromptAsync");
+
+        // The inspected block ends at the first 'catch (Exception reEx)' found after the marker comment
+        var blockEnd = source.IndexOf("catch (Exception reEx)", taskRunIdx, System.StringComparison.Ordinal);
+        Assert.True(blockEnd > taskRunIdx, "Catch block must follow the re-resume loop");
+        var loopBlock = source.Substring(taskRunIdx, blockEnd - taskRunIdx);
+
+        // INV-O14: must NOT use bare 'continue' on IsProcessing — this was the bug
+        Assert.DoesNotContain("if (otherState.Info.IsProcessing) continue;", loopBlock);
+
+        // INV-O14: must call ForceCompleteProcessingAsync for IsProcessing siblings
+        Assert.Contains("ForceCompleteProcessingAsync", loopBlock);
+        Assert.Contains("client-recreated-dead-event-stream", loopBlock);
+    }
+
     #endregion
 
     #region PendingOrchestration Persistence Tests
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index a932c5b0..450629bb 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ 
b/PolyPilot/Services/CopilotService.cs @@ -2650,10 +2650,17 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis if (kvp.Key == sessionName) continue; var otherState = kvp.Value; if (string.IsNullOrEmpty(otherState.Info.SessionId)) continue; - // Skip siblings that are actively processing — re-resuming - // them would orphan mid-turn state and cause TaskCanceledException - // in orchestrator workers. Let their existing watchdog handle recovery. - if (otherState.Info.IsProcessing) continue; + // INV-O14: IsProcessing siblings have dead event streams — + // their CopilotSession was tied to the old client which was + // just disposed. Force-abort so the orchestrator retries + // immediately instead of waiting 2–5 min for the watchdog. + if (otherState.Info.IsProcessing) + { + Debug($"[RECONNECT] Sibling '{kvp.Key}' is IsProcessing with dead event stream — force-completing before re-resume"); + try { await ForceCompleteProcessingAsync(kvp.Key, otherState, "client-recreated-dead-event-stream"); } + catch (Exception forceEx) { Debug($"[RECONNECT] Failed to force-complete sibling '{kvp.Key}': {forceEx.Message}"); } + // Fall through to re-resume the session on the new client + } var otherMeta = sessionSnapshots.FirstOrDefault(m => m.SessionName == kvp.Key); if (otherMeta?.GroupId != null && groupSnapshots.Any(g => g.Id == otherMeta.GroupId && g.IsCodespace))