diff --git a/package.json b/package.json index 30429c4b4..1893c5bf8 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,7 @@ "test:watch": "pnpm --filter @sentry/junior test:watch", "evals": "pnpm --filter @sentry/junior-evals evals", "evals:record": "pnpm --filter @sentry/junior-evals evals:record", - "typecheck": "pnpm --filter @sentry/junior-plugin-api typecheck && pnpm --filter @sentry/junior-scheduler typecheck && pnpm --filter @sentry/junior typecheck && pnpm --filter @sentry/junior-dashboard typecheck && pnpm --filter @sentry/junior-testing typecheck && pnpm --filter @sentry/junior-example typecheck", + "typecheck": "pnpm --filter @sentry/junior-plugin-api typecheck && pnpm --filter @sentry/junior-scheduler typecheck && pnpm --filter @sentry/junior typecheck && pnpm --filter @sentry/junior-evals typecheck && pnpm --filter @sentry/junior-dashboard typecheck && pnpm --filter @sentry/junior-testing typecheck && pnpm --filter @sentry/junior-example typecheck", "skills:check": "pnpm --filter @sentry/junior skills:check", "test:ci": "pnpm --filter @sentry/junior build && pnpm --filter @sentry/junior-dashboard build && pnpm --filter @sentry/junior test:coverage && pnpm --filter @sentry/junior-dashboard test:coverage" }, diff --git a/packages/junior-evals/README.md b/packages/junior-evals/README.md index f4dd13a49..c30b6a65a 100644 --- a/packages/junior-evals/README.md +++ b/packages/junior-evals/README.md @@ -23,7 +23,7 @@ Quick mapping: - `evals/*`: Integration-style coverage for conversation-level agent behavior and quality scoring through the runtime harness. - `tests/unit/*` (or non-integration tests): isolated logic/invariant tests. -This separation is enforced by `pnpm --filter @sentry/junior run test:slack-boundary`. +This separation is enforced by `pnpm --filter @sentry/junior run test:boundaries`. ## What Is In Scope @@ -59,22 +59,28 @@ For each `it()` case inside a `describeEval()` suite: 2. 
Create a fresh runtime instance for the case via the chat composition root; do not mutate the production singleton runtime. 3. Route message events through real ingress + queue-worker behavior, with only the external queue transport replaced by an in-memory harness shim. 4. Return observed artifacts as JSON for LLM judgment, including structured `assistant_posts` with text plus actual attached-file metadata, and Slack-visible metadata. + The output also includes compact `turn_diagnostics` so evals can assert user-facing runtime metadata such as selected thinking level without scraping logs. The helper pretty-prints this JSON so failure output stays readable in local runs and CI. 5. `vitest-evals` scores the output against `criteria` (A–E → 1.0–0.0). Harness override knobs (in `EvalOverrides`): -- `auto_complete_mcp_oauth`: after our app genuinely starts an MCP OAuth flow for the listed providers, the harness immediately completes the fake provider callback. -- `auto_complete_oauth`: after our app genuinely starts a generic OAuth flow for the listed providers, the harness immediately completes the fake provider callback. -- `credential_providers`: seed normal provider credentials for the listed providers. GitHub uses dummy GitHub App env vars plus an intercepted installation-token exchange; Sentry uses the normal OAuth token store. -- `fail_reply_call`: force a non-retryable reply failure on a specific call. -- `mock_image_generation`: stub the image-generation HTTP response with a valid image payload while still exercising the real attachment path. -- `plugin_dirs`: load plugin fixtures from eval-local directories without adding workspace packages. -- `reply_texts`: override returned reply text per call. -- `reply_timeout_ms`: lower or set the per-reply harness timeout for a specific scenario. It cannot exceed 30 seconds. -- `subscribed_decisions`: controls the subscribed-message reply gate in the harness. 
If you use it, do not claim that reply-selection behavior is being validated by the eval itself. - -These knobs work by overriding services on the eval-local runtime instance. They must not reintroduce mutable global runtime behavior seams. +- `auth.autoCompleteMcpOAuth`: after our app genuinely starts an MCP OAuth flow for the listed providers, the harness immediately completes the fake provider callback. +- `auth.autoCompleteOAuth`: after our app genuinely starts a generic OAuth flow for the listed providers, the harness immediately completes the fake provider callback. +- `auth.credentialProviders`: seed normal provider credentials for the listed providers. GitHub uses dummy GitHub App env vars plus an intercepted installation-token exchange; Sentry uses the normal OAuth token store. +- `plugins.pluginDirs`: load plugin fixtures from eval-local directories without adding workspace packages. +- `plugins.pluginPackages`: load named workspace plugin packages for plugin-specific behavior evals. +- `plugins.skillDirs`: load skill fixture directories into the real reply-generation path. +- `replyGeneration.cannedResults`: return structured reply results for downstream delivery or resilience scenarios. +- `replyGeneration.cannedTexts`: return reply text per successful call for downstream delivery scenarios. +- `replyGeneration.failCall`: force a non-retryable reply failure on a specific call. +- `replyGeneration.mockImageGeneration`: stub the image-generation HTTP response with a valid image payload while still exercising the real attachment path. +- `replyGeneration.timeoutMs`: lower or set the per-reply harness timeout for a specific scenario. It cannot exceed 30 seconds. +- `replyGeneration.unsetGatewayCredentials`: remove gateway credentials for the duration of real reply generation when the scenario explicitly covers missing credential behavior. +- `subscribedReplyDecisions`: controls the subscribed-message reply gate in the harness. 
If you use it, do not claim that reply-selection behavior is being validated by the eval itself. + +These knobs configure role-named scenario adapters on the eval-local runtime instance. They must not reintroduce mutable global runtime behavior seams or nested production service override bags. +`replyGeneration.cannedTexts` and `replyGeneration.cannedResults` bypass real reply generation, so use them only for downstream delivery behavior, not prompt, model-routing, or thinking-level coverage. Tool replay: @@ -106,7 +112,7 @@ Evals require real Vercel Sandbox access. If sandbox bootstrap fails, the eval f - Add core cases under `evals/core/*.eval.ts` and plugin-specific cases under `evals//` using `describeEval()` with `slackEvals`. - Use event builders (`mention`, `threadMessage`, `threadStart`) from `evals/helpers.ts`. -- Use `auto_complete_mcp_oauth` or `auto_complete_oauth` when the harness should instantly complete the fake provider callback after our app has genuinely initiated auth. +- Use `auth.autoCompleteMcpOAuth` or `auth.autoCompleteOAuth` when the harness should instantly complete the fake provider callback after our app has genuinely initiated auth. - For multi-turn, pass the same `thread` override so events land in one thread. - Keep each case focused on one primary behavior. - Encode all expectations in `criteria`; do not add deterministic inline assertions. @@ -127,6 +133,7 @@ Do not do these in eval files: - Do not import `@/chat/slack/*` directly. - Do not use MSW Slack helpers (`queueSlackApiResponse`, `getCapturedSlackApiCalls`, `queueSlackApiError`, `queueSlackRateLimit`). +- Do not import raw Slack capture wrappers. Use eval artifact helpers that expose Slack-visible posts, reactions, canvases, or files instead. - Do not validate raw Slack Web API request payload shapes from evals. 
- Do not validate implementation internals (exact tool names, sandbox IDs, or other non-user-visible details) unless the scenario explicitly evaluates those surfaces. diff --git a/packages/junior-evals/evals/behavior-harness.ts b/packages/junior-evals/evals/behavior-harness.ts index 4d8263396..f16515eca 100644 --- a/packages/junior-evals/evals/behavior-harness.ts +++ b/packages/junior-evals/evals/behavior-harness.ts @@ -17,7 +17,7 @@ import { } from "@junior-tests/fixtures/plugin-app"; import { createSlackRuntime } from "@/chat/app/factory"; import type { AssistantLifecycleEvent } from "@/chat/runtime/slack-runtime"; -import type { JuniorRuntimeServiceOverrides } from "@/chat/app/services"; +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; import { createUserTokenStore } from "@/chat/capabilities/factory"; import type { EmittedLogRecord } from "@/chat/logging"; import { @@ -39,6 +39,10 @@ import { generateAssistantReply } from "@/chat/respond"; import { schedulerPlugin } from "@sentry/junior-scheduler"; import { getStateAdapter } from "@/chat/state/adapter"; import { resetSkillDiscoveryCache } from "@/chat/skills"; +import type { + AgentTurnDiagnostics, + AssistantReply, +} from "@/chat/services/turn-result"; import { createWebFetchTool } from "@/chat/tools/web/fetch-tool"; import { createWebSearchTool } from "@/chat/tools/web/search"; import type { @@ -50,7 +54,7 @@ import { FakeSlackAdapter, createTestThread, type TestThread, -} from "@junior-tests/fixtures/slack-harness"; +} from "@junior-tests/fixtures/slack/harness"; import { EVAL_OAUTH_CODE, EVAL_OAUTH_PROVIDER, @@ -59,12 +63,12 @@ import { EVAL_MCP_AUTH_CODE, EVAL_MCP_AUTH_PROVIDER, } from "@junior-tests/msw/handlers/eval-mcp-auth"; -import { runMcpOauthCallbackRoute } from "@junior-tests/fixtures/mcp-oauth-callback-harness"; -import { runOauthCallbackRoute } from "@junior-tests/fixtures/oauth-callback-harness"; +import { runMcpOauthCallbackRoute } from 
"@junior-tests/fixtures/mcp/oauth-callback-harness"; +import { runOauthCallbackRoute } from "@junior-tests/fixtures/oauth/callback-harness"; import { - readCapturedSlackApiCalls, - type CapturedSlackApiCall, -} from "@junior-tests/msw/captured-slack-api-calls"; + collectEvalSlackArtifacts, + findLatestOAuthStateFromEvalSlackArtifacts, +} from "@junior-tests/fixtures/slack/eval-artifacts"; import { createSlackDestination } from "@/chat/destination"; import { ALL as sandboxEgressProxyALL } from "@/handlers/sandbox-egress-proxy"; import { createMockImageGenerateDeps } from "./fixtures/image-generate"; @@ -81,6 +85,7 @@ interface EvalEventThreadFixture { } interface EvalEventMessageFixture { + attachments?: Message["attachments"]; author?: { full_name?: string; is_bot?: boolean; @@ -142,20 +147,32 @@ interface EvalReplyResultFixture { used_primary_text?: boolean; } +interface EvalAuthOverrides { + autoCompleteMcpOAuth?: string[]; + autoCompleteOAuth?: string[]; + credentialProviders?: Array<"github" | "sentry">; +} + +interface EvalPluginOverrides { + pluginDirs?: string[]; + pluginPackages?: string[]; + skillDirs?: string[]; +} + +interface EvalReplyGenerationFixture { + cannedResults?: EvalReplyResultFixture[]; + cannedTexts?: string[]; + failCall?: number; + mockImageGeneration?: boolean; + timeoutMs?: number; + unsetGatewayCredentials?: boolean; +} + export interface EvalOverrides { - auto_complete_mcp_oauth?: string[]; - auto_complete_oauth?: string[]; - credential_providers?: Array<"github" | "sentry">; - fail_reply_call?: number; - mock_image_generation?: boolean; - plugin_dirs?: string[]; - plugin_packages?: string[]; - reply_results?: EvalReplyResultFixture[]; - reply_timeout_ms?: number; - reply_texts?: string[]; - skill_dirs?: string[]; - subscribed_decisions?: SubscribedDecisionFixture[]; - unset_gateway_api_key?: boolean; + auth?: EvalAuthOverrides; + plugins?: EvalPluginOverrides; + replyGeneration?: EvalReplyGenerationFixture; + 
subscribedReplyDecisions?: SubscribedDecisionFixture[]; } export interface EvalScenario { @@ -182,6 +199,7 @@ export interface EvalResult { timestamp: string; }>; slackAdapter: FakeSlackAdapter; + turnDiagnostics: EvalTurnDiagnostics[]; toolInvocations: EvalToolInvocation[]; } @@ -204,6 +222,13 @@ export interface EvalCanvasArtifact { title: string; } +export interface EvalTurnDiagnostics { + modelId: string; + outcome: AgentTurnDiagnostics["outcome"]; + thinkingLevel?: AgentTurnDiagnostics["thinkingLevel"]; + toolCalls: string[]; +} + export interface EvalToolInvocation { arguments?: Record; tool: string; @@ -233,6 +258,7 @@ interface QueueDelivery { } interface RuntimeObservations { + turnDiagnostics: EvalTurnDiagnostics[]; toolInvocations: EvalToolInvocation[]; } @@ -383,6 +409,19 @@ function toEvalToolInvocation(input: { return invocation; } +function toEvalTurnDiagnostics( + diagnostics: AgentTurnDiagnostics, +): EvalTurnDiagnostics { + return { + modelId: diagnostics.modelId, + outcome: diagnostics.outcome, + ...(diagnostics.thinkingLevel + ? { thinkingLevel: diagnostics.thinkingLevel } + : {}), + toolCalls: diagnostics.toolCalls, + }; +} + // --------------------------------------------------------------------------- // Internal constants and small helpers // --------------------------------------------------------------------------- @@ -401,20 +440,6 @@ function resolveEvalRelativePath(entry: string): string { : path.resolve(EVAL_PACKAGE_ROOT, entry); } -function toFirstString(value: unknown): string | undefined { - if (typeof value === "string") { - const trimmed = value.trim(); - return trimmed.length > 0 ? 
trimmed : undefined; - } - if (Array.isArray(value)) { - for (const entry of value) { - const resolved = toFirstString(entry); - if (resolved) return resolved; - } - } - return undefined; -} - function buildRuntimeThreadId(fixture: EvalEventThreadFixture): string { if (fixture.channel_id && fixture.thread_ts) { return `slack:${fixture.channel_id}:${fixture.thread_ts}`; @@ -450,6 +475,7 @@ const HARNESS_ENV_KEYS = [ ] as const; const DEFAULT_EVAL_BASE_URL = "https://junior.example.com"; const SENTRY_EVAL_SCOPE = "event:read org:read project:read team:read"; +const EVAL_TOKEN_EXPIRES_AT_MS = Date.parse("2099-01-01T00:00:00.000Z"); const DUMMY_GITHUB_APP_PRIVATE_KEY = generateKeyPairSync("rsa", { modulusLength: 2048, }) @@ -498,8 +524,8 @@ function isSandboxReachableBaseUrl(value: string): boolean { function scenarioNeedsEvalEgress(scenario: EvalScenario): boolean { return Boolean( - scenario.overrides?.credential_providers?.length || - scenario.overrides?.auto_complete_oauth?.length, + scenario.overrides?.auth?.credentialProviders?.length || + scenario.overrides?.auth?.autoCompleteOAuth?.length, ); } @@ -790,14 +816,6 @@ function createEvalThread(args: { return thread; } -function buildReactionKey(input: { - channel: string; - emoji: string; - timestamp: string; -}): string { - return `${input.channel}:${input.timestamp}:${input.emoji}`; -} - function toEvalFiles(value: unknown): EvalAttachedFile[] { if (!value || typeof value !== "object") { return []; @@ -842,89 +860,6 @@ function toEvalFiles(value: unknown): EvalAttachedFile[] { }); } -export function collectSlackArtifactsFromCapturedCalls( - calls: CapturedSlackApiCall[], -): Pick { - const canvases: EvalResult["canvases"] = []; - const channelPosts: EvalResult["channelPosts"] = []; - const reactions = new Map(); - - for (const call of calls) { - if (call.method === "canvases.create") { - const title = toFirstString(call.params.title) ?? 
""; - const documentContent = - call.params.document_content && - typeof call.params.document_content === "object" - ? (call.params.document_content as Record) - : undefined; - const markdown = documentContent - ? (toFirstString(documentContent.markdown) ?? "") - : ""; - if (!title && markdown.length === 0) { - continue; - } - canvases.push({ - title, - markdown, - }); - continue; - } - - if (call.method === "chat.postMessage") { - const channel = toFirstString(call.params.channel); - const text = toFirstString(call.params.text); - if (!channel || text === undefined) { - continue; - } - const threadTs = toFirstString(call.params.thread_ts); - channelPosts.push({ - channel, - text, - ...(threadTs ? { thread_ts: threadTs } : {}), - }); - continue; - } - - if (call.method === "reactions.add") { - const channel = toFirstString(call.params.channel); - const emoji = toFirstString(call.params.name); - const timestamp = toFirstString(call.params.timestamp); - if (!channel || !emoji || !timestamp) { - continue; - } - const reaction = { - channel, - emoji, - timestamp, - }; - reactions.set(buildReactionKey(reaction), reaction); - continue; - } - - if (call.method === "reactions.remove") { - const channel = toFirstString(call.params.channel); - const emoji = toFirstString(call.params.name); - const timestamp = toFirstString(call.params.timestamp); - if (!channel || !emoji || !timestamp) { - continue; - } - reactions.delete( - buildReactionKey({ - channel, - emoji, - timestamp, - }), - ); - } - } - - return { - canvases, - channelPosts, - reactions: [...reactions.values()], - }; -} - function toEvalAssistantPost(value: unknown): EvalAssistantPost { if (typeof value === "string") { return { @@ -961,7 +896,7 @@ function toIncomingMessage(event: MentionEvent | SubscribedMessageEvent) { id: event.message.id ?? "", text: event.message.text ?? "", isMention: event.message.is_mention, - attachments: [], + attachments: event.message.attachments ?? 
[], metadata: { dateSent: new Date(), edited: false }, channelId: event.thread.channel_id, threadId: runtimeThreadId, @@ -1064,7 +999,7 @@ async function seedCredentialProviderTokens(input: { await userTokenStore.set(userId, "sentry", { accessToken: "eval-sentry-access-token", refreshToken: "eval-sentry-refresh-token", - expiresAt: Date.now() + 60 * 60 * 1000, + expiresAt: EVAL_TOKEN_EXPIRES_AT_MS, scope: SENTRY_EVAL_SCOPE, }); } @@ -1085,55 +1020,6 @@ function getDefaultAuthCode( ); } -function extractSlackLinkUrl(text: string): URL | undefined { - const match = text.match(/<([^|>]+)\|/); - if (!match?.[1]) { - return undefined; - } - try { - return new URL(match[1]); - } catch { - return undefined; - } -} - -function findLatestOAuthStateFromSlackCalls(args: { - authorizeEndpoint: string; - consumedStates: Set; -}): string | undefined { - const expectedUrl = new URL(args.authorizeEndpoint); - const calls = readCapturedSlackApiCalls(); - - for (let index = calls.length - 1; index >= 0; index -= 1) { - const call = calls[index]; - if ( - call.method !== "chat.postEphemeral" && - call.method !== "chat.postMessage" - ) { - continue; - } - const text = toFirstString(call.params.text); - if (!text) { - continue; - } - const authLink = extractSlackLinkUrl(text); - if (!authLink) { - continue; - } - if ( - authLink.origin !== expectedUrl.origin || - authLink.pathname !== expectedUrl.pathname - ) { - continue; - } - const state = authLink.searchParams.get("state")?.trim(); - if (state && !args.consumedStates.has(state)) { - return state; - } - } - return undefined; -} - async function autoCompleteMcpOauth(args: { provider: string; requesterUserId: string; @@ -1172,7 +1058,7 @@ async function autoCompleteOauth(args: { throw new Error(`Unknown OAuth provider "${provider}" in eval harness`); } - const state = findLatestOAuthStateFromSlackCalls({ + const state = findLatestOAuthStateFromEvalSlackArtifacts({ authorizeEndpoint: providerConfig.authorizeEndpoint, consumedStates: 
args.consumedStates, }); @@ -1223,17 +1109,20 @@ async function setupHarnessEnvironment( try { const configuredSkillDirs = - scenario.overrides?.skill_dirs?.map(resolveEvalRelativePath) ?? []; + scenario.overrides?.plugins?.skillDirs?.map(resolveEvalRelativePath) ?? + []; const configuredPluginDirs = - scenario.overrides?.plugin_dirs?.map(resolveEvalRelativePath) ?? []; + scenario.overrides?.plugins?.pluginDirs?.map(resolveEvalRelativePath) ?? + []; const autoCompleteMcpOauthProviders = new Set( - scenario.overrides?.auto_complete_mcp_oauth?.map((p) => p.trim()) ?? [], + scenario.overrides?.auth?.autoCompleteMcpOAuth?.map((p) => p.trim()) ?? + [], ); const autoCompleteOauthProviders = new Set( - scenario.overrides?.auto_complete_oauth?.map((p) => p.trim()) ?? [], + scenario.overrides?.auth?.autoCompleteOAuth?.map((p) => p.trim()) ?? [], ); const credentialProviders = new Set( - scenario.overrides?.credential_providers ?? [], + scenario.overrides?.auth?.credentialProviders ?? [], ); const authRequesterUsers = new Set( scenario.events.flatMap((event) => @@ -1256,12 +1145,12 @@ async function setupHarnessEnvironment( configuredPluginDirs.length > 0 ? await createPluginAppFixture(configuredPluginDirs, { linkNodeModules: Boolean( - scenario.overrides?.plugin_packages?.length, + scenario.overrides?.plugins?.pluginPackages?.length, ), }) : undefined; setPluginCatalogConfig({ - packages: scenario.overrides?.plugin_packages ?? [], + packages: scenario.overrides?.plugins?.pluginPackages ?? [], }); const stateAdapter = getStateAdapter(); @@ -1335,14 +1224,15 @@ function buildRuntimeServices( env: HarnessEnvironment, threadRecordsById: Map, observations: RuntimeObservations, -): JuniorRuntimeServiceOverrides { - const replyResults = scenario.overrides?.reply_results ?? []; - const replyTexts = scenario.overrides?.reply_texts ?? []; - const subscribedDecisions = scenario.overrides?.subscribed_decisions ?? 
[]; +): JuniorRuntimeScenarioAdapters { + const replyResults = scenario.overrides?.replyGeneration?.cannedResults ?? []; + const replyTexts = scenario.overrides?.replyGeneration?.cannedTexts ?? []; + const subscribedDecisions = + scenario.overrides?.subscribedReplyDecisions ?? []; const replyTimeoutMs = - scenario.overrides?.reply_timeout_ms && - scenario.overrides.reply_timeout_ms > 0 - ? scenario.overrides.reply_timeout_ms + scenario.overrides?.replyGeneration?.timeoutMs && + scenario.overrides.replyGeneration.timeoutMs > 0 + ? scenario.overrides.replyGeneration.timeoutMs : Number.parseInt( process.env.EVAL_AGENT_REPLY_TIMEOUT_MS ?? (scenarioNeedsEvalEgress(scenario) ? "60000" : "30000"), @@ -1352,177 +1242,178 @@ function buildRuntimeServices( let decisionIndex = 0; const replyState = { successfulCount: 0 }; - const services: JuniorRuntimeServiceOverrides = { + const adapters: JuniorRuntimeScenarioAdapters = { ...(subscribedDecisions.length > 0 ? { - subscribedReplyPolicy: { - // The mock bypasses the generic Zod-typed `completeObject` signature - // since we return a fixed fixture rather than parsing a schema. - completeObject: async () => { - const next = - subscribedDecisions[ - Math.min(decisionIndex, subscribedDecisions.length - 1) - ]; - decisionIndex += 1; - return { - object: { - should_reply: next.should_reply, - confidence: next.should_reply ? 1 : 0, - reason: next.reason, - }, - text: JSON.stringify({ - should_reply: next.should_reply, - confidence: next.should_reply ? 1 : 0, - reason: next.reason, - }), - } as any; - }, + classifySubscribedReply: async (params) => { + const next = + subscribedDecisions[ + Math.min(decisionIndex, subscribedDecisions.length - 1) + ]; + decisionIndex += 1; + const parsed = params.schema.parse({ + should_reply: next.should_reply, + confidence: next.should_reply ? 
1 : 0, + reason: next.reason, + }); + return { + object: parsed, + text: JSON.stringify(parsed), + }; }, } : {}), - replyExecutor: { - generateAssistantReply: async (text, context) => { - replyCallCount += 1; - const mockImageGeneration = scenario.overrides?.mock_image_generation; - if (scenario.overrides?.fail_reply_call === replyCallCount) { - throw new Error(`forced reply failure on call ${replyCallCount}`); - } - const replyResult = replyResults[replyCallCount - 1]; - if (replyResult) { - if (replyResult.stream_text) { - await context?.onTextDelta?.(replyResult.stream_text); - } - replyState.successfulCount += 1; - observations.toolInvocations.push( - ...(replyResult.tool_invocations ?? - (replyResult.tool_calls ?? []).map((tool) => ({ tool }))), - ); - return { - text: replyResult.text, - deliveryMode: "thread", - deliveryPlan: { - mode: "thread", - postThreadText: true, - attachFiles: "none", - }, - diagnostics: { - assistantMessageCount: replyResult.assistant_message_count ?? 1, - ...(replyResult.error_message - ? { errorMessage: replyResult.error_message } - : {}), - modelId: "eval-reply-result", - outcome: replyResult.outcome ?? "success", - ...(replyResult.stop_reason - ? { stopReason: replyResult.stop_reason } - : {}), - toolCalls: replyResult.tool_calls ?? [], - toolErrorCount: replyResult.tool_error_count ?? 0, - toolResultCount: replyResult.tool_result_count ?? 0, - usedPrimaryText: replyResult.used_primary_text ?? 
true, - }, - }; - } - const replyText = replyTexts[replyState.successfulCount]; - if (typeof replyText === "string") { - replyState.successfulCount += 1; - return { - text: replyText, - deliveryMode: "thread", - deliveryPlan: { - mode: "thread", - postThreadText: true, - attachFiles: "none", - }, - diagnostics: { - assistantMessageCount: 1, - modelId: "eval-reply-text", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; + generateAssistantReply: async (text, context) => { + replyCallCount += 1; + const mockImageGeneration = + scenario.overrides?.replyGeneration?.mockImageGeneration; + if (scenario.overrides?.replyGeneration?.failCall === replyCallCount) { + throw new Error(`forced reply failure on call ${replyCallCount}`); + } + const replyResult = replyResults[replyCallCount - 1]; + if (replyResult) { + if (replyResult.stream_text) { + await context?.onTextDelta?.(replyResult.stream_text); } - - const gatewaySnapshot = snapshotEnv([ - "AI_GATEWAY_API_KEY", - "VERCEL_OIDC_TOKEN", - ]); - const baseToolOverrides: ToolHooks["toolOverrides"] = { - ...(context?.toolOverrides ?? {}), + replyState.successfulCount += 1; + observations.toolInvocations.push( + ...(replyResult.tool_invocations ?? + (replyResult.tool_calls ?? []).map((tool) => ({ tool }))), + ); + const reply: AssistantReply = { + text: replyResult.text, + deliveryMode: "thread", + deliveryPlan: { + mode: "thread", + postThreadText: true, + attachFiles: "none", + }, + diagnostics: { + assistantMessageCount: replyResult.assistant_message_count ?? 1, + ...(replyResult.error_message + ? { errorMessage: replyResult.error_message } + : {}), + modelId: "eval-reply-result", + outcome: replyResult.outcome ?? "success", + ...(replyResult.stop_reason + ? { stopReason: replyResult.stop_reason } + : {}), + toolCalls: replyResult.tool_calls ?? [], + toolErrorCount: replyResult.tool_error_count ?? 0, + toolResultCount: replyResult.tool_result_count ?? 
0, + usedPrimaryText: replyResult.used_primary_text ?? true, + }, }; - const toolOverrides = { - ...baseToolOverrides, - webFetch: createReplayWebFetchDeps(baseToolOverrides), - webSearch: createReplayWebSearchDeps(baseToolOverrides), - ...(mockImageGeneration - ? { imageGenerate: createMockImageGenerateDeps() } - : {}), + observations.turnDiagnostics.push( + toEvalTurnDiagnostics(reply.diagnostics), + ); + return reply; + } + const replyText = replyTexts[replyState.successfulCount]; + if (typeof replyText === "string") { + replyState.successfulCount += 1; + const reply: AssistantReply = { + text: replyText, + deliveryMode: "thread", + deliveryPlan: { + mode: "thread", + postThreadText: true, + attachFiles: "none", + }, + diagnostics: { + assistantMessageCount: 1, + modelId: "eval-reply-text", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + }, }; - if (scenario.overrides?.unset_gateway_api_key) { - delete process.env.AI_GATEWAY_API_KEY; - delete process.env.VERCEL_OIDC_TOKEN; - } - let reply: Awaited>; - try { - reply = await Promise.race([ - generateAssistantReply(text, { - ...context, - onToolInvocation: (invocation) => { - observations.toolInvocations.push( - toEvalToolInvocation(invocation), - ); - }, - ...(env.configuredSkillDirs.length > 0 - ? { skillDirs: env.configuredSkillDirs } - : {}), - toolOverrides, - }), - new Promise((_, reject) => - setTimeout( - () => - reject( - new Error( - `generateAssistantReply timed out after ${replyTimeoutMs}ms`, - ), + observations.turnDiagnostics.push( + toEvalTurnDiagnostics(reply.diagnostics), + ); + return reply; + } + + const gatewaySnapshot = snapshotEnv([ + "AI_GATEWAY_API_KEY", + "VERCEL_OIDC_TOKEN", + ]); + const baseToolOverrides: ToolHooks["toolOverrides"] = { + ...(context?.toolOverrides ?? 
{}), + }; + const toolOverrides = { + ...baseToolOverrides, + webFetch: createReplayWebFetchDeps(baseToolOverrides), + webSearch: createReplayWebSearchDeps(baseToolOverrides), + ...(mockImageGeneration + ? { imageGenerate: createMockImageGenerateDeps() } + : {}), + }; + if (scenario.overrides?.replyGeneration?.unsetGatewayCredentials) { + delete process.env.AI_GATEWAY_API_KEY; + delete process.env.VERCEL_OIDC_TOKEN; + } + let reply: Awaited>; + try { + reply = await Promise.race([ + generateAssistantReply(text, { + ...context, + onToolInvocation: (invocation) => { + observations.toolInvocations.push( + toEvalToolInvocation(invocation), + ); + }, + ...(env.configuredSkillDirs.length > 0 + ? { skillDirs: env.configuredSkillDirs } + : {}), + toolOverrides, + }), + new Promise((_, reject) => + setTimeout( + () => + reject( + new Error( + `generateAssistantReply timed out after ${replyTimeoutMs}ms`, ), - replyTimeoutMs, - ), + ), + replyTimeoutMs, ), - ]); - } finally { - if (scenario.overrides?.unset_gateway_api_key) { - gatewaySnapshot.restore(); - } + ), + ]); + } finally { + if (scenario.overrides?.replyGeneration?.unsetGatewayCredentials) { + gatewaySnapshot.restore(); } + } - replyState.successfulCount += 1; - return reply; - }, + replyState.successfulCount += 1; + observations.turnDiagnostics.push( + toEvalTurnDiagnostics(reply.diagnostics), + ); + return reply; }, - visionContext: { - listThreadReplies: async ({ channelId, threadTs, targetMessageTs }) => { - const threadId = buildRuntimeThreadId({ - id: `slack:${channelId}:${threadTs}`, - channel_id: channelId, - thread_ts: threadTs, - }); - const replies = (threadRecordsById.get(threadId)?.transcript ?? 
[]).map( - (message) => buildThreadReplyFromMessage(threadTs, message), - ); - if (!targetMessageTs || targetMessageTs.length === 0) { - return replies; - } - const targets = new Set(targetMessageTs); - return replies.filter( - (reply) => typeof reply.ts === "string" && targets.has(reply.ts), - ); - }, + listThreadReplies: async ({ channelId, threadTs, targetMessageTs }) => { + const threadId = buildRuntimeThreadId({ + id: `slack:${channelId}:${threadTs}`, + channel_id: channelId, + thread_ts: threadTs, + }); + const replies = (threadRecordsById.get(threadId)?.transcript ?? []).map( + (message) => buildThreadReplyFromMessage(threadTs, message), + ); + if (!targetMessageTs || targetMessageTs.length === 0) { + return replies; + } + const targets = new Set(targetMessageTs); + return replies.filter( + (reply) => typeof reply.ts === "string" && targets.has(reply.ts), + ); }, }; - return services; + return adapters; } // --------------------------------------------------------------------------- @@ -1649,8 +1540,7 @@ function collectResults( .filter((record) => record.thread.threadTs) .map((record) => `${record.thread.channelId}:${record.thread.threadTs}`), ); - const { canvases, channelPosts, reactions } = - collectSlackArtifactsFromCapturedCalls(readCapturedSlackApiCalls()); + const { canvases, channelPosts, reactions } = collectEvalSlackArtifacts(); const threadPosts = [...threadRecordsById.values()].flatMap((record) => record.thread.posts.map((post) => ({ ...toEvalAssistantPost(post), @@ -1680,6 +1570,7 @@ function collectResults( reactions, posts: [...threadPosts, ...callbackThreadPosts], slackAdapter, + turnDiagnostics: observations.turnDiagnostics, toolInvocations: observations.toolInvocations, }; } @@ -1707,6 +1598,7 @@ export async function runEvalScenario( const threadRecordsById = new Map(); const readyQueueDeliveries: QueueDelivery[] = []; const observations: RuntimeObservations = { + turnDiagnostics: [], toolInvocations: [], }; const channelStateById = new 
Map< @@ -1744,7 +1636,7 @@ export async function runEvalScenario( return record; }; - const services = buildRuntimeServices( + const adapters = buildRuntimeServices( scenario, env, threadRecordsById, @@ -1753,7 +1645,7 @@ export async function runEvalScenario( const slackRuntime = createSlackRuntime({ getSlackAdapter: () => slackAdapter as any, - services, + adapters, }); await processEvents({ @@ -1778,7 +1670,7 @@ export async function runEvalScenario( } } -// Compile-time guards for Thread and Message fakes are in tests/fixtures/slack-harness.ts. +// Compile-time guards for Thread and Message fakes are in tests/fixtures/slack/harness.ts. // The toIncomingMessage function below still needs a local check since it maps from eval-specific fixtures. type AssertAssignable<_TSub extends TSuper, TSuper> = true; type _MessageCheck = AssertAssignable< diff --git a/packages/junior-evals/evals/core/coding-file-tools.eval.ts b/packages/junior-evals/evals/core/coding-file-tools.eval.ts index 8590f6855..cffc009c5 100644 --- a/packages/junior-evals/evals/core/coding-file-tools.eval.ts +++ b/packages/junior-evals/evals/core/coding-file-tools.eval.ts @@ -2,7 +2,9 @@ import { describeEval } from "vitest-evals"; import { mention, rubric, slackEvals } from "../helpers"; const codingFixtureOverrides = { - skill_dirs: ["evals/fixtures/coding-skills"], + plugins: { + skillDirs: ["evals/fixtures/coding-skills"], + }, }; describeEval("Coding File Tools", slackEvals, (it) => { @@ -21,6 +23,7 @@ describeEval("Coding File Tools", slackEvals, (it) => { "A small source edit in the sandbox fixture updates the requested value and reports the changed file.", pass: [ "The final reply identifies the changed config file and says the default retry count is now 3.", + "turn_diagnostics shows the turn used xhigh thinking.", ], fail: [ "Do not answer with only a plan or promise to edit later.", diff --git a/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts 
b/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts index f30a9b563..cafe758a9 100644 --- a/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts +++ b/packages/junior-evals/evals/core/lifecycle-and-resilience.eval.ts @@ -23,7 +23,9 @@ describeEval("Lifecycle and Resilience", slackEvals, (it) => { run, }) => { await run({ - overrides: { fail_reply_call: 1 }, + overrides: { + replyGeneration: { failCall: 1 }, + }, events: [mention("What's the status of the deploy?")], criteria: rubric({ contract: @@ -44,13 +46,15 @@ describeEval("Lifecycle and Resilience", slackEvals, (it) => { }) => { await run({ overrides: { - reply_results: [ - { - stream_text: "Budget is still on track for Friday.", - text: "Budget is still on track for Friday.", - outcome: "provider_error", - }, - ], + replyGeneration: { + cannedResults: [ + { + stream_text: "Budget is still on track for Friday.", + text: "Budget is still on track for Friday.", + outcome: "provider_error", + }, + ], + }, }, events: [mention("Quick budget update?")], criteria: rubric({ diff --git a/packages/junior-evals/evals/core/media-and-attachments.eval.ts b/packages/junior-evals/evals/core/media-and-attachments.eval.ts index 27cb17180..c9b581317 100644 --- a/packages/junior-evals/evals/core/media-and-attachments.eval.ts +++ b/packages/junior-evals/evals/core/media-and-attachments.eval.ts @@ -1,12 +1,53 @@ import { describeEval } from "vitest-evals"; +import type { Message } from "chat"; import { mention, rubric, slackEvals } from "../helpers"; describeEval("Media and Attachments", slackEvals, (it) => { + it("when image analysis is unavailable, acknowledge the image without inventing contents", async ({ + run, + }) => { + await run({ + events: [ + mention("<@U_APP> what does this screenshot show?", { + attachments: [ + { + type: "image", + mimeType: "image/png", + name: "dashboard-screenshot.png", + url: "https://files.slack.com/private/dashboard-screenshot.png", + fetchData: async () => 
Buffer.from("not-real-image-bytes"), + }, + ] as Message["attachments"], + }), + ], + requireSandboxReady: false, + criteria: rubric({ + contract: + "When Slack includes an image but this runtime cannot analyze images, the assistant is honest about the limitation.", + pass: [ + "assistant_posts contains exactly one reply.", + "The reply acknowledges that an image or screenshot was attached.", + "The reply clearly says it cannot inspect or analyze the image contents in this runtime.", + ], + allow: [ + "The reply may ask the user to describe the screenshot or provide text from it.", + ], + fail: [ + "Do not claim no image or screenshot was attached.", + "Do not invent visual details such as colors, charts, UI labels, or people in the image.", + "Do not say the image was successfully analyzed.", + ], + }), + }); + }); + it("when the user asks for an image, attach an image instead of replying with text alone", async ({ run, }) => { await run({ - overrides: { mock_image_generation: true }, + overrides: { + replyGeneration: { mockImageGeneration: true }, + }, events: [mention("show me how you feel")], criteria: rubric({ contract: diff --git a/packages/junior-evals/evals/core/oauth-workflows.eval.ts b/packages/junior-evals/evals/core/oauth-workflows.eval.ts index 0c04b6f40..eebf23a8c 100644 --- a/packages/junior-evals/evals/core/oauth-workflows.eval.ts +++ b/packages/junior-evals/evals/core/oauth-workflows.eval.ts @@ -71,8 +71,8 @@ describeEval("OAuth Workflows", slackEvals, (it) => { }) => { const result = await run({ overrides: { - auto_complete_mcp_oauth: ["eval-auth"], - plugin_dirs: ["evals/fixtures/plugins"], + auth: { autoCompleteMcpOAuth: ["eval-auth"] }, + plugins: { pluginDirs: ["evals/fixtures/plugins"] }, }, events: [ threadMessage( @@ -125,8 +125,8 @@ describeEval("OAuth Workflows", slackEvals, (it) => { }) => { const result = await run({ overrides: { - auto_complete_oauth: ["eval-oauth"], - plugin_dirs: ["evals/fixtures/plugins"], + auth: { 
autoCompleteOAuth: ["eval-oauth"] }, + plugins: { pluginDirs: ["evals/fixtures/plugins"] }, }, events: [ threadMessage( @@ -179,8 +179,8 @@ describeEval("OAuth Workflows", slackEvals, (it) => { }) => { const result = await run({ overrides: { - auto_complete_oauth: ["eval-oauth"], - plugin_dirs: ["evals/fixtures/plugins"], + auth: { autoCompleteOAuth: ["eval-oauth"] }, + plugins: { pluginDirs: ["evals/fixtures/plugins"] }, }, events: [ threadMessage( diff --git a/packages/junior-evals/evals/core/passive-behavior.eval.ts b/packages/junior-evals/evals/core/passive-behavior.eval.ts index f5e6cfd6c..16d4a5fbf 100644 --- a/packages/junior-evals/evals/core/passive-behavior.eval.ts +++ b/packages/junior-evals/evals/core/passive-behavior.eval.ts @@ -13,9 +13,11 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: [ - "The deploy changed the billing worker and the API auth flow.", - ], + replyGeneration: { + cannedTexts: [ + "The deploy changed the billing worker and the API auth flow.", + ], + }, }, events: [ mention( @@ -52,7 +54,9 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: ["You need the budget by Friday."], + replyGeneration: { + cannedTexts: ["You need the budget by Friday."], + }, }, events: [ mention("I need the budget by Friday.", { @@ -84,9 +88,11 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: [ - "The deploy changed the billing worker and the API auth flow.", - ], + replyGeneration: { + cannedTexts: [ + "The deploy changed the billing worker and the API auth flow.", + ], + }, }, events: [ mention( @@ -121,9 +127,11 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: [ - "The billing worker handles invoice processing and payment retries.", - ], + replyGeneration: { + cannedTexts: [ + "The billing worker handles invoice processing and payment 
retries.", + ], + }, }, events: [ mention("What does the billing worker do?", { @@ -157,7 +165,9 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: ["Here's the deployment status."], + replyGeneration: { + cannedTexts: ["Here's the deployment status."], + }, }, events: [ mention("Show me the deployment status.", { thread: canYouThread }), @@ -185,7 +195,9 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: ["The deploy changed three services."], + replyGeneration: { + cannedTexts: ["The deploy changed three services."], + }, }, events: [ mention( @@ -220,10 +232,12 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: [ - "The deploy changed billing, auth, and the API gateway.", - "The three services were billing, auth, and the API gateway.", - ], + replyGeneration: { + cannedTexts: [ + "The deploy changed billing, auth, and the API gateway.", + "The three services were billing, auth, and the API gateway.", + ], + }, }, events: [ mention("What changed in the deploy?", { @@ -255,7 +269,11 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: ["The deploy changed billing, auth, and the API gateway."], + replyGeneration: { + cannedTexts: [ + "The deploy changed billing, auth, and the API gateway.", + ], + }, }, events: [ mention("What changed in the deploy?", { @@ -290,10 +308,12 @@ describeEval("Passive Behavior", slackEvals, (it) => { }) => { await run({ overrides: { - reply_texts: [ - "I can help in this thread.", - "I'm back because you mentioned me again.", - ], + replyGeneration: { + cannedTexts: [ + "I can help in this thread.", + "I'm back because you mentioned me again.", + ], + }, }, events: [ mention("Can you help in this thread?", { thread: optOutThread }), diff --git a/packages/junior-evals/evals/core/research-reply-shape.eval.ts 
b/packages/junior-evals/evals/core/research-reply-shape.eval.ts index aa81a3d7c..79e5d0447 100644 --- a/packages/junior-evals/evals/core/research-reply-shape.eval.ts +++ b/packages/junior-evals/evals/core/research-reply-shape.eval.ts @@ -19,6 +19,7 @@ describeEval("Research Reply Shape", slackEvals, (it) => { "The thread reply is a concise researched answer, not a status update or process note.", "The answer coherently summarizes Slack agent streaming across the provided sources.", "The answer stays brief enough for a normal Slack reply and does not create a canvas.", + "turn_diagnostics shows the turn used high or xhigh thinking.", ], fail: [ "Do not include process chatter such as 'let me check', 'fetching', or similar tool-progress narration.", diff --git a/packages/junior-evals/evals/core/routing-and-continuity.eval.ts b/packages/junior-evals/evals/core/routing-and-continuity.eval.ts index 4297d907e..9c2ce00d9 100644 --- a/packages/junior-evals/evals/core/routing-and-continuity.eval.ts +++ b/packages/junior-evals/evals/core/routing-and-continuity.eval.ts @@ -1,6 +1,23 @@ import { describeEval } from "vitest-evals"; +import { expect } from "vitest"; import { mention, rubric, slackEvals, threadMessage } from "../helpers"; +type EvalOutput = { + turn_diagnostics?: Array<{ thinkingLevel?: string }>; +}; + +function outputOf(result: { output?: unknown }): EvalOutput { + return (result.output ?? {}) as EvalOutput; +} + +function expectThinkingLevel(output: EvalOutput, expected: string): void { + const levels = + output.turn_diagnostics + ?.map((diagnostic) => diagnostic.thinkingLevel) + .filter((level): level is string => typeof level === "string") ?? 
[]; + expect(levels).toContain(expected); +} + describeEval("Routing and Continuity", slackEvals, (it) => { it("when a thread message explicitly mentions Junior, post a direct reply", async ({ run, @@ -19,6 +36,33 @@ describeEval("Routing and Continuity", slackEvals, (it) => { }); }); + it("when the task is a deterministic one-step transform, route with low thinking", async ({ + run, + }) => { + const result = await run({ + events: [ + mention( + "@bot alphabetize these words and reply with only the sorted list: gamma, alpha, beta.", + ), + ], + requireSandboxReady: false, + criteria: rubric({ + contract: + "A deterministic one-step transform uses low thinking and returns only the transformed result.", + pass: [ + "assistant_posts contains exactly one concise reply.", + "The reply lists alpha, beta, gamma in that order.", + "turn_diagnostics shows the turn used low thinking.", + ], + fail: [ + "Do not use tools or sandbox setup for this request.", + "Do not include process chatter or explanation around the sorted list.", + ], + }), + }); + expectThinkingLevel(outputOf(result), "low"); + }); + it("when asked to post in channel, send a channel post instead of a thread reply", async ({ run, }) => { diff --git a/packages/junior-evals/evals/core/skill-infra.eval.ts b/packages/junior-evals/evals/core/skill-infra.eval.ts index 9cb810a05..fadd0d615 100644 --- a/packages/junior-evals/evals/core/skill-infra.eval.ts +++ b/packages/junior-evals/evals/core/skill-infra.eval.ts @@ -6,7 +6,7 @@ describeEval("Skill Infrastructure", slackEvals, (it) => { run, }) => { await run({ - overrides: { skill_dirs: ["evals/fixtures/skills"] }, + overrides: { plugins: { skillDirs: ["evals/fixtures/skills"] } }, events: [mention("/candidate-brief David Cramer")], criteria: rubric({ contract: @@ -30,7 +30,7 @@ describeEval("Skill Infrastructure", slackEvals, (it) => { run, }) => { await run({ - overrides: { skill_dirs: ["evals/fixtures/skills"] }, + overrides: { plugins: { skillDirs: 
["evals/fixtures/skills"] } }, events: [ mention("/candidate-brief Alice Example", { thread: candidateBriefThread, @@ -57,7 +57,7 @@ describeEval("Skill Infrastructure", slackEvals, (it) => { run, }) => { await run({ - overrides: { skill_dirs: ["evals/fixtures/skills"] }, + overrides: { plugins: { skillDirs: ["evals/fixtures/skills"] } }, events: [mention("/list-working-directory")], criteria: rubric({ contract: @@ -75,7 +75,7 @@ describeEval("Skill Infrastructure", slackEvals, (it) => { run, }) => { await run({ - overrides: { skill_dirs: ["evals/fixtures/skills"] }, + overrides: { plugins: { skillDirs: ["evals/fixtures/skills"] } }, events: [ mention( "Can you double-check what the source handbook says about closed tracking issues proving capability support? I think there was a note for this.", @@ -103,7 +103,7 @@ describeEval("Skill Infrastructure", slackEvals, (it) => { }) => { await run({ overrides: { - plugin_dirs: ["evals/fixtures/plugins"], + plugins: { pluginDirs: ["evals/fixtures/plugins"] }, }, events: [ mention( diff --git a/packages/junior-evals/evals/core/skill-invocation-control.eval.ts b/packages/junior-evals/evals/core/skill-invocation-control.eval.ts index a7c9ce756..ae5f45991 100644 --- a/packages/junior-evals/evals/core/skill-invocation-control.eval.ts +++ b/packages/junior-evals/evals/core/skill-invocation-control.eval.ts @@ -8,7 +8,7 @@ describeEval("Skill Invocation Control", slackEvals, (it) => { run, }) => { await run({ - overrides: { skill_dirs: skillDirs }, + overrides: { plugins: { skillDirs } }, events: [mention("What's the weather like in San Francisco today?")], criteria: rubric({ contract: @@ -31,7 +31,7 @@ describeEval("Skill Invocation Control", slackEvals, (it) => { run, }) => { await run({ - overrides: { skill_dirs: skillDirs }, + overrides: { plugins: { skillDirs } }, events: [ mention( "Use the weather-lookup skill to check the weather in San Francisco.", @@ -55,7 +55,7 @@ describeEval("Skill Invocation Control", slackEvals, 
(it) => { run, }) => { await run({ - overrides: { skill_dirs: skillDirs }, + overrides: { plugins: { skillDirs } }, events: [ mention( "Can you double-check what the source handbook says about capability support verification?", diff --git a/packages/junior-evals/evals/github/skill-workflows.eval.ts b/packages/junior-evals/evals/github/skill-workflows.eval.ts index d71b06b39..173d0ba5b 100644 --- a/packages/junior-evals/evals/github/skill-workflows.eval.ts +++ b/packages/junior-evals/evals/github/skill-workflows.eval.ts @@ -7,8 +7,10 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { }) => { await run({ overrides: { - plugin_packages: ["@sentry/junior-github"], - skill_dirs: ["../junior/skills"], + plugins: { + pluginPackages: ["@sentry/junior-github"], + skillDirs: ["../junior/skills"], + }, }, events: [ mention( @@ -51,8 +53,10 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { }) => { await run({ overrides: { - plugin_packages: ["@sentry/junior-github"], - skill_dirs: ["../junior/skills"], + plugins: { + pluginPackages: ["@sentry/junior-github"], + skillDirs: ["../junior/skills"], + }, }, events: [ mention("Set the default repo to getsentry/junior for this channel.", { @@ -88,8 +92,10 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { }) => { await run({ overrides: { - plugin_packages: ["@sentry/junior-github"], - skill_dirs: ["../junior/skills"], + plugins: { + pluginPackages: ["@sentry/junior-github"], + skillDirs: ["../junior/skills"], + }, }, events: [ threadMessage( @@ -129,8 +135,10 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => { }) => { await run({ overrides: { - plugin_packages: ["@sentry/junior-github"], - skill_dirs: ["../junior/skills"], + plugins: { + pluginPackages: ["@sentry/junior-github"], + skillDirs: ["../junior/skills"], + }, }, events: [ threadMessage( diff --git a/packages/junior-evals/evals/helpers.ts b/packages/junior-evals/evals/helpers.ts index 24677e1fc..dd7bccae9 100644 --- 
a/packages/junior-evals/evals/helpers.ts +++ b/packages/junior-evals/evals/helpers.ts @@ -1,8 +1,10 @@ import { + createJudgeHarness, createJudge, type DescribeEvalOptions, type JudgeContext, } from "vitest-evals"; +import type { Message } from "chat"; import { completeText, resolveGatewayModel } from "@/chat/pi/client"; import { toJsonValue, @@ -20,6 +22,8 @@ import { runEvalScenario, } from "./behavior-harness"; +const JUDGE_MESSAGE_TIMESTAMP_MS = Date.parse("2026-06-05T12:00:00.000Z"); + function hasAssistantStatusPending(result: EvalResult): boolean { const lastByThread = new Map(); for (const call of result.slackAdapter.statusCalls) { @@ -35,17 +39,19 @@ function toJson(value: unknown): JsonValue { return toJsonValue(value) ?? null; } -function toJsonRecord( - value: Record, -): Record { - const record: Record = {}; +interface JsonObject { + [key: string]: JsonValue; +} + +function toJsonRecord(value: Record): JsonObject { + const record: JsonObject = {}; for (const [key, entry] of Object.entries(value)) { record[key] = toJson(entry); } return record; } -function buildEvalOutput(result: EvalResult): Record { +function buildEvalOutput(result: EvalResult): JsonObject { return { assistant_posts: toJson(result.posts), observed_tool_invocations: toJson(result.toolInvocations), @@ -57,10 +63,11 @@ function buildEvalOutput(result: EvalResult): Record { suggested_prompts_set: result.slackAdapter.promptCalls.length > 0, assistant_status_pending: hasAssistantStatusPending(result), }, + turn_diagnostics: toJson(result.turnDiagnostics), }; } -function serializeEvalOutput(output: Record): string { +function serializeEvalOutput(output: JsonObject): string { return JSON.stringify(output, null, 2); } @@ -99,7 +106,7 @@ function toLogMetadata(record: EmittedLogRecord): Record { }); } -function toHarnessRun(result: EvalResult): HarnessRun { +function toHarnessRun(result: EvalResult): HarnessRun { const output = buildEvalOutput(result); const toolCalls = 
result.toolInvocations.map(toToolCallRecord); const messages: NormalizedMessage[] = [ @@ -128,7 +135,6 @@ function toHarnessRun(result: EvalResult): HarnessRun { output, session: { messages, - outputText: serializeEvalOutput(output), metadata: toJsonRecord({ slack_metadata: output.slack_metadata, log_records: result.logRecords.map(toLogMetadata), @@ -249,10 +255,10 @@ function assertStatusCleared(input: SlackEvalInput, result: EvalResult): void { } function assertTimeoutBudget(input: SlackEvalInput): void { - const replyTimeout = input.overrides?.reply_timeout_ms; + const replyTimeout = input.overrides?.replyGeneration?.timeoutMs; if (replyTimeout !== undefined && replyTimeout > MAX_EVAL_TIMEOUT_MS) { throw new Error( - `Eval reply_timeout_ms ${replyTimeout} exceeds the ${MAX_EVAL_TIMEOUT_MS}ms budget. Use fixtures, mocks, or tool replay instead of raising timeouts.`, + `Eval replyGeneration.timeoutMs ${replyTimeout} exceeds the ${MAX_EVAL_TIMEOUT_MS}ms budget. Use fixtures, mocks, or tool replay instead of raising timeouts.`, ); } if ( @@ -337,24 +343,8 @@ function parseJudgeResult(text: string): JudgeResultPayload { } /** Replays Slack events through the real runtime and returns normalized artifacts. 
*/ -export const slackHarness: Harness = { +export const slackHarness: Harness = { name: "slack", - prompt: async (input, options) => { - const { text } = await completeText({ - modelId: EVAL_JUDGE_MODEL_ID, - system: options?.system, - messages: [ - { - role: "user", - content: input, - timestamp: Date.now(), - }, - ], - temperature: 0, - metadata: options?.metadata, - }); - return text; - }, run: async (input) => { const logRecords: EmittedLogRecord[] = []; const unregisterLogSink = registerLogRecordSink((record) => { @@ -400,32 +390,60 @@ export const slackHarness: Harness = { }, }; +const rubricJudgeHarness = createJudgeHarness({ + name: "rubric-judge", + run: async ({ prompt, system }, options) => { + const { text } = await completeText({ + modelId: EVAL_JUDGE_MODEL_ID, + system, + messages: [ + { + role: "user", + content: prompt, + timestamp: JUDGE_MESSAGE_TIMESTAMP_MS, + }, + ], + temperature: 0, + metadata: options.metadata, + }); + return text; + }, +}); + /** Scores Slack eval output against the case rubric. 
*/ export const RubricJudge = createJudge( "RubricJudge", async ({ input, output, - harness, + runJudge, }: JudgeContext< SlackEvalInput, + JsonObject, Record, typeof slackHarness >) => { - const object = parseJudgeResult( - await harness.prompt( - formatJudgePrompt( - serializeEvalOutput(output as Record), + if (!runJudge) { + throw new Error("RubricJudge requires a judge harness."); + } + const response = await runJudge( + { + prompt: formatJudgePrompt( + serializeEvalOutput(output), formatRubric(input.criteria), ), - { - system: EVAL_SYSTEM, - metadata: { - judge: "RubricJudge", - }, + system: EVAL_SYSTEM, + }, + { + metadata: { + judge: "RubricJudge", }, - ), + }, ); + if (typeof response !== "string") { + throw new Error("RubricJudge expected the judge harness to return text."); + } + const object = parseJudgeResult(response); const answer = object.answer as keyof typeof CHOICE_SCORES; return { @@ -441,9 +459,10 @@ export const RubricJudge = createJudge( /** Shared vitest-evals suite options for Slack conversation evals. */ export const slackEvals = { harness: slackHarness, + judgeHarness: rubricJudgeHarness, judges: [RubricJudge], judgeThreshold: 0.75, -} satisfies DescribeEvalOptions; +} satisfies DescribeEvalOptions; // ── Event builders ───────────────────────────────────────── @@ -461,6 +480,7 @@ const DEFAULT_AUTHOR = { }; type AuthorOverrides = Partial; +type AttachmentOverrides = Message["attachments"]; interface ThreadOverrides { id?: string; @@ -471,7 +491,11 @@ interface ThreadOverrides { /** Builds a first-turn mention event for a harnessed Slack eval. 
*/ export function mention( text: string, - opts?: { author?: AuthorOverrides; thread?: ThreadOverrides }, + opts?: { + attachments?: AttachmentOverrides; + author?: AuthorOverrides; + thread?: ThreadOverrides; + }, ) { const seq = nextId(); return { @@ -486,6 +510,7 @@ export function mention( id: `m-${seq}`, text, is_mention: true, + attachments: opts?.attachments, author: { ...DEFAULT_AUTHOR, ...opts?.author }, }, }; @@ -495,6 +520,7 @@ export function mention( export function threadMessage( text: string, opts?: { + attachments?: AttachmentOverrides; author?: AuthorOverrides; thread?: ThreadOverrides; is_mention?: boolean; @@ -513,6 +539,7 @@ export function threadMessage( id: `m-${seq}`, text, is_mention: opts?.is_mention ?? false, + attachments: opts?.attachments, author: { ...DEFAULT_AUTHOR, ...opts?.author }, }, }; diff --git a/packages/junior-evals/evals/sentry/skill-workflows.eval.ts b/packages/junior-evals/evals/sentry/skill-workflows.eval.ts index 405c1b65e..44465ab9a 100644 --- a/packages/junior-evals/evals/sentry/skill-workflows.eval.ts +++ b/packages/junior-evals/evals/sentry/skill-workflows.eval.ts @@ -14,8 +14,12 @@ describeEval("Sentry Skill Workflows", slackEvals, (it) => { }) => { const result = await run({ overrides: { - credential_providers: ["sentry"], - plugin_packages: ["@sentry/junior-sentry"], + auth: { + credentialProviders: ["sentry"], + }, + plugins: { + pluginPackages: ["@sentry/junior-sentry"], + }, }, events: [ mention("are you working", { thread: followUpThread }), diff --git a/packages/junior-evals/package.json b/packages/junior-evals/package.json index 3f0a18f95..01f57cdc5 100644 --- a/packages/junior-evals/package.json +++ b/packages/junior-evals/package.json @@ -5,7 +5,8 @@ "type": "module", "scripts": { "test": "vitest run", - "preevals": "node ../junior/scripts/check-slack-test-boundary.mjs", + "typecheck": "tsc --noEmit", + "preevals": "node ../junior/scripts/check-test-boundaries.mjs", "evals": "JUNIOR_STATE_ADAPTER=memory 
VITEST_EVALS_REPLAY_MODE=auto pnpm exec vitest run -c vitest.evals.config.ts", "evals:record": "JUNIOR_STATE_ADAPTER=memory VITEST_EVALS_REPLAY_MODE=record pnpm exec vitest run -c vitest.evals.config.ts" }, @@ -16,10 +17,12 @@ "@sentry/junior-scheduler": "workspace:*", "@sentry/junior-sentry": "workspace:*", "@sentry/junior-testing": "workspace:*", + "ai": "^6.0.190", "chat": "4.29.0", "tinyrainbow": "^3.1.0", "typescript": "^6.0.3", "vitest": "^4.1.7", - "vitest-evals": "0.11.0" + "vitest-evals": "0.11.0", + "zod": "^4.4.3" } } diff --git a/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts b/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts index 90aca6ad6..c7bbdc152 100644 --- a/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts +++ b/packages/junior-evals/tests/unit/harness/behavior-harness.test.ts @@ -1,70 +1,14 @@ -import { afterAll, afterEach, describe, expect, it, vi } from "vitest"; +import { afterAll, describe, expect, it, vi } from "vitest"; -const { - handleSubscribedMessageMock, - observedRuntimeIds, - originalStateAdapterEnv, - noopAsync, - handleNewMentionMock, -} = vi.hoisted(() => { +import { collectEvalSlackArtifactsFromSlackApiCalls } from "@junior-tests/fixtures/slack/eval-artifacts"; +import { runEvalScenario } from "../../../evals/behavior-harness"; + +const { originalStateAdapterEnv } = vi.hoisted(() => { const originalStateAdapterEnv = process.env.JUNIOR_STATE_ADAPTER; process.env.JUNIOR_STATE_ADAPTER = "memory"; - const observedRuntimeIds = { - destinationChannelId: undefined as string | undefined, - juniorBaseUrl: undefined as string | undefined, - messageThreadId: undefined as string | undefined, - threadId: undefined as string | undefined, - }; - - return { - observedRuntimeIds, - originalStateAdapterEnv, - noopAsync: vi.fn(async () => {}), - handleNewMentionMock: vi.fn( - async ( - thread: { id: string; post: (value: unknown) => Promise }, - message: { threadId?: string }, - options?: { 
destination?: { channelId?: string } }, - ) => { - observedRuntimeIds.destinationChannelId = - options?.destination?.channelId; - observedRuntimeIds.juniorBaseUrl = process.env.JUNIOR_BASE_URL; - observedRuntimeIds.threadId = thread.id; - observedRuntimeIds.messageThreadId = message.threadId; - await thread.post("observed"); - }, - ), - handleSubscribedMessageMock: vi.fn( - async ( - thread: { id: string; post: (value: unknown) => Promise }, - message: { threadId?: string }, - options?: { destination?: { channelId?: string } }, - ) => { - observedRuntimeIds.destinationChannelId = - options?.destination?.channelId; - observedRuntimeIds.juniorBaseUrl = process.env.JUNIOR_BASE_URL; - observedRuntimeIds.threadId = thread.id; - observedRuntimeIds.messageThreadId = message.threadId; - await thread.post("observed"); - }, - ), - }; + return { originalStateAdapterEnv }; }); -vi.mock("@/chat/app/factory", () => ({ - createSlackRuntime: vi.fn(() => ({ - handleNewMention: handleNewMentionMock, - handleSubscribedMessage: handleSubscribedMessageMock, - handleAssistantThreadStarted: noopAsync, - handleAssistantContextChanged: noopAsync, - })), -})); - -import { - collectSlackArtifactsFromCapturedCalls, - runEvalScenario, -} from "../../../evals/behavior-harness"; - describe("behavior harness", () => { afterAll(() => { if (originalStateAdapterEnv === undefined) { @@ -74,18 +18,13 @@ describe("behavior harness", () => { process.env.JUNIOR_STATE_ADAPTER = originalStateAdapterEnv; }); - afterEach(() => { - observedRuntimeIds.destinationChannelId = undefined; - observedRuntimeIds.juniorBaseUrl = undefined; - observedRuntimeIds.threadId = undefined; - observedRuntimeIds.messageThreadId = undefined; - handleNewMentionMock.mockClear(); - handleSubscribedMessageMock.mockClear(); - noopAsync.mockClear(); - }); - - it("normalizes eval thread fixtures to Slack-style runtime thread ids", async () => { + it("routes eval thread fixtures through the real Slack runtime", async () => { const 
result = await runEvalScenario({ + overrides: { + replyGeneration: { + cannedTexts: ["observed"], + }, + }, events: [ { type: "new_mention", @@ -106,11 +45,6 @@ describe("behavior harness", () => { ], }); - expect(handleNewMentionMock).toHaveBeenCalledTimes(1); - expect(observedRuntimeIds.threadId).toBe("slack:CAUTH:1700000000.0001"); - expect(observedRuntimeIds.messageThreadId).toBe( - "slack:CAUTH:1700000000.0001", - ); expect(result.posts).toEqual([ { channel: "CAUTH", @@ -121,27 +55,6 @@ describe("behavior harness", () => { ]); }); - it("normalizes eval destinations from adapter channel ids", async () => { - await runEvalScenario({ - events: [ - { - type: "new_mention", - thread: { - id: "slack:CAUTH:1700000000.0001", - }, - message: { - id: "m-auth-1", - text: "hello", - is_mention: true, - }, - }, - ], - }); - - expect(handleNewMentionMock).toHaveBeenCalledTimes(1); - expect(observedRuntimeIds.destinationChannelId).toBe("CAUTH"); - }); - it("rejects sandbox HTTP interception evals without a tunnel token", async () => { const previousBaseUrl = process.env.JUNIOR_BASE_URL; const previousTunnelToken = process.env.CLOUDFLARE_TUNNEL_TOKEN; @@ -151,7 +64,9 @@ describe("behavior harness", () => { await expect( runEvalScenario({ overrides: { - credential_providers: ["github"], + auth: { + credentialProviders: ["github"], + }, }, events: [], }), @@ -179,7 +94,9 @@ describe("behavior harness", () => { await expect( runEvalScenario({ overrides: { - credential_providers: ["github"], + auth: { + credentialProviders: ["github"], + }, }, events: [], }), @@ -203,6 +120,12 @@ describe("behavior harness", () => { }; const result = await runEvalScenario({ + overrides: { + replyGeneration: { + cannedTexts: ["observed first", "observed second"], + }, + subscribedReplyDecisions: [{ should_reply: true, reason: "mentioned" }], + }, events: [ { type: "new_mention", @@ -231,78 +154,22 @@ describe("behavior harness", () => { ], }); - 
expect(handleNewMentionMock).toHaveBeenCalledTimes(1); - expect(handleSubscribedMessageMock).toHaveBeenCalledTimes(1); expect(result.posts).toEqual([ { channel: "CQUEUE", files: [], - text: "observed", + text: "observed first", thread_ts: "1700000000.0002", }, { channel: "CQUEUE", files: [], - text: "observed", + text: "observed second", thread_ts: "1700000000.0002", }, ]); }); - it("preserves attached file metadata on assistant thread posts", async () => { - handleNewMentionMock.mockImplementationOnce( - async (thread: { post: (value: unknown) => Promise }) => { - await thread.post({ - raw: "", - files: [ - { - data: Buffer.from("png"), - filename: "generated.png", - mimeType: "image/png", - }, - ], - }); - }, - ); - - const result = await runEvalScenario({ - events: [ - { - type: "new_mention", - thread: { - id: "fixture-media-thread", - channel_id: "CMEDIA", - thread_ts: "1700000000.0003", - }, - message: { - id: "m-media-1", - text: "show me how you feel", - is_mention: true, - author: { - user_id: "UMEDIA", - }, - }, - }, - ], - }); - - expect(result.posts).toEqual([ - { - channel: "CMEDIA", - text: "", - thread_ts: "1700000000.0003", - files: [ - { - filename: "generated.png", - isImage: true, - mimeType: "image/png", - sizeBytes: 3, - }, - ], - }, - ]); - }); - it("restores cwd when setup fails after creating a plugin fixture", async () => { const cwd = process.cwd(); @@ -310,8 +177,10 @@ describe("behavior harness", () => { runEvalScenario({ events: [], overrides: { - plugin_dirs: ["evals/fixtures/plugins"], - plugin_packages: ["../bad-package"], + plugins: { + pluginDirs: ["evals/fixtures/plugins"], + pluginPackages: ["../bad-package"], + }, }, }), ).rejects.toThrow("Plugin package names must be valid npm package names"); @@ -320,11 +189,9 @@ describe("behavior harness", () => { }); it("collects created canvas metadata from captured Slack API calls", () => { - const artifacts = collectSlackArtifactsFromCapturedCalls([ + const artifacts = 
collectEvalSlackArtifactsFromSlackApiCalls([ { method: "canvases.create", - url: "https://slack.test/api/canvases.create", - headers: {}, params: { title: "Slack Streaming Timeline", document_content: { @@ -335,8 +202,6 @@ describe("behavior harness", () => { }, { method: "chat.postMessage", - url: "https://slack.test/api/chat.postMessage", - headers: {}, params: { channel: "CTEST", text: "Created a canvas with the full notes.", diff --git a/packages/junior-evals/vitest.config.ts b/packages/junior-evals/vitest.config.ts index 713719f3e..9668cede1 100644 --- a/packages/junior-evals/vitest.config.ts +++ b/packages/junior-evals/vitest.config.ts @@ -2,17 +2,23 @@ import { defineConfig } from "vitest/config"; import path from "node:path"; const juniorPackageRoot = path.resolve(__dirname, "../junior"); +const pluginApiPackageRoot = path.resolve(__dirname, "../junior-plugin-api"); export default defineConfig({ resolve: { alias: { "@": path.resolve(juniorPackageRoot, "src"), "@junior-tests": path.resolve(juniorPackageRoot, "tests"), + "@sentry/junior-plugin-api": path.resolve( + pluginApiPackageRoot, + "src/index.ts", + ), }, }, test: { environment: "node", include: ["tests/**/*.test.ts"], setupFiles: [path.resolve(juniorPackageRoot, "tests/msw/setup.ts")], + unstubEnvs: true, }, }); diff --git a/packages/junior-evals/vitest.evals.config.ts b/packages/junior-evals/vitest.evals.config.ts index 731d4c517..b0fd9d3fc 100644 --- a/packages/junior-evals/vitest.evals.config.ts +++ b/packages/junior-evals/vitest.evals.config.ts @@ -5,6 +5,7 @@ import fs from "node:fs"; import { createEnvFileLoader } from "../junior/src/env/files"; const juniorPackageRoot = path.resolve(__dirname, "../junior"); +const pluginApiPackageRoot = path.resolve(__dirname, "../junior-plugin-api"); const workspaceRoot = path.resolve(__dirname, "../.."); const applyEnvFile = createEnvFileLoader(); const EVAL_TEST_TIMEOUT_MS = 60_000; @@ -32,6 +33,10 @@ export default defineConfig({ alias: { "@": 
path.resolve(juniorPackageRoot, "src"), "@junior-tests": path.resolve(juniorPackageRoot, "tests"), + "@sentry/junior-plugin-api": path.resolve( + pluginApiPackageRoot, + "src/index.ts", + ), }, }, test: { @@ -42,5 +47,6 @@ export default defineConfig({ setupFiles: [path.resolve(juniorPackageRoot, "tests/msw/setup.ts")], reporters: [new DefaultEvalReporter()], testTimeout: EVAL_TEST_TIMEOUT_MS, + unstubEnvs: true, }, }); diff --git a/packages/junior/package.json b/packages/junior/package.json index 084fad02e..47c9f756e 100644 --- a/packages/junior/package.json +++ b/packages/junior/package.json @@ -48,13 +48,13 @@ "build": "tsup && tsc -p tsconfig.build.json --emitDeclarationOnly", "lint": "oxlint --config .oxlintrc.json --deny-warnings src tests scripts bin tsup.config.ts", "lint:fix": "oxlint --config .oxlintrc.json --deny-warnings --fix src tests scripts bin tsup.config.ts", - "test": "pnpm run test:slack-boundary && pnpm run test:arch-boundary && vitest run --maxWorkers=4", + "test": "pnpm run test:boundaries && pnpm run test:arch-boundary && vitest run --maxWorkers=4", "test:watch": "vitest", - "test:slack-boundary": "node scripts/check-slack-test-boundary.mjs", + "test:boundaries": "node scripts/check-test-boundaries.mjs", "test:arch-boundary": "depcruise --config .dependency-cruiser.mjs src/chat", "typecheck": "tsc --noEmit", "skills:check": "node scripts/check-skills.mjs", - "test:coverage": "pnpm run test:slack-boundary && pnpm run test:arch-boundary && vitest run --maxWorkers=4 --coverage --reporter=default --reporter=junit --outputFile.junit=coverage/results.junit.xml" + "test:coverage": "pnpm run test:boundaries && pnpm run test:arch-boundary && vitest run --maxWorkers=4 --coverage --reporter=default --reporter=junit --outputFile.junit=coverage/results.junit.xml" }, "dependencies": { "@sentry/junior-plugin-api": "workspace:*", diff --git a/packages/junior/scripts/check-slack-test-boundary.mjs b/packages/junior/scripts/check-slack-test-boundary.mjs 
deleted file mode 100644 index 728760cfb..000000000 --- a/packages/junior/scripts/check-slack-test-boundary.mjs +++ /dev/null @@ -1,148 +0,0 @@ -import fs from "node:fs/promises"; -import path from "node:path"; - -const repoRoot = process.cwd(); - -const EVAL_SOURCE_EXTENSIONS = new Set([".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"]); - -const FORBIDDEN_EVAL_PATTERNS = [ - /queueSlackApiResponse/, - /getCapturedSlackApiCalls/, - /queueSlackApiError/, - /queueSlackRateLimit/, - /@\/chat\/slack-actions\// -]; - -const INTEGRATION_BEHAVIOR_ROOT = path.join(repoRoot, "tests", "integration", "slack"); -const FORBIDDEN_INTEGRATION_BEHAVIOR_PATTERNS = [ - /\bvi\.mock\(/ -]; - -async function pathExists(targetPath) { - try { - await fs.access(targetPath); - return true; - } catch { - return false; - } -} - -async function listFilesRecursive(dirPath) { - const entries = await fs.readdir(dirPath, { withFileTypes: true }); - const files = []; - - for (const entry of entries) { - const nextPath = path.join(dirPath, entry.name); - if (entry.isDirectory()) { - files.push(...(await listFilesRecursive(nextPath))); - continue; - } - files.push(nextPath); - } - - return files; -} - -function toRelative(filePath) { - return path.relative(repoRoot, filePath).split(path.sep).join("/"); -} - -function findPatternLineNumbers(source, pattern) { - const lines = source.split("\n"); - const lineNumbers = []; - - for (let index = 0; index < lines.length; index += 1) { - if (pattern.test(lines[index])) { - lineNumbers.push(index + 1); - } - } - - return lineNumbers; -} - -async function checkMswDirectory() { - const mswPath = path.join(repoRoot, "tests", "msw"); - if (!(await pathExists(mswPath))) { - return []; - } - - const files = await listFilesRecursive(mswPath); - return files - .filter((filePath) => /\.test\.[cm]?[jt]sx?$/.test(filePath)) - .map((filePath) => `Unexpected test file under tests/msw: ${toRelative(filePath)}`); -} - -async function checkEvalSources() { - const evalsPath = 
path.join(repoRoot, "evals"); - if (!(await pathExists(evalsPath))) { - return []; - } - - const violations = []; - const files = await listFilesRecursive(evalsPath); - - for (const filePath of files) { - const extension = path.extname(filePath); - if (!EVAL_SOURCE_EXTENSIONS.has(extension)) { - continue; - } - - const source = await fs.readFile(filePath, "utf8"); - for (const pattern of FORBIDDEN_EVAL_PATTERNS) { - const lineNumbers = findPatternLineNumbers(source, pattern); - if (lineNumbers.length === 0) { - continue; - } - violations.push( - `Forbidden eval boundary pattern "${pattern.source}" in ${toRelative(filePath)} at line(s): ${lineNumbers.join(", ")}` - ); - } - } - - return violations; -} - -async function checkIntegrationBehaviorSources() { - if (!(await pathExists(INTEGRATION_BEHAVIOR_ROOT))) { - return []; - } - - const violations = []; - const files = await listFilesRecursive(INTEGRATION_BEHAVIOR_ROOT); - const testFiles = files.filter((filePath) => /\.test\.[cm]?[jt]sx?$/.test(filePath)); - - for (const filePath of testFiles) { - const source = await fs.readFile(filePath, "utf8"); - for (const pattern of FORBIDDEN_INTEGRATION_BEHAVIOR_PATTERNS) { - const lineNumbers = findPatternLineNumbers(source, pattern); - if (lineNumbers.length === 0) { - continue; - } - violations.push( - `Forbidden integration behavior pattern "${pattern.source}" in ${toRelative(filePath)} at line(s): ${lineNumbers.join(", ")}` - ); - } - } - - return violations; -} - -async function main() { - const violations = [ - ...(await checkMswDirectory()), - ...(await checkEvalSources()), - ...(await checkIntegrationBehaviorSources()) - ]; - - if (violations.length > 0) { - console.error("Slack test boundary check failed:"); - for (const violation of violations) { - console.error(`- ${violation}`); - } - process.exit(1); - } - - console.log("Slack test boundary check passed."); -} - -await main(); diff --git a/packages/junior/scripts/check-test-boundaries.mjs 
b/packages/junior/scripts/check-test-boundaries.mjs new file mode 100644 index 000000000..154b7507b --- /dev/null +++ b/packages/junior/scripts/check-test-boundaries.mjs @@ -0,0 +1,303 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const scriptPath = fileURLToPath(import.meta.url); +const juniorRoot = path.resolve(path.dirname(scriptPath), ".."); +const monorepoRoot = path.resolve(juniorRoot, "../.."); + +const EVAL_SOURCE_EXTENSIONS = new Set([ + ".ts", + ".tsx", + ".js", + ".jsx", + ".mjs", + ".cjs", +]); + +const FORBIDDEN_EVAL_PATTERNS = [ + /queueSlackApiResponse/, + /readCapturedSlackApiCalls/, + /captured-slack-api-calls/, + /getCapturedSlackApiCalls/, + /queueSlackApiError/, + /queueSlackRateLimit/, + /@\/chat\/slack-actions\//, + /auto_complete_mcp_oauth/, + /auto_complete_oauth/, + /credential_providers/, + /fail_reply_call/, + /mock_image_generation/, + /plugin_dirs/, + /plugin_packages/, + /reply_results/, + /reply_timeout_ms/, + /reply_texts/, + /skill_dirs/, + /subscribed_decisions/, + /unset_gateway_api_key/, +]; + +const VI_MODULE_MOCK_PATTERN = /\bvi\.(?:mock|doMock)\(\s*["']([^"']+)["']/g; +const OBSERVABILITY_LOGGING_MODULE = "@/chat/logging"; +const OBSERVABILITY_SENTRY_MODULE = "@/chat/sentry"; +const SENTRY_OBSERVABILITY_SIDE_EFFECT_PATTERN = + /\b(?:captureException|captureMessage|spanToJSON|startInactiveSpan|startSpan|withActiveSpan)\b/; +const OBSERVABILITY_ASSERTION_PATTERN = + /\bexpect\([^;\n]*(?:logException|logWarn|logInfo|setSpanAttributes|withSpan|captureException|startSpan|startInactiveSpan)[^;\n]*\)/g; +const LOGGING_CONTRACT_TEST_PATH_PATTERN = /(?:^|\/)tests\/unit\/logging\//; + +function defaultBoundaryCheckRoots() { + return { + evalsRoot: path.join(monorepoRoot, "packages", "junior-evals", "evals"), + evalTestsRoot: path.join(monorepoRoot, "packages", "junior-evals", "tests"), + integrationRoot: path.join(juniorRoot, "tests", "integration"), + mswRoot: 
path.join(juniorRoot, "tests", "msw"), + reportRoot: monorepoRoot, + testRoot: path.join(juniorRoot, "tests"), + }; +} + +async function pathExists(targetPath) { + try { + await fs.access(targetPath); + return true; + } catch { + return false; + } +} + +async function listFilesRecursive(dirPath) { + const entries = await fs.readdir(dirPath, { withFileTypes: true }); + const files = []; + + for (const entry of entries) { + const nextPath = path.join(dirPath, entry.name); + if (entry.isDirectory()) { + files.push(...(await listFilesRecursive(nextPath))); + continue; + } + files.push(nextPath); + } + + return files; +} + +function toRelative(filePath, reportRoot) { + return path.relative(reportRoot, filePath).split(path.sep).join("/"); +} + +function findPatternLineNumbers(source, pattern) { + const lines = source.split("\n"); + const lineNumbers = []; + + for (let index = 0; index < lines.length; index += 1) { + if (pattern.test(lines[index])) { + lineNumbers.push(index + 1); + } + } + + return lineNumbers; +} + +function findViModuleMocks(source) { + const mocks = []; + VI_MODULE_MOCK_PATTERN.lastIndex = 0; + + let match = VI_MODULE_MOCK_PATTERN.exec(source); + while (match) { + mocks.push({ + index: match.index, + lineNumber: source.slice(0, match.index).split("\n").length, + moduleName: match[1], + snippet: source.slice(match.index, match.index + 1_200), + }); + match = VI_MODULE_MOCK_PATTERN.exec(source); + } + + return mocks; +} + +function findPatternMatches(source, pattern) { + const matches = []; + pattern.lastIndex = 0; + + let match = pattern.exec(source); + while (match) { + matches.push({ + lineNumber: source.slice(0, match.index).split("\n").length, + }); + match = pattern.exec(source); + } + + return matches; +} + +function isTestFile(filePath) { + return /\.test\.[cm]?[jt]sx?$/.test(filePath); +} + +function isLoggingContractTestPath(relativePath) { + return LOGGING_CONTRACT_TEST_PATH_PATTERN.test(relativePath); +} + +function 
isObservabilitySideEffectMock(mock) { + if (mock.moduleName === OBSERVABILITY_LOGGING_MODULE) { + return true; + } + return ( + mock.moduleName === OBSERVABILITY_SENTRY_MODULE && + SENTRY_OBSERVABILITY_SIDE_EFFECT_PATTERN.test(mock.snippet) + ); +} + +async function checkMswDirectory(mswRoot, reportRoot) { + if (!(await pathExists(mswRoot))) { + return []; + } + + const files = await listFilesRecursive(mswRoot); + return files + .filter(isTestFile) + .map( + (filePath) => + `Unexpected test file under tests/msw: ${toRelative(filePath, reportRoot)}`, + ); +} + +async function checkEvalSources(evalsRoot, reportRoot) { + if (!(await pathExists(evalsRoot))) { + return []; + } + + const violations = []; + const files = await listFilesRecursive(evalsRoot); + + for (const filePath of files) { + const extension = path.extname(filePath); + if (!EVAL_SOURCE_EXTENSIONS.has(extension)) { + continue; + } + + const source = await fs.readFile(filePath, "utf8"); + for (const pattern of FORBIDDEN_EVAL_PATTERNS) { + const lineNumbers = findPatternLineNumbers(source, pattern); + if (lineNumbers.length === 0) { + continue; + } + violations.push( + `Forbidden eval boundary pattern "${pattern.source}" in ${toRelative(filePath, reportRoot)} at line(s): ${lineNumbers.join(", ")}`, + ); + } + } + + return violations; +} + +async function checkIntegrationSources(integrationRoot, reportRoot) { + if (!(await pathExists(integrationRoot))) { + return []; + } + + const violations = []; + const files = await listFilesRecursive(integrationRoot); + const testFiles = files.filter(isTestFile); + + for (const filePath of testFiles) { + const source = await fs.readFile(filePath, "utf8"); + const relativePath = toRelative(filePath, reportRoot); + for (const mock of findViModuleMocks(source)) { + violations.push( + `Forbidden integration module mock "${mock.moduleName}" in ${relativePath}:${mock.lineNumber}. 
Integration tests must use real runtime wiring and fake deterministic agent/model output only through explicit composition or named harness ports.`, + ); + } + } + + return violations; +} + +async function checkObservabilityBoundaries(testRoot, reportRoot) { + if (!(await pathExists(testRoot))) { + return []; + } + + const violations = []; + const files = await listFilesRecursive(testRoot); + const testFiles = files.filter(isTestFile); + + for (const filePath of testFiles) { + const source = await fs.readFile(filePath, "utf8"); + const relativePath = toRelative(filePath, reportRoot); + if (isLoggingContractTestPath(relativePath)) { + continue; + } + + for (const mock of findViModuleMocks(source)) { + if (!isObservabilitySideEffectMock(mock)) { + continue; + } + violations.push( + `Forbidden observability module mock "${mock.moduleName}" in ${relativePath}:${mock.lineNumber}. Observability mocks belong only in rare logging contract tests under tests/unit/logging/**.`, + ); + } + + for (const match of findPatternMatches( + source, + OBSERVABILITY_ASSERTION_PATTERN, + )) { + violations.push( + `Forbidden observability assertion in ${relativePath}:${match.lineNumber}. Telemetry assertions belong only in rare logging contract tests under tests/unit/logging/**.`, + ); + } + } + + return violations; +} + +/** Return all boundary violations across Junior tests and evals. 
*/ +export async function runTestBoundaryCheck(roots = {}) { + const resolvedRoots = { + ...defaultBoundaryCheckRoots(), + ...roots, + }; + return [ + ...(await checkMswDirectory( + resolvedRoots.mswRoot, + resolvedRoots.reportRoot, + )), + ...(await checkEvalSources( + resolvedRoots.evalsRoot, + resolvedRoots.reportRoot, + )), + ...(await checkIntegrationSources( + resolvedRoots.integrationRoot, + resolvedRoots.reportRoot, + )), + ...(await checkObservabilityBoundaries( + resolvedRoots.testRoot, + resolvedRoots.reportRoot, + )), + ...(await checkObservabilityBoundaries( + resolvedRoots.evalTestsRoot, + resolvedRoots.reportRoot, + )), + ]; +} + +async function main() { + const violations = await runTestBoundaryCheck(); + + if (violations.length > 0) { + console.error("Test boundary check failed:"); + for (const violation of violations) { + console.error(`- ${violation}`); + } + process.exit(1); + } + + console.log("Test boundary check passed."); +} + +if (process.argv[1] && path.resolve(process.argv[1]) === scriptPath) { + await main(); +} diff --git a/packages/junior/src/app.ts b/packages/junior/src/app.ts index 40303f927..7f08c1772 100644 --- a/packages/junior/src/app.ts +++ b/packages/junior/src/app.ts @@ -376,17 +376,17 @@ export async function createApp(options?: JuniorAppOptions): Promise { } const waitUntil = options?.waitUntil ?? 
(await defaultWaitUntil()); - const runtimeServiceOverrides = { - sandbox: { - tracePropagation: { domains: sandboxEgressTracePropagationDomains }, + const runtimeScenarioAdapters = { + sandboxTracePropagation: { + domains: sandboxEgressTracePropagationDomains, }, }; const slackWebhookServices = createProductionSlackWebhookServices({ - services: runtimeServiceOverrides, + adapters: runtimeScenarioAdapters, }); const generateReplyWithTracePropagation = withSandboxTracePropagation( generateAssistantReply, - runtimeServiceOverrides.sandbox.tracePropagation, + runtimeScenarioAdapters.sandboxTracePropagation, ); const app = new Hono(); @@ -442,7 +442,7 @@ export async function createApp(options?: JuniorAppOptions): Promise { conversationWorkOptions ??= options?.conversationWork ?? createProductionConversationWorkOptions({ - services: runtimeServiceOverrides, + adapters: runtimeScenarioAdapters, }); return conversationWorkOptions; }; diff --git a/packages/junior/src/chat/app/factory.ts b/packages/junior/src/chat/app/factory.ts index eaa37a655..d63814c8a 100644 --- a/packages/junior/src/chat/app/factory.ts +++ b/packages/junior/src/chat/app/factory.ts @@ -6,7 +6,7 @@ import { type SlackTurnRuntime, } from "@/chat/runtime/slack-runtime"; import { createJuniorRuntimeServices } from "@/chat/app/services"; -import type { JuniorRuntimeServiceOverrides } from "@/chat/app/services"; +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; import { coerceThreadConversationState } from "@/chat/state/conversation"; import { coerceThreadArtifactsState } from "@/chat/state/artifacts"; import { logException, logWarn, withSpan } from "@/chat/logging"; @@ -43,9 +43,9 @@ import type { SubscribedReplyDecision } from "@/chat/services/subscribed-reply-p import { botConfig } from "@/chat/config"; export interface CreateSlackRuntimeOptions { + adapters?: JuniorRuntimeScenarioAdapters; getSlackAdapter: () => SlackAdapter; now?: () => number; - services?: 
JuniorRuntimeServiceOverrides; } async function persistAssistantContextChannelId(args: { @@ -97,10 +97,11 @@ function upsertSkippedConversationMessage( }); } +/** Build a Slack runtime with production wiring plus optional scenario adapters. */ export function createSlackRuntime( options: CreateSlackRuntimeOptions, ): SlackTurnRuntime { - const services = createJuniorRuntimeServices(options.services); + const services = createJuniorRuntimeServices(options.adapters); const prepareTurnState = createPrepareTurnState({ compactConversationIfNeeded: services.conversationMemory.compactConversationIfNeeded, diff --git a/packages/junior/src/chat/app/production.ts b/packages/junior/src/chat/app/production.ts index 1e483da35..70eeee937 100644 --- a/packages/junior/src/chat/app/production.ts +++ b/packages/junior/src/chat/app/production.ts @@ -15,7 +15,7 @@ import { createSlackConversationWorker } from "@/chat/task-execution/slack-work" import { getVercelConversationWorkQueue } from "@/chat/task-execution/vercel-queue"; import type { VercelConversationWorkCallbackOptions } from "@/chat/task-execution/vercel-callback"; import { resumeAwaitingSlackContinuation } from "@/chat/runtime/agent-continue-runner"; -import type { JuniorRuntimeServiceOverrides } from "@/chat/app/services"; +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; import { generateAssistantReply } from "@/chat/respond"; import { getConfiguredConversationStore } from "@/chat/conversations/configured"; import type { ConversationStore } from "@/chat/conversations/store"; @@ -65,12 +65,12 @@ export function getProductionConversationStore(): ConversationStore { /** Create production-backed services for Slack webhook ingress. 
*/ export function createProductionSlackWebhookServices(options?: { - services?: JuniorRuntimeServiceOverrides; + adapters?: JuniorRuntimeScenarioAdapters; }): SlackWebhookServices { const conversationStore = getProductionConversationStore(); const runtime = createSlackRuntime({ + adapters: options?.adapters, getSlackAdapter: getProductionSlackAdapter, - services: options?.services, }); return { getSlackAdapter: getProductionSlackAdapter, @@ -95,12 +95,12 @@ export function getProductionSlackWebhookServices(): SlackWebhookServices { /** Return the production queue callback options for conversation work. */ export function createProductionConversationWorkOptions(options?: { - services?: JuniorRuntimeServiceOverrides; + adapters?: JuniorRuntimeScenarioAdapters; }): VercelConversationWorkCallbackOptions { const conversationStore = getProductionConversationStore(); const runtime = createSlackRuntime({ + adapters: options?.adapters, getSlackAdapter: getProductionSlackAdapter, - services: options?.services, }); return { conversationStore, @@ -112,7 +112,7 @@ export function createProductionConversationWorkOptions(options?: { await resumeAwaitingSlackContinuation(conversationId, { generateReply: withSandboxTracePropagation( generateAssistantReply, - options?.services?.sandbox?.tracePropagation, + options?.adapters?.sandboxTracePropagation, ), }), runtime, diff --git a/packages/junior/src/chat/app/services.ts b/packages/junior/src/chat/app/services.ts index fa5e697a9..38e21e23a 100644 --- a/packages/junior/src/chat/app/services.ts +++ b/packages/junior/src/chat/app/services.ts @@ -1,13 +1,13 @@ import { completeObject, completeText } from "@/chat/pi/client"; import { generateAssistantReply as generateAssistantReplyImpl, - type AssistantReplyRequestContext, + type ReplyRequestContext, } from "@/chat/respond"; import type { SandboxEgressTracePropagationConfig } from "@/chat/sandbox/egress-tracing"; import { - getAwaitingAgentContinueRequest, - scheduleAgentContinue, -} from 
"@/chat/services/agent-continue"; + getAwaitingTurnContinuationRequest, + scheduleTurnTimeoutResume, +} from "@/chat/services/timeout-resume"; import { createConversationMemoryService, type ConversationMemoryDeps, @@ -41,15 +41,20 @@ export interface JuniorRuntimeServices { visionContext: VisionContextService; } -export interface JuniorRuntimeServiceOverrides { - conversationMemory?: Partial; - contextCompactor?: Partial; - replyExecutor?: Partial>; - subscribedReplyPolicy?: Partial; - sandbox?: { - tracePropagation?: SandboxEgressTracePropagationConfig; - }; - visionContext?: Partial; +/** Scenario adapters for deterministic runtime tests and evals. */ +export interface JuniorRuntimeScenarioAdapters { + autoCompactionTriggerTokens?: ContextCompactorDeps["autoCompactionTriggerTokens"]; + classifySubscribedReply?: SubscribedReplyPolicyDeps["completeObject"]; + compactConversationText?: ContextCompactorDeps["completeText"]; + describeImagesText?: VisionContextDeps["completeText"]; + downloadSlackFile?: VisionContextDeps["downloadFile"]; + generateAssistantReply?: ReplyExecutorServices["generateAssistantReply"]; + generateThreadTitleText?: ConversationMemoryDeps["completeText"]; + getAwaitingTurnContinuationRequest?: ReplyExecutorServices["getAwaitingAgentContinueRequest"]; + listThreadReplies?: VisionContextDeps["listThreadReplies"]; + lookupSlackUser?: ReplyExecutorServices["lookupSlackUser"]; + sandboxTracePropagation?: SandboxEgressTracePropagationConfig; + scheduleTurnTimeoutResume?: ReplyExecutorServices["scheduleAgentContinue"]; } /** Apply app-owned sandbox egress trace config unless a turn overrides it. 
*/ @@ -57,60 +62,54 @@ export function withSandboxTracePropagation( generateReply: typeof generateAssistantReplyImpl, tracePropagation?: SandboxEgressTracePropagationConfig, ): typeof generateAssistantReplyImpl { - return async (messageText: string, context: AssistantReplyRequestContext) => + return async (messageText: string, context: ReplyRequestContext) => await generateReply(messageText, { ...context, sandbox: { - ...context?.sandbox, - tracePropagation: - context?.sandbox?.tracePropagation ?? tracePropagation, + ...context.sandbox, + tracePropagation: context.sandbox?.tracePropagation ?? tracePropagation, }, }); } +/** Compose the concrete service set used by the Slack runtime. */ export function createJuniorRuntimeServices( - overrides: JuniorRuntimeServiceOverrides = {}, + adapters: JuniorRuntimeScenarioAdapters = {}, ): JuniorRuntimeServices { const conversationMemory = createConversationMemoryService({ - completeText: overrides.conversationMemory?.completeText ?? completeText, + completeText: adapters.generateThreadTitleText ?? completeText, }); const contextCompactor = createContextCompactor({ - completeText: overrides.contextCompactor?.completeText ?? completeText, - autoCompactionTriggerTokens: - overrides.contextCompactor?.autoCompactionTriggerTokens, + completeText: adapters.compactConversationText ?? completeText, + autoCompactionTriggerTokens: adapters.autoCompactionTriggerTokens, }); const visionContext = createVisionContextService({ - completeText: overrides.visionContext?.completeText ?? completeText, - listThreadReplies: - overrides.visionContext?.listThreadReplies ?? listThreadReplies, - downloadFile: - overrides.visionContext?.downloadFile ?? downloadPrivateSlackFile, + completeText: adapters.describeImagesText ?? completeText, + listThreadReplies: adapters.listThreadReplies ?? listThreadReplies, + downloadFile: adapters.downloadSlackFile ?? 
downloadPrivateSlackFile, }); return { conversationMemory, contextCompactor, replyExecutor: { - contextCompactor: - overrides.replyExecutor?.contextCompactor ?? contextCompactor, + contextCompactor, generateAssistantReply: - overrides.replyExecutor?.generateAssistantReply ?? + adapters.generateAssistantReply ?? withSandboxTracePropagation( generateAssistantReplyImpl, - overrides.sandbox?.tracePropagation, + adapters.sandboxTracePropagation, ), getAwaitingAgentContinueRequest: - overrides.replyExecutor?.getAwaitingAgentContinueRequest ?? - getAwaitingAgentContinueRequest, - lookupSlackUser: - overrides.replyExecutor?.lookupSlackUser ?? lookupSlackUser, + adapters.getAwaitingTurnContinuationRequest ?? + getAwaitingTurnContinuationRequest, + lookupSlackUser: adapters.lookupSlackUser ?? lookupSlackUser, scheduleAgentContinue: - overrides.replyExecutor?.scheduleAgentContinue ?? scheduleAgentContinue, + adapters.scheduleTurnTimeoutResume ?? scheduleTurnTimeoutResume, generateThreadTitle: conversationMemory.generateThreadTitle, }, subscribedReplyPolicy: createSubscribedReplyPolicy({ - completeObject: - overrides.subscribedReplyPolicy?.completeObject ?? completeObject, + completeObject: adapters.classifySubscribedReply ?? completeObject, }), visionContext, }; diff --git a/packages/junior/src/chat/capabilities/catalog.ts b/packages/junior/src/chat/capabilities/catalog.ts index 8b19e48a0..09e7aa52b 100644 --- a/packages/junior/src/chat/capabilities/catalog.ts +++ b/packages/junior/src/chat/capabilities/catalog.ts @@ -45,12 +45,10 @@ function cloneProviderDefinition( }; } -/** Build (and cache) the capability catalog from registered plugins. 
*/ -function getCapabilityCatalog() { - const signature = getPluginCatalogSignature(); - if (cachedCatalog?.signature === signature) return cachedCatalog; - - const providers = getPluginCapabilityProviders(); +function buildCapabilityCatalog( + signature: string, + providers: CapabilityProviderDefinition[], +): NonNullable { const capabilityToProvider = new Map(); for (const provider of providers) { @@ -64,10 +62,22 @@ function getCapabilityCatalog() { } } - cachedCatalog = { signature, providers, capabilityToProvider }; + return { signature, providers, capabilityToProvider }; +} + +/** Build (and cache) the capability catalog from registered plugins. */ +function getCapabilityCatalog() { + const signature = getPluginCatalogSignature(); + if (cachedCatalog?.signature === signature) return cachedCatalog; + + cachedCatalog = buildCapabilityCatalog( + signature, + getPluginCapabilityProviders(), + ); return cachedCatalog; } +/** Return the plugin provider that owns a capability. */ export function getCapabilityProvider( capability: string, ): CapabilityProviderDefinition | undefined { @@ -75,10 +85,12 @@ export function getCapabilityProvider( return provider ? cloneProviderDefinition(provider) : undefined; } +/** Check whether a capability is registered by any plugin provider. */ export function isKnownCapability(capability: string): boolean { return getCapabilityCatalog().capabilityToProvider.has(capability); } +/** List all registered capability providers. 
*/ export function listCapabilityProviders(): CapabilityProviderDefinition[] { return getCapabilityCatalog().providers.map(cloneProviderDefinition); } diff --git a/packages/junior/src/chat/local/runner.ts b/packages/junior/src/chat/local/runner.ts index e35d64e46..2002d7e94 100644 --- a/packages/junior/src/chat/local/runner.ts +++ b/packages/junior/src/chat/local/runner.ts @@ -11,10 +11,8 @@ import { type AssistantReply, } from "@/chat/respond"; import { THREAD_STATE_TTL_MS } from "chat"; -import { - stripRuntimeTurnContext, - trimTrailingAssistantMessages, -} from "@/chat/respond-helpers"; +import { stripRuntimeTurnContext } from "@/chat/respond/runtime-turn-context"; +import { trimTrailingAssistantMessages } from "@/chat/respond/pi-messages"; import { buildDeliveredTurnStatePatch } from "@/chat/runtime/delivered-turn-state"; import { getPersistedSandboxState, diff --git a/packages/junior/src/chat/mcp/oauth-provider.ts b/packages/junior/src/chat/mcp/oauth-provider.ts index 2516f2b42..9771d53c0 100644 --- a/packages/junior/src/chat/mcp/oauth-provider.ts +++ b/packages/junior/src/chat/mcp/oauth-provider.ts @@ -19,6 +19,15 @@ import { type McpAuthSessionState, } from "./auth-store"; +type McpOAuthSessionContext = Omit< + McpAuthSessionState, + | "authSessionId" + | "authorizationUrl" + | "codeVerifier" + | "createdAtMs" + | "updatedAtMs" +>; + function createClientMetadata(callbackUrl: string): OAuthClientMetadata { return { client_name: "Junior MCP Client", @@ -29,20 +38,14 @@ function createClientMetadata(callbackUrl: string): OAuthClientMetadata { }; } +/** OAuth client provider backed by Junior's MCP auth-session state store. 
*/ export class StateBackedMcpOAuthClientProvider implements OAuthClientProvider { readonly clientMetadata: OAuthClientMetadata; constructor( readonly authSessionId: string, private readonly callbackUrl: string, - private readonly sessionContext?: Omit< - McpAuthSessionState, - | "authSessionId" - | "authorizationUrl" - | "codeVerifier" - | "createdAtMs" - | "updatedAtMs" - >, + private readonly sessionContext?: McpOAuthSessionContext, ) { this.clientMetadata = createClientMetadata(callbackUrl); } diff --git a/packages/junior/src/chat/mcp/oauth.ts b/packages/junior/src/chat/mcp/oauth.ts index 164031735..355a229ae 100644 --- a/packages/junior/src/chat/mcp/oauth.ts +++ b/packages/junior/src/chat/mcp/oauth.ts @@ -13,6 +13,7 @@ import { } from "./auth-store"; import { StateBackedMcpOAuthClientProvider } from "./oauth-provider"; +/** Return the callback path registered for an MCP provider OAuth flow. */ export function getMcpOAuthCallbackPath(provider: string): string { return `/api/oauth/callback/mcp/${provider}`; } @@ -25,6 +26,7 @@ function requirePluginWithMcp(provider: string): PluginDefinition { return plugin; } +/** Create the state-backed OAuth provider used by MCP clients during auth pause/resume. */ export async function createMcpOAuthClientProvider(input: { provider: string; conversationId: string; @@ -102,6 +104,7 @@ export async function createMcpOAuthClientProvider(input: { ); } +/** Finish the MCP OAuth code exchange and return the updated auth session. 
*/ export async function finalizeMcpAuthorization( provider: string, authSessionId: string, diff --git a/packages/junior/src/chat/mcp/tool-manager.ts b/packages/junior/src/chat/mcp/tool-manager.ts index 312feb7a4..8c5651e46 100644 --- a/packages/junior/src/chat/mcp/tool-manager.ts +++ b/packages/junior/src/chat/mcp/tool-manager.ts @@ -23,6 +23,7 @@ import type { PluginDefinition } from "@/chat/plugins/types"; import { McpAuthorizationRequiredError, PluginMcpClient, + type PluginMcpClientOptions, type PluginMcpListedTool, type PluginMcpToolCallResult, } from "./client"; @@ -163,6 +164,19 @@ export interface McpToolManagerOptions { provider: string, error: McpAuthorizationRequiredError, ) => Promise | boolean | void; + clientFactory?: ( + plugin: PluginDefinition, + options: PluginMcpClientOptions, + ) => McpToolClient; +} + +export interface McpToolClient { + callTool( + name: string, + args: Record | undefined, + ): Promise; + close(): Promise; + listTools(): Promise; } export interface ManagedMcpToolResult { @@ -201,7 +215,7 @@ export class McpToolManager { private readonly pluginsByProvider = new Map(); private readonly activeProviders = new Set(); private readonly authorizationPendingProviders = new Set(); - private readonly clientsByProvider = new Map(); + private readonly clientsByProvider = new Map(); private readonly toolsByProvider = new Map(); constructor( @@ -330,7 +344,7 @@ export class McpToolManager { return tools.filter((tool) => allowedToolSet.has(tool.name)); } - private async getClient(plugin: PluginDefinition): Promise { + private async getClient(plugin: PluginDefinition): Promise { const existing = this.clientsByProvider.get(plugin.manifest.name); if (existing) { return existing; @@ -339,17 +353,20 @@ export class McpToolManager { const authProvider = this.options.authProviderFactory ? await this.options.authProviderFactory(plugin) : undefined; - const client = new PluginMcpClient(plugin, { + const clientOptions = { ...(authProvider ? 
{ authProvider } : {}), ...(this.options.fetch ? { fetch: this.options.fetch } : {}), - }); + } satisfies PluginMcpClientOptions; + const client = this.options.clientFactory + ? this.options.clientFactory(plugin, clientOptions) + : new PluginMcpClient(plugin, clientOptions); this.clientsByProvider.set(plugin.manifest.name, client); return client; } private toManagedTool( plugin: PluginDefinition, - client: PluginMcpClient, + client: McpToolClient, tool: PluginMcpListedTool, ): ManagedMcpTool { const outputSchema = toOptionalRecord(tool.outputSchema); diff --git a/packages/junior/src/chat/respond-helpers.ts b/packages/junior/src/chat/respond-helpers.ts deleted file mode 100644 index 246237ff3..000000000 --- a/packages/junior/src/chat/respond-helpers.ts +++ /dev/null @@ -1,417 +0,0 @@ -/** - * Pure helper functions used by the agent reply orchestration in respond.ts. - * - * These are extracted to reduce the size of the main orchestration module and - * make individual helpers independently testable. - */ -import type { - AssistantMessage, - ToolResultMessage, -} from "@earendil-works/pi-ai"; -import type { PiMessage } from "@/chat/pi/messages"; -import type { Skill } from "@/chat/skills"; -import { TURN_CONTEXT_TAG } from "@/chat/turn-context-tag"; - -const MAX_INLINE_ATTACHMENT_BASE64_CHARS = 120_000; -const RUNTIME_TURN_CONTEXT_START = `<${TURN_CONTEXT_TAG}>`; - -/** Extract conversation and session identifiers from correlation context. */ -export function getSessionIdentifiers(context: { - correlation?: { - conversationId?: string; - threadId?: string; - turnId?: string; - runId?: string; - }; -}): { - conversationId?: string; - sessionId?: string; -} { - return { - conversationId: - context.correlation?.conversationId ?? - context.correlation?.threadId ?? - context.correlation?.runId, - sessionId: context.correlation?.turnId, - }; -} - -/** Detect polite execution deferral phrases that signal the model is stalling. 
*/ -export function isExecutionDeferralResponse(text: string): boolean { - return /\b(want me to proceed|do you want me to proceed|shall i proceed|can i proceed|should i proceed|let me do that now|give me a moment|tag me again|fresh invocation)\b/i.test( - text, - ); -} - -/** Detect disclaimers about missing tool access. */ -export function isToolAccessDisclaimerResponse(text: string): boolean { - return /\b(i (don't|do not) have access to (active )?tool|tool results came back empty|prior results .* empty|cannot access .*tool|need to (run|load) .*tool .* first)\b/i.test( - text, - ); -} - -/** True when the model produced an escape response instead of executing. */ -export function isExecutionEscapeResponse(text: string): boolean { - const trimmed = text.trim(); - if (!trimmed) return false; - return ( - isExecutionDeferralResponse(trimmed) || - isToolAccessDisclaimerResponse(trimmed) - ); -} - -/** Best-effort JSON extraction from text that may contain fenced blocks. */ -export function parseJsonCandidate(text: string): unknown { - const trimmed = text.trim(); - if (!trimmed) return undefined; - - try { - return JSON.parse(trimmed) as unknown; - } catch { - const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); - if (!fenced) return undefined; - try { - return JSON.parse(fenced[1]) as unknown; - } catch { - return undefined; - } - } -} - -/** Check whether a parsed object looks like a raw tool call/result payload. */ -export function isToolPayloadShape(payload: unknown): boolean { - if (!payload || typeof payload !== "object") return false; - const record = payload as Record; - - const type = typeof record.type === "string" ? 
record.type.toLowerCase() : ""; - if (type.startsWith("tool-")) return true; - if ( - type === "tool_use" || - type === "tool_call" || - type === "tool_result" || - type === "tool_error" - ) - return true; - - const hasToolName = - typeof record.toolName === "string" || typeof record.name === "string"; - const hasToolInput = - Object.prototype.hasOwnProperty.call(record, "input") || - Object.prototype.hasOwnProperty.call(record, "args"); - if (hasToolName && hasToolInput) return true; - - return false; -} - -/** Detect responses that are raw tool payloads leaked as text. */ -export function isRawToolPayloadResponse(text: string): boolean { - const parsed = parseJsonCandidate(text); - if (Array.isArray(parsed)) { - return parsed.some((entry) => isToolPayloadShape(entry)); - } - if (isToolPayloadShape(parsed)) { - return true; - } - - const compact = text.replace(/\s+/g, " "); - return /"type"\s*:\s*"tool[-_](use|call|result|error)"/i.test(compact); -} - -/** Redact image data from prompt content parts for observability. */ -export function toObservablePromptPart( - part: - | { type: "text"; text: string } - | { type: "image"; data: string; mimeType: string }, -): Record { - if (part.type === "text") { - return { - type: "text", - text: part.text, - }; - } - - return { - type: "image", - mimeType: part.mimeType, - data: `[omitted:${part.data.length}]`, - }; -} - -/** Truncate message text for log attributes. */ -export function summarizeMessageText(text: string): string { - const normalized = text.trim().replace(/\s+/g, " "); - if (!normalized) { - return "[empty]"; - } - return normalized.length > 1_200 - ? 
`${normalized.slice(0, 1_200)}...` - : normalized; -} - -function isStructuredThreadContext(context: string): boolean { - return /^/.test(context); -} - -function renderThreadContextForPrompt(context: string): string { - if (isStructuredThreadContext(context)) { - return context; - } - return ["", context, ""].join("\n"); -} - -/** - * Put prior thread text before the current instruction when no Pi history - * exists. Structured thread XML is already a top-level prompt block. - */ -export function buildUserTurnText( - userInput: string, - conversationContext?: string, -): string { - const trimmedContext = conversationContext?.trim(); - - if (!trimmedContext) { - return userInput; - } - - return [ - renderThreadContextForPrompt(trimmedContext), - "", - "", - userInput, - "", - ].join("\n"); -} - -/** Encode a non-image attachment as base64 XML for the prompt. */ -export function encodeNonImageAttachmentForPrompt(attachment: { - data: Buffer; - mediaType: string; - filename?: string; -}): string { - const base64 = attachment.data.toString("base64"); - const wasTruncated = base64.length > MAX_INLINE_ATTACHMENT_BASE64_CHARS; - const encodedPayload = wasTruncated - ? `${base64.slice(0, MAX_INLINE_ATTACHMENT_BASE64_CHARS)}...` - : base64; - - return [ - "", - `filename: ${attachment.filename ?? "unnamed"}`, - `media_type: ${attachment.mediaType}`, - "encoding: base64", - `truncated: ${wasTruncated ? "true" : "false"}`, - "", - encodedPayload, - "", - "", - ].join("\n"); -} - -/** Type guard for Pi SDK tool result messages. */ -export function isToolResultMessage( - value: unknown, -): value is ToolResultMessage { - return ( - typeof value === "object" && - value !== null && - (value as { role?: unknown }).role === "toolResult" - ); -} - -/** Extract the tool name from a raw tool result message. 
*/ -export function normalizeToolNameFromResult( - result: unknown, -): string | undefined { - if (!result || typeof result !== "object") return undefined; - const record = result as { toolName?: unknown; name?: unknown }; - if (typeof record.toolName === "string" && record.toolName.length > 0) { - return record.toolName; - } - if (typeof record.name === "string" && record.name.length > 0) { - return record.name; - } - return undefined; -} - -/** Check whether a tool result carries an error flag. */ -export function isToolResultError(result: unknown): boolean { - if (!result || typeof result !== "object") return false; - return Boolean((result as { isError?: unknown }).isError); -} - -/** Type guard for Pi SDK assistant messages. */ -export function isAssistantMessage(value: unknown): value is AssistantMessage { - return ( - typeof value === "object" && - value !== null && - (value as { role?: unknown }).role === "assistant" - ); -} - -/** Extract role string from a raw Pi message. */ -export function getPiMessageRole(value: unknown): string | undefined { - if (!value || typeof value !== "object") { - return undefined; - } - const role = (value as { role?: unknown }).role; - return typeof role === "string" ? role : undefined; -} - -function getUserMessageContent(message: PiMessage): unknown[] | undefined { - const record = message as { role?: unknown; content?: unknown }; - return record.role === "user" && Array.isArray(record.content) - ? 
record.content - : undefined; -} - -function isRuntimeTurnContextPart(part: unknown, marker: string): boolean { - return ( - part !== null && - typeof part === "object" && - (part as { type?: unknown }).type === "text" && - typeof (part as { text?: unknown }).text === "string" && - (part as { text: string }).text.startsWith(marker) - ); -} - -function prependRuntimeTurnContext( - message: PiMessage, - turnContextPrompt: string, -): PiMessage | undefined { - const content = getUserMessageContent(message); - if (!content) { - return undefined; - } - - const contextIndex = content.findIndex((part) => - isRuntimeTurnContextPart(part, RUNTIME_TURN_CONTEXT_START), - ); - if (contextIndex >= 0) { - return undefined; - } - - return { - ...message, - content: [{ type: "text", text: turnContextPrompt }, ...content], - } as PiMessage; -} - -/** - * Add bootstrap context only for stored boundaries captured before prompt(). - */ -export function prependMissingRuntimeTurnContext( - messages: PiMessage[], - turnContextPrompt: string, -): PiMessage[] { - if (hasRuntimeTurnContext(messages)) { - return messages; - } - - for (let index = messages.length - 1; index >= 0; index -= 1) { - const updated = prependRuntimeTurnContext( - messages[index], - turnContextPrompt, - ); - if (!updated) { - continue; - } - - const nextMessages = [...messages]; - nextMessages[index] = updated; - return nextMessages; - } - - return [ - ...messages, - { - role: "user", - content: [{ type: "text", text: turnContextPrompt }], - timestamp: Date.now(), - } as PiMessage, - ]; -} - -/** Return whether Pi history already carries session bootstrap context. */ -export function hasRuntimeTurnContext(messages: PiMessage[]): boolean { - return messages.some((message) => - getUserMessageContent(message)?.some((part) => - isRuntimeTurnContextPart(part, RUNTIME_TURN_CONTEXT_START), - ), - ); -} - -/** Remove volatile runtime context before reusing messages as history. 
*/ -export function stripRuntimeTurnContext(messages: PiMessage[]): PiMessage[] { - return messages.flatMap((message) => { - const content = getUserMessageContent(message); - if (!content) { - return [message]; - } - - const nextContent = content.filter( - (part) => !isRuntimeTurnContextPart(part, RUNTIME_TURN_CONTEXT_START), - ); - if (nextContent.length === content.length) { - return [message]; - } - if (nextContent.length === 0) { - return []; - } - return [{ ...message, content: nextContent } as PiMessage]; - }); -} - -/** Concatenate text content parts from an assistant message. */ -export function extractAssistantText(message: AssistantMessage): string { - const content = - (message as { content?: Array<{ type?: unknown; text?: unknown }> }) - .content ?? []; - return content - .filter( - (part): part is { type: "text"; text: string } => - part.type === "text" && typeof part.text === "string", - ) - .map((part) => part.text) - .join("\n"); -} - -/** Return assistant messages that belong to the terminal post-tool reply phase. */ -export function getTerminalAssistantMessages( - messages: readonly unknown[], -): AssistantMessage[] { - let lastToolResultIndex = -1; - for (let index = messages.length - 1; index >= 0; index -= 1) { - if (isToolResultMessage(messages[index])) { - lastToolResultIndex = index; - break; - } - } - - return messages.slice(lastToolResultIndex + 1).filter(isAssistantMessage); -} - -/** Upsert a skill into the active skills list by name. */ -export function upsertActiveSkill(activeSkills: Skill[], next: Skill): void { - const existing = activeSkills.find((skill) => skill.name === next.name); - if (existing) { - existing.body = next.body; - existing.description = next.description; - existing.skillPath = next.skillPath; - existing.allowedTools = next.allowedTools; - existing.pluginProvider = next.pluginProvider; - return; - } - - activeSkills.push(next); -} - -/** Remove trailing assistant messages before committing a resumable boundary. 
*/ -export function trimTrailingAssistantMessages( - messages: PiMessage[], -): PiMessage[] { - let end = messages.length; - while (end > 0 && getPiMessageRole(messages[end - 1]) === "assistant") { - end -= 1; - } - return end === messages.length ? [...messages] : messages.slice(0, end); -} diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts index 78f825187..7f00b7d63 100644 --- a/packages/junior/src/chat/respond.ts +++ b/packages/junior/src/chat/respond.ts @@ -7,7 +7,11 @@ * and persists resumable checkpoints. Slack delivery and thread presentation * should stay outside this file. */ -import { Agent, type AgentTool } from "@earendil-works/pi-agent-core"; +import { + Agent, + type AgentTool, + type StreamFn, +} from "@earendil-works/pi-agent-core"; import type { Destination, Source } from "@sentry/junior-plugin-api"; import { THREAD_STATE_TTL_MS, type FileUpload } from "chat"; import { botConfig } from "@/chat/config"; @@ -41,7 +45,10 @@ import { getPluginProviders, } from "@/chat/plugins/registry"; import { createAgentPluginHookRunner } from "@/chat/plugins/agent-hooks"; -import { McpToolManager } from "@/chat/mcp/tool-manager"; +import { + McpToolManager, + type McpToolManagerOptions, +} from "@/chat/mcp/tool-manager"; import { inferActiveMcpProvidersFromPiMessages, inferLoadedSkillNamesFromPiMessages, @@ -76,9 +83,10 @@ import { createSandboxExecutor, type SandboxAcquiredState, type SandboxExecutor, + type SandboxExecutorFactory, } from "@/chat/sandbox/sandbox"; import type { SandboxEgressTracePropagationConfig } from "@/chat/sandbox/egress-tracing"; -import type { SandboxWorkspace } from "@/chat/sandbox/workspace"; +import { createLazySandboxWorkspace } from "@/chat/sandbox/lazy-workspace"; import { shouldEmitDevAgentTrace } from "@/chat/runtime/dev-agent-trace"; import type { AssistantStatusSpec } from "@/chat/slack/assistant-thread/status"; import type { SlackConversationContext } from "@/chat/slack/conversation-context"; @@ 
-92,16 +100,21 @@ import { isRetryableTurnError, } from "@/chat/runtime/turn"; import { + buildSteeringPiMessage, buildUserTurnText, - encodeNonImageAttachmentForPrompt, - getSessionIdentifiers, + buildUserTurnInput, + type ReplyRequestAttachment, + toObservablePromptPart, + type UserTurnContentPart, +} from "@/chat/respond/user-turn-input"; +import { getSessionIdentifiers } from "@/chat/respond/session-identifiers"; +import { hasRuntimeTurnContext, - isAssistantMessage, prependMissingRuntimeTurnContext, - summarizeMessageText, - toObservablePromptPart, - upsertActiveSkill, -} from "@/chat/respond-helpers"; +} from "@/chat/respond/runtime-turn-context"; +import { isAssistantMessage } from "@/chat/respond/pi-messages"; +import { summarizeMessageText } from "@/chat/respond/reply-output-guards"; +import { upsertActiveSkill } from "@/chat/respond/active-skills"; import { buildTurnResult, type AssistantReply, @@ -153,9 +166,93 @@ import { // Re-export types for backward compatibility with existing consumers. 
export type { AssistantReply, AgentTurnDiagnostics }; +export type { ReplyRequestAttachment }; const AGENT_ABORT_SETTLE_GRACE_MS = 5_000; +type ReplyAgentOptions = { + getApiKey: () => string | undefined; + initialState: { + model: unknown; + systemPrompt: string; + thinkingLevel?: unknown; + tools: AgentTool[]; + }; + prepareNextTurn: () => Promise | unknown; + steeringMode: "all"; + streamFn: StreamFn; +}; + +type ReplyAgent = { + abort(): void; + continue(): Promise; + prompt(message: unknown): Promise; + state: { + messages: PiMessage[]; + model: unknown; + systemPrompt: string; + tools: unknown[]; + }; + steer(message: unknown): void; + subscribe( + listener: ( + event: + | { toolResults: unknown[]; type: "turn_end" } + | { type: "message_start" } + | { + assistantMessageEvent: { + delta?: string; + type?: string; + }; + type: "message_update"; + }, + ) => void | Promise, + ): () => void; +}; + +/** Services that host-owned runtime harnesses may replace while keeping respond wiring real. */ +export interface ReplyRuntimeServices { + createMcpAuthOrchestration: typeof createMcpAuthOrchestration; + discoverSkills: typeof discoverSkills; + findSkillByName: typeof findSkillByName; + getConfigDefaults: typeof getConfigDefaults; + getPluginMcpProviders: typeof getPluginMcpProviders; + getPluginProviders: typeof getPluginProviders; + parseSkillInvocation: typeof parseSkillInvocation; +} + +/** Host-owned execution ports for deterministic component, integration, and eval harnesses. */ +export interface ReplyRuntimeHarness { + /** Override the Pi model transport when a host owns deterministic execution. */ + streamFn?: StreamFn; + /** Override Pi Agent construction for controlled runtime harnesses. */ + agentFactory?: (options: ReplyAgentOptions) => ReplyAgent; + /** Override sandbox execution for controlled runtime hosts. */ + sandboxExecutorFactory?: SandboxExecutorFactory; + /** Override MCP client construction for controlled runtime harnesses. 
*/ + mcpClientFactory?: McpToolManagerOptions["clientFactory"]; + /** Override runtime discovery/auth services for controlled runtime harnesses. */ + runtimeServices?: ReplyRuntimeServices; + /** Reuse a preselected reasoning level when routing already made that choice. */ + turnThinkingSelection?: TurnThinkingSelection; +} + +const defaultReplyRuntimeServices: ReplyRuntimeServices = { + createMcpAuthOrchestration, + discoverSkills, + findSkillByName, + getConfigDefaults, + getPluginMcpProviders, + getPluginProviders, + parseSkillInvocation, +}; + +function createDefaultReplyAgent(options: ReplyAgentOptions): ReplyAgent { + return new Agent( + options as ConstructorParameters[0], + ) as ReplyAgent; +} + function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } @@ -233,6 +330,8 @@ export interface ReplyRequestContext { /** Per-turn override for app-owned sandbox egress trace propagation. */ tracePropagation?: SandboxEgressTracePropagationConfig; }; + /** Deterministic execution ports owned by component, integration, or eval harnesses. 
*/ + harness?: ReplyRuntimeHarness; onSandboxAcquired?: (sandbox: SandboxAcquiredState) => void | Promise; onArtifactStateUpdated?: ( artifactState: ThreadArtifactsState, @@ -262,13 +361,6 @@ export interface ReplyRequestContext { export type AssistantReplyRequestContext = ReplyRequestContext; -export interface ReplyRequestAttachment { - data?: Buffer; - mediaType: string; - filename?: string; - promptText?: string; -} - export interface ReplySteeringMessage { omittedImageAttachmentCount?: number; text: string; @@ -277,37 +369,6 @@ export interface ReplySteeringMessage { } let startupDiscoveryLogged = false; -const MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS = 2_000; - -type UserTurnContentPart = - | { type: "text"; text: string } - | { type: "image"; data: string; mimeType: string }; - -type UserTurnAttachment = NonNullable< - ReplyRequestContext["userAttachments"] ->[number]; - -function buildOmittedImageAttachmentNotice(count: number): string { - return [ - "", - `count: ${count}`, - "Slack included image attachments with this turn, but this runtime cannot analyze images because no vision model is configured.", - "Do not claim that no image was attached.", - "If the user asks about image contents, explain that image analysis is unavailable in this runtime and continue with any text or non-image files that are still available.", - "", - ].join("\n"); -} - -function trimRouterAttachmentText(text: string): string { - const normalized = text.replaceAll("\0", " ").trim(); - if (!normalized) { - return ""; - } - return normalized.length <= MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS - ? 
normalized - : `${normalized.slice(0, MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS)}...`; -} - function extractSliceUsage( messages: PiMessage[], beforeMessageCount: number, @@ -444,122 +505,6 @@ function surfaceFromContext( return undefined; } -function supportsRouterTextPreview(mediaType: string): boolean { - const baseMediaType = mediaType.split(";", 1)[0]?.trim().toLowerCase(); - if (!baseMediaType) { - return false; - } - return ( - baseMediaType.startsWith("text/") || - baseMediaType === "application/json" || - baseMediaType === "application/xml" || - baseMediaType === "application/x-www-form-urlencoded" || - baseMediaType.endsWith("+json") || - baseMediaType.endsWith("+xml") - ); -} - -function buildRouterAttachmentBlock(attachment: UserTurnAttachment): string { - if (attachment.promptText) { - return trimRouterAttachmentText(attachment.promptText); - } - - const header = [ - "", - `filename: ${attachment.filename ?? "unnamed"}`, - `media_type: ${attachment.mediaType}`, - ]; - - if (attachment.data && supportsRouterTextPreview(attachment.mediaType)) { - const preview = trimRouterAttachmentText(attachment.data.toString("utf8")); - if (preview) { - return [ - ...header, - "", - preview, - "", - "", - ].join("\n"); - } - } - - return [...header, ""].join("\n"); -} - -function buildUserTurnInput(args: { - omittedImageAttachmentCount: number; - userAttachments?: ReplyRequestContext["userAttachments"]; - userTurnText: string; -}): { - routerBlocks: string[]; - userContentParts: UserTurnContentPart[]; -} { - const routerBlocks: string[] = []; - const userContentParts: UserTurnContentPart[] = [ - { type: "text", text: args.userTurnText }, - ]; - - if (args.omittedImageAttachmentCount > 0) { - const omittedImagesNotice = buildOmittedImageAttachmentNotice( - args.omittedImageAttachmentCount, - ); - userContentParts.push({ type: "text", text: omittedImagesNotice }); - routerBlocks.push(omittedImagesNotice); - } - - for (const attachment of args.userAttachments ?? 
[]) { - routerBlocks.push(buildRouterAttachmentBlock(attachment)); - - if (attachment.promptText) { - userContentParts.push({ - type: "text", - text: attachment.promptText, - }); - continue; - } - - if (attachment.mediaType.startsWith("image/")) { - if (!attachment.data) { - throw new Error("Image attachment is missing image data"); - } - userContentParts.push({ - type: "image", - data: attachment.data.toString("base64"), - mimeType: attachment.mediaType, - }); - continue; - } - - if (!attachment.data) { - throw new Error("Attachment is missing attachment data"); - } - - userContentParts.push({ - type: "text", - text: encodeNonImageAttachmentForPrompt({ - data: attachment.data, - mediaType: attachment.mediaType, - filename: attachment.filename, - }), - }); - } - - return { routerBlocks, userContentParts }; -} - -function buildSteeringPiMessage(message: ReplySteeringMessage): PiMessage { - const { userContentParts } = buildUserTurnInput({ - userTurnText: message.text, - userAttachments: message.userAttachments, - omittedImageAttachmentCount: message.omittedImageAttachmentCount ?? 0, - }); - return { - role: "user", - content: userContentParts, - timestamp: message.timestampMs ?? Date.now(), - } as PiMessage; -} - /** Run a full agent turn: discover skills, execute tools, and return the assistant reply. */ export async function generateAssistantReply( messageText: string, @@ -572,6 +517,9 @@ export async function generateAssistantReply( assertCorrelationDestinationMatch(context); const replyStartedAtMs = Date.now(); + const harness = context.harness ?? {}; + const runtimeServices = + harness.runtimeServices ?? 
defaultReplyRuntimeServices; const configuredTurnDeadlineAtMs = replyStartedAtMs + botConfig.turnTimeoutMs; const contextTurnDeadlineAtMs = typeof context.turnDeadlineAtMs === "number" && @@ -684,12 +632,12 @@ export async function generateAssistantReply( }; // ── Skill discovery ────────────────────────────────────────────── - const availableSkills = await discoverSkills({ + const availableSkills = await runtimeServices.discoverSkills({ additionalRoots: context.skillDirs, }); if (!startupDiscoveryLogged) { startupDiscoveryLogged = true; - const plugins = getPluginProviders(); + const plugins = runtimeServices.getPluginProviders(); const roots = [ ...new Set(availableSkills.map((skill) => skill.skillPath)), ].sort(); @@ -730,9 +678,15 @@ export async function generateAssistantReply( "Agent message received", ); } - const skillInvocation = parseSkillInvocation(userInput, availableSkills); + const skillInvocation = runtimeServices.parseSkillInvocation( + userInput, + availableSkills, + ); const invokedSkill = skillInvocation - ? findSkillByName(skillInvocation.skillName, availableSkills) + ? runtimeServices.findSkillByName( + skillInvocation.skillName, + availableSkills, + ) : null; const activeSkills: Skill[] = []; const syncLoadedSkillNamesForResume = () => { @@ -759,7 +713,7 @@ export async function generateAssistantReply( ? await context.channelConfiguration.resolveValues() : {}; configurationValues = { - ...getConfigDefaults(), + ...runtimeServices.getConfigDefaults(), ...(context.configuration ?? 
{}), ...persistedConfigurationValues, }; @@ -772,38 +726,39 @@ export async function generateAssistantReply( const agentPluginHooks = createAgentPluginHookRunner({ requester: actorRequester, }); - sandboxExecutor = createSandboxExecutor({ - sandboxId: context.sandbox?.sandboxId, - sandboxDependencyProfileHash: - context.sandbox?.sandboxDependencyProfileHash, - traceContext: spanContext, - tracePropagation: context.sandbox?.tracePropagation, - credentialEgress: context.credentialContext, - agentHooks: agentPluginHooks, - onSandboxAcquired: async (sandbox) => { - lastKnownSandboxId = sandbox.sandboxId; - lastKnownSandboxDependencyProfileHash = - sandbox.sandboxDependencyProfileHash; - await context.onSandboxAcquired?.(sandbox); - }, - runBashCustomCommand: async (command) => { - const result = await maybeExecuteJrRpcCustomCommand(command, { - activeSkill: skillSandbox.getActiveSkill(), - channelConfiguration: context.channelConfiguration, - requesterId: actorRequester?.userId, - onConfigurationValueChanged: (key, value) => { - if (value === undefined) { - delete configurationValues[key]; - return; - } - configurationValues[key] = value; - }, - }); - return result.handled - ? { handled: true, result: result.result } - : { handled: false }; + sandboxExecutor = (harness.sandboxExecutorFactory ?? 
createSandboxExecutor)( + { + sandboxId: context.sandbox?.sandboxId, + sandboxDependencyProfileHash: + context.sandbox?.sandboxDependencyProfileHash, + traceContext: spanContext, + credentialEgress: context.credentialContext, + agentHooks: agentPluginHooks, + onSandboxAcquired: async (sandbox) => { + lastKnownSandboxId = sandbox.sandboxId; + lastKnownSandboxDependencyProfileHash = + sandbox.sandboxDependencyProfileHash; + await context.onSandboxAcquired?.(sandbox); + }, + runBashCustomCommand: async (command) => { + const result = await maybeExecuteJrRpcCustomCommand(command, { + activeSkill: skillSandbox.getActiveSkill(), + channelConfiguration: context.channelConfiguration, + requesterId: actorRequester?.userId, + onConfigurationValueChanged: (key, value) => { + if (value === undefined) { + delete configurationValues[key]; + return; + } + configurationValues[key] = value; + }, + }); + return result.handled + ? { handled: true, result: result.result } + : { handled: false }; + }, }, - }); + ); const currentSandboxExecutor = sandboxExecutor; sandboxExecutor.configureSkills(availableSkills); sandboxExecutor.configureReferenceFiles(listReferenceFiles()); @@ -819,69 +774,10 @@ export async function generateAssistantReply( }) : [], ); - let sandboxPromise: Promise | undefined; - let sandboxPromiseId: string | undefined; - const clearSandboxPromise = (): void => { - sandboxPromise = undefined; - sandboxPromiseId = undefined; - }; - const getSandbox = (reason: { - trigger: string; - path?: string; - cmd?: string; - cwd?: string; - }): Promise => { - const currentSandboxId = currentSandboxExecutor.getSandboxId(); - if ( - sandboxPromise && - sandboxPromiseId && - currentSandboxId !== sandboxPromiseId - ) { - clearSandboxPromise(); - } - - if (!sandboxPromise) { - logInfo( - "sandbox_boot_requested", - spanContext, - { - "app.sandbox.boot.trigger": reason.trigger, - ...(reason.path ? { "file.path": reason.path } : {}), - ...(reason.cmd ? 
{ "process.executable.name": reason.cmd } : {}), - ...(reason.cwd ? { "file.directory": reason.cwd } : {}), - }, - "Lazy sandbox boot requested", - ); - sandboxPromise = currentSandboxExecutor - .createSandbox() - .then((sandbox) => { - sandboxPromiseId = sandbox.sandboxId; - return sandbox; - }) - .catch((error) => { - clearSandboxPromise(); - throw error; - }); - } - return sandboxPromise; - }; - const sandbox: SandboxWorkspace = { - readFileToBuffer: async (input) => - ( - await getSandbox({ - trigger: "workspace.readFileToBuffer", - path: input.path, - }) - ).readFileToBuffer(input), - runCommand: async (input) => - ( - await getSandbox({ - trigger: "workspace.runCommand", - cmd: input.cmd, - cwd: input.cwd, - }) - ).runCommand(input), - }; + const sandbox = createLazySandboxWorkspace({ + executor: currentSandboxExecutor, + logContext: spanContext, + }); // ── Restore skill runtime handles from durable Pi history ──────── for (const skillName of inferLoadedSkillNamesFromPiMessages( @@ -924,19 +820,21 @@ export async function generateAssistantReply( } as PiMessage, ]; - thinkingSelection = await selectTurnThinkingLevel({ - completeObject, - conversationContext: context.conversationContext, - context: { - threadId: context.correlation?.threadId, - channelId: context.correlation?.channelId, - requesterId: context.correlation?.requesterId, - runId: context.correlation?.runId, - }, - currentTurnBlocks: routerBlocks, - fastModelId: botConfig.fastModelId, - messageText: userInput, - }); + thinkingSelection = + harness.turnThinkingSelection ?? 
+ (await selectTurnThinkingLevel({ + completeObject, + conversationContext: context.conversationContext, + context: { + threadId: context.correlation?.threadId, + channelId: context.correlation?.channelId, + requesterId: context.correlation?.requesterId, + runId: context.correlation?.runId, + }, + currentTurnBlocks: routerBlocks, + fastModelId: botConfig.fastModelId, + messageText: userInput, + })); setSpanAttributes({ "gen_ai.request.model": botConfig.modelId, "app.ai.reasoning_effort": thinkingSelection.thinkingLevel, @@ -955,7 +853,7 @@ export async function generateAssistantReply( const artifactStatePatch: Partial = {}; const toolCalls: string[] = []; let advisorTools: AgentTool[] = []; - let agent: Agent | undefined; + let agent: ReplyAgent | undefined; let latestSafeBoundaryMessages: PiMessage[] = []; const getResumeSnapshot = (): PiMessage[] => { const currentMessages = agent ? [...agent.state.messages] : []; @@ -965,18 +863,12 @@ export async function generateAssistantReply( }; // ── MCP auth orchestration ─────────────────────────────────────── - const slackDestination = - context.destination.platform === "slack" - ? 
context.destination - : undefined; - const slackChannelId = slackDestination?.channelId; - - const mcpAuth = createMcpAuthOrchestration({ + const mcpAuth = runtimeServices.createMcpAuthOrchestration({ abortAgent: () => agent?.abort(), conversationId: sessionConversationId, sessionId, requesterId: authRequesterId, - channelId: slackChannelId, + channelId: context.correlation?.channelId, destination: context.destination, threadTs: context.correlation?.threadTs, toolChannelId: context.toolChannelId, @@ -994,7 +886,7 @@ export async function generateAssistantReply( conversationId: sessionConversationId, sessionId, requesterId: authRequesterId, - channelId: slackChannelId, + channelId: context.correlation?.channelId, destination: context.destination, threadTs: context.correlation?.threadTs, userMessage: userInput, @@ -1005,10 +897,16 @@ export async function generateAssistantReply( userTokenStore, }); - mcpToolManager = new McpToolManager(getPluginMcpProviders(), { - authProviderFactory: mcpAuth.authProviderFactory, - onAuthorizationRequired: mcpAuth.onAuthorizationRequired, - }); + mcpToolManager = new McpToolManager( + runtimeServices.getPluginMcpProviders(), + { + authProviderFactory: mcpAuth.authProviderFactory, + ...(harness.mcpClientFactory + ? { clientFactory: harness.mcpClientFactory } + : {}), + onAuthorizationRequired: mcpAuth.onAuthorizationRequired, + }, + ); const turnMcpToolManager = mcpToolManager; const getPendingAuthPause = () => pluginAuth.getPendingPause() ?? mcpAuth.getPendingPause(); @@ -1373,9 +1271,10 @@ export async function generateAssistantReply( throw cooperativeYieldError; }; - agent = new Agent({ + agent = (harness.agentFactory ?? createDefaultReplyAgent)({ getApiKey: () => getPiGatewayApiKeyOverride(), - streamFn: createTracedStreamFn({ conversationPrivacy }), + streamFn: + harness.streamFn ?? 
createTracedStreamFn({ conversationPrivacy }), steeringMode: "all", prepareNextTurn: async () => { await drainSteeringMessages(); @@ -1713,6 +1612,9 @@ export async function generateAssistantReply( logContext: sessionRecordLogContext, requester, ...(surface ? { surface } : {}), + ...(turnStartMessageIndex !== undefined + ? { turnStartMessageIndex } + : {}), }); if (!sessionRecord) { throw new Error( @@ -1741,6 +1643,9 @@ export async function generateAssistantReply( logContext: sessionRecordLogContext, requester, ...(surface ? { surface } : {}), + ...(turnStartMessageIndex !== undefined + ? { turnStartMessageIndex } + : {}), }); if (!sessionRecord) { throw new Error( @@ -1749,7 +1654,7 @@ export async function generateAssistantReply( } if (sessionRecord.state === "awaiting_resume") { throw new RetryableTurnError( - "agent_continue", + "turn_timeout_resume", `conversation=${timeoutResumeConversationId} session=${timeoutResumeSessionId} slice=${sessionRecord.sliceId} version=${sessionRecord.version}`, { conversationId: timeoutResumeConversationId, @@ -1792,6 +1697,9 @@ export async function generateAssistantReply( logContext: sessionRecordLogContext, requester, ...(surface ? { surface } : {}), + ...(turnStartMessageIndex !== undefined + ? { turnStartMessageIndex } + : {}), }); if (sessionRecord) { throw new RetryableTurnError( diff --git a/packages/junior/src/chat/respond/active-skills.ts b/packages/junior/src/chat/respond/active-skills.ts new file mode 100644 index 000000000..4780ba2b8 --- /dev/null +++ b/packages/junior/src/chat/respond/active-skills.ts @@ -0,0 +1,16 @@ +import type { Skill } from "@/chat/skills"; + +/** Upsert a skill into the active skills list by name. 
*/ +export function upsertActiveSkill(activeSkills: Skill[], next: Skill): void { + const existing = activeSkills.find((skill) => skill.name === next.name); + if (existing) { + existing.body = next.body; + existing.description = next.description; + existing.skillPath = next.skillPath; + existing.allowedTools = next.allowedTools; + existing.pluginProvider = next.pluginProvider; + return; + } + + activeSkills.push(next); +} diff --git a/packages/junior/src/chat/respond/pi-messages.ts b/packages/junior/src/chat/respond/pi-messages.ts new file mode 100644 index 000000000..a29792320 --- /dev/null +++ b/packages/junior/src/chat/respond/pi-messages.ts @@ -0,0 +1,95 @@ +import type { + AssistantMessage, + ToolResultMessage, +} from "@earendil-works/pi-ai"; +import type { PiMessage } from "@/chat/pi/messages"; + +/** Type guard for Pi SDK tool result messages. */ +export function isToolResultMessage( + value: unknown, +): value is ToolResultMessage { + return ( + typeof value === "object" && + value !== null && + (value as { role?: unknown }).role === "toolResult" + ); +} + +/** Extract the tool name from a raw tool result message. */ +export function normalizeToolNameFromResult( + result: unknown, +): string | undefined { + if (!result || typeof result !== "object") return undefined; + const record = result as { toolName?: unknown; name?: unknown }; + if (typeof record.toolName === "string" && record.toolName.length > 0) { + return record.toolName; + } + if (typeof record.name === "string" && record.name.length > 0) { + return record.name; + } + return undefined; +} + +/** Check whether a tool result carries an error flag. */ +export function isToolResultError(result: unknown): boolean { + if (!result || typeof result !== "object") return false; + return Boolean((result as { isError?: unknown }).isError); +} + +/** Type guard for Pi SDK assistant messages. 
*/ +export function isAssistantMessage(value: unknown): value is AssistantMessage { + return ( + typeof value === "object" && + value !== null && + (value as { role?: unknown }).role === "assistant" + ); +} + +/** Extract role string from a raw Pi message. */ +export function getPiMessageRole(value: unknown): string | undefined { + if (!value || typeof value !== "object") { + return undefined; + } + const role = (value as { role?: unknown }).role; + return typeof role === "string" ? role : undefined; +} + +/** Concatenate text content parts from an assistant message. */ +export function extractAssistantText(message: AssistantMessage): string { + const content = + (message as { content?: Array<{ type?: unknown; text?: unknown }> }) + .content ?? []; + return content + .filter( + (part): part is { type: "text"; text: string } => + part.type === "text" && typeof part.text === "string", + ) + .map((part) => part.text) + .join("\n"); +} + +/** Return assistant messages that belong to the terminal post-tool reply phase. */ +export function getTerminalAssistantMessages( + messages: readonly unknown[], +): AssistantMessage[] { + let lastToolResultIndex = -1; + for (let index = messages.length - 1; index >= 0; index -= 1) { + if (isToolResultMessage(messages[index])) { + lastToolResultIndex = index; + break; + } + } + + return messages.slice(lastToolResultIndex + 1).filter(isAssistantMessage); +} + +/** Remove trailing assistant messages before committing a resumable boundary. */ +export function trimTrailingAssistantMessages( + messages: PiMessage[], +): PiMessage[] { + let end = messages.length; + while (end > 0 && getPiMessageRole(messages[end - 1]) === "assistant") { + end -= 1; + } + return end === messages.length ? 
[...messages] : messages.slice(0, end); +} diff --git a/packages/junior/src/chat/respond/reply-output-guards.ts b/packages/junior/src/chat/respond/reply-output-guards.ts new file mode 100644 index 000000000..5387b2dd5 --- /dev/null +++ b/packages/junior/src/chat/respond/reply-output-guards.ts @@ -0,0 +1,91 @@ +/** Detect polite execution deferral phrases that signal the model is stalling. */ +export function isExecutionDeferralResponse(text: string): boolean { + return /\b(want me to proceed|do you want me to proceed|shall i proceed|can i proceed|should i proceed|let me do that now|give me a moment|tag me again|fresh invocation)\b/i.test( + text, + ); +} + +/** Detect disclaimers about missing tool access. */ +export function isToolAccessDisclaimerResponse(text: string): boolean { + return /\b(i (don't|do not) have access to (active )?tool|tool results came back empty|prior results .* empty|cannot access .*tool|need to (run|load) .*tool .* first)\b/i.test( + text, + ); +} + +/** True when the model produced an escape response instead of executing. */ +export function isExecutionEscapeResponse(text: string): boolean { + const trimmed = text.trim(); + if (!trimmed) return false; + return ( + isExecutionDeferralResponse(trimmed) || + isToolAccessDisclaimerResponse(trimmed) + ); +} + +/** Best-effort JSON extraction from text that may contain fenced blocks. */ +export function parseJsonCandidate(text: string): unknown { + const trimmed = text.trim(); + if (!trimmed) return undefined; + + try { + return JSON.parse(trimmed) as unknown; + } catch { + const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + if (!fenced) return undefined; + try { + return JSON.parse(fenced[1]) as unknown; + } catch { + return undefined; + } + } +} + +/** Check whether a parsed object looks like a raw tool call/result payload. 
*/ +export function isToolPayloadShape(payload: unknown): boolean { + if (!payload || typeof payload !== "object") return false; + const record = payload as Record; + + const type = typeof record.type === "string" ? record.type.toLowerCase() : ""; + if (type.startsWith("tool-")) return true; + if ( + type === "tool_use" || + type === "tool_call" || + type === "tool_result" || + type === "tool_error" + ) + return true; + + const hasToolName = + typeof record.toolName === "string" || typeof record.name === "string"; + const hasToolInput = + Object.prototype.hasOwnProperty.call(record, "input") || + Object.prototype.hasOwnProperty.call(record, "args"); + if (hasToolName && hasToolInput) return true; + + return false; +} + +/** Detect responses that are raw tool payloads leaked as text. */ +export function isRawToolPayloadResponse(text: string): boolean { + const parsed = parseJsonCandidate(text); + if (Array.isArray(parsed)) { + return parsed.some((entry) => isToolPayloadShape(entry)); + } + if (isToolPayloadShape(parsed)) { + return true; + } + + const compact = text.replace(/\s+/g, " "); + return /"type"\s*:\s*"tool[-_](use|call|result|error)"/i.test(compact); +} + +/** Truncate message text for log attributes. */ +export function summarizeMessageText(text: string): string { + const normalized = text.trim().replace(/\s+/g, " "); + if (!normalized) { + return "[empty]"; + } + return normalized.length > 1_200 + ? 
`${normalized.slice(0, 1_200)}...` + : normalized; +} diff --git a/packages/junior/src/chat/respond/runtime-turn-context.ts b/packages/junior/src/chat/respond/runtime-turn-context.ts new file mode 100644 index 000000000..d67a5aa86 --- /dev/null +++ b/packages/junior/src/chat/respond/runtime-turn-context.ts @@ -0,0 +1,106 @@ +import type { PiMessage } from "@/chat/pi/messages"; +import { TURN_CONTEXT_TAG } from "@/chat/turn-context-tag"; + +const RUNTIME_TURN_CONTEXT_START = `<${TURN_CONTEXT_TAG}>`; + +function getUserMessageContent(message: PiMessage): unknown[] | undefined { + const record = message as { role?: unknown; content?: unknown }; + return record.role === "user" && Array.isArray(record.content) + ? record.content + : undefined; +} + +function isRuntimeTurnContextPart(part: unknown, marker: string): boolean { + return ( + part !== null && + typeof part === "object" && + (part as { type?: unknown }).type === "text" && + typeof (part as { text?: unknown }).text === "string" && + (part as { text: string }).text.startsWith(marker) + ); +} + +function prependRuntimeTurnContext( + message: PiMessage, + turnContextPrompt: string, +): PiMessage | undefined { + const content = getUserMessageContent(message); + if (!content) { + return undefined; + } + + const contextIndex = content.findIndex((part) => + isRuntimeTurnContextPart(part, RUNTIME_TURN_CONTEXT_START), + ); + if (contextIndex >= 0) { + return undefined; + } + + return { + ...message, + content: [{ type: "text", text: turnContextPrompt }, ...content], + } as PiMessage; +} + +/** Add bootstrap context only for stored boundaries captured before prompt(). 
*/ +export function prependMissingRuntimeTurnContext( + messages: PiMessage[], + turnContextPrompt: string, +): PiMessage[] { + if (hasRuntimeTurnContext(messages)) { + return messages; + } + + for (let index = messages.length - 1; index >= 0; index -= 1) { + const updated = prependRuntimeTurnContext( + messages[index], + turnContextPrompt, + ); + if (!updated) { + continue; + } + + const nextMessages = [...messages]; + nextMessages[index] = updated; + return nextMessages; + } + + return [ + ...messages, + { + role: "user", + content: [{ type: "text", text: turnContextPrompt }], + timestamp: Date.now(), + } as PiMessage, + ]; +} + +/** Return whether Pi history already carries session bootstrap context. */ +export function hasRuntimeTurnContext(messages: PiMessage[]): boolean { + return messages.some((message) => + getUserMessageContent(message)?.some((part) => + isRuntimeTurnContextPart(part, RUNTIME_TURN_CONTEXT_START), + ), + ); +} + +/** Remove volatile runtime context before reusing messages as history. */ +export function stripRuntimeTurnContext(messages: PiMessage[]): PiMessage[] { + return messages.flatMap((message) => { + const content = getUserMessageContent(message); + if (!content) { + return [message]; + } + + const nextContent = content.filter( + (part) => !isRuntimeTurnContextPart(part, RUNTIME_TURN_CONTEXT_START), + ); + if (nextContent.length === content.length) { + return [message]; + } + if (nextContent.length === 0) { + return []; + } + return [{ ...message, content: nextContent } as PiMessage]; + }); +} diff --git a/packages/junior/src/chat/respond/session-identifiers.ts b/packages/junior/src/chat/respond/session-identifiers.ts new file mode 100644 index 000000000..48527dceb --- /dev/null +++ b/packages/junior/src/chat/respond/session-identifiers.ts @@ -0,0 +1,20 @@ +/** Extract conversation and session identifiers from correlation context. 
*/ +export function getSessionIdentifiers(context: { + correlation?: { + conversationId?: string; + threadId?: string; + turnId?: string; + runId?: string; + }; +}): { + conversationId?: string; + sessionId?: string; +} { + return { + conversationId: + context.correlation?.conversationId ?? + context.correlation?.threadId ?? + context.correlation?.runId, + sessionId: context.correlation?.turnId, + }; +} diff --git a/packages/junior/src/chat/respond/user-turn-input.ts b/packages/junior/src/chat/respond/user-turn-input.ts new file mode 100644 index 000000000..20886aedf --- /dev/null +++ b/packages/junior/src/chat/respond/user-turn-input.ts @@ -0,0 +1,244 @@ +import type { PiMessage } from "@/chat/pi/messages"; + +const MAX_INLINE_ATTACHMENT_BASE64_CHARS = 120_000; +const MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS = 2_000; + +export interface ReplyRequestAttachment { + data?: Buffer; + mediaType: string; + filename?: string; + promptText?: string; +} + +export interface ReplySteeringMessageInput { + omittedImageAttachmentCount?: number; + text: string; + timestampMs?: number; + userAttachments?: ReplyRequestAttachment[]; +} + +export type UserTurnContentPart = + | { type: "text"; text: string } + | { type: "image"; data: string; mimeType: string }; + +/** Redact image data from prompt content parts for observability. 
*/ +export function toObservablePromptPart( + part: + | { type: "text"; text: string } + | { type: "image"; data: string; mimeType: string }, +): Record { + if (part.type === "text") { + return { + type: "text", + text: part.text, + }; + } + + return { + type: "image", + mimeType: part.mimeType, + data: `[omitted:${part.data.length}]`, + }; +} + +function isStructuredThreadContext(context: string): boolean { + return /^/.test(context); +} + +function renderThreadContextForPrompt(context: string): string { + if (isStructuredThreadContext(context)) { + return context; + } + return ["", context, ""].join("\n"); +} + +/** + * Put prior thread text before the current instruction when no Pi history + * exists. Structured thread XML is already a top-level prompt block. + */ +export function buildUserTurnText( + userInput: string, + conversationContext?: string, +): string { + const trimmedContext = conversationContext?.trim(); + + if (!trimmedContext) { + return userInput; + } + + return [ + renderThreadContextForPrompt(trimmedContext), + "", + "", + userInput, + "", + ].join("\n"); +} + +/** Encode a non-image attachment as base64 XML for the prompt. */ +export function encodeNonImageAttachmentForPrompt(attachment: { + data: Buffer; + mediaType: string; + filename?: string; +}): string { + const base64 = attachment.data.toString("base64"); + const wasTruncated = base64.length > MAX_INLINE_ATTACHMENT_BASE64_CHARS; + const encodedPayload = wasTruncated + ? `${base64.slice(0, MAX_INLINE_ATTACHMENT_BASE64_CHARS)}...` + : base64; + + return [ + "", + `filename: ${attachment.filename ?? "unnamed"}`, + `media_type: ${attachment.mediaType}`, + "encoding: base64", + `truncated: ${wasTruncated ? 
"true" : "false"}`, + "", + encodedPayload, + "", + "", + ].join("\n"); +} + +function buildOmittedImageAttachmentNotice(count: number): string { + return [ + "", + `count: ${count}`, + "Slack included image attachments with this turn, but this runtime cannot analyze images because no vision model is configured.", + "Do not claim that no image was attached.", + "If the user asks about image contents, explain that image analysis is unavailable in this runtime and continue with any text or non-image files that are still available.", + "", + ].join("\n"); +} + +function trimRouterAttachmentText(text: string): string { + const normalized = text.replaceAll("\0", " ").trim(); + if (!normalized) { + return ""; + } + return normalized.length <= MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS + ? normalized + : `${normalized.slice(0, MAX_ROUTER_ATTACHMENT_PREVIEW_CHARS)}...`; +} + +function supportsRouterTextPreview(mediaType: string): boolean { + const baseMediaType = mediaType.split(";", 1)[0]?.trim().toLowerCase(); + if (!baseMediaType) { + return false; + } + return ( + baseMediaType.startsWith("text/") || + baseMediaType === "application/json" || + baseMediaType === "application/xml" || + baseMediaType === "application/x-www-form-urlencoded" || + baseMediaType.endsWith("+json") || + baseMediaType.endsWith("+xml") + ); +} + +function buildRouterAttachmentBlock( + attachment: ReplyRequestAttachment, +): string { + if (attachment.promptText) { + return trimRouterAttachmentText(attachment.promptText); + } + + const header = [ + "", + `filename: ${attachment.filename ?? 
"unnamed"}`, + `media_type: ${attachment.mediaType}`, + ]; + + if (attachment.data && supportsRouterTextPreview(attachment.mediaType)) { + const preview = trimRouterAttachmentText(attachment.data.toString("utf8")); + if (preview) { + return [ + ...header, + "", + preview, + "", + "", + ].join("\n"); + } + } + + return [...header, ""].join("\n"); +} + +/** Build the Pi user message parts and router-only attachment blocks for a turn. */ +export function buildUserTurnInput(args: { + omittedImageAttachmentCount: number; + userAttachments?: ReplyRequestAttachment[]; + userTurnText: string; +}): { + routerBlocks: string[]; + userContentParts: UserTurnContentPart[]; +} { + const routerBlocks: string[] = []; + const userContentParts: UserTurnContentPart[] = [ + { type: "text", text: args.userTurnText }, + ]; + + if (args.omittedImageAttachmentCount > 0) { + const omittedImagesNotice = buildOmittedImageAttachmentNotice( + args.omittedImageAttachmentCount, + ); + userContentParts.push({ type: "text", text: omittedImagesNotice }); + routerBlocks.push(omittedImagesNotice); + } + + for (const attachment of args.userAttachments ?? 
[]) { + routerBlocks.push(buildRouterAttachmentBlock(attachment)); + + if (attachment.promptText) { + userContentParts.push({ + type: "text", + text: attachment.promptText, + }); + continue; + } + + if (attachment.mediaType.startsWith("image/")) { + if (!attachment.data) { + throw new Error("Image attachment is missing image data"); + } + userContentParts.push({ + type: "image", + data: attachment.data.toString("base64"), + mimeType: attachment.mediaType, + }); + continue; + } + + if (!attachment.data) { + throw new Error("Attachment is missing attachment data"); + } + + userContentParts.push({ + type: "text", + text: encodeNonImageAttachmentForPrompt({ + data: attachment.data, + mediaType: attachment.mediaType, + filename: attachment.filename, + }), + }); + } + + return { routerBlocks, userContentParts }; +} + +/** Convert a steered user message into the Pi transcript shape. */ +export function buildSteeringPiMessage( + message: ReplySteeringMessageInput, +): PiMessage { + const { userContentParts } = buildUserTurnInput({ + userTurnText: message.text, + userAttachments: message.userAttachments, + omittedImageAttachmentCount: message.omittedImageAttachmentCount ?? 0, + }); + return { + role: "user", + content: userContentParts, + timestamp: message.timestampMs ?? 
Date.now(), + } as PiMessage; +} diff --git a/packages/junior/src/chat/runtime/agent-continue-runner.ts b/packages/junior/src/chat/runtime/agent-continue-runner.ts index 7ba0a38fd..cb814e1e8 100644 --- a/packages/junior/src/chat/runtime/agent-continue-runner.ts +++ b/packages/junior/src/chat/runtime/agent-continue-runner.ts @@ -38,10 +38,10 @@ import { import { coerceThreadArtifactsState } from "@/chat/state/artifacts"; import { isRetryableTurnError, markTurnFailed } from "@/chat/runtime/turn"; import { - getAwaitingAgentContinueRequest, - scheduleAgentContinue as defaultScheduleAgentContinue, - type AgentContinueRequest, -} from "@/chat/services/agent-continue"; + getAwaitingTurnContinuationRequest as getAwaitingAgentContinueRequest, + scheduleTurnTimeoutResume as defaultScheduleAgentContinue, + type TurnContinuationRequest as AgentContinueRequest, +} from "@/chat/services/timeout-resume"; import { parseSlackThreadId } from "@/chat/slack/context"; import { createRequesterFromStoredSlackRequester } from "@/chat/requester"; import type { AssistantReply, generateAssistantReply } from "@/chat/respond"; @@ -340,13 +340,16 @@ export async function continueSlackAgentRun( ); }, onTimeoutPause: async (error: unknown) => { - if (!isRetryableTurnError(error, "agent_continue")) { + if ( + !isRetryableTurnError(error, "agent_continue") && + !isRetryableTurnError(error, "turn_timeout_resume") + ) { throw error; } const version = error.metadata?.version; if (typeof version !== "number") { throw new Error( - "Agent continuation did not include a session record version", + "Turn continuation did not include a session record version", ); } @@ -392,7 +395,7 @@ export async function resumeAwaitingSlackContinuation( conversationId, summary, errorMessage: - "Awaiting agent continuation metadata could not be materialized", + "Awaiting turn continuation metadata could not be materialized", }); continue; } diff --git a/packages/junior/src/chat/runtime/reply-executor.ts 
b/packages/junior/src/chat/runtime/reply-executor.ts index 0bae29098..71b90fc0a 100644 --- a/packages/junior/src/chat/runtime/reply-executor.ts +++ b/packages/junior/src/chat/runtime/reply-executor.ts @@ -116,10 +116,8 @@ import { setConversationTitle, } from "@/chat/state/conversation-details"; import { loadProjection } from "@/chat/state/session-log"; -import { - stripRuntimeTurnContext, - trimTrailingAssistantMessages, -} from "@/chat/respond-helpers"; +import { stripRuntimeTurnContext } from "@/chat/respond/runtime-turn-context"; +import { trimTrailingAssistantMessages } from "@/chat/respond/pi-messages"; import { requireSlackDestination } from "@/chat/destination"; function collectCanvasUrls(artifacts: Partial) { @@ -478,7 +476,7 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { expectedVersion: sessionRecord.version, sessionId: activeTurnId, errorMessage: - "Awaiting agent continuation metadata could not be materialized", + "Awaiting turn continuation metadata could not be materialized", }); markTurnFailed({ conversation: preparedState.conversation, @@ -1089,7 +1087,10 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { return; } - if (isRetryableTurnError(error, "agent_continue")) { + if ( + isRetryableTurnError(error, "agent_continue") || + isRetryableTurnError(error, "turn_timeout_resume") + ) { const conversationIdForResume = error.metadata?.conversationId; const sessionIdForResume = error.metadata?.sessionId; const version = error.metadata?.version; diff --git a/packages/junior/src/chat/runtime/slack-resume.ts b/packages/junior/src/chat/runtime/slack-resume.ts index 6753ddcfd..84983cc5e 100644 --- a/packages/junior/src/chat/runtime/slack-resume.ts +++ b/packages/junior/src/chat/runtime/slack-resume.ts @@ -55,9 +55,10 @@ async function postSlackMessageBestEffort( channelId: string, threadTs: string, text: string, + services: ResumeSlackTurnServices, ): Promise { try { - await postSlackApiMessage({ + await 
services.postSlackMessage({ channelId, threadTs, text, @@ -99,6 +100,9 @@ function createReadOnlyConfigService( }; } +/** Generates a resumed Slack turn reply at the agent execution boundary. */ +export type ResumeReplyGenerator = typeof generateAssistantReply; + /** Error raised when another worker already owns the resume lock. */ export class ResumeTurnBusyError extends Error { constructor(lockKey: string) { @@ -107,7 +111,8 @@ export class ResumeTurnBusyError extends Error { } } -interface ResumeSlackTurnArgs { +/** Inputs for resuming a Slack turn through the runtime delivery boundary. */ +export interface ResumeSlackTurnArgs { messageText: string; channelId: string; threadTs: string; @@ -115,7 +120,7 @@ interface ResumeSlackTurnArgs { replyContext?: AssistantReplyRequestContext; lockKey?: string; initialText?: string; - generateReply?: typeof generateAssistantReply; + generateReply?: ResumeReplyGenerator; onSuccess?: (reply: AssistantReply) => Promise; onFailure?: (error: unknown) => Promise; onAuthPause?: (error: unknown) => Promise; @@ -123,8 +128,33 @@ interface ResumeSlackTurnArgs { onPostDeliveryCommitFailure?: (error: unknown) => Promise; beforeStart?: () => Promise | false | void>; replyTimeoutMs?: number; + services?: ResumeSlackTurnServices; +} + +/** Runtime boundary used by timeout and auth resume orchestration. */ +export type ResumeSlackTurnRunner = typeof resumeSlackTurn; + +/** Services used by the Slack resume runner; component tests may replace external ports. 
*/ +export interface ResumeSlackTurnServices { + createAssistantStatusSession: typeof createSlackWebApiAssistantStatusSession; + generateAssistantReply: ResumeReplyGenerator; + getStateAdapter: typeof getStateAdapter; + logException: typeof logException; + postSlackMessage: typeof postSlackApiMessage; + postSlackReplyPosts: typeof postSlackApiReplyPosts; + startProcessingReactionForMessage: typeof startSlackProcessingReactionForMessage; } +const defaultResumeSlackTurnServices: ResumeSlackTurnServices = { + createAssistantStatusSession: createSlackWebApiAssistantStatusSession, + generateAssistantReply, + getStateAdapter, + logException, + postSlackMessage: postSlackApiMessage, + postSlackReplyPosts: postSlackApiReplyPosts, + startProcessingReactionForMessage: startSlackProcessingReactionForMessage, +}; + function getDefaultLockKey(channelId: string, threadTs: string): string { return `slack:${channelId}:${threadTs}`; } @@ -152,15 +182,16 @@ async function postResumeFailureReply(args: { threadTs: string; eventId: string; logContext: LogContext; + services: ResumeSlackTurnServices; }): Promise { try { - await postSlackApiMessage({ + await args.services.postSlackMessage({ channelId: args.channelId, threadTs: args.threadTs, text: buildTurnFailureResponse(args.eventId), }); } catch (error) { - logException( + args.services.logException( error, "slack_resume_failure_reply_post_failed", args.logContext, @@ -179,9 +210,10 @@ async function handleResumeFailure(args: { eventName: string; lockKey: string; resumeArgs: ResumeSlackTurnArgs; + services: ResumeSlackTurnServices; }): Promise { const logContext = getResumeLogContext(args.resumeArgs, args.lockKey); - const capturedEventId = logException( + const capturedEventId = args.services.logException( args.error, args.eventName, logContext, @@ -195,6 +227,7 @@ async function handleResumeFailure(args: { threadTs: args.resumeArgs.threadTs, eventId, logContext, + services: args.services, }); } @@ -259,7 +292,8 @@ function 
createResumeReplyContext( export async function resumeSlackTurn( args: ResumeSlackTurnArgs, ): Promise { - const stateAdapter = getStateAdapter(); + const services = args.services ?? defaultResumeSlackTurnServices; + const stateAdapter = services.getStateAdapter(); await stateAdapter.connect(); const lockKey = args.lockKey ?? getDefaultLockKey(args.channelId, args.threadTs); @@ -268,7 +302,7 @@ export async function resumeSlackTurn( throw new ResumeTurnBusyError(lockKey); } - const status = createSlackWebApiAssistantStatusSession({ + const status = services.createAssistantStatusSession({ channelId: args.channelId, threadTs: args.threadTs, }); @@ -308,10 +342,10 @@ export async function resumeSlackTurn( } if (runArgs.messageTs) { - processingReaction = await startSlackProcessingReactionForMessage({ + processingReaction = await services.startProcessingReactionForMessage({ channelId: runArgs.channelId, timestamp: runArgs.messageTs, - logException, + logException: services.logException, logContext: { ...getResumeLogContext(runArgs, lockKey) }, }); } @@ -320,11 +354,13 @@ export async function resumeSlackTurn( runArgs.channelId, runArgs.threadTs, runArgs.initialText, + services, ); } status.start(); - const generateReply = runArgs.generateReply ?? generateAssistantReply; + const generateReply = + runArgs.generateReply ?? services.generateAssistantReply; const replyContext = createResumeReplyContext(runArgs, status); const replyPromise = generateReply(runArgs.messageText, replyContext); const replyTimeoutMs = resolveReplyTimeoutMs(runArgs.replyTimeoutMs); @@ -347,7 +383,7 @@ export async function resumeSlackTurn( : await replyPromise; reply = finalizeFailedTurnReply({ reply, - logException, + logException: services.logException, context: getResumeLogContext(runArgs, lockKey), }); @@ -356,7 +392,7 @@ export async function resumeSlackTurn( conversationId: runArgs.replyContext?.correlation?.conversationId ?? 
lockKey, }); - await postSlackApiReplyPosts({ + await services.postSlackReplyPosts({ channelId: runArgs.channelId, threadTs: runArgs.threadTs, posts: planSlackReplyPosts({ reply }), @@ -375,7 +411,7 @@ export async function resumeSlackTurn( try { await runArgs.onPostDeliveryCommitFailure?.(error); } catch (terminalizeError) { - logException( + services.logException( terminalizeError, "slack_resume_post_delivery_terminalize_failed", getResumeLogContext(runArgs, lockKey), @@ -394,7 +430,8 @@ export async function resumeSlackTurn( await onAuthPause(error); }; } else if ( - isRetryableTurnError(error, "agent_continue") && + (isRetryableTurnError(error, "agent_continue") || + isRetryableTurnError(error, "turn_timeout_resume")) && onTimeoutPause ) { deferredPauseKind = "timeout"; @@ -409,6 +446,7 @@ export async function resumeSlackTurn( eventName: "slack_resume_turn_failed", lockKey, resumeArgs: runArgs, + services, }); }; } @@ -422,7 +460,7 @@ export async function resumeSlackTurn( } if (postDeliveryCommitError) { - logException( + services.logException( postDeliveryCommitError, "slack_resume_success_handler_failed", getResumeLogContext(runArgs, lockKey), @@ -443,6 +481,7 @@ export async function resumeSlackTurn( deferredAuthInfo.requesterId, deferredAuthInfo.providerDisplayName, ), + services, ); } return true; @@ -453,6 +492,7 @@ export async function resumeSlackTurn( eventName: "slack_resume_pause_handler_failed", lockKey, resumeArgs: runArgs, + services, }); return true; } @@ -474,7 +514,7 @@ export async function resumeAuthorizedRequest(args: { connectedText: string; replyContext?: AssistantReplyRequestContext; lockKey?: string; - generateReply?: typeof generateAssistantReply; + generateReply?: ResumeReplyGenerator; onSuccess?: (reply: AssistantReply) => Promise; onFailure?: (error: unknown) => Promise; onAuthPause?: (error: unknown) => Promise; @@ -482,6 +522,7 @@ export async function resumeAuthorizedRequest(args: { onPostDeliveryCommitFailure?: (error: unknown) 
=> Promise; beforeStart?: () => Promise | false | void>; replyTimeoutMs?: number; + services?: ResumeSlackTurnServices; }) { await resumeSlackTurn({ messageText: args.messageText, @@ -499,5 +540,6 @@ export async function resumeAuthorizedRequest(args: { onPostDeliveryCommitFailure: args.onPostDeliveryCommitFailure, beforeStart: args.beforeStart, replyTimeoutMs: args.replyTimeoutMs, + services: args.services, }); } diff --git a/packages/junior/src/chat/runtime/slack-runtime.ts b/packages/junior/src/chat/runtime/slack-runtime.ts index 39ba9bee5..718d57129 100644 --- a/packages/junior/src/chat/runtime/slack-runtime.ts +++ b/packages/junior/src/chat/runtime/slack-runtime.ts @@ -144,7 +144,7 @@ export interface SlackTurnRuntimeDependencies { body?: string, ) => void; modelId: string; - now: () => number; + now?: () => number; recordSkippedSteeringMessage: (args: { decision: SubscribedReplyDecision; message: Message; @@ -514,7 +514,7 @@ export function createSlackTurnRuntime< preparedState?: TPreparedState; text: TurnMessageText; }): Promise => { - const completedAtMs = deps.now(); + const completedAtMs = (deps.now ?? 
Date.now)(); logSkippedSubscribedDecision(args); if (args.preparedState) { await deps.onSubscribedMessageSkipped({ diff --git a/packages/junior/src/chat/runtime/timeout-resume-runner.ts b/packages/junior/src/chat/runtime/timeout-resume-runner.ts new file mode 100644 index 000000000..541699652 --- /dev/null +++ b/packages/junior/src/chat/runtime/timeout-resume-runner.ts @@ -0,0 +1,361 @@ +import { logException, logWarn } from "@/chat/logging"; +import { + ResumeTurnBusyError, + resumeSlackTurn as defaultResumeSlackTurn, + type ResumeReplyGenerator, + type ResumeSlackTurnRunner, +} from "@/chat/runtime/slack-resume"; +import { coerceThreadConversationState } from "@/chat/state/conversation"; +import { + failAgentTurnSessionRecord, + getAgentTurnSessionRecord, + type AgentTurnSessionRecord, +} from "@/chat/state/turn-session"; +import { + getPersistedThreadState, + getPersistedSandboxState, + persistThreadStateById, + getChannelConfigurationServiceById, +} from "@/chat/runtime/thread-state"; +import { buildDeliveredTurnStatePatch } from "@/chat/runtime/delivered-turn-state"; +import { + getTurnUserMessage, + getTurnUserReplyAttachmentContext, + getTurnUserSlackMessageTs, +} from "@/chat/runtime/turn-user-message"; +import { + buildConversationContext, + markConversationMessage, + updateConversationStats, +} from "@/chat/services/conversation-memory"; +import { coerceThreadArtifactsState } from "@/chat/state/artifacts"; +import { isRetryableTurnError, markTurnFailed } from "@/chat/runtime/turn"; +import { + scheduleTurnTimeoutResume as defaultScheduleTurnTimeoutResume, + type TurnContinuationRequest, +} from "@/chat/services/timeout-resume"; +import { parseSlackThreadId } from "@/chat/slack/context"; +import { lookupSlackRequester } from "@/chat/slack/user"; +import type { AssistantReply } from "@/chat/respond"; +import { persistAuthPauseTurnState } from "@/chat/runtime/auth-pause-state"; +import { + applyPendingAuthUpdate, + clearPendingAuth, +} from 
"@/chat/services/pending-auth"; + +const TIMEOUT_RESUME_LOCK_RETRY_DELAYS_MS = [250, 1_000, 2_000] as const; + +/** Runtime ports for timeout continuation execution. */ +export interface TimeoutResumeRunnerOptions { + generateReply?: ResumeReplyGenerator; + resumeSlackTurn?: ResumeSlackTurnRunner; + scheduleTurnTimeoutResume?: ( + request: TurnContinuationRequest, + ) => Promise; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +async function persistCompletedReplyState(args: { + sessionRecord: AgentTurnSessionRecord; + reply: AssistantReply; +}): Promise { + const currentState = await getPersistedThreadState( + args.sessionRecord.conversationId, + ); + const conversation = coerceThreadConversationState(currentState); + const artifacts = coerceThreadArtifactsState(currentState); + const userMessage = getTurnUserMessage( + conversation, + args.sessionRecord.sessionId, + ); + const statePatch = buildDeliveredTurnStatePatch({ + artifacts, + conversation, + reply: args.reply, + sessionId: args.sessionRecord.sessionId, + userMessageId: userMessage?.id, + }); + + await persistThreadStateById(args.sessionRecord.conversationId, { + ...statePatch, + }); +} + +async function failSessionRecordBestEffort(args: { + sessionRecord: AgentTurnSessionRecord; + errorMessage: string; +}): Promise { + try { + await failAgentTurnSessionRecord({ + conversationId: args.sessionRecord.conversationId, + expectedVersion: args.sessionRecord.version, + sessionId: args.sessionRecord.sessionId, + errorMessage: args.errorMessage, + }); + } catch (error) { + logException( + error, + "timeout_resume_session_record_fail_persist_failed", + {}, + { + "app.ai.conversation_id": args.sessionRecord.conversationId, + "app.ai.session_id": args.sessionRecord.sessionId, + }, + "Failed to mark timed-out turn session record failed", + ); + } +} + +async function persistFailedReplyState( + sessionRecord: AgentTurnSessionRecord, +): Promise { + const 
currentState = await getPersistedThreadState( + sessionRecord.conversationId, + ); + const conversation = coerceThreadConversationState(currentState); + clearPendingAuth(conversation, sessionRecord.sessionId); + + markTurnFailed({ + conversation, + nowMs: Date.now(), + sessionId: sessionRecord.sessionId, + userMessageId: getTurnUserMessage(conversation, sessionRecord.sessionId) + ?.id, + markConversationMessage, + updateConversationStats, + }); + + await failSessionRecordBestEffort({ + sessionRecord, + errorMessage: "Timed-out turn failed while resuming", + }); + await persistThreadStateById(sessionRecord.conversationId, { + conversation, + }); +} + +/** + * Resume one durable timeout continuation for a Slack thread. + * + * Returns false when the session became stale before generation began. + */ +export async function resumeTimedOutTurn( + payload: TurnContinuationRequest, + options: TimeoutResumeRunnerOptions = {}, +): Promise { + const thread = parseSlackThreadId(payload.conversationId); + if (!thread) { + throw new Error( + `Timeout resume requires a Slack thread conversation id, got "${payload.conversationId}"`, + ); + } + const scheduleTurnTimeoutResume = + options.scheduleTurnTimeoutResume ?? defaultScheduleTurnTimeoutResume; + const resumeSlackTurn = options.resumeSlackTurn ?? 
defaultResumeSlackTurn; + + return await resumeSlackTurn({ + messageText: "", + channelId: thread.channelId, + threadTs: thread.threadTs, + lockKey: payload.conversationId, + beforeStart: async () => { + const sessionRecord = await getAgentTurnSessionRecord( + payload.conversationId, + payload.sessionId, + ); + if ( + !sessionRecord || + sessionRecord.state !== "awaiting_resume" || + (sessionRecord.resumeReason !== "timeout" && + sessionRecord.resumeReason !== "yield") || + sessionRecord.version !== payload.expectedVersion + ) { + return false; + } + + const currentState = await getPersistedThreadState( + payload.conversationId, + ); + const conversation = coerceThreadConversationState(currentState); + const artifacts = coerceThreadArtifactsState(currentState); + const userMessage = getTurnUserMessage(conversation, payload.sessionId); + if (!userMessage?.author?.userId) { + throw new Error( + `Unable to locate the persisted user message for timeout resume session "${payload.sessionId}"`, + ); + } + if (conversation.processing.activeTurnId !== payload.sessionId) { + return false; + } + + const channelConfiguration = getChannelConfigurationServiceById( + thread.channelId, + ); + const conversationContext = buildConversationContext(conversation, { + excludeMessageId: userMessage.id, + }); + const sandbox = getPersistedSandboxState(currentState); + if (payload.destination.platform !== "slack") { + throw new Error( + `Timeout resume requires a Slack destination for "${payload.conversationId}"`, + ); + } + const requester = await lookupSlackRequester( + payload.destination.teamId, + userMessage.author.userId, + ); + + return { + messageText: userMessage.text, + messageTs: getTurnUserSlackMessageTs(userMessage), + replyContext: { + credentialContext: { + actor: { + type: "user", + userId: userMessage.author.userId, + }, + }, + requester, + destination: payload.destination, + correlation: { + conversationId: payload.conversationId, + turnId: payload.sessionId, + channelId: 
thread.channelId, + threadTs: thread.threadTs, + requesterId: userMessage.author.userId, + }, + toolChannelId: + artifacts.assistantContextChannelId ?? thread.channelId, + artifactState: artifacts, + pendingAuth: conversation.processing.pendingAuth, + conversationContext, + channelConfiguration, + piMessages: conversation.piMessages, + sandbox, + recordPendingAuth: async (nextPendingAuth) => { + await applyPendingAuthUpdate({ + conversation, + conversationId: payload.conversationId, + nextPendingAuth, + }); + await persistThreadStateById(payload.conversationId, { + conversation, + }); + }, + ...getTurnUserReplyAttachmentContext(userMessage), + }, + onSuccess: async (reply: AssistantReply) => { + await persistCompletedReplyState({ sessionRecord, reply }); + }, + onFailure: async () => { + await persistFailedReplyState(sessionRecord); + }, + onPostDeliveryCommitFailure: async () => { + await failAgentTurnSessionRecord({ + conversationId: sessionRecord.conversationId, + expectedVersion: sessionRecord.version, + sessionId: sessionRecord.sessionId, + errorMessage: + "Timed-out turn reply was delivered but completion state did not persist", + }); + }, + onAuthPause: async () => { + await persistAuthPauseTurnState({ + sessionId: payload.sessionId, + threadStateId: payload.conversationId, + }); + logWarn( + "timeout_resume_reparked_for_auth", + {}, + { + "app.ai.conversation_id": payload.conversationId, + "app.ai.session_id": payload.sessionId, + }, + "Resumed timed-out turn parked for auth", + ); + }, + onTimeoutPause: async (error: unknown) => { + if ( + !isRetryableTurnError(error, "turn_timeout_resume") && + !isRetryableTurnError(error, "agent_continue") + ) { + throw error; + } + const version = error.metadata?.version; + if (typeof version !== "number") { + throw new Error( + "Timed-out resume turn did not include a turn-session version", + ); + } + + await scheduleTurnTimeoutResume({ + conversationId: payload.conversationId, + destination: payload.destination, + 
sessionId: payload.sessionId, + expectedVersion: version, + }); + }, + generateReply: options.generateReply, + }; + }, + }); +} + +/** + * Retry timeout continuation when the normal Slack thread lock is briefly busy. + * + * Returns false when the session became stale before generation began. A busy + * lock that is rescheduled still returns true because runnable work remains + * durable. + */ +export async function resumeTimedOutTurnWithLockRetry( + payload: TurnContinuationRequest, + options: TimeoutResumeRunnerOptions = {}, +): Promise { + const scheduleTurnTimeoutResume = + options.scheduleTurnTimeoutResume ?? defaultScheduleTurnTimeoutResume; + for (const [attempt, delayMs] of [ + ...TIMEOUT_RESUME_LOCK_RETRY_DELAYS_MS, + undefined, + ].entries()) { + try { + return await resumeTimedOutTurn(payload, options); + } catch (error) { + if (!(error instanceof ResumeTurnBusyError)) { + throw error; + } + if (typeof delayMs !== "number") { + logWarn( + "timeout_resume_lock_busy", + {}, + { + "app.ai.conversation_id": payload.conversationId, + "app.ai.session_id": payload.sessionId, + "app.ai.resume_lock_retry_count": attempt, + }, + "Rescheduling timeout resume because another turn still owns the thread lock", + ); + await scheduleTurnTimeoutResume(payload); + return true; + } + + logWarn( + "timeout_resume_lock_busy_retrying", + {}, + { + "app.ai.conversation_id": payload.conversationId, + "app.ai.session_id": payload.sessionId, + "app.ai.resume_lock_retry_attempt": attempt + 1, + "app.ai.resume_lock_retry_delay_ms": delayMs, + }, + "Timeout resume lock was busy; retrying", + ); + await sleep(delayMs); + } + } + + return true; +} diff --git a/packages/junior/src/chat/runtime/turn.ts b/packages/junior/src/chat/runtime/turn.ts index 741e98236..4a0f83af8 100644 --- a/packages/junior/src/chat/runtime/turn.ts +++ b/packages/junior/src/chat/runtime/turn.ts @@ -15,7 +15,8 @@ export { buildDeterministicTurnId } from "@/chat/state/turn-id"; export type RetryableTurnReason = | 
"mcp_auth_resume" | "plugin_auth_resume" - | "agent_continue"; + | "agent_continue" + | "turn_timeout_resume"; /** Auth-pause reasons require a known provider before a resume can be parked. */ export type AuthResumeRetryableTurnReason = Extract< @@ -59,7 +60,7 @@ export class RetryableTurnError extends Error { metadata: AuthResumeRetryableTurnMetadata, ); constructor( - reason: "agent_continue", + reason: "agent_continue" | "turn_timeout_resume", message: string, metadata?: RetryableTurnMetadata, ); diff --git a/packages/junior/src/chat/sandbox/egress-credentials.ts b/packages/junior/src/chat/sandbox/egress-credentials.ts index 41d3d4912..dc5307e18 100644 --- a/packages/junior/src/chat/sandbox/egress-credentials.ts +++ b/packages/junior/src/chat/sandbox/egress-credentials.ts @@ -26,6 +26,17 @@ import { const HTTP_READ_METHODS = new Set(["GET", "HEAD", "OPTIONS"]); +interface SandboxEgressCredentialServices { + issueProviderCredentialLease: typeof issueProviderCredentialLease; + resolveProviderForHost: typeof resolveSandboxEgressProviderForHost; +} + +const defaultSandboxEgressCredentialServices: SandboxEgressCredentialServices = + { + issueProviderCredentialLease, + resolveProviderForHost: resolveSandboxEgressProviderForHost, + }; + export type SandboxEgressGrantSelection = | { grant: AgentPluginGrant; @@ -104,9 +115,10 @@ function credentialSubjectFromContext( function assertLeaseTransformsOwnedByProvider( provider: string, lease: Pick, + resolveProviderForHost: typeof resolveSandboxEgressProviderForHost, ): void { for (const transform of lease.headerTransforms) { - if (resolveSandboxEgressProviderForHost(transform.domain) !== provider) { + if (resolveProviderForHost(transform.domain) !== provider) { throw new Error( `Credential lease for ${provider} included header transform for unowned domain ${transform.domain}`, ); @@ -154,6 +166,7 @@ export async function sandboxEgressCredentialLease( provider: string, selection: SandboxEgressGrantSelection, context: 
SandboxEgressCredentialContext, + services: SandboxEgressCredentialServices = defaultSandboxEgressCredentialServices, ): Promise { const { grant } = selection; const cached = await getSandboxEgressCredentialLease( @@ -208,11 +221,8 @@ export async function sandboxEgressCredentialLease( } lease = pluginResult.lease; } else { - // Normalize broker credential-needed failures into the egress error shape. - // All CredentialUnavailableError throws in oauth-bearer-broker are user-actionable - // (missing token, scope gap, expired connection) and should trigger OAuth re-auth. try { - lease = await issueProviderCredentialLease({ + lease = await services.issueProviderCredentialLease({ context: context.credentials, provider, reason: grant.reason ?? `sandbox-egress:${provider}:default`, @@ -223,10 +233,9 @@ export async function sandboxEgressCredentialLease( provider, grant, kind: "auth_required", - authorization: authorizationForSandboxEgressGrant( - provider, - selection, - ), + ...(oauthAuthorizationForProvider(provider) + ? 
{ authorization: oauthAuthorizationForProvider(provider) } + : {}), message: error.message, }); } @@ -257,7 +266,11 @@ export async function sandboxEgressCredentialLease( expiresAt: lease.expiresAt, headerTransforms, }; - assertLeaseTransformsOwnedByProvider(provider, cachedLease); + assertLeaseTransformsOwnedByProvider( + provider, + cachedLease, + services.resolveProviderForHost, + ); await setSandboxEgressCredentialLease(context, cachedLease); return cachedLease; } diff --git a/packages/junior/src/chat/sandbox/egress-proxy.ts b/packages/junior/src/chat/sandbox/egress-proxy.ts index 88c26f7de..c724aec9d 100644 --- a/packages/junior/src/chat/sandbox/egress-proxy.ts +++ b/packages/junior/src/chat/sandbox/egress-proxy.ts @@ -1,9 +1,10 @@ +import { issueProviderCredentialLease } from "@/chat/capabilities/factory"; import { logInfo, logWarn, withSpan } from "@/chat/logging"; -import { onPluginEgressResponse } from "@/chat/plugins/credential-hooks"; import { matchesSandboxEgressDomain, resolveSandboxEgressProviderForHost, } from "@/chat/sandbox/egress-policy"; +import { onPluginEgressResponse } from "@/chat/plugins/credential-hooks"; import { hasSandboxEgressLeaseTransformForHost, sandboxEgressCredentialLease, @@ -75,6 +76,8 @@ export type SandboxEgressHttpInterceptor = (input: { interface ProxyDeps { fetch?: typeof fetch; interceptHttp?: SandboxEgressHttpInterceptor; + issueProviderCredentialLease?: typeof issueProviderCredentialLease; + resolveProviderForHost?: typeof resolveSandboxEgressProviderForHost; tracePropagation?: SandboxEgressTracePropagationConfig; verifyOidc?: (token: string) => Promise; } @@ -636,7 +639,9 @@ async function proxySandboxEgressRequestImpl( } const upstreamUrl = upstreamResult.url; - const provider = resolveSandboxEgressProviderForHost(upstreamUrl.hostname); + const provider = ( + deps.resolveProviderForHost ?? 
resolveSandboxEgressProviderForHost + )(upstreamUrl.hostname); if (!provider) { logWarn( "sandbox_egress_provider_unresolved", @@ -737,6 +742,12 @@ async function proxySandboxEgressVerifiedRequest(input: { provider, grantSelection, credentialContext, + { + issueProviderCredentialLease: + deps.issueProviderCredentialLease ?? issueProviderCredentialLease, + resolveProviderForHost: + deps.resolveProviderForHost ?? resolveSandboxEgressProviderForHost, + }, ); } catch (error) { if (error instanceof SandboxEgressCredentialError) { @@ -843,7 +854,7 @@ async function proxySandboxEgressVerifiedRequest(input: { upstreamUrl, response: { headers: new Headers(upstream.headers), - readText: async (maxBytes) => + readText: async (maxBytes: number) => await responseTextWithinLimit(upstream, maxBytes), status: upstream.status, }, diff --git a/packages/junior/src/chat/sandbox/lazy-workspace.ts b/packages/junior/src/chat/sandbox/lazy-workspace.ts new file mode 100644 index 000000000..bd41eff7e --- /dev/null +++ b/packages/junior/src/chat/sandbox/lazy-workspace.ts @@ -0,0 +1,78 @@ +import { logInfo, type LogContext } from "@/chat/logging"; +import type { SandboxExecutor } from "@/chat/sandbox/sandbox"; +import type { SandboxWorkspace } from "@/chat/sandbox/workspace"; + +interface SandboxBootReason { + trigger: string; + path?: string; + cmd?: string; + cwd?: string; +} + +/** Create a workspace facade that boots the sandbox only when a tool needs it. 
*/ +export function createLazySandboxWorkspace(args: { + executor: Pick; + logContext: LogContext; +}): SandboxWorkspace { + let sandboxPromise: Promise | undefined; + let sandboxPromiseId: string | undefined; + + const clearSandboxPromise = (): void => { + sandboxPromise = undefined; + sandboxPromiseId = undefined; + }; + + const getSandbox = (reason: SandboxBootReason): Promise => { + const currentSandboxId = args.executor.getSandboxId(); + if ( + sandboxPromise && + sandboxPromiseId && + currentSandboxId !== sandboxPromiseId + ) { + clearSandboxPromise(); + } + + if (!sandboxPromise) { + logInfo( + "sandbox_boot_requested", + args.logContext, + { + "app.sandbox.boot.trigger": reason.trigger, + ...(reason.path ? { "file.path": reason.path } : {}), + ...(reason.cmd ? { "process.executable.name": reason.cmd } : {}), + ...(reason.cwd ? { "file.directory": reason.cwd } : {}), + }, + "Lazy sandbox boot requested", + ); + sandboxPromise = args.executor + .createSandbox() + .then((sandbox) => { + sandboxPromiseId = sandbox.sandboxId; + return sandbox; + }) + .catch((error) => { + clearSandboxPromise(); + throw error; + }); + } + return sandboxPromise; + }; + + return { + readFileToBuffer: async (input) => + ( + await getSandbox({ + trigger: "workspace.readFileToBuffer", + path: input.path, + }) + ).readFileToBuffer(input), + runCommand: async (input) => + ( + await getSandbox({ + trigger: "workspace.runCommand", + cmd: input.cmd, + cwd: input.cwd, + }) + ).runCommand(input), + }; +} diff --git a/packages/junior/src/chat/sandbox/runtime-dependency-snapshots.ts b/packages/junior/src/chat/sandbox/runtime-dependency-snapshots.ts index 65533b5dc..9195ad54c 100644 --- a/packages/junior/src/chat/sandbox/runtime-dependency-snapshots.ts +++ b/packages/junior/src/chat/sandbox/runtime-dependency-snapshots.ts @@ -42,6 +42,10 @@ interface DependencyProfile { postinstall: PluginRuntimePostinstallCommand[]; } +interface RuntimeDependencySnapshotOptions { + createSandbox?: typeof 
Sandbox.create; +} + export type SnapshotResolveOutcome = | "no_profile" | "cache_hit" @@ -142,6 +146,7 @@ function buildDependencyProfile(runtime: string): DependencyProfile | null { }; } +/** Return the cache profile hash for the active runtime dependency set. */ export function getRuntimeDependencyProfileHash( runtime: string, ): string | undefined { @@ -474,6 +479,7 @@ async function createDependencySnapshot( profile: DependencyProfile, runtime: string, timeoutMs: number, + options: RuntimeDependencySnapshotOptions, ): Promise { return await withSnapshotSpan( "sandbox.snapshot.build", @@ -484,8 +490,9 @@ async function createDependencySnapshot( }, async () => { const sandboxCredentials = getVercelSandboxCredentials(); + const createSandbox = options.createSandbox ?? Sandbox.create; const sandbox = createSandboxInstance( - await Sandbox.create({ + await createSandbox({ timeout: timeoutMs, runtime, ...(sandboxCredentials ?? {}), @@ -630,15 +637,19 @@ function getRebuildReason(params: { return undefined; } -export async function resolveRuntimeDependencySnapshot(params: { - runtime: string; - timeoutMs: number; - forceRebuild?: boolean; - staleSnapshotId?: string; - onProgress?: ( - phase: RuntimeDependencySnapshotProgressPhase, - ) => void | Promise; -}): Promise { +/** Resolve or build the sandbox snapshot for the active runtime dependency set. 
*/ +export async function resolveRuntimeDependencySnapshot( + params: { + runtime: string; + timeoutMs: number; + forceRebuild?: boolean; + staleSnapshotId?: string; + onProgress?: ( + phase: RuntimeDependencySnapshotProgressPhase, + ) => void | Promise; + }, + options: RuntimeDependencySnapshotOptions = {}, +): Promise { return await withSnapshotSpan( "sandbox.snapshot.resolve", "sandbox.snapshot.resolve", @@ -712,6 +723,7 @@ export async function resolveRuntimeDependencySnapshot(params: { profile, params.runtime, params.timeoutMs, + options, ); await setCachedSnapshot({ profileHash: profile.profileHash, @@ -747,6 +759,7 @@ export async function resolveRuntimeDependencySnapshot(params: { ); } +/** Detect provider errors that mean a cached snapshot id can no longer be used. */ export function isSnapshotMissingError(error: unknown): boolean { const searchable = error instanceof Error diff --git a/packages/junior/src/chat/sandbox/sandbox.ts b/packages/junior/src/chat/sandbox/sandbox.ts index ab8e214d9..5573c6a8c 100644 --- a/packages/junior/src/chat/sandbox/sandbox.ts +++ b/packages/junior/src/chat/sandbox/sandbox.ts @@ -94,6 +94,38 @@ export interface SandboxExecutor { dispose(): Promise; } +export interface SandboxExecutorOptions { + sandboxId?: string; + sandboxDependencyProfileHash?: string; + timeoutMs?: number; + traceContext?: LogContext; + tracePropagation?: SandboxEgressTracePropagationConfig; + credentialEgress?: CredentialContext; + agentHooks?: AgentPluginHookRunner; + onSandboxAcquired?: (sandbox: SandboxAcquiredState) => void | Promise; + runBashCustomCommand?: ( + command: string, + ) => Promise<{ handled: boolean; result?: BashCustomCommandResult }>; +} + +export type SandboxExecutorFactory = ( + options?: SandboxExecutorOptions, +) => SandboxExecutor; + +interface SandboxExecutorServices { + buildSandboxEgressNetworkPolicy: typeof buildSandboxEgressNetworkPolicy; + createSandboxEgressCredentialToken: typeof createSandboxEgressCredentialToken; + 
createSandboxSessionManager: typeof createSandboxSessionManager; + resolveSandboxCommandEnvironment: typeof resolveSandboxCommandEnvironment; +} + +const defaultSandboxExecutorServices: SandboxExecutorServices = { + buildSandboxEgressNetworkPolicy, + createSandboxEgressCredentialToken, + createSandboxSessionManager, + resolveSandboxCommandEnvironment, +}; + const SANDBOX_TOOL_NAMES = new Set([ "bash", "readFile", @@ -133,19 +165,10 @@ function sandboxStreamInterruptedResult(toolName: string) { } /** Create one sandbox-backed tool executor facade for the current turn. */ -export function createSandboxExecutor(options?: { - sandboxId?: string; - sandboxDependencyProfileHash?: string; - timeoutMs?: number; - traceContext?: LogContext; - tracePropagation?: SandboxEgressTracePropagationConfig; - credentialEgress?: CredentialContext; - agentHooks?: AgentPluginHookRunner; - onSandboxAcquired?: (sandbox: SandboxAcquiredState) => void | Promise; - runBashCustomCommand?: ( - command: string, - ) => Promise<{ handled: boolean; result?: BashCustomCommandResult }>; -}): SandboxExecutor { +export function createSandboxExecutor( + options?: SandboxExecutorOptions, + services: SandboxExecutorServices = defaultSandboxExecutorServices, +): SandboxExecutor { let availableSkills: SkillMetadata[] = []; let referenceFiles: string[] = []; const traceContext = options?.traceContext ?? 
{}; @@ -170,7 +193,7 @@ export function createSandboxExecutor(options?: { throw new Error("Sandbox credential egress is not configured"); } const now = Date.now(); - const token = createSandboxEgressCredentialToken({ + const token = services.createSandboxEgressCredentialToken({ credentials: credentialEgress, egressId, ttlMs: sandboxEgressTokenTtlMs, @@ -181,18 +204,18 @@ export function createSandboxExecutor(options?: { }); return token; }; - const sessionManager = createSandboxSessionManager({ + const sessionManager = services.createSandboxSessionManager({ sandboxId: options?.sandboxId, sandboxDependencyProfileHash: options?.sandboxDependencyProfileHash, timeoutMs: options?.timeoutMs, traceContext, commandEnv: credentialEgress - ? async () => await resolveSandboxCommandEnvironment() + ? async () => await services.resolveSandboxCommandEnvironment() : undefined, createNetworkPolicy: credentialEgress || hasTracePropagationDomains ? (egressId, traceHeaders) => - buildSandboxEgressNetworkPolicy({ + services.buildSandboxEgressNetworkPolicy({ ...(credentialEgress ? { credentialToken: sandboxEgressCredentialTokenFor(egressId) } : {}), @@ -306,8 +329,10 @@ export function createSandboxExecutor(options?: { // side-channel from the network layer — not a property of shell exit status — // and `clearSandboxEgressSignals` runs before each execution to prevent // cross-command leakage. 
- const authRequired = await consumeSandboxEgressAuthRequiredSignal(activeEgressId); - const permissionDenied = await consumeSandboxEgressPermissionDeniedSignal(activeEgressId); + const authRequired = + await consumeSandboxEgressAuthRequiredSignal(activeEgressId); + const permissionDenied = + await consumeSandboxEgressPermissionDeniedSignal(activeEgressId); return { result: { diff --git a/packages/junior/src/chat/sandbox/session.ts b/packages/junior/src/chat/sandbox/session.ts index 2ecadc9f9..2fe9815b1 100644 --- a/packages/junior/src/chat/sandbox/session.ts +++ b/packages/junior/src/chat/sandbox/session.ts @@ -71,6 +71,24 @@ interface SandboxToolExecutors { fs: SandboxFileSystem; } +interface SandboxSessionServices { + createBashTool: typeof createBashTool; + createSandbox: typeof Sandbox.create; + getRuntimeDependencyProfileHash: typeof getRuntimeDependencyProfileHash; + getSandbox: typeof Sandbox.get; + isSnapshotMissingError: typeof isSnapshotMissingError; + resolveRuntimeDependencySnapshot: typeof resolveRuntimeDependencySnapshot; +} + +const defaultSandboxSessionServices: SandboxSessionServices = { + createBashTool, + createSandbox: Sandbox.create, + getRuntimeDependencyProfileHash, + getSandbox: Sandbox.get, + isSnapshotMissingError, + resolveRuntimeDependencySnapshot, +}; + function createBashToolSandboxAdapter(sandbox: SandboxInstance) { return { async executeCommand(command: string) { @@ -182,22 +200,25 @@ function getCommandAbortedResult(): { } /** Manage sandbox lifecycle, sync, keepalive, and tool executor caching for one executor instance. 
*/ -export function createSandboxSessionManager(options?: { - sandboxId?: string; - sandboxDependencyProfileHash?: string; - timeoutMs?: number; - traceContext?: LogContext; - commandEnv?: () => Promise>; - createNetworkPolicy?: ( - egressId: string, - traceHeaders?: TracePropagationHeaders, - ) => NetworkPolicy | undefined; - onSandboxPrepare?: (sandbox: SandboxInstance) => void | Promise; - onSandboxAcquired?: (sandbox: { - sandboxId: string; +export function createSandboxSessionManager( + options?: { + sandboxId?: string; sandboxDependencyProfileHash?: string; - }) => void | Promise; -}): SandboxSessionManager { + timeoutMs?: number; + traceContext?: LogContext; + commandEnv?: () => Promise>; + createNetworkPolicy?: ( + egressId: string, + traceHeaders?: TracePropagationHeaders, + ) => NetworkPolicy | undefined; + onSandboxPrepare?: (sandbox: SandboxInstance) => void | Promise; + onSandboxAcquired?: (sandbox: { + sandboxId: string; + sandboxDependencyProfileHash?: string; + }) => void | Promise; + }, + services: SandboxSessionServices = defaultSandboxSessionServices, +): SandboxSessionManager { let sandbox: SandboxInstance | null = null; let sandboxIdHint = options?.sandboxId; let availableSkills: SkillMetadata[] = []; @@ -211,7 +232,7 @@ export function createSandboxSessionManager(options?: { const timeoutMs = options?.timeoutMs ?? 1000 * 60 * 30; const traceContext = options?.traceContext ?? {}; const dependencyProfileHash = - getRuntimeDependencyProfileHash(SANDBOX_RUNTIME); + services.getRuntimeDependencyProfileHash(SANDBOX_RUNTIME); const resolveCommandEnv = options?.commandEnv ?? (async () => ({}) as Record); @@ -368,7 +389,7 @@ export function createSandboxSessionManager(options?: { const networkPolicy = preflightNetworkPolicy(sandboxName); try { return createSandboxInstance( - await Sandbox.create({ + await services.createSandbox({ timeout: timeoutMs, ...(networkPolicy ? 
{ name: sandboxName, persistent: false, networkPolicy } @@ -424,7 +445,7 @@ export function createSandboxSessionManager(options?: { if (!snapshot.snapshotId) { const networkPolicy = preflightNetworkPolicy(sandboxName); return createSandboxInstance( - await Sandbox.create({ + await services.createSandbox({ timeout: timeoutMs, runtime, ...(networkPolicy @@ -442,14 +463,14 @@ export function createSandboxSessionManager(options?: { sandboxName, ); } catch (error) { - if (!isSnapshotMissingError(error)) { + if (!services.isSnapshotMissingError(error)) { throw error; } setSpanAttributes({ "app.sandbox.snapshot.rebuild_after_missing": true, }); - const rebuiltSnapshot = await resolveRuntimeDependencySnapshot({ + const rebuiltSnapshot = await services.resolveRuntimeDependencySnapshot({ runtime, timeoutMs, forceRebuild: true, @@ -483,7 +504,7 @@ export function createSandboxSessionManager(options?: { "app.sandbox.runtime": runtime, }, async () => { - const snapshot = await resolveRuntimeDependencySnapshot({ + const snapshot = await services.resolveRuntimeDependencySnapshot({ runtime, timeoutMs, }); @@ -587,7 +608,7 @@ export function createSandboxSessionManager(options?: { }, async () => createSandboxInstance( - await Sandbox.get({ + await services.getSandbox({ name: sandboxIdHint as string, resume: true, ...(sandboxCredentials ?? 
{}), @@ -732,7 +753,7 @@ export function createSandboxSessionManager(options?: { "app.sandbox.destination": SANDBOX_WORKSPACE_ROOT, }, async () => - await createBashTool({ + await services.createBashTool({ sandbox: createBashToolSandboxAdapter(sandboxInstance), destination: SANDBOX_WORKSPACE_ROOT, }), diff --git a/packages/junior/src/chat/services/context-compaction.ts b/packages/junior/src/chat/services/context-compaction.ts index d549668de..23d68f1a9 100644 --- a/packages/junior/src/chat/services/context-compaction.ts +++ b/packages/junior/src/chat/services/context-compaction.ts @@ -21,10 +21,8 @@ import { import { commitMessages } from "@/chat/state/session-log"; import type { ThreadConversationState } from "@/chat/state/conversation"; import { logWarn, setSpanAttributes } from "@/chat/logging"; -import { - stripRuntimeTurnContext, - trimTrailingAssistantMessages, -} from "@/chat/respond-helpers"; +import { stripRuntimeTurnContext } from "@/chat/respond/runtime-turn-context"; +import { trimTrailingAssistantMessages } from "@/chat/respond/pi-messages"; import { updateConversationStats } from "@/chat/services/conversation-memory"; const RETAINED_USER_MESSAGE_TOKENS = 20_000; diff --git a/packages/junior/src/chat/services/mcp-auth-orchestration.ts b/packages/junior/src/chat/services/mcp-auth-orchestration.ts index 72c6d13df..b0bcf6fff 100644 --- a/packages/junior/src/chat/services/mcp-auth-orchestration.ts +++ b/packages/junior/src/chat/services/mcp-auth-orchestration.ts @@ -65,6 +65,34 @@ export interface McpAuthOrchestration { getPendingPause: () => McpAuthorizationPauseError | undefined; } +type McpOAuthClientProviderFactoryInput = Parameters< + typeof createMcpOAuthClientProvider +>[0]; + +type McpAuthProvider = OAuthClientProvider & { + readonly authSessionId: string; +}; + +interface McpAuthOrchestrationServices { + createMcpOAuthClientProvider: ( + input: McpOAuthClientProviderFactoryInput, + ) => Promise; + deleteMcpAuthSession: typeof deleteMcpAuthSession; 
+ deliverPrivateMessage: typeof deliverPrivateMessage; + getMcpAuthSession: typeof getMcpAuthSession; + patchMcpAuthSession: typeof patchMcpAuthSession; + recordAuthorizationRequested: typeof recordAuthorizationRequested; +} + +const defaultMcpAuthOrchestrationServices: McpAuthOrchestrationServices = { + createMcpOAuthClientProvider, + deleteMcpAuthSession, + deliverPrivateMessage, + getMcpAuthSession, + patchMcpAuthSession, + recordAuthorizationRequested, +}; + function authorizationId(args: { kind: "mcp"; provider: string; @@ -76,6 +104,7 @@ function authorizationId(args: { /** Create MCP authorization orchestration for a single agent run. */ export function createMcpAuthOrchestration( input: McpAuthOrchestrationInput, + services: McpAuthOrchestrationServices = defaultMcpAuthOrchestrationServices, ): McpAuthOrchestration { let pendingPause: McpAuthorizationPauseError | undefined; const authSessionIdsByProvider = new Map(); @@ -95,7 +124,7 @@ export function createMcpAuthOrchestration( ); } - const provider = await createMcpOAuthClientProvider({ + const provider = await services.createMcpOAuthClientProvider({ provider: plugin.manifest.name, conversationId: input.conversationId, destination: input.destination, @@ -129,7 +158,7 @@ export function createMcpAuthOrchestration( ); } if (input.authorizationFlowMode === "disabled") { - await deleteMcpAuthSession(authSessionId); + await services.deleteMcpAuthSession(authSessionId); throw new AuthorizationFlowDisabledError("mcp", provider); } const recordPendingAuth = input.recordPendingAuth; @@ -140,7 +169,7 @@ export function createMcpAuthOrchestration( } const latestArtifactState = input.getMergedArtifactState(); - await patchMcpAuthSession(authSessionId, { + await services.patchMcpAuthSession(authSessionId, { configuration: { ...input.getConfiguration() }, artifactState: latestArtifactState, toolChannelId: @@ -149,7 +178,7 @@ export function createMcpAuthOrchestration( input.channelId, }); - const authSession = await 
getMcpAuthSession(authSessionId); + const authSession = await services.getMcpAuthSession(authSessionId); if (!authSession?.authorizationUrl) { throw new Error(`Missing MCP authorization URL for plugin "${provider}"`); } @@ -157,6 +186,7 @@ export function createMcpAuthOrchestration( const reusingPendingLink = canReusePendingAuthLink({ pendingAuth: input.pendingAuth, kind: "mcp", + nowMs: Date.now(), provider, requesterId, sessionId, @@ -164,11 +194,11 @@ export function createMcpAuthOrchestration( const providerLabel = formatProviderLabel(provider); if (!reusingPendingLink) { - const delivery = await deliverPrivateMessage({ + const delivery = await services.deliverPrivateMessage({ channelId: authSession.channelId, threadTs: authSession.threadTs, userId: authSession.userId, - text: `<${authSession.authorizationUrl}|Click here to link your ${providerLabel} MCP access>. Once you've authorized, this thread will continue automatically.`, + text: `<${authSession.authorizationUrl}|Click here to link your ${formatProviderLabel(provider)} MCP access>. Once you've authorized, this thread will continue automatically.`, }); if (!delivery) { throw new Error( @@ -176,7 +206,7 @@ export function createMcpAuthOrchestration( ); } } else { - await deleteMcpAuthSession(authSessionId); + await services.deleteMcpAuthSession(authSessionId); } await recordPendingAuth({ @@ -188,7 +218,7 @@ export function createMcpAuthOrchestration( ? 
input.pendingAuth!.linkSentAtMs : Date.now(), }); - await recordAuthorizationRequested({ + await services.recordAuthorizationRequested({ conversationId, kind: "mcp", provider, diff --git a/packages/junior/src/chat/services/plugin-auth-orchestration.ts b/packages/junior/src/chat/services/plugin-auth-orchestration.ts index 4952276ff..960d89076 100644 --- a/packages/junior/src/chat/services/plugin-auth-orchestration.ts +++ b/packages/junior/src/chat/services/plugin-auth-orchestration.ts @@ -76,7 +76,19 @@ export interface PluginAuthOrchestration { getPendingPause: () => PluginAuthorizationPauseError | undefined; } -/** Normalize a sandbox egress auth signal and preserve host failure messages. */ +interface PluginAuthOrchestrationServices { + recordAuthorizationRequested: typeof recordAuthorizationRequested; + startOAuthFlow: typeof startOAuthFlow; + unlinkProvider: typeof unlinkProvider; +} + +const defaultPluginAuthOrchestrationServices: PluginAuthOrchestrationServices = + { + recordAuthorizationRequested, + startOAuthFlow, + unlinkProvider, + }; + function pluginAuthRequiredSignal(details: unknown): | { authorization?: { @@ -126,6 +138,7 @@ function authorizationId(args: { */ export function createPluginAuthOrchestration( input: PluginAuthOrchestrationInput, + services: PluginAuthOrchestrationServices = defaultPluginAuthOrchestrationServices, ): PluginAuthOrchestration { let pendingPause: PluginAuthorizationPauseError | undefined; @@ -159,6 +172,7 @@ export function createPluginAuthOrchestration( ? 
canReusePendingAuthLink({ pendingAuth: input.pendingAuth, kind: "plugin", + nowMs: Date.now(), provider, requesterId: input.requesterId, sessionId: input.sessionId, @@ -167,7 +181,7 @@ export function createPluginAuthOrchestration( : false; if (!reusingPendingLink) { - const oauthResult = await startOAuthFlow(provider, { + const oauthResult = await services.startOAuthFlow(provider, { requesterId: input.requesterId, channelId: input.channelId, destination: input.destination, @@ -194,7 +208,11 @@ export function createPluginAuthOrchestration( input.requesterId && input.userTokenStore ) { - await unlinkProvider(input.requesterId, provider, input.userTokenStore); + await services.unlinkProvider( + input.requesterId, + provider, + input.userTokenStore, + ); } if (input.sessionId && recordPendingAuth) { @@ -210,7 +228,7 @@ export function createPluginAuthOrchestration( }); } if (input.conversationId && input.sessionId) { - await recordAuthorizationRequested({ + await services.recordAuthorizationRequested({ conversationId: input.conversationId, kind: "plugin", provider, diff --git a/packages/junior/src/chat/services/provider-retry.ts b/packages/junior/src/chat/services/provider-retry.ts index 5936ffcdb..2665e9593 100644 --- a/packages/junior/src/chat/services/provider-retry.ts +++ b/packages/junior/src/chat/services/provider-retry.ts @@ -3,7 +3,7 @@ import type { PiMessage } from "@/chat/pi/messages"; import { getPiMessageRole, trimTrailingAssistantMessages, -} from "@/chat/respond-helpers"; +} from "@/chat/respond/pi-messages"; const PROVIDER_RETRY_DELAYS_MS = [2_000, 4_000, 8_000] as const; const PROVIDER_ERROR_PREFIX = "AI provider error:"; diff --git a/packages/junior/src/chat/services/timeout-resume.ts b/packages/junior/src/chat/services/timeout-resume.ts new file mode 100644 index 000000000..360e09005 --- /dev/null +++ b/packages/junior/src/chat/services/timeout-resume.ts @@ -0,0 +1,197 @@ +/** + * Timeout resume continuation scheduling. 
+ * + * This module owns the durable queue handoff used when a turn times out but has + * a safe Pi continuation boundary. The signed request verifier remains for + * callbacks that were already in flight during a deployment rollover. + */ +import { createHmac, timingSafeEqual } from "node:crypto"; +import type { StateAdapter } from "chat"; +import type { Destination } from "@sentry/junior-plugin-api"; +import { parseDestination } from "@/chat/destination"; +import { getAgentTurnSessionRecord } from "@/chat/state/turn-session"; +import type { ConversationWorkQueue } from "@/chat/task-execution/queue"; +import { + markConversationWorkEnqueued, + requestConversationWork, +} from "@/chat/task-execution/store"; +import { getVercelConversationWorkQueue } from "@/chat/task-execution/vercel-queue"; + +const TURN_TIMEOUT_RESUME_HMAC_CONTEXT = "junior.turn_timeout_resume.v1"; +const TURN_TIMEOUT_RESUME_SIGNATURE_VERSION = "v1"; +const TURN_TIMEOUT_RESUME_MAX_SKEW_MS = 5 * 60 * 1000; +const TURN_TIMEOUT_RESUME_TIMESTAMP_HEADER = "x-junior-resume-timestamp"; +const TURN_TIMEOUT_RESUME_SIGNATURE_HEADER = "x-junior-resume-signature"; + +export interface TurnContinuationRequest { + conversationId: string; + destination: Destination; + expectedVersion: number; + sessionId: string; +} + +export interface ScheduleTurnTimeoutResumeOptions { + nowMs?: number; + queue?: ConversationWorkQueue; + state?: StateAdapter; +} + +/** Build the callback request for an awaiting automatic turn continuation. 
*/ +export async function getAwaitingTurnContinuationRequest(args: { + conversationId: string; + sessionId: string; +}): Promise { + const sessionRecord = await getAgentTurnSessionRecord( + args.conversationId, + args.sessionId, + ); + if ( + !sessionRecord || + sessionRecord.state !== "awaiting_resume" || + (sessionRecord.resumeReason !== "timeout" && + sessionRecord.resumeReason !== "yield") || + (sessionRecord.resumeReason === "timeout" && sessionRecord.sliceId < 2) + ) { + return undefined; + } + if (!sessionRecord.destination) { + return undefined; + } + + return { + conversationId: args.conversationId, + destination: sessionRecord.destination, + sessionId: args.sessionId, + expectedVersion: sessionRecord.version, + }; +} + +function getTurnTimeoutResumeSecret(): string | undefined { + return process.env.JUNIOR_SECRET?.trim() || undefined; +} + +function buildSignedPayload(timestamp: string, body: string): string { + return `${TURN_TIMEOUT_RESUME_HMAC_CONTEXT}:${timestamp}:${body}`; +} + +function signTurnTimeoutResumeBody( + secret: string, + timestamp: string, + body: string, +): string { + const digest = createHmac("sha256", secret) + .update(buildSignedPayload(timestamp, body)) + .digest("hex"); + return `${TURN_TIMEOUT_RESUME_SIGNATURE_VERSION}=${digest}`; +} + +function timingSafeMatch(expected: string, actual: string): boolean { + const expectedBuffer = Buffer.from(expected); + const actualBuffer = Buffer.from(actual); + if (expectedBuffer.length !== actualBuffer.length) { + return false; + } + return timingSafeEqual(expectedBuffer, actualBuffer); +} + +/** + * Parse the signed resume body used by the durable conversation queue. 
+ */ +function parseTurnTimeoutResumeRequest( + value: unknown, +): TurnContinuationRequest | undefined { + if (!value || typeof value !== "object") { + return undefined; + } + + const record = value as Record; + const destination = parseDestination(record.destination); + let expectedVersion = record.expectedVersion; + if (typeof expectedVersion !== "number") { + // Accept callbacks signed before the queue-resume destination cutover. + expectedVersion = record.expectedCheckpointVersion; + } + if ( + typeof record.conversationId !== "string" || + typeof record.sessionId !== "string" || + typeof expectedVersion !== "number" || + !destination + ) { + return undefined; + } + + return { + conversationId: record.conversationId, + destination, + sessionId: record.sessionId, + expectedVersion, + }; +} + +/** Schedule durable conversation work to resume a timed-out turn. */ +export async function scheduleTurnTimeoutResume( + request: TurnContinuationRequest, + options: ScheduleTurnTimeoutResumeOptions = {}, +): Promise { + const nowMs = options.nowMs ?? Date.now(); + await requestConversationWork({ + conversationId: request.conversationId, + destination: request.destination, + nowMs, + state: options.state, + }); + const queue = options.queue ?? getVercelConversationWorkQueue(); + await queue.send( + { + conversationId: request.conversationId, + destination: request.destination, + }, + { + idempotencyKey: [ + "timeout", + request.conversationId, + request.sessionId, + request.expectedVersion, + ].join(":"), + }, + ); + await markConversationWorkEnqueued({ + conversationId: request.conversationId, + nowMs, + state: options.state, + }); +} + +/** Verify and parse an authenticated timeout resume callback request. */ +export async function verifyTurnTimeoutResumeRequest( + request: Request, +): Promise { + const timestamp = + request.headers.get(TURN_TIMEOUT_RESUME_TIMESTAMP_HEADER)?.trim() ?? 
""; + const signature = + request.headers.get(TURN_TIMEOUT_RESUME_SIGNATURE_HEADER)?.trim() ?? ""; + const secret = getTurnTimeoutResumeSecret(); + if (!timestamp || !signature || !secret) { + return undefined; + } + + const parsedTimestamp = Number.parseInt(timestamp, 10); + if ( + !Number.isFinite(parsedTimestamp) || + Math.abs(Date.now() - parsedTimestamp) > TURN_TIMEOUT_RESUME_MAX_SKEW_MS + ) { + return undefined; + } + + const body = await request.text(); + const expectedSignature = signTurnTimeoutResumeBody(secret, timestamp, body); + if (!timingSafeMatch(expectedSignature, signature)) { + return undefined; + } + + try { + return parseTurnTimeoutResumeRequest(JSON.parse(body)); + } catch { + return undefined; + } +} diff --git a/packages/junior/src/chat/services/turn-result.ts b/packages/junior/src/chat/services/turn-result.ts index d69a18ffb..b335de1e0 100644 --- a/packages/junior/src/chat/services/turn-result.ts +++ b/packages/junior/src/chat/services/turn-result.ts @@ -17,13 +17,15 @@ import { extractAssistantText, getTerminalAssistantMessages, isAssistantMessage, - isExecutionEscapeResponse, - isRawToolPayloadResponse, isToolResultError, isToolResultMessage, normalizeToolNameFromResult, +} from "@/chat/respond/pi-messages"; +import { + isExecutionEscapeResponse, + isRawToolPayloadResponse, summarizeMessageText, -} from "@/chat/respond-helpers"; +} from "@/chat/respond/reply-output-guards"; const POST_CANVAS_REPLY_MAX_CHARS = 700; const POST_CANVAS_REPLY_MAX_LINES = 8; diff --git a/packages/junior/src/chat/services/turn-session-record.ts b/packages/junior/src/chat/services/turn-session-record.ts index d0221a858..fbff73b3e 100644 --- a/packages/junior/src/chat/services/turn-session-record.ts +++ b/packages/junior/src/chat/services/turn-session-record.ts @@ -11,10 +11,11 @@ import type { PiMessage } from "@/chat/pi/messages"; import { getPiMessageRole, trimTrailingAssistantMessages, -} from "@/chat/respond-helpers"; +} from "@/chat/respond/pi-messages"; 
import { addAgentTurnUsage, type AgentTurnUsage } from "@/chat/usage"; export const AGENT_CONTINUE_MAX_SLICES = 48; +export const AGENT_TURN_TIMEOUT_RESUME_MAX_SLICES = 48; export interface TurnSessionContext { conversationId?: string; @@ -149,6 +150,9 @@ export async function persistRunningSessionRecord(args: { args.conversationId, args.sessionId, ); + const traceId = getActiveTraceId() ?? latestSessionRecord?.traceId; + const turnStartMessageIndex = + args.turnStartMessageIndex ?? latestSessionRecord?.turnStartMessageIndex; await upsertAgentTurnSessionRecord({ ...((args.channelName ?? latestSessionRecord?.channelName) ? { channelName: args.channelName ?? latestSessionRecord?.channelName } @@ -172,17 +176,8 @@ export async function persistRunningSessionRecord(args: { ...((args.requester ?? latestSessionRecord?.requester) ? { requester: args.requester ?? latestSessionRecord?.requester } : {}), - ...((getActiveTraceId() ?? latestSessionRecord?.traceId) - ? { traceId: getActiveTraceId() ?? latestSessionRecord?.traceId } - : {}), - ...((args.turnStartMessageIndex ?? - latestSessionRecord?.turnStartMessageIndex) !== undefined - ? { - turnStartMessageIndex: - args.turnStartMessageIndex ?? - latestSessionRecord?.turnStartMessageIndex, - } - : {}), + ...(traceId ? { traceId } : {}), + ...(turnStartMessageIndex !== undefined ? { turnStartMessageIndex } : {}), }); return true; } catch (recordError) { @@ -220,6 +215,9 @@ export async function persistCompletedSessionRecord(args: { args.conversationId, args.sessionId, ); + const traceId = getActiveTraceId() ?? latestSessionRecord?.traceId; + const turnStartMessageIndex = + args.turnStartMessageIndex ?? latestSessionRecord?.turnStartMessageIndex; await upsertAgentTurnSessionRecord({ ...((args.channelName ?? latestSessionRecord?.channelName) ? { channelName: args.channelName ?? latestSessionRecord?.channelName } @@ -249,17 +247,8 @@ export async function persistCompletedSessionRecord(args: { ...((args.requester ?? 
latestSessionRecord?.requester) ? { requester: args.requester ?? latestSessionRecord?.requester } : {}), - ...((getActiveTraceId() ?? latestSessionRecord?.traceId) - ? { traceId: getActiveTraceId() ?? latestSessionRecord?.traceId } - : {}), - ...((args.turnStartMessageIndex ?? - latestSessionRecord?.turnStartMessageIndex) !== undefined - ? { - turnStartMessageIndex: - args.turnStartMessageIndex ?? - latestSessionRecord?.turnStartMessageIndex, - } - : {}), + ...(traceId ? { traceId } : {}), + ...(turnStartMessageIndex !== undefined ? { turnStartMessageIndex } : {}), }); } catch (recordError) { logSessionRecordError( @@ -292,6 +281,7 @@ export async function persistAuthPauseSessionRecord(args: { logContext: SessionRecordLogContext; requester?: StoredSlackRequester; surface?: AgentTurnSurface; + turnStartMessageIndex?: number; }): Promise { const nextSliceId = args.currentSliceId + 1; try { @@ -306,6 +296,8 @@ export async function persistAuthPauseSessionRecord(args: { if (piMessages.length === 0 || !isContinuableBoundary(piMessages)) { return undefined; } + const turnStartMessageIndex = + args.turnStartMessageIndex ?? latestSessionRecord?.turnStartMessageIndex; return await upsertAgentTurnSessionRecord({ ...((args.channelName ?? latestSessionRecord?.channelName) ? { channelName: args.channelName ?? latestSessionRecord?.channelName } @@ -341,6 +333,7 @@ export async function persistAuthPauseSessionRecord(args: { ...((getActiveTraceId() ?? latestSessionRecord?.traceId) ? { traceId: getActiveTraceId() ?? latestSessionRecord?.traceId } : {}), + ...(turnStartMessageIndex !== undefined ? 
{ turnStartMessageIndex } : {}), }); } catch (recordError) { logSessionRecordError( @@ -375,6 +368,7 @@ export async function persistTimeoutSessionRecord(args: { logContext: SessionRecordLogContext; requester?: StoredSlackRequester; surface?: AgentTurnSurface; + turnStartMessageIndex?: number; }): Promise { const nextSliceId = args.currentSliceId + 1; @@ -398,7 +392,9 @@ export async function persistTimeoutSessionRecord(args: { latestSessionRecord?.cumulativeUsage, args.currentUsage, ); - if (nextSliceId > AGENT_CONTINUE_MAX_SLICES) { + const turnStartMessageIndex = + args.turnStartMessageIndex ?? latestSessionRecord?.turnStartMessageIndex; + if (nextSliceId > AGENT_TURN_TIMEOUT_RESUME_MAX_SLICES) { return await upsertAgentTurnSessionRecord({ ...((args.channelName ?? latestSessionRecord?.channelName) ? { @@ -425,13 +421,16 @@ export async function persistTimeoutSessionRecord(args: { : {}), resumeReason: "timeout", resumedFromSliceId: latestSessionRecord?.resumedFromSliceId, - errorMessage: `Agent continuation exceeded slice limit (${AGENT_CONTINUE_MAX_SLICES})`, + errorMessage: `Turn exceeded timeout resume slice limit (${AGENT_TURN_TIMEOUT_RESUME_MAX_SLICES})`, ...((args.requester ?? latestSessionRecord?.requester) ? { requester: args.requester ?? latestSessionRecord?.requester } : {}), ...((getActiveTraceId() ?? latestSessionRecord?.traceId) ? { traceId: getActiveTraceId() ?? latestSessionRecord?.traceId } : {}), + ...(turnStartMessageIndex !== undefined + ? { turnStartMessageIndex } + : {}), }); } return await upsertAgentTurnSessionRecord({ @@ -463,6 +462,7 @@ export async function persistTimeoutSessionRecord(args: { ...((getActiveTraceId() ?? latestSessionRecord?.traceId) ? { traceId: getActiveTraceId() ?? latestSessionRecord?.traceId } : {}), + ...(turnStartMessageIndex !== undefined ? 
{ turnStartMessageIndex } : {}), }); } catch (recordError) { logSessionRecordError( @@ -473,7 +473,7 @@ export async function persistTimeoutSessionRecord(args: { "app.ai.resume_from_slice_id": args.currentSliceId, "app.ai.resume_next_slice_id": nextSliceId, }, - "Failed to persist session record before scheduling agent continuation", + "Failed to persist timeout session record before scheduling resume", ); return undefined; } @@ -496,6 +496,7 @@ export async function persistYieldSessionRecord(args: { logContext: SessionRecordLogContext; requester?: StoredSlackRequester; surface?: AgentTurnSurface; + turnStartMessageIndex?: number; }): Promise { try { const latestSessionRecord = await getAgentTurnSessionRecord( @@ -509,6 +510,8 @@ export async function persistYieldSessionRecord(args: { if (piMessages.length === 0 || !isContinuableBoundary(piMessages)) { return undefined; } + const turnStartMessageIndex = + args.turnStartMessageIndex ?? latestSessionRecord?.turnStartMessageIndex; return await upsertAgentTurnSessionRecord({ ...((args.channelName ?? latestSessionRecord?.channelName) ? { channelName: args.channelName ?? latestSessionRecord?.channelName } @@ -544,6 +547,7 @@ export async function persistYieldSessionRecord(args: { ...((getActiveTraceId() ?? latestSessionRecord?.traceId) ? { traceId: getActiveTraceId() ?? latestSessionRecord?.traceId } : {}), + ...(turnStartMessageIndex !== undefined ? 
{ turnStartMessageIndex } : {}), }); } catch (recordError) { logSessionRecordError( diff --git a/packages/junior/src/chat/slack/app-home.ts b/packages/junior/src/chat/slack/app-home.ts index 355a3d9aa..e91cbe8b1 100644 --- a/packages/junior/src/chat/slack/app-home.ts +++ b/packages/junior/src/chat/slack/app-home.ts @@ -1,4 +1,4 @@ -import fs from "node:fs"; +import { readFileSync } from "node:fs"; import path from "node:path"; import type { WebClient, KnownBlock, SectionBlock } from "@slack/web-api"; import { hasRequiredOAuthScope } from "@/chat/credentials/oauth-scope"; @@ -35,7 +35,7 @@ function clampSectionText(text: string): string { function loadDescriptionText(): string { const descriptionPath = path.join(homeDir(), "DESCRIPTION.md"); try { - const raw = fs.readFileSync(descriptionPath, "utf8").trim(); + const raw = readFileSync(descriptionPath, "utf8").trim(); if (raw.length > 0) { return clampSectionText(raw); } @@ -82,7 +82,10 @@ async function connectedOAuthTokens( plugin: PluginDefinition, userTokenStore: UserTokenStore, ): Promise { - if (plugin.manifest.oauth || plugin.manifest.credentials) { + if ( + plugin.manifest.oauth || + plugin.manifest.credentials?.type === "oauth-bearer" + ) { const stored = await userTokenStore.get(userId, plugin.manifest.name); return stored && hasRequiredOAuthScope(stored.scope, plugin.manifest.oauth?.scope) @@ -120,7 +123,9 @@ export async function buildHomeView( for (const plugin of providers) { const tokens = await connectedOAuthTokens(userId, plugin, userTokenStore); - if (!tokens && !(await hasConnectedMcpAccount(userId, plugin))) continue; + if (!tokens && !(await hasConnectedMcpAccount(userId, plugin))) { + continue; + } connectedSections.push({ type: "section", diff --git a/packages/junior/src/chat/slack/outbound.ts b/packages/junior/src/chat/slack/outbound.ts index 57288fa93..68dc98ae2 100644 --- a/packages/junior/src/chat/slack/outbound.ts +++ b/packages/junior/src/chat/slack/outbound.ts @@ -10,8 +10,71 @@ import 
{ parseActorUserId } from "@/chat/requester"; const MAX_SLACK_MESSAGE_TEXT_CHARS = 40_000; -function requireSlackConversationId(channelId: string, action: string): string { - const normalized = normalizeSlackConversationId(channelId); +/** Slack Web API services used by the outbound boundary. */ +export interface SlackOutboundServices { + getSlackClient: typeof getSlackClient; + normalizeSlackConversationId: typeof normalizeSlackConversationId; + withSlackRetries: typeof withSlackRetries; +} + +const defaultSlackOutboundServices: SlackOutboundServices = { + getSlackClient, + normalizeSlackConversationId, + withSlackRetries, +}; + +interface PostSlackMessageInput { + blocks?: SlackMessageBlock[]; + channelId: string; + text: string; + threadTs?: string; + includePermalink?: boolean; +} + +interface DeleteSlackMessageInput { + channelId: string; + timestamp: string; +} + +interface PostSlackEphemeralMessageInput { + channelId: string; + userId: string; + text: string; + threadTs?: string; +} + +interface UploadFilesToThreadInput { + channelId: string; + threadTs: string; + files: Array<{ data: Buffer; filename: string }>; +} + +interface ReactionMessageInput { + channelId: string; + timestamp: string; + emoji: string; +} + +/** Bound Slack outbound operations for a concrete Slack Web API service set. 
*/ +export interface SlackOutboundBoundary { + addReactionToMessage(input: ReactionMessageInput): Promise<{ ok: true }>; + deleteSlackMessage(input: DeleteSlackMessageInput): Promise; + postSlackEphemeralMessage( + input: PostSlackEphemeralMessageInput, + ): Promise<{ messageTs?: string }>; + postSlackMessage( + input: PostSlackMessageInput, + ): Promise<{ ts: string; permalink?: string }>; + removeReactionFromMessage(input: ReactionMessageInput): Promise<{ ok: true }>; + uploadFilesToThread(input: UploadFilesToThreadInput): Promise; +} + +function requireSlackConversationId( + channelId: string, + action: string, + services: Pick, +): string { + const normalized = services.normalizeSlackConversationId(channelId); if (!normalized) { throw new Error(`${action} requires a valid channel ID`); } @@ -52,11 +115,12 @@ function requireSlackMessageText(text: string, action: string): string { async function getPermalinkBestEffort(args: { channelId: string; messageTs: string; + services: SlackOutboundServices; }): Promise { try { - const response = await withSlackRetries( + const response = await args.services.withSlackRetries( () => - getSlackClient().chat.getPermalink({ + args.services.getSlackClient().chat.getPermalink({ channel: args.channelId, message_ts: args.messageTs, }), @@ -70,16 +134,14 @@ async function getPermalinkBestEffort(args: { } /** Post Slack `mrkdwn` text to a conversation or thread via the shared outbound boundary. 
*/ -export async function postSlackMessage(input: { - blocks?: SlackMessageBlock[]; - channelId: string; - text: string; - threadTs?: string; - includePermalink?: boolean; -}): Promise<{ ts: string; permalink?: string }> { +export async function postSlackMessage( + input: PostSlackMessageInput, + services: SlackOutboundServices = defaultSlackOutboundServices, +): Promise<{ ts: string; permalink?: string }> { const channelId = requireSlackConversationId( input.channelId, "Slack message posting", + services, ); const text = requireSlackMessageText(input.text, "Slack message posting"); const threadTs = input.threadTs @@ -89,9 +151,9 @@ export async function postSlackMessage(input: { ) : undefined; - const response = await withSlackRetries( + const response = await services.withSlackRetries( () => - getSlackClient().chat.postMessage({ + services.getSlackClient().chat.postMessage({ channel: channelId, text, ...(input.blocks?.length @@ -116,6 +178,7 @@ export async function postSlackMessage(input: { permalink: await getPermalinkBestEffort({ channelId, messageTs: response.ts, + services, }), } : {}), @@ -123,22 +186,23 @@ export async function postSlackMessage(input: { } /** Delete a previously posted Slack message through the shared outbound boundary. 
*/ -export async function deleteSlackMessage(input: { - channelId: string; - timestamp: string; -}): Promise { +export async function deleteSlackMessage( + input: DeleteSlackMessageInput, + services: SlackOutboundServices = defaultSlackOutboundServices, +): Promise { const channelId = requireSlackConversationId( input.channelId, "Slack message deletion", + services, ); const timestamp = requireSlackMessageTimestamp( input.timestamp, "Slack message deletion", ); - await withSlackRetries( + await services.withSlackRetries( () => - getSlackClient().chat.delete({ + services.getSlackClient().chat.delete({ channel: channelId, ts: timestamp, }), @@ -151,15 +215,14 @@ export async function deleteSlackMessage(input: { * Post an ephemeral Slack message. Delivery is best-effort on Slack's side, but * request validation and Web API behavior are centralized here. */ -export async function postSlackEphemeralMessage(input: { - channelId: string; - userId: string; - text: string; - threadTs?: string; -}): Promise<{ messageTs?: string }> { +export async function postSlackEphemeralMessage( + input: PostSlackEphemeralMessageInput, + services: SlackOutboundServices = defaultSlackOutboundServices, +): Promise<{ messageTs?: string }> { const channelId = requireSlackConversationId( input.channelId, "Slack ephemeral message posting", + services, ); const userId = parseActorUserId(input.userId); if (!userId) { @@ -176,9 +239,9 @@ export async function postSlackEphemeralMessage(input: { ) : undefined; - const response = await withSlackRetries( + const response = await services.withSlackRetries( () => - getSlackClient().chat.postEphemeral({ + services.getSlackClient().chat.postEphemeral({ channel: channelId, user: userId, text, @@ -194,14 +257,14 @@ export async function postSlackEphemeralMessage(input: { } /** Upload files into a Slack thread via the shared outbound file boundary. 
*/ -export async function uploadFilesToThread(input: { - channelId: string; - threadTs: string; - files: Array<{ data: Buffer; filename: string }>; -}): Promise { +export async function uploadFilesToThread( + input: UploadFilesToThreadInput, + services: SlackOutboundServices = defaultSlackOutboundServices, +): Promise { const channelId = requireSlackConversationId( input.channelId, "Slack file upload", + services, ); const threadTs = requireSlackThreadTimestamp( input.threadTs, @@ -223,9 +286,9 @@ export async function uploadFilesToThread(input: { }; }); - await withSlackRetries( + await services.withSlackRetries( () => - getSlackClient().filesUploadV2({ + services.getSlackClient().filesUploadV2({ channel_id: channelId, thread_ts: threadTs, file_uploads: fileUploads, @@ -236,14 +299,14 @@ export async function uploadFilesToThread(input: { } /** Add a reaction to a Slack message, treating `already_reacted` as idempotent success. */ -export async function addReactionToMessage(input: { - channelId: string; - timestamp: string; - emoji: string; -}): Promise<{ ok: true }> { +export async function addReactionToMessage( + input: ReactionMessageInput, + services: SlackOutboundServices = defaultSlackOutboundServices, +): Promise<{ ok: true }> { const channelId = requireSlackConversationId( input.channelId, "Slack reaction", + services, ); const timestamp = requireSlackMessageTimestamp( input.timestamp, @@ -255,9 +318,9 @@ export async function addReactionToMessage(input: { } try { - await withSlackRetries( + await services.withSlackRetries( () => - getSlackClient().reactions.add({ + services.getSlackClient().reactions.add({ channel: channelId, timestamp, name: emoji, @@ -276,14 +339,14 @@ export async function addReactionToMessage(input: { } /** Remove a reaction from a Slack message, treating `no_reaction` as idempotent success. 
*/ -export async function removeReactionFromMessage(input: { - channelId: string; - timestamp: string; - emoji: string; -}): Promise<{ ok: true }> { +export async function removeReactionFromMessage( + input: ReactionMessageInput, + services: SlackOutboundServices = defaultSlackOutboundServices, +): Promise<{ ok: true }> { const channelId = requireSlackConversationId( input.channelId, "Slack reaction removal", + services, ); const timestamp = requireSlackMessageTimestamp( input.timestamp, @@ -295,9 +358,9 @@ export async function removeReactionFromMessage(input: { } try { - await withSlackRetries( + await services.withSlackRetries( () => - getSlackClient().reactions.remove({ + services.getSlackClient().reactions.remove({ channel: channelId, timestamp, name: emoji, @@ -315,6 +378,22 @@ export async function removeReactionFromMessage(input: { return { ok: true }; } +/** Create the shared Slack outbound boundary with explicit Slack Web API services. */ +export function createSlackOutboundBoundary( + services: SlackOutboundServices = defaultSlackOutboundServices, +): SlackOutboundBoundary { + return { + addReactionToMessage: (input) => addReactionToMessage(input, services), + deleteSlackMessage: (input) => deleteSlackMessage(input, services), + postSlackEphemeralMessage: (input) => + postSlackEphemeralMessage(input, services), + postSlackMessage: (input) => postSlackMessage(input, services), + removeReactionFromMessage: (input) => + removeReactionFromMessage(input, services), + uploadFilesToThread: (input) => uploadFilesToThread(input, services), + }; +} + export const slackOutboundPolicy = { maxMessageTextChars: MAX_SLACK_MESSAGE_TEXT_CHARS, }; diff --git a/packages/junior/src/chat/tools/advisor/tool.ts b/packages/junior/src/chat/tools/advisor/tool.ts index 133cee159..e3c70e673 100644 --- a/packages/junior/src/chat/tools/advisor/tool.ts +++ b/packages/junior/src/chat/tools/advisor/tool.ts @@ -29,7 +29,7 @@ import type { PiMessage } from "@/chat/pi/messages"; import { 
extractAssistantText, isAssistantMessage, -} from "@/chat/respond-helpers"; +} from "@/chat/respond/pi-messages"; import { createStateAdvisorSessionStore, getAdvisorSessionKey, diff --git a/packages/junior/src/chat/tools/types.ts b/packages/junior/src/chat/tools/types.ts index 8f1829f3a..13852cc24 100644 --- a/packages/junior/src/chat/tools/types.ts +++ b/packages/junior/src/chat/tools/types.ts @@ -13,6 +13,7 @@ import type { ThreadArtifactsState } from "@/chat/state/artifacts"; import type { Skill } from "@/chat/skills"; import type { LoadSkillMetadata } from "@/chat/tools/skill/load-skill"; import type { AdvisorToolRuntimeContext } from "@/chat/tools/advisor/tool"; +import type { completeText } from "@/chat/pi/client"; import type { LocalRequester, Requester, @@ -20,6 +21,7 @@ import type { } from "@/chat/requester"; export interface ImageGenerateToolDeps { + completeText?: typeof completeText; fetch?: typeof fetch; } diff --git a/packages/junior/src/chat/tools/web/fetch-tool.ts b/packages/junior/src/chat/tools/web/fetch-tool.ts index 9eb1b026a..dcb45fd96 100644 --- a/packages/junior/src/chat/tools/web/fetch-tool.ts +++ b/packages/junior/src/chat/tools/web/fetch-tool.ts @@ -37,6 +37,7 @@ function extractHttpStatusFromMessage(message: string): number | null { return Number.isFinite(parsed) ? parsed : null; } +/** Create the web-fetch tool that retrieves a known public URL. */ export function createWebFetchTool(hooks: ToolHooks) { const override = hooks.toolOverrides?.webFetch; return tool({ diff --git a/packages/junior/src/chat/tools/web/image-generate.ts b/packages/junior/src/chat/tools/web/image-generate.ts index e30319cfa..367c052bc 100644 --- a/packages/junior/src/chat/tools/web/image-generate.ts +++ b/packages/junior/src/chat/tools/web/image-generate.ts @@ -20,9 +20,12 @@ ${JUNIOR_PERSONALITY} Rewrite the user's image request into a detailed image generation prompt that encodes this personality's visual aesthetic. 
Output ONLY the rewritten prompt text — no explanation, no wrapper.`; -async function enrichImagePrompt(rawPrompt: string): Promise { +async function enrichImagePrompt( + rawPrompt: string, + completeTextImpl: typeof completeText, +): Promise { try { - const { text } = await completeText({ + const { text } = await completeTextImpl({ modelId: botConfig.fastModelId, system: ENRICHMENT_SYSTEM_PROMPT, messages: [{ role: "user", content: rawPrompt, timestamp: Date.now() }], @@ -77,6 +80,7 @@ function parseImageGenerationError( } } +/** Create the image-generation tool that stores generated files as artifacts. */ export function createImageGenerateTool( hooks: ToolHooks, deps: ImageGenerateToolDeps = {}, @@ -93,6 +97,7 @@ export function createImageGenerateTool( }), execute: async ({ prompt }) => { const fetchImpl = deps.fetch ?? fetch; + const completeTextImpl = deps.completeText ?? completeText; // Raw fetch does not resolve AI Gateway env auth on its own, so this // path has to turn the documented env credential into a bearer token. const apiKey = getGatewayApiKey(); @@ -100,7 +105,7 @@ export function createImageGenerateTool( throw new Error(MISSING_GATEWAY_CREDENTIALS_ERROR); } const model = process.env.AI_IMAGE_MODEL ?? 
DEFAULT_IMAGE_MODEL; - const enrichedPrompt = await enrichImagePrompt(prompt); + const enrichedPrompt = await enrichImagePrompt(prompt, completeTextImpl); const response = await fetchImpl( "https://ai-gateway.vercel.sh/v1/chat/completions", { diff --git a/packages/junior/src/cli/snapshot-warmup.ts b/packages/junior/src/cli/snapshot-warmup.ts index 8365c3cec..8a0fe6504 100644 --- a/packages/junior/src/cli/snapshot-warmup.ts +++ b/packages/junior/src/cli/snapshot-warmup.ts @@ -12,6 +12,10 @@ import { disconnectStateAdapter } from "@/chat/state/adapter"; const DEFAULT_RUNTIME = "node22"; const DEFAULT_TIMEOUT_MS = 10 * 60 * 1000; +interface SnapshotCreateOptions { + resolveRuntimeDependencySnapshot: typeof resolveRuntimeDependencySnapshot; +} + function progressMessage( phase: RuntimeDependencySnapshotProgressPhase, ): string { @@ -105,6 +109,7 @@ function logSnapshotProfile(log: (line: string) => void): void { export async function runSnapshotCreate( log: (line: string) => void = console.log, + options: SnapshotCreateOptions = { resolveRuntimeDependencySnapshot }, ): Promise { if (process.env.JUNIOR_SKIP_SNAPSHOT === "1") { log("Skipping sandbox snapshot create (JUNIOR_SKIP_SNAPSHOT=1)"); @@ -117,7 +122,7 @@ export async function runSnapshotCreate( try { logSnapshotProfile(log); const emitted = new Set(); - const snapshot = await resolveRuntimeDependencySnapshot({ + const snapshot = await options.resolveRuntimeDependencySnapshot({ runtime, timeoutMs, onProgress: async (phase) => { diff --git a/packages/junior/src/handlers/mcp-oauth-callback.ts b/packages/junior/src/handlers/mcp-oauth-callback.ts index fea2f0906..4f98d42f6 100644 --- a/packages/junior/src/handlers/mcp-oauth-callback.ts +++ b/packages/junior/src/handlers/mcp-oauth-callback.ts @@ -14,7 +14,7 @@ import { } from "@/chat/mcp/auth-store"; import { finalizeMcpAuthorization } from "@/chat/mcp/oauth"; import { logException, logWarn } from "@/chat/logging"; -import type { AssistantReply, generateAssistantReply 
} from "@/chat/respond"; +import type { AssistantReply } from "@/chat/respond"; import { getChannelConfigurationServiceById, getPersistedSandboxState, @@ -34,7 +34,10 @@ import { updateConversationStats, } from "@/chat/services/conversation-memory"; import { coerceThreadArtifactsState } from "@/chat/state/artifacts"; -import { resumeAuthorizedRequest } from "@/chat/runtime/slack-resume"; +import { + resumeAuthorizedRequest, + type ResumeReplyGenerator, +} from "@/chat/runtime/slack-resume"; import { persistAuthPauseTurnState } from "@/chat/runtime/auth-pause-state"; import { applyPendingAuthUpdate, @@ -58,6 +61,10 @@ import { } from "@/chat/requester"; import { requireSlackDestination } from "@/chat/destination"; +interface McpOAuthCallbackHandlerOptions { + generateReply?: ResumeReplyGenerator; +} + const CALLBACK_PAGES = { missing_state: { title: "Authorization failed", @@ -88,10 +95,6 @@ const CALLBACK_PAGES = { }, } as const; -interface McpOAuthCallbackOptions { - generateReply?: typeof generateAssistantReply; -} - function mcpAuthorizationId(args: { provider: string; sessionId: string; @@ -188,10 +191,10 @@ async function persistFailedReplyState( async function resumeAuthorizedMcpTurn(args: { authSession: McpAuthSessionState; - generateReply?: typeof generateAssistantReply; + generateReply?: ResumeReplyGenerator; provider: string; }): Promise { - const { authSession, generateReply, provider } = args; + const { authSession, provider } = args; if ( !authSession.channelId || !authSession.destination || @@ -241,7 +244,7 @@ async function resumeAuthorizedMcpTurn(args: { messageTs: getTurnUserSlackMessageTs(userMessage), lockKey: threadId, connectedText: "", - generateReply, + generateReply: args.generateReply, beforeStart: async () => { const lockedState = await getPersistedThreadState(threadId); const lockedConversation = coerceThreadConversationState(lockedState); @@ -429,13 +432,16 @@ async function resumeAuthorizedMcpTurn(args: { ); }, onTimeoutPause: async 
(error: unknown) => { - if (!isRetryableTurnError(error, "agent_continue")) { + if ( + !isRetryableTurnError(error, "agent_continue") && + !isRetryableTurnError(error, "turn_timeout_resume") + ) { throw error; } const version = error.metadata?.version; if (typeof version !== "number") { throw new Error( - "MCP OAuth agent continuation did not include a session record version", + "MCP OAuth turn continuation did not include a session record version", ); } await scheduleAgentContinue({ @@ -454,7 +460,7 @@ export async function GET( request: Request, provider: string, waitUntil: WaitUntilFn, - options: McpOAuthCallbackOptions = {}, + options: McpOAuthCallbackHandlerOptions = {}, ): Promise { const url = new URL(request.url); const state = url.searchParams.get("state")?.trim(); diff --git a/packages/junior/src/handlers/oauth-callback.ts b/packages/junior/src/handlers/oauth-callback.ts index 3f21621e8..7b1097ef8 100644 --- a/packages/junior/src/handlers/oauth-callback.ts +++ b/packages/junior/src/handlers/oauth-callback.ts @@ -18,6 +18,7 @@ import { ResumeTurnBusyError, resumeAuthorizedRequest, resumeSlackTurn, + type ResumeReplyGenerator, } from "@/chat/runtime/slack-resume"; import { persistAuthPauseTurnState } from "@/chat/runtime/auth-pause-state"; import { logException, logInfo } from "@/chat/logging"; @@ -65,11 +66,11 @@ import { import { escapeXml } from "@/chat/xml"; import type { WaitUntilFn } from "@/handlers/types"; import { scheduleAgentContinue } from "@/chat/services/agent-continue"; -import type { AssistantReply, generateAssistantReply } from "@/chat/respond"; +import type { AssistantReply } from "@/chat/respond"; import { requireSlackDestination } from "@/chat/destination"; -interface OAuthCallbackOptions { - generateReply?: typeof generateAssistantReply; +interface OAuthCallbackHandlerOptions { + generateReply?: ResumeReplyGenerator; } /** @@ -174,7 +175,7 @@ async function persistFailedOAuthReplyState(args: { async function 
resumeOAuthSessionRecordTurn( stored: OAuthStatePayload, - options: OAuthCallbackOptions, + options: OAuthCallbackHandlerOptions = {}, ): Promise { if ( !stored.resumeConversationId || @@ -443,13 +444,16 @@ async function resumeOAuthSessionRecordTurn( }); }, onTimeoutPause: async (error: unknown) => { - if (!isRetryableTurnError(error, "agent_continue")) { + if ( + !isRetryableTurnError(error, "agent_continue") && + !isRetryableTurnError(error, "turn_timeout_resume") + ) { throw error; } const version = error.metadata?.version; if (typeof version !== "number") { throw new Error( - "OAuth agent continuation did not include a session record version", + "OAuth turn continuation did not include a session record version", ); } await scheduleAgentContinue({ @@ -459,6 +463,7 @@ async function resumeOAuthSessionRecordTurn( expectedVersion: version, }); }, + generateReply: options.generateReply, }; }, }); @@ -468,7 +473,7 @@ async function resumeOAuthSessionRecordTurn( async function resumePendingOAuthMessage( stored: OAuthStatePayload, - options: OAuthCallbackOptions, + options: OAuthCallbackHandlerOptions = {}, ): Promise { if ( !stored.pendingMessage || @@ -503,7 +508,6 @@ async function resumePendingOAuthMessage( threadTs: stored.threadTs, messageTs: getTurnUserSlackMessageTs(latestUserMessage), connectedText: "", - generateReply: options.generateReply, replyContext: { credentialContext: { actor: { type: "user", userId: stored.userId }, @@ -520,6 +524,7 @@ async function resumePendingOAuthMessage( piMessages: conversation.piMessages, configuration: stored.configuration, }, + generateReply: options.generateReply, onSuccess: async (reply) => { logInfo( "oauth_callback_resume_complete", @@ -539,7 +544,7 @@ export async function GET( request: Request, provider: string, waitUntil: WaitUntilFn, - options: OAuthCallbackOptions = {}, + options: OAuthCallbackHandlerOptions = {}, ): Promise { const providerConfig = getPluginOAuthConfig(provider); if (!providerConfig) { diff --git 
a/packages/junior/tests/component/auth/mcp-auth-orchestration.test.ts b/packages/junior/tests/component/auth/mcp-auth-orchestration.test.ts new file mode 100644 index 000000000..139878a8d --- /dev/null +++ b/packages/junior/tests/component/auth/mcp-auth-orchestration.test.ts @@ -0,0 +1,233 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { McpAuthSessionState } from "@/chat/mcp/auth-store"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; +import type { PluginDefinition } from "@/chat/plugins/types"; +import { createMcpAuthOrchestration } from "@/chat/services/mcp-auth-orchestration"; +import { AuthorizationFlowDisabledError } from "@/chat/services/auth-pause"; +import { mockTestClock } from "../../fixtures/vitest"; + +type McpAuthServices = NonNullable< + Parameters[1] +>; +type McpAuthProvider = Awaited< + ReturnType +>; + +const githubMcpPlugin: PluginDefinition = { + dir: "/tmp/github-plugin", + manifest: { + name: "github", + displayName: "GitHub", + description: "GitHub MCP provider", + capabilities: [], + configKeys: [], + mcp: { + transport: "http", + url: "https://mcp.github.example.test", + }, + }, +}; + +const authSession: McpAuthSessionState = { + authSessionId: "auth_1", + provider: "github", + userId: "U123", + conversationId: "slack:C123:1700000000.000000", + sessionId: "scheduled:sched_1:1000", + userMessage: "", + channelId: "C123", + threadTs: "1700000000.000000", + authorizationUrl: "https://github.example.test/oauth/authorize", + createdAtMs: 1_700_000_000_000, + updatedAtMs: 1_700_000_000_000, +}; + +function createMcpAuthProvider(authSessionId: string): McpAuthProvider { + return { + authSessionId, + redirectUrl: "https://junior.example.test/api/oauth/callback/mcp/github", + clientMetadata: { + client_name: "Junior MCP Client", + redirect_uris: [ + "https://junior.example.test/api/oauth/callback/mcp/github", + ], + grant_types: ["authorization_code", "refresh_token"], + 
response_types: ["code"], + token_endpoint_auth_method: "none", + }, + state: vi.fn(async () => authSessionId), + clientInformation: vi.fn(async () => undefined), + saveClientInformation: vi.fn(async () => undefined), + tokens: vi.fn(async () => undefined), + saveTokens: vi.fn(async () => undefined), + redirectToAuthorization: vi.fn(async () => undefined), + saveCodeVerifier: vi.fn(async () => undefined), + codeVerifier: vi.fn(async () => "code-verifier"), + } satisfies McpAuthProvider; +} + +function createMcpAuthServices() { + return { + createMcpOAuthClientProvider: vi.fn(async () => + createMcpAuthProvider("auth_1"), + ), + deleteMcpAuthSession: vi.fn(async () => undefined), + deliverPrivateMessage: vi.fn(async () => "fallback_dm" as const), + getMcpAuthSession: vi.fn(async () => authSession), + patchMcpAuthSession: vi.fn(async (_authSessionId, patch) => ({ + ...authSession, + ...patch, + updatedAtMs: 1_700_000_000_001, + })), + recordAuthorizationRequested: vi.fn(async () => undefined), + } satisfies McpAuthServices; +} + +function baseInput( + overrides: { + abortAgent?: () => void; + pendingAuth?: Parameters< + typeof createMcpAuthOrchestration + >[0]["pendingAuth"]; + recordPendingAuth?: Parameters< + typeof createMcpAuthOrchestration + >[0]["recordPendingAuth"]; + authorizationFlowMode?: Parameters< + typeof createMcpAuthOrchestration + >[0]["authorizationFlowMode"]; + } = {}, +): Parameters[0] { + return { + abortAgent: overrides.abortAgent ?? vi.fn(), + conversationId: "slack:C123:1700000000.000000", + sessionId: "scheduled:sched_1:1000", + requesterId: "U123", + channelId: "C123", + threadTs: "1700000000.000000", + userMessage: "", + pendingAuth: overrides.pendingAuth, + getConfiguration: () => ({ repo: "getsentry/junior" }), + getArtifactState: () => undefined, + getMergedArtifactState: () => ({ + assistantContextChannelId: "C456", + }), + recordPendingAuth: overrides.recordPendingAuth ?? 
vi.fn(), + authorizationFlowMode: overrides.authorizationFlowMode, + }; +} + +describe("createMcpAuthOrchestration", () => { + beforeEach(() => { + mockTestClock(1_700_000_000_000); + setPluginCatalogConfig({ + inlineManifests: [{ manifest: githubMcpPlugin.manifest }], + }); + }); + + afterEach(() => { + setPluginCatalogConfig(undefined); + vi.useRealTimers(); + }); + + it("sends a private auth link and records the paused session", async () => { + const services = createMcpAuthServices(); + const abortAgent = vi.fn(); + const recordPendingAuth = vi.fn(async () => undefined); + const orchestration = createMcpAuthOrchestration( + baseInput({ abortAgent, recordPendingAuth }), + services, + ); + + await orchestration.authProviderFactory(githubMcpPlugin); + await expect(orchestration.onAuthorizationRequired("github")).resolves.toBe( + true, + ); + + expect(services.patchMcpAuthSession).toHaveBeenCalledWith("auth_1", { + configuration: { repo: "getsentry/junior" }, + artifactState: { assistantContextChannelId: "C456" }, + toolChannelId: "C456", + }); + expect(services.deliverPrivateMessage).toHaveBeenCalledWith( + expect.objectContaining({ + userId: "U123", + text: expect.stringContaining( + "https://github.example.test/oauth/authorize", + ), + }), + ); + expect(recordPendingAuth).toHaveBeenCalledWith({ + kind: "mcp", + provider: "github", + requesterId: "U123", + sessionId: "scheduled:sched_1:1000", + linkSentAtMs: 1_700_000_000_000, + }); + expect(services.recordAuthorizationRequested).toHaveBeenCalledWith( + expect.objectContaining({ + authorizationId: "scheduled:sched_1:1000:mcp:github", + delivery: "private_link_sent", + }), + ); + expect(abortAgent).toHaveBeenCalledTimes(1); + }); + + it("reuses a fresh pending auth link without delivering a duplicate link", async () => { + const services = createMcpAuthServices(); + const abortAgent = vi.fn(); + const recordPendingAuth = vi.fn(async () => undefined); + const orchestration = createMcpAuthOrchestration( + 
baseInput({ + abortAgent, + recordPendingAuth, + pendingAuth: { + kind: "mcp", + provider: "github", + requesterId: "U123", + sessionId: "scheduled:sched_1:1000", + linkSentAtMs: 1_699_999_999_000, + }, + }), + services, + ); + + await orchestration.authProviderFactory(githubMcpPlugin); + await expect(orchestration.onAuthorizationRequired("github")).resolves.toBe( + true, + ); + + expect(services.deliverPrivateMessage).not.toHaveBeenCalled(); + expect(services.deleteMcpAuthSession).toHaveBeenCalledWith("auth_1"); + expect(recordPendingAuth).toHaveBeenCalledWith( + expect.objectContaining({ + linkSentAtMs: 1_699_999_999_000, + }), + ); + expect(services.recordAuthorizationRequested).toHaveBeenCalledWith( + expect.objectContaining({ + authorizationId: "scheduled:sched_1:1000:mcp:github", + delivery: "private_link_reused", + }), + ); + expect(abortAgent).toHaveBeenCalledTimes(1); + }); + + it("deletes the auth session and does not abort when auth flow is disabled", async () => { + const services = createMcpAuthServices(); + const abortAgent = vi.fn(); + const orchestration = createMcpAuthOrchestration( + baseInput({ abortAgent, authorizationFlowMode: "disabled" }), + services, + ); + + await orchestration.authProviderFactory(githubMcpPlugin); + await expect( + orchestration.onAuthorizationRequired("github"), + ).rejects.toBeInstanceOf(AuthorizationFlowDisabledError); + + expect(services.deleteMcpAuthSession).toHaveBeenCalledWith("auth_1"); + expect(services.patchMcpAuthSession).not.toHaveBeenCalled(); + expect(services.deliverPrivateMessage).not.toHaveBeenCalled(); + expect(abortAgent).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/junior/tests/component/auth/plugin-auth-orchestration.test.ts b/packages/junior/tests/component/auth/plugin-auth-orchestration.test.ts new file mode 100644 index 000000000..c0532c62e --- /dev/null +++ b/packages/junior/tests/component/auth/plugin-auth-orchestration.test.ts @@ -0,0 +1,311 @@ +import { afterEach, beforeEach, 
describe, expect, it, vi } from "vitest"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; +import { + createPluginAuthOrchestration, + PluginAuthorizationPauseError, + PluginCredentialFailureError, +} from "@/chat/services/plugin-auth-orchestration"; +import { AuthorizationFlowDisabledError } from "@/chat/services/auth-pause"; +import type { UserTokenStore } from "@/chat/credentials/user-token-store"; +import type { PluginManifest } from "@/chat/plugins/types"; +import { mockTestClock } from "../../fixtures/vitest"; + +type PluginAuthServices = NonNullable< + Parameters[1] +>; + +const pluginManifests = { + github: { + name: "github", + displayName: "GitHub", + description: "GitHub provider", + capabilities: [], + configKeys: [], + domains: ["api.github.com", "github.com"], + oauth: { + clientIdEnv: "GITHUB_CLIENT_ID", + clientSecretEnv: "GITHUB_CLIENT_SECRET", + authorizeEndpoint: "https://github.com/login/oauth/authorize", + tokenEndpoint: "https://github.com/login/oauth/access_token", + }, + }, + sentry: { + name: "sentry", + displayName: "Sentry", + description: "Sentry provider", + capabilities: [], + configKeys: [], + credentials: { + type: "oauth-bearer", + domains: ["sentry.io"], + authTokenEnv: "SENTRY_AUTH_TOKEN", + }, + oauth: { + clientIdEnv: "SENTRY_CLIENT_ID", + clientSecretEnv: "SENTRY_CLIENT_SECRET", + authorizeEndpoint: "https://sentry.io/oauth/authorize/", + tokenEndpoint: "https://sentry.io/oauth/token/", + }, + }, +} satisfies Record; + +const sentryAuthSignal = { + provider: "sentry", + grant: { name: "default", access: "read" as const }, + authorization: { type: "oauth" as const, provider: "sentry" }, + createdAtMs: 1_700_000_000_000, +}; + +function configurePluginCatalog(): void { + setPluginCatalogConfig({ + inlineManifests: Object.values(pluginManifests).map((manifest) => ({ + manifest, + })), + }); +} + +function createPluginAuthServices() { + return { + recordAuthorizationRequested: vi.fn(async () => undefined), + 
startOAuthFlow: vi.fn(), + unlinkProvider: vi.fn(async () => undefined), + } satisfies PluginAuthServices; +} + +function createTestUserTokenStore(): UserTokenStore { + return { + get: vi.fn(async () => undefined), + set: vi.fn(async () => undefined), + delete: vi.fn(async () => undefined), + }; +} + +function createInput( + overrides: Partial[0]> = {}, +): Parameters[0] { + return { + abortAgent: vi.fn(), + requesterId: "U123", + userMessage: "check Sentry", + userTokenStore: createTestUserTokenStore(), + ...overrides, + }; +} + +async function expectPluginCredentialFailure( + promise: Promise, + expected: { message: string; provider: string }, +): Promise { + let caught: unknown; + try { + await promise; + } catch (error) { + caught = error; + } + expect(caught).toBeInstanceOf(PluginCredentialFailureError); + expect(caught).toMatchObject(expected); +} + +describe("createPluginAuthOrchestration", () => { + beforeEach(() => { + mockTestClock(1_700_000_000_000); + configurePluginCatalog(); + }); + + afterEach(() => { + setPluginCatalogConfig(undefined); + vi.useRealTimers(); + }); + + it("starts oauth from a structured auth_required signal", async () => { + const services = createPluginAuthServices(); + services.startOAuthFlow.mockResolvedValue({ + ok: true, + delivery: "fallback_dm", + }); + const abortAgent = vi.fn(); + const userTokenStore = createTestUserTokenStore(); + const orchestration = createPluginAuthOrchestration( + createInput({ abortAgent, userTokenStore }), + services, + ); + + await expect( + orchestration.maybeHandleAuthSignal({ + exit_code: 0, + stderr: "401 unauthorized", + auth_required: sentryAuthSignal, + }), + ).rejects.toBeInstanceOf(PluginAuthorizationPauseError); + + expect(services.startOAuthFlow).toHaveBeenCalledWith( + "sentry", + expect.objectContaining({ + requesterId: "U123", + userMessage: "check Sentry", + }), + ); + expect(services.unlinkProvider).toHaveBeenCalledWith( + "U123", + "sentry", + userTokenStore, + ); + 
expect(abortAgent).toHaveBeenCalledTimes(1); + }); + + it("requires a pending-auth recorder before starting a resumable oauth flow", async () => { + const services = createPluginAuthServices(); + services.startOAuthFlow.mockResolvedValue({ + ok: true, + delivery: "fallback_dm", + }); + const abortAgent = vi.fn(); + const orchestration = createPluginAuthOrchestration( + createInput({ + abortAgent, + conversationId: "slack:C123:1700000000.000000", + sessionId: "run_new", + }), + services, + ); + + await expect( + orchestration.maybeHandleAuthSignal({ auth_required: sentryAuthSignal }), + ).rejects.toThrow( + 'Missing pending auth recorder for plugin authorization pause "sentry"', + ); + + expect(services.startOAuthFlow).not.toHaveBeenCalled(); + expect(services.unlinkProvider).not.toHaveBeenCalled(); + expect(abortAgent).not.toHaveBeenCalled(); + }); + + it("reuses a fresh pending oauth link without starting a duplicate flow", async () => { + const services = createPluginAuthServices(); + const abortAgent = vi.fn(); + const recordPendingAuth = vi.fn(async () => undefined); + const userTokenStore = createTestUserTokenStore(); + const orchestration = createPluginAuthOrchestration( + createInput({ + abortAgent, + conversationId: "slack:C123:1700000000.000000", + sessionId: "scheduled:sched_1:1000", + pendingAuth: { + kind: "plugin", + provider: "sentry", + requesterId: "U123", + sessionId: "scheduled:sched_1:1000", + linkSentAtMs: 1_699_999_999_000, + }, + recordPendingAuth, + userTokenStore, + }), + services, + ); + + await expect( + orchestration.maybeHandleAuthSignal({ auth_required: sentryAuthSignal }), + ).rejects.toBeInstanceOf(PluginAuthorizationPauseError); + + expect(services.startOAuthFlow).not.toHaveBeenCalled(); + expect(services.unlinkProvider).toHaveBeenCalledWith( + "U123", + "sentry", + userTokenStore, + ); + expect(recordPendingAuth).toHaveBeenCalledWith({ + kind: "plugin", + provider: "sentry", + requesterId: "U123", + sessionId: 
"scheduled:sched_1:1000", + linkSentAtMs: 1_699_999_999_000, + }); + expect(services.recordAuthorizationRequested).toHaveBeenCalledWith( + expect.objectContaining({ + authorizationId: "scheduled:sched_1:1000:plugin:sentry", + delivery: "private_link_reused", + }), + ); + expect(abortAgent).toHaveBeenCalledTimes(1); + }); + + it("does not start oauth or abort when authorization is disabled", async () => { + const services = createPluginAuthServices(); + const abortAgent = vi.fn(); + const orchestration = createPluginAuthOrchestration( + createInput({ abortAgent, authorizationFlowMode: "disabled" }), + services, + ); + + await expect( + orchestration.maybeHandleAuthSignal({ auth_required: sentryAuthSignal }), + ).rejects.toBeInstanceOf(AuthorizationFlowDisabledError); + + expect(services.startOAuthFlow).not.toHaveBeenCalled(); + expect(services.unlinkProvider).not.toHaveBeenCalled(); + expect(abortAgent).not.toHaveBeenCalled(); + }); + + it("surfaces non-oauth auth signals as credential failures", async () => { + const services = createPluginAuthServices(); + const orchestration = createPluginAuthOrchestration( + createInput({ userMessage: "inspect a repo" }), + services, + ); + + await expectPluginCredentialFailure( + orchestration.maybeHandleAuthSignal({ + auth_required: { + provider: "github", + grant: { name: "installation-read", access: "read" as const }, + createdAtMs: 1_700_000_000_000, + message: "Missing GITHUB_APP_ID", + }, + }), + { provider: "github", message: "Missing GITHUB_APP_ID" }, + ); + + expect(services.startOAuthFlow).not.toHaveBeenCalled(); + expect(services.unlinkProvider).not.toHaveBeenCalled(); + }); + + it("ignores auth-like output without a structured signal", async () => { + const services = createPluginAuthServices(); + const orchestration = createPluginAuthOrchestration( + createInput({ userMessage: "check GitHub" }), + services, + ); + + await expect( + orchestration.maybeHandleAuthSignal({ + exit_code: 1, + stderr: "401 unauthorized 
bad credentials missing scope", + }), + ).resolves.toBeUndefined(); + + expect(services.startOAuthFlow).not.toHaveBeenCalled(); + expect(services.unlinkProvider).not.toHaveBeenCalled(); + }); + + it("ignores auth_required payloads that fail schema validation", async () => { + const services = createPluginAuthServices(); + const orchestration = createPluginAuthOrchestration( + createInput({ userMessage: "create an issue" }), + services, + ); + + await expect( + orchestration.maybeHandleAuthSignal({ + auth_required: { + provider: "github", + grant: { name: "user-write", access: "write" }, + authorization: { type: "oauth", provider: "sentry" }, + createdAtMs: 1_700_000_000_000, + }, + }), + ).resolves.toBeUndefined(); + + expect(services.startOAuthFlow).not.toHaveBeenCalled(); + expect(services.unlinkProvider).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/junior/tests/unit/mcp/oauth.test.ts b/packages/junior/tests/component/mcp/oauth-client-provider.test.ts similarity index 66% rename from packages/junior/tests/unit/mcp/oauth.test.ts rename to packages/junior/tests/component/mcp/oauth-client-provider.test.ts index fad78ac67..c8dba39a5 100644 --- a/packages/junior/tests/unit/mcp/oauth.test.ts +++ b/packages/junior/tests/component/mcp/oauth-client-provider.test.ts @@ -1,60 +1,59 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { getMcpAuthSession, patchMcpAuthSession } from "@/chat/mcp/auth-store"; +import { createMcpOAuthClientProvider } from "@/chat/mcp/oauth"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { + DEFAULT_TEST_NOW_MS, + mockTestClock, + stubTestEnv, +} from "../../fixtures/vitest"; -const ORIGINAL_ENV = { ...process.env }; const SLACK_DESTINATION = { platform: "slack", teamId: "T123", channelId: "C123", } as const; -function buildPlugin() { - return { - dir: "/tmp/plugins/demo", - skillsDir: 
"/tmp/plugins/demo/skills", - manifest: { - name: "demo", - displayName: "Demo", - description: "Demo plugin", - capabilities: [], - configKeys: [], - mcp: { - transport: "http" as const, - url: "https://mcp.example.com", +function registerMcpPlugin(): void { + setPluginCatalogConfig({ + inlineManifests: [ + { + manifest: { + name: "demo", + displayName: "Demo", + description: "Demo plugin", + capabilities: [], + configKeys: [], + mcp: { + transport: "http", + url: "https://mcp.example.com", + }, + }, }, - }, - }; + ], + }); } -describe("createMcpOAuthClientProvider", () => { +describe("MCP OAuth client provider session state", () => { beforeEach(async () => { - process.env = { - ...ORIGINAL_ENV, + stubTestEnv({ JUNIOR_BASE_URL: "https://junior.example.com", JUNIOR_STATE_ADAPTER: "memory", - }; - vi.resetModules(); - vi.doMock("@/chat/plugins/registry", () => ({ - getPluginDefinition: (provider: string) => - provider === "demo" ? buildPlugin() : undefined, - })); - - const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + }); + mockTestClock(); + registerMcpPlugin(); await disconnectStateAdapter(); }); afterEach(async () => { - const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + setPluginCatalogConfig(undefined); await disconnectStateAdapter(); - vi.doUnmock("@/chat/plugins/registry"); - vi.resetModules(); - process.env = { ...ORIGINAL_ENV }; + vi.useRealTimers(); + vi.unstubAllEnvs(); }); it("persists and reuses the pending auth session for the same turn", async () => { - const { getMcpAuthSession, patchMcpAuthSession } = - await import("@/chat/mcp/auth-store"); - const { createMcpOAuthClientProvider } = await import("@/chat/mcp/oauth"); - const firstProvider = await createMcpOAuthClientProvider({ provider: "demo", conversationId: "conversation-1", @@ -79,12 +78,15 @@ describe("createMcpOAuthClientProvider", () => { channelId: "C123", threadTs: "1712345.0001", configuration: { region: "us" }, + createdAtMs: 
DEFAULT_TEST_NOW_MS, + updatedAtMs: DEFAULT_TEST_NOW_MS, }); await patchMcpAuthSession(firstProvider.authSessionId, { authorizationUrl: "https://auth.example.com/start", codeVerifier: "code-verifier", }); + vi.setSystemTime(new Date(DEFAULT_TEST_NOW_MS + 5_000)); const reusedProvider = await createMcpOAuthClientProvider({ provider: "demo", @@ -101,6 +103,9 @@ describe("createMcpOAuthClientProvider", () => { }); expect(reusedProvider.authSessionId).toBe(firstProvider.authSessionId); + expect(reusedProvider.redirectUrl).toBe( + "https://junior.example.com/api/oauth/callback/mcp/demo", + ); const reusedSession = await getMcpAuthSession(reusedProvider.authSessionId); expect(reusedSession).toMatchObject({ @@ -118,10 +123,8 @@ describe("createMcpOAuthClientProvider", () => { artifactState: { assistantContextChannelId: "C999" }, authorizationUrl: "https://auth.example.com/start", codeVerifier: "code-verifier", + createdAtMs: DEFAULT_TEST_NOW_MS, + updatedAtMs: DEFAULT_TEST_NOW_MS + 5_000, }); - expect(reusedSession?.createdAtMs).toBe(initialSession?.createdAtMs); - expect(reusedSession?.updatedAtMs).toBeGreaterThanOrEqual( - initialSession?.updatedAtMs ?? 
0, - ); }); }); diff --git a/packages/junior/tests/component/mcp/oauth-provider.test.ts b/packages/junior/tests/component/mcp/oauth-provider.test.ts new file mode 100644 index 000000000..ee1c4e406 --- /dev/null +++ b/packages/junior/tests/component/mcp/oauth-provider.test.ts @@ -0,0 +1,165 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + getMcpAuthSession, + getMcpServerSessionId, + getMcpStoredOAuthCredentials, + putMcpAuthSession, + putMcpStoredOAuthCredentials, +} from "@/chat/mcp/auth-store"; +import { StateBackedMcpOAuthClientProvider } from "@/chat/mcp/oauth-provider"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { + DEFAULT_TEST_NOW_MS, + mockTestClock, + stubTestEnv, +} from "../../fixtures/vitest"; + +type ProviderSessionContext = NonNullable< + ConstructorParameters[2] +>; + +const SESSION_CONTEXT: ProviderSessionContext = { + provider: "demo", + userId: "U123", + conversationId: "conversation-1", + sessionId: "turn-1", + userMessage: "/demo", +}; + +function createProvider(sessionContext?: ProviderSessionContext) { + return new StateBackedMcpOAuthClientProvider( + "auth-session-1", + "https://junior.example.com/callback", + sessionContext, + ); +} + +async function seedSession(): Promise { + await putMcpAuthSession({ + authSessionId: "auth-session-1", + ...SESSION_CONTEXT, + authorizationUrl: "https://example.com/oauth/start", + codeVerifier: "code-verifier", + createdAtMs: 1, + updatedAtMs: 1, + }); +} + +async function seedCredentials(): Promise { + await putMcpStoredOAuthCredentials("U123", "demo", { + clientInformation: { client_id: "client-1" }, + discoveryState: { authorizationServerUrl: "https://example.com" }, + tokens: { + access_token: "access", + token_type: "Bearer", + }, + }); +} + +describe("StateBackedMcpOAuthClientProvider credential state", () => { + beforeEach(async () => { + stubTestEnv({ JUNIOR_STATE_ADAPTER: "memory" }); + mockTestClock(); + await 
disconnectStateAdapter(); + }); + + afterEach(async () => { + await disconnectStateAdapter(); + vi.useRealTimers(); + vi.unstubAllEnvs(); + }); + + it("preserves the authorization URL when only clearing the verifier", async () => { + await seedSession(); + await seedCredentials(); + const provider = createProvider(); + + await provider.invalidateCredentials("verifier"); + + await expect(getMcpStoredOAuthCredentials("U123", "demo")).resolves.toEqual( + { + clientInformation: { client_id: "client-1" }, + discoveryState: { authorizationServerUrl: "https://example.com" }, + tokens: { + access_token: "access", + token_type: "Bearer", + }, + }, + ); + await expect(getMcpAuthSession("auth-session-1")).resolves.toMatchObject({ + authorizationUrl: "https://example.com/oauth/start", + updatedAtMs: DEFAULT_TEST_NOW_MS, + }); + expect( + (await getMcpAuthSession("auth-session-1"))?.codeVerifier, + ).toBeUndefined(); + }); + + it("clears the authorization URL when invalidating all credentials", async () => { + await seedSession(); + await seedCredentials(); + const provider = createProvider(); + + await provider.invalidateCredentials("all"); + + await expect(getMcpStoredOAuthCredentials("U123", "demo")).resolves.toEqual( + {}, + ); + const session = await getMcpAuthSession("auth-session-1"); + expect(session?.authorizationUrl).toBeUndefined(); + expect(session?.codeVerifier).toBeUndefined(); + }); + + it("reads stored credentials without requiring a persisted auth session", async () => { + await seedCredentials(); + const provider = createProvider(SESSION_CONTEXT); + + await expect(provider.tokens()).resolves.toEqual({ + access_token: "access", + token_type: "Bearer", + }); + }); + + it("creates the auth session lazily when redirecting to authorization", async () => { + const provider = createProvider({ + ...SESSION_CONTEXT, + channelId: "C123", + }); + + await provider.redirectToAuthorization( + new URL("https://example.com/oauth/start"), + ); + + await 
expect(getMcpAuthSession("auth-session-1")).resolves.toMatchObject({ + authSessionId: "auth-session-1", + provider: "demo", + userId: "U123", + conversationId: "conversation-1", + sessionId: "turn-1", + userMessage: "/demo", + channelId: "C123", + authorizationUrl: "https://example.com/oauth/start", + createdAtMs: DEFAULT_TEST_NOW_MS, + updatedAtMs: DEFAULT_TEST_NOW_MS, + }); + }); + + it("stores the opaque MCP server session outside agent-visible state", async () => { + const provider = createProvider(SESSION_CONTEXT); + + await provider.saveMcpServerSessionId("mcp-session-123"); + + await expect(getMcpServerSessionId("U123", "demo")).resolves.toBe( + "mcp-session-123", + ); + await expect(provider.getMcpServerSessionId()).resolves.toBe( + "mcp-session-123", + ); + + await provider.saveMcpServerSessionId(undefined); + + await expect( + getMcpServerSessionId("U123", "demo"), + ).resolves.toBeUndefined(); + }); +}); diff --git a/packages/junior/tests/component/runtime/agent-continue-runner.test.ts b/packages/junior/tests/component/runtime/agent-continue-runner.test.ts index 1bcd1654f..33e4299f9 100644 --- a/packages/junior/tests/component/runtime/agent-continue-runner.test.ts +++ b/packages/junior/tests/component/runtime/agent-continue-runner.test.ts @@ -1,4 +1,5 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { RetryableTurnError } from "@/chat/runtime/turn"; import { disconnectStateAdapter } from "@/chat/state/adapter"; import { persistThreadStateById } from "@/chat/runtime/thread-state"; import { @@ -23,6 +24,66 @@ function restoreEnv(name: string, value: string | undefined): void { process.env[name] = value; } +async function prepareAwaitingContinuation(args: { + conversationId: string; + messageId: string; + requester?: Parameters[0]["requester"]; + sessionId: string; + text?: string; +}) { + const sessionRecord = await upsertAgentTurnSessionRecord({ + conversationId: args.conversationId, + sessionId: args.sessionId, + 
sliceId: 2, + state: "awaiting_resume", + destination: SLACK_DESTINATION, + resumeReason: "timeout", + requester: args.requester, + piMessages: [ + { + role: "user", + content: [{ type: "text", text: args.text ?? "hello" }], + timestamp: 1, + }, + ], + }); + await persistThreadStateById(args.conversationId, { + artifacts: { + listColumnMap: {}, + }, + conversation: { + schemaVersion: 1, + backfill: {}, + compactions: [], + piMessages: [], + messages: [ + { + id: args.messageId, + role: "user", + text: "resume this request", + createdAtMs: 1, + author: { + userId: "U123", + }, + }, + ], + processing: { + activeTurnId: args.sessionId, + }, + stats: { + compactedMessageCount: 0, + estimatedContextTokens: 0, + totalMessageCount: 1, + updatedAtMs: 1, + }, + vision: { + byFileId: {}, + }, + }, + }); + return sessionRecord; +} + describe("agent continuation runner callbacks", () => { beforeEach(async () => { process.env.JUNIOR_STATE_ADAPTER = "memory"; @@ -38,60 +99,16 @@ describe("agent continuation runner callbacks", () => { it("fails the session when delivery succeeded but completion state did not persist", async () => { const conversationId = "slack:C123:1712345.0005"; const sessionId = "turn_msg_5"; - const sessionRecord = await upsertAgentTurnSessionRecord({ + const sessionRecord = await prepareAwaitingContinuation({ conversationId, + messageId: "msg.5", sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - resumeReason: "timeout", requester: { slackUserId: "U123", slackUserName: "stored-user", fullName: "Stored User", email: "stored@example.com", }, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 1, - }, - ], - }); - await persistThreadStateById(conversationId, { - artifacts: { - listColumnMap: {}, - }, - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg.5", - role: "user", - text: "resume this request", - 
createdAtMs: 1, - author: { - userId: "U123", - }, - }, - ], - processing: { - activeTurnId: sessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, - }, - }, }); const { continueSlackAgentRun } = @@ -140,57 +157,70 @@ describe("agent continuation runner callbacks", () => { }); }); - it("fails before continuing when stored requester and message author differ", async () => { - const conversationId = "slack:C123:1712345.0006"; - const sessionId = "turn_msg_6"; - const sessionRecord = await upsertAgentTurnSessionRecord({ + it("requeues when a resumed timeout continuation times out again", async () => { + const conversationId = "slack:C123:1712345.0007"; + const sessionId = "turn_msg_7"; + const sessionRecord = await prepareAwaitingContinuation({ conversationId, + messageId: "msg.7", sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - resumeReason: "timeout", requester: { - slackUserId: "U999", - slackUserName: "wrong-user", + slackUserId: "U123", + slackUserName: "stored-user", }, - piMessages: [ + text: "keep going", + }); + const scheduleAgentContinue = vi.fn(async () => undefined); + const { continueSlackAgentRun } = + await import("@/chat/runtime/agent-continue-runner"); + + await expect( + continueSlackAgentRun( { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 1, + conversationId, + destination: SLACK_DESTINATION, + sessionId, + expectedVersion: sessionRecord.version, }, - ], - }); - await persistThreadStateById(conversationId, { - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg.6", - role: "user", - text: "resume this request", - createdAtMs: 1, - author: { - userId: "U123", - }, + { + scheduleAgentContinue, + resumeTurn: async (args) => { + const prepared = await args.beforeStart?.(); + if (!prepared) { + throw new 
Error("Expected the continuation to prepare"); + } + await prepared.onTimeoutPause?.( + new RetryableTurnError("turn_timeout_resume", "timed out again", { + conversationId, + sessionId, + version: sessionRecord.version + 1, + sliceId: sessionRecord.sliceId + 1, + }), + ); + return true; }, - ], - processing: { - activeTurnId: sessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, }, + ), + ).resolves.toBe(true); + + expect(scheduleAgentContinue).toHaveBeenCalledWith({ + conversationId, + destination: SLACK_DESTINATION, + sessionId, + expectedVersion: sessionRecord.version + 1, + }); + }); + + it("fails before continuing when stored requester and message author differ", async () => { + const conversationId = "slack:C123:1712345.0006"; + const sessionId = "turn_msg_6"; + const sessionRecord = await prepareAwaitingContinuation({ + conversationId, + messageId: "msg.6", + sessionId, + requester: { + slackUserId: "U999", + slackUserName: "wrong-user", }, }); diff --git a/packages/junior/tests/component/runtime/agent-continue.test.ts b/packages/junior/tests/component/runtime/agent-continue.test.ts index 862a27dda..b167bca62 100644 --- a/packages/junior/tests/component/runtime/agent-continue.test.ts +++ b/packages/junior/tests/component/runtime/agent-continue.test.ts @@ -129,7 +129,7 @@ describe("agent continuation scheduling", () => { ).resolves.toMatchObject({ state: "failed", errorMessage: - "Awaiting agent continuation metadata could not be materialized", + "Awaiting turn continuation metadata could not be materialized", }); }); diff --git a/packages/junior/tests/component/runtime/respond-lazy-sandbox.test.ts b/packages/junior/tests/component/runtime/respond-lazy-sandbox.test.ts new file mode 100644 index 000000000..bfbf0a430 --- /dev/null +++ b/packages/junior/tests/component/runtime/respond-lazy-sandbox.test.ts @@ -0,0 +1,305 @@ +import fs from "node:fs/promises"; 
+import os from "node:os"; +import path from "node:path"; +import { + afterAll, + afterEach, + beforeEach, + describe, + expect, + it, + vi, +} from "vitest"; +import type { PiMessage } from "@/chat/pi/messages"; +import type { TurnThinkingSelection } from "@/chat/services/turn-thinking-level"; +import { + configureRespondRuntimeEnv, + restoreRespondRuntimeEnv, +} from "../../fixtures/respond/env"; +import { + createScriptedReplyAgentFactory, + type ScriptedReplyAgent, +} from "../../fixtures/respond/agent"; +import { + createScriptedSandboxExecutorFactory, + createScriptedSandboxExecutorState, + type ScriptedSandboxExecutorState, +} from "../../fixtures/respond/sandbox"; +import { + makeTestReplyContext, + type TestReplyRequestContext, +} from "../../fixtures/reply-context"; + +const originalEnv = configureRespondRuntimeEnv(); + +const { generateAssistantReply } = await import("@/chat/respond"); +const { disconnectStateAdapter } = await import("@/chat/state/adapter"); +const { resetSkillDiscoveryCache } = await import("@/chat/skills"); + +type AgentMode = + | "plain" + | "loadSkill" + | "attachFile" + | "attachFileThenError" + | "bashThenError"; + +const agentMode: { value: AgentMode } = { + value: "plain", +}; +const selectedThinkingLevels: unknown[] = []; +let sandboxState: ScriptedSandboxExecutorState; +let skillRoot: string | undefined; + +const baseAgentFactory = createScriptedReplyAgentFactory({ + async continue() { + return {}; + }, + async prompt(agent, message) { + agent.state.messages.push(message as PiMessage); + + if (agentMode.value === "loadSkill") { + await executeAgentTool(agent, "loadSkill", { + skill_name: "demo-skill", + }); + agent.state.messages.push(assistantText("Loaded demo skill.")); + return {}; + } + + if ( + agentMode.value === "attachFile" || + agentMode.value === "attachFileThenError" + ) { + await executeAgentTool(agent, "attachFile", { + path: "report.txt", + }); + if (agentMode.value === "attachFileThenError") { + throw new 
Error("agent exploded"); + } + agent.state.messages.push(assistantText("Attached report.")); + return {}; + } + + if (agentMode.value === "bashThenError") { + await executeAgentTool(agent, "bash", { + command: "pwd", + }); + throw new Error("agent exploded"); + } + + agent.state.messages.push(assistantText("Plain reply.")); + return {}; + }, +}); + +const agentFactory: typeof baseAgentFactory = (options) => { + selectedThinkingLevels.push(options.initialState.thinkingLevel); + return baseAgentFactory(options); +}; + +function assistantText(text: string): PiMessage { + return { + role: "assistant", + content: [{ type: "text", text }], + stopReason: "stop", + } as PiMessage; +} + +async function executeAgentTool( + agent: ScriptedReplyAgent, + name: string, + params: Record, +): Promise { + const tool = agent.state.tools.find( + ( + candidate, + ): candidate is { + execute: (toolCallId: unknown, params: unknown) => Promise; + name: string; + } => + typeof candidate === "object" && + candidate !== null && + "name" in candidate && + candidate.name === name && + "execute" in candidate && + typeof candidate.execute === "function", + ); + if (!tool) { + throw new Error(`${name} tool missing`); + } + await tool.execute(`tool-call-${name}`, params); +} + +function thinkingSelection( + thinkingLevel: TurnThinkingSelection["thinkingLevel"], +): TurnThinkingSelection { + return { + thinkingLevel, + confidence: 1, + reason: "test", + }; +} + +async function writeDemoSkill(): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "junior-skills-")); + const skillDir = path.join(root, "demo-skill"); + await fs.mkdir(skillDir); + await fs.writeFile( + path.join(skillDir, "SKILL.md"), + [ + "---", + "name: demo-skill", + "description: Demo skill", + "---", + "", + "Skill instructions", + "", + ].join("\n"), + "utf8", + ); + return root; +} + +function sandboxExecutorFactory() { + return createScriptedSandboxExecutorFactory(sandboxState, { + canExecute: (toolName) => + 
agentMode.value === "bashThenError" && toolName === "bash", + }); +} + +async function generateReply( + message: string, + options: TestReplyRequestContext = {}, +) { + const { harness, ...restOptions } = options; + return await generateAssistantReply(message, { + skillDirs: skillRoot ? [skillRoot] : [], + ...makeTestReplyContext(restOptions), + harness: { + agentFactory, + sandboxExecutorFactory: sandboxExecutorFactory(), + turnThinkingSelection: thinkingSelection("medium"), + ...harness, + }, + }); +} + +describe("generateAssistantReply lazy sandbox boot", () => { + beforeEach(async () => { + agentMode.value = "plain"; + selectedThinkingLevels.length = 0; + sandboxState = createScriptedSandboxExecutorState(); + skillRoot = await writeDemoSkill(); + resetSkillDiscoveryCache(); + await disconnectStateAdapter(); + }); + + afterEach(async () => { + await disconnectStateAdapter(); + resetSkillDiscoveryCache(); + if (skillRoot) { + await fs.rm(skillRoot, { recursive: true, force: true }); + skillRoot = undefined; + } + }); + + afterAll(() => { + restoreRespondRuntimeEnv(originalEnv); + }); + + it("does not create a sandbox for turns that never touch sandbox-backed tools", async () => { + const reply = await generateReply("hello", { + harness: { + turnThinkingSelection: thinkingSelection("none"), + }, + }); + + expect(reply.text).toBe("Plain reply."); + expect(sandboxState.createSandboxCalls).toBe(0); + expect(reply.sandboxId).toBeUndefined(); + expect(reply.sandboxDependencyProfileHash).toBeUndefined(); + expect(reply.diagnostics.toolCalls).toEqual([]); + expect(selectedThinkingLevels).toEqual(["off"]); + }); + + it("does not create a sandbox when loadSkill only reads host-side skill data", async () => { + agentMode.value = "loadSkill"; + + const reply = await generateReply("load the demo skill"); + + expect(reply.text).toBe("Loaded demo skill."); + expect(sandboxState.createSandboxCalls).toBe(0); + expect(reply.sandboxId).toBeUndefined(); + 
expect(reply.diagnostics.toolCalls).toEqual(["loadSkill"]); + expect(selectedThinkingLevels).toEqual(["medium"]); + }); + + it("does not create a sandbox for restored skill history at turn start", async () => { + const reply = await generateReply("hello", { + piMessages: [ + { + role: "toolResult", + toolName: "loadSkill", + isError: false, + details: { + skill_name: "demo-skill", + }, + content: [{ type: "text", text: "loaded" }], + } as PiMessage, + ], + }); + + expect(reply.text).toBe("Plain reply."); + expect(sandboxState.createSandboxCalls).toBe(0); + expect(reply.diagnostics.toolCalls).toEqual([]); + }); + + it("memoizes the lazy sandbox workspace across file reads and MIME detection", async () => { + agentMode.value = "attachFile"; + + const reply = await generateReply("attach the report"); + + expect(reply.text).toBe("Attached report."); + expect(sandboxState.createSandboxCalls).toBe(1); + expect(reply.diagnostics.toolCalls).toEqual(["attachFile"]); + expect(selectedThinkingLevels).toEqual(["medium"]); + }); + + it("retains sandbox reuse metadata after lazy boot on error turns", async () => { + agentMode.value = "attachFileThenError"; + + const reply = await generateReply("attach the report"); + + expect(reply.text).toContain("Error: agent exploded"); + expect(sandboxState.createSandboxCalls).toBe(1); + expect(reply.sandboxId).toBe("sandbox-test"); + expect(reply.sandboxDependencyProfileHash).toBe("hash-test"); + }); + + it("reports sandbox metadata as soon as lazy boot succeeds on error turns", async () => { + agentMode.value = "attachFileThenError"; + const onSandboxAcquired = vi.fn(); + + const reply = await generateReply("attach the report", { + onSandboxAcquired, + }); + + expect(reply.text).toContain("Error: agent exploded"); + expect(onSandboxAcquired).toHaveBeenCalledTimes(1); + expect(onSandboxAcquired).toHaveBeenCalledWith({ + sandboxId: "sandbox-test", + sandboxDependencyProfileHash: "hash-test", + }); + }); + + it("retains sandbox reuse metadata 
after executor-backed boot on error turns", async () => { + agentMode.value = "bashThenError"; + + const reply = await generateReply("run pwd"); + + expect(reply.text).toContain("Error: agent exploded"); + expect(sandboxState.createSandboxCalls).toBe(1); + expect(sandboxState.executedTools).toEqual(["bash"]); + expect(reply.sandboxId).toBe("sandbox-test"); + expect(reply.sandboxDependencyProfileHash).toBe("hash-test"); + }); +}); diff --git a/packages/junior/tests/component/runtime/respond-mcp-auth-resume.test.ts b/packages/junior/tests/component/runtime/respond-mcp-auth-resume.test.ts new file mode 100644 index 000000000..4f77fa160 --- /dev/null +++ b/packages/junior/tests/component/runtime/respond-mcp-auth-resume.test.ts @@ -0,0 +1,233 @@ +import { afterAll, afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + cleanupRespondMcpProgressiveLoadingTest, + generateAssistantReply, + getAgentTurnSessionRecord, + isRetryableTurnError, + makeDemoMcpTool, + makeReplyContext, + respondMcpProgressiveLoadingHarness, + restoreRespondMcpProgressiveLoadingEnv, + setupRespondMcpProgressiveLoadingTest, + upsertAgentTurnSessionRecord, + type PiMessage, +} from "../../fixtures/respond/mcp-progressive-loading"; + +const { + DEMO_SKILL, + callToolMock, + completeEmptyAssistantOnAbort, + continueStopsOnAbort, + deliverPrivateMessageMock, + listToolsMock, + omitFinalAssistantAfterTool, + pushPreToolAssistantMessage, + recordToolResultMessage, +} = respondMcpProgressiveLoadingHarness; + +// Component-style runtime coverage: real respond orchestration with explicit +// fake ports for the agent, MCP client, and sandbox executor. 
+describe("generateAssistantReply MCP auth resume", () => { + beforeEach(setupRespondMcpProgressiveLoadingTest); + + afterEach(cleanupRespondMcpProgressiveLoadingTest); + afterAll(restoreRespondMcpProgressiveLoadingEnv); + + it("parks for auth when MCP auth is requested during a tool call", async () => { + listToolsMock.mockReset(); + listToolsMock.mockImplementation(async (plugin, options) => { + await options.authProvider?.redirectToAuthorization?.( + new URL(`https://auth.example.com/${plugin.manifest.name}`), + ); + return [makeDemoMcpTool("ping")]; + }); + callToolMock.mockImplementationOnce(async (plugin) => { + const { McpAuthorizationRequiredError } = + await import("@/chat/mcp/client"); + throw new McpAuthorizationRequiredError( + plugin.manifest.name, + "Auth required", + ); + }); + + const context = makeReplyContext({ + conversationId: "conversation-4", + threadTs: "1712345.0004", + turnId: "turn-4", + }); + + const firstError = await generateAssistantReply("help me", context).catch( + (error) => error, + ); + + expect(isRetryableTurnError(firstError, "mcp_auth_resume")).toBe(true); + expect(deliverPrivateMessageMock).toHaveBeenCalledTimes(1); + + const pausedSessionRecord = await getAgentTurnSessionRecord( + "conversation-4", + "turn-4", + ); + expect(pausedSessionRecord).toMatchObject({ + state: "awaiting_resume", + resumeReason: "auth", + }); + + const reply = await generateAssistantReply("help me", context); + + expect(reply.text).toBe("resumed reply"); + + const resumedSessionRecord = await getAgentTurnSessionRecord( + "conversation-4", + "turn-4", + ); + expect(resumedSessionRecord).toMatchObject({ + state: "completed", + }); + }); + + it("does not leak provisional pre-tool assistant text as the final reply", async () => { + pushPreToolAssistantMessage.value = true; + recordToolResultMessage.value = true; + omitFinalAssistantAfterTool.value = true; + listToolsMock.mockReset(); + listToolsMock.mockResolvedValue([makeDemoMcpTool("ping")]); + + const 
reply = await generateAssistantReply( + "help me", + makeReplyContext({ + conversationId: "conversation-5", + threadTs: "1712345.0005", + turnId: "turn-5", + }), + ); + + expect(reply.text).toBe(""); + expect(reply.diagnostics.outcome).toBe("execution_failure"); + expect(reply.diagnostics.usedPrimaryText).toBe(false); + }); + + it("falls back to the latest stored record when auth pause captures no messages", async () => { + continueStopsOnAbort.value = true; + + const priorMessages: PiMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "help me" }], + timestamp: 1, + }, + { + role: "toolResult", + toolCallId: "tool-call-1", + toolName: "loadSkill", + isError: false, + details: { + ok: true, + skill_name: DEMO_SKILL.name, + mcp_provider: "demo", + }, + content: [{ type: "text", text: "loaded" }], + timestamp: 2, + } as PiMessage, + { + role: "assistant", + content: [{ type: "text", text: "working on it" }], + api: "responses", + provider: "openai", + model: "gpt-5.3", + usage: { + input: 1, + output: 1, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 2, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, + }, + timestamp: 3, + stopReason: "toolUse", + }, + ]; + const expectedResumeMessages = priorMessages.slice(0, 2); + await upsertAgentTurnSessionRecord({ + conversationId: "conversation-5", + sessionId: "turn-5", + sliceId: 1, + state: "awaiting_resume", + piMessages: priorMessages, + resumeReason: "auth", + }); + + callToolMock.mockImplementationOnce(async (plugin) => { + const { McpAuthorizationRequiredError } = + await import("@/chat/mcp/client"); + throw new McpAuthorizationRequiredError( + plugin.manifest.name, + "Auth required", + ); + }); + + const firstError = await generateAssistantReply("help me", { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + requester: { userId: "U123" }, + correlation: { + conversationId: "conversation-5", + turnId: "turn-5", + channelId: "C123", + threadTs: 
"1712345.0005", + }, + }).catch((error) => error); + + expect(isRetryableTurnError(firstError, "mcp_auth_resume")).toBe(true); + + const resumedSessionRecord = await getAgentTurnSessionRecord( + "conversation-5", + "turn-5", + ); + expect(resumedSessionRecord).toMatchObject({ + state: "awaiting_resume", + sliceId: 2, + resumedFromSliceId: 1, + piMessages: expectedResumeMessages, + resumeReason: "auth", + }); + }); + + it("still parks for auth when abort leaves an empty completed assistant frame", async () => { + completeEmptyAssistantOnAbort.value = true; + + const firstError = await generateAssistantReply("help me", { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + requester: { userId: "U123" }, + correlation: { + conversationId: "conversation-6", + turnId: "turn-6", + channelId: "C123", + threadTs: "1712345.0006", + }, + }).catch((error) => error); + + expect(isRetryableTurnError(firstError, "mcp_auth_resume")).toBe(true); + + const pausedSessionRecord = await getAgentTurnSessionRecord( + "conversation-6", + "turn-6", + ); + expect(pausedSessionRecord).toMatchObject({ + state: "awaiting_resume", + resumeReason: "auth", + }); + expect(pausedSessionRecord?.piMessages.at(-1)).toMatchObject({ + role: "toolResult", + toolName: "loadSkill", + }); + }); +}); diff --git a/packages/junior/tests/component/runtime/respond-mcp-session-context.test.ts b/packages/junior/tests/component/runtime/respond-mcp-session-context.test.ts new file mode 100644 index 000000000..0a3308733 --- /dev/null +++ b/packages/junior/tests/component/runtime/respond-mcp-session-context.test.ts @@ -0,0 +1,87 @@ +import { afterAll, afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + cleanupRespondMcpProgressiveLoadingTest, + generateAssistantReply, + getAgentTurnSessionRecord, + isRetryableTurnError, + makeReplyContext, + restoreRespondMcpProgressiveLoadingEnv, + setupRespondMcpProgressiveLoadingTest, + type PiMessage, +} from 
"../../fixtures/respond/mcp-progressive-loading"; + +// Component-style runtime coverage: real respond orchestration with explicit +// fake ports for the agent, MCP client, and sandbox executor. +describe("generateAssistantReply MCP session context", () => { + beforeEach(setupRespondMcpProgressiveLoadingTest); + + afterEach(cleanupRespondMcpProgressiveLoadingTest); + afterAll(restoreRespondMcpProgressiveLoadingEnv); + + it("preserves prior MCP history and current follow-up across auth resume", async () => { + const priorMessages = [ + { + role: "user", + content: [{ type: "text", text: "prior question" }], + timestamp: 1, + }, + { + role: "toolResult", + toolName: "callMcpTool", + isError: false, + content: [{ type: "text", text: "pong" }], + input: { + tool_name: "mcp__demo__ping", + arguments: { query: "prior" }, + }, + }, + ] as unknown as PiMessage[]; + + const firstError = await generateAssistantReply("current follow-up", { + ...makeReplyContext({ + conversationId: "conversation-restore-auth", + threadTs: "1712345.0091", + turnId: "turn-restore-auth", + }), + piMessages: priorMessages, + }).catch((error) => error); + + expect(isRetryableTurnError(firstError, "mcp_auth_resume")).toBe(true); + + const pausedSessionRecord = await getAgentTurnSessionRecord( + "conversation-restore-auth", + "turn-restore-auth", + ); + expect(pausedSessionRecord).toMatchObject({ + state: "awaiting_resume", + resumeReason: "auth", + }); + expect(pausedSessionRecord?.piMessages).toHaveLength(3); + expect(pausedSessionRecord?.piMessages[0]).toMatchObject({ + role: "user", + content: [{ type: "text", text: "prior question" }], + }); + expect(pausedSessionRecord?.piMessages.at(-1)).toMatchObject({ + role: "user", + content: [{ type: "text", text: "current follow-up" }], + }); + + const reply = await generateAssistantReply("current follow-up", { + ...makeReplyContext({ + conversationId: "conversation-restore-auth", + threadTs: "1712345.0091", + turnId: "turn-restore-auth", + }), + 
piMessages: priorMessages, + }); + + expect(reply.text).toBe("resumed reply"); + const completedSessionRecord = await getAgentTurnSessionRecord( + "conversation-restore-auth", + "turn-restore-auth", + ); + expect(completedSessionRecord).toMatchObject({ + state: "completed", + }); + }); +}); diff --git a/packages/junior/tests/component/runtime/respond-mcp-skill-loading.test.ts b/packages/junior/tests/component/runtime/respond-mcp-skill-loading.test.ts new file mode 100644 index 000000000..38f901e6e --- /dev/null +++ b/packages/junior/tests/component/runtime/respond-mcp-skill-loading.test.ts @@ -0,0 +1,135 @@ +import { afterAll, afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + cleanupRespondMcpProgressiveLoadingTest, + generateAssistantReply, + getAgentTurnSessionRecord, + isRetryableTurnError, + makeDemoMcpTools, + makeReplyContext, + respondMcpProgressiveLoadingHarness, + restoreRespondMcpProgressiveLoadingEnv, + setupRespondMcpProgressiveLoadingTest, +} from "../../fixtures/respond/mcp-progressive-loading"; + +const { + agentInitialToolNames, + callToolMock, + clientOptions, + continueCallCount, + deliverPrivateMessageMock, + listToolsMock, + loadSkillExecutionErrorCount, + promptCallCount, + resumeTurnContextCounts, + searchMcpToolNames, +} = respondMcpProgressiveLoadingHarness; + +// Component-style runtime coverage: real respond orchestration with explicit +// fake ports for the agent, MCP client, and sandbox executor. 
+describe("generateAssistantReply MCP skill loading", () => { + beforeEach(setupRespondMcpProgressiveLoadingTest); + + afterEach(cleanupRespondMcpProgressiveLoadingTest); + afterAll(restoreRespondMcpProgressiveLoadingEnv); + + it("persists loaded plugin skills across auth pause and resume", async () => { + const context = makeReplyContext({ + conversationId: "conversation-1", + threadTs: "1712345.0001", + turnId: "turn-1", + }); + + const firstError = await generateAssistantReply("help me", context).catch( + (error) => error, + ); + + expect(isRetryableTurnError(firstError, "mcp_auth_resume")).toBe(true); + expect(agentInitialToolNames[0]).toContain("loadSkill"); + expect(agentInitialToolNames[0]).toContain("searchMcpTools"); + expect(agentInitialToolNames[0]).toContain("callMcpTool"); + expect(agentInitialToolNames[0]).not.toContain("mcp__demo__ping"); + + const pausedSessionRecord = await getAgentTurnSessionRecord( + "conversation-1", + "turn-1", + ); + expect(pausedSessionRecord).toMatchObject({ + state: "awaiting_resume", + resumeReason: "auth", + }); + expect(pausedSessionRecord?.piMessages.at(-1)).toMatchObject({ + role: "toolResult", + toolName: "loadSkill", + }); + expect(deliverPrivateMessageMock).toHaveBeenCalledTimes(1); + expect(loadSkillExecutionErrorCount.value).toBe(0); + + const reply = await generateAssistantReply("help me", context); + + expect(reply.text).toBe("resumed reply"); + expect(promptCallCount.value).toBe(1); + expect(continueCallCount.value).toBe(1); + expect(clientOptions).not.toContainEqual( + expect.objectContaining({ sessionId: expect.any(String) }), + ); + expect(agentInitialToolNames[1]).toContain("loadSkill"); + expect(agentInitialToolNames[1]).toContain("searchMcpTools"); + expect(agentInitialToolNames[1]).toContain("callMcpTool"); + expect(agentInitialToolNames[1]).not.toContain("mcp__demo__ping"); + expect(resumeTurnContextCounts).toEqual([1]); + expect(searchMcpToolNames).toEqual([]); + 
expect(callToolMock).toHaveBeenCalledWith( + expect.objectContaining({ + manifest: expect.objectContaining({ name: "demo" }), + }), + "ping", + { query: "hello" }, + ); + + const resumedSessionRecord = await getAgentTurnSessionRecord( + "conversation-1", + "turn-1", + ); + expect(resumedSessionRecord).toMatchObject({ + state: "completed", + }); + }); + + it("searches loadSkill-activated MCP tools in the same turn without replay", async () => { + listToolsMock.mockReset(); + listToolsMock.mockResolvedValue(makeDemoMcpTools()); + + const reply = await generateAssistantReply( + "help me", + makeReplyContext({ + conversationId: "conversation-2", + threadTs: "1712345.0002", + turnId: "turn-2", + }), + ); + + expect(reply.text).toBe("resumed reply"); + expect(promptCallCount.value).toBe(1); + expect(continueCallCount.value).toBe(0); + expect(agentInitialToolNames[0]).toContain("loadSkill"); + expect(agentInitialToolNames[0]).toContain("searchMcpTools"); + expect(agentInitialToolNames[0]).toContain("callMcpTool"); + expect(agentInitialToolNames[0]).not.toContain("mcp__demo__ping"); + expect(searchMcpToolNames).toEqual([["mcp__demo__ping"]]); + expect(callToolMock).toHaveBeenCalledWith( + expect.objectContaining({ + manifest: expect.objectContaining({ name: "demo" }), + }), + "ping", + { query: "hello" }, + ); + + const sessionRecord = await getAgentTurnSessionRecord( + "conversation-2", + "turn-2", + ); + expect(sessionRecord).toMatchObject({ + state: "completed", + }); + }); +}); diff --git a/packages/junior/tests/unit/runtime/respond-provider-retry.test.ts b/packages/junior/tests/component/runtime/respond-provider-retry.test.ts similarity index 50% rename from packages/junior/tests/unit/runtime/respond-provider-retry.test.ts rename to packages/junior/tests/component/runtime/respond-provider-retry.test.ts index 0a90db587..254d8e01b 100644 --- a/packages/junior/tests/unit/runtime/respond-provider-retry.test.ts +++ 
b/packages/junior/tests/component/runtime/respond-provider-retry.test.ts @@ -1,237 +1,117 @@ -import { Buffer } from "node:buffer"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { Destination } from "@sentry/junior-plugin-api"; import type { PiMessage } from "@/chat/pi/messages"; - -const { agentMode, counters } = vi.hoisted(() => ({ - agentMode: { - value: "providerRetry" as - | "providerRetry" - | "cooperativeYield" - | "steering" - | "steeringSteerThrows", - }, - counters: { - continueCalls: 0, - promptCalls: 0, +import type { TurnThinkingSelection } from "@/chat/services/turn-thinking-level"; +import { createJuniorReporting } from "@/reporting"; +import { + createScriptedReplyAgentFactory, + type ScriptedReplyAgent, +} from "../../fixtures/respond/agent"; +import { + makeTestReplyContext, + type TestReplyRequestContext, +} from "../../fixtures/reply-context"; + +const { generateAssistantReply } = await import("@/chat/respond"); +const { isCooperativeTurnYieldError } = await import("@/chat/runtime/turn"); +const { getAwaitingTurnContinuationRequest } = + await import("@/chat/services/timeout-resume"); +const { disconnectStateAdapter } = await import("@/chat/state/adapter"); +const turnSessionState = await import("@/chat/state/turn-session"); + +type AgentMode = + | "providerRetry" + | "cooperativeYield" + | "steering" + | "steeringSteerThrows"; + +const agentMode: { value: AgentMode } = { + value: "providerRetry", +}; +const counters = { + continueCalls: 0, + promptCalls: 0, +}; +const turnThinkingSelection = { + thinkingLevel: "medium", + confidence: 1, + reason: "test", +} satisfies TurnThinkingSelection; + +const agentFactory = createScriptedReplyAgentFactory({ + async continue(agent) { + counters.continueCalls += 1; + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "Recovered." 
}], + stopReason: "stop", + usage: { + input: 2, + output: 2, + }, + } as PiMessage); + return {}; }, -})); - -vi.mock("@earendil-works/pi-agent-core", () => { - class MockAgent { - state: { - messages: unknown[]; - model: unknown; - systemPrompt: string; - tools: unknown[]; - }; - private prepareNextTurn?: () => Promise | unknown; - private steeringMessages: unknown[] = []; - - constructor(input: { - initialState: { - model: unknown; - systemPrompt: string; - tools: unknown[]; - }; - prepareNextTurn?: () => Promise | unknown; - }) { - this.state = { - messages: [], - model: input.initialState.model, - systemPrompt: input.initialState.systemPrompt, - tools: input.initialState.tools, - }; - this.prepareNextTurn = input.prepareNextTurn; - } - - subscribe() { - return () => undefined; - } - - steer(message: unknown) { - if (agentMode.value === "steeringSteerThrows") { - throw new Error("steer failed"); - } - this.steeringMessages.push(message); - } - - abort() { - return undefined; - } - - private recordRunFailure(error: unknown) { - this.state.messages.push({ + async prompt(agent, message) { + counters.promptCalls += 1; + agent.state.messages.push(message as PiMessage); + if ( + agentMode.value === "cooperativeYield" || + agentMode.value === "steering" || + agentMode.value === "steeringSteerThrows" + ) { + await agent.prepareNextTurn?.(); + agent.state.messages.push(...agent.steeringMessages); + agent.state.messages.push({ role: "assistant", - content: [{ type: "text", text: "" }], - stopReason: "error", - errorMessage: error instanceof Error ? 
error.message : String(error), - usage: { - input: 0, - output: 0, - }, - }); - } - - async prompt(message: unknown) { - counters.promptCalls += 1; - this.state.messages.push(message); - if ( - agentMode.value === "cooperativeYield" || - agentMode.value === "steering" || - agentMode.value === "steeringSteerThrows" - ) { - try { - await this.prepareNextTurn?.(); - } catch (error) { - this.recordRunFailure(error); - return {}; - } - this.state.messages.push(...this.steeringMessages); - this.state.messages.push({ - role: "assistant", - content: [{ type: "text", text: "Steered." }], - stopReason: "stop", - usage: { - input: 2, - output: 2, - }, - }); - return {}; - } - this.state.messages.push({ - role: "toolResult", - toolName: "bash", - isError: false, - content: [{ type: "text", text: "ok" }], - }); - this.state.messages.push({ - role: "assistant", - content: [], - stopReason: "error", - errorMessage: "Anthropic stream ended before message_stop", - usage: { - input: 10, - output: 1, - }, - }); - return {}; - } - - async continue() { - counters.continueCalls += 1; - this.state.messages.push({ - role: "assistant", - content: [{ type: "text", text: "Recovered." }], + content: [{ type: "text", text: "Steered." 
}], stopReason: "stop", usage: { input: 2, output: 2, }, - }); + } as PiMessage); return {}; } - } - - return { Agent: MockAgent }; -}); - -vi.mock("@/chat/config", async (importOriginal) => { - const original = await importOriginal(); - const memoryConfig = original.readChatConfig({ - ...process.env, - AGENT_TURN_TIMEOUT_MS: "10000", - FUNCTION_MAX_DURATION_SECONDS: "60", - JUNIOR_STATE_ADAPTER: "memory", - }); - return { - ...original, - botConfig: memoryConfig.bot, - getChatConfig: () => memoryConfig, - getRuntimeMetadata: () => ({ version: "test" }), - }; -}); - -vi.mock("@/chat/capabilities/factory", () => ({ - createUserTokenStore: () => ({ - get: async () => undefined, - set: async () => undefined, - delete: async () => undefined, - }), -})); - -vi.mock("@/chat/capabilities/jr-rpc-command", () => ({ - maybeExecuteJrRpcCustomCommand: async () => ({ handled: false }), -})); - -vi.mock("@/chat/pi/client", () => ({ - GEN_AI_PROVIDER_NAME: "vercel-ai-gateway", - GEN_AI_SERVER_ADDRESS: "ai-gateway.vercel.sh", - GEN_AI_SERVER_PORT: 443, - completeObject: async () => ({ - object: { - thinking_level: "medium", - confidence: 1, - reason: "test-router", - }, - }), - getPiGatewayApiKeyOverride: () => "test-gateway-key", - resolveGatewayModel: (modelId: string) => modelId, -})); - -vi.mock("@/chat/prompt", async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - buildSystemPrompt: () => "System prompt", - }; + agent.state.messages.push({ + role: "toolResult", + toolName: "bash", + isError: false, + content: [{ type: "text", text: "ok" }], + } as PiMessage); + agent.state.messages.push({ + role: "assistant", + content: [], + stopReason: "error", + errorMessage: "Anthropic stream ended before message_stop", + usage: { + input: 10, + output: 1, + }, + } as unknown as PiMessage); + return {}; + }, + steer(agent: ScriptedReplyAgent, message: unknown) { + if (agentMode.value === "steeringSteerThrows") { + throw new Error("steer failed"); + 
} + agent.steeringMessages.push(message as PiMessage); + }, }); -vi.mock("@/chat/runtime/dev-agent-trace", () => ({ - shouldEmitDevAgentTrace: () => false, -})); - -vi.mock("@/chat/sandbox/sandbox", () => ({ - createSandboxExecutor: () => ({ - configureSkills: () => undefined, - configureReferenceFiles: () => undefined, - createSandbox: async () => ({ - readFileToBuffer: async () => Buffer.from("", "utf8"), - runCommand: async () => ({ - stdout: "", - stderr: "", - exitCode: 0, - }), - }), - canExecute: () => false, - execute: async () => { - throw new Error("sandbox executor should not execute in this test"); +async function generateReply( + message: string, + options: TestReplyRequestContext = {}, +) { + return await generateAssistantReply(message, { + ...makeTestReplyContext(options), + harness: { + ...options.harness, + agentFactory, + turnThinkingSelection, }, - getSandboxId: () => undefined, - getDependencyProfileHash: () => undefined, - dispose: async () => undefined, - }), -})); - -vi.mock("@/chat/plugins/registry", async (importOriginal) => ({ - ...(await importOriginal()), - getPluginMcpProviders: () => [], - getPluginProviders: () => [], -})); - -vi.mock("@/chat/skills", async (importOriginal) => ({ - ...(await importOriginal()), - discoverSkills: async () => [], - findSkillByName: () => null, - parseSkillInvocation: () => null, -})); - -import { generateAssistantReply } from "@/chat/respond"; -import { isCooperativeTurnYieldError } from "@/chat/runtime/turn"; -import { getAwaitingAgentContinueRequest } from "@/chat/services/agent-continue"; -import { disconnectStateAdapter } from "@/chat/state/adapter"; -import * as turnSessionState from "@/chat/state/turn-session"; -import { createJuniorReporting } from "@/reporting"; + }); +} const TEST_DESTINATION = { platform: "slack", @@ -244,7 +124,6 @@ describe("generateAssistantReply provider retry", () => { agentMode.value = "providerRetry"; counters.continueCalls = 0; counters.promptCalls = 0; - 
process.env.JUNIOR_STATE_ADAPTER = "memory"; await disconnectStateAdapter(); vi.useFakeTimers(); }); @@ -252,13 +131,11 @@ describe("generateAssistantReply provider retry", () => { afterEach(async () => { vi.useRealTimers(); await disconnectStateAdapter(); - delete process.env.JUNIOR_STATE_ADAPTER; }); it("continues from the last safe boundary after a transient provider stream error", async () => { - const replyPromise = generateAssistantReply("help me", { - destination: TEST_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, + const replyPromise = generateReply("help me", { + requester: { userId: "U123" }, correlation: { conversationId: "conversation-1", turnId: "turn-1", @@ -267,6 +144,9 @@ describe("generateAssistantReply provider retry", () => { }, }); + await vi.waitFor(() => { + expect(counters.promptCalls).toBe(1); + }); await vi.advanceTimersByTimeAsync(2_000); const reply = await replyPromise; @@ -326,10 +206,9 @@ describe("generateAssistantReply provider retry", () => { }, ] satisfies PiMessage[]; - const reply = await generateAssistantReply("help me", { - destination: TEST_DESTINATION, + const reply = await generateReply("help me", { + requester: { userId: "U123" }, piMessages: priorMessages, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, correlation: { conversationId: "slack:C123:1712345.0001", turnId: "turn-steering", @@ -384,9 +263,8 @@ describe("generateAssistantReply provider retry", () => { it("parks the turn when the worker asks to yield at a Pi boundary", async () => { agentMode.value = "cooperativeYield"; - const error = await generateAssistantReply("help me", { - destination: TEST_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, + const error = await generateReply("help me", { + requester: { userId: "U123" }, correlation: { conversationId: "conversation-yield", turnId: "turn-yield", @@ -416,7 +294,7 @@ describe("generateAssistantReply provider retry", () => { 
"user", ]); await expect( - getAwaitingAgentContinueRequest({ + getAwaitingTurnContinuationRequest({ conversationId: "conversation-yield", sessionId: "turn-yield", }), @@ -431,8 +309,8 @@ describe("generateAssistantReply provider retry", () => { it("keeps steered messages when yielding after steering drain", async () => { agentMode.value = "cooperativeYield"; - const error = await generateAssistantReply("help me", { - requester: { platform: "slack", teamId: "T123", userId: "U123" }, + const error = await generateReply("help me", { + requester: { userId: "U123" }, correlation: { conversationId: "conversation-yield-steering", turnId: "turn-yield-steering", @@ -475,49 +353,13 @@ describe("generateAssistantReply provider retry", () => { expect(serializedMessages).toContain("actually do the other thing"); }); - it("throws when a cooperative yield cannot persist its resumable boundary", async () => { - agentMode.value = "cooperativeYield"; - const upsertSpy = vi - .spyOn(turnSessionState, "upsertAgentTurnSessionRecord") - .mockRejectedValue(new Error("storage unavailable")); - - const error = await generateAssistantReply("help me", { - destination: TEST_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - correlation: { - conversationId: "conversation-yield-persist-failure", - turnId: "turn-yield-persist-failure", - channelId: "C123", - threadTs: "1712345.0004", - }, - shouldYield: () => true, - }).then( - () => undefined, - (caught: unknown) => caught, - ); - upsertSpy.mockRestore(); - - expect(error).toBeInstanceOf(Error); - expect((error as Error).message).toContain( - "Failed to persist cooperative yield continuation", - ); - expect(isCooperativeTurnYieldError(error)).toBe(false); - await expect( - turnSessionState.getAgentTurnSessionRecord( - "conversation-yield-persist-failure", - "turn-yield-persist-failure", - ), - ).resolves.toBeUndefined(); - }); - it("rejects steering injection when Pi steer fails", async () => { agentMode.value = 
"steeringSteerThrows"; let injectRejected = false; let injectCompleted = false; - await generateAssistantReply("help me", { - destination: TEST_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, + await generateReply("help me", { + requester: { userId: "U123" }, correlation: { conversationId: "conversation-steering-failure", turnId: "turn-steering-failure", diff --git a/packages/junior/tests/component/runtime/respond-startup-error.test.ts b/packages/junior/tests/component/runtime/respond-startup-error.test.ts new file mode 100644 index 000000000..013a6c333 --- /dev/null +++ b/packages/junior/tests/component/runtime/respond-startup-error.test.ts @@ -0,0 +1,63 @@ +import { afterAll, afterEach, describe, expect, it } from "vitest"; +import { + configureRespondRuntimeEnv, + restoreRespondRuntimeEnv, +} from "../../fixtures/respond/env"; +import { makeTestReplyContext } from "../../fixtures/reply-context"; + +const originalEnv = configureRespondRuntimeEnv(); + +const { generateAssistantReply } = await import("@/chat/respond"); +const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + +describe("generateAssistantReply startup errors", () => { + afterEach(async () => { + await disconnectStateAdapter(); + }); + + afterAll(() => { + restoreRespondRuntimeEnv(originalEnv); + }); + + it("preserves sandbox reuse metadata on non-retryable startup failures", async () => { + const reply = await generateAssistantReply( + "hello", + makeTestReplyContext({ + sandbox: { + sandboxId: "sb-123", + sandboxDependencyProfileHash: "hash-abc", + }, + harness: { + sandboxExecutorFactory: () => { + throw new Error("sandbox executor failed"); + }, + }, + }), + ); + + expect(reply.text).toContain("Error: sandbox executor failed"); + expect(reply.sandboxId).toBe("sb-123"); + expect(reply.sandboxDependencyProfileHash).toBe("hash-abc"); + expect(reply.diagnostics.outcome).toBe("provider_error"); + 
expect(reply.diagnostics.modelId).toBe("openai/gpt-5.4"); + expect(reply.diagnostics.thinkingLevel).toBeUndefined(); + }); + + it("propagates startup failures when durable input commit is required", async () => { + await expect( + generateAssistantReply( + "hello", + makeTestReplyContext({ + onInputCommitted: async () => { + throw new Error("input should not commit before startup succeeds"); + }, + harness: { + sandboxExecutorFactory: () => { + throw new Error("sandbox executor failed"); + }, + }, + }), + ), + ).rejects.toThrow("sandbox executor failed"); + }); +}); diff --git a/packages/junior/tests/component/runtime/respond-timeout-resume.test.ts b/packages/junior/tests/component/runtime/respond-timeout-resume.test.ts new file mode 100644 index 000000000..ef6ec843f --- /dev/null +++ b/packages/junior/tests/component/runtime/respond-timeout-resume.test.ts @@ -0,0 +1,382 @@ +import { + afterAll, + afterEach, + beforeEach, + describe, + expect, + it, + vi, +} from "vitest"; +import type { PiMessage } from "@/chat/pi/messages"; +import type { TurnThinkingSelection } from "@/chat/services/turn-thinking-level"; +import { + configureRespondRuntimeEnv, + restoreRespondRuntimeEnv, +} from "../../fixtures/respond/env"; +import { createScriptedReplyAgentFactory } from "../../fixtures/respond/agent"; +import { + makeTestReplyContext, + type TestReplyRequestContext, +} from "../../fixtures/reply-context"; +import { mockTestClock } from "../../fixtures/vitest"; + +const originalEnv = configureRespondRuntimeEnv(); +const { generateAssistantReply } = await import("@/chat/respond"); +const { isRetryableTurnError, isTurnInputCommitLostError } = + await import("@/chat/runtime/turn"); +const { AGENT_TURN_TIMEOUT_RESUME_MAX_SLICES } = + await import("@/chat/services/turn-session-record"); +const { disconnectStateAdapter } = await import("@/chat/state/adapter"); +const { getAgentTurnSessionRecord, upsertAgentTurnSessionRecord } = + await import("@/chat/state/turn-session"); + +type 
PromptMode = + | "settlesAfterAbort" + | "hangsAfterAbort" + | "continueSettlesAfterAbort" + | "providerRetryThenHangs"; + +const promptAborted = { value: false }; +const promptMode: { value: PromptMode } = { + value: "settlesAfterAbort", +}; +let resolveAbort: (() => void) | undefined; +const turnThinkingSelection = { + thinkingLevel: "medium", + confidence: 1, + reason: "test", +} satisfies TurnThinkingSelection; + +const agentFactory = createScriptedReplyAgentFactory({ + abort() { + promptAborted.value = true; + resolveAbort?.(); + }, + async continue(agent) { + if (promptMode.value === "continueSettlesAfterAbort") { + await new Promise((resolve) => { + resolveAbort = resolve; + }); + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "continued partial" }], + } as PiMessage); + return {}; + } + if (promptMode.value === "providerRetryThenHangs") { + await new Promise((resolve) => { + resolveAbort = resolve; + }); + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "continued partial" }], + stopReason: "stop", + } as PiMessage); + return {}; + } + + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "continued" }], + stopReason: "stop", + } as PiMessage); + return {}; + }, + async prompt(agent, message) { + agent.state.messages.push(message as PiMessage); + if (promptMode.value === "providerRetryThenHangs") { + await new Promise((resolve) => setTimeout(resolve, 8_000)); + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "provider error" }], + stopReason: "error", + errorMessage: "Provider returned error: 503 service unavailable", + } as PiMessage); + return {}; + } + if (promptMode.value === "hangsAfterAbort") { + await new Promise(() => undefined); + return {}; + } + await new Promise((resolve) => { + resolveAbort = resolve; + }); + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "partial" }], + } 
as PiMessage); + return {}; + }, +}); + +async function generateReply( + message: string, + options: TestReplyRequestContext = {}, +) { + const { harness, ...restOptions } = options; + return await generateAssistantReply(message, { + ...makeTestReplyContext(restOptions), + harness: { + agentFactory, + turnThinkingSelection, + ...harness, + }, + }); +} + +describe("generateAssistantReply timeout resume", () => { + beforeEach(async () => { + promptAborted.value = false; + promptMode.value = "settlesAfterAbort"; + resolveAbort = undefined; + await disconnectStateAdapter(); + mockTestClock(); + }); + + afterEach(async () => { + vi.useRealTimers(); + await disconnectStateAdapter(); + }); + + afterAll(() => { + restoreRespondRuntimeEnv(originalEnv); + }); + + it("rejects durable input when no prompt checkpoint can be persisted", async () => { + const onInputCommitted = vi.fn(); + + const error = await generateReply("help me", { + onInputCommitted, + }).catch((caught) => caught); + + expect(isTurnInputCommitLostError(error)).toBe(true); + expect(onInputCommitted).not.toHaveBeenCalled(); + }); + + it("stores the last safe boundary and throws a retryable timeout error", async () => { + const replyPromise = generateReply("help me", { + requester: { userId: "U123" }, + correlation: { + conversationId: "conversation-1", + turnId: "turn-1", + channelId: "C123", + threadTs: "1712345.0001", + }, + }).catch((caught) => caught); + + await vi.advanceTimersByTimeAsync(10_000); + const error = await replyPromise; + + expect(promptAborted.value).toBe(true); + expect(isRetryableTurnError(error, "turn_timeout_resume")).toBe(true); + expect(error.metadata).toMatchObject({ + conversationId: "conversation-1", + sessionId: "turn-1", + version: expect.any(Number), + sliceId: 2, + }); + + const sessionRecord = await getAgentTurnSessionRecord( + "conversation-1", + "turn-1", + ); + expect(sessionRecord).toMatchObject({ + state: "awaiting_resume", + resumeReason: "timeout", + resumedFromSliceId: 
1, + sliceId: 2, + }); + expect(sessionRecord?.piMessages).toEqual([ + expect.objectContaining({ + role: "user", + }), + ]); + }); + + it("throws terminal timeout failures instead of returning an error reply after the slice cap", async () => { + promptMode.value = "continueSettlesAfterAbort"; + const piMessages: PiMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "keep trying" }], + timestamp: 1, + } as PiMessage, + ]; + await upsertAgentTurnSessionRecord({ + conversationId: "conversation-timeout-cap", + sessionId: "turn-timeout-cap", + sliceId: AGENT_TURN_TIMEOUT_RESUME_MAX_SLICES, + state: "awaiting_resume", + piMessages, + resumeReason: "timeout", + }); + + const replyPromise = generateReply("help me", { + requester: { userId: "U123" }, + correlation: { + conversationId: "conversation-timeout-cap", + turnId: "turn-timeout-cap", + channelId: "C123", + threadTs: "1712345.0006", + }, + }).catch((caught) => caught); + + await vi.advanceTimersByTimeAsync(10_000); + const error = await replyPromise; + + expect(error).toBeInstanceOf(Error); + expect(error).not.toHaveProperty("text"); + expect(isRetryableTurnError(error, "turn_timeout_resume")).toBe(false); + expect(error.message).toContain("slice limit"); + + const sessionRecord = await getAgentTurnSessionRecord( + "conversation-timeout-cap", + "turn-timeout-cap", + ); + expect(sessionRecord).toMatchObject({ + state: "failed", + resumeReason: "timeout", + sliceId: AGENT_TURN_TIMEOUT_RESUME_MAX_SLICES, + errorMessage: expect.stringContaining("slice limit"), + }); + }); + + it("records the effective request deadline timeout budget", async () => { + const startedAtMs = Date.now(); + const replyPromise = generateReply("help me", { + requester: { userId: "U123" }, + turnDeadlineAtMs: startedAtMs + 2_500, + correlation: { + conversationId: "conversation-short-deadline", + turnId: "turn-short-deadline", + channelId: "C123", + threadTs: "1712345.0005", + }, + }).catch((caught) => caught); + + await 
vi.advanceTimersByTimeAsync(2_500); + const error = await replyPromise; + + expect(promptAborted.value).toBe(true); + expect(isRetryableTurnError(error, "turn_timeout_resume")).toBe(true); + const sessionRecord = await getAgentTurnSessionRecord( + "conversation-short-deadline", + "turn-short-deadline", + ); + expect(sessionRecord?.errorMessage).toBe( + "Agent turn timed out after 2500ms", + ); + }); + + it("persists omitted-image context in the session-recorded Pi user message", async () => { + const replyPromise = generateReply("what is in this image?", { + requester: { userId: "U123" }, + omittedImageAttachmentCount: 1, + correlation: { + conversationId: "conversation-2", + turnId: "turn-2", + channelId: "C123", + threadTs: "1712345.0002", + }, + }).catch((caught) => caught); + + await vi.advanceTimersByTimeAsync(10_000); + await replyPromise; + + const sessionRecord = await getAgentTurnSessionRecord( + "conversation-2", + "turn-2", + ); + const userMessage = sessionRecord?.piMessages[0] as + | { + role?: string; + content?: Array<{ type?: string; text?: string }>; + } + | undefined; + + expect(userMessage?.role).toBe("user"); + expect(userMessage?.content).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: "text", + text: expect.stringContaining(""), + }), + ]), + ); + }); + + it("persists agent continuation state when abort does not settle the agent run", async () => { + promptMode.value = "hangsAfterAbort"; + const replyPromise = generateReply("help me", { + requester: { userId: "U123" }, + correlation: { + conversationId: "conversation-hung", + turnId: "turn-hung", + channelId: "C123", + threadTs: "1712345.0003", + }, + }).catch((caught) => caught); + + await vi.advanceTimersByTimeAsync(15_000); + const error = await replyPromise; + + expect(promptAborted.value).toBe(true); + expect(isRetryableTurnError(error, "turn_timeout_resume")).toBe(true); + expect(error.metadata).toMatchObject({ + conversationId: "conversation-hung", + sessionId: 
"turn-hung", + version: expect.any(Number), + sliceId: 2, + }); + + const sessionRecord = await getAgentTurnSessionRecord( + "conversation-hung", + "turn-hung", + ); + expect(sessionRecord).toMatchObject({ + state: "awaiting_resume", + resumeReason: "timeout", + resumedFromSliceId: 1, + sliceId: 2, + }); + expect(sessionRecord?.piMessages).toEqual([ + expect.objectContaining({ + role: "user", + }), + ]); + }); + + it("uses one wall-clock timeout budget across provider retries", async () => { + promptMode.value = "providerRetryThenHangs"; + const replyPromise = generateReply("help me", { + requester: { userId: "U123" }, + correlation: { + conversationId: "conversation-retry", + turnId: "turn-retry", + channelId: "C123", + threadTs: "1712345.0004", + }, + }).catch((caught) => caught); + + await vi.advanceTimersByTimeAsync(10_000); + const error = await replyPromise; + + expect(promptAborted.value).toBe(true); + expect(isRetryableTurnError(error, "turn_timeout_resume")).toBe(true); + const sessionRecord = await getAgentTurnSessionRecord( + "conversation-retry", + "turn-retry", + ); + expect(sessionRecord).toMatchObject({ + state: "awaiting_resume", + resumeReason: "timeout", + resumedFromSliceId: 1, + sliceId: 2, + }); + expect(sessionRecord?.piMessages).toEqual([ + expect.objectContaining({ + role: "user", + }), + ]); + }); +}); diff --git a/packages/junior/tests/unit/handlers/oauth-resume.test.ts b/packages/junior/tests/component/runtime/slack-resume.test.ts similarity index 62% rename from packages/junior/tests/unit/handlers/oauth-resume.test.ts rename to packages/junior/tests/component/runtime/slack-resume.test.ts index 637990291..17b30d3a2 100644 --- a/packages/junior/tests/unit/handlers/oauth-resume.test.ts +++ b/packages/junior/tests/component/runtime/slack-resume.test.ts @@ -1,90 +1,70 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { RetryableTurnError } from "@/chat/runtime/turn"; -import { disconnectStateAdapter, 
getStateAdapter } from "@/chat/state/adapter"; - -const { logExceptionMock, postMessageMock, setStatusMock } = vi.hoisted(() => ({ - logExceptionMock: vi.fn(), - postMessageMock: vi.fn(), - setStatusMock: vi.fn(), -})); - -vi.mock("@/chat/config", async (importOriginal) => { - const original = await importOriginal(); - const memoryConfig = original.readChatConfig({ - ...process.env, - JUNIOR_STATE_ADAPTER: "memory", - }); - return { - ...original, - botConfig: memoryConfig.bot, - getChatConfig: () => memoryConfig, - }; -}); - -vi.mock("@/chat/slack/client", () => ({ - SlackActionError: class SlackActionError extends Error { - code: string; - - constructor(message: string, code: string) { - super(message); - this.name = "SlackActionError"; - this.code = code; - } - }, - normalizeSlackConversationId: (value: string | undefined) => value, - withSlackRetries: async (task: () => Promise) => await task(), - getSlackClient: () => ({ - chat: { - postMessage: postMessageMock, - }, - assistant: { - threads: { - setStatus: setStatusMock, - }, - }, - }), -})); +import type { ResumeSlackTurnServices } from "@/chat/runtime/slack-resume"; +import { + createOauthResumeSlackFixture, + makeResumeDiagnostics, +} from "../../fixtures/oauth/resume-slack"; +import { TEST_SLACK_DESTINATION } from "../../fixtures/reply-context"; +import { mockTestClock } from "../../fixtures/vitest"; -vi.mock("@/chat/logging", async (importOriginal) => { - const original = await importOriginal(); - return { - ...original, - logException: logExceptionMock, - }; -}); +type Testbed = Awaited>; -import { - resumeAuthorizedRequest, - resumeSlackTurn, -} from "@/chat/runtime/slack-resume"; +describe("Slack resume runtime", () => { + let testbed: Testbed; + let services: ResumeSlackTurnServices; -const TEST_SLACK_DESTINATION = { - platform: "slack", - teamId: "T-test", - channelId: "C-test", -} as const; + const logExceptionMock = vi.fn(); + const postMessageMock = vi.fn(); + const postReplyPostsMock = vi.fn(); 
+ const createAssistantStatusSessionMock = vi.fn(); + const startProcessingReactionMock = vi.fn(); -describe("resumeAuthorizedRequest", () => { beforeEach(async () => { - vi.useFakeTimers(); + testbed = await createOauthResumeSlackFixture(); + mockTestClock(); + logExceptionMock.mockReset(); logExceptionMock.mockReturnValue("evt_test"); postMessageMock.mockReset(); - setStatusMock.mockReset(); postMessageMock.mockResolvedValue({ ts: "1700000000.100" }); - setStatusMock.mockResolvedValue(undefined); - await disconnectStateAdapter(); + postReplyPostsMock.mockReset(); + postReplyPostsMock.mockResolvedValue("1700000000.200"); + createAssistantStatusSessionMock.mockReset(); + createAssistantStatusSessionMock.mockReturnValue({ + start: vi.fn(), + stop: vi.fn(async () => undefined), + update: vi.fn(), + }); + startProcessingReactionMock.mockReset(); + startProcessingReactionMock.mockResolvedValue({ + complete: vi.fn(async () => undefined), + keep: vi.fn(), + stop: vi.fn(async () => undefined), + }); + + services = { + createAssistantStatusSession: createAssistantStatusSessionMock, + generateAssistantReply: vi.fn(async () => ({ + text: "default resumed answer", + diagnostics: makeResumeDiagnostics(), + })), + getStateAdapter: testbed.getStateAdapter, + logException: logExceptionMock, + postSlackMessage: postMessageMock, + postSlackReplyPosts: postReplyPostsMock, + startProcessingReactionForMessage: startProcessingReactionMock, + }; }); afterEach(async () => { vi.useRealTimers(); - await disconnectStateAdapter(); + await testbed.cleanup(); }); it("fails fast when resumed reply generation exceeds the configured timeout", async () => { const onFailure = vi.fn(async () => undefined); - const resumePromise = resumeAuthorizedRequest({ + const resumePromise = testbed.resumeAuthorizedRequest({ messageText: "tell me the saved deadline", channelId: "C-test", threadTs: "1700000000.0001", @@ -99,6 +79,7 @@ describe("resumeAuthorizedRequest", () => { generateReply: () => new Promise(() 
=> {}), replyTimeoutMs: 10, onFailure, + services, }); await vi.advanceTimersByTimeAsync(10); @@ -107,8 +88,8 @@ describe("resumeAuthorizedRequest", () => { expect(onFailure).toHaveBeenCalledTimes(1); expect(postMessageMock).toHaveBeenLastCalledWith( expect.objectContaining({ - channel: "C-test", - thread_ts: "1700000000.0001", + channelId: "C-test", + threadTs: "1700000000.0001", text: expect.stringContaining( "I ran into an internal error while processing that. Reference: `event_id=", ), @@ -121,7 +102,7 @@ describe("resumeAuthorizedRequest", () => { logExceptionMock.mockReturnValueOnce(undefined); await expect( - resumeAuthorizedRequest({ + testbed.resumeAuthorizedRequest({ messageText: "tell me the saved deadline", channelId: "C-test", threadTs: "1700000000.0004", @@ -137,6 +118,7 @@ describe("resumeAuthorizedRequest", () => { throw new Error("resume failed"); }, onFailure, + services, }), ).rejects.toThrow( "Sentry did not return an event ID for slack_resume_turn_failed", @@ -146,15 +128,15 @@ describe("resumeAuthorizedRequest", () => { expect(postMessageMock).toHaveBeenCalledTimes(1); expect(postMessageMock).toHaveBeenCalledWith( expect.objectContaining({ - channel: "C-test", - thread_ts: "1700000000.0004", + channelId: "C-test", + threadTs: "1700000000.0004", text: "connected", }), ); expect(postMessageMock).not.toHaveBeenCalledWith( expect.objectContaining({ - channel: "C-test", - thread_ts: "1700000000.0004", + channelId: "C-test", + threadTs: "1700000000.0004", text: expect.stringContaining("event_id=unknown"), }), ); @@ -164,7 +146,7 @@ describe("resumeAuthorizedRequest", () => { const onFailure = vi.fn(async () => undefined); await expect( - resumeSlackTurn({ + testbed.resumeSlackTurn({ messageText: "continue this turn", channelId: "C-test", threadTs: "1700000000.0005", @@ -177,35 +159,32 @@ describe("resumeAuthorizedRequest", () => { }, generateReply: async () => ({ text: "Final resumed answer", - diagnostics: { - assistantMessageCount: 1, - modelId: 
"fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, + diagnostics: makeResumeDiagnostics(), }), onSuccess: async () => { throw new Error("state write failed"); }, onFailure, + services, }), ).rejects.toThrow("state write failed"); expect(onFailure).not.toHaveBeenCalled(); - expect(postMessageMock).toHaveBeenCalledWith( + expect(postReplyPostsMock).toHaveBeenCalledWith( expect.objectContaining({ - channel: "C-test", - thread_ts: "1700000000.0005", - text: expect.stringContaining("Final resumed answer"), + channelId: "C-test", + threadTs: "1700000000.0005", + posts: expect.arrayContaining([ + expect.objectContaining({ + text: expect.stringContaining("Final resumed answer"), + }), + ]), }), ); expect(postMessageMock).not.toHaveBeenCalledWith( expect.objectContaining({ - channel: "C-test", - thread_ts: "1700000000.0005", + channelId: "C-test", + threadTs: "1700000000.0005", text: expect.stringContaining( "I ran into an internal error while processing that.", ), @@ -215,7 +194,7 @@ describe("resumeAuthorizedRequest", () => { it("releases the thread lock before scheduling another timeout slice", async () => { const onTimeoutPause = vi.fn(async () => { - const stateAdapter = getStateAdapter(); + const stateAdapter = testbed.getStateAdapter(); await stateAdapter.connect(); const lock = await stateAdapter.acquireLock( "slack:C-test:1700000000.0002", @@ -227,7 +206,7 @@ describe("resumeAuthorizedRequest", () => { } }); - await resumeSlackTurn({ + await testbed.resumeSlackTurn({ messageText: "continue this turn", channelId: "C-test", threadTs: "1700000000.0002", @@ -239,14 +218,19 @@ describe("resumeAuthorizedRequest", () => { requester: { platform: "slack", teamId: "T-test", userId: "U-test" }, }, generateReply: async () => { - throw new RetryableTurnError("agent_continue", "timed out again", { - conversationId: "conversation-1", - sessionId: "turn-1", - version: 3, - sliceId: 3, - }); + throw 
new testbed.RetryableTurnError( + "turn_timeout_resume", + "timed out again", + { + conversationId: "conversation-1", + sessionId: "turn-1", + version: 3, + sliceId: 3, + }, + ); }, onTimeoutPause, + services, }); expect(onTimeoutPause).toHaveBeenCalledTimes(1); @@ -256,7 +240,7 @@ describe("resumeAuthorizedRequest", () => { it("posts the canonical failure response when timeout pause handling throws", async () => { const onFailure = vi.fn(async () => undefined); - await resumeSlackTurn({ + await testbed.resumeSlackTurn({ messageText: "continue this turn", channelId: "C-test", threadTs: "1700000000.0003", @@ -268,24 +252,29 @@ describe("resumeAuthorizedRequest", () => { requester: { platform: "slack", teamId: "T-test", userId: "U-test" }, }, generateReply: async () => { - throw new RetryableTurnError("agent_continue", "timed out again", { - conversationId: "conversation-1", - sessionId: "turn-1", - version: 3, - sliceId: 6, - }); + throw new testbed.RetryableTurnError( + "turn_timeout_resume", + "timed out again", + { + conversationId: "conversation-1", + sessionId: "turn-1", + version: 3, + sliceId: 6, + }, + ); }, onTimeoutPause: async () => { throw new Error("continuation scheduling failed"); }, onFailure, + services, }); expect(onFailure).toHaveBeenCalledTimes(1); expect(postMessageMock).toHaveBeenCalledWith( expect.objectContaining({ - channel: "C-test", - thread_ts: "1700000000.0003", + channelId: "C-test", + threadTs: "1700000000.0003", text: expect.stringContaining( "I ran into an internal error while processing that. 
Reference: `event_id=", ), diff --git a/packages/junior/tests/component/runtime/timeout-resume-runner-lifecycle.test.ts b/packages/junior/tests/component/runtime/timeout-resume-runner-lifecycle.test.ts new file mode 100644 index 000000000..e26ccebf0 --- /dev/null +++ b/packages/junior/tests/component/runtime/timeout-resume-runner-lifecycle.test.ts @@ -0,0 +1,146 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + resumeTimedOutTurn, + type TimeoutResumeRunnerOptions, +} from "@/chat/runtime/timeout-resume-runner"; +import { getPersistedThreadState } from "@/chat/runtime/thread-state"; +import { RetryableTurnError } from "@/chat/runtime/turn"; +import { getStateAdapter } from "@/chat/state/adapter"; +import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; +import { + cleanupTimeoutResumeRunnerTest, + createResumeSlackTurnMock, + createTimeoutResumeScenario, + prepareResumeArgs, + setupTimeoutResumeRunnerTest, + TIMEOUT_RESUME_DESTINATION, +} from "../../fixtures/timeout-resume-runner"; + +describe("timeout resume runner lifecycle", () => { + beforeEach(setupTimeoutResumeRunnerTest); + afterEach(cleanupTimeoutResumeRunnerTest); + + it("drops stale callbacks after the resume lock is acquired", async () => { + const { conversationId, payload, sessionId, sessionRecord } = + await createTimeoutResumeScenario({ + conversationId: "slack:C123:1712345.0000", + messageId: "msg.0", + sessionId: "turn_msg_0", + }); + const resumeSlackTurn = createResumeSlackTurnMock(); + resumeSlackTurn.mockImplementationOnce(async (args) => { + await upsertAgentTurnSessionRecord({ + conversationId, + sessionId, + sliceId: sessionRecord.sliceId, + state: "completed", + piMessages: sessionRecord.piMessages, + }); + + return (await prepareResumeArgs(args)) !== false; + }); + + await expect( + resumeTimedOutTurn(payload, { resumeSlackTurn }), + ).resolves.toBe(false); + }); + + it("re-enqueues the next slice when a resumed turn times out 
again", async () => { + const { conversationId, payload, sessionId, sessionRecord } = + await createTimeoutResumeScenario({ + conversationId: "slack:C123:1712345.0001", + }); + const resumeSlackTurn = createResumeSlackTurnMock(); + const scheduleTurnTimeoutResume = vi + .fn< + NonNullable + >() + .mockResolvedValue(undefined); + resumeSlackTurn.mockImplementationOnce(async (args) => { + const runArgs = await prepareResumeArgs(args); + if (runArgs === false) return false; + await runArgs.onTimeoutPause?.( + new RetryableTurnError("turn_timeout_resume", "timed out again", { + conversationId, + sessionId, + version: sessionRecord.version + 1, + sliceId: sessionRecord.sliceId + 1, + }), + ); + return true; + }); + + await expect( + resumeTimedOutTurn(payload, { + resumeSlackTurn, + scheduleTurnTimeoutResume, + }), + ).resolves.toBe(true); + + expect(scheduleTurnTimeoutResume).toHaveBeenCalledWith({ + conversationId, + destination: TIMEOUT_RESUME_DESTINATION, + sessionId, + expectedVersion: sessionRecord.version + 1, + }); + }); + + it("persists timeout-resume failure state when continuation scheduling fails", async () => { + const { conversationId, payload, sessionId, sessionRecord } = + await createTimeoutResumeScenario({ + conversationId: "slack:C123:1712345.0003", + sliceId: 5, + }); + const resumeSlackTurn = createResumeSlackTurnMock(); + const scheduleTurnTimeoutResume = vi + .fn< + NonNullable + >() + .mockRejectedValueOnce(new Error("queue unavailable")); + resumeSlackTurn.mockImplementationOnce(async (args) => { + const runArgs = await prepareResumeArgs(args); + if (runArgs === false) return false; + try { + await runArgs.onTimeoutPause?.( + new RetryableTurnError("turn_timeout_resume", "timed out again", { + conversationId, + sessionId, + version: sessionRecord.version + 1, + sliceId: 6, + }), + ); + } catch (error) { + const adapter = getStateAdapter(); + const originalGet = adapter.get.bind(adapter); + vi.spyOn(adapter, "get").mockImplementation(async (key: 
string) => { + if (key.startsWith("junior:agent_turn_session:")) { + throw new Error("session record store unavailable"); + } + return await originalGet(key); + }); + await runArgs.onFailure?.(error); + } + return true; + }); + + await expect( + resumeTimedOutTurn(payload, { + resumeSlackTurn, + scheduleTurnTimeoutResume, + }), + ).resolves.toBe(true); + + expect(scheduleTurnTimeoutResume).toHaveBeenCalledWith({ + conversationId, + destination: TIMEOUT_RESUME_DESTINATION, + sessionId, + expectedVersion: sessionRecord.version + 1, + }); + + const persisted = await getPersistedThreadState(conversationId); + const conversation = (persisted.conversation ?? {}) as { + processing?: { activeTurnId?: string }; + }; + expect(conversation.processing?.activeTurnId).toBeUndefined(); + }); +}); diff --git a/packages/junior/tests/component/runtime/timeout-resume-runner-lock-retry.test.ts b/packages/junior/tests/component/runtime/timeout-resume-runner-lock-retry.test.ts new file mode 100644 index 000000000..7ab2f4e79 --- /dev/null +++ b/packages/junior/tests/component/runtime/timeout-resume-runner-lock-retry.test.ts @@ -0,0 +1,71 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + resumeTimedOutTurnWithLockRetry, + type TimeoutResumeRunnerOptions, +} from "@/chat/runtime/timeout-resume-runner"; +import { ResumeTurnBusyError } from "@/chat/runtime/slack-resume"; +import { + cleanupTimeoutResumeRunnerTest, + createResumeSlackTurnMock, + setupTimeoutResumeRunnerTest, + TIMEOUT_RESUME_DESTINATION, +} from "../../fixtures/timeout-resume-runner"; +import { mockTestClock } from "../../fixtures/vitest"; + +describe("timeout resume runner lock retry", () => { + beforeEach(async () => { + mockTestClock(); + await setupTimeoutResumeRunnerTest(); + }); + + afterEach(cleanupTimeoutResumeRunnerTest); + + it("retries when the timeout-resume callback races the active thread lock", async () => { + const conversationId = "slack:C123:1712345.0005"; + const 
payload = { + conversationId, + destination: TIMEOUT_RESUME_DESTINATION, + sessionId: "turn_msg_5", + expectedVersion: 1, + }; + const resumeSlackTurn = createResumeSlackTurnMock(); + resumeSlackTurn + .mockRejectedValueOnce(new ResumeTurnBusyError(conversationId)) + .mockResolvedValueOnce(true); + + const result = resumeTimedOutTurnWithLockRetry(payload, { + resumeSlackTurn, + }); + await vi.runOnlyPendingTimersAsync(); + + await expect(result).resolves.toBe(true); + expect(resumeSlackTurn).toHaveBeenCalledTimes(2); + }); + + it("reschedules when the timeout-resume callback remains lock-busy", async () => { + const conversationId = "slack:C123:1712345.0006"; + const payload = { + conversationId, + destination: TIMEOUT_RESUME_DESTINATION, + sessionId: "turn_msg_6", + expectedVersion: 1, + }; + const resumeSlackTurn = createResumeSlackTurnMock(); + const scheduleTurnTimeoutResume = vi + .fn< + NonNullable + >() + .mockResolvedValue(undefined); + resumeSlackTurn.mockRejectedValue(new ResumeTurnBusyError(conversationId)); + + const result = resumeTimedOutTurnWithLockRetry(payload, { + resumeSlackTurn, + scheduleTurnTimeoutResume, + }); + await vi.runAllTimersAsync(); + + await expect(result).resolves.toBe(true); + expect(resumeSlackTurn).toHaveBeenCalledTimes(4); + expect(scheduleTurnTimeoutResume).toHaveBeenCalledWith(payload); + }); +}); diff --git a/packages/junior/tests/component/sandbox/bash-tool-adapter.test.ts b/packages/junior/tests/component/sandbox/bash-tool-adapter.test.ts new file mode 100644 index 000000000..154fecb6b --- /dev/null +++ b/packages/junior/tests/component/sandbox/bash-tool-adapter.test.ts @@ -0,0 +1,73 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { createBashTool as createRealBashTool } from "bash-tool"; + +import { createSandboxSessionManager } from "@/chat/sandbox/session"; +import { makeSandbox, sandboxGetMock } from "../../fixtures/sandbox/executor"; + +const createSandboxMock = vi.fn(); +const 
resolveRuntimeDependencySnapshotMock = vi.fn(); + +function sandboxSessionServices() { + return { + createBashTool: createRealBashTool, + createSandbox: createSandboxMock as never, + getRuntimeDependencyProfileHash: () => undefined, + getSandbox: sandboxGetMock as never, + isSnapshotMissingError: () => false, + resolveRuntimeDependencySnapshot: + resolveRuntimeDependencySnapshotMock as never, + }; +} + +describe("bash-tool sandbox adapter", () => { + beforeEach(() => { + createSandboxMock.mockReset(); + resolveRuntimeDependencySnapshotMock.mockReset(); + sandboxGetMock.mockReset(); + }); + + it("lets real bash-tool initialize against Vercel Sandbox v2 shape", async () => { + const sandbox = makeSandbox("sbx_adapter_contract"); + sandbox.readFileToBuffer.mockResolvedValue(Buffer.from("file content")); + sandbox.runCommand.mockImplementation( + async (params: { cmd: string; args?: string[] }) => ({ + exitCode: 0, + stdout: async () => + params.cmd === "bash" && + params.args?.[0] === "-c" && + params.args[1]?.startsWith("ls /usr/bin") + ? 
"grep\nsed\ncat\n" + : "command stdout", + stderr: async () => "", + }), + ); + sandboxGetMock.mockResolvedValue(sandbox); + const manager = createSandboxSessionManager( + { sandboxId: "sbx_adapter_contract" }, + sandboxSessionServices(), + ); + + const executors = await manager.ensureToolExecutors(); + + expect(sandbox.runCommand).toHaveBeenCalledWith({ + cmd: "bash", + args: ["-c", expect.stringContaining("ls /usr/bin")], + }); + await expect(executors.readFile({ path: "file.txt" })).resolves.toEqual({ + content: "file content", + }); + await expect( + executors.writeFile({ path: "out.txt", content: "written" }), + ).resolves.toEqual({ success: true }); + + expect(sandbox.readFileToBuffer).toHaveBeenCalledWith({ + path: "/vercel/sandbox/file.txt", + }); + expect(sandbox.writeFiles).toHaveBeenCalledWith([ + { + path: "/vercel/sandbox/out.txt", + content: "written", + }, + ]); + }); +}); diff --git a/packages/junior/tests/component/sandbox/executor-bash.test.ts b/packages/junior/tests/component/sandbox/executor-bash.test.ts new file mode 100644 index 000000000..118aa211c --- /dev/null +++ b/packages/junior/tests/component/sandbox/executor-bash.test.ts @@ -0,0 +1,554 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + setSandboxEgressAuthRequiredSignal, + setSandboxEgressPermissionDeniedSignal, +} from "@/chat/sandbox/egress-session"; +import { + createSandboxExecutor, + createSandboxSessionManager, + createStreamInterruptedError, + credentialTokenFromForwardURL, + makeSandbox, + parseSandboxEgressCredentialToken, + sandboxGetMock, + sentryForwardURLFromPolicy, + setupSandboxExecutorTest, + cleanupSandboxExecutorTest, +} from "../../fixtures/sandbox/executor"; +import { mockTestClock } from "../../fixtures/vitest"; + +describe("sandbox executor bash execution", () => { + beforeEach(setupSandboxExecutorTest); + + afterEach(cleanupSandboxExecutorTest); + + it("runs bash commands through a noninteractive shell", async () => { + 
const sandbox = makeSandbox("sbx_bash"); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ sandboxId: "sbx_bash" }); + executor.configureSkills([]); + + await executor.execute({ + toolName: "bash", + input: { + command: "echo ok", + }, + }); + + const invocation = sandbox.runCommand.mock.calls[0]?.[0]; + expect(invocation).toMatchObject({ + cmd: "bash", + cwd: "/vercel/sandbox", + }); + expect(invocation.args?.[0]).toBe("-c"); + expect(invocation.args?.[1]).toContain( + 'export PATH="/vercel/sandbox/.junior/bin:$PATH"', + ); + expect(invocation.args?.[1]).toContain("export CI='1'"); + expect(invocation.args?.[1]).toContain("export TERM='dumb'"); + expect(invocation.args?.[1]).toContain("export GH_PROMPT_DISABLED='1'"); + expect(invocation.args?.[1]).toContain("export GIT_TERMINAL_PROMPT='0'"); + expect(invocation.args?.[1]).toContain("exec { + mockTestClock(); + const sandbox = makeSandbox("sbx_bash_timeout"); + sandbox.runCommand.mockImplementationOnce( + async (input) => + await new Promise((_, reject) => { + input.signal?.addEventListener("abort", () => { + reject(new Error("aborted")); + }); + }), + ); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ sandboxId: "sbx_bash_timeout" }); + executor.configureSkills([]); + + const responsePromise = executor.execute({ + toolName: "bash", + input: { + command: "sleep 999", + }, + }); + + await vi.advanceTimersByTimeAsync(5 * 60 * 1000); + const response = await responsePromise; + + expect(response.result).toMatchObject({ + ok: false, + exit_code: 124, + timed_out: true, + stderr: "Command timed out after 300000ms", + }); + }); + + it("aborts bash commands when the agent turn is cancelled", async () => { + const sandbox = makeSandbox("sbx_bash_abort"); + sandbox.runCommand.mockImplementationOnce( + async (input) => + await new Promise((_, reject) => { + input.signal?.addEventListener("abort", () => { + reject(new Error("aborted")); + }); + 
}), + ); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ sandboxId: "sbx_bash_abort" }); + executor.configureSkills([]); + const abortController = new AbortController(); + + const responsePromise = executor.execute({ + toolName: "bash", + input: { + command: "sleep 999", + }, + signal: abortController.signal, + }); + + await Promise.resolve(); + abortController.abort(); + const response = await responsePromise; + + expect(response.result).toMatchObject({ + ok: false, + exit_code: 130, + timed_out: false, + stderr: "Command aborted because the agent turn was cancelled.", + }); + }); + + it("resolves sandbox command environment for each bash command", async () => { + const sandbox = makeSandbox("sbx_dynamic_env"); + sandboxGetMock.mockResolvedValue(sandbox); + const commandEnv = vi + .fn<() => Promise>>() + .mockResolvedValueOnce({ + GIT_AUTHOR_NAME: "first-bot", + }) + .mockResolvedValueOnce({ + GIT_AUTHOR_NAME: "second-bot", + }); + + const manager = createSandboxSessionManager({ + sandboxId: "sbx_dynamic_env", + commandEnv, + }); + const bash = (await manager.ensureToolExecutors()).bash; + + await bash({ command: "git commit --allow-empty -m first" }); + await bash({ command: "git commit --allow-empty -m second" }); + + expect(commandEnv).toHaveBeenCalledTimes(2); + expect(sandbox.runCommand.mock.calls[0]?.[0].args?.[1]).toContain( + "export GIT_AUTHOR_NAME='first-bot'", + ); + expect(sandbox.runCommand.mock.calls[1]?.[0].args?.[1]).toContain( + "export GIT_AUTHOR_NAME='second-bot'", + ); + }); + + it("configures lazy user actor auth for sandbox egress", async () => { + const sandbox = makeSandbox("sbx_authorize_credentials"); + sandbox.runCommand.mockImplementationOnce(async () => { + const activePolicy = sandbox.update.mock.calls.at(-1)?.[0].networkPolicy; + const activeCredentialToken = credentialTokenFromForwardURL( + sentryForwardURLFromPolicy(activePolicy), + ); + + expect( + 
parseSandboxEgressCredentialToken(activeCredentialToken), + ).toMatchObject({ + credentials: { actor: { type: "user", userId: "U123" } }, + egressId: "sbx_authorize_credentials_session", + }); + return { + exitCode: 0, + stdout: async () => "", + stderr: async () => "", + }; + }); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_authorize_credentials", + credentialEgress: { + actor: { type: "user", userId: "U123" }, + }, + }); + executor.configureSkills([]); + + await executor.execute({ + toolName: "bash", + input: { + command: "sentry-cli issues list", + }, + }); + + expect(sandbox.update).toHaveBeenCalledTimes(1); + expect( + credentialTokenFromForwardURL( + sentryForwardURLFromPolicy( + sandbox.update.mock.calls[0]?.[0].networkPolicy, + ), + ), + ).toBeTruthy(); + const invocation = sandbox.runCommand.mock.calls[0]?.[0]; + expect(invocation.args?.[1]).toContain( + "export SENTRY_AUTH_TOKEN='host_managed_credential'", + ); + expect(invocation.args?.[1]).toContain("sentry-cli issues list"); + }); + + it("clears stale sandbox egress signals before running bash commands", async () => { + const sandbox = makeSandbox("sbx_stale_auth_signal"); + sandbox.runCommand.mockImplementationOnce(async () => ({ + exitCode: 1, + stdout: async () => "", + stderr: async () => "command-controlled output", + })); + sandboxGetMock.mockResolvedValue(sandbox); + await setSandboxEgressAuthRequiredSignal( + { + credentials: { actor: { type: "user", userId: "U123" } }, + egressId: "sbx_stale_auth_signal_session", + expiresAtMs: Date.now() + 60_000, + contextId: "ctx-stale", + }, + { + provider: "github", + grant: { + name: "user-write", + access: "write", + }, + }, + ); + await setSandboxEgressPermissionDeniedSignal( + { + credentials: { actor: { type: "user", userId: "U123" } }, + egressId: "sbx_stale_auth_signal_session", + expiresAtMs: Date.now() + 60_000, + contextId: "ctx-stale-permission", + }, + { + provider: "github", + 
grant: { + name: "user-write", + access: "write", + }, + message: + "github returned HTTP 403 after Junior injected the user-write grant. Junior forwarded the request; this is not a local runtime block.", + source: "upstream", + status: 403, + upstreamHost: "github.com", + upstreamPath: "/getsentry/junior.git/info/refs", + }, + ); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_stale_auth_signal", + }); + executor.configureSkills([]); + + const response = await executor.execute<{ + auth_required?: unknown; + exit_code: number; + permission_denied?: unknown; + }>({ + toolName: "bash", + input: { + command: "printf stale", + }, + }); + + expect(response.result.exit_code).toBe(1); + expect(response.result.auth_required).toBeUndefined(); + expect(response.result.permission_denied).toBeUndefined(); + }); + + it("attaches sandbox egress auth signals to failed bash results", async () => { + const sandbox = makeSandbox("sbx_fresh_auth_signal"); + sandbox.runCommand.mockImplementationOnce(async () => { + await setSandboxEgressAuthRequiredSignal( + { + credentials: { actor: { type: "user", userId: "U123" } }, + egressId: "sbx_fresh_auth_signal_session", + expiresAtMs: Date.now() + 60_000, + contextId: "ctx-fresh", + }, + { + provider: "github", + grant: { + name: "user-write", + access: "write", + }, + }, + ); + return { + exitCode: 1, + stdout: async () => "", + stderr: async () => + "junior-auth-required provider=github grant=user-write access=write 401 unauthorized", + }; + }); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_fresh_auth_signal", + }); + executor.configureSkills([]); + + const response = await executor.execute<{ + auth_required?: unknown; + exit_code: number; + }>({ + toolName: "bash", + input: { + command: "gh issue create", + }, + }); + + expect(response.result.exit_code).toBe(1); + expect(response.result.auth_required).toMatchObject({ + provider: "github", + grant: { + name: 
"user-write", + access: "write", + }, + }); + }); + + it("attaches sandbox egress permission signals to failed bash results", async () => { + const sandbox = makeSandbox("sbx_permission_signal"); + sandbox.runCommand.mockImplementationOnce(async () => { + await setSandboxEgressPermissionDeniedSignal( + { + credentials: { actor: { type: "user", userId: "U123" } }, + egressId: "sbx_permission_signal_session", + expiresAtMs: Date.now() + 60_000, + contextId: "ctx-permission", + }, + { + provider: "github", + grant: { + name: "user-write", + access: "write", + reason: "github.git-write", + }, + message: + "github returned HTTP 403 after Junior injected the user-write grant. Junior forwarded the request; this is not a local runtime block.", + source: "upstream", + status: 403, + upstreamHost: "github.com", + upstreamPath: "/getsentry/junior.git/info/refs", + acceptedPermissions: "contents=write", + }, + ); + return { + exitCode: 1, + stdout: async () => "", + stderr: async () => "remote: Permission denied", + }; + }); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_permission_signal", + }); + executor.configureSkills([]); + + const response = await executor.execute<{ + exit_code: number; + permission_denied?: unknown; + }>({ + toolName: "bash", + input: { + command: "git push", + }, + }); + + expect(response.result.exit_code).toBe(1); + expect(response.result.permission_denied).toMatchObject({ + provider: "github", + grant: { + name: "user-write", + access: "write", + reason: "github.git-write", + }, + message: + "github returned HTTP 403 after Junior injected the user-write grant. 
Junior forwarded the request; this is not a local runtime block.", + source: "upstream", + status: 403, + upstreamHost: "github.com", + upstreamPath: "/getsentry/junior.git/info/refs", + acceptedPermissions: "contents=write", + }); + }); + + it("prefers write sandbox egress auth signals over read signals", async () => { + const sandbox = makeSandbox("sbx_mixed_auth_signal"); + sandbox.runCommand.mockImplementationOnce(async () => { + const context = { + credentials: { actor: { type: "user" as const, userId: "U123" } }, + egressId: "sbx_mixed_auth_signal_session", + expiresAtMs: Date.now() + 60_000, + contextId: "ctx-mixed", + }; + await setSandboxEgressAuthRequiredSignal(context, { + provider: "github", + grant: { + name: "user-write", + access: "write", + }, + }); + await setSandboxEgressAuthRequiredSignal(context, { + provider: "github", + grant: { + name: "installation-read", + access: "read", + }, + }); + return { + exitCode: 1, + stdout: async () => "", + stderr: async () => + "junior-auth-required provider=github grant=user-write access=write 401 unauthorized", + }; + }); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_mixed_auth_signal", + }); + executor.configureSkills([]); + + const response = await executor.execute<{ + auth_required?: unknown; + exit_code: number; + }>({ + toolName: "bash", + input: { + command: "gh issue create", + }, + }); + + expect(response.result.exit_code).toBe(1); + expect(response.result.auth_required).toMatchObject({ + provider: "github", + grant: { + name: "user-write", + access: "write", + }, + }); + }); + + it("configures lazy system actor credential context for sandbox egress", async () => { + const sandbox = makeSandbox("sbx_authorize_system_credentials"); + sandbox.runCommand.mockImplementationOnce(async () => { + const activePolicy = sandbox.update.mock.calls.at(-1)?.[0].networkPolicy; + const activeCredentialToken = credentialTokenFromForwardURL( + 
sentryForwardURLFromPolicy(activePolicy), + ); + + expect( + parseSandboxEgressCredentialToken(activeCredentialToken), + ).toMatchObject({ + credentials: { actor: { type: "system", id: "scheduler" } }, + egressId: "sbx_authorize_system_credentials_session", + }); + return { + exitCode: 0, + stdout: async () => "", + stderr: async () => "", + }; + }); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_authorize_system_credentials", + credentialEgress: { + actor: { type: "system", id: "scheduler" }, + }, + }); + executor.configureSkills([]); + + await executor.execute({ + toolName: "bash", + input: { + command: "sentry-cli issues list", + }, + }); + + expect(sandbox.update).toHaveBeenCalledTimes(1); + const invocation = sandbox.runCommand.mock.calls[0]?.[0]; + expect(invocation.args?.[1]).toContain( + "export SENTRY_AUTH_TOKEN='host_managed_credential'", + ); + expect(invocation.args?.[1]).toContain("sentry-cli issues list"); + }); + + it("makes registered provider placeholders available to sandbox commands", async () => { + const sandbox = makeSandbox("sbx_registered_credentials"); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_registered_credentials", + credentialEgress: { + actor: { type: "user", userId: "U123" }, + }, + }); + executor.configureSkills([]); + + await executor.execute({ + toolName: "bash", + input: { + command: "echo local-only", + }, + }); + + expect(sandbox.update).toHaveBeenCalledTimes(1); + expect( + credentialTokenFromForwardURL( + sentryForwardURLFromPolicy( + sandbox.update.mock.calls[0]?.[0].networkPolicy, + ), + ), + ).toBeTruthy(); + const invocation = sandbox.runCommand.mock.calls[0]?.[0]; + expect(invocation.args?.[1]).toContain( + "export SENTRY_AUTH_TOKEN='host_managed_credential'", + ); + expect(invocation.args?.[1]).toContain("echo local-only"); + }); + + it("returns a failed bash result when the command stream ends 
without a status", async () => { + const streamError = createStreamInterruptedError(); + const sandbox = makeSandbox("sbx_stream_interrupted"); + sandbox.runCommand.mockRejectedValueOnce(streamError); + sandboxGetMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_stream_interrupted", + }); + executor.configureSkills([]); + + const response = await executor.execute({ + toolName: "bash", + input: { + command: "pnpm test", + }, + }); + + expect(response.result).toMatchObject({ + ok: false, + exit_code: 125, + stderr: + "Command stream ended before the command finished. The command may still have produced side effects; inspect the workspace or rerun only if it is safe.", + }); + }); +}); diff --git a/packages/junior/tests/component/sandbox/executor-lifecycle.test.ts b/packages/junior/tests/component/sandbox/executor-lifecycle.test.ts new file mode 100644 index 000000000..e213fe394 --- /dev/null +++ b/packages/junior/tests/component/sandbox/executor-lifecycle.test.ts @@ -0,0 +1,367 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + createApiError, + createSandboxExecutor, + createSandboxSessionManager, + cleanupSandboxExecutorTest, + expectWorkspaceToDelegate, + getRuntimeDependencyProfileHashMock, + makeSandbox, + sandboxCreateMock, + sandboxGetMock, + setupSandboxExecutorTest, +} from "../../fixtures/sandbox/executor"; + +describe("sandbox executor lifecycle", () => { + beforeEach(setupSandboxExecutorTest); + + afterEach(cleanupSandboxExecutorTest); + + it("recreates a sandbox when sandboxId hint points to a stopped sandbox", async () => { + const stoppedSandbox = makeSandbox("sbx_stopped", { + mkDirError: createApiError( + 410, + "Gone", + "sandbox_stopped", + "Sandbox has stopped execution and is no longer available", + ), + }); + const freshSandbox = makeSandbox("sbx_fresh"); + + sandboxGetMock.mockResolvedValue(stoppedSandbox); + sandboxCreateMock.mockResolvedValue(freshSandbox); + 
+ const executor = createSandboxExecutor({ sandboxId: "sbx_stopped" }); + executor.configureSkills([]); + + const sandbox = await executor.createSandbox(); + + await expectWorkspaceToDelegate(sandbox, freshSandbox); + expect(sandboxGetMock).toHaveBeenCalledWith({ + name: "sbx_stopped", + resume: true, + }); + expect(sandboxCreateMock).toHaveBeenCalledTimes(1); + expect(stoppedSandbox.mkDir).toHaveBeenCalled(); + expect(freshSandbox.mkDir).toHaveBeenCalled(); + expect(executor.getSandboxId()).toBe("sbx_fresh"); + }); + + it("reports acquired sandbox metadata immediately after fresh sandbox boot", async () => { + const freshSandbox = makeSandbox("sbx_fresh"); + const onSandboxAcquired = vi.fn(); + sandboxCreateMock.mockResolvedValue(freshSandbox); + + const executor = createSandboxExecutor({ + onSandboxAcquired, + }); + executor.configureSkills([]); + + await executor.createSandbox(); + await executor.createSandbox(); + + expect(onSandboxAcquired).toHaveBeenCalledTimes(1); + expect(onSandboxAcquired).toHaveBeenCalledWith({ + sandboxId: "sbx_fresh", + }); + }); + + it("shares in-flight sandbox setup across parallel executor initialization", async () => { + const freshSandbox = makeSandbox("sbx_parallel_boot"); + sandboxCreateMock.mockResolvedValue(freshSandbox); + + let markPrepareStarted: () => void = () => {}; + let releasePrepare: () => void = () => {}; + const prepareStarted = new Promise((resolve) => { + markPrepareStarted = resolve; + }); + const prepareReleased = new Promise((resolve) => { + releasePrepare = resolve; + }); + const onSandboxPrepare = vi.fn(async () => { + markPrepareStarted(); + await prepareReleased; + }); + const manager = createSandboxSessionManager({ + onSandboxPrepare, + }); + manager.configureSkills([]); + + const first = manager.ensureToolExecutors(); + await prepareStarted; + const second = manager.ensureToolExecutors(); + await new Promise((resolve) => setTimeout(resolve, 0)); + + expect(sandboxCreateMock).toHaveBeenCalledTimes(1); + 
expect(onSandboxPrepare).toHaveBeenCalledTimes(1); + + releasePrepare(); + const [firstExecutors, secondExecutors] = await Promise.all([ + first, + second, + ]); + + expect(firstExecutors).toBe(secondExecutors); + }); + + it("reports acquired sandbox metadata when restoring from a sandbox id hint", async () => { + const restoredSandbox = makeSandbox("sbx_restored"); + const onSandboxAcquired = vi.fn(); + sandboxGetMock.mockResolvedValue(restoredSandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_restored", + onSandboxAcquired, + }); + executor.configureSkills([]); + + await executor.createSandbox(); + + expect(onSandboxAcquired).toHaveBeenCalledTimes(1); + expect(onSandboxAcquired).toHaveBeenCalledWith({ + sandboxId: "sbx_restored", + }); + }); + + it("refreshes network policy when restoring from a sandbox id hint", async () => { + const restoredSandbox = makeSandbox("sbx_restored"); + const networkPolicy = { + allow: { + "*": [], + "api.example.com": [ + { + forwardURL: "https://junior.example.com/api/internal/proxy", + }, + ], + }, + }; + sandboxGetMock.mockResolvedValue(restoredSandbox); + + const manager = createSandboxSessionManager({ + sandboxId: "sbx_restored", + createNetworkPolicy: vi.fn(() => networkPolicy), + }); + manager.configureSkills([]); + + await manager.createSandbox(); + + expect(restoredSandbox.update).toHaveBeenCalledWith({ networkPolicy }); + }); + + it("keeps restored sandbox policy tracking tied to the applied policy", async () => { + const restoredSandbox = makeSandbox("sbx_restored_policy"); + const firstPolicy = { + allow: { + "*": [], + "api.first.example": [ + { + forwardURL: "https://junior.example.com/api/internal/proxy", + }, + ], + }, + }; + const secondPolicy = { + allow: { + "*": [], + "api.second.example": [ + { + forwardURL: "https://junior.example.com/api/internal/proxy", + }, + ], + }, + }; + const createNetworkPolicy = vi + .fn() + .mockReturnValueOnce(firstPolicy) + .mockReturnValueOnce(secondPolicy); + 
sandboxGetMock.mockResolvedValue(restoredSandbox); + + const manager = createSandboxSessionManager({ + sandboxId: "sbx_restored_policy", + createNetworkPolicy, + }); + manager.configureSkills([]); + + await manager.createSandbox(); + await manager.createSandbox(); + + expect(restoredSandbox.update).toHaveBeenNthCalledWith(1, { + networkPolicy: firstPolicy, + }); + expect(restoredSandbox.update).toHaveBeenNthCalledWith(2, { + networkPolicy: secondPolicy, + }); + expect(createNetworkPolicy).toHaveBeenCalledTimes(2); + }); + + it("refreshes changed network policy when reusing a cached sandbox", async () => { + const sandbox = makeSandbox("sbx_cached_policy"); + sandboxCreateMock.mockResolvedValue(sandbox); + let providerDomain = "api.first.example"; + const createNetworkPolicy = vi.fn((sandboxId: string) => ({ + allow: { + "*": [], + [providerDomain]: [ + { + forwardURL: `https://junior.example.com/api/internal/sandbox-egress/${sandboxId}`, + }, + ], + }, + })); + + const manager = createSandboxSessionManager({ createNetworkPolicy }); + manager.configureSkills([]); + + await manager.createSandbox(); + await manager.createSandbox(); + expect(sandbox.update).toHaveBeenCalledTimes(1); + expect(sandbox.update).toHaveBeenCalledWith({ + networkPolicy: { + allow: { + "*": [], + "api.first.example": [ + { + forwardURL: + "https://junior.example.com/api/internal/sandbox-egress/sbx_cached_policy_session", + }, + ], + }, + }, + }); + + sandbox.currentSession.mockReturnValue({ + sessionId: "sbx_cached_policy_resumed_session", + }); + await manager.createSandbox(); + + expect(sandbox.update).toHaveBeenCalledTimes(2); + expect(sandbox.update).toHaveBeenLastCalledWith({ + networkPolicy: { + allow: { + "*": [], + "api.first.example": [ + { + forwardURL: + "https://junior.example.com/api/internal/sandbox-egress/sbx_cached_policy_resumed_session", + }, + ], + }, + }, + }); + + providerDomain = "api.second.example"; + await manager.createSandbox(); + + 
expect(sandbox.update).toHaveBeenCalledTimes(3); + expect(sandbox.update).toHaveBeenLastCalledWith({ + networkPolicy: { + allow: { + "*": [], + "api.second.example": [ + { + forwardURL: + "https://junior.example.com/api/internal/sandbox-egress/sbx_cached_policy_resumed_session", + }, + ], + }, + }, + }); + }); + + it("passes token-based Vercel Sandbox credentials to the sandbox SDK", async () => { + process.env.VERCEL_TOKEN = "sandbox-token"; + process.env.VERCEL_TEAM_ID = "team_123"; + process.env.VERCEL_PROJECT_ID = "prj_123"; + + const stoppedSandbox = makeSandbox("sbx_stopped", { + mkDirError: createApiError( + 410, + "Gone", + "sandbox_stopped", + "Sandbox has stopped execution and is no longer available", + ), + }); + const freshSandbox = makeSandbox("sbx_fresh"); + + sandboxGetMock.mockResolvedValue(stoppedSandbox); + sandboxCreateMock.mockResolvedValue(freshSandbox); + + const executor = createSandboxExecutor({ sandboxId: "sbx_stopped" }); + executor.configureSkills([]); + + await executor.createSandbox(); + + expect(sandboxGetMock).toHaveBeenCalledWith({ + name: "sbx_stopped", + resume: true, + token: "sandbox-token", + teamId: "team_123", + projectId: "prj_123", + }); + expect(sandboxCreateMock).toHaveBeenCalledWith({ + timeout: 1000 * 60 * 30, + runtime: "node22", + token: "sandbox-token", + teamId: "team_123", + projectId: "prj_123", + }); + }); + + it("recreates sandbox when dependency profile hash changed", async () => { + const freshSandbox = makeSandbox("sbx_fresh_after_profile_change"); + getRuntimeDependencyProfileHashMock.mockReturnValue("current-profile"); + sandboxCreateMock.mockResolvedValue(freshSandbox); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_old", + sandboxDependencyProfileHash: "old-profile", + }); + executor.configureSkills([]); + + const sandbox = await executor.createSandbox(); + + await expectWorkspaceToDelegate(sandbox, freshSandbox); + expect(sandboxGetMock).not.toHaveBeenCalled(); + 
expect(sandboxCreateMock).toHaveBeenCalledTimes(1); + }); + + it("surfaces a generic sandbox setup failure for non-recoverable sync errors", async () => { + const forbiddenSandbox = makeSandbox("sbx_forbidden", { + mkDirError: createApiError( + 403, + "Forbidden", + "forbidden", + "You do not have permission to access this sandbox", + ), + }); + + sandboxGetMock.mockResolvedValue(forbiddenSandbox); + + const executor = createSandboxExecutor({ sandboxId: "sbx_forbidden" }); + executor.configureSkills([]); + + await expect(executor.createSandbox()).rejects.toThrow( + "sandbox setup failed", + ); + expect(sandboxCreateMock).not.toHaveBeenCalled(); + }); + + it("defers to SDK OIDC resolution when VERCEL_OIDC_TOKEN is set without explicit credentials", async () => { + process.env.VERCEL_OIDC_TOKEN = "oidc-jwt-token"; + process.env.VERCEL_TEAM_ID = "team_123"; + process.env.VERCEL_PROJECT_ID = "prj_123"; + + const freshSandbox = makeSandbox("sbx_oidc"); + sandboxCreateMock.mockResolvedValue(freshSandbox); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + await executor.createSandbox(); + + expect(sandboxCreateMock).toHaveBeenCalledWith({ + timeout: 1000 * 60 * 30, + runtime: "node22", + }); + }); +}); diff --git a/packages/junior/tests/component/sandbox/executor-snapshots.test.ts b/packages/junior/tests/component/sandbox/executor-snapshots.test.ts new file mode 100644 index 000000000..56f37d134 --- /dev/null +++ b/packages/junior/tests/component/sandbox/executor-snapshots.test.ts @@ -0,0 +1,187 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + createApiError, + createSandboxExecutor, + createSandboxSessionManager, + expectWorkspaceToDelegate, + isSnapshotMissingErrorMock, + makeSandbox, + resolveRuntimeDependencySnapshotMock, + sandboxCreateMock, + setupSandboxExecutorTest, + cleanupSandboxExecutorTest, +} from "../../fixtures/sandbox/executor"; + +describe("sandbox executor dependency snapshots", () 
=> { + beforeEach(setupSandboxExecutorTest); + + afterEach(cleanupSandboxExecutorTest); + + it("creates fresh sandboxes from dependency snapshots when available", async () => { + const snapshotSandbox = makeSandbox("sbx_snapshot"); + resolveRuntimeDependencySnapshotMock.mockResolvedValue({ + snapshotId: "snap_123", + profileHash: "hash_123", + dependencyCount: 2, + cacheHit: true, + resolveOutcome: "cache_hit", + }); + sandboxCreateMock.mockResolvedValue(snapshotSandbox); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + const sandbox = await executor.createSandbox(); + + await expectWorkspaceToDelegate(sandbox, snapshotSandbox); + expect(sandboxCreateMock).toHaveBeenCalledWith({ + timeout: 1000 * 60 * 30, + source: { + type: "snapshot", + snapshotId: "snap_123", + }, + }); + }); + + it("rebuilds snapshot when cached snapshot is missing", async () => { + const rebuiltSandbox = makeSandbox("sbx_rebuilt"); + resolveRuntimeDependencySnapshotMock + .mockResolvedValueOnce({ + snapshotId: "snap_missing", + profileHash: "hash_1", + dependencyCount: 2, + cacheHit: true, + resolveOutcome: "cache_hit", + }) + .mockResolvedValueOnce({ + snapshotId: "snap_rebuilt", + profileHash: "hash_1", + dependencyCount: 2, + cacheHit: false, + resolveOutcome: "forced_rebuild", + rebuildReason: "snapshot_missing", + }); + const missingError = new Error("snapshot not found"); + sandboxCreateMock + .mockRejectedValueOnce(missingError) + .mockResolvedValueOnce(rebuiltSandbox); + isSnapshotMissingErrorMock.mockImplementation( + (error: unknown) => error === missingError, + ); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + const sandbox = await executor.createSandbox(); + + await expectWorkspaceToDelegate(sandbox, rebuiltSandbox); + expect(resolveRuntimeDependencySnapshotMock).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + runtime: "node22", + timeoutMs: 1000 * 60 * 30, + forceRebuild: true, + staleSnapshotId: 
"snap_missing", + }), + ); + expect(sandboxCreateMock).toHaveBeenNthCalledWith(2, { + timeout: 1000 * 60 * 30, + source: { + type: "snapshot", + snapshotId: "snap_rebuilt", + }, + }); + }); + + it("uses a fresh sandbox name when retrying snapshot boot with network policy", async () => { + const snapshotSandbox = makeSandbox("sbx_snapshot_policy_ready"); + resolveRuntimeDependencySnapshotMock.mockResolvedValue({ + snapshotId: "snap_policy_retry", + profileHash: "hash_policy_retry", + dependencyCount: 2, + cacheHit: true, + resolveOutcome: "cache_hit", + }); + const snapshottingError = createApiError( + 422, + "Unprocessable Entity", + "sandbox_snapshotting", + "Sandbox is creating a snapshot and will be stopped shortly.", + ); + sandboxCreateMock + .mockRejectedValueOnce(snapshottingError) + .mockResolvedValueOnce(snapshotSandbox); + const createNetworkPolicy = vi.fn((sandboxId: string) => ({ + allow: { + "*": [], + "api.example.com": [ + { + forwardURL: `https://junior.example.com/api/internal/sandbox-egress/${sandboxId}`, + }, + ], + }, + })); + + const manager = createSandboxSessionManager({ createNetworkPolicy }); + manager.configureSkills([]); + + const sandbox = await manager.createSandbox(); + + const firstCreate = sandboxCreateMock.mock.calls[0]?.[0] as { + name?: string; + networkPolicy?: unknown; + }; + const secondCreate = sandboxCreateMock.mock.calls[1]?.[0] as { + name?: string; + networkPolicy?: unknown; + }; + expect(firstCreate.name).toMatch(/^junior-/); + expect(secondCreate.name).toMatch(/^junior-/); + expect(secondCreate.name).not.toBe(firstCreate.name); + expect(createNetworkPolicy).toHaveBeenNthCalledWith(1, firstCreate.name); + expect(createNetworkPolicy).toHaveBeenNthCalledWith(2, secondCreate.name); + expect(createNetworkPolicy).toHaveBeenNthCalledWith( + 3, + "sbx_snapshot_policy_ready_session", + undefined, + ); + expect(secondCreate.networkPolicy).toEqual({ + allow: { + "*": [], + "api.example.com": [ + { + forwardURL: 
`https://junior.example.com/api/internal/sandbox-egress/${secondCreate.name}`, + }, + ], + }, + }); + expect(snapshotSandbox.update).toHaveBeenCalledWith({ + networkPolicy: { + allow: { + "*": [], + "api.example.com": [ + { + forwardURL: + "https://junior.example.com/api/internal/sandbox-egress/sbx_snapshot_policy_ready_session", + }, + ], + }, + }, + }); + await expectWorkspaceToDelegate(sandbox, snapshotSandbox); + }); + + it("wraps snapshot resolution failures as sandbox setup errors", async () => { + resolveRuntimeDependencySnapshotMock.mockRejectedValueOnce( + new Error("lock timeout"), + ); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + await expect(executor.createSandbox()).rejects.toThrow( + "sandbox setup failed", + ); + expect(sandboxCreateMock).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/junior/tests/component/sandbox/executor-tools.test.ts b/packages/junior/tests/component/sandbox/executor-tools.test.ts new file mode 100644 index 000000000..5614944ec --- /dev/null +++ b/packages/junior/tests/component/sandbox/executor-tools.test.ts @@ -0,0 +1,422 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SANDBOX_WORKSPACE_ROOT, sandboxSkillDir } from "@/chat/sandbox/paths"; +import { + createApiError, + createBashTool, + createSandboxExecutor, + createStreamInterruptedError, + makeBashToolFacade, + makeSandbox, + sandboxCreateMock, + sandboxGetMock, + setupSandboxExecutorTest, + cleanupSandboxExecutorTest, +} from "../../fixtures/sandbox/executor"; + +describe("sandbox executor tool execution", () => { + beforeEach(setupSandboxExecutorTest); + + afterEach(cleanupSandboxExecutorTest); + + it("returns structured file-tool results when sandbox command streams end", async () => { + const sandbox = makeSandbox("sbx_find_files_interrupted"); + 
sandbox.fs.stat.mockRejectedValueOnce(createStreamInterruptedError()); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + const response = await executor.execute({ + toolName: "findFiles", + input: { pattern: "*.ts" }, + }); + + expect(response.result).toMatchObject({ + content: [ + { + type: "text", + text: expect.stringContaining( + "Sandbox command stream was interrupted during findFiles", + ), + }, + ], + details: { + ok: false, + error: "stream_interrupted", + tool: "findFiles", + }, + }); + }); + + it("recognizes stream interruptions wrapped by writeFile errors", async () => { + const sandbox = makeSandbox("sbx_write_file_interrupted"); + const writeFileExecute = vi.fn(async () => { + throw createStreamInterruptedError(); + }); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + vi.mocked(createBashTool).mockResolvedValueOnce( + makeBashToolFacade({ writeFile: writeFileExecute }) as never, + ); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + const response = await executor.execute({ + toolName: "writeFile", + input: { path: "file.ts", content: "new content" }, + }); + + expect(response.result).toMatchObject({ + details: { + ok: false, + error: "stream_interrupted", + tool: "writeFile", + }, + }); + }); + + it("routes matching bash commands through custom command handler", async () => { + const sandbox = makeSandbox("sbx_custom"); + sandboxGetMock.mockResolvedValue(sandbox); + const runBashCustomCommand = vi.fn(async (command: string) => + command === "jr-rpc config get github.repo" + ? 
{ + handled: true, + result: { + ok: true, + command, + cwd: "/", + exit_code: 0, + signal: null, + timed_out: false, + stdout: "credential_enabled\n", + stderr: "", + stdout_truncated: false, + stderr_truncated: false, + }, + } + : { handled: false }, + ); + + const executor = createSandboxExecutor({ + sandboxId: "sbx_custom", + runBashCustomCommand, + }); + executor.configureSkills([]); + + const response = await executor.execute({ + toolName: "bash", + input: { + command: "jr-rpc config get github.repo", + }, + }); + + expect(runBashCustomCommand).toHaveBeenCalledWith( + "jr-rpc config get github.repo", + ); + expect(sandbox.runCommand).not.toHaveBeenCalled(); + expect(response.result).toMatchObject({ + ok: true, + exit_code: 0, + }); + }); + + it("extends sandbox keepalive for each tool execution", async () => { + process.env.VERCEL_SANDBOX_KEEPALIVE_MS = "5000"; + const sandbox = makeSandbox("sbx_keepalive"); + sandboxCreateMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + await executor.execute({ + toolName: "bash", + input: { + command: "echo first", + }, + }); + await executor.execute({ + toolName: "bash", + input: { + command: "echo second", + }, + }); + + expect(sandbox.extendTimeout).toHaveBeenCalledTimes(2); + expect(sandbox.extendTimeout).toHaveBeenNthCalledWith(1, 5000); + expect(sandbox.extendTimeout).toHaveBeenNthCalledWith(2, 5000); + }); + + it("recreates cached sandboxes before reusing cached tool executors", async () => { + const stoppedSandboxError = createApiError( + 410, + "Gone", + "sandbox_stopped", + "Sandbox has stopped execution and is no longer available", + ); + const firstSandbox = makeSandbox("sbx_cached_first"); + let stopCachedSandbox = false; + firstSandbox.mkDir.mockImplementation(async (directory: string) => { + if (stopCachedSandbox && directory === SANDBOX_WORKSPACE_ROOT) { + throw stoppedSandboxError; + } + }); + firstSandbox.runCommand + 
.mockResolvedValueOnce({ + exitCode: 0, + stdout: async () => "first\n", + stderr: async () => "", + }) + .mockRejectedValueOnce(new Error("expired sandbox should not be reused")); + + const secondSandbox = makeSandbox("sbx_cached_second"); + secondSandbox.runCommand.mockResolvedValueOnce({ + exitCode: 0, + stdout: async () => "second\n", + stderr: async () => "", + }); + + sandboxCreateMock + .mockResolvedValueOnce(firstSandbox) + .mockResolvedValueOnce(secondSandbox); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + await executor.execute({ + toolName: "bash", + input: { + command: "echo first", + }, + }); + stopCachedSandbox = true; + + const response = await executor.execute({ + toolName: "bash", + input: { + command: "echo second", + }, + }); + + expect(response.result).toMatchObject({ + ok: true, + stdout: "second\n", + exit_code: 0, + }); + expect(sandboxCreateMock).toHaveBeenCalledTimes(2); + expect(executor.getSandboxId()).toBe("sbx_cached_second"); + }); + + it("reads virtual skill files without booting a sandbox before sandbox state exists", async () => { + const skillRoot = await fs.mkdtemp( + path.join(os.tmpdir(), "junior-skill-read-"), + ); + await fs.mkdir(path.join(skillRoot, "references")); + await fs.writeFile( + path.join(skillRoot, "references", "note.md"), + "Reference note", + "utf8", + ); + + const executor = createSandboxExecutor(); + executor.configureSkills([ + { + name: "demo-skill", + description: "Demo skill", + skillPath: skillRoot, + }, + ]); + + const response = await executor.execute({ + toolName: "readFile", + input: { + path: `${sandboxSkillDir("demo-skill")}/references/note.md`, + }, + }); + + expect(response.result).toEqual({ + content: "Reference note", + end_line: 1, + path: `${sandboxSkillDir("demo-skill")}/references/note.md`, + start_line: 1, + success: true, + total_lines: 1, + truncated: false, + }); + expect(sandboxGetMock).not.toHaveBeenCalled(); + 
expect(sandboxCreateMock).not.toHaveBeenCalled(); + }); + + it("falls through to sandbox when a virtual skill file is missing on the host", async () => { + const skillRoot = await fs.mkdtemp( + path.join(os.tmpdir(), "junior-skill-read-missing-"), + ); + const sandbox = makeSandbox("sbx_missing_virtual_skill_file"); + sandboxCreateMock.mockResolvedValue(sandbox); + vi.mocked(createBashTool).mockResolvedValue( + makeBashToolFacade({ + readFile: vi.fn(async () => ({ content: "from sandbox" })), + }) as never, + ); + + const executor = createSandboxExecutor(); + executor.configureSkills([ + { + name: "demo-skill", + description: "Demo skill", + skillPath: skillRoot, + }, + ]); + + const response = await executor.execute({ + toolName: "readFile", + input: { + path: `${sandboxSkillDir("demo-skill")}/references/missing.md`, + }, + }); + + expect(response.result).toEqual({ + content: "from sandbox", + end_line: 1, + path: `${sandboxSkillDir("demo-skill")}/references/missing.md`, + start_line: 1, + success: true, + total_lines: 1, + truncated: false, + }); + expect(sandboxCreateMock).toHaveBeenCalledTimes(1); + }); + + it("returns a readFile tool result when the sandbox path is missing", async () => { + const sandbox = makeSandbox("sbx_missing_read_file"); + sandboxCreateMock.mockResolvedValue(sandbox); + vi.mocked(createBashTool).mockResolvedValue( + makeBashToolFacade({ + readFile: vi.fn(async () => { + throw new Error("File not found: /vercel/sandbox/missing.ts"); + }), + }) as never, + ); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + const response = await executor.execute({ + toolName: "readFile", + input: { + path: "missing.ts", + }, + }); + + expect(response.result).toEqual({ + content: "", + error: "not_found", + path: "missing.ts", + success: false, + }); + }); + + it("throws ToolInputError when editFile targets a missing path", async () => { + const sandbox = makeSandbox("sbx_missing_edit_file"); + 
sandbox.fs.readFile.mockRejectedValue( + Object.assign(new Error("ENOENT: no such file or directory"), { + code: "ENOENT", + }), + ); + sandboxCreateMock.mockResolvedValue(sandbox); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + await expect( + executor.execute({ + toolName: "editFile", + input: { + path: "missing.ts", + edits: [{ oldText: "a", newText: "b" }], + }, + }), + ).rejects.toThrow("File not found: missing.ts"); + }); + + it("keeps sandbox API failures as readFile errors", async () => { + const sandbox = makeSandbox("sbx_read_file_api_error"); + sandboxCreateMock.mockResolvedValue(sandbox); + vi.mocked(createBashTool).mockResolvedValue( + makeBashToolFacade({ + readFile: vi.fn(async () => { + throw createApiError( + 410, + "Gone", + "sandbox_stopped", + "Sandbox has stopped execution and is no longer available", + ); + }), + }) as never, + ); + + const executor = createSandboxExecutor(); + executor.configureSkills([]); + + await expect( + executor.execute({ + toolName: "readFile", + input: { + path: "missing.ts", + }, + }), + ).rejects.toThrow("Status code 410 is not ok"); + }); + + it("reads virtual skill files from sandbox when a sandbox id hint exists", async () => { + const skillRoot = await fs.mkdtemp( + path.join(os.tmpdir(), "junior-skill-read-hinted-"), + ); + await fs.mkdir(path.join(skillRoot, "references")); + await fs.writeFile( + path.join(skillRoot, "references", "note.md"), + "Host note", + "utf8", + ); + const sandbox = makeSandbox("sbx_existing"); + sandboxGetMock.mockResolvedValue(sandbox); + vi.mocked(createBashTool).mockResolvedValue( + makeBashToolFacade({ + readFile: vi.fn(async () => ({ content: "Sandbox note" })), + }) as never, + ); + + const executor = createSandboxExecutor({ sandboxId: "sbx_existing" }); + executor.configureSkills([ + { + name: "demo-skill", + description: "Demo skill", + skillPath: skillRoot, + }, + ]); + + const response = await executor.execute({ + toolName: "readFile", + 
input: { + path: `${sandboxSkillDir("demo-skill")}/references/note.md`, + }, + }); + + expect(response.result).toEqual({ + content: "Sandbox note", + end_line: 1, + path: `${sandboxSkillDir("demo-skill")}/references/note.md`, + start_line: 1, + success: true, + total_lines: 1, + truncated: false, + }); + expect(sandboxGetMock).toHaveBeenCalledWith({ + name: "sbx_existing", + resume: true, + }); + }); +}); diff --git a/packages/junior/tests/component/sandbox/runtime-dependency-snapshot-cache.test.ts b/packages/junior/tests/component/sandbox/runtime-dependency-snapshot-cache.test.ts new file mode 100644 index 000000000..8577b2485 --- /dev/null +++ b/packages/junior/tests/component/sandbox/runtime-dependency-snapshot-cache.test.ts @@ -0,0 +1,288 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + cleanupRuntimeDependencySnapshotTest, + configureRuntimeDependencyPlugin, + getRuntimeSnapshotCacheEntry, + holdRuntimeSnapshotLock, + makeRuntimeDependencySandbox, + releaseRuntimeSnapshotLock, + resolveRuntimeDependencySnapshot, + sandboxCreateMock, + setRuntimeSnapshotCacheEntry, + setupRuntimeDependencySnapshotTest, +} from "../../fixtures/runtime-dependency-snapshots"; +import { mockTestClock } from "../../fixtures/vitest"; + +describe("runtime dependency snapshot cache", () => { + beforeEach(setupRuntimeDependencySnapshotTest); + afterEach(cleanupRuntimeDependencySnapshotTest); + + it("rebuilds stale snapshots for floating dependency selectors", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "latest" }], + }); + sandboxCreateMock + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_1")) + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_2")); + + const first = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(first.snapshotId).toBe("snap_1"); + expect(first.cacheHit).toBe(false); + 
expect(first.resolveOutcome).toBe("rebuilt"); + expect(first.rebuildReason).toBe("cache_miss"); + + mockTestClock("2026-03-10T00:00:00.000Z"); + + const second = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(second.snapshotId).toBe("snap_2"); + expect(second.cacheHit).toBe(false); + expect(second.resolveOutcome).toBe("rebuilt"); + expect(second.rebuildReason).toBe("floating_stale"); + expect(sandboxCreateMock).toHaveBeenCalledTimes(2); + }); + + it("rebuilds stale snapshots for postinstall-only profiles", async () => { + configureRuntimeDependencyPlugin({ + postinstall: [{ cmd: "agent-browser", args: ["install"] }], + }); + sandboxCreateMock + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_post_1")) + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_post_2")); + + const first = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(first.snapshotId).toBe("snap_post_1"); + expect(first.cacheHit).toBe(false); + expect(first.resolveOutcome).toBe("rebuilt"); + expect(first.rebuildReason).toBe("cache_miss"); + + mockTestClock("2026-03-10T00:00:00.000Z"); + + const second = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(second.snapshotId).toBe("snap_post_2"); + expect(second.cacheHit).toBe(false); + expect(second.resolveOutcome).toBe("rebuilt"); + expect(second.rebuildReason).toBe("floating_stale"); + expect(sandboxCreateMock).toHaveBeenCalledTimes(2); + }); + + it("rebuilds when rebuild epoch changes", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "latest" }], + }); + sandboxCreateMock + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_epoch_a")) + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_epoch_b")); + + process.env.SANDBOX_SNAPSHOT_REBUILD_EPOCH = "epoch-a"; + const first = await resolveRuntimeDependencySnapshot({ + 
runtime: "node22", + timeoutMs: 60_000, + }); + expect(first.snapshotId).toBe("snap_epoch_a"); + expect(first.cacheHit).toBe(false); + expect(first.resolveOutcome).toBe("rebuilt"); + + process.env.SANDBOX_SNAPSHOT_REBUILD_EPOCH = "epoch-b"; + const second = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(second.snapshotId).toBe("snap_epoch_b"); + expect(second.cacheHit).toBe(false); + expect(second.resolveOutcome).toBe("rebuilt"); + expect(sandboxCreateMock).toHaveBeenCalledTimes(2); + }); + + it("reuses cached rebuilt snapshot during force rebuild when stale id differs", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "latest" }], + }); + sandboxCreateMock.mockResolvedValueOnce( + makeRuntimeDependencySandbox("snap_new"), + ); + + const first = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(first.snapshotId).toBe("snap_new"); + expect(first.cacheHit).toBe(false); + expect(first.resolveOutcome).toBe("rebuilt"); + + const forced = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + forceRebuild: true, + staleSnapshotId: "snap_old", + }); + expect(forced.snapshotId).toBe("snap_new"); + expect(forced.cacheHit).toBe(true); + expect(forced.resolveOutcome).toBe("cache_hit"); + expect(forced.rebuildReason).toBe("snapshot_missing"); + expect(sandboxCreateMock).toHaveBeenCalledTimes(1); + }); + + it("does not return stale cached snapshot while waiting on force rebuild lock", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "latest" }], + }); + sandboxCreateMock + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_old")) + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_new")); + + const first = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + 
expect(first.snapshotId).toBe("snap_old"); + expect(first.cacheHit).toBe(false); + expect(first.resolveOutcome).toBe("rebuilt"); + if (!first.profileHash) { + throw new Error("Expected snapshot profile hash"); + } + + await holdRuntimeSnapshotLock(first.profileHash); + let notifyWaitingForLock!: () => void; + const waitingForLock = new Promise((resolve) => { + notifyWaitingForLock = resolve; + }); + const secondPromise = resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + forceRebuild: true, + staleSnapshotId: "snap_old", + onProgress: (phase) => { + if (phase === "waiting_for_lock") { + notifyWaitingForLock(); + } + }, + }); + await waitingForLock; + await releaseRuntimeSnapshotLock(); + await vi.advanceTimersByTimeAsync(500); + + const second = await secondPromise; + expect(second.snapshotId).toBe("snap_new"); + expect(second.cacheHit).toBe(false); + expect(second.resolveOutcome).toBe("forced_rebuild"); + expect(second.rebuildReason).toBe("snapshot_missing"); + expect(sandboxCreateMock).toHaveBeenCalledTimes(2); + }); + + it("rebuilds when forceRebuild is true without stale snapshot id", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "latest" }], + }); + sandboxCreateMock + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_initial")) + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_forced")); + + const first = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(first.snapshotId).toBe("snap_initial"); + expect(first.cacheHit).toBe(false); + expect(first.resolveOutcome).toBe("rebuilt"); + + const forced = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + forceRebuild: true, + }); + expect(forced.snapshotId).toBe("snap_forced"); + expect(forced.cacheHit).toBe(false); + expect(forced.resolveOutcome).toBe("forced_rebuild"); + expect(forced.rebuildReason).toBe("force_rebuild"); + 
expect(sandboxCreateMock).toHaveBeenCalledTimes(2); + }); + + it("reuses a concurrent rebuilt snapshot while waiting on force rebuild lock without stale id", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "latest" }], + }); + sandboxCreateMock + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_initial")) + .mockResolvedValueOnce(makeRuntimeDependencySandbox("snap_forced")); + + const first = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(first.snapshotId).toBe("snap_initial"); + expect(first.cacheHit).toBe(false); + expect(first.resolveOutcome).toBe("rebuilt"); + if (!first.profileHash) { + throw new Error("Expected snapshot profile hash"); + } + + const cacheValue = await getRuntimeSnapshotCacheEntry(first.profileHash); + if (!cacheValue) { + throw new Error("Expected cached snapshot entry"); + } + const initialCached = JSON.parse(cacheValue) as { + profileHash: string; + snapshotId: string; + runtime: string; + createdAtMs: number; + dependencyCount: number; + }; + + await holdRuntimeSnapshotLock(first.profileHash); + setTimeout(() => { + void setRuntimeSnapshotCacheEntry( + first.profileHash!, + JSON.stringify({ + ...initialCached, + snapshotId: "snap_from_other_worker", + createdAtMs: Date.now(), + }), + ); + }, 100); + setTimeout(() => { + void releaseRuntimeSnapshotLock(); + }, 1_100); + + const concurrent = resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + forceRebuild: true, + }); + + await vi.advanceTimersByTimeAsync(2_000); + const snapshot = await concurrent; + expect(snapshot.snapshotId).toBe("snap_from_other_worker"); + expect(snapshot.cacheHit).toBe(true); + expect(snapshot.resolveOutcome).toBe("cache_hit_after_lock_wait"); + expect(snapshot.rebuildReason).toBe("force_rebuild"); + expect(sandboxCreateMock).toHaveBeenCalledTimes(1); + }); + + it("returns no_profile metadata when runtime dependency 
profile is empty", async () => { + configureRuntimeDependencyPlugin({}); + + const snapshot = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + + expect(snapshot).toMatchObject({ + dependencyCount: 0, + cacheHit: false, + resolveOutcome: "no_profile", + }); + expect(sandboxCreateMock).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/junior/tests/component/sandbox/runtime-dependency-snapshot-install.test.ts b/packages/junior/tests/component/sandbox/runtime-dependency-snapshot-install.test.ts new file mode 100644 index 000000000..8e051eb6d --- /dev/null +++ b/packages/junior/tests/component/sandbox/runtime-dependency-snapshot-install.test.ts @@ -0,0 +1,213 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + cleanupRuntimeDependencySnapshotTest, + configureRuntimeDependencyPlugin, + getRuntimeDependencyScript, + makeRuntimeDependencySandbox, + resolveRuntimeDependencySnapshot, + sandboxCreateMock, + setupRuntimeDependencySnapshotTest, +} from "../../fixtures/runtime-dependency-snapshots"; + +describe("runtime dependency snapshot install", () => { + beforeEach(setupRuntimeDependencySnapshotTest); + afterEach(cleanupRuntimeDependencySnapshotTest); + + it("stops the build sandbox after snapshot creation succeeds", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "latest" }], + }); + const sandbox = makeRuntimeDependencySandbox("snap_stopped"); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + + const snapshot = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(snapshot.snapshotId).toBe("snap_stopped"); + expect(sandbox.stop).toHaveBeenCalledTimes(1); + }); + + it("passes token-based Vercel Sandbox credentials to snapshot builds", async () => { + process.env.VERCEL_TOKEN = "sandbox-token"; + process.env.VERCEL_TEAM_ID = "team_123"; + process.env.VERCEL_PROJECT_ID = "prj_123"; + 
configureRuntimeDependencyPlugin({ + dependencies: [{ type: "npm", package: "sentry", version: "1.0.0" }], + }); + const sandbox = makeRuntimeDependencySandbox("snap_creds"); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + + const snapshot = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + + expect(snapshot.snapshotId).toBe("snap_creds"); + expect(sandboxCreateMock).toHaveBeenCalledWith({ + timeout: 60_000, + runtime: "node22", + token: "sandbox-token", + teamId: "team_123", + projectId: "prj_123", + }); + }); + + it("installs system dependencies via dnf", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "system", package: "gh" }], + }); + const sandbox = makeRuntimeDependencySandbox("snap_system"); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + + const snapshot = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(snapshot.snapshotId).toBe("snap_system"); + const invocation = sandbox.runCommand.mock.calls[0]?.[0]; + expect(invocation).toMatchObject({ cmd: "bash", sudo: true }); + expect(getRuntimeDependencyScript(invocation)).toContain("exec { + configureRuntimeDependencyPlugin({ + dependencies: [ + { + type: "system", + url: "https://example.com/tool.rpm", + sha256: + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + }, + ], + }); + const sandbox = makeRuntimeDependencySandbox( + "snap_system_url", + async (params) => { + if (getRuntimeDependencyScript(params).includes("'sha256sum'")) { + return { + exitCode: 0, + stdout: async () => + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa /tmp/junior-runtime-dep.rpm", + stderr: async () => "", + }; + } + return { exitCode: 0, stdout: async () => "", stderr: async () => "" }; + }, + ); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + + const snapshot = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + 
expect(snapshot.snapshotId).toBe("snap_system_url"); + const scripts = sandbox.runCommand.mock.calls.map((call) => + getRuntimeDependencyScript(call[0]), + ); + expect(scripts).toEqual( + expect.arrayContaining([ + expect.stringContaining( + "'curl' '-fsSL' 'https://example.com/tool.rpm' '-o' '/tmp/junior-runtime-aaaaaaaaaaaa-tool.rpm'", + ), + expect.stringContaining( + "'sha256sum' '/tmp/junior-runtime-aaaaaaaaaaaa-tool.rpm'", + ), + expect.stringContaining( + "'dnf' 'install' '-y' '/tmp/junior-runtime-aaaaaaaaaaaa-tool.rpm'", + ), + ]), + ); + }); + + it("falls back to gh-cli repo bootstrap when dnf cannot resolve gh directly", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [{ type: "system", package: "gh" }], + }); + const sandbox = makeRuntimeDependencySandbox( + "snap_system_fallback", + async (params) => { + const script = getRuntimeDependencyScript(params); + if (!script.includes("'dnf'")) { + return { + exitCode: 1, + stdout: async () => "", + stderr: async () => "unsupported command", + }; + } + + if ( + script.includes("'dnf' 'install' '-y' 'gh'") && + !script.includes("'--repo' 'gh-cli'") + ) { + return { + exitCode: 1, + stdout: async () => "", + stderr: async () => "Unable to find a match: gh", + }; + } + + return { exitCode: 0, stdout: async () => "", stderr: async () => "" }; + }, + ); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + + const snapshot = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(snapshot.snapshotId).toBe("snap_system_fallback"); + const scripts = sandbox.runCommand.mock.calls.map((call) => + getRuntimeDependencyScript(call[0]), + ); + expect(scripts).toEqual( + expect.arrayContaining([ + expect.stringContaining("'dnf' 'install' '-y' 'gh'"), + expect.stringContaining( + "'dnf' 'config-manager' 'addrepo' '--from-repofile=https://cli.github.com/packages/rpm/gh-cli.repo'", + ), + expect.stringContaining("'dnf' 'install' '-y' 'gh' '--repo' 'gh-cli'"), + ]), + 
); + }); + + it("runs runtime-postinstall commands after dependency install", async () => { + configureRuntimeDependencyPlugin({ + dependencies: [ + { type: "npm", package: "example-cli", version: "latest" }, + ], + postinstall: [{ cmd: "example-cli", args: ["install"] }], + }); + const sandbox = makeRuntimeDependencySandbox("snap_postinstall"); + sandboxCreateMock.mockResolvedValueOnce(sandbox); + + const snapshot = await resolveRuntimeDependencySnapshot({ + runtime: "node22", + timeoutMs: 60_000, + }); + expect(snapshot.snapshotId).toBe("snap_postinstall"); + const npmInvocation = sandbox.runCommand.mock.calls[0]?.[0]; + expect(npmInvocation).toMatchObject({ + cmd: "bash", + }); + expect(npmInvocation.args?.[1]).toContain("exec { + process.env.JUNIOR_STATE_ADAPTER = "memory"; +}); + +function createTask(overrides: Partial = {}): ScheduledTask { + return { + id: "sched_valid", + createdAtMs: Date.parse("2026-05-25T16:00:00.000Z"), + createdBy: { slackUserId: "U123" }, + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + executionActor: { + type: "system", + id: "scheduled-task", + }, + nextRunAtMs: Date.parse("2026-05-25T16:00:00.000Z"), + schedule: { + description: "Every Monday at 9am", + kind: "one_off", + timezone: "America/Los_Angeles", + }, + status: "active", + task: { + text: "Summarize open scheduler issues.", + }, + updatedAtMs: Date.parse("2026-05-25T16:00:00.000Z"), + version: 1, + ...overrides, + }; +} + +describe("scheduler store routing", () => { + beforeEach(async () => { + await disconnectStateAdapter(); + }); + + afterEach(async () => { + await disconnectStateAdapter(); + }); + + it("rejects invalid scheduled task routing context", async () => { + const store = createSchedulerStore(createPluginState("scheduler")); + + await expect( + store.saveTask( + createTask({ + id: "sched_bad_destination", + destination: { + platform: "slack", + teamId: "D_BAD_TEAM", + channelId: "D123", + }, + }), + ), + 
).rejects.toThrow("Scheduled task routing context is invalid."); + await expect(store.getTask("sched_bad_destination")).resolves.toBe( + undefined, + ); + + await expect( + store.saveTask( + createTask({ + id: "sched_bad_credential_subject", + destination: { + platform: "slack", + teamId: "T123", + channelId: "D123", + }, + credentialSubject: { + type: "user", + userId: "U123", + allowedWhen: "private-direct-conversation", + binding: { + type: "slack-direct-conversation", + teamId: "T123", + channelId: "D123", + signature: "v1=test", + }, + } as ScheduledTask["credentialSubject"], + }), + ), + ).rejects.toThrow("Scheduled task routing context is invalid."); + await expect(store.getTask("sched_bad_credential_subject")).resolves.toBe( + undefined, + ); + }); +}); diff --git a/packages/junior/tests/fixtures/assistant-reply.ts b/packages/junior/tests/fixtures/assistant-reply.ts new file mode 100644 index 000000000..6803932db --- /dev/null +++ b/packages/junior/tests/fixtures/assistant-reply.ts @@ -0,0 +1,29 @@ +import type { AssistantReply } from "@/chat/respond"; + +type AssistantReplyOverrides = Partial< + Omit +> & { + diagnostics?: Partial; +}; + +/** Build a fully shaped successful assistant reply for deterministic runtime tests. 
*/ +export function successfulAssistantReply( + text: string, + overrides: AssistantReplyOverrides = {}, +): AssistantReply { + const { diagnostics, ...replyOverrides } = overrides; + return { + text, + ...replyOverrides, + diagnostics: { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + ...diagnostics, + }, + }; +} diff --git a/packages/junior/tests/fixtures/chat-runtime.ts b/packages/junior/tests/fixtures/chat-runtime.ts index dd64dea24..f71e7d0a1 100644 --- a/packages/junior/tests/fixtures/chat-runtime.ts +++ b/packages/junior/tests/fixtures/chat-runtime.ts @@ -1,26 +1,63 @@ +import type { Message, Thread } from "chat"; import type { SlackAdapter } from "@chat-adapter/slack"; -import { - createSlackRuntime, - type CreateSlackRuntimeOptions, -} from "@/chat/app/factory"; -import type { JuniorRuntimeServiceOverrides } from "@/chat/app/services"; -import { FakeSlackAdapter } from "./slack-harness"; +import { createSlackRuntime } from "@/chat/app/factory"; +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; +import type { SlackTurnOptions } from "@/chat/runtime/slack-runtime"; +import { createTestDestination, FakeSlackAdapter } from "./slack/harness"; +type TestSlackTurnOptions = Omit & { + destination?: SlackTurnOptions["destination"]; +}; + +function withDefaultDestination( + thread: Thread, + hooks: TestSlackTurnOptions = {}, +): SlackTurnOptions { + return { + ...hooks, + destination: hooks.destination ?? createTestDestination(thread), + }; +} + +/** Create a local Slack runtime that uses fake Slack transport and real runtime wiring. */ export function createTestChatRuntime( args: { - now?: CreateSlackRuntimeOptions["now"]; - services?: JuniorRuntimeServiceOverrides; + adapters?: JuniorRuntimeScenarioAdapters; slackAdapter?: FakeSlackAdapter; } = {}, ) { const slackAdapter = args.slackAdapter ?? 
new FakeSlackAdapter(); + const runtime = createSlackRuntime({ + adapters: args.adapters, + getSlackAdapter: () => slackAdapter as unknown as SlackAdapter, + }); return { slackAdapter, - slackRuntime: createSlackRuntime({ - getSlackAdapter: () => slackAdapter as unknown as SlackAdapter, - now: args.now, - services: args.services, - }), + slackRuntime: { + ...runtime, + handleNewMention( + thread: Thread, + message: Message, + hooks?: TestSlackTurnOptions, + ) { + return runtime.handleNewMention( + thread, + message, + withDefaultDestination(thread, hooks), + ); + }, + handleSubscribedMessage( + thread: Thread, + message: Message, + hooks?: TestSlackTurnOptions, + ) { + return runtime.handleSubscribedMessage( + thread, + message, + withDefaultDestination(thread, hooks), + ); + }, + }, }; } diff --git a/packages/junior/tests/fixtures/check-cli.ts b/packages/junior/tests/fixtures/check-cli.ts new file mode 100644 index 000000000..63655e862 --- /dev/null +++ b/packages/junior/tests/fixtures/check-cli.ts @@ -0,0 +1,90 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { expect } from "vitest"; +import { runCheck } from "@/cli/check"; + +const tempRoots: string[] = []; + +function checkLogger(lines: string[]) { + return { + info: (line: string) => lines.push(line), + warn: (line: string) => lines.push(line), + error: (line: string) => lines.push(line), + }; +} + +/** Create a temporary repository root for CLI check tests. */ +export function makeTempDir(prefix: string): string { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), prefix)); + tempRoots.push(dir); + return dir; +} + +/** Remove all temporary repository roots created by CLI check tests. */ +export function cleanupCheckCliTempRoots(): void { + for (const root of tempRoots.splice(0)) { + fs.rmSync(root, { recursive: true, force: true }); + } +} + +/** Ensure a directory exists inside a CLI check fixture repository. 
*/ +export function mkdir(targetPath: string): void { + fs.mkdirSync(targetPath, { recursive: true }); +} + +/** Write a fixture file, creating parent directories as needed. */ +export function writeFile(targetPath: string, contents: string): void { + fs.mkdirSync(path.dirname(targetPath), { recursive: true }); + fs.writeFileSync(targetPath, fixtureContents(targetPath, contents), "utf8"); +} + +function fixtureContents(targetPath: string, contents: string): string { + if (path.basename(targetPath) !== "plugin.yaml") { + return contents; + } + const lines = contents.split("\n"); + if (lines.some((line) => line.startsWith("display-name:"))) { + return contents; + } + const nameIndex = lines.findIndex((line) => line.startsWith("name:")); + if (nameIndex === -1) { + return contents; + } + const name = lines[nameIndex]!.slice("name:".length).trim(); + const displayName = name + .split(/[-_\s]+/) + .filter(Boolean) + .map((part) => `${part[0]!.toUpperCase()}${part.slice(1)}`) + .join(" "); + lines.splice(nameIndex + 1, 0, `display-name: ${displayName}`); + return lines.join("\n"); +} + +/** Write the required Junior app markdown files into a fixture repository. */ +export function writeAppFiles(repoRoot: string): void { + const appDir = path.join(repoRoot, "app"); + fs.mkdirSync(appDir, { recursive: true }); + writeFile(path.join(appDir, "SOUL.md"), "soul"); + writeFile(path.join(appDir, "WORLD.md"), "world"); + writeFile(path.join(appDir, "DESCRIPTION.md"), "description"); +} + +/** Run the check command and return captured logger lines. */ +export async function runCheckAndCollect(repoRoot: string): Promise { + const lines: string[] = []; + await runCheck(repoRoot, checkLogger(lines)); + return lines; +} + +/** Assert the check command fails and return captured logger lines. 
*/ +export async function expectCheckFailure( + repoRoot: string, + expectedMessage: string, +): Promise { + const lines: string[] = []; + await expect(runCheck(repoRoot, checkLogger(lines))).rejects.toThrow( + expectedMessage, + ); + return lines; +} diff --git a/packages/junior/tests/fixtures/heartbeat.ts b/packages/junior/tests/fixtures/heartbeat.ts new file mode 100644 index 000000000..419098745 --- /dev/null +++ b/packages/junior/tests/fixtures/heartbeat.ts @@ -0,0 +1,176 @@ +import { vi } from "vitest"; +import { + createSchedulerStore, + type ScheduledTask, +} from "@sentry/junior-scheduler"; +import { createPluginState } from "@/chat/plugins/state"; +import { createSlackDirectCredentialSubject } from "@/chat/credentials/subject"; +import { persistThreadStateById } from "@/chat/runtime/thread-state"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { setAgentPlugins } from "@/chat/plugins/agent-hooks"; +import { mockTestClock } from "./vitest"; + +export const TEST_NOW_MS = Date.parse("2026-05-26T12:05:00.000Z"); +export const TEST_RUN_AT_MS = Date.parse("2026-05-26T12:00:00.000Z"); + +/** Reset shared heartbeat dependencies before each integration case. */ +export async function setupHeartbeatTestEnv(): Promise { + mockTestClock(TEST_NOW_MS); + process.env.JUNIOR_SCHEDULER_SECRET = "heartbeat-secret"; + process.env.JUNIOR_BASE_URL = "https://junior.example.com"; + process.env.JUNIOR_SECRET = "dispatch-secret"; + delete process.env.CRON_SECRET; + setAgentPlugins([]); + await disconnectStateAdapter(); +} + +/** Restore heartbeat test globals that route and plugin tests mutate. 
*/ +export async function resetHeartbeatTestEnv( + originalFetch: typeof fetch, +): Promise { + global.fetch = originalFetch; + setAgentPlugins([]); + await disconnectStateAdapter(); + delete process.env.JUNIOR_SCHEDULER_SECRET; + delete process.env.CRON_SECRET; + delete process.env.JUNIOR_BASE_URL; + delete process.env.JUNIOR_SECRET; + vi.restoreAllMocks(); + vi.useRealTimers(); +} + +/** Build an authenticated internal heartbeat request. */ +export function heartbeatRequest(): Request { + return new Request("https://example.invalid/api/internal/heartbeat", { + headers: { authorization: "Bearer heartbeat-secret" }, + }); +} + +/** Build the scheduler store through the plugin state boundary. */ +export function schedulerStore() { + return createSchedulerStore(createPluginState("scheduler")); +} + +/** Build a one-off scheduler task with stable clock values. */ +export function createTask( + overrides: Partial = {}, +): ScheduledTask { + const nextRunAtMs = TEST_RUN_AT_MS; + return { + id: "sched_plugin_1", + createdAtMs: nextRunAtMs, + createdBy: { slackUserId: "U123" }, + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + nextRunAtMs, + schedule: { + description: "Once at noon", + kind: "one_off", + timezone: "UTC", + }, + status: "active", + task: { + text: "Post a digest. Summarize the latest state.", + }, + updatedAtMs: nextRunAtMs, + version: 1, + ...overrides, + }; +} + +/** Build a daily scheduler task that is stale relative to the heartbeat clock. 
*/ +export function createDailyTask( + overrides: Partial = {}, +): ScheduledTask { + const nextRunAtMs = Date.parse("2026-05-24T12:00:00.000Z"); + return createTask({ + id: "sched_plugin_daily", + createdAtMs: nextRunAtMs, + nextRunAtMs, + schedule: { + description: "Daily at noon UTC", + kind: "recurring", + timezone: "UTC", + recurrence: { + frequency: "daily", + interval: 1, + startDate: "2026-05-24", + time: { + hour: 12, + minute: 0, + }, + }, + }, + updatedAtMs: nextRunAtMs, + ...overrides, + }); +} + +/** Capture dispatch callback requests while preserving mocked Slack API traffic. */ +export function mockDispatchCallbackFetch(originalFetch: typeof fetch) { + const fetchMock = vi.fn(async (...args: Parameters) => { + const input = args[0]; + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.href + : input.url; + if (url.startsWith("https://slack.com/api/")) { + return await originalFetch(...args); + } + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + return fetchMock; +} + +/** Create a valid direct Slack credential subject for dispatch tests. */ +export function createCredentialSubject( + input: { + channelId?: string; + teamId?: string; + userId?: string; + } = {}, +) { + const subject = createSlackDirectCredentialSubject({ + channelId: input.channelId ?? "D123", + teamId: input.teamId ?? "T123", + userId: input.userId ?? "U123", + }); + if (!subject) { + throw new Error("Expected test credential subject to be created"); + } + return subject; +} + +/** Persist only the active turn marker needed by heartbeat resume recovery. 
*/ +export async function persistActiveTurn( + conversationId: string, + activeTurnId?: string, +): Promise { + await persistThreadStateById(conversationId, { + conversation: { + schemaVersion: 1, + backfill: {}, + compactions: [], + messages: [], + piMessages: [], + processing: { + activeTurnId, + }, + stats: { + compactedMessageCount: 0, + estimatedContextTokens: 0, + totalMessageCount: 0, + updatedAtMs: TEST_NOW_MS, + }, + vision: { + byFileId: {}, + }, + }, + }); +} diff --git a/packages/junior/tests/fixtures/mcp-oauth-callback-harness.ts b/packages/junior/tests/fixtures/mcp-oauth-callback-harness.ts deleted file mode 100644 index b3c2a3d96..000000000 --- a/packages/junior/tests/fixtures/mcp-oauth-callback-harness.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { - waitUntilCallbacks, - testWaitUntil, -} from "./oauth-callback-after-harness"; - -export async function runMcpOauthCallbackRoute(args: { - provider: string; - state: string; - code: string; -}) { - waitUntilCallbacks.length = 0; - const { GET } = await import("@/handlers/mcp-oauth-callback"); - const response = await GET( - new Request( - `https://junior.example.com/api/oauth/callback/mcp/${args.provider}?state=${encodeURIComponent(args.state)}&code=${encodeURIComponent(args.code)}`, - { method: "GET" }, - ), - args.provider, - testWaitUntil, - ); - const callbacks = waitUntilCallbacks.splice(0, waitUntilCallbacks.length); - for (const callback of callbacks) { - await callback(); - } - if (response.status === 200 && callbacks.length === 0) { - throw new Error( - `MCP OAuth callback route returned 200 without registering waitUntil() work for provider "${args.provider}"`, - ); - } - return response; -} diff --git a/packages/junior/tests/fixtures/mcp/auth-runtime-slack.ts b/packages/junior/tests/fixtures/mcp/auth-runtime-slack.ts new file mode 100644 index 000000000..949195c79 --- /dev/null +++ b/packages/junior/tests/fixtures/mcp/auth-runtime-slack.ts @@ -0,0 +1,338 @@ +import path from "node:path"; +import 
{ expect, vi } from "vitest"; +import type { StreamFn } from "@earendil-works/pi-agent-core"; +import type { ResumeReplyGenerator } from "@/chat/runtime/slack-resume"; +import type { TurnThinkingSelection } from "@/chat/services/turn-thinking-level"; +import { + EVAL_MCP_AUTH_CODE, + EVAL_MCP_AUTH_PROVIDER, +} from "../../msw/handlers/eval-mcp-auth"; +import { + getCapturedSlackApiCalls, + resetSlackApiMockState, +} from "../../msw/handlers/slack-api"; +import { type TestThread } from "../slack/harness"; +import { createPluginAppFixture, type PluginAppFixture } from "../plugin-app"; +import { piTextResponse, piToolCallResponse } from "../pi-stream"; +import { + makeTestReplyContext, + type TestReplyRequestContext, +} from "../reply-context"; + +export const MCP_TOOL_NAME = "mcp__eval-auth__budget-echo"; +export const SKILL_NAME = "eval-auth"; +export const assistantReplyWithContext = + "The budget deadline you mentioned earlier was Friday."; +export const priorBudgetContext = "You need the budget by Friday."; + +const assistantReplyWithoutContext = "I need the earlier budget context first."; +const testThinkingSelection: TurnThinkingSelection = { + thinkingLevel: "medium", + reason: "test_default", +}; +const ORIGINAL_ENV = { ...process.env }; +const EVAL_MCP_PLUGIN_ROOT = path.resolve( + import.meta.dirname, + "../plugins/eval-auth", +); + +type ChatRuntimeModule = typeof import("../chat-runtime"); +type McpAuthStoreModule = typeof import("@/chat/mcp/auth-store"); +type McpOauthCallbackHarnessModule = typeof import("./oauth-callback-harness"); +type RespondModule = typeof import("@/chat/respond"); +type StateAdapterModule = typeof import("@/chat/state/adapter"); +type ThreadStateModule = typeof import("@/chat/runtime/thread-state"); +type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); + +type McpAuthAgentProbe = { + directProviderSearch: boolean; + searchToolNames: string[][]; +}; + +function extractTextContent(message: unknown): string { + 
if (!message || typeof message !== "object") { + return ""; + } + + const content = (message as { content?: unknown }).content; + if (!Array.isArray(content)) { + return ""; + } + + return content + .map((part) => { + if (!part || typeof part !== "object") { + return ""; + } + const candidate = part as { type?: unknown; text?: unknown }; + return candidate.type === "text" && typeof candidate.text === "string" + ? candidate.text + : ""; + }) + .join("\n"); +} + +function hasPriorBudgetContext(messages: unknown[]): boolean { + return messages.some((message) => + extractTextContent(message).includes(priorBudgetContext), + ); +} + +function hasCompletedMcpAuthorization(messages: unknown[]): boolean { + return messages.some((message) => + extractTextContent(message).includes( + `MCP authorization completed for provider "${EVAL_MCP_AUTH_PROVIDER}"`, + ), + ); +} + +function extractSearchToolNames(messages: unknown[]): string[] | undefined { + for (let index = messages.length - 1; index >= 0; index -= 1) { + const message = messages[index]; + if (!message || typeof message !== "object") { + continue; + } + + const candidate = message as { + details?: unknown; + role?: unknown; + toolName?: unknown; + }; + if ( + candidate.role !== "toolResult" || + candidate.toolName !== "searchMcpTools" || + !candidate.details || + typeof candidate.details !== "object" + ) { + continue; + } + + const tools = (candidate.details as { tools?: unknown }).tools; + if (!Array.isArray(tools)) { + return []; + } + return tools + .map((tool) => + tool && typeof tool === "object" + ? 
(tool as { tool_name?: unknown }).tool_name + : undefined, + ) + .filter((toolName): toolName is string => typeof toolName === "string"); + } + + return undefined; +} + +function recordSearchToolNames( + agentProbe: McpAuthAgentProbe, + messages: unknown[], +): void { + const toolNames = extractSearchToolNames(messages); + if (!toolNames) { + return; + } + + const previous = agentProbe.searchToolNames.at(-1); + if (previous && previous.join("\0") === toolNames.join("\0")) { + return; + } + + agentProbe.searchToolNames.push(toolNames); +} + +function createAgentProbe(): McpAuthAgentProbe { + return { + directProviderSearch: false, + searchToolNames: [], + }; +} + +function createMcpAuthStreamFn(agentProbe: McpAuthAgentProbe): StreamFn { + let initialPromptStarted = false; + let resumeStep = 0; + + return async (_model, context) => { + const messages = context.messages ?? []; + const authorizationCompleted = hasCompletedMcpAuthorization(messages); + + if (authorizationCompleted && resumeStep > 0) { + recordSearchToolNames(agentProbe, messages); + } + + if (!initialPromptStarted) { + initialPromptStarted = true; + if (agentProbe.directProviderSearch) { + return piToolCallResponse({ + id: "tool-search-provider", + name: "searchMcpTools", + parameters: { + provider: EVAL_MCP_AUTH_PROVIDER, + query: "budget echo query", + }, + }); + } + + return piToolCallResponse({ + id: "tool-load-skill", + name: "loadSkill", + parameters: { skill_name: SKILL_NAME }, + }); + } + + if (!authorizationCompleted) { + return piTextResponse("Authorization pending."); + } + + if (resumeStep === 0) { + resumeStep += 1; + return piToolCallResponse({ + id: "tool-search-resume", + name: "searchMcpTools", + parameters: { + provider: EVAL_MCP_AUTH_PROVIDER, + query: "budget echo query", + }, + }); + } + + if (resumeStep === 1) { + resumeStep += 1; + return piToolCallResponse({ + id: "tool-call-continue", + name: "callMcpTool", + parameters: { + tool_name: MCP_TOOL_NAME, + arguments: { query: "what 
did i say about the budget?" }, + }, + }); + } + + return piTextResponse( + hasPriorBudgetContext(context.messages ?? []) + ? assistantReplyWithContext + : assistantReplyWithoutContext, + ); + }; +} + +/** Starts the Slack runtime fixture for MCP auth parking and resume tests. */ +export async function createMcpAuthRuntimeSlackFixture() { + const agentProbe = createAgentProbe(); + resetSlackApiMockState(); + process.env = { + ...ORIGINAL_ENV, + JUNIOR_BASE_URL: "https://junior.example.com", + JUNIOR_STATE_ADAPTER: "memory", + SLACK_BOT_TOKEN: "xoxb-test-token", + }; + let pluginApp: PluginAppFixture | undefined = await createPluginAppFixture([ + EVAL_MCP_PLUGIN_ROOT, + ]); + + vi.resetModules(); + const chatRuntime: ChatRuntimeModule = await import("../chat-runtime"); + const mcpAuthStore: McpAuthStoreModule = + await import("@/chat/mcp/auth-store"); + const mcpOauthCallbackHarness: McpOauthCallbackHarnessModule = + await import("./oauth-callback-harness"); + const respond: RespondModule = await import("@/chat/respond"); + const stateAdapter: StateAdapterModule = await import("@/chat/state/adapter"); + const threadState: ThreadStateModule = + await import("@/chat/runtime/thread-state"); + const turnSessionStore: TurnSessionStoreModule = + await import("@/chat/state/turn-session"); + + await stateAdapter.disconnectStateAdapter(); + await stateAdapter.getStateAdapter().connect(); + + return { + agentProbe, + chatRuntime, + mcpAuthStore, + stateAdapter, + threadState, + turnSessionStore, + + /** Creates a deterministic MCP-auth reply generator for this fixture. 
*/ + createMcpAuthReplyGenerator(): ResumeReplyGenerator { + const streamFn = createMcpAuthStreamFn(agentProbe); + return (messageText: string, context: TestReplyRequestContext = {}) => + respond.generateAssistantReply( + messageText, + makeTestReplyContext({ + ...context, + harness: { + ...context.harness, + streamFn, + turnThinkingSelection: testThinkingSelection, + }, + }), + ); + }, + + /** Mirrors fixture thread writes into the memory adapter used by callbacks. */ + async mirrorThreadStateToAdapter(thread: TestThread): Promise { + const originalSetState = thread.setState.bind(thread); + thread.setState = async (next, options) => { + await originalSetState(next, options); + await stateAdapter + .getStateAdapter() + .set(`thread-state:${thread.id}`, thread.getState()); + }; + + await stateAdapter + .getStateAdapter() + .set(`thread-state:${thread.id}`, thread.getState()); + }, + + /** Completes the parked MCP OAuth flow through the callback route. */ + async runMcpOauthCallback(args: { + state: string; + generateReply: ResumeReplyGenerator; + }) { + return await mcpOauthCallbackHarness.runMcpOauthCallbackRoute({ + provider: EVAL_MCP_AUTH_PROVIDER, + state: args.state, + code: EVAL_MCP_AUTH_CODE, + generateReply: args.generateReply, + }); + }, + + /** Disconnects memory state, plugin fixtures, and test environment. */ + async cleanup() { + await stateAdapter.disconnectStateAdapter(); + await pluginApp?.cleanup(); + pluginApp = undefined; + process.env = { ...ORIGINAL_ENV }; + }, + }; +} + +/** Asserts Slack processing reaction add/remove lifecycles for a message. 
*/ +export function expectProcessingReactionLifecycles(args: { + channel: string; + completedCount?: number; + count: number; + timestamp: string; +}): void { + const call = (name: string) => + expect.objectContaining({ + params: expect.objectContaining({ + channel: args.channel, + timestamp: args.timestamp, + name, + }), + }); + const eyes = Array.from({ length: args.count }, () => call("eyes")); + const completed = Array.from({ length: args.completedCount ?? 0 }, () => + call("white_check_mark"), + ); + + expect(getCapturedSlackApiCalls("reactions.add")).toEqual([ + ...eyes, + ...completed, + ]); + expect(getCapturedSlackApiCalls("reactions.remove")).toEqual(eyes); +} + +export { EVAL_MCP_AUTH_PROVIDER }; diff --git a/packages/junior/tests/fixtures/mcp/oauth-callback-harness.ts b/packages/junior/tests/fixtures/mcp/oauth-callback-harness.ts new file mode 100644 index 000000000..e056106a1 --- /dev/null +++ b/packages/junior/tests/fixtures/mcp/oauth-callback-harness.ts @@ -0,0 +1,56 @@ +import { + waitUntilCallbacks, + testWaitUntil, +} from "../oauth/callback-after-harness"; +import type { ResumeReplyGenerator } from "@/chat/runtime/slack-resume"; + +type McpOauthCallbackHandler = + typeof import("@/handlers/mcp-oauth-callback").GET; + +export interface RunMcpOauthCallbackRequestArgs { + generateReply?: ResumeReplyGenerator; + handler?: McpOauthCallbackHandler; + provider: string; + request: Request; +} + +/** Runs the MCP OAuth callback handler and flushes deferred callback work. */ +export async function runMcpOauthCallbackRequest( + args: RunMcpOauthCallbackRequestArgs, +) { + waitUntilCallbacks.length = 0; + const GET = + args.handler ?? 
(await import("@/handlers/mcp-oauth-callback")).GET; + const response = await GET(args.request, args.provider, testWaitUntil, { + generateReply: args.generateReply, + }); + const callbacks = waitUntilCallbacks.splice(0, waitUntilCallbacks.length); + for (const callback of callbacks) { + await callback(); + } + if (response.status === 200 && callbacks.length === 0) { + throw new Error( + `MCP OAuth callback route returned 200 without registering waitUntil() work for provider "${args.provider}"`, + ); + } + return response; +} + +/** Runs the MCP OAuth callback route with encoded state and code values. */ +export async function runMcpOauthCallbackRoute(args: { + provider: string; + state: string; + code: string; + generateReply?: ResumeReplyGenerator; + handler?: McpOauthCallbackHandler; +}) { + return await runMcpOauthCallbackRequest({ + provider: args.provider, + request: new Request( + `https://junior.example.com/api/oauth/callback/mcp/${args.provider}?state=${encodeURIComponent(args.state)}&code=${encodeURIComponent(args.code)}`, + { method: "GET" }, + ), + generateReply: args.generateReply, + handler: args.handler, + }); +} diff --git a/packages/junior/tests/fixtures/mcp/oauth-callback-route.ts b/packages/junior/tests/fixtures/mcp/oauth-callback-route.ts new file mode 100644 index 000000000..d31401653 --- /dev/null +++ b/packages/junior/tests/fixtures/mcp/oauth-callback-route.ts @@ -0,0 +1,218 @@ +import path from "node:path"; +import { expect, vi } from "vitest"; +import { + EVAL_MCP_AUTH_CODE, + EVAL_MCP_AUTH_PROVIDER, +} from "../../msw/handlers/eval-mcp-auth"; +import { resetSlackApiMockState } from "../../msw/handlers/slack-api"; +import { createPluginAppFixture, type PluginAppFixture } from "../plugin-app"; +import { successfulAssistantReply } from "../assistant-reply"; +import type { ResumeReplyGenerator } from "@/chat/runtime/slack-resume"; + +const ORIGINAL_ENV = { ...process.env }; +const EVAL_MCP_PLUGIN_ROOT = path.resolve( + import.meta.dirname, + 
"../plugins/eval-auth", +); + +export const SLACK_DESTINATION = { + platform: "slack", + teamId: "T123", + channelId: "C123", +} as const; + +type ArtifactStateModule = typeof import("@/chat/state/artifacts"); +type ConversationStateModule = typeof import("@/chat/state/conversation"); +type McpAuthStoreModule = typeof import("@/chat/mcp/auth-store"); +type McpClientModule = typeof import("@/chat/mcp/client"); +type McpOauthModule = typeof import("@/chat/mcp/oauth"); +type McpOauthCallbackHarnessModule = typeof import("./oauth-callback-harness"); +type PluginRegistryModule = typeof import("@/chat/plugins/registry"); +type StateAdapterModule = typeof import("@/chat/state/adapter"); +type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); + +/** Starts the memory-backed MCP OAuth callback route integration fixture. */ +export async function createMcpOauthCallbackRouteFixture() { + const generateAssistantReplyMock = vi.fn(); + generateAssistantReplyMock.mockResolvedValue( + successfulAssistantReply( + "The budget deadline you mentioned earlier was Friday.", + { + artifactStatePatch: { + lastCanvasUrl: "https://example.com/canvas", + }, + sandboxId: "sandbox-1", + sandboxDependencyProfileHash: "hash-1", + }, + ), + ); + resetSlackApiMockState(); + process.env = { + ...ORIGINAL_ENV, + JUNIOR_STATE_ADAPTER: "memory", + JUNIOR_BASE_URL: "https://junior.example.com", + }; + let pluginApp: PluginAppFixture | undefined = await createPluginAppFixture([ + EVAL_MCP_PLUGIN_ROOT, + ]); + + vi.resetModules(); + const artifactState: ArtifactStateModule = + await import("@/chat/state/artifacts"); + const conversationState: ConversationStateModule = + await import("@/chat/state/conversation"); + const mcpAuthStore: McpAuthStoreModule = + await import("@/chat/mcp/auth-store"); + const mcpClient: McpClientModule = await import("@/chat/mcp/client"); + const mcpOauth: McpOauthModule = await import("@/chat/mcp/oauth"); + const mcpOauthCallbackHarness: 
McpOauthCallbackHarnessModule = + await import("./oauth-callback-harness"); + const pluginRegistry: PluginRegistryModule = + await import("@/chat/plugins/registry"); + const stateAdapter: StateAdapterModule = await import("@/chat/state/adapter"); + const turnSessionStore: TurnSessionStoreModule = + await import("@/chat/state/turn-session"); + + await stateAdapter.disconnectStateAdapter(); + await stateAdapter.getStateAdapter().connect(); + + return { + artifactState, + conversationState, + generateAssistantReplyMock, + mcpAuthStore, + stateAdapter, + turnSessionStore, + + /** Runs the MCP OAuth callback route with the fixture resume generator. */ + async runRoute(args: { provider: string; state: string; code: string }) { + return await mcpOauthCallbackHarness.runMcpOauthCallbackRoute({ + ...args, + generateReply: generateAssistantReplyMock, + }); + }, + + /** Runs an explicit MCP OAuth callback URL through the real handler. */ + async runCallbackUrl(args: { + provider?: string; + url: string; + }): Promise { + const provider = args.provider ?? EVAL_MCP_AUTH_PROVIDER; + return await mcpOauthCallbackHarness.runMcpOauthCallbackRequest({ + provider, + request: new Request(args.url, { method: "GET" }), + generateReply: generateAssistantReplyMock, + }); + }, + + /** Creates a pending MCP auth session by driving the real MCP client. */ + async createPendingAuthSession(args: { + conversationId: string; + sessionId: string; + userMessage: string; + channelId: string; + threadTs: string; + toolChannelId?: string; + configuration?: Record; + artifactState?: Record; + }) { + const authProvider = await mcpOauth.createMcpOAuthClientProvider({ + provider: EVAL_MCP_AUTH_PROVIDER, + conversationId: args.conversationId, + destination: SLACK_DESTINATION, + sessionId: args.sessionId, + userId: "U123", + userMessage: args.userMessage, + channelId: args.channelId, + threadTs: args.threadTs, + ...(args.toolChannelId ? 
{ toolChannelId: args.toolChannelId } : {}), + ...(args.configuration ? { configuration: args.configuration } : {}), + ...(args.artifactState ? { artifactState: args.artifactState } : {}), + }); + + const plugin = pluginRegistry.getPluginDefinition(EVAL_MCP_AUTH_PROVIDER); + expect(plugin).toBeDefined(); + + const client = new mcpClient.PluginMcpClient(plugin!, { + authProvider, + }); + await expect(client.listTools()).rejects.toBeInstanceOf( + mcpClient.McpAuthorizationRequiredError, + ); + await client.close(); + + return authProvider; + }, + + /** Stores the awaiting turn-session record needed for OAuth resume. */ + async createAwaitingMcpTurnRecord(args: { + conversationId: string; + sessionId: string; + text: string; + }) { + await turnSessionStore.upsertAgentTurnSessionRecord({ + conversationId: args.conversationId, + sessionId: args.sessionId, + sliceId: 2, + state: "awaiting_resume", + destination: SLACK_DESTINATION, + piMessages: [ + { + role: "user", + content: [{ type: "text", text: args.text }], + timestamp: 1, + }, + ], + resumeReason: "auth", + resumedFromSliceId: 1, + }); + }, + + /** Stores a one-message thread state with pending MCP authorization. */ + async storePendingMcpThreadState(args: { + threadId: string; + messageId: string; + text: string; + sessionId: string; + }) { + await stateAdapter + .getStateAdapter() + .set(`thread-state:${args.threadId}`, { + conversation: { + messages: [ + { + id: args.messageId, + role: "user", + text: args.text, + createdAtMs: 1, + author: { + userId: "U123", + userName: "dcramer", + }, + }, + ], + processing: { + activeTurnId: undefined, + pendingAuth: { + kind: "mcp", + provider: EVAL_MCP_AUTH_PROVIDER, + requesterId: "U123", + sessionId: args.sessionId, + linkSentAtMs: 1, + }, + }, + }, + }); + }, + + /** Cleans up state, plugin fixtures, and environment after each scenario. 
*/ + async cleanup() { + await stateAdapter.disconnectStateAdapter(); + await pluginApp?.cleanup(); + pluginApp = undefined; + process.env = { ...ORIGINAL_ENV }; + }, + }; +} + +export { EVAL_MCP_AUTH_CODE, EVAL_MCP_AUTH_PROVIDER }; diff --git a/packages/junior/tests/fixtures/mcp-test-server.ts b/packages/junior/tests/fixtures/mcp/test-server.ts similarity index 100% rename from packages/junior/tests/fixtures/mcp-test-server.ts rename to packages/junior/tests/fixtures/mcp/test-server.ts diff --git a/packages/junior/tests/fixtures/oauth-callback-harness.ts b/packages/junior/tests/fixtures/oauth-callback-harness.ts deleted file mode 100644 index 8a61e7082..000000000 --- a/packages/junior/tests/fixtures/oauth-callback-harness.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { - waitUntilCallbacks, - testWaitUntil, -} from "./oauth-callback-after-harness"; - -export async function runOauthCallbackRoute(args: { - provider: string; - state: string; - code: string; -}) { - waitUntilCallbacks.length = 0; - const { GET } = await import("@/handlers/oauth-callback"); - const response = await GET( - new Request( - `https://junior.example.com/api/oauth/callback/${args.provider}?state=${encodeURIComponent(args.state)}&code=${encodeURIComponent(args.code)}`, - { method: "GET" }, - ), - args.provider, - testWaitUntil, - ); - const callbacks = waitUntilCallbacks.splice(0, waitUntilCallbacks.length); - for (const callback of callbacks) { - await callback(); - } - if (response.status === 200 && callbacks.length === 0) { - throw new Error( - `OAuth callback route returned 200 without registering waitUntil() work for provider "${args.provider}"`, - ); - } - return response; -} diff --git a/packages/junior/tests/fixtures/oauth-callback-after-harness.ts b/packages/junior/tests/fixtures/oauth/callback-after-harness.ts similarity index 100% rename from packages/junior/tests/fixtures/oauth-callback-after-harness.ts rename to packages/junior/tests/fixtures/oauth/callback-after-harness.ts diff --git 
a/packages/junior/tests/fixtures/oauth/callback-harness.ts b/packages/junior/tests/fixtures/oauth/callback-harness.ts new file mode 100644 index 000000000..01f4d4fae --- /dev/null +++ b/packages/junior/tests/fixtures/oauth/callback-harness.ts @@ -0,0 +1,51 @@ +import { waitUntilCallbacks, testWaitUntil } from "./callback-after-harness"; +import type { ResumeReplyGenerator } from "@/chat/runtime/slack-resume"; + +type OAuthCallbackHandler = typeof import("@/handlers/oauth-callback").GET; + +export interface RunOauthCallbackRequestArgs { + generateReply?: ResumeReplyGenerator; + handler?: OAuthCallbackHandler; + provider: string; + request: Request; +} + +/** Runs the generic OAuth callback handler and flushes deferred callback work. */ +export async function runOauthCallbackRequest( + args: RunOauthCallbackRequestArgs, +) { + waitUntilCallbacks.length = 0; + const GET = args.handler ?? (await import("@/handlers/oauth-callback")).GET; + const response = await GET(args.request, args.provider, testWaitUntil, { + generateReply: args.generateReply, + }); + const callbacks = waitUntilCallbacks.splice(0, waitUntilCallbacks.length); + for (const callback of callbacks) { + await callback(); + } + if (response.status === 200 && callbacks.length === 0) { + throw new Error( + `OAuth callback route returned 200 without registering waitUntil() work for provider "${args.provider}"`, + ); + } + return response; +} + +/** Runs the generic OAuth callback route with encoded state and code values. 
*/ +export async function runOauthCallbackRoute(args: { + provider: string; + state: string; + code: string; + generateReply?: ResumeReplyGenerator; + handler?: OAuthCallbackHandler; +}) { + return await runOauthCallbackRequest({ + provider: args.provider, + request: new Request( + `https://junior.example.com/api/oauth/callback/${args.provider}?state=${encodeURIComponent(args.state)}&code=${encodeURIComponent(args.code)}`, + { method: "GET" }, + ), + generateReply: args.generateReply, + handler: args.handler, + }); +} diff --git a/packages/junior/tests/fixtures/oauth/callback-route.ts b/packages/junior/tests/fixtures/oauth/callback-route.ts new file mode 100644 index 000000000..f2c9b9cfc --- /dev/null +++ b/packages/junior/tests/fixtures/oauth/callback-route.ts @@ -0,0 +1,157 @@ +import path from "node:path"; +import { vi } from "vitest"; +import { resetSlackApiMockState } from "../../msw/handlers/slack-api"; +import { createPluginAppFixture, type PluginAppFixture } from "../plugin-app"; +import { successfulAssistantReply } from "../assistant-reply"; +import type { ResumeReplyGenerator } from "@/chat/runtime/slack-resume"; + +export const EVAL_OAUTH_PROVIDER = "eval-oauth"; +export const EVAL_OAUTH_CODE = "eval-oauth-code"; +export const SLACK_DESTINATION = { + platform: "slack", + teamId: "T123", + channelId: "C123", +} as const; + +const ORIGINAL_ENV = { ...process.env }; +const EVAL_OAUTH_PLUGIN_ROOT = path.resolve( + import.meta.dirname, + "../plugins/eval-oauth", +); + +type StateAdapterModule = typeof import("@/chat/state/adapter"); +type OAuthCallbackHarnessModule = typeof import("./callback-harness"); +type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); +type UserTokenStoreModule = typeof import("@/chat/capabilities/factory"); + +/** Starts the memory-backed OAuth callback route integration fixture. 
*/ +export async function createOauthCallbackRouteFixture() { + const generateAssistantReplyMock = vi.fn(); + generateAssistantReplyMock.mockResolvedValue( + successfulAssistantReply("Here are your Sentry issues."), + ); + resetSlackApiMockState(); + process.env = { + ...ORIGINAL_ENV, + JUNIOR_STATE_ADAPTER: "memory", + JUNIOR_BASE_URL: "https://junior.example.com", + }; + let pluginApp: PluginAppFixture | undefined = await createPluginAppFixture([ + EVAL_OAUTH_PLUGIN_ROOT, + ]); + + vi.resetModules(); + const stateAdapter: StateAdapterModule = await import("@/chat/state/adapter"); + const oauthCallbackHarness: OAuthCallbackHarnessModule = + await import("./callback-harness"); + const turnSessionStore: TurnSessionStoreModule = + await import("@/chat/state/turn-session"); + const userTokenStore: UserTokenStoreModule = + await import("@/chat/capabilities/factory"); + await stateAdapter.disconnectStateAdapter(); + await stateAdapter.getStateAdapter().connect(); + + return { + generateAssistantReplyMock, + stateAdapter, + turnSessionStore, + + /** Runs the OAuth callback route with the fixture resume generator. */ + async runRoute(args: { + state: string; + provider?: string; + code?: string; + }): Promise { + return await oauthCallbackHarness.runOauthCallbackRoute({ + provider: args.provider ?? EVAL_OAUTH_PROVIDER, + state: args.state, + code: args.code ?? EVAL_OAUTH_CODE, + generateReply: generateAssistantReplyMock, + }); + }, + + /** Runs an explicit OAuth callback URL through the real handler. */ + async runCallbackUrl(args: { + provider?: string; + url: string; + }): Promise { + const provider = args.provider ?? EVAL_OAUTH_PROVIDER; + return await oauthCallbackHarness.runOauthCallbackRequest({ + provider, + request: new Request(args.url, { method: "GET" }), + generateReply: generateAssistantReplyMock, + }); + }, + + /** Stores the awaiting turn-session record needed for OAuth resume. 
*/ + async createAwaitingOauthTurnRecord(args: { + conversationId: string; + sessionId: string; + text?: string; + }) { + await turnSessionStore.upsertAgentTurnSessionRecord({ + conversationId: args.conversationId, + sessionId: args.sessionId, + sliceId: 2, + state: "awaiting_resume", + destination: SLACK_DESTINATION, + piMessages: args.text + ? [ + { + role: "user", + content: [{ type: "text", text: args.text }], + timestamp: 1, + }, + ] + : [], + resumeReason: "auth", + resumedFromSliceId: 1, + }); + }, + + /** Stores provider OAuth callback state in the memory adapter. */ + async storeOAuthState( + state: string, + overrides: Record = {}, + ) { + const destination = + overrides.destination ?? + (typeof overrides.channelId === "string" + ? { ...SLACK_DESTINATION, channelId: overrides.channelId } + : undefined); + await stateAdapter.getStateAdapter().set(`oauth-state:${state}`, { + userId: "U123", + provider: EVAL_OAUTH_PROVIDER, + ...(destination ? { destination } : {}), + ...overrides, + }); + }, + + /** Reads a raw OAuth state record from the memory adapter. */ + async getOAuthState(state: string): Promise { + return await stateAdapter + .getStateAdapter() + .get(`oauth-state:${state}`); + }, + + /** Reads the stored provider token for a fixture user. */ + async getStoredToken( + args: { + provider?: string; + userId?: string; + } = {}, + ) { + return await userTokenStore + .createUserTokenStore() + .get(args.userId ?? "U123", args.provider ?? EVAL_OAUTH_PROVIDER); + }, + + /** Disconnects memory state, plugin fixtures, and test environment. 
*/ + async cleanup() { + await stateAdapter.disconnectStateAdapter(); + await pluginApp?.cleanup(); + pluginApp = undefined; + process.env = { ...ORIGINAL_ENV }; + }, + }; +} diff --git a/packages/junior/tests/fixtures/oauth/resume-slack.ts b/packages/junior/tests/fixtures/oauth/resume-slack.ts new file mode 100644 index 000000000..eff31c12b --- /dev/null +++ b/packages/junior/tests/fixtures/oauth/resume-slack.ts @@ -0,0 +1,59 @@ +import { vi } from "vitest"; +import type { AssistantReply } from "@/chat/respond"; + +const ORIGINAL_ENV = { ...process.env }; + +type StateAdapterModule = typeof import("@/chat/state/adapter"); +type SlackResumeModule = typeof import("@/chat/runtime/slack-resume"); +type TurnModule = typeof import("@/chat/runtime/turn"); +type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); + +type ResumeOutcome = "success" | "execution_failure" | "provider_error"; + +/** Build deterministic assistant diagnostics for OAuth resume Slack tests. */ +export function makeResumeDiagnostics( + outcome: ResumeOutcome = "success", + extras: Partial = {}, +): AssistantReply["diagnostics"] { + return { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome, + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + ...extras, + }; +} + +/** Starts the memory-backed Slack OAuth resume integration fixture. 
*/ +export async function createOauthResumeSlackFixture() { + process.env = { + ...ORIGINAL_ENV, + JUNIOR_STATE_ADAPTER: "memory", + }; + vi.resetModules(); + + const stateAdapter: StateAdapterModule = await import("@/chat/state/adapter"); + await stateAdapter.disconnectStateAdapter(); + const slackResume: SlackResumeModule = + await import("@/chat/runtime/slack-resume"); + const turn: TurnModule = await import("@/chat/runtime/turn"); + const turnSessionStore: TurnSessionStoreModule = + await import("@/chat/state/turn-session"); + + return { + getStateAdapter: stateAdapter.getStateAdapter, + resumeAuthorizedRequest: slackResume.resumeAuthorizedRequest, + resumeSlackTurn: slackResume.resumeSlackTurn, + RetryableTurnError: turn.RetryableTurnError, + turnSessionStore, + + /** Disconnects memory state and restores the test environment. */ + async cleanup() { + await stateAdapter.disconnectStateAdapter(); + process.env = { ...ORIGINAL_ENV }; + }, + }; +} diff --git a/packages/junior/tests/fixtures/pi-stream.ts b/packages/junior/tests/fixtures/pi-stream.ts new file mode 100644 index 000000000..976dc2594 --- /dev/null +++ b/packages/junior/tests/fixtures/pi-stream.ts @@ -0,0 +1,81 @@ +import type { StreamFn } from "@earendil-works/pi-agent-core"; +import type { Message } from "@earendil-works/pi-ai"; +import { DEFAULT_TEST_NOW_MS } from "./vitest"; + +type StreamResponse = Awaited>; +type AssistantMessage = Extract; + +const zeroUsage = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + total: 0, + }, +}; + +/** Build a Pi assistant message for deterministic streamFn tests. */ +export function piAssistantMessage( + content: AssistantMessage["content"], +): AssistantMessage { + return { + role: "assistant" as const, + api: "test", + provider: "test", + model: "test", + usage: zeroUsage, + stopReason: content.some((part) => part.type === "toolCall") + ? 
"toolUse" + : "stop", + content, + timestamp: DEFAULT_TEST_NOW_MS, + }; +} + +/** Build the AsyncIterable/result pair expected from a Pi streamFn. */ +export function piStreamResponse( + message: ReturnType, +): StreamResponse { + return { + async *[Symbol.asyncIterator]() { + yield { type: "done" as const }; + }, + result: async () => message, + } as unknown as StreamResponse; +} + +/** Build a Pi streamFn response that asks the agent to call one tool. */ +export function piToolCallResponse(args: { + id: string; + name: string; + parameters?: Record; +}): StreamResponse { + return piStreamResponse( + piAssistantMessage([ + { + type: "toolCall", + id: args.id, + name: args.name, + arguments: args.parameters ?? {}, + }, + ]), + ); +} + +/** Build a Pi streamFn response with one terminal text assistant message. */ +export function piTextResponse(text: string): StreamResponse { + return piStreamResponse( + piAssistantMessage([ + { + type: "text", + text, + }, + ]), + ); +} diff --git a/packages/junior/tests/fixtures/plugin-packages.ts b/packages/junior/tests/fixtures/plugin-packages.ts new file mode 100644 index 000000000..f232136ca --- /dev/null +++ b/packages/junior/tests/fixtures/plugin-packages.ts @@ -0,0 +1,154 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { expect, vi } from "vitest"; +import type { PluginCatalogConfig } from "@/chat/plugins/types"; + +const originalCwd = process.cwd(); +let configuredPackageNames: string[] = []; + +export interface PackagedPluginFixture { + packageName: string; + manifest: string[]; + skillName?: string; +} + +export interface PluginPackageAppFixture { + resolvedTempRoot: string; + tempRoot: string; +} + +/** Reset registry module state and process cwd after plugin package tests. 
*/ +export function resetPluginPackageRegistryState(): void { + configuredPackageNames = []; + process.chdir(originalCwd); + vi.resetModules(); +} + +/** Configure the package list through the production registry config surface. */ +export async function setPluginPackages(packageNames: string[]): Promise { + configuredPackageNames = packageNames; + await setPluginCatalogConfigForTest({ packages: packageNames }); +} + +/** Apply a partial plugin catalog config while preserving the active package list. */ +export async function setPluginCatalogConfigForTest( + config: PluginCatalogConfig, +): Promise { + const { setPluginCatalogConfig } = await import("@/chat/plugins/registry"); + setPluginCatalogConfig({ + ...config, + packages: config.packages ?? configuredPackageNames, + }); +} + +/** Assert lazy registry validation fails when providers are materialized. */ +export async function expectPluginRegistryLoadFailure( + packageNames: string[], + message: string, +): Promise { + await setPluginPackages(packageNames); + const registry = await import("@/chat/plugins/registry"); + expect(() => registry.getPluginProviders()).toThrow(message); +} + +/** Create a temp app with installed plugin packages and empty local plugin roots. 
*/ +export async function createPluginPackageApp( + plugins: PackagedPluginFixture[], +): Promise { + const tempRoot = await fs.mkdtemp( + path.join(os.tmpdir(), "junior-plugin-package-"), + ); + await fs.mkdir(path.join(tempRoot, "app", "plugins"), { recursive: true }); + for (const plugin of plugins) { + await writePackagedPlugin(tempRoot, plugin); + } + await fs.writeFile( + path.join(tempRoot, "package.json"), + JSON.stringify({ + name: "temp-junior-app", + private: true, + dependencies: Object.fromEntries( + plugins.map((plugin) => [`@acme/${plugin.packageName}`, "1.0.0"]), + ), + }), + "utf8", + ); + process.chdir(tempRoot); + + vi.resetModules(); + + await setPluginPackages( + plugins.map((plugin) => `@acme/${plugin.packageName}`), + ); + return { + tempRoot, + resolvedTempRoot: await fs.realpath(tempRoot), + }; +} + +/** Install another temp plugin package in an existing package-app fixture. */ +export async function installPackagedPlugin( + app: PluginPackageAppFixture, + plugin: PackagedPluginFixture, +): Promise { + await writePackagedPlugin(app.tempRoot, plugin); +} + +/** Build the expected skill root path for an installed temp plugin package. 
*/ +export function pluginSkillRoot( + app: PluginPackageAppFixture, + packageName: string, +): string { + return path.join( + app.resolvedTempRoot, + "node_modules", + "@acme", + packageName, + "skills", + ); +} + +function withDefaultDisplayName(manifest: string[]): string[] { + if (manifest.some((line) => line.startsWith("display-name:"))) { + return manifest; + } + const nameIndex = manifest.findIndex((line) => line.startsWith("name:")); + if (nameIndex === -1) { + return manifest; + } + const name = manifest[nameIndex]!.slice("name:".length).trim(); + const displayName = name + .split(/[-_\s]+/) + .filter(Boolean) + .map((part) => `${part[0]!.toUpperCase()}${part.slice(1)}`) + .join(" "); + return [ + ...manifest.slice(0, nameIndex + 1), + `display-name: ${displayName}`, + ...manifest.slice(nameIndex + 1), + ]; +} + +async function writePackagedPlugin( + tempRoot: string, + plugin: PackagedPluginFixture, +): Promise { + const packageRoot = path.join( + tempRoot, + "node_modules", + "@acme", + plugin.packageName, + ); + const skillsDir = path.join( + packageRoot, + "skills", + plugin.skillName ?? 
"demo", + ); + await fs.mkdir(skillsDir, { recursive: true }); + await fs.writeFile( + path.join(packageRoot, "plugin.yaml"), + withDefaultDisplayName(plugin.manifest).join("\n"), + "utf8", + ); +} diff --git a/packages/junior/tests/fixtures/reply-context.ts b/packages/junior/tests/fixtures/reply-context.ts new file mode 100644 index 000000000..1fbd9664f --- /dev/null +++ b/packages/junior/tests/fixtures/reply-context.ts @@ -0,0 +1,68 @@ +import type { Destination } from "@sentry/junior-plugin-api"; +import type { AssistantReplyRequestContext } from "@/chat/respond"; +import type { Requester } from "@/chat/requester"; + +export const TEST_SLACK_DESTINATION = { + platform: "slack", + teamId: "T123", + channelId: "C123", +} as const satisfies Destination; + +export const TEST_SLACK_REQUESTER = { + platform: "slack", + teamId: TEST_SLACK_DESTINATION.teamId, + userId: "U123", +} as const satisfies Requester; + +type LegacyRequester = { + email?: string; + fullName?: string; + userId: string; + userName?: string; +}; + +export type TestReplyRequestContext = Omit< + Partial, + "destination" | "requester" +> & { + destination?: Destination; + requester?: Requester | LegacyRequester; +}; + +function requesterForDestination( + requester: Requester | LegacyRequester | undefined, + destination: Destination, +): Requester { + if (requester && "platform" in requester) { + return requester; + } + if (destination.platform === "local") { + return { + platform: "local", + userId: requester?.userId ?? TEST_SLACK_REQUESTER.userId, + ...(requester?.email ? { email: requester.email } : {}), + ...(requester?.fullName ? { fullName: requester.fullName } : {}), + ...(requester?.userName ? { userName: requester.userName } : {}), + }; + } + return { + platform: "slack", + teamId: destination.teamId, + userId: requester?.userId ?? TEST_SLACK_REQUESTER.userId, + ...(requester?.email ? { email: requester.email } : {}), + ...(requester?.fullName ? 
{ fullName: requester.fullName } : {}), + ...(requester?.userName ? { userName: requester.userName } : {}), + }; +} + +/** Build a complete reply request context for runtime component tests. */ +export function makeTestReplyContext( + options: TestReplyRequestContext = {}, +): AssistantReplyRequestContext { + const destination = options.destination ?? TEST_SLACK_DESTINATION; + return { + ...options, + destination, + requester: requesterForDestination(options.requester, destination), + } as AssistantReplyRequestContext; +} diff --git a/packages/junior/tests/fixtures/respond/agent.ts b/packages/junior/tests/fixtures/respond/agent.ts new file mode 100644 index 000000000..c7d4480f2 --- /dev/null +++ b/packages/junior/tests/fixtures/respond/agent.ts @@ -0,0 +1,78 @@ +import type { PiMessage } from "@/chat/pi/messages"; + +interface ScriptedReplyAgentOptions { + initialState: { + model: unknown; + systemPrompt: string; + thinkingLevel?: unknown; + tools: unknown[]; + }; + prepareNextTurn?: () => Promise | unknown; +} + +export interface ScriptedReplyAgent { + prepareNextTurn?: () => Promise | unknown; + state: { + messages: PiMessage[]; + model: unknown; + systemPrompt: string; + thinkingLevel?: unknown; + tools: unknown[]; + }; + steeringMessages: PiMessage[]; +} + +export interface ScriptedReplyAgentScript { + abort?: (agent: ScriptedReplyAgent) => void; + continue: (agent: ScriptedReplyAgent) => Promise; + prompt: (agent: ScriptedReplyAgent, message: unknown) => Promise; + steer?: (agent: ScriptedReplyAgent, message: unknown) => void; +} + +class TestReplyAgent implements ScriptedReplyAgent { + prepareNextTurn?: () => Promise | unknown; + state: ScriptedReplyAgent["state"]; + steeringMessages: PiMessage[] = []; + + constructor( + options: ScriptedReplyAgentOptions, + private readonly script: ScriptedReplyAgentScript, + ) { + this.prepareNextTurn = options.prepareNextTurn; + this.state = { + messages: [], + model: options.initialState.model, + systemPrompt: 
options.initialState.systemPrompt, + thinkingLevel: options.initialState.thinkingLevel, + tools: options.initialState.tools, + }; + } + + abort(): void { + this.script.abort?.(this); + } + + async continue(): Promise { + return await this.script.continue(this); + } + + async prompt(message: unknown): Promise { + return await this.script.prompt(this, message); + } + + steer(message: unknown): void { + this.script.steer?.(this, message); + } + + subscribe(): () => void { + return () => undefined; + } +} + +/** Creates a `generateAssistantReply` agent factory backed by a scripted fake. */ +export function createScriptedReplyAgentFactory( + script: ScriptedReplyAgentScript, +) { + return (options: ScriptedReplyAgentOptions) => + new TestReplyAgent(options, script); +} diff --git a/packages/junior/tests/fixtures/respond/env.ts b/packages/junior/tests/fixtures/respond/env.ts new file mode 100644 index 000000000..fd054dabc --- /dev/null +++ b/packages/junior/tests/fixtures/respond/env.ts @@ -0,0 +1,52 @@ +export interface RespondRuntimeEnvSnapshot { + agentTurnTimeoutMs?: string; + aiAdvisorModel?: string; + aiFastModel?: string; + aiModel?: string; + functionMaxDurationSeconds?: string; + juniorStateAdapter?: string; +} + +/** Configure deterministic runtime env values before importing respond modules. 
*/ +export function configureRespondRuntimeEnv(): RespondRuntimeEnvSnapshot { + const originalEnv: RespondRuntimeEnvSnapshot = { + agentTurnTimeoutMs: process.env.AGENT_TURN_TIMEOUT_MS, + aiAdvisorModel: process.env.AI_ADVISOR_MODEL, + aiFastModel: process.env.AI_FAST_MODEL, + aiModel: process.env.AI_MODEL, + functionMaxDurationSeconds: process.env.FUNCTION_MAX_DURATION_SECONDS, + juniorStateAdapter: process.env.JUNIOR_STATE_ADAPTER, + }; + + process.env.AGENT_TURN_TIMEOUT_MS = "10000"; + process.env.AI_ADVISOR_MODEL = "openai/gpt-5.5"; + process.env.AI_FAST_MODEL = "openai/gpt-5.4-mini"; + process.env.AI_MODEL = "openai/gpt-5.4"; + process.env.FUNCTION_MAX_DURATION_SECONDS = "60"; + process.env.JUNIOR_STATE_ADAPTER = "memory"; + + return originalEnv; +} + +/** Restore env values captured by configureRespondRuntimeEnv. */ +export function restoreRespondRuntimeEnv( + snapshot: RespondRuntimeEnvSnapshot, +): void { + restoreEnv("AGENT_TURN_TIMEOUT_MS", snapshot.agentTurnTimeoutMs); + restoreEnv("AI_ADVISOR_MODEL", snapshot.aiAdvisorModel); + restoreEnv("AI_FAST_MODEL", snapshot.aiFastModel); + restoreEnv("AI_MODEL", snapshot.aiModel); + restoreEnv( + "FUNCTION_MAX_DURATION_SECONDS", + snapshot.functionMaxDurationSeconds, + ); + restoreEnv("JUNIOR_STATE_ADAPTER", snapshot.juniorStateAdapter); +} + +function restoreEnv(name: string, value: string | undefined): void { + if (value === undefined) { + delete process.env[name]; + return; + } + process.env[name] = value; +} diff --git a/packages/junior/tests/fixtures/respond/mcp-progressive-loading.ts b/packages/junior/tests/fixtures/respond/mcp-progressive-loading.ts new file mode 100644 index 000000000..28f10385c --- /dev/null +++ b/packages/junior/tests/fixtures/respond/mcp-progressive-loading.ts @@ -0,0 +1,600 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { vi } from "vitest"; +import type { PiMessage } from "@/chat/pi/messages"; +import type { 
deliverPrivateMessage } from "@/chat/oauth-flow"; +import type { SkillMetadata } from "@/chat/skills"; +import type { + PluginMcpClientOptions, + PluginMcpListedTool, + PluginMcpToolCallResult, +} from "@/chat/mcp/client"; +import { McpAuthorizationRequiredError } from "@/chat/mcp/client"; +import type { PluginDefinition } from "@/chat/plugins/types"; +import type { TurnThinkingSelection } from "@/chat/services/turn-thinking-level"; +import { createScriptedReplyAgentFactory } from "./agent"; +import { configureRespondRuntimeEnv, restoreRespondRuntimeEnv } from "./env"; +import { + createScriptedSandboxExecutorFactory, + createScriptedSandboxExecutorState, +} from "./sandbox"; +import { + makeTestReplyContext, + type TestReplyRequestContext, +} from "../reply-context"; +import { DEFAULT_TEST_NOW_MS } from "../vitest"; + +const originalEnv = configureRespondRuntimeEnv(); +const originalCwd = process.cwd(); + +const DEMO_SKILL: SkillMetadata = { + name: "demo-skill", + description: "Demo skill", + skillPath: path.join(os.tmpdir(), "junior-demo-skill-placeholder"), + pluginProvider: "demo", +}; + +const demoPlugin: PluginDefinition = { + dir: path.join(os.tmpdir(), "junior-demo-plugin-placeholder"), + skillsDir: path.join(os.tmpdir(), "junior-demo-plugin-placeholder", "skills"), + manifest: { + name: "demo", + displayName: "Demo", + description: "Demo plugin", + capabilities: [], + configKeys: [], + mcp: { + transport: "http", + url: "https://mcp.example.com", + allowedTools: ["ping"], + }, + }, +}; + +const state = { + agentInitialToolNames: [] as string[][], + callToolMock: + vi.fn< + ( + plugin: PluginDefinition, + name: string, + args: Record | undefined, + ) => Promise + >(), + clientOptions: [] as Array>, + completeEmptyAssistantOnAbort: { value: false }, + continueCallCount: { value: 0 }, + continueStopsOnAbort: { value: false }, + deliverPrivateMessageMock: vi.fn(), + listToolsMock: + vi.fn< + ( + plugin: PluginDefinition, + options: PluginMcpClientOptions, + ) 
=> Promise + >(), + loadSkillExecutionErrorCount: { value: 0 }, + omitFinalAssistantAfterTool: { value: false }, + promptCallCount: { value: 0 }, + pushPreToolAssistantMessage: { value: false }, + recordToolResultMessage: { value: false }, + resumeTurnContextCounts: [] as number[], + searchMcpToolNames: [] as string[][], +}; + +let abortedAgents = new WeakSet(); +let demoAppRoot: string | undefined; +const sandboxState = createScriptedSandboxExecutorState(); +const turnThinkingSelection = { + thinkingLevel: "medium", + confidence: 1, + reason: "test", +} satisfies TurnThinkingSelection; + +export const respondMcpProgressiveLoadingHarness = { + DEMO_SKILL, + agentInitialToolNames: state.agentInitialToolNames, + callToolMock: state.callToolMock, + clientOptions: state.clientOptions, + completeEmptyAssistantOnAbort: state.completeEmptyAssistantOnAbort, + continueCallCount: state.continueCallCount, + continueStopsOnAbort: state.continueStopsOnAbort, + deliverPrivateMessageMock: state.deliverPrivateMessageMock, + listToolsMock: state.listToolsMock, + loadSkillExecutionErrorCount: state.loadSkillExecutionErrorCount, + omitFinalAssistantAfterTool: state.omitFinalAssistantAfterTool, + promptCallCount: state.promptCallCount, + pushPreToolAssistantMessage: state.pushPreToolAssistantMessage, + recordToolResultMessage: state.recordToolResultMessage, + resumeTurnContextCounts: state.resumeTurnContextCounts, + searchMcpToolNames: state.searchMcpToolNames, +}; + +/** Build a demo MCP tool with the minimal schema needed by the fake client. */ +export function makeDemoMcpTool(name: "ping" | "mutate") { + return { + name, + title: name === "ping" ? "Ping" : "Mutate", + description: + name === "ping" + ? "Ping the demo MCP server" + : "Write through the demo MCP server", + inputSchema: { + type: "object", + properties: {}, + }, + } satisfies PluginMcpListedTool; +} + +/** Build the full demo MCP tool list exposed by the fake plugin provider. 
*/ +export function makeDemoMcpTools() { + return [makeDemoMcpTool("ping"), makeDemoMcpTool("mutate")]; +} + +/** Create a temporary app tree under os.tmpdir() containing app/SOUL.md, app/plugins/demo/plugin.yaml and the demo skill's SKILL.md, point DEMO_SKILL and demoPlugin at it, and chdir into its root. The root is stored in demoAppRoot. */ async function createDemoPluginApp(): Promise { + demoAppRoot = await fs.mkdtemp( + path.join(os.tmpdir(), "junior-respond-mcp-plugin-"), + ); + const pluginDir = path.join(demoAppRoot, "app", "plugins", "demo"); + const skillsDir = path.join(pluginDir, "skills"); + const skillDir = path.join(skillsDir, DEMO_SKILL.name); + + /* recursive: true also creates the intermediate app/, plugins/, demo/ and skills/ directories. */ await fs.mkdir(skillDir, { recursive: true }); + await fs.writeFile( + path.join(demoAppRoot, "app", "SOUL.md"), + "# Test app\n", + "utf8", + ); + /* The manifest written here lists only "ping" under allowed-tools, while makeDemoMcpTools() also returns "mutate". */ await fs.writeFile( + path.join(pluginDir, "plugin.yaml"), + [ + "name: demo", + "display-name: Demo", + "description: Demo plugin", + "mcp:", + " transport: http", + " url: https://mcp.example.com", + " allowed-tools:", + " - ping", + ].join("\n"), + "utf8", + ); + await fs.writeFile( + path.join(skillDir, "SKILL.md"), + [ + "---", + `name: ${DEMO_SKILL.name}`, + `description: ${DEMO_SKILL.description}`, + "---", + "", + "Skill instructions", + ].join("\n"), + "utf8", + ); + + /* Replace the placeholder paths assigned at module load with the real temp directories. */ DEMO_SKILL.skillPath = skillDir; + demoPlugin.dir = pluginDir; + demoPlugin.skillsDir = skillsDir; + /* NOTE(review): presumably the runtime discovers app/ relative to cwd - confirm. */ process.chdir(demoAppRoot); +} + +/** Build the reply context shared by progressive MCP runtime tests.
*/ +export function makeReplyContext(args: { + conversationId: string; + threadTs: string; + turnId: string; +}): TestReplyRequestContext { + /* User, team and channel ids are fixed (U123 / T123 / C123); only the correlation ids come from args. */ return { + credentialContext: { + actor: { type: "user" as const, userId: "U123" }, + }, + requester: { platform: "slack", teamId: "T123", userId: "U123" }, + destination: { + platform: "slack" as const, + teamId: "T123", + channelId: "C123", + }, + correlation: { + channelId: "C123", + conversationId: args.conversationId, + threadTs: args.threadTs, + turnId: args.turnId, + }, + }; +} + +/** Look up the tool called name in agent.state.tools and invoke its execute function with the id "tool-call-" followed by name and the given params. A candidate only matches when it is a non-null object whose name equals name and whose execute is a function. Throws an Error when no such tool exists. */ async function executeAgentTool( + agent: { state: { tools: unknown[] } }, + name: string, + params: Record, +) { + const tool = agent.state.tools.find( + ( + candidate, + ): candidate is { + execute: (toolCallId: unknown, params: unknown) => Promise; + name: string; + } => + typeof candidate === "object" && + candidate !== null && + "name" in candidate && + candidate.name === name && + "execute" in candidate && + typeof candidate.execute === "function", + ); + if (!tool) { + throw new Error(`${name} tool missing`); + } + return await tool.execute(`tool-call-${name}`, params); +} + +/** True when message has role "user" and an array content holding at least one text part whose text includes the marker literal. NOTE(review): the literal passed to includes() is an empty string in this copy, which matches every string; the original marker was probably lost in extraction - confirm against the real file. */ function hasRuntimeTurnContext(message: unknown): boolean { + const candidate = message as { role?: unknown; content?: unknown }; + return ( + candidate.role === "user" && + Array.isArray(candidate.content) && + candidate.content.some( + (part) => + part && + typeof part === "object" && + (part as { type?: unknown }).type === "text" && + typeof (part as { text?: unknown }).text === "string" && + (part as { text: string }).text.includes(""), + ) + ); +} + +const scriptedAgentFactory = createScriptedReplyAgentFactory({ + abort(agent) { + abortedAgents.add(agent); + }, + async continue(agent) { + state.continueCallCount.value += 1; + state.resumeTurnContextCounts.push( + agent.state.messages.filter(hasRuntimeTurnContext).length, + ); + + const lastMessage = agent.state.messages.at(-1) as + | { role?: unknown } + | undefined; + if (lastMessage?.role === "assistant") {
+ throw new Error("Cannot continue from message role: assistant"); + } + await executeAgentTool(agent, "callMcpTool", { + tool_name: "mcp__demo__ping", + arguments: { query: "hello" }, + }); + if (abortedAgents.has(agent) && state.continueStopsOnAbort.value) { + return {}; + } + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "resumed reply" }], + stopReason: "stop", + } as PiMessage); + return {}; + }, + async prompt(agent, message) { + state.promptCallCount.value += 1; + abortedAgents.delete(agent); + agent.state.messages.push(message as PiMessage); + + let loadSkillResult: { + details?: { + mcp_provider?: string; + available_tool_count?: number; + }; + }; + try { + loadSkillResult = (await executeAgentTool(agent, "loadSkill", { + skill_name: DEMO_SKILL.name, + })) as { + details?: { + mcp_provider?: string; + available_tool_count?: number; + }; + }; + } catch (error) { + state.loadSkillExecutionErrorCount.value += 1; + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "loading demo skill" }], + } as PiMessage); + throw error; + } + + agent.state.messages.push({ + role: "toolResult", + toolCallId: "tool-call-1", + toolName: "loadSkill", + isError: false, + details: loadSkillResult.details, + content: [{ type: "text", text: "loaded" }], + } as PiMessage); + if (abortedAgents.has(agent)) { + agent.state.messages.push({ + role: "assistant", + content: [ + { + type: "text", + text: state.completeEmptyAssistantOnAbort.value + ? "" + : "loading demo skill", + }, + ], + ...(state.completeEmptyAssistantOnAbort.value + ? 
{ stopReason: "stop" } + : {}), + } as PiMessage); + return {}; + } + + if (loadSkillResult.details?.mcp_provider) { + const searchResult = (await executeAgentTool(agent, "searchMcpTools", { + provider: loadSkillResult.details.mcp_provider, + query: "ping query", + })) as { + details?: { tools?: Array<{ tool_name: string }> }; + }; + state.searchMcpToolNames.push( + (searchResult.details?.tools ?? []).map((tool) => tool.tool_name), + ); + } + if (state.pushPreToolAssistantMessage.value) { + agent.state.messages.push({ + role: "assistant", + content: [ + { + type: "text", + text: "Let me search for related articles and compare perspectives.", + }, + ], + } as PiMessage); + } + + await executeAgentTool(agent, "callMcpTool", { + tool_name: "mcp__demo__ping", + arguments: { query: "hello" }, + }); + if (state.recordToolResultMessage.value) { + agent.state.messages.push({ + role: "toolResult", + toolName: "callMcpTool", + isError: false, + content: [{ type: "text", text: "pong" }], + } as PiMessage); + } + if (state.omitFinalAssistantAfterTool.value) { + return {}; + } + agent.state.messages.push({ + role: "assistant", + content: [{ type: "text", text: "resumed reply" }], + stopReason: "stop", + } as PiMessage); + return {}; + }, +}); + +const agentFactory: typeof scriptedAgentFactory = (options) => { + state.agentInitialToolNames.push( + options.initialState.tools.map((tool) => + typeof tool === "object" && + tool !== null && + "name" in tool && + typeof (tool as { name?: unknown }).name === "string" + ? 
(tool as { name: string }).name + : "", + ), + ); + return scriptedAgentFactory(options); +}; + +function mcpClientFactory( + plugin: PluginDefinition, + options: PluginMcpClientOptions, +) { + state.clientOptions.push({ ...options }); + return { + async listTools() { + return await state.listToolsMock(plugin, options); + }, + async callTool(name: string, args: Record | undefined) { + return await state.callToolMock(plugin, name, args); + }, + async close() { + return undefined; + }, + }; +} + +const { createMcpAuthOrchestration: createMcpAuthOrchestrationImpl } = + await import("@/chat/services/mcp-auth-orchestration"); +const { getConfigDefaults: getConfigDefaultsImpl } = + await import("@/chat/configuration/defaults"); +const { + deleteMcpAuthSession: deleteMcpAuthSessionImpl, + getMcpAuthSession: getMcpAuthSessionImpl, + patchMcpAuthSession: patchMcpAuthSessionImpl, + putMcpAuthSession: putMcpAuthSessionImpl, +} = await import("@/chat/mcp/auth-store"); +const { + discoverSkills: discoverSkillsImpl, + findSkillByName: findSkillByNameImpl, + parseSkillInvocation: parseSkillInvocationImpl, +} = await import("@/chat/skills"); +const { recordAuthorizationRequested: recordAuthorizationRequestedImpl } = + await import("@/chat/state/session-log"); +const { generateAssistantReply: generateAssistantReplyImpl } = + await import("@/chat/respond"); +const { isRetryableTurnError: isRetryableTurnErrorImpl } = + await import("@/chat/runtime/turn"); +const { disconnectStateAdapter: disconnectStateAdapterImpl } = + await import("@/chat/state/adapter"); +const { + getAgentTurnSessionRecord: getAgentTurnSessionRecordImpl, + upsertAgentTurnSessionRecord: upsertAgentTurnSessionRecordImpl, +} = await import("@/chat/state/turn-session"); + +const mcpAuthServices = { + createMcpOAuthClientProvider: async (input) => { + const authSessionId = `${input.provider}-auth-session`; + await putMcpAuthSessionImpl({ + authSessionId, + provider: input.provider, + userId: input.userId, + 
conversationId: input.conversationId, + sessionId: input.sessionId, + userMessage: input.userMessage, + ...(input.channelId ? { channelId: input.channelId } : {}), + ...(input.threadTs ? { threadTs: input.threadTs } : {}), + ...(input.toolChannelId ? { toolChannelId: input.toolChannelId } : {}), + ...(input.configuration ? { configuration: input.configuration } : {}), + ...(input.artifactState ? { artifactState: input.artifactState } : {}), + createdAtMs: DEFAULT_TEST_NOW_MS, + updatedAtMs: DEFAULT_TEST_NOW_MS, + }); + + return { + authSessionId, + redirectUrl: `https://junior.example.com/api/oauth/callback/mcp/${input.provider}`, + clientMetadata: { + client_name: "Junior MCP Client", + redirect_uris: [ + `https://junior.example.com/api/oauth/callback/mcp/${input.provider}`, + ], + grant_types: ["authorization_code", "refresh_token"], + response_types: ["code"], + token_endpoint_auth_method: "none", + }, + state: async () => `${input.provider}-auth-state`, + clientInformation: async () => undefined, + saveClientInformation: async () => undefined, + tokens: async () => undefined, + saveTokens: async () => undefined, + redirectToAuthorization: async (authorizationUrl: URL) => { + await patchMcpAuthSessionImpl(authSessionId, { + authorizationUrl: authorizationUrl.toString(), + }); + }, + saveCodeVerifier: async () => undefined, + codeVerifier: async () => "code-verifier", + }; + }, + deleteMcpAuthSession: deleteMcpAuthSessionImpl, + deliverPrivateMessage: state.deliverPrivateMessageMock, + getMcpAuthSession: getMcpAuthSessionImpl, + patchMcpAuthSession: patchMcpAuthSessionImpl, + recordAuthorizationRequested: recordAuthorizationRequestedImpl, +} satisfies NonNullable[1]>; + +type ReplyContext = NonNullable< + Parameters[1] +>; + +const respondRuntimeServices = { + createMcpAuthOrchestration: (input) => + createMcpAuthOrchestrationImpl(input, mcpAuthServices), + discoverSkills: discoverSkillsImpl, + findSkillByName: findSkillByNameImpl, + getConfigDefaults: 
getConfigDefaultsImpl, + getPluginMcpProviders: () => [demoPlugin], + getPluginProviders: () => [demoPlugin], + parseSkillInvocation: parseSkillInvocationImpl, +} satisfies NonNullable< + NonNullable["runtimeServices"] +>; + +/** Run respond through the explicit MCP/agent/sandbox ports used by this fixture. */ +export async function generateAssistantReply( + message: string, + context: TestReplyRequestContext = {}, +) { + const { harness, ...restContext } = context; + return await generateAssistantReplyImpl(message, { + ...makeTestReplyContext(restContext), + recordPendingAuth: restContext.recordPendingAuth ?? (async () => {}), + harness: { + agentFactory, + mcpClientFactory, + runtimeServices: respondRuntimeServices, + sandboxExecutorFactory: + createScriptedSandboxExecutorFactory(sandboxState), + turnThinkingSelection, + ...harness, + }, + }); +} + +export const getAgentTurnSessionRecord = getAgentTurnSessionRecordImpl; +export const isRetryableTurnError = isRetryableTurnErrorImpl; +export const upsertAgentTurnSessionRecord = upsertAgentTurnSessionRecordImpl; +export { McpAuthorizationRequiredError }; + +/** Reset MCP/respond runtime state before each progressive-loading test. 
*/ +export async function setupRespondMcpProgressiveLoadingTest(): Promise { + /* Remove the temp app left by a previous test, if any. */ if (demoAppRoot) { + await fs.rm(demoAppRoot, { recursive: true, force: true }); + demoAppRoot = undefined; + } + /* Return to the cwd captured at import time, then build a fresh temp app; createDemoPluginApp changes cwd to the new root. */ process.chdir(originalCwd); + await createDemoPluginApp(); + + /* Reset in place (length = 0, .value = ...): respondMcpProgressiveLoadingHarness exposes these same array and holder objects, so they must not be replaced. NOTE(review): sandboxState is not reset here - confirm that is intended. */ state.agentInitialToolNames.length = 0; + state.callToolMock.mockReset(); + state.clientOptions.length = 0; + state.completeEmptyAssistantOnAbort.value = false; + state.continueCallCount.value = 0; + state.continueStopsOnAbort.value = false; + state.deliverPrivateMessageMock.mockReset(); + state.listToolsMock.mockReset(); + state.searchMcpToolNames.length = 0; + state.loadSkillExecutionErrorCount.value = 0; + state.omitFinalAssistantAfterTool.value = false; + state.promptCallCount.value = 0; + state.pushPreToolAssistantMessage.value = false; + state.recordToolResultMessage.value = false; + state.resumeTurnContextCounts.length = 0; + abortedAgents = new WeakSet(); + + process.env.JUNIOR_BASE_URL = "https://junior.example.com"; + + /* Default mock behaviour, re-applied after the mockReset calls above. */ state.deliverPrivateMessageMock.mockResolvedValue("in_context"); + state.callToolMock.mockResolvedValue({ + content: [{ type: "text", text: "pong" }], + isError: false, + }); + /* The first listTools call invokes the auth provider's redirectToAuthorization (when present) and then throws McpAuthorizationRequiredError; every later call resolves with the demo tool list. */ state.listToolsMock + .mockImplementationOnce(async (plugin, options) => { + await options.authProvider?.redirectToAuthorization?.( + new URL(`https://auth.example.com/${plugin.manifest.name}`), + ); + throw new McpAuthorizationRequiredError( + plugin.manifest.name, + "Auth required", + ); + }) + .mockResolvedValue(makeDemoMcpTools()); + + await disconnectStateAdapterImpl(); +} + +/** Restore memory state and process globals after progressive-loading tests.
*/ +export async function cleanupRespondMcpProgressiveLoadingTest(): Promise { + await disconnectStateAdapterImpl(); + delete process.env.JUNIOR_BASE_URL; + process.chdir(originalCwd); + if (demoAppRoot) { + await fs.rm(demoAppRoot, { recursive: true, force: true }); + demoAppRoot = undefined; + } + vi.restoreAllMocks(); +} + +/** Restore import-time env values captured for the progressive MCP respond fixture. */ +export function restoreRespondMcpProgressiveLoadingEnv(): void { + restoreRespondRuntimeEnv(originalEnv); +} + +export type { PiMessage }; diff --git a/packages/junior/tests/fixtures/respond/sandbox.ts b/packages/junior/tests/fixtures/respond/sandbox.ts new file mode 100644 index 000000000..0ae1fedaa --- /dev/null +++ b/packages/junior/tests/fixtures/respond/sandbox.ts @@ -0,0 +1,132 @@ +import { Buffer } from "node:buffer"; +import type { + SandboxExecutionEnvelope, + SandboxExecutorFactory, +} from "@/chat/sandbox/sandbox"; +import type { SandboxInstance } from "@/chat/sandbox/workspace"; +import type { SkillMetadata } from "@/chat/skills"; + +export interface ScriptedSandboxExecutorState { + activeSandboxVersion: number; + configuredReferenceFiles: string[]; + configuredSkills: SkillMetadata[]; + createSandboxCalls: number; + executedTools: string[]; +} + +export interface ScriptedSandboxExecutorOptions { + canExecute?: (toolName: string) => boolean; +} + +/** Create mutable state for a scripted sandbox executor fixture. */ +export function createScriptedSandboxExecutorState(): ScriptedSandboxExecutorState { + return { + activeSandboxVersion: 1, + configuredReferenceFiles: [], + configuredSkills: [], + createSandboxCalls: 0, + executedTools: [], + }; +} + +function sandboxIdFor(version: number): string { + return version === 1 ? 
"sandbox-test" : `sandbox-test-${version}`; +} + +function createSandboxInstance(sandboxId: string): SandboxInstance { + return { + sandboxId, + sandboxEgressId: `${sandboxId}-session`, + fs: { + readFile: async () => "fixture", + writeFile: async () => undefined, + readdir: async () => [], + stat: async () => ({ + isDirectory: () => false, + }), + }, + extendTimeout: async () => undefined, + mkDir: async () => undefined, + readFileToBuffer: async () => Buffer.from("report contents", "utf8"), + runCommand: async () => ({ + exitCode: 0, + stdout: async () => "text/plain\n", + stderr: async () => "", + }), + snapshot: async () => ({ snapshotId: "snapshot-test" }), + stop: async () => undefined, + update: async () => undefined, + writeFiles: async () => undefined, + }; +} + +/** Create a sandbox executor factory with explicit, inspectable runtime state. */ +export function createScriptedSandboxExecutorFactory( + state: ScriptedSandboxExecutorState, + options: ScriptedSandboxExecutorOptions = {}, +): SandboxExecutorFactory { + return (factoryOptions = {}) => { + let currentSandboxId: string | undefined; + let currentDependencyProfileHash: string | undefined; + + const acquireSandbox = async (): Promise => { + state.createSandboxCalls += 1; + currentSandboxId = sandboxIdFor(state.activeSandboxVersion); + currentDependencyProfileHash = "hash-test"; + await factoryOptions.onSandboxAcquired?.({ + sandboxId: currentSandboxId, + sandboxDependencyProfileHash: currentDependencyProfileHash, + }); + return createSandboxInstance(currentSandboxId); + }; + + return { + configureSkills(skills) { + state.configuredSkills = [...skills]; + }, + configureReferenceFiles(files) { + state.configuredReferenceFiles = [...files]; + }, + getSandboxId() { + return currentSandboxId; + }, + getDependencyProfileHash() { + return currentDependencyProfileHash; + }, + canExecute(toolName) { + return options.canExecute?.(toolName) ?? 
false; + }, + async createSandbox() { + return await acquireSandbox(); + }, + async execute(params: { + input: unknown; + signal?: AbortSignal; + toolName: string; + }): Promise> { + const { input, toolName } = params; + if (!options.canExecute?.(toolName)) { + throw new Error(`sandbox executor cannot execute ${toolName}`); + } + state.executedTools.push(toolName); + await acquireSandbox(); + const rawInput = (input ?? {}) as { command?: unknown }; + return { + result: { + ok: true, + command: String(rawInput.command ?? ""), + cwd: "/workspace", + exit_code: 0, + signal: null, + timed_out: false, + stdout: "/workspace\n", + stderr: "", + stdout_truncated: false, + stderr_truncated: false, + } as T, + } satisfies SandboxExecutionEnvelope; + }, + dispose: async () => undefined, + }; + }; +} diff --git a/packages/junior/tests/fixtures/runtime-dependency-snapshots.ts b/packages/junior/tests/fixtures/runtime-dependency-snapshots.ts new file mode 100644 index 000000000..41843a089 --- /dev/null +++ b/packages/junior/tests/fixtures/runtime-dependency-snapshots.ts @@ -0,0 +1,179 @@ +import { vi } from "vitest"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; +import type { + PluginRuntimeDependency, + PluginRuntimePostinstallCommand, +} from "@/chat/plugins/types"; +import { resolveRuntimeDependencySnapshot as resolveRuntimeDependencySnapshotImpl } from "@/chat/sandbox/runtime-dependency-snapshots"; +import { disconnectStateAdapter, getStateAdapter } from "@/chat/state/adapter"; +import { mockTestClock, stubTestEnv } from "./vitest"; + +const SNAPSHOT_CACHE_PREFIX = "junior:sandbox_snapshot_profile"; +const SNAPSHOT_LOCK_PREFIX = "junior:sandbox_snapshot_lock"; +const SNAPSHOT_BUILD_LOCK_TTL_MS = 10 * 60 * 1000; + +export const sandboxCreateMock = vi.fn(); + +let heldSnapshotLock: Awaited< + ReturnType["acquireLock"]> +> | null = null; + +/** Configure the real plugin registry with one runtime-dependency test plugin. 
*/ +export function configureRuntimeDependencyPlugin(args: { + dependencies?: PluginRuntimeDependency[]; + postinstall?: PluginRuntimePostinstallCommand[]; +}): void { + const dependencies = args.dependencies ?? []; + const postinstall = args.postinstall ?? []; + /* When both lists are empty the catalog receives an empty inlineManifests array instead of a "runtime-deps" plugin; each manifest key is only included when its list is non-empty. */ setPluginCatalogConfig({ + inlineManifests: + dependencies.length > 0 || postinstall.length > 0 + ? [ + { + manifest: { + name: "runtime-deps", + displayName: "Runtime Deps", + description: "Runtime dependency test plugin", + capabilities: [], + configKeys: [], + ...(dependencies.length > 0 + ? { runtimeDependencies: dependencies } + : {}), + ...(postinstall.length > 0 + ? { runtimePostinstall: postinstall } + : {}), + }, + }, + ] + : [], + }); +} + +/** Call the real snapshot resolver with sandboxCreateMock injected as its createSandbox dependency. */ export async function resolveRuntimeDependencySnapshot( + params: Parameters[0], +) { + return await resolveRuntimeDependencySnapshotImpl(params, { + createSandbox: sandboxCreateMock as never, + }); +} + +/** Builds a fake Vercel sandbox for runtime dependency snapshot tests. */ +export function makeRuntimeDependencySandbox( + snapshotId: string, + runCommandImpl?: (params: { + cmd: string; + args?: string[]; + sudo?: boolean; + }) => Promise<{ + exitCode: number; + stdout: () => Promise; + stderr: () => Promise; + }>, +) { + /* Every member is a vi.fn so tests can assert on calls; runCommand falls back to a success result with empty stdout and stderr when no implementation is passed. */ return { + name: `sbx_${snapshotId}`, + currentSession: vi.fn(() => ({ sessionId: `sbx_${snapshotId}_session` })), + runCommand: vi.fn( + runCommandImpl ?? + (async () => ({ + exitCode: 0, + stdout: async () => "", + stderr: async () => "", + })), + ), + snapshot: vi.fn(async () => ({ snapshotId })), + stop: vi.fn(async () => {}), + }; +} + +/** Extracts the generated shell script from a sandbox command invocation. */ +export function getRuntimeDependencyScript(params: { + cmd: string; + args?: string[]; + sudo?: boolean; +}): string { + /* Reads the second argument; returns "" when args or args[1] is missing. */ return params.args?.[1] ?? ""; +} + +/** Resets runtime dependency snapshot mocks and environment before each test.
*/ +export async function setupRuntimeDependencySnapshotTest() { + vi.unstubAllEnvs(); + stubTestEnv({ JUNIOR_STATE_ADAPTER: "memory" }); + /* Release any lock still held from a previous test before disconnecting the adapter. */ await releaseRuntimeSnapshotLock(); + await disconnectStateAdapter(); + sandboxCreateMock.mockReset(); + setPluginCatalogConfig(undefined); + /* These variables are removed directly from process.env rather than through the env stub. */ delete process.env.SANDBOX_SNAPSHOT_REBUILD_EPOCH; + delete process.env.SANDBOX_SNAPSHOT_FLOATING_MAX_AGE_MS; + delete process.env.VERCEL_TOKEN; + delete process.env.VERCEL_TEAM_ID; + delete process.env.VERCEL_PROJECT_ID; + mockTestClock("2026-03-01T00:00:00.000Z"); +} + +/** Restores timer, registry, and state after runtime dependency snapshot tests. */ +export async function cleanupRuntimeDependencySnapshotTest() { + await releaseRuntimeSnapshotLock(); + setPluginCatalogConfig(undefined); + await disconnectStateAdapter(); + vi.useRealTimers(); + vi.unstubAllEnvs(); +} + +/** Cache key for one profile: SNAPSHOT_CACHE_PREFIX, a colon, then profileHash. */ function snapshotCacheKey(profileHash: string): string { + return `${SNAPSHOT_CACHE_PREFIX}:${profileHash}`; +} + +/** Lock key for one profile: SNAPSHOT_LOCK_PREFIX, a colon, then profileHash. */ function snapshotLockKey(profileHash: string): string { + return `${SNAPSHOT_LOCK_PREFIX}:${profileHash}`; +} + +/** Returns the raw runtime snapshot cache entry for one profile. */ +export async function getRuntimeSnapshotCacheEntry( + profileHash: string, +): Promise { + const state = getStateAdapter(); + await state.connect(); + const raw = await state.get(snapshotCacheKey(profileHash)); + /* Anything that is not a string is reported as undefined. */ return typeof raw === "string" ? raw : undefined; +} + +/** Writes a raw runtime snapshot cache entry for lock-wait scenarios. */ +export async function setRuntimeSnapshotCacheEntry( + profileHash: string, + value: string, +): Promise { + const state = getStateAdapter(); + await state.connect(); + /* The third argument is 30 days in milliseconds; presumably the entry TTL - confirm against the state adapter's set() signature. */ await state.set( + snapshotCacheKey(profileHash), + value, + 30 * 24 * 60 * 60 * 1000, + ); +} + +/** Holds the snapshot build lock until `releaseRuntimeSnapshotLock` is called.
*/ +export async function holdRuntimeSnapshotLock( + profileHash: string, +): Promise { + const state = getStateAdapter(); + await state.connect(); + /* NOTE(review): heldSnapshotLock is overwritten without releasing a previously held lock - confirm callers hold at most one lock at a time. */ heldSnapshotLock = await state.acquireLock( + snapshotLockKey(profileHash), + SNAPSHOT_BUILD_LOCK_TTL_MS, + ); + /* A falsy result is treated as a failed acquisition and fails the test immediately. */ if (!heldSnapshotLock) { + throw new Error("Expected to acquire runtime snapshot lock"); + } +} + +/** Releases a lock held by `holdRuntimeSnapshotLock`, if present. */ +export async function releaseRuntimeSnapshotLock(): Promise { + if (!heldSnapshotLock) { + return; + } + const state = getStateAdapter(); + await state.connect(); + await state.releaseLock(heldSnapshotLock); + heldSnapshotLock = null; +} diff --git a/packages/junior/tests/fixtures/sandbox/egress-proxy.ts b/packages/junior/tests/fixtures/sandbox/egress-proxy.ts new file mode 100644 index 000000000..68c6a3546 --- /dev/null +++ b/packages/junior/tests/fixtures/sandbox/egress-proxy.ts @@ -0,0 +1,333 @@ +import { vi } from "vitest"; + +/* Created through vi.hoisted so the mocks already exist when the vi.mock("jose") factory below runs; Vitest hoists vi.mock calls above the imports. */ const mocks = vi.hoisted(() => ({ + createRemoteJWKSetMock: vi.fn(() => async () => null), + decodeJwtMock: vi.fn(), + issueProviderCredentialLeaseMock: vi.fn(), + jwtVerifyMock: vi.fn(), +})); + +/* Re-exported individually so tests can configure and assert on each mock. */ export const createRemoteJWKSetMock = mocks.createRemoteJWKSetMock; +export const decodeJwtMock = mocks.decodeJwtMock; +export const issueProviderCredentialLeaseMock = + mocks.issueProviderCredentialLeaseMock; +export const jwtVerifyMock = mocks.jwtVerifyMock; + +/* Only the three jose functions listed here are provided by the mocked module. */ vi.mock("jose", () => ({ + createRemoteJWKSet: mocks.createRemoteJWKSetMock, + decodeJwt: mocks.decodeJwtMock, + jwtVerify: mocks.jwtVerifyMock, +})); + +import { + buildSandboxEgressNetworkPolicy as buildSandboxEgressNetworkPolicyImpl, + matchesSandboxEgressDomain as matchesSandboxEgressDomainImpl, + resolveSandboxCommandEnvironment as resolveSandboxCommandEnvironmentImpl, +} from "@/chat/sandbox/egress-policy"; +import { verifyVercelSandboxOidcToken as verifyVercelSandboxOidcTokenImpl } from "@/chat/sandbox/egress-oidc"; +import { +
isSandboxEgressForwardedRequest as isSandboxEgressForwardedRequestImpl, + proxySandboxEgressRequest as proxySandboxEgressRequestImpl, +} from "@/chat/sandbox/egress-proxy"; +import { + createSandboxEgressCredentialToken as createSandboxEgressCredentialTokenImpl, + SANDBOX_EGRESS_PROXY_PATH as SANDBOX_EGRESS_PROXY_PATH_IMPL, +} from "@/chat/sandbox/egress-session"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; +import type { PluginManifest } from "@/chat/plugins/types"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { CredentialUnavailableError as CredentialUnavailableErrorImpl } from "@/chat/credentials/broker"; +import type { CredentialSubject } from "@/chat/credentials/context"; +import { ALL as sandboxEgressHandler } from "@/handlers/sandbox-egress-proxy"; +import { DEFAULT_TEST_EXPIRES_AT_ISO } from "../vitest"; + +export const CredentialUnavailableError = CredentialUnavailableErrorImpl; +export const SANDBOX_EGRESS_PROXY_PATH = SANDBOX_EGRESS_PROXY_PATH_IMPL; + +/** Call the route handler with mocks already registered. */ +export function ALL(request: Request): ReturnType { + return sandboxEgressHandler(request); +} + +/** Build a sandbox egress network policy through the real plugin registry. */ +export function buildSandboxEgressNetworkPolicy( + input?: Parameters[0], +): ReturnType { + return buildSandboxEgressNetworkPolicyImpl(input); +} + +/** Check domain matching through the real egress policy implementation. */ +export function matchesSandboxEgressDomain( + ...args: Parameters +): ReturnType { + return matchesSandboxEgressDomainImpl(...args); +} + +/** Resolve command environment through the real policy implementation. */ +export function resolveSandboxCommandEnvironment(): ReturnType< + typeof resolveSandboxCommandEnvironmentImpl +> { + return resolveSandboxCommandEnvironmentImpl(); +} + +/** Verify a sandbox OIDC token with mocked jose and discovery fetches. 
*/ +export function verifyVercelSandboxOidcToken( + ...args: Parameters +): ReturnType { + return verifyVercelSandboxOidcTokenImpl(...args); +} + +/** Detect forwarded sandbox egress requests through the real proxy helper. */ +export function isSandboxEgressForwardedRequest( + ...args: Parameters +): ReturnType { + return isSandboxEgressForwardedRequestImpl(...args); +} + +/** Proxy a request through the real egress implementation. */ +export function proxySandboxEgressRequest( + request: Parameters[0], + deps: Parameters[1] = {}, +): ReturnType { + /* issueProviderCredentialLease is listed after the spread, so the mock always replaces any value supplied in deps. */ return proxySandboxEgressRequestImpl(request, { + ...deps, + issueProviderCredentialLease: issueProviderCredentialLeaseMock, + }); +} + +/** Create a signed egress credential token with the test secret. */ +export function createSandboxEgressCredentialToken( + ...args: Parameters +): ReturnType { + return createSandboxEgressCredentialTokenImpl(...args); +} + +/** Egress id passed as egressId when the setSandboxEgress*Actor helpers sign a token. */ export const EGRESS_ID = "junior-sbx"; +/** Default user id for setSandboxEgressUserActor. */ export const REQUESTER_ID = "U123"; + +/** Token that egressRequest appends to SANDBOX_EGRESS_PROXY_PATH; while undefined, egressRequest uses the upstream path as the proxy path. */ let activeCredentialToken: string | undefined; + +/** Configure sandbox egress plugin manifests through the real catalog. */ +export function configureSandboxEgressPlugins( + plugins: Array<{ manifest: PluginManifest }>, +): void { + /* Only the manifest of each entry is forwarded to the catalog. */ setPluginCatalogConfig({ + inlineManifests: plugins.map(({ manifest }) => ({ manifest })), + }); +} + +/** Reset mocked proxy dependencies and memory state before each egress test.
*/ +export async function setupSandboxEgressProxyTest(): Promise { + process.env.JUNIOR_STATE_ADAPTER = "memory"; + process.env.JUNIOR_BASE_URL = "https://junior.example.com"; + process.env.JUNIOR_SECRET = "test-secret"; + /* Start every test without a signed token and with only the Sentry plugin registered. */ activeCredentialToken = undefined; + configureSandboxEgressPlugins([sentryPlugin()]); + /* Clear recorded calls, then set the return value again to the same async () => null that the mock was created with. */ createRemoteJWKSetMock.mockClear(); + createRemoteJWKSetMock.mockReturnValue(async () => null); + decodeJwtMock.mockReset(); + issueProviderCredentialLeaseMock.mockReset(); + jwtVerifyMock.mockReset(); + await disconnectStateAdapter(); +} + +/** Restore process globals and memory state after each egress test. */ +export async function cleanupSandboxEgressProxyTest(): Promise { + await disconnectStateAdapter(); + setPluginCatalogConfig(undefined); + /* NOTE(review): the variables are deleted, not restored to earlier values. SENTRY_BOT_EMAIL is not set by setupSandboxEgressProxyTest; presumably individual tests set it - confirm. */ delete process.env.JUNIOR_STATE_ADAPTER; + delete process.env.JUNIOR_BASE_URL; + delete process.env.JUNIOR_SECRET; + delete process.env.SENTRY_BOT_EMAIL; + vi.restoreAllMocks(); +} + +/** Build the Sentry plugin fixture used by egress policy and forwarding tests. */ +export function sentryPlugin(): { manifest: PluginManifest } { + /* A new object is returned on every call, so tests can modify the result without affecting each other. */ return { + manifest: { + name: "sentry", + displayName: "Sentry", + description: "Sentry", + capabilities: ["sentry.api"], + configKeys: [], + envVars: { + SENTRY_BOT_EMAIL: { exposeToCommandEnv: true }, + }, + commandEnv: { + SENTRY_AUTHOR_EMAIL: "${SENTRY_BOT_EMAIL}", + SENTRY_READ_ONLY: "1", + }, + credentials: { + type: "oauth-bearer", + domains: ["sentry.io", "us.sentry.io"], + authTokenEnv: "SENTRY_AUTH_TOKEN", + authTokenPlaceholder: "host_managed_credential", + }, + }, + }; +} + +/** Build the GitHub plugin fixture used by delegated credential tests.
*/ +export function githubPlugin(): { manifest: PluginManifest } { + return { + manifest: { + name: "github", + displayName: "GitHub", + description: "GitHub", + capabilities: ["github.api"], + configKeys: [], + envVars: {}, + commandEnv: { + GITHUB_READ_ONLY: "1", + }, + credentials: { + type: "oauth-bearer", + domains: ["api.github.com", "github.com"], + authTokenEnv: "GITHUB_TOKEN", + authTokenPlaceholder: "host_managed_credential", + }, + }, + }; +} + +/** Build a provider with forwarding domains but no token placeholder. */ +export function headerOnlyPlugin() { + return { + manifest: { + name: "header-only", + displayName: "Header Only", + description: "Header-only", + capabilities: ["header-only.api"], + configKeys: [], + envVars: {}, + commandEnv: { + HEADER_ONLY_READ_ONLY: "1", + }, + domains: ["api.example.com"], + }, + }; +} + +/** Sign the active proxy URL credential as a user actor. */ +export function setSandboxEgressUserActor(userId = REQUESTER_ID): void { + activeCredentialToken = createSandboxEgressCredentialToken({ + credentials: { actor: { type: "user", userId } }, + egressId: EGRESS_ID, + ttlMs: 60_000, + }); +} + +/** Sign the active proxy URL credential as a system actor. */ +export function setSandboxEgressSystemActor(input?: { + subject?: CredentialSubject; +}): void { + activeCredentialToken = createSandboxEgressCredentialToken({ + credentials: { + actor: { type: "system", id: "scheduler" }, + ...(input?.subject ? { subject: input.subject } : {}), + }, + egressId: EGRESS_ID, + ttlMs: 60_000, + }); +} + +/** Replace the active credential token for negative proxy-context tests. */ +export function setActiveSandboxEgressCredentialToken( + token: string | undefined, +): void { + activeCredentialToken = token; +} + +/** Return the currently active signed credential token for request assertions. 
*/ +export function activeSandboxEgressCredentialToken(): string | undefined { + return activeCredentialToken; +} + +/** Mock a Sentry provider lease with a host-specific header transform. */ +export function mockSentryLease( + domain = "sentry.io", + token = "sentry-token", +): void { + issueProviderCredentialLeaseMock.mockResolvedValue({ + id: "lease-1", + provider: "sentry", + env: { SENTRY_AUTH_TOKEN: "host_managed_credential" }, + headerTransforms: [ + { + domain, + headers: { Authorization: `Bearer ${token}` }, + }, + ], + expiresAt: DEFAULT_TEST_EXPIRES_AT_ISO, + }); +} + +/** Mock a GitHub provider lease with its bearer header transform. */ +export function mockGitHubLease(token = "github-token"): void { + issueProviderCredentialLeaseMock.mockResolvedValue({ + id: "lease-github", + provider: "github", + env: { GITHUB_TOKEN: "ghp_host_managed_credential" }, + headerTransforms: [ + { + domain: "api.github.com", + headers: { Authorization: `Bearer ${token}` }, + }, + ], + expiresAt: DEFAULT_TEST_EXPIRES_AT_ISO, + }); +} + +/** Build a forwarded request shaped like Vercel Sandbox egress traffic. */ +export function egressRequest( + input: { + host?: string; + method?: string; + path?: string; + proxyPath?: string; + forwardedPath?: string | null; + scheme?: string | null; + port?: string; + body?: BodyInit; + headers?: Record; + } = {}, +): Request { + const upstreamPath = input.path ?? "/api/0/issues/"; + const proxyPath = + input.proxyPath ?? + (activeCredentialToken + ? `${SANDBOX_EGRESS_PROXY_PATH}/${activeCredentialToken}` + : upstreamPath); + const forwardedPath = + input.forwardedPath === undefined ? upstreamPath : input.forwardedPath; + return new Request(`https://junior.example.com${proxyPath}`, { + method: input.method ?? "GET", + headers: { + "vercel-forwarded-host": input.host ?? "sentry.io", + ...(input.scheme === null + ? {} + : { "vercel-forwarded-scheme": input.scheme ?? 
"https" }), + "vercel-sandbox-oidc-token": "signed-token", + ...(forwardedPath !== null + ? { "vercel-forwarded-path": forwardedPath } + : {}), + ...(input.port ? { "vercel-forwarded-port": input.port } : {}), + ...(input.headers ?? {}), + }, + ...(input.body === undefined ? {} : { body: input.body }), + }); +} + +/** Proxy a sandbox egress request with a successful sandbox OIDC verifier. */ +export function proxy( + request: Request, + fetchMock: typeof fetch = vi.fn( + async () => new Response("ok"), + ) as typeof fetch, +): Promise { + return proxySandboxEgressRequest(request, { + fetch: fetchMock, + verifyOidc: async () => ({ sandbox_id: EGRESS_ID }), + }); +} diff --git a/packages/junior/tests/fixtures/sandbox/executor.ts b/packages/junior/tests/fixtures/sandbox/executor.ts new file mode 100644 index 000000000..ae450bf4c --- /dev/null +++ b/packages/junior/tests/fixtures/sandbox/executor.ts @@ -0,0 +1,295 @@ +import { expect, vi } from "vitest"; +import type { SandboxInstance } from "@/chat/sandbox/workspace"; + +import { + createSandboxEgressCredentialToken, + parseSandboxEgressCredentialToken as parseSandboxEgressCredentialTokenImpl, + SANDBOX_EGRESS_PROXY_PATH, +} from "@/chat/sandbox/egress-session"; +import { createSandboxExecutor as createSandboxExecutorImpl } from "@/chat/sandbox/sandbox"; +import { createSandboxSessionManager as createSandboxSessionManagerImpl } from "@/chat/sandbox/session"; +import { disconnectStateAdapter as disconnectStateAdapterImpl } from "@/chat/state/adapter"; + +export const sandboxGetMock = vi.fn(); +export const sandboxCreateMock = vi.fn(); +export const createBashTool = vi.fn(); +export const resolveRuntimeDependencySnapshotMock = vi.fn< + (...args: any[]) => Promise<{ + snapshotId?: string; + profileHash?: string; + dependencyCount: number; + cacheHit: boolean; + resolveOutcome: string; + rebuildReason?: string; + }> +>(async () => ({ + dependencyCount: 0, + cacheHit: false, + resolveOutcome: "no_profile", +})); +export 
const isSnapshotMissingErrorMock = vi.fn<(error: unknown) => boolean>( + () => false, +); +export const getRuntimeDependencyProfileHashMock = vi.fn< + (runtime: string) => string | undefined +>(() => undefined); + +function sandboxSessionServices() { + return { + createBashTool: createBashTool as never, + createSandbox: sandboxCreateMock as never, + getRuntimeDependencyProfileHash: getRuntimeDependencyProfileHashMock, + getSandbox: sandboxGetMock as never, + isSnapshotMissingError: isSnapshotMissingErrorMock, + resolveRuntimeDependencySnapshot: + resolveRuntimeDependencySnapshotMock as never, + }; +} + +function buildSandboxEgressNetworkPolicy(input?: { credentialToken?: string }) { + const path = input?.credentialToken + ? `${SANDBOX_EGRESS_PROXY_PATH}/${input.credentialToken}` + : SANDBOX_EGRESS_PROXY_PATH; + return { + allow: { + "*": [], + "sentry.io": [ + { + forwardURL: new URL( + path, + process.env.JUNIOR_BASE_URL ?? "https://junior.example.com", + ).toString(), + }, + ], + }, + }; +} + +async function resolveSandboxCommandEnvironment() { + return { + SENTRY_AUTH_TOKEN: "host_managed_credential", + SENTRY_READ_ONLY: "1", + }; +} + +export function createSandboxSessionManager( + options?: Parameters[0], +) { + return createSandboxSessionManagerImpl(options, sandboxSessionServices()); +} + +export function createSandboxExecutor( + options?: Parameters[0], +) { + return createSandboxExecutorImpl(options, { + buildSandboxEgressNetworkPolicy, + createSandboxEgressCredentialToken, + createSandboxSessionManager, + resolveSandboxCommandEnvironment, + }); +} + +export const disconnectStateAdapter = disconnectStateAdapterImpl; +export const parseSandboxEgressCredentialToken = + parseSandboxEgressCredentialTokenImpl; + +/** Build the default bash-tool facade used by sandbox executor component tests. 
*/ +export function makeBashToolFacade( + options: { + readFile?: (input: unknown) => Promise<{ content: string }>; + writeFile?: (input: unknown) => Promise<{ success: boolean }>; + } = {}, +) { + return { + tools: { + readFile: { + execute: options.readFile ?? vi.fn(async () => ({ content: "" })), + }, + writeFile: { + execute: options.writeFile ?? vi.fn(async () => ({ success: true })), + }, + }, + }; +} + +/** Reset sandbox executor mocks and process env before each test. */ +export function setupSandboxExecutorTest(): void { + sandboxGetMock.mockReset(); + sandboxCreateMock.mockReset(); + createBashTool.mockReset(); + createBashTool.mockResolvedValue(makeBashToolFacade() as never); + resolveRuntimeDependencySnapshotMock.mockReset(); + resolveRuntimeDependencySnapshotMock.mockResolvedValue({ + dependencyCount: 0, + cacheHit: false, + resolveOutcome: "no_profile", + }); + isSnapshotMissingErrorMock.mockReset(); + isSnapshotMissingErrorMock.mockReturnValue(false); + getRuntimeDependencyProfileHashMock.mockReset(); + getRuntimeDependencyProfileHashMock.mockReturnValue(undefined); + delete process.env.VERCEL_TOKEN; + delete process.env.VERCEL_TEAM_ID; + delete process.env.VERCEL_PROJECT_ID; + delete process.env.VERCEL_OIDC_TOKEN; + delete process.env.VERCEL_SANDBOX_KEEPALIVE_MS; + process.env.JUNIOR_BASE_URL = "https://junior.example.com"; + process.env.JUNIOR_SECRET = "test-secret"; +} + +/** Restore sandbox executor test globals and memory state after each test. 
*/ +export async function cleanupSandboxExecutorTest(): Promise { + vi.useRealTimers(); + await disconnectStateAdapterImpl(); + delete process.env.JUNIOR_BASE_URL; + delete process.env.JUNIOR_SECRET; +} + +export interface MockSandbox { + name: string; + currentSession: ReturnType; + fs: { + readFile: ReturnType; + writeFile: ReturnType; + readdir: ReturnType; + stat: ReturnType; + }; + mkDir: ReturnType; + writeFiles: ReturnType; + readFileToBuffer: ReturnType; + runCommand: ReturnType; + stop: ReturnType; + extendTimeout: ReturnType; + snapshot: ReturnType; + update: ReturnType; +} + +/** Build a Vercel Sandbox-shaped fake with overridable setup failures. */ +export function makeSandbox( + name: string, + options: { + mkDirError?: unknown; + writeFilesError?: unknown; + } = {}, +): MockSandbox { + return { + name, + currentSession: vi.fn(() => ({ sessionId: `${name}_session` })), + fs: { + readFile: vi.fn(async () => ""), + writeFile: vi.fn(async () => {}), + readdir: vi.fn(async () => []), + stat: vi.fn(async () => ({ isDirectory: () => false })), + }, + mkDir: vi.fn(async () => { + if (options.mkDirError) { + throw options.mkDirError; + } + }), + writeFiles: vi.fn(async () => { + if (options.writeFilesError) { + throw options.writeFilesError; + } + }), + readFileToBuffer: vi.fn(async () => Buffer.from("")), + runCommand: vi.fn(async () => ({ + exitCode: 0, + stdout: async () => "", + stderr: async () => "", + })), + stop: vi.fn(async () => {}), + extendTimeout: vi.fn(async () => {}), + snapshot: vi.fn(async () => ({ snapshotId: "snap_test" })), + update: vi.fn(async () => {}), + }; +} + +/** Extract the Sentry forward URL from a sandbox network policy fixture. */ +export function sentryForwardURLFromPolicy( + policy: unknown, +): string | undefined { + const allow = ( + policy as { allow?: Record> } + ).allow; + return allow?.["sentry.io"]?.[0]?.forwardURL; +} + +/** Extract the egress credential token from a sandbox proxy forward URL. 
*/ +export function credentialTokenFromForwardURL( + forwardURL: string | undefined, +): string | undefined { + if (!forwardURL) { + return undefined; + } + const pathname = new URL(forwardURL).pathname; + const prefix = `${SANDBOX_EGRESS_PROXY_PATH}/`; + return pathname.startsWith(prefix) + ? pathname.slice(prefix.length) + : undefined; +} + +/** Build a Vercel-style API error object for sandbox setup tests. */ +export function createApiError( + status: number, + statusText: string, + code: string, + message: string, +): Error { + return Object.assign(new Error(`Status code ${status} is not ok`), { + response: { + status, + statusText, + url: "https://vercel.com/api/v1/sandboxes/sbx_test/fs/mkdir", + headers: { + get: (_name: string) => null, + }, + }, + json: { + error: { + code, + message, + }, + }, + sandboxId: "sbx_test", + }); +} + +/** Build an error shaped like an interrupted sandbox command stream. */ +export function createStreamInterruptedError(): Error { + return Object.assign(new Error("Stream ended before command finished"), { + name: "StreamError", + }); +} + +/** Assert that a SandboxInstance delegates file and command calls to the fake. 
*/ +export async function expectWorkspaceToDelegate( + workspace: SandboxInstance, + sandbox: MockSandbox, +): Promise { + expect(workspace.sandboxId).toBe(sandbox.name); + expect(workspace.sandboxEgressId).toBe(`${sandbox.name}_session`); + const fileBuffer = Buffer.from("workspace file"); + const commandResult = { + exitCode: 0, + stdout: async () => "stdout", + stderr: async () => "stderr", + }; + + sandbox.readFileToBuffer.mockResolvedValueOnce(fileBuffer); + await expect( + workspace.readFileToBuffer({ path: "/tmp/workspace.txt" }), + ).resolves.toBe(fileBuffer); + expect(sandbox.readFileToBuffer).toHaveBeenCalledWith({ + path: "/tmp/workspace.txt", + }); + + sandbox.runCommand.mockResolvedValueOnce(commandResult); + await expect( + workspace.runCommand({ cmd: "pwd", args: ["-P"], cwd: "/tmp" }), + ).resolves.toBe(commandResult); + expect(sandbox.runCommand).toHaveBeenCalledWith({ + cmd: "pwd", + args: ["-P"], + cwd: "/tmp", + }); +} diff --git a/packages/junior/tests/fixtures/slack-api-outbox.ts b/packages/junior/tests/fixtures/slack/api-outbox.ts similarity index 96% rename from packages/junior/tests/fixtures/slack-api-outbox.ts rename to packages/junior/tests/fixtures/slack/api-outbox.ts index a50e84380..009155541 100644 --- a/packages/junior/tests/fixtures/slack-api-outbox.ts +++ b/packages/junior/tests/fixtures/slack/api-outbox.ts @@ -4,7 +4,7 @@ import { type CapturedSlackApiCall, type CapturedSlackFileUploadCall, type SlackApiMethod, -} from "../msw/handlers/slack-api"; +} from "../../msw/handlers/slack-api"; /** Read-only outbox for Slack MSW calls captured during a test. 
*/ export class SlackApiOutbox { diff --git a/packages/junior/tests/fixtures/slack/behavior.ts b/packages/junior/tests/fixtures/slack/behavior.ts new file mode 100644 index 000000000..46b6721cb --- /dev/null +++ b/packages/junior/tests/fixtures/slack/behavior.ts @@ -0,0 +1,62 @@ +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; +import { createTestChatRuntime } from "../chat-runtime"; +import type { FakeSlackAdapter } from "./harness"; + +const emptyThreadReplies = async () => []; + +/** Create a Slack runtime harness with deterministic empty thread hydration. */ +export function createSlackBehaviorRuntime( + args: { + adapters?: JuniorRuntimeScenarioAdapters; + slackAdapter?: FakeSlackAdapter; + } = {}, +) { + const adapters = args.adapters ?? {}; + return createTestChatRuntime({ + adapters: { + listThreadReplies: emptyThreadReplies, + ...adapters, + }, + slackAdapter: args.slackAdapter, + }); +} + +/** Extract user-visible text from a fake Slack post value. */ +export function postedText(value: unknown): string { + if (typeof value === "string") { + return value; + } + + if (value && typeof value === "object") { + const markdown = (value as { markdown?: unknown }).markdown; + if (typeof markdown === "string") { + return markdown; + } + const raw = (value as { raw?: unknown }).raw; + if (typeof raw === "string") { + return raw; + } + } + + return String(value); +} + +/** Read persisted conversation messages from a fake Slack thread state. */ +export function conversationMessages(thread: { + getState: () => Record; +}): Array<{ id?: string; text?: string }> { + const state = thread.getState() as { + conversation?: { + messages?: Array<{ id?: string; text?: string }>; + }; + }; + return state.conversation?.messages ?? []; +} + +/** Check whether any fake Slack post contains the expected visible text. 
*/ +export function threadHasPostText( + thread: { posts: unknown[] }, + text: string, +): boolean { + return thread.posts.some((post) => postedText(post).includes(text)); +} diff --git a/packages/junior/tests/fixtures/slack/eval-artifacts.ts b/packages/junior/tests/fixtures/slack/eval-artifacts.ts new file mode 100644 index 000000000..44a2a5dd9 --- /dev/null +++ b/packages/junior/tests/fixtures/slack/eval-artifacts.ts @@ -0,0 +1,173 @@ +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +export interface EvalSlackApiCall { + method: string; + params: Record; +} + +export interface EvalSlackCanvasArtifact { + markdown: string; + title: string; +} + +export interface EvalSlackChannelPost { + channel: string; + text: string; + thread_ts?: string; +} + +export interface EvalSlackReaction { + channel: string; + emoji: string; + timestamp: string; +} + +export interface EvalSlackArtifacts { + canvases: EvalSlackCanvasArtifact[]; + channelPosts: EvalSlackChannelPost[]; + reactions: EvalSlackReaction[]; +} + +function toFirstString(value: unknown): string | undefined { + if (typeof value === "string") { + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : undefined; + } + if (Array.isArray(value)) { + for (const entry of value) { + const resolved = toFirstString(entry); + if (resolved) return resolved; + } + } + return undefined; +} + +function buildReactionKey(input: { + channel: string; + emoji: string; + timestamp: string; +}): string { + return `${input.channel}:${input.timestamp}:${input.emoji}`; +} + +export function collectEvalSlackArtifactsFromSlackApiCalls( + calls: EvalSlackApiCall[], +): EvalSlackArtifacts { + const canvases: EvalSlackCanvasArtifact[] = []; + const channelPosts: EvalSlackChannelPost[] = []; + const reactions = new Map(); + + for (const call of calls) { + if (call.method === "canvases.create") { + const title = toFirstString(call.params.title) ?? 
""; + const documentContent = + call.params.document_content && + typeof call.params.document_content === "object" + ? (call.params.document_content as Record) + : undefined; + const markdown = documentContent + ? (toFirstString(documentContent.markdown) ?? "") + : ""; + if (!title && markdown.length === 0) { + continue; + } + canvases.push({ title, markdown }); + continue; + } + + if (call.method === "chat.postMessage") { + const channel = toFirstString(call.params.channel); + const text = toFirstString(call.params.text); + if (!channel || text === undefined) { + continue; + } + const threadTs = toFirstString(call.params.thread_ts); + channelPosts.push({ + channel, + text, + ...(threadTs ? { thread_ts: threadTs } : {}), + }); + continue; + } + + if (call.method === "reactions.add") { + const channel = toFirstString(call.params.channel); + const emoji = toFirstString(call.params.name); + const timestamp = toFirstString(call.params.timestamp); + if (!channel || !emoji || !timestamp) { + continue; + } + const reaction = { channel, emoji, timestamp }; + reactions.set(buildReactionKey(reaction), reaction); + continue; + } + + if (call.method === "reactions.remove") { + const channel = toFirstString(call.params.channel); + const emoji = toFirstString(call.params.name); + const timestamp = toFirstString(call.params.timestamp); + if (!channel || !emoji || !timestamp) { + continue; + } + reactions.delete(buildReactionKey({ channel, emoji, timestamp })); + } + } + + return { + canvases, + channelPosts, + reactions: [...reactions.values()], + }; +} + +/** Return Slack-visible artifacts captured by the eval-local Slack HTTP harness. */ +export function collectEvalSlackArtifacts(): EvalSlackArtifacts { + return collectEvalSlackArtifactsFromSlackApiCalls(getCapturedSlackApiCalls()); +} + +/** Find the latest auth state URL sent through eval-visible Slack messages. 
*/ +export function findLatestOAuthStateFromEvalSlackArtifacts(args: { + authorizeEndpoint: string; + consumedStates: Set; +}): string | undefined { + const expectedUrl = new URL(args.authorizeEndpoint); + const calls = getCapturedSlackApiCalls(); + + for (let index = calls.length - 1; index >= 0; index -= 1) { + const call = calls[index]; + if ( + call.method !== "chat.postEphemeral" && + call.method !== "chat.postMessage" + ) { + continue; + } + const text = toFirstString(call.params.text); + if (!text) { + continue; + } + const match = text.match(/<([^|>]+)\|/); + if (!match?.[1]) { + continue; + } + + let authLink: URL; + try { + authLink = new URL(match[1]); + } catch { + continue; + } + + if ( + authLink.origin !== expectedUrl.origin || + authLink.pathname !== expectedUrl.pathname + ) { + continue; + } + const state = authLink.searchParams.get("state")?.trim(); + if (state && !args.consumedStates.has(state)) { + return state; + } + } + + return undefined; +} diff --git a/packages/junior/tests/fixtures/slack-harness.ts b/packages/junior/tests/fixtures/slack/harness.ts similarity index 100% rename from packages/junior/tests/fixtures/slack-harness.ts rename to packages/junior/tests/fixtures/slack/harness.ts diff --git a/packages/junior/tests/fixtures/slack/image-runtime.ts b/packages/junior/tests/fixtures/slack/image-runtime.ts new file mode 100644 index 000000000..96ecfaf70 --- /dev/null +++ b/packages/junior/tests/fixtures/slack/image-runtime.ts @@ -0,0 +1,64 @@ +import { vi } from "vitest"; + +const ORIGINAL_ENV = { ...process.env }; + +interface SlackImageConversationStateArgs { + messages?: unknown[]; + vision?: { + backfillCompletedAtMs?: number; + byFileId?: Record; + }; +} + +/** Create a Slack runtime after applying image-hydration environment flags. 
*/ +export async function createSlackImageRuntime( + args: Parameters[0], + env: NodeJS.ProcessEnv = {}, +) { + process.env = { + ...ORIGINAL_ENV, + AI_VISION_MODEL: "", + SLACK_BOT_TOKEN: "", + SLACK_BOT_USER_TOKEN: "", + ...env, + }; + vi.resetModules(); + const { createTestChatRuntime } = await import("../chat-runtime"); + return createTestChatRuntime(args); +} + +/** Reset modules, mocks, and env mutations used by image-hydration tests. */ +export function resetSlackImageRuntimeEnv(): void { + vi.restoreAllMocks(); + process.env = { ...ORIGINAL_ENV }; + vi.resetModules(); +} + +/** Build persisted conversation state used by Slack image hydration tests. */ +export function createSlackImageConversationState( + args: SlackImageConversationStateArgs = {}, +) { + const messages = args.messages ?? []; + return { + conversation: { + schemaVersion: 1, + messages, + compactions: [], + backfill: { + completedAtMs: 1_700_000_000_000, + source: "recent_messages", + }, + processing: {}, + stats: { + estimatedContextTokens: 0, + totalMessageCount: messages.length, + compactedMessageCount: 0, + updatedAtMs: 1_700_000_000_000, + }, + vision: { + byFileId: {}, + ...(args.vision ?? {}), + }, + }, + }; +} diff --git a/packages/junior/tests/fixtures/slack/posts.ts b/packages/junior/tests/fixtures/slack/posts.ts new file mode 100644 index 000000000..59f1e09c9 --- /dev/null +++ b/packages/junior/tests/fixtures/slack/posts.ts @@ -0,0 +1,22 @@ +/** Extract comparable text from a Slack test post fixture. 
*/ +export function toPostedText(value: unknown): string { + if (typeof value === "string") { + return value; + } + + if (value && typeof value === "object") { + const markdown = (value as { markdown?: unknown }).markdown; + if (typeof markdown === "string") { + return markdown; + } + const raw = (value as { raw?: unknown }).raw; + if (typeof raw === "string") { + return raw; + } + if ("files" in value) { + return ""; + } + } + + return String(value); +} diff --git a/packages/junior/tests/fixtures/slack/schedule-tools.ts b/packages/junior/tests/fixtures/slack/schedule-tools.ts new file mode 100644 index 000000000..33725d443 --- /dev/null +++ b/packages/junior/tests/fixtures/slack/schedule-tools.ts @@ -0,0 +1,176 @@ +import { vi } from "vitest"; +import { + AgentPluginToolInputError, + type SlackDestination, +} from "@sentry/junior-plugin-api"; +import { + createSchedulerStore, + createSlackScheduleCreateTaskTool as makeSlackScheduleCreateTaskTool, + createSlackScheduleDeleteTaskTool as makeSlackScheduleDeleteTaskTool, + createSlackScheduleListTasksTool as makeSlackScheduleListTasksTool, + createSlackScheduleRunTaskNowTool as makeSlackScheduleRunTaskNowTool, + createSlackScheduleUpdateTaskTool as makeSlackScheduleUpdateTaskTool, + type SchedulerToolContext, +} from "@sentry/junior-scheduler"; +import { createSlackDirectCredentialSubject } from "@/chat/credentials/subject"; +import { createPluginState } from "@/chat/plugins/state"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { DEFAULT_TEST_NOW_MS, mockTestClock } from "../vitest"; + +vi.hoisted(() => { + process.env.JUNIOR_STATE_ADAPTER = "memory"; +}); + +export { AgentPluginToolInputError }; + +export const TEST_TEAM_ID = `TSCHEDULE${DEFAULT_TEST_NOW_MS}`; + +type CreateContextOverrides = Omit< + Partial, + "requester" +> & { + channelId?: string; + destination?: SlackDestination; + requester?: + | SchedulerToolContext["requester"] + | { + fullName?: string; + userId: string; + 
userName?: string; + }; + teamId?: string; +}; + +/** Creates the Slack schedule create tool for the supplied test context. */ +export function createSlackScheduleCreateTaskTool( + context: SchedulerToolContext, +) { + return makeSlackScheduleCreateTaskTool(context); +} + +/** Creates the Slack schedule delete tool for the supplied test context. */ +export function createSlackScheduleDeleteTaskTool( + context: SchedulerToolContext, +) { + return makeSlackScheduleDeleteTaskTool(context); +} + +/** Creates the Slack schedule list tool for the supplied test context. */ +export function createSlackScheduleListTasksTool( + context: SchedulerToolContext, +) { + return makeSlackScheduleListTasksTool(context); +} + +/** Creates the Slack schedule run-now tool for the supplied test context. */ +export function createSlackScheduleRunTaskNowTool( + context: SchedulerToolContext, +) { + return makeSlackScheduleRunTaskNowTool(context); +} + +/** Creates the Slack schedule update tool for the supplied test context. */ +export function createSlackScheduleUpdateTaskTool( + context: SchedulerToolContext, +) { + return makeSlackScheduleUpdateTaskTool(context); +} + +/** Builds the default Slack scheduler tool context shared by schedule tests. */ +export function createContext( + overrides: CreateContextOverrides = {}, +): SchedulerToolContext { + const { + channelId = "C123", + teamId = TEST_TEAM_ID, + destination: overrideDestination, + requester: overrideRequester, + source: overrideSource, + ...contextOverrides + } = overrides; + const source = + overrideSource ?? + overrideDestination ?? + ({ + platform: "slack", + teamId, + channelId, + } satisfies SlackDestination); + const context: SchedulerToolContext = { + source, + requester: + overrideRequester && "platform" in overrideRequester + ? overrideRequester + : { + platform: "slack", + teamId, + userId: overrideRequester?.userId ?? "U123", + userName: overrideRequester?.userName ?? 
"dcramer", + fullName: overrideRequester?.fullName ?? "David Cramer", + }, + userText: "schedule this weekly", + state: createPluginState("scheduler"), + ...contextOverrides, + }; + const credentialSubject = + context.credentialSubject ?? + createSlackDirectCredentialSubject({ + channelId: context.source?.channelId, + teamId: context.source?.teamId, + userId: context.requester?.userId, + }); + return { + ...context, + ...(credentialSubject ? { credentialSubject } : {}), + }; +} + +/** Runs a scheduler tool through the production execute contract. */ +export async function executeTool( + tool: { + execute?: ( + input: TInput, + options: { experimental_context?: unknown }, + ) => TResult; + }, + input: TInput, +): Promise> { + if (typeof tool?.execute !== "function") { + throw new Error("tool execute function missing"); + } + return await tool.execute(input, {}); +} + +/** Opens the memory-backed scheduler store used by schedule tool tests. */ +export function schedulerStore() { + return createSchedulerStore(createPluginState("scheduler")); +} + +/** Creates the standard weekly scheduler task used by update and run tests. */ +export async function createTask( + context = createContext(), + overrides: Record = {}, +) { + const tool = createSlackScheduleCreateTaskTool(context); + return await executeTool(tool, { + task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", + schedule: "Every Monday at 9am", + timezone: "America/Los_Angeles", + next_run_at: "2026-05-25T16:00:00.000Z", + recurrence: "weekly", + ...overrides, + }); +} + +/** Resets persistent state before each scheduler tool scenario. */ +export async function setupSlackScheduleToolTest() { + mockTestClock(); + await disconnectStateAdapter(); +} + +/** Restores timers, environment, and memory state after scheduler tool tests. 
*/ +export async function cleanupSlackScheduleToolTest() { + vi.useRealTimers(); + delete process.env.JUNIOR_TIMEZONE; + await disconnectStateAdapter(); +} diff --git a/packages/junior/tests/fixtures/slack/turn-state.ts b/packages/junior/tests/fixtures/slack/turn-state.ts new file mode 100644 index 000000000..a3515c0b9 --- /dev/null +++ b/packages/junior/tests/fixtures/slack/turn-state.ts @@ -0,0 +1,60 @@ +import type { PiMessage } from "@/chat/pi/messages"; + +interface AwaitingSlackTurnStateArgs { + activeSessionId: string; + replied?: boolean; + userMessageId?: string; + userText?: string; +} + +/** Build Slack conversation state with an active turn for resume-path tests. */ +export function createAwaitingSlackTurnState(args: AwaitingSlackTurnStateArgs) { + return { + conversation: { + schemaVersion: 1, + backfill: { + completedAtMs: 1, + source: "recent_messages", + }, + compactions: [], + piMessages: [], + messages: [ + { + id: args.userMessageId ?? "msg-original", + role: "user", + text: args.userText ?? "please keep working", + createdAtMs: 1, + author: { + userId: "U-test", + }, + ...(args.replied === undefined + ? {} + : { meta: { replied: args.replied } }), + }, + ], + processing: { + activeTurnId: args.activeSessionId, + }, + stats: { + compactedMessageCount: 0, + estimatedContextTokens: 0, + totalMessageCount: 1, + updatedAtMs: 1, + }, + vision: { + byFileId: {}, + }, + }, + }; +} + +/** Build minimal Pi history for a user-authored turn session record. 
*/ +export function createPiUserTurn(text: string): PiMessage[] { + return [ + { + role: "user", + content: [{ type: "text", text }], + timestamp: 1, + }, + ]; +} diff --git a/packages/junior/tests/fixtures/subscribed-decision.ts b/packages/junior/tests/fixtures/subscribed-decision.ts new file mode 100644 index 000000000..8f7a3f962 --- /dev/null +++ b/packages/junior/tests/fixtures/subscribed-decision.ts @@ -0,0 +1,15 @@ +import type { SubscribedDecisionInput } from "@/chat/services/subscribed-decision"; + +/** Build a subscribed-thread routing input with stable defaults. */ +export function makeSubscribedInput( + overrides: Partial = {}, +): SubscribedDecisionInput { + return { + rawText: "hello", + text: "hello", + hasAttachments: false, + isExplicitMention: false, + context: {}, + ...overrides, + }; +} diff --git a/packages/junior/tests/fixtures/timeout-resume-runner.ts b/packages/junior/tests/fixtures/timeout-resume-runner.ts new file mode 100644 index 000000000..2ed32566c --- /dev/null +++ b/packages/junior/tests/fixtures/timeout-resume-runner.ts @@ -0,0 +1,133 @@ +import { vi } from "vitest"; +import type { Destination } from "@sentry/junior-plugin-api"; +import type { + ResumeSlackTurnArgs, + ResumeSlackTurnRunner, +} from "@/chat/runtime/slack-resume"; +import { persistThreadStateById } from "@/chat/runtime/thread-state"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; + +export type ResumeSlackTurnMock = ReturnType< + typeof vi.fn +>; + +export const TIMEOUT_RESUME_DESTINATION = { + platform: "slack", + teamId: "T123", + channelId: "C123", +} as const satisfies Destination; + +export interface TimeoutResumeScenarioOptions { + activeTurnId?: string; + conversationId?: string; + messageId?: string; + sessionId?: string; + sliceId?: number; +} + +/** Resets memory state before timeout resume runner tests. 
*/ +export async function setupTimeoutResumeRunnerTest() { + process.env.JUNIOR_STATE_ADAPTER = "memory"; + await disconnectStateAdapter(); +} + +/** Restores timers and memory state after timeout resume runner tests. */ +export async function cleanupTimeoutResumeRunnerTest() { + vi.useRealTimers(); + await disconnectStateAdapter(); + delete process.env.JUNIOR_STATE_ADAPTER; + vi.restoreAllMocks(); +} + +/** Creates a typed fake for the Slack resume runtime boundary. */ +export function createResumeSlackTurnMock(): ResumeSlackTurnMock { + return vi.fn(); +} + +/** Stores the common awaiting timeout resume session and thread state. */ +export async function createTimeoutResumeScenario( + options: TimeoutResumeScenarioOptions = {}, +) { + const conversationId = options.conversationId ?? "slack:C123:1712345.0001"; + const sessionId = options.sessionId ?? "turn_msg_1"; + const sliceId = options.sliceId ?? 2; + const messageId = options.messageId ?? "msg.1"; + const activeTurnId = options.activeTurnId ?? 
sessionId; + const sessionRecord = await upsertAgentTurnSessionRecord({ + conversationId, + sessionId, + sliceId, + state: "awaiting_resume", + destination: TIMEOUT_RESUME_DESTINATION, + piMessages: [ + { + role: "user", + content: [{ type: "text", text: "hello" }], + timestamp: 1, + }, + ], + resumeReason: "timeout", + resumedFromSliceId: sliceId - 1, + errorMessage: "Agent turn timed out", + }); + + await persistThreadStateById(conversationId, { + artifacts: { + listColumnMap: {}, + }, + conversation: { + schemaVersion: 1, + backfill: {}, + compactions: [], + piMessages: [], + messages: [ + { + id: messageId, + role: "user", + text: "resume this request", + createdAtMs: 1, + author: { + userId: "U123", + }, + }, + ], + processing: { + activeTurnId, + }, + stats: { + compactedMessageCount: 0, + estimatedContextTokens: 0, + totalMessageCount: 1, + updatedAtMs: 1, + }, + vision: { + byFileId: {}, + }, + }, + }); + + return { + conversationId, + messageId, + payload: { + conversationId, + destination: TIMEOUT_RESUME_DESTINATION, + sessionId, + expectedVersion: sessionRecord.version, + }, + sessionId, + sessionRecord, + }; +} + +/** Runs the fake resume boundary as the real runner would when it starts. */ +export async function prepareResumeArgs( + args: ResumeSlackTurnArgs, +): Promise { + const prepared = await args.beforeStart?.(); + if (prepared === false) { + return false; + } + return { ...args, ...(prepared ?? 
{}) }; +} diff --git a/packages/junior/tests/fixtures/tool-runtime.ts b/packages/junior/tests/fixtures/tool-runtime.ts new file mode 100644 index 000000000..56973e08b --- /dev/null +++ b/packages/junior/tests/fixtures/tool-runtime.ts @@ -0,0 +1,140 @@ +import type { Static, TSchema } from "@sinclair/typebox"; +import type { SandboxWorkspace } from "@/chat/sandbox/workspace"; +import type { ThreadArtifactsState } from "@/chat/state/artifacts"; +import type { ToolDefinition } from "@/chat/tools/definition"; +import type { ToolRuntimeContext, ToolState } from "@/chat/tools/types"; +import type { SlackToolContext } from "@/chat/tools/slack/context"; + +interface TestToolStateOptions { + artifactState?: ThreadArtifactsState; + currentListId?: string; +} + +export type TestToolRuntimeOverrides = Partial & { + channelId?: string; + deliveryChannelId?: string; + destinationChannelId?: string; + messageTs?: string; + sourceChannelId?: string; + teamId?: string; + threadTs?: string; +}; + +/** + * Create the default sandbox for tests that should not exercise sandbox I/O. + */ +export function createUnavailableSandbox(): SandboxWorkspace { + const fail = () => { + throw new Error( + "Unexpected sandbox access. Provide a test sandbox fixture for this behavior.", + ); + }; + + return { + readFileToBuffer: fail, + runCommand: fail, + }; +} + +/** + * Create a typed tool runtime context for direct tool contract tests. + */ +export function createTestToolRuntimeContext( + overrides: TestToolRuntimeOverrides = {}, +): ToolRuntimeContext & SlackToolContext { + const teamId = + overrides.teamId ?? + (overrides.source?.platform === "slack" ? overrides.source.teamId : "T123"); + const sourceChannelId = + overrides.sourceChannelId ?? + (overrides.source?.platform === "slack" + ? overrides.source.channelId + : undefined) ?? + overrides.channelId ?? + "C123"; + const destinationChannelId = + overrides.destinationChannelId ?? + overrides.deliveryChannelId ?? 
+ (overrides.destination?.platform === "slack" + ? overrides.destination.channelId + : undefined) ?? + sourceChannelId; + const source = + overrides.source ?? + ({ + platform: "slack", + teamId, + channelId: sourceChannelId, + ...(overrides.messageTs ? { messageTs: overrides.messageTs } : {}), + ...(overrides.threadTs ? { threadTs: overrides.threadTs } : {}), + } as const); + const destination = + overrides.destination ?? + ({ + platform: "slack", + teamId, + channelId: destinationChannelId, + } as const); + const requester = + overrides.requester ?? + ({ + platform: "slack", + teamId, + userId: "U123", + } as const); + + return { + sandbox: createUnavailableSandbox(), + ...overrides, + source, + destination, + requester, + destinationChannelId, + messageTs: + source.platform === "slack" ? source.messageTs : overrides.messageTs, + sourceChannelId, + teamId, + threadTs: + source.platform === "slack" ? source.threadTs : overrides.threadTs, + } as ToolRuntimeContext & SlackToolContext; +} + +/** + * Create in-memory tool state with operation-result dedupe support. + */ +export function createTestToolState( + options: TestToolStateOptions = {}, +): ToolState { + const operationResultCache = new Map(); + const artifactState: ThreadArtifactsState = { + listColumnMap: {}, + ...options.artifactState, + }; + + return { + artifactState, + patchArtifactState: (patch) => { + Object.assign(artifactState, patch); + }, + getCurrentListId: () => options.currentListId, + getOperationResult: (operationKey: string): T | undefined => + operationResultCache.get(operationKey) as T | undefined, + setOperationResult: (operationKey, result) => { + operationResultCache.set(operationKey, result); + }, + }; +} + +/** + * Execute a tool with typed input and the default direct-test options. 
+ */ +export async function executeTestTool( + toolDefinition: ToolDefinition, + input: Static, +): Promise { + if (!toolDefinition.execute) { + throw new Error("tool execute function missing"); + } + + return await toolDefinition.execute(input, {}); +} diff --git a/packages/junior/tests/fixtures/turn-session-record.ts b/packages/junior/tests/fixtures/turn-session-record.ts new file mode 100644 index 000000000..a1ca578ae --- /dev/null +++ b/packages/junior/tests/fixtures/turn-session-record.ts @@ -0,0 +1,40 @@ +import { vi } from "vitest"; +import type { PiMessage } from "@/chat/pi/messages"; + +const ORIGINAL_ENV = { ...process.env }; + +/** Reset module state and use the memory adapter for turn-session record tests. */ +export async function setupTurnSessionRecordTest(): Promise { + process.env = { + ...ORIGINAL_ENV, + JUNIOR_STATE_ADAPTER: "memory", + }; + vi.resetModules(); + const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + await disconnectStateAdapter(); +} + +/** Restore mocked modules, environment, and memory state after turn-session tests. */ +export async function cleanupTurnSessionRecordTest(): Promise { + const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + await disconnectStateAdapter(); + vi.doUnmock("@/chat/logging"); + vi.doUnmock("@/chat/state/turn-session"); + vi.resetModules(); + process.env = { ...ORIGINAL_ENV }; +} + +/** Build a Pi text message fixture for turn-session record boundaries. 
*/ +export function piTextMessage( + role: PiMessage["role"], + text: string, + timestamp: number, + extra: Record = {}, +): PiMessage { + return { + role, + ...extra, + content: [{ type: "text", text }], + timestamp, + } as PiMessage; +} diff --git a/packages/junior/tests/fixtures/vitest.ts b/packages/junior/tests/fixtures/vitest.ts new file mode 100644 index 000000000..820f19efa --- /dev/null +++ b/packages/junior/tests/fixtures/vitest.ts @@ -0,0 +1,67 @@ +import { afterEach, beforeEach, vi } from "vitest"; + +type TestEnvValues = Readonly>; +type TestClockValue = Date | number | string; + +export const DEFAULT_TEST_NOW_ISO = "2026-06-05T12:00:00.000Z"; +export const DEFAULT_TEST_NOW_MS = Date.parse(DEFAULT_TEST_NOW_ISO); +export const DEFAULT_TEST_EXPIRES_AT_ISO = "2099-01-01T00:00:00.000Z"; +export const DEFAULT_TEST_EXPIRES_AT_MS = Date.parse( + DEFAULT_TEST_EXPIRES_AT_ISO, +); +export const DEFAULT_TEST_EXPIRED_AT_MS = Date.parse( + "2000-01-01T00:00:00.000Z", +); + +function toTestDate(value: TestClockValue): Date { + return value instanceof Date ? value : new Date(value); +} + +/** Apply Vitest-managed env overrides so test cleanup can restore them safely. */ +export function stubTestEnv(values: TestEnvValues): void { + for (const [name, value] of Object.entries(values)) { + vi.stubEnv(name, value); + } +} + +/** Isolate suites that exercise shared state through the memory adapter. */ +export function useMemoryStateAdapter(): void { + beforeEach(async () => { + stubTestEnv({ JUNIOR_STATE_ADAPTER: "memory" }); + const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + await disconnectStateAdapter(); + }); + + afterEach(async () => { + const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + await disconnectStateAdapter(); + }); +} + +/** Freeze Date/timers at the shared default test clock unless overridden. 
*/ +export function mockTestClock( + value: TestClockValue = DEFAULT_TEST_NOW_MS, +): void { + vi.useFakeTimers(); + vi.setSystemTime(toTestDate(value)); +} + +/** Apply the shared mocked clock around every test in a suite. */ +export function useMockedTestClock( + value: TestClockValue = DEFAULT_TEST_NOW_MS, +): void { + beforeEach(() => { + mockTestClock(value); + }); + + afterEach(() => { + vi.useRealTimers(); + }); +} + +/** Restore real timers after suites that use fake time for one or more cases. */ +export function useRealTimersAfterEach(): void { + afterEach(() => { + vi.useRealTimers(); + }); +} diff --git a/packages/junior/tests/integration/advisor/advisor-tool.test.ts b/packages/junior/tests/integration/advisor/advisor-tool.test.ts index 92912fd44..8a92375eb 100644 --- a/packages/junior/tests/integration/advisor/advisor-tool.test.ts +++ b/packages/junior/tests/integration/advisor/advisor-tool.test.ts @@ -12,14 +12,11 @@ import { type AdvisorToolRuntimeContext, } from "@/chat/tools/advisor/tool"; import { tool } from "@/chat/tools/definition"; +import { createTestToolRuntimeContext } from "../../fixtures/tool-runtime"; +import { DEFAULT_TEST_NOW_MS } from "../../fixtures/vitest"; type StreamResponse = Awaited>; -const LOCAL_DESTINATION = { - platform: "local", - conversationId: "local:test:advisor", -} as const; - const config: AdvisorConfig = { modelId: "openai/gpt-5.5", thinkingLevel: "xhigh", @@ -33,7 +30,7 @@ function assistantMessage(text: string) { model: "test", stopReason: "stop" as const, content: [{ type: "text" as const, text }], - timestamp: Date.now(), + timestamp: DEFAULT_TEST_NOW_MS, }; } @@ -92,11 +89,9 @@ async function executeAdvisor( describe("advisor tool", () => { it("is exposed only when advisor runtime context is enabled", () => { - const baseContext = { - destination: LOCAL_DESTINATION, - source: LOCAL_DESTINATION, - sandbox: {} as any, - }; + const baseContext = createTestToolRuntimeContext({ + channelId: "D12345", + }); 
expect(createTools([], {}, baseContext)).not.toHaveProperty("advisor"); const tools = createTools( @@ -187,11 +182,9 @@ describe("advisor tool", () => { createTools( [], {}, - { - destination: LOCAL_DESTINATION, - source: LOCAL_DESTINATION, - sandbox: {} as any, - }, + createTestToolRuntimeContext({ + channelId: "C12345", + }), ), ); @@ -200,6 +193,11 @@ describe("advisor tool", () => { "grep", "listDir", "readFile", + "slackCanvasRead", + "slackChannelListMessages", + "slackListGetItems", + "slackThreadRead", + "slackUserLookup", "systemTime", "webFetch", "webSearch", diff --git a/packages/junior/tests/integration/agent-continue-slack.test.ts b/packages/junior/tests/integration/agent-continue-slack.test.ts deleted file mode 100644 index 55693277c..000000000 --- a/packages/junior/tests/integration/agent-continue-slack.test.ts +++ /dev/null @@ -1,638 +0,0 @@ -import { Buffer } from "node:buffer"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - SLACK_DESTINATION, - createConversationWorkQueueTestAdapter, - type ConversationWorkQueueTestAdapter, -} from "../fixtures/conversation-work"; -import { slackApiOutbox } from "../fixtures/slack-api-outbox"; -import { resetSlackApiMockState } from "../msw/handlers/slack-api"; - -const generateAssistantReplyMock = vi.fn(); - -const ORIGINAL_ENV = { ...process.env }; - -type StateAdapterModule = typeof import("@/chat/state/adapter"); -type ThreadStateModule = typeof import("@/chat/runtime/thread-state"); -type AgentContinueRunnerModule = - typeof import("@/chat/runtime/agent-continue-runner"); -type RequestDeadlineModule = typeof import("@/chat/runtime/request-deadline"); -type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); -type AgentContinueServiceModule = - typeof import("@/chat/services/agent-continue"); - -let stateAdapterModule: StateAdapterModule; -let threadStateModule: ThreadStateModule; -let agentContinueRunnerModule: AgentContinueRunnerModule; -let 
requestDeadlineModule: RequestDeadlineModule; -let turnSessionStoreModule: TurnSessionStoreModule; -let agentContinueServiceModule: AgentContinueServiceModule; -let queue: ConversationWorkQueueTestAdapter; - -function continueAgentRun(args: { - conversationId: string; - sessionId: string; - expectedVersion: number; -}): Promise { - return requestDeadlineModule.runWithTurnRequestDeadline(() => - agentContinueRunnerModule.continueSlackAgentRunWithLockRetry( - { - conversationId: args.conversationId, - destination: SLACK_DESTINATION, - expectedVersion: args.expectedVersion, - sessionId: args.sessionId, - }, - { - generateReply: generateAssistantReplyMock, - scheduleAgentContinue: (request) => - agentContinueServiceModule.scheduleAgentContinue(request, { - queue, - }), - }, - ), - ); -} - -describe("agent continuation Slack integration", () => { - beforeEach(async () => { - queue = createConversationWorkQueueTestAdapter(); - generateAssistantReplyMock.mockReset(); - generateAssistantReplyMock.mockResolvedValue({ - text: "Final resumed answer", - diagnostics: { - outcome: "success", - toolCalls: [], - }, - }); - resetSlackApiMockState(); - process.env = { - ...ORIGINAL_ENV, - JUNIOR_STATE_ADAPTER: "memory", - JUNIOR_BASE_URL: "https://junior.example.com", - JUNIOR_SECRET: "resume-secret", - SLACK_BOT_TOKEN: process.env.SLACK_BOT_TOKEN ?? 
"xoxb-test-token", - }; - - vi.resetModules(); - stateAdapterModule = await import("@/chat/state/adapter"); - threadStateModule = await import("@/chat/runtime/thread-state"); - agentContinueRunnerModule = - await import("@/chat/runtime/agent-continue-runner"); - requestDeadlineModule = await import("@/chat/runtime/request-deadline"); - turnSessionStoreModule = await import("@/chat/state/turn-session"); - agentContinueServiceModule = await import("@/chat/services/agent-continue"); - - await stateAdapterModule.disconnectStateAdapter(); - await stateAdapterModule.getStateAdapter().connect(); - }); - - afterEach(async () => { - await stateAdapterModule.disconnectStateAdapter(); - process.env = { ...ORIGINAL_ENV }; - vi.restoreAllMocks(); - }); - - it("posts the resumed reply through the Slack MSW harness and persists completion", async () => { - const conversationId = "slack:C123:1712345.0001"; - const sessionId = "turn_msg_1"; - const sessionRecord = - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 1, - }, - ], - resumeReason: "timeout", - resumedFromSliceId: 1, - errorMessage: "Agent turn timed out", - requester: { - slackUserId: "U123", - slackUserName: "testuser", - fullName: "Test User", - email: "testuser@example.com", - }, - }); - - await threadStateModule.persistThreadStateById(conversationId, { - artifacts: { - assistantContextChannelId: "C999", - listColumnMap: {}, - }, - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg.1", - role: "user", - text: "resume this request", - createdAtMs: 1, - author: { - userId: "U123", - userName: "alice", - }, - meta: { - attachmentCount: 2, - imageAttachmentCount: 1, - imagesHydrated: false, - }, - }, - ], - processing: { - activeTurnId: 
sessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, - }, - }, - }); - await threadStateModule.getChannelConfigurationServiceById("C123").set({ - key: "demo.org", - value: "acme", - source: "test", - }); - - const continued = await continueAgentRun({ - conversationId, - sessionId, - expectedVersion: sessionRecord.version, - }); - - expect(continued).toBe(true); - - expect(generateAssistantReplyMock).toHaveBeenCalledWith( - "resume this request", - expect.objectContaining({ - requester: expect.objectContaining({ - email: "testuser@example.com", - fullName: "Test User", - userId: "U123", - userName: "testuser", - }), - destination: SLACK_DESTINATION, - toolChannelId: "C999", - inboundAttachmentCount: 2, - omittedImageAttachmentCount: 1, - sandbox: expect.objectContaining({ - sandboxId: undefined, - sandboxDependencyProfileHash: undefined, - }), - }), - ); - const resumeContext = generateAssistantReplyMock.mock.calls[0]?.[1] as { - channelConfiguration?: { - resolve: (key: string) => Promise; - }; - turnDeadlineAtMs?: number; - }; - expect(resumeContext.turnDeadlineAtMs).toEqual(expect.any(Number)); - expect(resumeContext.turnDeadlineAtMs).toBeGreaterThan(Date.now()); - expect(await resumeContext.channelConfiguration?.resolve("demo.org")).toBe( - "acme", - ); - - expect(slackApiOutbox.calls("assistant.threads.setStatus")).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1712345.0001", - status: expect.any(String), - loading_messages: expect.arrayContaining([expect.any(String)]), - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1712345.0001", - status: "", - }), - }), - ]), - ); - expect(slackApiOutbox.messages()).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: 
"1712345.0001", - text: "Final resumed answer", - }), - }), - ]); - - const persisted = - await threadStateModule.getPersistedThreadState(conversationId); - const conversation = (persisted.conversation ?? {}) as { - messages?: Array<{ role?: string; text?: string }>; - processing?: { activeTurnId?: string }; - }; - expect(conversation.processing?.activeTurnId).toBeUndefined(); - expect(conversation.messages?.at(-1)).toMatchObject({ - role: "assistant", - text: "Final resumed answer", - }); - }); - - it("schedules another continuation for high slice ids", async () => { - const conversationId = "slack:C123:1712345.0002"; - const sessionId = "turn_msg_2"; - const sessionRecord = - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 5, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 1, - }, - ], - resumeReason: "timeout", - resumedFromSliceId: 4, - errorMessage: "Agent turn timed out", - requester: { - slackUserId: "U123", - slackUserName: "testuser", - fullName: "Test User", - email: "testuser@example.com", - }, - }); - - await threadStateModule.persistThreadStateById(conversationId, { - artifacts: { - listColumnMap: {}, - }, - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg.2", - role: "user", - text: "resume this request", - createdAtMs: 1, - author: { - userId: "U123", - }, - }, - ], - processing: { - activeTurnId: sessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, - }, - }, - }); - - const { RetryableTurnError } = await import("@/chat/runtime/turn"); - generateAssistantReplyMock.mockRejectedValueOnce( - new RetryableTurnError("agent_continue", "timed out again", { - conversationId, - sessionId, - version: sessionRecord.version + 1, - 
sliceId: 6, - }), - ); - - const continued = await continueAgentRun({ - conversationId, - sessionId, - expectedVersion: sessionRecord.version, - }); - - expect(continued).toBe(true); - - expect(slackApiOutbox.messages()).toEqual([]); - expect(queue.sentRecords()).toEqual([ - { - conversationId, - destination: SLACK_DESTINATION, - idempotencyKey: expect.stringContaining( - `agent-continue:${conversationId}:${sessionId}:`, - ), - }, - ]); - - const persisted = - await threadStateModule.getPersistedThreadState(conversationId); - const conversation = (persisted.conversation ?? {}) as { - processing?: { activeTurnId?: string }; - }; - expect(conversation.processing?.activeTurnId).toBe(sessionId); - }); - - it("terminalizes startup failures before the visible failure path runs", async () => { - const conversationId = "slack:C123:1712345.0007"; - const sessionId = "turn_msg_7"; - const sessionRecord = - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 1, - }, - ], - resumeReason: "timeout", - resumedFromSliceId: 1, - errorMessage: "Agent turn timed out", - }); - - await threadStateModule.persistThreadStateById(conversationId, { - artifacts: { - listColumnMap: {}, - }, - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg.7", - role: "user", - text: "resume this request", - createdAtMs: 1, - author: {}, - }, - ], - processing: { - activeTurnId: sessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, - }, - }, - }); - - const continued = await continueAgentRun({ - conversationId, - sessionId, - expectedVersion: sessionRecord.version, - }); - - expect(continued).toBe(true); - 
expect(generateAssistantReplyMock).not.toHaveBeenCalled(); - await expect( - turnSessionStoreModule.getAgentTurnSessionRecord( - conversationId, - sessionId, - ), - ).resolves.toMatchObject({ - state: "failed", - errorMessage: "Paused agent run failed while continuing", - }); - }); - - it("schedules a durable continuation without posting a notice when a resumed slice times out again", async () => { - const conversationId = "slack:C123:1712345.0006"; - const sessionId = "turn_msg_6"; - const sessionRecord = - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 1, - }, - ], - resumeReason: "timeout", - resumedFromSliceId: 1, - errorMessage: "Agent turn timed out", - requester: { - slackUserId: "U123", - slackUserName: "testuser", - fullName: "Test User", - email: "testuser@example.com", - }, - }); - - await threadStateModule.persistThreadStateById(conversationId, { - artifacts: { - listColumnMap: {}, - }, - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg.6", - role: "user", - text: "resume this request", - createdAtMs: 1, - author: { - userId: "U123", - }, - }, - ], - processing: { - activeTurnId: sessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, - }, - }, - }); - - const { RetryableTurnError } = await import("@/chat/runtime/turn"); - generateAssistantReplyMock.mockRejectedValueOnce( - new RetryableTurnError("agent_continue", "timed out again", { - conversationId, - sessionId, - version: sessionRecord.version + 1, - sliceId: 3, - }), - ); - - const continued = await continueAgentRun({ - conversationId, - sessionId, - expectedVersion: sessionRecord.version, - }); - - 
expect(continued).toBe(true); - - const postCalls = slackApiOutbox.messages(); - expect(postCalls).toEqual([]); - expect(queue.sentRecords()).toEqual([ - { - conversationId, - destination: SLACK_DESTINATION, - idempotencyKey: expect.stringContaining( - `agent-continue:${conversationId}:${sessionId}:`, - ), - }, - ]); - }); - - it("uploads resumed reply files through the shared delivery path", async () => { - const conversationId = "slack:C123:1712345.0003"; - const sessionId = "turn_msg_3"; - const sessionRecord = - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 1, - }, - ], - resumeReason: "timeout", - resumedFromSliceId: 1, - errorMessage: "Agent turn timed out", - requester: { - slackUserId: "U123", - slackUserName: "testuser", - fullName: "Test User", - email: "testuser@example.com", - }, - }); - - generateAssistantReplyMock.mockResolvedValueOnce({ - text: "Final resumed answer with artifact", - files: [ - { - data: Buffer.from("resume-file"), - filename: "resume.txt", - }, - ], - diagnostics: { - outcome: "success", - toolCalls: [], - }, - }); - - await threadStateModule.persistThreadStateById(conversationId, { - artifacts: { - assistantContextChannelId: "C999", - listColumnMap: {}, - }, - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg.3", - role: "user", - text: "resume this request", - createdAtMs: 1, - author: { - userId: "U123", - userName: "alice", - }, - }, - ], - processing: { - activeTurnId: sessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, - }, - }, - }); - - const continued = await continueAgentRun({ - conversationId, - sessionId, - expectedVersion: 
sessionRecord.version, - }); - - expect(continued).toBe(true); - - expect(slackApiOutbox.messages()).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1712345.0003", - text: "Final resumed answer with artifact", - }), - }), - ]); - expect(slackApiOutbox.calls("files.getUploadURLExternal")).toHaveLength(1); - expect(slackApiOutbox.calls("files.completeUploadExternal")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1712345.0003", - }), - }), - ]); - expect(slackApiOutbox.fileUploads()).toHaveLength(1); - - const persisted = - await threadStateModule.getPersistedThreadState(conversationId); - const conversation = (persisted.conversation ?? {}) as { - messages?: Array<{ role?: string; text?: string }>; - processing?: { activeTurnId?: string }; - }; - expect(conversation.processing?.activeTurnId).toBeUndefined(); - expect(conversation.messages?.at(-1)).toMatchObject({ - role: "assistant", - text: "Final resumed answer with artifact", - }); - }); -}); diff --git a/packages/junior/tests/integration/dashboard-reporting.test.ts b/packages/junior/tests/integration/dashboard-reporting.test.ts deleted file mode 100644 index 58ae753a5..000000000 --- a/packages/junior/tests/integration/dashboard-reporting.test.ts +++ /dev/null @@ -1,1044 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import type { PiMessage } from "@/chat/pi/messages"; - -vi.mock("@/chat/prompt", () => ({ - buildSystemPrompt: vi.fn(() => "[system prompt]"), - buildTurnContextPrompt: vi.fn(() => null), - JUNIOR_PERSONALITY: "", - JUNIOR_WORLD: null, -})); - -const SYSTEM_MESSAGE = { - role: "system", - parts: [{ type: "text", text: "[system prompt]" }], -}; - -const ORIGINAL_ENV = { ...process.env }; - -describe("dashboard reporting", () => { - beforeEach(async () => { - process.env = { - ...ORIGINAL_ENV, - JUNIOR_STATE_ADAPTER: "memory", - DATABASE_URL: 
undefined, - JUNIOR_DATABASE_URL: undefined, - }; - vi.resetModules(); - const { disconnectStateAdapter } = await import("@/chat/state/adapter"); - await disconnectStateAdapter(); - }); - - afterEach(async () => { - const { disconnectStateAdapter } = await import("@/chat/state/adapter"); - await disconnectStateAdapter(); - vi.useRealTimers(); - vi.resetModules(); - process.env = { ...ORIGINAL_ENV }; - }); - - it("indexes recent turn session summaries", async () => { - const { listAgentTurnSessionSummaries, upsertAgentTurnSessionRecord } = - await import("@/chat/state/turn-session"); - - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C1:111", - sessionId: "turn-1", - sliceId: 1, - state: "running", - piMessages: [], - }); - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C1:111", - sessionId: "turn-1", - sliceId: 2, - state: "completed", - piMessages: [], - cumulativeDurationMs: 1_200, - errorMessage: "provider failed with sensitive details", - loadedSkillNames: ["triage"], - }); - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C2:222", - sessionId: "turn-2", - sliceId: 1, - state: "awaiting_resume", - piMessages: [], - resumeReason: "timeout", - }); - - const summaries = await listAgentTurnSessionSummaries(); - const turn1 = summaries.find((summary) => summary.sessionId === "turn-1"); - const turn2 = summaries.find((summary) => summary.sessionId === "turn-2"); - - expect( - summaries.filter((summary) => summary.sessionId === "turn-1"), - ).toHaveLength(1); - expect(turn1).toMatchObject({ - conversationId: "slack:C1:111", - sessionId: "turn-1", - sliceId: 2, - state: "completed", - cumulativeDurationMs: 1_200, - loadedSkillNames: ["triage"], - }); - expect(turn1?.startedAtMs).toBeLessThanOrEqual(turn1?.updatedAtMs ?? 
0); - expect(turn1).not.toHaveProperty("errorMessage"); - expect(turn2).toMatchObject({ - conversationId: "slack:C2:222", - cumulativeDurationMs: 0, - sessionId: "turn-2", - state: "awaiting_resume", - resumeReason: "timeout", - }); - }); - - it("reads conversation title details when context is absent", async () => { - const { getConversationDetails, setConversationTitle } = - await import("@/chat/state/conversation-details"); - - await setConversationTitle("slack:C1:111", { - displayTitle: "Incident Triage", - titleSourceMessageId: "msg-1", - }); - - await expect(getConversationDetails("slack:C1:111")).resolves.toMatchObject( - { - conversationId: "slack:C1:111", - displayTitle: "Incident Triage", - titleSourceMessageId: "msg-1", - }, - ); - }); - - it("lists recent conversations through reporting", async () => { - const { getConfiguredConversationStore } = - await import("@/chat/conversations/configured"); - const { createJuniorReporting } = await import("@/reporting"); - const conversationStore = getConfiguredConversationStore(); - - await conversationStore.recordActivity({ - conversationId: "slack:C1:111", - channelName: "incidents", - nowMs: 1_000, - source: "slack", - title: "Incident follow-up", - }); - - const reporting = createJuniorReporting(); - - await expect(reporting.listRecentConversations()).resolves.toEqual([ - expect.objectContaining({ - channelName: "incidents", - conversationId: "slack:C1:111", - displayTitle: expect.any(String), - source: "slack", - status: "completed", - }), - ]); - }); - - it("mirrors local turn sessions as local conversation summaries", async () => { - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { getConfiguredConversationStore } = - await import("@/chat/conversations/configured"); - const conversationId = "local:workspace:run-123"; - - await recordAgentTurnSessionSummary({ - conversationId, - destination: { - platform: "local", - conversationId, - }, - sessionId: 
"local-turn-1", - sliceId: 1, - state: "completed", - surface: "internal", - ttlMs: 60_000, - }); - - await expect( - getConfiguredConversationStore().get({ - conversationId, - }), - ).resolves.toMatchObject({ - conversationId, - source: "local", - }); - }); - - it("redacts private conversation summaries", async () => { - const { getConfiguredConversationStore } = - await import("@/chat/conversations/configured"); - const { createJuniorReporting } = await import("@/reporting"); - const conversationStore = getConfiguredConversationStore(); - - await conversationStore.recordActivity({ - conversationId: "slack:G1:222", - channelName: "private-incident-room", - nowMs: 1_000, - source: "slack", - title: "Sensitive escalation", - }); - - const summaries = await createJuniorReporting().listRecentConversations(); - - expect(JSON.stringify(summaries)).not.toContain("private-incident-room"); - expect(JSON.stringify(summaries)).not.toContain("Sensitive escalation"); - expect(summaries[0]).toMatchObject({ - conversationId: "slack:G1:222", - status: "completed", - }); - }); - - it("refreshes conversation context ttl without replacing origin context", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-06-01T00:00:00.000Z")); - const { THREAD_STATE_TTL_MS } = await import("chat"); - const { getConversationDetails, initConversationContext } = - await import("@/chat/state/conversation-details"); - const startedAtMs = Date.now(); - - await initConversationContext("slack:C1:111", { - channelName: "first-channel", - originRequester: { fullName: "First Requester" }, - originSurface: "slack", - startedAtMs, - }); - - vi.setSystemTime(Date.now() + THREAD_STATE_TTL_MS - 1_000); - await initConversationContext("slack:C1:111", { - channelName: "later-channel", - originRequester: { fullName: "Later Requester" }, - originSurface: "slack", - startedAtMs: Date.now(), - }); - - vi.setSystemTime(Date.now() + 2_000); - await 
expect(getConversationDetails("slack:C1:111")).resolves.toMatchObject( - { - channelName: "first-channel", - originRequester: { fullName: "First Requester" }, - startedAtMs, - }, - ); - }); - - it("does not replace malformed conversation context with later turn metadata", async () => { - const { - getConversationDetails, - initConversationContext, - setConversationTitle, - } = await import("@/chat/state/conversation-details"); - const { getStateAdapter } = await import("@/chat/state/adapter"); - const { THREAD_STATE_TTL_MS } = await import("chat"); - const stateAdapter = getStateAdapter(); - await stateAdapter.connect(); - - await stateAdapter.set( - "junior:conversation:slack:C1:malformed:context", - { channelName: "first-channel" }, - THREAD_STATE_TTL_MS, - ); - await setConversationTitle("slack:C1:malformed", { - displayTitle: "Existing Title", - }); - - await initConversationContext("slack:C1:malformed", { - channelName: "later-channel", - originRequester: { fullName: "Later Requester" }, - originSurface: "slack", - startedAtMs: Date.now(), - }); - - const details = await getConversationDetails("slack:C1:malformed"); - - expect(details).toMatchObject({ - conversationId: "slack:C1:malformed", - displayTitle: "Existing Title", - }); - expect(details).not.toHaveProperty("channelName"); - expect(details).not.toHaveProperty("originRequester"); - expect(details).not.toHaveProperty("startedAtMs"); - }); - - it("uses conversation details title when conversation turns are absent", async () => { - const { initConversationContext, setConversationTitle } = - await import("@/chat/state/conversation-details"); - const { createJuniorReporting } = await import("@/reporting"); - - await initConversationContext("slack:C1:details-only", { - channelName: "proj-alpha", - originSurface: "slack", - startedAtMs: Date.now(), - }); - await setConversationTitle("slack:C1:details-only", { - displayTitle: "Details Only Title", - }); - - const report = await 
createJuniorReporting().getConversation( - "slack:C1:details-only", - ); - - expect(report).toMatchObject({ - conversationId: "slack:C1:details-only", - displayTitle: "Details Only Title", - runs: [], - }); - }); - - it("reports conversation-index detail when turn summaries are absent", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-06-04T12:00:00.000Z")); - const { requestConversationWork } = - await import("@/chat/task-execution/store"); - const { createJuniorReporting } = await import("@/reporting"); - - await requestConversationWork({ - conversationId: "slack:C1:index-only", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C1", - }, - nowMs: Date.now(), - }); - - const report = await createJuniorReporting().getConversation( - "slack:C1:index-only", - ); - - expect(report).toMatchObject({ - conversationId: "slack:C1:index-only", - runs: [ - expect.objectContaining({ - id: "slack:C1:index-only", - status: "active", - transcriptAvailable: false, - transcript: [], - }), - ], - }); - }); - - it("reports aggregate conversation stats beyond the session feed cap", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-06-04T12:00:00.000Z")); - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - for (let index = 0; index < 55; index += 1) { - await recordAgentTurnSessionSummary({ - channelName: "proj-alpha", - conversationId: `slack:C1:${index}`, - cumulativeDurationMs: index + 1, - requester: { fullName: "Avery" }, - sessionId: `turn-${index}`, - sliceId: 1, - startedAtMs: Date.now() - index * 1000, - state: "completed", - }); - } - - const reporting = createJuniorReporting(); - const sessions = await reporting.getSessions(); - const stats = await reporting.getConversationStats(); - - expect(sessions.sessions).toHaveLength(50); - expect(stats).toMatchObject({ - conversations: 55, - requesters: [ - 
expect.objectContaining({ - conversations: 55, - label: "Avery", - }), - ], - sampleLimit: 5_000, - sampleSize: 55, - source: "conversation_index", - truncated: false, - runs: 55, - }); - }); - - it("reports aggregate conversation stats by requester and location", async () => { - vi.useFakeTimers(); - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - vi.setSystemTime(new Date("2026-05-20T10:02:00.000Z")); - await recordAgentTurnSessionSummary({ - channelName: "old-project", - conversationId: "slack:C2:300", - cumulativeDurationMs: 8_000, - cumulativeUsage: { totalTokens: 500 }, - requester: { fullName: "Casey" }, - sessionId: "old-turn", - sliceId: 1, - startedAtMs: Date.parse("2026-05-20T10:00:00.000Z"), - state: "completed", - }); - vi.setSystemTime(new Date("2026-06-01T10:02:00.000Z")); - await recordAgentTurnSessionSummary({ - channelName: "proj-alpha", - conversationId: "slack:C1:100", - cumulativeDurationMs: 1_000, - cumulativeUsage: { inputTokens: 10, outputTokens: 5 }, - requester: { fullName: "Avery" }, - sessionId: "turn-1", - sliceId: 1, - startedAtMs: Date.parse("2026-06-01T10:00:00.000Z"), - state: "completed", - }); - vi.setSystemTime(new Date("2026-06-01T10:04:00.000Z")); - await recordAgentTurnSessionSummary({ - channelName: "proj-alpha", - conversationId: "slack:C1:100", - cumulativeDurationMs: 2_000, - cumulativeUsage: { totalTokens: 20 }, - requester: { fullName: "Blake" }, - sessionId: "turn-2", - sliceId: 1, - startedAtMs: Date.parse("2026-06-01T10:03:00.000Z"), - state: "failed", - }); - vi.setSystemTime(new Date("2026-06-04T11:02:00.000Z")); - await recordAgentTurnSessionSummary({ - conversationId: "slack:D1:200", - cumulativeDurationMs: 3_000, - requester: { fullName: "Avery" }, - sessionId: "turn-3", - sliceId: 1, - startedAtMs: Date.parse("2026-06-04T11:00:00.000Z"), - state: "awaiting_resume", - }); - - vi.setSystemTime(new 
Date("2026-06-04T12:00:00.000Z")); - const stats = await createJuniorReporting().getConversationStats(); - - expect(stats).toMatchObject({ - active: 1, - conversations: 2, - durationMs: 5_000, - failed: 1, - requesters: [ - { - active: 1, - conversations: 2, - durationMs: 4_000, - failed: 0, - hung: 0, - label: "Avery", - tokens: 15, - runs: 2, - }, - { - active: 0, - conversations: 1, - durationMs: 1_000, - failed: 1, - hung: 0, - label: "Blake", - tokens: 5, - runs: 1, - }, - ], - tokens: 20, - runs: 3, - }); - expect( - stats.locations.map((item) => ({ - conversations: item.conversations, - durationMs: item.durationMs, - label: item.label, - })), - ).toEqual([ - { conversations: 1, durationMs: 2_000, label: "#proj-alpha" }, - { conversations: 1, durationMs: 3_000, label: "Direct Message" }, - ]); - }); - - it("reports aggregate conversation stats from origin details when summaries omit metadata", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-06-04T12:00:00.000Z")); - const { initConversationContext } = - await import("@/chat/state/conversation-details"); - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await initConversationContext("slack:C1:100", { - channelName: "proj-alpha", - originRequester: { fullName: "Origin Requester" }, - originSurface: "slack", - startedAtMs: Date.parse("2026-06-04T10:00:00.000Z"), - }); - await recordAgentTurnSessionSummary({ - conversationId: "slack:C1:100", - cumulativeDurationMs: 1_000, - requester: { fullName: "Later Requester" }, - sessionId: "turn-1", - sliceId: 1, - startedAtMs: Date.parse("2026-06-04T10:05:00.000Z"), - state: "completed", - }); - - const stats = await createJuniorReporting().getConversationStats(); - - expect(stats.requesters).toEqual([ - expect.objectContaining({ - conversations: 1, - label: "Origin Requester", - runs: 1, - }), - ]); - expect(stats.locations).toEqual([ - 
expect.objectContaining({ - conversations: 1, - label: "#proj-alpha", - runs: 1, - }), - ]); - }); - - it("reports aggregate scheduler and API locations from stored turn surfaces", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-06-04T12:00:00.000Z")); - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await recordAgentTurnSessionSummary({ - conversationId: "agent-dispatch:dispatch_scheduler", - cumulativeDurationMs: 2_000, - requester: { fullName: "Scheduler" }, - sessionId: "dispatch:scheduler", - sliceId: 1, - state: "completed", - surface: "scheduler", - }); - await recordAgentTurnSessionSummary({ - conversationId: "agent-dispatch:dispatch_api", - cumulativeDurationMs: 1_000, - requester: { fullName: "API" }, - sessionId: "dispatch:api", - sliceId: 1, - state: "completed", - surface: "api", - }); - - const stats = await createJuniorReporting().getConversationStats(); - - expect(stats.locations.map((item) => item.label)).toEqual([ - "Scheduler", - "API", - ]); - }); - - it("hydrates capped aggregate samples before attributing cumulative turn metrics", async () => { - vi.useFakeTimers(); - const startedAtMs = Date.parse("2026-06-04T10:00:00.000Z"); - vi.setSystemTime(new Date(startedAtMs)); - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await recordAgentTurnSessionSummary({ - conversationId: "slack:C1:baseline", - cumulativeDurationMs: 1_000, - requester: { fullName: "Avery" }, - sessionId: "turn-baseline", - sliceId: 1, - startedAtMs, - state: "completed", - }); - for (let index = 0; index < 5_000; index += 1) { - vi.setSystemTime(new Date(startedAtMs + (index + 1) * 1000)); - await recordAgentTurnSessionSummary({ - conversationId: `slack:C_FILL:${index}`, - cumulativeDurationMs: 1, - requester: { fullName: "Filler" }, - 
sessionId: `turn-${index}`, - sliceId: 1, - state: "completed", - }); - } - vi.setSystemTime(new Date(startedAtMs + 5_001 * 1000)); - await recordAgentTurnSessionSummary({ - conversationId: "slack:C1:baseline", - cumulativeDurationMs: 1_500, - requester: { fullName: "Blake" }, - sessionId: "turn-latest", - sliceId: 1, - state: "completed", - }); - - const stats = await createJuniorReporting().getConversationStats(); - const avery = stats.requesters.find((item) => item.label === "Avery"); - const blake = stats.requesters.find((item) => item.label === "Blake"); - - expect(stats.truncated).toBe(true); - expect(stats.sampleSize).toBe(5_000); - expect(avery).toMatchObject({ durationMs: 1_000, runs: 1 }); - expect(blake).toMatchObject({ durationMs: 500, runs: 1 }); - }, 20_000); - - it("marks aggregate conversation stats truncated when the sample cap is reached", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-06-04T12:00:00.000Z")); - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - for (let index = 0; index < 5_001; index += 1) { - await recordAgentTurnSessionSummary({ - conversationId: `slack:C1:${index}`, - sessionId: `turn-${index}`, - sliceId: 1, - state: "completed", - }); - } - - const stats = await createJuniorReporting().getConversationStats(); - - expect(stats).toMatchObject({ - sampleLimit: 5_000, - sampleSize: 5_000, - truncated: true, - }); - }, 20_000); - - it("reports only the current turn transcript from session history", async () => { - const { upsertAgentTurnSessionRecord } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C1:222", - sessionId: "turn-current", - sliceId: 1, - state: "completed", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "previous question" }], - timestamp: 
1, - }, - { - role: "assistant", - content: [{ type: "text", text: "previous answer" }], - timestamp: 2, - }, - { - role: "user", - content: [{ type: "text", text: "current question" }], - timestamp: 3, - }, - { - role: "assistant", - content: [ - { type: "thinking", text: "I should use a tool" }, - { - type: "toolCall", - name: "search", - arguments: { query: "current question" }, - }, - ], - timestamp: 4, - }, - { - role: "toolResult", - toolCallId: "search-1", - name: "search", - content: [{ type: "text", text: "tool result" }], - timestamp: 5, - }, - { - role: "assistant", - content: [{ type: "text", text: "current answer" }], - timestamp: 6, - }, - ] as PiMessage[], - }); - - const report = - await createJuniorReporting().getConversation("slack:C1:222"); - - expect(report.runs).toHaveLength(1); - expect(report.runs[0]).toMatchObject({ - transcriptMessageCount: 2, - }); - expect(report.runs[0]!.transcript).toEqual([ - { - role: "user", - timestamp: 3, - parts: [{ type: "text", text: "current question" }], - }, - { - role: "assistant", - timestamp: 4, - parts: [ - { type: "thinking", output: "I should use a tool" }, - { - type: "tool_call", - name: "search", - input: { query: "current question" }, - }, - ], - }, - { - role: "toolResult", - timestamp: 5, - parts: [ - { - type: "tool_result", - id: "search-1", - name: "search", - output: "tool result", - }, - ], - }, - { - role: "assistant", - timestamp: 6, - parts: [{ type: "text", text: "current answer" }], - }, - ]); - }); - - it("keeps the initial prompt when steering adds another user message", async () => { - const { upsertAgentTurnSessionRecord } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C1:steering-transcript", - sessionId: "turn-steering", - sliceId: 1, - state: "completed", - turnStartMessageIndex: 2, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: 
"previous question" }], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "text", text: "previous answer" }], - timestamp: 2, - }, - { - role: "user", - content: [{ type: "text", text: "hello" }], - timestamp: 3, - }, - { - role: "assistant", - content: [{ type: "text", text: "working" }], - timestamp: 4, - }, - { - role: "user", - content: [{ type: "text", text: "steering message" }], - timestamp: 5, - }, - { - role: "assistant", - content: [{ type: "text", text: "done" }], - timestamp: 6, - }, - ] as PiMessage[], - }); - - const report = await createJuniorReporting().getConversation( - "slack:C1:steering-transcript", - ); - - expect(report.runs).toHaveLength(1); - expect(report.runs[0]).toMatchObject({ - transcriptMessageCount: 4, - }); - expect(report.runs[0]!.transcript).toEqual([ - { - role: "user", - timestamp: 3, - parts: [{ type: "text", text: "hello" }], - }, - { - role: "assistant", - timestamp: 4, - parts: [{ type: "text", text: "working" }], - }, - { - role: "user", - timestamp: 5, - parts: [{ type: "text", text: "steering message" }], - }, - { - role: "assistant", - timestamp: 6, - parts: [{ type: "text", text: "done" }], - }, - ]); - }); - - it("reports a conversation after newer turns evict it from the global index", async () => { - const { recordAgentTurnSessionSummary, upsertAgentTurnSessionRecord } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C1:999", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C1", - }, - sessionId: "target-turn", - sliceId: 1, - state: "completed", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "target question" }], - timestamp: 1, - }, - ] as PiMessage[], - }); - - for (let index = 0; index < 5_005; index += 1) { - await recordAgentTurnSessionSummary({ - conversationId: `slack:C2:${index}`, - sessionId: `newer-turn-${index}`, - 
sliceId: 1, - state: "completed", - }); - } - - const report = - await createJuniorReporting().getConversation("slack:C1:999"); - - expect(report.runs).toHaveLength(1); - expect(report.runs[0]).toMatchObject({ - id: "target-turn", - transcriptAvailable: true, - }); - expect(report.runs[0]!.transcript).toEqual([ - SYSTEM_MESSAGE, - { - role: "user", - timestamp: 1, - parts: [{ type: "text", text: "target question" }], - }, - ]); - }, 20_000); - - it("keeps earlier turn transcripts pinned to their committed log prefix", async () => { - const { upsertAgentTurnSessionRecord } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C1:333", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C1", - }, - sessionId: "turn-one", - sliceId: 1, - state: "completed", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "first question" }], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "text", text: "first answer" }], - timestamp: 2, - }, - ] as PiMessage[], - }); - await upsertAgentTurnSessionRecord({ - conversationId: "slack:C1:333", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C1", - }, - sessionId: "turn-two", - sliceId: 1, - state: "completed", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "first question" }], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "text", text: "first answer" }], - timestamp: 2, - }, - { - role: "user", - content: [{ type: "text", text: "second question" }], - timestamp: 3, - }, - { - role: "assistant", - content: [{ type: "text", text: "second answer" }], - timestamp: 4, - }, - ] as PiMessage[], - }); - - const report = - await createJuniorReporting().getConversation("slack:C1:333"); - - expect(report.runs).toHaveLength(2); - expect(report.runs[0]).toMatchObject({ id: "turn-one" }); - 
expect(report.runs[0]!.transcript).toEqual([ - SYSTEM_MESSAGE, - { - role: "user", - timestamp: 1, - parts: [{ type: "text", text: "first question" }], - }, - { - role: "assistant", - timestamp: 2, - parts: [{ type: "text", text: "first answer" }], - }, - ]); - expect(report.runs[1]).toMatchObject({ id: "turn-two" }); - expect(report.runs[1]!.transcript).toEqual([ - { - role: "user", - timestamp: 3, - parts: [{ type: "text", text: "second question" }], - }, - { - role: "assistant", - timestamp: 4, - parts: [{ type: "text", text: "second answer" }], - }, - ]); - }); - - it("redacts dashboard transcripts for non-public conversations", async () => { - const { upsertAgentTurnSessionRecord } = - await import("@/chat/state/turn-session"); - const { persistThreadStateById } = - await import("@/chat/runtime/thread-state"); - const { createJuniorReporting } = await import("@/reporting"); - const privateToolArgs = Object.fromEntries( - Array.from({ length: 25 }, (_, index) => [ - `privateKey${index}`, - `private value ${index}`, - ]), - ); - - // Store the generated title in thread state — the canonical location. 
- await persistThreadStateById("slack:D1:222", { - artifacts: { assistantTitle: "sensitive generated thread title" }, - }); - - await upsertAgentTurnSessionRecord({ - conversationId: "slack:D1:222", - sessionId: "turn-private", - sliceId: 1, - state: "completed", - channelName: "secret-dm-name", - requester: { - email: "david@sentry.io", - slackUserId: "U1", - }, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "private question" }], - timestamp: 1, - }, - { - role: "assistant", - content: [ - { type: "text", text: "private answer" }, - { - type: "toolCall", - name: "search", - arguments: privateToolArgs, - }, - ], - timestamp: 2, - }, - ] as PiMessage[], - traceId: "0123456789abcdef0123456789abcdef", - }); - - const report = - await createJuniorReporting().getConversation("slack:D1:222"); - - expect(report.runs[0]).toMatchObject({ - displayTitle: "Direct Message", - channelName: "Direct Message", - id: "turn-private", - requesterIdentity: { - email: "david@sentry.io", - slackUserId: "U1", - }, - traceId: "0123456789abcdef0123456789abcdef", - transcriptAvailable: false, - transcriptMessageCount: 2, - transcriptRedacted: true, - transcriptRedactionReason: "non_public_conversation", - transcript: [], - }); - expect(report.runs[0]).not.toHaveProperty("requester"); - expect(JSON.stringify(report)).not.toContain("private question"); - expect(JSON.stringify(report)).not.toContain("private answer"); - expect(JSON.stringify(report)).not.toContain("private value"); - expect(JSON.stringify(report)).not.toContain( - "sensitive generated thread title", - ); - expect(JSON.stringify(report)).not.toContain("secret-dm-name"); - const toolCall = report.runs[0]!.transcriptMetadata?.[1]?.parts.find( - (part) => part.type === "tool_call", - ); - expect(toolCall?.inputKeys).toHaveLength(20); - expect(toolCall?.inputKeys).toContain("privateKey0"); - expect(toolCall?.inputKeys).not.toContain("privateKey20"); - }); - - it("marks expired private transcripts as privacy 
redacted", async () => { - const { recordAgentTurnSessionSummary } = - await import("@/chat/state/turn-session"); - const { createJuniorReporting } = await import("@/reporting"); - - await recordAgentTurnSessionSummary({ - conversationId: "slack:D1:333", - sessionId: "turn-private-expired", - sliceId: 1, - state: "completed", - }); - - const report = - await createJuniorReporting().getConversation("slack:D1:333"); - - expect(report.runs[0]).toMatchObject({ - displayTitle: "Direct Message", - channelName: "Direct Message", - id: "turn-private-expired", - transcriptAvailable: false, - transcriptMetadata: [], - transcriptRedacted: true, - transcriptRedactionReason: "non_public_conversation", - transcript: [], - }); - }); -}); diff --git a/packages/junior/tests/integration/example-build-discovery.test.ts b/packages/junior/tests/integration/example-build-discovery.test.ts index 41d04643a..af436265e 100644 --- a/packages/junior/tests/integration/example-build-discovery.test.ts +++ b/packages/junior/tests/integration/example-build-discovery.test.ts @@ -105,7 +105,7 @@ function clearVercelEnv(): void { describe.sequential("example build discovery integration", () => { beforeAll(() => { buildJuniorPackage(); - }, 60_000); + }, 120_000); afterEach(() => { process.chdir(originalCwd); @@ -201,7 +201,7 @@ describe.sequential("example build discovery integration", () => { ); expect(oauth.status).toBe(400); expect(await oauth.text()).toContain("missing required parameters"); - }, 15_000); + }); it("routes the queue consumer endpoint through the app", async () => { process.chdir(exampleRoot); @@ -222,7 +222,7 @@ describe.sequential("example build discovery integration", () => { expect(response.status).toBe(400); expect(await response.text()).toContain("Invalid content type"); - }, 15_000); + }); it("does not expose discovery state from the public example app", async () => { const packageNames = await getExamplePluginPackages(); @@ -233,5 +233,5 @@ describe.sequential("example 
build discovery integration", () => { const response = await app.fetch(new Request("http://localhost/api/info")); expect(response.status).toBe(404); - }, 15_000); + }); }); diff --git a/packages/junior/tests/integration/heartbeat-turn-resume.test.ts b/packages/junior/tests/integration/heartbeat-turn-resume.test.ts new file mode 100644 index 000000000..5eb2ef66a --- /dev/null +++ b/packages/junior/tests/integration/heartbeat-turn-resume.test.ts @@ -0,0 +1,183 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; +import { + getConversationWorkState, + requestConversationWork, +} from "@/chat/task-execution/store"; +import type { PiMessage } from "@/chat/pi/messages"; +import { GET as heartbeat } from "@/handlers/heartbeat"; +import { createConversationWorkQueueTestAdapter } from "../fixtures/conversation-work"; +import { + heartbeatRequest, + persistActiveTurn, + resetHeartbeatTestEnv, + setupHeartbeatTestEnv, + TEST_NOW_MS, +} from "../fixtures/heartbeat"; +import { mockTestClock } from "../fixtures/vitest"; +import { createWaitUntilCollector } from "../fixtures/wait-until"; + +vi.hoisted(() => { + process.env.JUNIOR_STATE_ADAPTER = "memory"; +}); + +const TEST_DESTINATION = { + platform: "slack" as const, + teamId: "T123", + channelId: "C123", +}; + +describe("heartbeat turn resume recovery", () => { + const originalFetch = global.fetch; + + beforeEach(async () => { + await setupHeartbeatTestEnv(); + }); + + afterEach(async () => { + await resetHeartbeatTestEnv(originalFetch); + }); + + it("reschedules stale timeout resume records", async () => { + const queue = createConversationWorkQueueTestAdapter(); + const conversationId = "slack:C123:1712345.0001"; + const sessionId = "turn-timeout"; + const staleNowMs = TEST_NOW_MS - 3 * 60 * 1000; + mockTestClock(staleNowMs); + await upsertAgentTurnSessionRecord({ + conversationId, + destination: TEST_DESTINATION, + 
sessionId, + sliceId: 2, + state: "awaiting_resume", + resumeReason: "timeout", + piMessages: [ + { + role: "user", + content: [{ type: "text", text: "finish this" }], + timestamp: staleNowMs, + } as PiMessage, + ], + }); + await persistActiveTurn(conversationId, sessionId); + await requestConversationWork({ + conversationId, + destination: TEST_DESTINATION, + nowMs: staleNowMs, + }); + mockTestClock(TEST_NOW_MS); + + const waitUntil = createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn, { + conversationWorkQueue: queue, + }); + + expect(response.status).toBe(202); + await waitUntil.flush(); + expect(queue.sentRecords()).toEqual([ + { + conversationId, + destination: TEST_DESTINATION, + idempotencyKey: expect.stringContaining( + `heartbeat:pending:${conversationId}:${TEST_NOW_MS}`, + ), + }, + ]); + await expect( + getConversationWorkState({ conversationId }), + ).resolves.toMatchObject({ + conversationId, + needsRun: true, + }); + }); + + it("reschedules stale cooperative yield resume records", async () => { + const queue = createConversationWorkQueueTestAdapter(); + const conversationId = "slack:C123:1712345.0008"; + const sessionId = "turn-yield"; + const staleNowMs = TEST_NOW_MS - 3 * 60 * 1000; + mockTestClock(staleNowMs); + await upsertAgentTurnSessionRecord({ + conversationId, + destination: TEST_DESTINATION, + sessionId, + sliceId: 1, + state: "awaiting_resume", + resumeReason: "yield", + piMessages: [ + { + role: "user", + content: [{ type: "text", text: "keep going" }], + timestamp: staleNowMs, + } as PiMessage, + ], + }); + await persistActiveTurn(conversationId, sessionId); + await requestConversationWork({ + conversationId, + destination: TEST_DESTINATION, + nowMs: staleNowMs, + }); + mockTestClock(TEST_NOW_MS); + + const waitUntil = createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn, { + conversationWorkQueue: queue, + }); + + expect(response.status).toBe(202); + 
await waitUntil.flush(); + expect(queue.sentRecords()).toEqual([ + { + conversationId, + destination: TEST_DESTINATION, + idempotencyKey: expect.stringContaining( + `heartbeat:pending:${conversationId}:${TEST_NOW_MS}`, + ), + }, + ]); + await expect( + getConversationWorkState({ conversationId }), + ).resolves.toMatchObject({ + conversationId, + needsRun: true, + }); + }); + + it("skips stale timeout resume records for inactive turns", async () => { + const queue = createConversationWorkQueueTestAdapter(); + const conversationId = "slack:C123:1712345.0007"; + const sessionId = "turn-timeout-inactive"; + const staleNowMs = TEST_NOW_MS - 3 * 60 * 1000; + mockTestClock(staleNowMs); + await upsertAgentTurnSessionRecord({ + conversationId, + destination: TEST_DESTINATION, + sessionId, + sliceId: 2, + state: "awaiting_resume", + resumeReason: "timeout", + piMessages: [ + { + role: "user", + content: [{ type: "text", text: "finish this" }], + timestamp: staleNowMs, + } as PiMessage, + ], + }); + await persistActiveTurn(conversationId, "turn-newer"); + mockTestClock(TEST_NOW_MS); + + const waitUntil = createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn, { + conversationWorkQueue: queue, + }); + + expect(response.status).toBe(202); + await waitUntil.flush(); + expect(queue.sentRecords()).toEqual([]); + await expect(getConversationWorkState({ conversationId })).resolves.toBe( + undefined, + ); + }); +}); diff --git a/packages/junior/tests/integration/heartbeat.test.ts b/packages/junior/tests/integration/heartbeat.test.ts index e3f6d6f62..bb2e020cd 100644 --- a/packages/junior/tests/integration/heartbeat.test.ts +++ b/packages/junior/tests/integration/heartbeat.test.ts @@ -1,189 +1,28 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - defineJuniorPlugin, - type Destination, -} from "@sentry/junior-plugin-api"; -import { createHeartbeatContext } from "@/chat/agent-dispatch/context"; -import { 
recoverStaleDispatches } from "@/chat/agent-dispatch/heartbeat"; -import { - createSchedulerStore, - schedulerPlugin, - type ScheduledTask, -} from "@sentry/junior-scheduler"; -import { createPluginState } from "@/chat/plugins/state"; -import { - createOrGetDispatch, - getDispatchRecord, - getDispatchStorageKey, - listIncompleteDispatchIds, - updateDispatchRecord, - withDispatchLock, -} from "@/chat/agent-dispatch/store"; -import type { DispatchRecord } from "@/chat/agent-dispatch/types"; -import { disconnectStateAdapter, getStateAdapter } from "@/chat/state/adapter"; -import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; -import { persistThreadStateById } from "@/chat/runtime/thread-state"; -import { getConversationWorkState } from "@/chat/task-execution/store"; -import { scheduleAgentContinue } from "@/chat/services/agent-continue"; -import type { PiMessage } from "@/chat/pi/messages"; +import { defineJuniorPlugin } from "@sentry/junior-plugin-api"; import { setAgentPlugins } from "@/chat/plugins/agent-hooks"; import { GET as heartbeat } from "@/handlers/heartbeat"; -import { createSlackDirectCredentialSubject } from "@/chat/credentials/subject"; -import { createConversationWorkQueueTestAdapter } from "../fixtures/conversation-work"; +import { + heartbeatRequest, + resetHeartbeatTestEnv, + setupHeartbeatTestEnv, + TEST_NOW_MS, +} from "../fixtures/heartbeat"; import { createWaitUntilCollector } from "../fixtures/wait-until"; -import { getCapturedSlackApiCalls } from "../msw/handlers/slack-api"; vi.hoisted(() => { process.env.JUNIOR_STATE_ADAPTER = "memory"; }); -const TEST_NOW_MS = Date.parse("2026-05-26T12:05:00.000Z"); -const TEST_RUN_AT_MS = Date.parse("2026-05-26T12:00:00.000Z"); -const SLACK_DESTINATION = { - platform: "slack", - teamId: "T123", - channelId: "C123", -} satisfies Destination; - -function schedulerStore() { - return createSchedulerStore(createPluginState("scheduler")); -} - -function createTask(overrides: Partial = {}): 
ScheduledTask { - const nextRunAtMs = TEST_RUN_AT_MS; - return { - id: "sched_plugin_1", - createdAtMs: nextRunAtMs, - createdBy: { slackUserId: "U123" }, - destination: SLACK_DESTINATION, - nextRunAtMs, - schedule: { - description: "Once at noon", - kind: "one_off", - timezone: "UTC", - }, - status: "active", - task: { - text: "Post a digest. Summarize the latest state.", - }, - updatedAtMs: nextRunAtMs, - version: 1, - ...overrides, - }; -} - -function createDailyTask( - overrides: Partial = {}, -): ScheduledTask { - const nextRunAtMs = Date.parse("2026-05-24T12:00:00.000Z"); - return createTask({ - id: "sched_plugin_daily", - createdAtMs: nextRunAtMs, - nextRunAtMs, - schedule: { - description: "Daily at noon UTC", - kind: "recurring", - timezone: "UTC", - recurrence: { - frequency: "daily", - interval: 1, - startDate: "2026-05-24", - time: { - hour: 12, - minute: 0, - }, - }, - }, - updatedAtMs: nextRunAtMs, - ...overrides, - }); -} - -function mockDispatchCallbackFetch(originalFetch: typeof fetch) { - const fetchMock = vi.fn(async (...args: Parameters) => { - const input = args[0]; - const url = - typeof input === "string" - ? input - : input instanceof URL - ? input.href - : input.url; - if (url.startsWith("https://slack.com/api/")) { - return await originalFetch(...args); - } - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - return fetchMock; -} - -function createCredentialSubject( - input: { - channelId?: string; - teamId?: string; - userId?: string; - } = {}, -) { - const subject = createSlackDirectCredentialSubject({ - channelId: input.channelId ?? "D123", - teamId: input.teamId ?? "T123", - userId: input.userId ?? 
"U123", - }); - if (!subject) { - throw new Error("Expected test credential subject to be created"); - } - return subject; -} - -async function persistActiveTurn( - conversationId: string, - activeTurnId?: string, -): Promise { - await persistThreadStateById(conversationId, { - conversation: { - schemaVersion: 1, - backfill: {}, - compactions: [], - messages: [], - piMessages: [], - processing: { - activeTurnId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 0, - updatedAtMs: TEST_NOW_MS, - }, - vision: { - byFileId: {}, - }, - }, - }); -} - -describe("plugin heartbeat", () => { +describe("trusted plugin heartbeat route", () => { const originalFetch = global.fetch; beforeEach(async () => { - vi.useFakeTimers({ now: TEST_NOW_MS }); - process.env.JUNIOR_SCHEDULER_SECRET = "heartbeat-secret"; - process.env.JUNIOR_BASE_URL = "https://junior.example.com"; - process.env.JUNIOR_SECRET = "dispatch-secret"; - setAgentPlugins([]); - await disconnectStateAdapter(); + await setupHeartbeatTestEnv(); }); afterEach(async () => { - global.fetch = originalFetch; - setAgentPlugins([]); - await disconnectStateAdapter(); - delete process.env.JUNIOR_SCHEDULER_SECRET; - delete process.env.CRON_SECRET; - delete process.env.JUNIOR_BASE_URL; - delete process.env.JUNIOR_SECRET; - vi.restoreAllMocks(); - vi.useRealTimers(); + await resetHeartbeatTestEnv(originalFetch); }); it("rejects unauthenticated heartbeat requests", async () => { @@ -197,7 +36,7 @@ describe("plugin heartbeat", () => { expect(waitUntil.pendingCount()).toBe(0); }); - it("runs plugin heartbeat hooks", async () => { + it("runs trusted plugin heartbeat hooks", async () => { const seen: number[] = []; setAgentPlugins([ defineJuniorPlugin({ @@ -214,1042 +53,10 @@ describe("plugin heartbeat", () => { }), ]); const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: 
"Bearer heartbeat-secret" }, - }), - waitUntil.fn, - ); - - expect(response.status).toBe(202); - await waitUntil.flush(); - expect(seen).toHaveLength(1); - }); - - it("reschedules stale agent continuation records", async () => { - const queue = createConversationWorkQueueTestAdapter(); - const conversationId = "slack:C123:1712345.0001"; - const sessionId = "turn-timeout"; - const staleNowMs = TEST_NOW_MS - 3 * 60 * 1000; - vi.setSystemTime(staleNowMs); - await upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - destination: SLACK_DESTINATION, - state: "awaiting_resume", - resumeReason: "timeout", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "finish this" }], - timestamp: staleNowMs, - } as PiMessage, - ], - }); - await persistActiveTurn(conversationId, sessionId); - await scheduleAgentContinue( - { - conversationId, - destination: SLACK_DESTINATION, - sessionId, - expectedVersion: 1, - }, - { queue, nowMs: staleNowMs }, - ); - queue.clearSentRecords(); - vi.setSystemTime(TEST_NOW_MS); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - { conversationWorkQueue: queue }, - ); - - expect(response.status).toBe(202); - await waitUntil.flush(); - expect(queue.sentRecords()).toEqual([ - { - conversationId, - destination: SLACK_DESTINATION, - idempotencyKey: `heartbeat:pending:${conversationId}:${TEST_NOW_MS}`, - }, - ]); - await expect( - getConversationWorkState({ conversationId }), - ).resolves.toMatchObject({ - conversationId, - needsRun: true, - }); - }); - - it("reschedules stale cooperative yield continuation records", async () => { - const queue = createConversationWorkQueueTestAdapter(); - const conversationId = "slack:C123:1712345.0008"; - const sessionId = "turn-yield"; - const staleNowMs = TEST_NOW_MS - 3 * 60 * 1000; - 
vi.setSystemTime(staleNowMs); - await upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 1, - destination: SLACK_DESTINATION, - state: "awaiting_resume", - resumeReason: "yield", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "keep going" }], - timestamp: staleNowMs, - } as PiMessage, - ], - }); - await persistActiveTurn(conversationId, sessionId); - await scheduleAgentContinue( - { - conversationId, - destination: SLACK_DESTINATION, - sessionId, - expectedVersion: 1, - }, - { queue, nowMs: staleNowMs }, - ); - queue.clearSentRecords(); - vi.setSystemTime(TEST_NOW_MS); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - { conversationWorkQueue: queue }, - ); - - expect(response.status).toBe(202); - await waitUntil.flush(); - expect(queue.sentRecords()).toEqual([ - { - conversationId, - destination: SLACK_DESTINATION, - idempotencyKey: `heartbeat:pending:${conversationId}:${TEST_NOW_MS}`, - }, - ]); - await expect( - getConversationWorkState({ conversationId }), - ).resolves.toMatchObject({ - conversationId, - needsRun: true, - }); - }); - - it("skips stale agent continuation records for inactive runs", async () => { - const queue = createConversationWorkQueueTestAdapter(); - const conversationId = "slack:C123:1712345.0007"; - const sessionId = "turn-timeout-inactive"; - const staleNowMs = TEST_NOW_MS - 3 * 60 * 1000; - vi.setSystemTime(staleNowMs); - await upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - destination: SLACK_DESTINATION, - state: "awaiting_resume", - resumeReason: "timeout", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "finish this" }], - timestamp: staleNowMs, - } as PiMessage, - ], - }); - await persistActiveTurn(conversationId, "turn-newer"); - 
vi.setSystemTime(TEST_NOW_MS); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - { conversationWorkQueue: queue }, - ); - - expect(response.status).toBe(202); - await waitUntil.flush(); - expect(queue.sentRecords()).toEqual([]); - await expect(getConversationWorkState({ conversationId })).resolves.toBe( - undefined, - ); - }); - - it("does not scan stale agent continuation records outside active conversation work", async () => { - const queue = createConversationWorkQueueTestAdapter(); - const conversationId = "slack:C123:1712345.0009"; - const sessionId = "turn-timeout-no-active-work"; - const staleNowMs = TEST_NOW_MS - 3 * 60 * 1000; - vi.setSystemTime(staleNowMs); - await upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - destination: SLACK_DESTINATION, - state: "awaiting_resume", - resumeReason: "timeout", - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "finish this" }], - timestamp: staleNowMs, - } as PiMessage, - ], - }); - await persistActiveTurn(conversationId, sessionId); - vi.setSystemTime(TEST_NOW_MS); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - { conversationWorkQueue: queue }, - ); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn); expect(response.status).toBe(202); await waitUntil.flush(); - expect(queue.sentRecords()).toEqual([]); - await expect(getConversationWorkState({ conversationId })).resolves.toBe( - undefined, - ); - }); - - it("scopes dispatch lookup to the plugin that created it", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = 
fetchMock as typeof fetch; - - const schedulerCtx = createHeartbeatContext({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - const result = await schedulerCtx.agent.dispatch({ - idempotencyKey: "run-1", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - metadata: { runId: "run-1" }, - }); - - await expect(schedulerCtx.agent.get(result.id)).resolves.toEqual({ - id: result.id, - status: "pending", - }); - await expect( - createHeartbeatContext({ - plugin: "other-plugin", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }).agent.get(result.id), - ).resolves.toBeUndefined(); - - await expect(getDispatchRecord(result.id)).resolves.toMatchObject({ - input: "Run the scheduled task.", - destination: { channelId: "C123" }, - metadata: { runId: "run-1" }, - }); - }); - - it("keeps plugin state isolated when plugin names and keys contain delimiters", async () => { - const first = createHeartbeatContext({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - const second = createHeartbeatContext({ - plugin: "scheduler:run", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - - await first.state.set("run:1", "first"); - await second.state.set("1", "second"); - - await expect(first.state.get("run:1")).resolves.toBe("first"); - await expect(second.state.get("1")).resolves.toBe("second"); - }); - - it("claims scheduled tasks from the scheduler legacy state namespace", async () => { - const task = createTask({ id: "sched_legacy" }); - const state = getStateAdapter(); - await state.connect(); - await state.set("junior:scheduler:tasks", [task.id]); - await state.set("junior:scheduler:team:T123:tasks", [task.id]); - await state.set("junior:scheduler:task:sched_legacy", task); - - const store = createSchedulerStore( - createPluginState("scheduler", { - legacyStatePrefixes: ["junior:scheduler"], - }), - ); - - await 
expect(store.listTasksForTeam("T123")).resolves.toMatchObject([ - { id: task.id }, - ]); - await expect( - store.claimDueRun({ nowMs: TEST_NOW_MS }), - ).resolves.toMatchObject({ - taskId: task.id, - }); - }); - - it("bounds dispatch fanout from one heartbeat context", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - - const ctx = createHeartbeatContext({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - - for (let index = 0; index < 25; index += 1) { - await ctx.agent.dispatch({ - idempotencyKey: `run-${index}`, - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - }); - } - - await expect( - ctx.agent.dispatch({ - idempotencyKey: "run-over-limit", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - }), - ).rejects.toThrow("Plugin heartbeat exceeded the dispatch limit"); - }); - - it("does not count invalid dispatch requests against heartbeat fanout", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - - const ctx = createHeartbeatContext({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - - for (let index = 0; index < 25; index += 1) { - await expect( - ctx.agent.dispatch({ - idempotencyKey: `invalid-${index}`, - destination: { - platform: "slack", - teamId: "not-a-team", - channelId: "C123", - }, - input: "Run the scheduled task.", - }), - ).rejects.toThrow("Dispatch destination teamId must be a Slack team id"); - } - - await expect( - ctx.agent.dispatch({ - idempotencyKey: "valid-after-invalid", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - }), - ).resolves.toMatchObject({ status: 
"created" }); - }); - - it("rejects plugin credential subjects that include runtime bindings", async () => { - mockDispatchCallbackFetch(originalFetch); - - const ctx = createHeartbeatContext({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - - await expect( - ctx.agent.dispatch({ - idempotencyKey: "run-delegated-mismatch", - credentialSubject: { - ...createCredentialSubject(), - binding: { - type: "slack-direct-conversation", - teamId: "T123", - channelId: "D999", - signature: "v1=test", - }, - } as any, - destination: { - platform: "slack", - teamId: "T123", - channelId: "D123", - }, - input: "Run the scheduled task.", - }), - ).rejects.toThrow("Dispatch credentialSubject binding is runtime-owned"); - expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); - await expect(listIncompleteDispatchIds()).resolves.toEqual([]); - }); - - it("binds delegated credential subjects before persistence", async () => { - mockDispatchCallbackFetch(originalFetch); - const ctx = createHeartbeatContext({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - - const result = await ctx.agent.dispatch({ - idempotencyKey: "run-delegated", - credentialSubject: createCredentialSubject(), - destination: { - platform: "slack", - teamId: "T123", - channelId: "D123", - }, - input: "Run the scheduled task.", - }); - - await expect(getDispatchRecord(result.id)).resolves.toMatchObject({ - credentialSubject: { - type: "user", - userId: "U123", - allowedWhen: "private-direct-conversation", - binding: { - type: "slack-direct-conversation", - teamId: "T123", - channelId: "D123", - signature: expect.any(String), - }, - }, - }); - expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); - }); - - it("fails stale dispatches that exceed retry attempts", async () => { - const created = await createOrGetDispatch({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - options: { - idempotencyKey: 
"run-exhausted", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - }, - }); - await withDispatchLock(created.record.id, async (state) => { - const record = await state.get( - getDispatchStorageKey(created.record.id), - ); - if (!record) { - throw new Error("Expected dispatch record to exist"); - } - await updateDispatchRecord(state, { - ...record, - attempt: record.maxAttempts, - lastCallbackAtMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - }); - - await expect( - recoverStaleDispatches({ - nowMs: Date.parse("2026-05-26T12:05:00.000Z"), - }), - ).resolves.toBe(0); - await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({ - status: "failed", - errorMessage: "Dispatch exceeded retry attempts.", - }); - }); - - it("fails stale dispatches when the locked row no longer parses", async () => { - const created = await createOrGetDispatch({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - options: { - idempotencyKey: "run-exhausted-corrupt-row", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - }, - }); - await withDispatchLock(created.record.id, async (state) => { - const record = await state.get( - getDispatchStorageKey(created.record.id), - ); - if (!record) { - throw new Error("Expected dispatch record to exist"); - } - await updateDispatchRecord(state, { - ...record, - attempt: record.maxAttempts, - lastCallbackAtMs: Date.parse("2026-05-26T12:00:00.000Z"), - }); - }); - - const state = getStateAdapter(); - await state.connect(); - const storageKey = getDispatchStorageKey(created.record.id); - const current = await state.get(storageKey); - if (!current) { - throw new Error("Expected dispatch record to exist"); - } - const corruptRecord = { - ...(current as unknown as Record), - }; - delete corruptRecord.destination; - const originalGet = state.get.bind(state); - let recordReads = 0; - 
state.get = (async (key: string) => { - if (key === storageKey && recordReads++ === 1) { - return corruptRecord; - } - return await originalGet(key); - }) as typeof state.get; - - try { - await expect( - recoverStaleDispatches({ - nowMs: Date.parse("2026-05-26T12:05:00.000Z"), - }), - ).resolves.toBe(0); - } finally { - state.get = originalGet; - } - - await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({ - status: "failed", - errorMessage: "Dispatch exceeded retry attempts.", - }); - }); - - it("removes terminal dispatches from the recovery index", async () => { - const created = await createOrGetDispatch({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - options: { - idempotencyKey: "run-terminal-index", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - }, - }); - - await expect(listIncompleteDispatchIds()).resolves.toContain( - created.record.id, - ); - - await withDispatchLock(created.record.id, async (state) => { - const record = await state.get( - getDispatchStorageKey(created.record.id), - ); - if (!record) { - throw new Error("missing dispatch record"); - } - await updateDispatchRecord(state, { - ...record, - status: "completed", - }); - }); - - await expect(listIncompleteDispatchIds()).resolves.not.toContain( - created.record.id, - ); - }); - - it("does not fail an active leased dispatch that reached max attempts", async () => { - const created = await createOrGetDispatch({ - plugin: "scheduler", - nowMs: Date.parse("2026-05-26T12:00:00.000Z"), - options: { - idempotencyKey: "run-active-max-attempts", - destination: { - platform: "slack", - teamId: "T123", - channelId: "C123", - }, - input: "Run the scheduled task.", - }, - }); - await withDispatchLock(created.record.id, async (state) => { - const record = await state.get( - getDispatchStorageKey(created.record.id), - ); - if (!record) { - throw new Error("Expected dispatch record to 
exist"); - } - await updateDispatchRecord(state, { - ...record, - attempt: record.maxAttempts, - lastCallbackAtMs: Date.parse("2026-05-26T12:00:00.000Z"), - leaseExpiresAtMs: Date.parse("2026-05-26T12:10:00.000Z"), - status: "running", - }); - }); - - await expect( - recoverStaleDispatches({ - nowMs: Date.parse("2026-05-26T12:05:00.000Z"), - }), - ).resolves.toBe(0); - await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({ - status: "running", - attempt: created.record.maxAttempts, - }); - }); - - it("dispatches and reconciles scheduled runs from the scheduler plugin", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - await store.saveTask( - createTask({ - createdBy: { - slackUserId: "U039RR91S", - userName: "U039RR91S", - fullName: "W039RR91S", - }, - }), - ); - - const firstWaitUntil = createWaitUntilCollector(); - const firstResponse = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - firstWaitUntil.fn, - ); - expect(firstResponse.status).toBe(202); - await firstWaitUntil.flush(); - - const running = await store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); - expect(running).toMatchObject({ - status: "running", - dispatchId: expect.any(String), - }); - expect(fetchMock).toHaveBeenCalledTimes(1); - const dispatchRecord = await getDispatchRecord(running!.dispatchId!); - expect(dispatchRecord?.input).toContain( - "- creator_slack_user_id: U039RR91S", - ); - expect(dispatchRecord?.input).not.toContain("creator_user_name"); - expect(dispatchRecord?.input).not.toContain("creator_full_name"); - - await withDispatchLock(running!.dispatchId!, async (state) => { - const record = await state.get( - getDispatchStorageKey(running!.dispatchId!), - ); - if (!record) { - throw new 
Error("Expected dispatch record to exist"); - } - await updateDispatchRecord(state, { - ...record, - resultMessageTs: "1700000000.000001", - status: "completed", - }); - }); - - const secondWaitUntil = createWaitUntilCollector(); - const secondResponse = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - secondWaitUntil.fn, - ); - expect(secondResponse.status).toBe(202); - await secondWaitUntil.flush(); - - await expect(store.getRun(running!.id)).resolves.toMatchObject({ - status: "completed", - resultMessageTs: "1700000000.000001", - }); - await expect(store.getTask("sched_plugin_1")).resolves.toMatchObject({ - lastRunAtMs: Date.parse("2026-05-26T12:00:00.000Z"), - status: "paused", - }); - }); - - it("exposes sanitized scheduler operational reports through Junior reporting", async () => { - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - await store.saveTask( - createTask({ - createdBy: { - slackUserId: "U123", - fullName: "Alice Reviewer", - userName: "alice", - }, - task: { - text: "Secret task text that must stay out of dashboard stats.", - }, - }), - ); - await store.saveTask( - createTask({ - createdBy: { - slackUserId: "U456", - fullName: "W039RR91S", - userName: "U456", - }, - id: "sched_plugin_blocked", - status: "blocked", - statusReason: "Secret blocked reason", - task: { - text: "Secret blocked task text", - }, - updatedAtMs: TEST_NOW_MS, - }), - ); - await store.saveTask( - createTask({ - createdBy: { - slackUserId: "unknown", - }, - id: "sched_plugin_corrupt_creator", - status: "blocked", - task: { - text: "Corrupt creator metadata task", - }, - updatedAtMs: TEST_NOW_MS + 1, - }), - ); - - const { createJuniorReporting } = await import("@/reporting"); - const feed = await createJuniorReporting().getPluginOperationalReports(); - const scheduler = feed.reports.find( - (report) => report.pluginName === "scheduler", - ); - - 
expect(feed.source).toBe("plugins"); - expect(scheduler).toMatchObject({ - pluginName: "scheduler", - title: "Scheduler", - }); - expect(scheduler?.metrics).toEqual( - expect.arrayContaining([ - expect.objectContaining({ label: "active", value: "1" }), - expect.objectContaining({ label: "blocked", value: "2" }), - expect.objectContaining({ label: "due now", value: "1" }), - ]), - ); - expect(scheduler?.recordSets?.map((recordSet) => recordSet.title)).toEqual([ - "Upcoming", - "Blocked", - "Running", - ]); - expect(scheduler?.recordSets?.[0]?.fields).toEqual( - expect.arrayContaining([ - expect.objectContaining({ key: "author", label: "Author" }), - ]), - ); - expect( - scheduler?.recordSets?.[0]?.records?.[0]?.values ?? {}, - ).toMatchObject({ - author: "Alice Reviewer (@alice)", - }); - const blockedRecords = scheduler?.recordSets?.[1]?.records ?? []; - expect( - blockedRecords.find((record) => record.id === "sched_plugin_blocked") - ?.values ?? {}, - ).toMatchObject({ - author: "Slack User U456", - }); - expect( - blockedRecords.find( - (record) => record.id === "sched_plugin_corrupt_creator", - )?.values ?? 
{}, - ).toMatchObject({ - author: "Invalid Slack creator metadata", - }); - expect(JSON.stringify(feed)).not.toContain("Secret"); - }); - - it("counts all running scheduler runs in operational summaries", async () => { - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - for (let index = 0; index < 6; index += 1) { - await store.saveTask( - createTask({ - id: `sched_running_${index}`, - createdAtMs: TEST_RUN_AT_MS + index, - updatedAtMs: TEST_RUN_AT_MS + index, - }), - ); - } - for (let index = 0; index < 6; index += 1) { - await expect( - store.claimDueRun({ nowMs: TEST_NOW_MS + index }), - ).resolves.toBeDefined(); - } - - const { createJuniorReporting } = await import("@/reporting"); - const feed = await createJuniorReporting().getPluginOperationalReports(); - const scheduler = feed.reports.find( - (report) => report.pluginName === "scheduler", - ); - const runningSummary = scheduler?.metrics?.find( - (metric) => metric.label === "running", - ); - const runningSection = scheduler?.recordSets?.find( - (recordSet) => recordSet.title === "Running", - ); - - expect(runningSummary).toMatchObject({ value: "6" }); - expect(runningSection?.records).toHaveLength(5); - }); - - it("carries scheduled task credential subjects into dispatch records", async () => { - mockDispatchCallbackFetch(originalFetch); - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - await store.saveTask( - createTask({ - destination: { - platform: "slack", - teamId: "T123", - channelId: "D123", - }, - credentialSubject: { - type: "user", - userId: "U123", - allowedWhen: "private-direct-conversation", - }, - }), - ); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - ); - expect(response.status).toBe(202); - await waitUntil.flush(); - - const running = await 
store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); - expect(running?.dispatchId).toEqual(expect.any(String)); - await expect( - getDispatchRecord(running!.dispatchId!), - ).resolves.toMatchObject({ - credentialSubject: { - type: "user", - userId: "U123", - allowedWhen: "private-direct-conversation", - binding: { - type: "slack-direct-conversation", - teamId: "T123", - channelId: "D123", - signature: expect.any(String), - }, - }, - }); - expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); - }); - - it("fails scheduled runs when their dispatch record disappeared", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - await store.saveTask(createTask()); - - const firstWaitUntil = createWaitUntilCollector(); - const firstResponse = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - firstWaitUntil.fn, - ); - expect(firstResponse.status).toBe(202); - await firstWaitUntil.flush(); - - const running = await store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); - expect(running).toMatchObject({ - status: "running", - dispatchId: expect.any(String), - }); - const state = getStateAdapter(); - await state.connect(); - await state.delete(getDispatchStorageKey(running!.dispatchId!)); - - const secondWaitUntil = createWaitUntilCollector(); - const secondResponse = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - secondWaitUntil.fn, - ); - expect(secondResponse.status).toBe(202); - await secondWaitUntil.flush(); - - await expect(store.getRun(running!.id)).resolves.toMatchObject({ - status: "failed", - errorMessage: "Scheduled task dispatch record is missing.", - }); - await 
expect(store.getTask("sched_plugin_1")).resolves.toMatchObject({ - status: "paused", - }); - }); - - it("blocks malformed scheduled tasks without stopping the scheduler plugin heartbeat", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - await store.saveTask({ - ...createTask(), - id: "sched_plugin_malformed", - task: { - text: undefined, - } as unknown as ScheduledTask["task"], - }); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - ); - expect(response.status).toBe(202); - await waitUntil.flush(); - - await expect( - store.getRun(`sched_plugin_malformed:${TEST_RUN_AT_MS}`), - ).resolves.toMatchObject({ - status: "blocked", - errorMessage: expect.stringContaining( - "Scheduled task prompt could not be built", - ), - }); - await expect( - store.getTask("sched_plugin_malformed"), - ).resolves.toMatchObject({ - status: "blocked", - statusReason: expect.stringContaining( - "Scheduled task prompt could not be built", - ), - }); - expect(fetchMock).not.toHaveBeenCalled(); - }); - - it("skips old recurring occurrences and advances to the next future run", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - const task = createDailyTask(); - await store.saveTask(task); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - ); - expect(response.status).toBe(202); - 
await waitUntil.flush(); - - await expect( - store.getRun(`${task.id}:${task.nextRunAtMs}`), - ).resolves.toMatchObject({ - status: "skipped", - errorMessage: expect.stringContaining("more than 24 hours late"), - }); - await expect(store.getTask(task.id)).resolves.toMatchObject({ - status: "active", - nextRunAtMs: Date.parse("2026-05-27T12:00:00.000Z"), - }); - expect(fetchMock).not.toHaveBeenCalled(); - }); - - it("dedupes equivalent old recurring tasks during heartbeat recovery", async () => { - const fetchMock = vi.fn(async () => { - return new Response("Accepted", { status: 202 }); - }); - global.fetch = fetchMock as typeof fetch; - setAgentPlugins([schedulerPlugin()]); - const store = schedulerStore(); - const first = createDailyTask({ - id: "sched_plugin_duplicate_a", - createdAtMs: Date.parse("2026-05-24T12:00:00.000Z"), - }); - const duplicate = createDailyTask({ - id: "sched_plugin_duplicate_b", - createdAtMs: Date.parse("2026-05-24T12:00:01.000Z"), - }); - await store.saveTask(first); - await store.saveTask(duplicate); - - const waitUntil = createWaitUntilCollector(); - const response = await heartbeat( - new Request("https://example.invalid/api/internal/heartbeat", { - headers: { authorization: "Bearer heartbeat-secret" }, - }), - waitUntil.fn, - ); - expect(response.status).toBe(202); - await waitUntil.flush(); - - await expect( - store.getRun(`${duplicate.id}:${duplicate.nextRunAtMs}`), - ).resolves.toMatchObject({ - status: "skipped", - errorMessage: expect.stringContaining( - "Duplicate stale scheduled task was skipped", - ), - }); - await expect(store.getTask(first.id)).resolves.toMatchObject({ - status: "active", - nextRunAtMs: Date.parse("2026-05-27T12:00:00.000Z"), - }); - await expect(store.getTask(duplicate.id)).resolves.toMatchObject({ - status: "paused", - nextRunAtMs: undefined, - statusReason: expect.stringContaining(first.id), - }); - expect(fetchMock).not.toHaveBeenCalled(); + expect(seen).toEqual([TEST_NOW_MS]); }); }); diff --git 
a/packages/junior/tests/integration/mcp-auth-runtime-slack.test.ts b/packages/junior/tests/integration/mcp-auth-runtime-slack.test.ts deleted file mode 100644 index 081f9505b..000000000 --- a/packages/junior/tests/integration/mcp-auth-runtime-slack.test.ts +++ /dev/null @@ -1,792 +0,0 @@ -import path from "node:path"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - EVAL_MCP_AUTH_CODE, - EVAL_MCP_AUTH_PROVIDER, -} from "../msw/handlers/eval-mcp-auth"; -import { - getCapturedSlackApiCalls, - resetSlackApiMockState, -} from "../msw/handlers/slack-api"; -import { - createTestMessage, - createTestThread, - type TestThread, -} from "../fixtures/slack-harness"; -import { - createPluginAppFixture, - type PluginAppFixture, -} from "../fixtures/plugin-app"; - -const { - agentProbe, - MCP_TOOL_NAME, - SKILL_NAME, - assistantReplyWithoutContext, - assistantReplyWithContext, - priorBudgetContext, -} = vi.hoisted(() => ({ - agentProbe: { - continueCallCount: 0, - directProviderSearch: false, - promptCallCount: 0, - searchToolNames: [] as string[][], - }, - MCP_TOOL_NAME: "mcp__eval-auth__budget-echo", - SKILL_NAME: "eval-auth", - assistantReplyWithoutContext: "I need the earlier budget context first.", - assistantReplyWithContext: - "The budget deadline you mentioned earlier was Friday.", - priorBudgetContext: "You need the budget by Friday.", -})); - -function resetAgentProbe(): void { - agentProbe.promptCallCount = 0; - agentProbe.continueCallCount = 0; - agentProbe.directProviderSearch = false; - agentProbe.searchToolNames.length = 0; -} - -function extractTextContent(message: unknown): string { - if (!message || typeof message !== "object") { - return ""; - } - - const content = (message as { content?: unknown }).content; - if (!Array.isArray(content)) { - return ""; - } - - return content - .map((part) => { - if (!part || typeof part !== "object") { - return ""; - } - const candidate = part as { type?: unknown; text?: unknown }; - 
return candidate.type === "text" && typeof candidate.text === "string" - ? candidate.text - : ""; - }) - .join("\n"); -} - -function hasPriorBudgetContext(messages: unknown[]): boolean { - return messages.some((message) => - extractTextContent(message).includes(priorBudgetContext), - ); -} - -vi.mock("@/chat/services/turn-thinking-level", async () => { - const actual = await vi.importActual< - typeof import("@/chat/services/turn-thinking-level") - >("@/chat/services/turn-thinking-level"); - return { - ...actual, - // Bypass the classifier to keep this an agent-boundary test with no - // model traffic. - selectTurnThinkingLevel: async () => ({ - thinkingLevel: "medium" as const, - reason: "test_default", - }), - }; -}); - -vi.mock("@earendil-works/pi-agent-core", () => { - class FakeAgent { - state: { - messages: unknown[]; - model: unknown; - systemPrompt: string; - tools: Array<{ - name: string; - execute: (toolCallId: unknown, params: unknown) => Promise; - }>; - }; - private aborted = false; - - constructor(input: { - initialState: { - model: unknown; - systemPrompt: string; - tools: Array<{ - name: string; - execute: (toolCallId: unknown, params: unknown) => Promise; - }>; - }; - }) { - this.state = { - messages: [], - model: input.initialState.model, - systemPrompt: input.initialState.systemPrompt, - tools: input.initialState.tools, - }; - } - - subscribe() { - return () => undefined; - } - - abort() { - this.aborted = true; - } - - async prompt(message: unknown) { - agentProbe.promptCallCount += 1; - this.aborted = false; - this.state.messages.push(message); - - if (agentProbe.directProviderSearch) { - const searchMcpTools = this.state.tools.find( - (tool) => tool.name === "searchMcpTools", - ); - if (!searchMcpTools) { - throw new Error("searchMcpTools missing"); - } - await searchMcpTools.execute("tool-search-provider", { - provider: EVAL_MCP_AUTH_PROVIDER, - query: "budget echo query", - }); - if (this.aborted) { - return {}; - } - throw new 
Error("Expected MCP auth pause while searching eval-auth"); - } - - const loadSkillTool = this.state.tools.find( - (tool) => tool.name === "loadSkill", - ); - if (!loadSkillTool) { - throw new Error("loadSkill tool missing"); - } - - await loadSkillTool.execute("tool-load-skill", { - skill_name: SKILL_NAME, - }); - - if (this.aborted) { - return {}; - } - - throw new Error("Expected MCP auth pause while loading eval-auth"); - } - - async continue() { - agentProbe.continueCallCount += 1; - this.aborted = false; - - const searchMcpTools = this.state.tools.find( - (tool) => tool.name === "searchMcpTools", - ); - if (!searchMcpTools) { - throw new Error("searchMcpTools missing on resume"); - } - const searchResult = (await searchMcpTools.execute("tool-search-resume", { - provider: EVAL_MCP_AUTH_PROVIDER, - query: "budget echo query", - })) as { - details?: { tools?: Array<{ tool_name?: unknown }> }; - }; - agentProbe.searchToolNames.push( - (searchResult.details?.tools ?? []) - .map((tool) => tool.tool_name) - .filter( - (toolName): toolName is string => typeof toolName === "string", - ), - ); - - const callMcpTool = this.state.tools.find( - (tool) => tool.name === "callMcpTool", - ); - if (!callMcpTool) { - throw new Error("callMcpTool missing on resume"); - } - - await callMcpTool.execute("tool-call-continue", { - tool_name: MCP_TOOL_NAME, - arguments: { query: "what did i say about the budget?" }, - }); - - if (this.aborted) { - return {}; - } - - this.state.messages.push({ - role: "assistant", - content: [ - { - type: "text", - text: hasPriorBudgetContext(this.state.messages) - ? 
assistantReplyWithContext - : assistantReplyWithoutContext, - }, - ], - stopReason: "stop", - }); - - return {}; - } - } - - return { Agent: FakeAgent }; -}); - -const ORIGINAL_ENV = { ...process.env }; -const EVAL_MCP_PLUGIN_ROOT = path.resolve( - import.meta.dirname, - "../fixtures/plugins/eval-auth", -); - -type ChatRuntimeModule = typeof import("../fixtures/chat-runtime"); -type McpAuthStoreModule = typeof import("@/chat/mcp/auth-store"); -type McpOauthCallbackHarnessModule = - typeof import("../fixtures/mcp-oauth-callback-harness"); -type StateAdapterModule = typeof import("@/chat/state/adapter"); -type ThreadStateModule = typeof import("@/chat/runtime/thread-state"); -type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); - -let chatRuntimeModule: ChatRuntimeModule; -let mcpAuthStoreModule: McpAuthStoreModule; -let mcpOauthCallbackHarnessModule: McpOauthCallbackHarnessModule; -let stateAdapterModule: StateAdapterModule; -let threadStateModule: ThreadStateModule; -let turnSessionStoreModule: TurnSessionStoreModule; - -async function mirrorThreadStateToAdapter(thread: TestThread): Promise { - const originalSetState = thread.setState.bind(thread); - thread.setState = async (next, options) => { - await originalSetState(next, options); - // The OAuth callback reloads state by thread id, so keep the fixture thread - // and the memory adapter in sync during the first parked turn. 
- await stateAdapterModule - .getStateAdapter() - .set(`thread-state:${thread.id}`, thread.getState()); - }; - - await stateAdapterModule - .getStateAdapter() - .set(`thread-state:${thread.id}`, thread.getState()); -} - -function expectProcessingReactionLifecycles(args: { - channel: string; - completedCount?: number; - count: number; - timestamp: string; -}): void { - const call = (name: string) => - expect.objectContaining({ - params: expect.objectContaining({ - channel: args.channel, - timestamp: args.timestamp, - name, - }), - }); - const eyes = Array.from({ length: args.count }, () => call("eyes")); - const completed = Array.from({ length: args.completedCount ?? 0 }, () => - call("white_check_mark"), - ); - - expect(getCapturedSlackApiCalls("reactions.add")).toEqual([ - ...eyes, - ...completed, - ]); - expect(getCapturedSlackApiCalls("reactions.remove")).toEqual(eyes); -} - -describe("mcp auth runtime slack integration", () => { - let pluginApp: PluginAppFixture | undefined; - - beforeEach(async () => { - resetAgentProbe(); - resetSlackApiMockState(); - process.env = { - ...ORIGINAL_ENV, - JUNIOR_BASE_URL: "https://junior.example.com", - JUNIOR_STATE_ADAPTER: "memory", - SLACK_BOT_TOKEN: "xoxb-test-token", - }; - pluginApp = await createPluginAppFixture([EVAL_MCP_PLUGIN_ROOT]); - - vi.resetModules(); - chatRuntimeModule = await import("../fixtures/chat-runtime"); - mcpAuthStoreModule = await import("@/chat/mcp/auth-store"); - mcpOauthCallbackHarnessModule = - await import("../fixtures/mcp-oauth-callback-harness"); - stateAdapterModule = await import("@/chat/state/adapter"); - threadStateModule = await import("@/chat/runtime/thread-state"); - turnSessionStoreModule = await import("@/chat/state/turn-session"); - - await stateAdapterModule.disconnectStateAdapter(); - await stateAdapterModule.getStateAdapter().connect(); - }, 45_000); - - afterEach(async () => { - await stateAdapterModule?.disconnectStateAdapter(); - await pluginApp?.cleanup(); - pluginApp = 
undefined; - process.env = { ...ORIGINAL_ENV }; - }, 45_000); - - it("parks an MCP auth challenge from the real Slack runtime and resumes after OAuth callback", async () => { - const threadId = "slack:C123:1700000000.001"; - const turnId = "turn_user-1"; - const { createTestChatRuntime } = chatRuntimeModule; - const { slackRuntime } = createTestChatRuntime({ - services: { - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - - const destination = { - platform: "slack" as const, - teamId: "T123", - channelId: "C123", - }; - const thread = createTestThread({ - id: threadId, - state: { - conversation: { - messages: [ - { - id: "assistant-1", - role: "assistant", - text: priorBudgetContext, - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - ], - }, - }, - }); - await mirrorThreadStateToAdapter(thread); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "user-1", - threadId, - text: "what did i say about the budget?", - isMention: true, - author: { - userId: "U123", - userName: "dcramer", - }, - raw: { - channel: "C123", - team_id: "T123", - ts: "1700000000.002", - thread_ts: "1700000000.001", - }, - }), - { destination }, - ); - - expect(agentProbe.promptCallCount).toBe(1); - expect(agentProbe.continueCallCount).toBe(0); - - expect(getCapturedSlackApiCalls("chat.postEphemeral")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - user: "U123", - thread_ts: "1700000000.001", - text: expect.stringContaining( - "Click here to link your Eval Auth MCP access", - ), - }), - }), - ]); - expect(thread.posts).toEqual([ - expect.objectContaining({ - markdown: expect.stringContaining( - "<@U123> I'll need you to authorize Eval Auth. 
I sent you a link.", - ), - }), - ]); - expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); - expectProcessingReactionLifecycles({ - channel: "C123", - timestamp: "1700000000.002", - count: 1, - }); - - const pendingAuthSession = - await mcpAuthStoreModule.getLatestMcpAuthSessionForUserProvider( - "U123", - EVAL_MCP_AUTH_PROVIDER, - ); - expect(pendingAuthSession).toMatchObject({ - provider: EVAL_MCP_AUTH_PROVIDER, - conversationId: threadId, - sessionId: turnId, - userId: "U123", - userMessage: "what did i say about the budget?", - channelId: "C123", - destination, - threadTs: "1700000000.001", - authorizationUrl: expect.stringContaining( - "https://eval-auth.example.test/oauth/authorize", - ), - }); - const parkedAuthSessionId = pendingAuthSession!.authSessionId; - - const pendingCheckpoint = - await turnSessionStoreModule.getAgentTurnSessionRecord(threadId, turnId); - expect(pendingCheckpoint).toMatchObject({ - conversationId: threadId, - sessionId: turnId, - sliceId: 2, - state: "awaiting_resume", - resumeReason: "auth", - resumedFromSliceId: 1, - }); - - const parkedState = - await threadStateModule.getPersistedThreadState(threadId); - expect(parkedState).toMatchObject({ - conversation: { - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId: turnId, - linkSentAtMs: expect.any(Number), - }, - }, - }, - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: pendingAuthSession!.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - const sessionRecordAfterAuth = - await turnSessionStoreModule.getAgentTurnSessionRecord(threadId, turnId); - expect(sessionRecordAfterAuth?.piMessages).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - role: "user", - content: [ - { - type: "text", - text: `MCP authorization completed for provider 
"${EVAL_MCP_AUTH_PROVIDER}". Continue the blocked request and retry the provider operation if needed.`, - }, - ], - }), - ]), - ); - expect(agentProbe.promptCallCount).toBe(1); - expect(agentProbe.continueCallCount).toBe(1); - expect(agentProbe.searchToolNames).toEqual([[MCP_TOOL_NAME]]); - - const latestReusableSession = - await mcpAuthStoreModule.getLatestMcpAuthSessionForUserProvider( - "U123", - EVAL_MCP_AUTH_PROVIDER, - ); - expect(latestReusableSession).toMatchObject({ - provider: EVAL_MCP_AUTH_PROVIDER, - conversationId: threadId, - sessionId: turnId, - userId: "U123", - userMessage: "what did i say about the budget?", - }); - expect(latestReusableSession?.authSessionId).not.toBe(parkedAuthSessionId); - expect(latestReusableSession?.authorizationUrl).toBeUndefined(); - expect(latestReusableSession?.codeVerifier).toBeUndefined(); - expect( - await mcpAuthStoreModule.getMcpStoredOAuthCredentials( - "U123", - EVAL_MCP_AUTH_PROVIDER, - ), - ).toMatchObject({ - tokens: { - access_token: "eval-auth-access-token", - refresh_token: "eval-auth-refresh-token", - }, - }); - - const completedCheckpoint = - await turnSessionStoreModule.getAgentTurnSessionRecord(threadId, turnId); - expect(completedCheckpoint).toMatchObject({ - conversationId: threadId, - sessionId: turnId, - sliceId: 2, - state: "completed", - }); - - const resumedState = - await threadStateModule.getPersistedThreadState(threadId); - expect(resumedState).toMatchObject({ - conversation: { - processing: { - activeTurnId: undefined, - pendingAuth: undefined, - }, - messages: expect.arrayContaining([ - expect.objectContaining({ - id: "user-1", - role: "user", - meta: expect.objectContaining({ - replied: true, - }), - }), - expect.objectContaining({ - role: "assistant", - text: assistantReplyWithContext, - }), - ]), - }, - }); - - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.001", - 
text: assistantReplyWithContext, - }), - }), - ]); - expectProcessingReactionLifecycles({ - channel: "C123", - timestamp: "1700000000.002", - count: 2, - completedCount: 1, - }); - }); - - it("parks a subscribed-thread MCP auth challenge with the same pending-auth state", async () => { - const threadId = "slack:C124:1700000000.002"; - const turnId = "turn_user-2"; - const { createTestChatRuntime } = chatRuntimeModule; - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => - ({ - object: { - should_reply: true, - confidence: 1, - reason: "requires thread follow-up", - }, - text: '{"should_reply":true,"confidence":1,"reason":"requires thread follow-up"}', - }) as never, - }, - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - - const destination = { - platform: "slack" as const, - teamId: "T123", - channelId: "C124", - }; - const thread = createTestThread({ - id: threadId, - state: { - conversation: { - messages: [ - { - id: "assistant-1", - role: "assistant", - text: priorBudgetContext, - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - ], - }, - }, - }); - await mirrorThreadStateToAdapter(thread); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "user-2", - threadId, - text: "what did i say about the budget?", - isMention: false, - author: { - userId: "U123", - userName: "dcramer", - }, - raw: { - channel: "C124", - team_id: "T123", - ts: "1700000000.004", - thread_ts: "1700000000.002", - }, - }), - { destination }, - ); - - expect(agentProbe.promptCallCount).toBe(1); - expect(agentProbe.continueCallCount).toBe(0); - expect(thread.posts).toEqual([ - expect.objectContaining({ - markdown: expect.stringContaining( - "<@U123> I'll need you to authorize Eval Auth. 
I sent you a link.", - ), - }), - ]); - - const pendingCheckpoint = - await turnSessionStoreModule.getAgentTurnSessionRecord(threadId, turnId); - expect(pendingCheckpoint).toMatchObject({ - conversationId: threadId, - sessionId: turnId, - sliceId: 2, - state: "awaiting_resume", - resumeReason: "auth", - resumedFromSliceId: 1, - }); - - const parkedState = - await threadStateModule.getPersistedThreadState(threadId); - expect(parkedState).toMatchObject({ - conversation: { - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId: turnId, - linkSentAtMs: expect.any(Number), - }, - }, - }, - }); - }); - - it("parks and resumes an MCP auth challenge from direct provider activation", async () => { - agentProbe.directProviderSearch = true; - const threadId = "slack:C125:1700000000.003"; - const turnId = "turn_user-3"; - const { createTestChatRuntime } = chatRuntimeModule; - const { slackRuntime } = createTestChatRuntime({ - services: { - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - - const destination = { - platform: "slack" as const, - teamId: "T123", - channelId: "C125", - }; - const thread = createTestThread({ - id: threadId, - state: { - conversation: { - messages: [ - { - id: "assistant-1", - role: "assistant", - text: priorBudgetContext, - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - ], - }, - }, - }); - await mirrorThreadStateToAdapter(thread); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "user-3", - threadId, - text: "use eval-auth directly for the budget answer", - isMention: true, - author: { - userId: "U123", - userName: "dcramer", - }, - raw: { - channel: "C125", - team_id: "T123", - ts: "1700000000.004", - thread_ts: "1700000000.003", - }, - }), - { destination }, - ); - - const pendingCheckpoint = - await turnSessionStoreModule.getAgentTurnSessionRecord(threadId, turnId); - 
expect(pendingCheckpoint).toMatchObject({ - conversationId: threadId, - sessionId: turnId, - sliceId: 2, - state: "awaiting_resume", - resumeReason: "auth", - }); - - const pendingAuthSession = - await mcpAuthStoreModule.getLatestMcpAuthSessionForUserProvider( - "U123", - EVAL_MCP_AUTH_PROVIDER, - ); - expect(pendingAuthSession).toMatchObject({ - provider: EVAL_MCP_AUTH_PROVIDER, - conversationId: threadId, - sessionId: turnId, - userId: "U123", - destination, - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: pendingAuthSession!.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - expect(agentProbe.promptCallCount).toBe(1); - expect(agentProbe.continueCallCount).toBe(1); - expect(agentProbe.searchToolNames).toEqual([[MCP_TOOL_NAME]]); - - const completedCheckpoint = - await turnSessionStoreModule.getAgentTurnSessionRecord(threadId, turnId); - expect(completedCheckpoint).toMatchObject({ - conversationId: threadId, - sessionId: turnId, - state: "completed", - }); - - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C125", - thread_ts: "1700000000.003", - text: assistantReplyWithContext, - }), - }), - ]); - }); -}); diff --git a/packages/junior/tests/integration/mcp-dynamic-tools.test.ts b/packages/junior/tests/integration/mcp-dynamic-tools.test.ts index 4229b24d8..ac802e121 100644 --- a/packages/junior/tests/integration/mcp-dynamic-tools.test.ts +++ b/packages/junior/tests/integration/mcp-dynamic-tools.test.ts @@ -8,7 +8,8 @@ import { import { createEchoMcpTestServer, type EchoMcpTestServer, -} from "../fixtures/mcp-test-server"; +} from "../fixtures/mcp/test-server"; +import { DEFAULT_TEST_NOW_MS } from "../fixtures/vitest"; type StreamResponse = Awaited>; @@ -49,7 +50,7 @@ function assistantMessage(content: Array>) { ? 
"toolCalls" : "stop", content, - timestamp: Date.now(), + timestamp: DEFAULT_TEST_NOW_MS, }; } @@ -230,7 +231,7 @@ describe("MCP tools loaded mid-turn", () => { await agent.prompt({ role: "user", content: [{ type: "text", text: "use the MCP tool" }], - timestamp: Date.now(), + timestamp: DEFAULT_TEST_NOW_MS, }); expect(toolsSeenByModel[0]).toEqual([ diff --git a/packages/junior/tests/integration/mcp-oauth-callback-slack.test.ts b/packages/junior/tests/integration/mcp-oauth-callback-slack.test.ts deleted file mode 100644 index 604004b50..000000000 --- a/packages/junior/tests/integration/mcp-oauth-callback-slack.test.ts +++ /dev/null @@ -1,1016 +0,0 @@ -import path from "node:path"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - EVAL_MCP_AUTH_CODE, - EVAL_MCP_AUTH_PROVIDER, -} from "../msw/handlers/eval-mcp-auth"; -import { - getCapturedSlackApiCalls, - getCapturedSlackFileUploadCalls, - resetSlackApiMockState, -} from "../msw/handlers/slack-api"; -import { - createPluginAppFixture, - type PluginAppFixture, -} from "../fixtures/plugin-app"; - -const { generateAssistantReplyMock } = vi.hoisted(() => ({ - generateAssistantReplyMock: vi.fn(), -})); - -vi.mock("@/chat/respond", () => ({ - generateAssistantReply: generateAssistantReplyMock, -})); - -const ORIGINAL_ENV = { ...process.env }; -const EVAL_MCP_PLUGIN_ROOT = path.resolve( - import.meta.dirname, - "../fixtures/plugins/eval-auth", -); -const SLACK_DESTINATION = { - platform: "slack", - teamId: "T123", - channelId: "C123", -} as const; - -type ArtifactStateModule = typeof import("@/chat/state/artifacts"); -type ConversationStateModule = typeof import("@/chat/state/conversation"); -type McpAuthStoreModule = typeof import("@/chat/mcp/auth-store"); -type McpClientModule = typeof import("@/chat/mcp/client"); -type McpOauthModule = typeof import("@/chat/mcp/oauth"); -type McpOauthCallbackHarnessModule = - typeof import("../fixtures/mcp-oauth-callback-harness"); -type 
PluginRegistryModule = typeof import("@/chat/plugins/registry"); -type StateAdapterModule = typeof import("@/chat/state/adapter"); -type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); - -let artifactStateModule: ArtifactStateModule; -let conversationStateModule: ConversationStateModule; -let mcpAuthStoreModule: McpAuthStoreModule; -let mcpClientModule: McpClientModule; -let mcpOauthModule: McpOauthModule; -let mcpOauthCallbackHarnessModule: McpOauthCallbackHarnessModule; -let pluginRegistryModule: PluginRegistryModule; -let stateAdapterModule: StateAdapterModule; -let turnSessionStoreModule: TurnSessionStoreModule; -let pluginApp: PluginAppFixture | undefined; - -async function createPendingAuthSession(args: { - conversationId: string; - sessionId: string; - userMessage: string; - channelId: string; - threadTs: string; -}) { - const authProvider = await mcpOauthModule.createMcpOAuthClientProvider({ - provider: EVAL_MCP_AUTH_PROVIDER, - conversationId: args.conversationId, - destination: SLACK_DESTINATION, - sessionId: args.sessionId, - userId: "U123", - userMessage: args.userMessage, - channelId: args.channelId, - threadTs: args.threadTs, - }); - - const plugin = pluginRegistryModule.getPluginDefinition( - EVAL_MCP_AUTH_PROVIDER, - ); - expect(plugin).toBeDefined(); - - const client = new mcpClientModule.PluginMcpClient(plugin!, { - authProvider, - }); - await expect(client.listTools()).rejects.toBeInstanceOf( - mcpClientModule.McpAuthorizationRequiredError, - ); - await client.close(); - - return authProvider; -} - -async function createAwaitingMcpTurnRecord(args: { - conversationId: string; - requester?: { - email?: string; - fullName?: string; - platform?: "slack"; - slackUserId?: string; - slackUserName?: string; - teamId?: string; - }; - sessionId: string; - text: string; -}) { - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId: args.conversationId, - sessionId: args.sessionId, - sliceId: 2, - state: 
"awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: args.text }], - timestamp: 1, - }, - ], - ...(args.requester ? { requester: args.requester } : {}), - resumeReason: "auth", - resumedFromSliceId: 1, - }); -} - -describe("mcp oauth callback slack integration", () => { - beforeEach(async () => { - generateAssistantReplyMock.mockReset(); - generateAssistantReplyMock.mockResolvedValue({ - text: "The budget deadline you mentioned earlier was Friday.", - artifactStatePatch: { - lastCanvasUrl: "https://example.com/canvas", - }, - sandboxId: "sandbox-1", - sandboxDependencyProfileHash: "hash-1", - diagnostics: { - outcome: "success", - toolCalls: [], - }, - }); - resetSlackApiMockState(); - process.env = { - ...ORIGINAL_ENV, - JUNIOR_STATE_ADAPTER: "memory", - JUNIOR_BASE_URL: "https://junior.example.com", - }; - pluginApp = await createPluginAppFixture([EVAL_MCP_PLUGIN_ROOT]); - - vi.resetModules(); - artifactStateModule = await import("@/chat/state/artifacts"); - conversationStateModule = await import("@/chat/state/conversation"); - mcpAuthStoreModule = await import("@/chat/mcp/auth-store"); - mcpClientModule = await import("@/chat/mcp/client"); - mcpOauthModule = await import("@/chat/mcp/oauth"); - mcpOauthCallbackHarnessModule = - await import("../fixtures/mcp-oauth-callback-harness"); - pluginRegistryModule = await import("@/chat/plugins/registry"); - stateAdapterModule = await import("@/chat/state/adapter"); - turnSessionStoreModule = await import("@/chat/state/turn-session"); - - await stateAdapterModule.disconnectStateAdapter(); - await stateAdapterModule.getStateAdapter().connect(); - }); - - afterEach(async () => { - await stateAdapterModule?.disconnectStateAdapter(); - await pluginApp?.cleanup(); - pluginApp = undefined; - process.env = { ...ORIGINAL_ENV }; - }); - - it("finalizes MCP OAuth and resumes the stored thread with persisted context", async () => { - const threadId = 
"slack:C123:1700000000.001"; - const sessionId = "turn_user-1"; - - await stateAdapterModule.getStateAdapter().set(`thread-state:${threadId}`, { - conversation: { - messages: [ - { - id: "assistant-1", - role: "assistant", - text: "You need the budget by Friday.", - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - { - id: "user-1", - role: "user", - text: "what did i say about the budget?", - createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - meta: { - attachmentCount: 1, - imageAttachmentCount: 1, - imagesHydrated: false, - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - artifacts: { - assistantContextChannelId: "C999", - lastCanvasId: "F123", - }, - }); - await stateAdapterModule.getStateAdapter().set("channel-state:C123", { - configuration: { - schemaVersion: 1, - entries: { - region: { - key: "region", - value: "us", - scope: "conversation", - updatedAt: new Date(0).toISOString(), - }, - }, - }, - }); - await createAwaitingMcpTurnRecord({ - conversationId: "conversation-1", - requester: { - platform: "slack", - teamId: "T123", - slackUserId: "U123", - slackUserName: "stored-user", - fullName: "Stored User", - email: "stored@example.com", - }, - sessionId, - text: "what did i say about the budget?", - }); - - const authProvider = await mcpOauthModule.createMcpOAuthClientProvider({ - provider: EVAL_MCP_AUTH_PROVIDER, - conversationId: "conversation-1", - destination: SLACK_DESTINATION, - sessionId, - userId: "U123", - userMessage: "what did i say about the budget?", - channelId: "C123", - threadTs: "1700000000.001", - toolChannelId: "C999", - configuration: { - region: "us", - }, - artifactState: { - assistantContextChannelId: "C999", - lastCanvasId: "F123", - }, - }); - - const plugin = pluginRegistryModule.getPluginDefinition( - EVAL_MCP_AUTH_PROVIDER, - ); - 
expect(plugin).toBeDefined(); - - const client = new mcpClientModule.PluginMcpClient(plugin!, { - authProvider, - }); - await expect(client.listTools()).rejects.toBeInstanceOf( - mcpClientModule.McpAuthorizationRequiredError, - ); - await client.close(); - - const pendingSession = await mcpAuthStoreModule.getMcpAuthSession( - authProvider.authSessionId, - ); - expect(pendingSession).toMatchObject({ - authSessionId: authProvider.authSessionId, - provider: EVAL_MCP_AUTH_PROVIDER, - userId: "U123", - conversationId: "conversation-1", - destination: SLACK_DESTINATION, - sessionId, - userMessage: "what did i say about the budget?", - channelId: "C123", - threadTs: "1700000000.001", - toolChannelId: "C999", - configuration: { - region: "us", - }, - artifactState: { - assistantContextChannelId: "C999", - lastCanvasId: "F123", - }, - authorizationUrl: expect.stringContaining( - "https://eval-auth.example.test/oauth/authorize", - ), - codeVerifier: expect.any(String), - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - - expect( - await mcpAuthStoreModule.getMcpAuthSession(authProvider.authSessionId), - ).toBeUndefined(); - - const storedCredentials = - await mcpAuthStoreModule.getMcpStoredOAuthCredentials( - "U123", - EVAL_MCP_AUTH_PROVIDER, - ); - expect(storedCredentials?.tokens).toMatchObject({ - access_token: "eval-auth-access-token", - refresh_token: "eval-auth-refresh-token", - }); - - expect(generateAssistantReplyMock).toHaveBeenCalledWith( - "what did i say about the budget?", - expect.objectContaining({ - requester: expect.objectContaining({ - email: "stored@example.com", - fullName: "Stored User", - platform: "slack", - teamId: "T123", - userId: "U123", - userName: "stored-user", - }), - destination: SLACK_DESTINATION, - toolChannelId: "C999", - inboundAttachmentCount: 1, - 
omittedImageAttachmentCount: 1, - artifactState: expect.objectContaining({ - assistantContextChannelId: "C999", - lastCanvasId: "F123", - }), - conversationContext: expect.stringContaining( - "You need the budget by Friday.", - ), - }), - ); - - const resumeContext = generateAssistantReplyMock.mock.calls[0]?.[1] as { - conversationContext?: string; - configuration?: Record; - }; - expect(resumeContext.conversationContext).not.toContain( - "what did i say about the budget?", - ); - expect(resumeContext.configuration?.region).toBe("us"); - - const persistedState = await stateAdapterModule - .getStateAdapter() - .get>(`thread-state:${threadId}`); - const conversation = - conversationStateModule.coerceThreadConversationState(persistedState); - const artifacts = - artifactStateModule.coerceThreadArtifactsState(persistedState); - - expect( - conversation.messages.find((message) => message.id === "user-1"), - ).toMatchObject({ - meta: { - replied: true, - }, - }); - expect(conversation.processing.pendingAuth).toBeUndefined(); - expect(conversation.messages.at(-1)).toMatchObject({ - role: "assistant", - text: "The budget deadline you mentioned earlier was Friday.", - }); - expect(artifacts).toMatchObject({ - assistantContextChannelId: "C999", - lastCanvasId: "F123", - lastCanvasUrl: "https://example.com/canvas", - }); - - expect(getCapturedSlackApiCalls("assistant.threads.setStatus")).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1700000000.001", - status: expect.any(String), - loading_messages: expect.arrayContaining([expect.any(String)]), - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1700000000.001", - status: "", - }), - }), - ]), - ); - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - 
thread_ts: "1700000000.001", - text: "The budget deadline you mentioned earlier was Friday.", - }), - }), - ]), - ); - }); - - it("fails MCP OAuth resume when stored requester team mismatches destination", async () => { - const threadId = "slack:C123:1700000000.006"; - const sessionId = "turn_user-6"; - - await stateAdapterModule.getStateAdapter().set(`thread-state:${threadId}`, { - conversation: { - messages: [ - { - id: "user-6", - role: "user", - text: "what did i say about the budget?", - createdAtMs: 2, - author: { userId: "U123" }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - }); - await createAwaitingMcpTurnRecord({ - conversationId: threadId, - requester: { - platform: "slack", - teamId: "T999", - slackUserId: "U123", - }, - sessionId, - text: "what did i say about the budget?", - }); - const authProvider = await createPendingAuthSession({ - conversationId: threadId, - sessionId, - userMessage: "what did i say about the budget?", - channelId: "C123", - threadTs: "1700000000.006", - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).not.toHaveBeenCalled(); - await expect( - turnSessionStoreModule.getAgentTurnSessionRecord(threadId, sessionId), - ).resolves.toMatchObject({ - state: "failed", - errorMessage: - "Stored Slack requester identity did not match OAuth requester", - }); - }); - - it("rebuilds MCP OAuth resume context from state loaded under the thread lock", async () => { - const threadId = "slack:C123:1700000000.005"; - const sessionId = "turn_user-5"; - const staleState = { - conversation: { - messages: [ - { - id: "assistant-old", - role: "assistant", - text: "Old MCP context that 
should not be used.", - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - { - id: "user-5", - role: "user", - text: "what did i say about the budget?", - createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - meta: { - slackTs: "1700000000.0051", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - artifacts: { - assistantContextChannelId: "COLD", - }, - }; - const freshState = { - conversation: { - messages: [ - { - id: "assistant-fresh", - role: "assistant", - text: "Fresh MCP context loaded after the lock.", - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - { - id: "user-5", - role: "user", - text: "what did i say about the budget?", - createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - meta: { - slackTs: "1700000000.0052", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - artifacts: { - assistantContextChannelId: "CFRESH", - }, - }; - - const authProvider = await createPendingAuthSession({ - conversationId: threadId, - sessionId, - userMessage: "what did i say about the budget?", - channelId: "C123", - threadTs: "1700000000.005", - }); - await createAwaitingMcpTurnRecord({ - conversationId: threadId, - sessionId, - text: "what did i say about the budget?", - }); - await stateAdapterModule - .getStateAdapter() - .set(`thread-state:${threadId}`, freshState); - - const adapter = stateAdapterModule.getStateAdapter(); - const originalGet = adapter.get.bind(adapter); - let threadReadCount = 0; - const getSpy = vi.spyOn(adapter, "get"); - getSpy.mockImplementation((async (key: string) => { - if (key === `thread-state:${threadId}` && threadReadCount++ === 0) { - return 
structuredClone(staleState); - } - return await originalGet(key); - }) as typeof adapter.get); - - try { - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - } finally { - getSpy.mockRestore(); - } - - expect(generateAssistantReplyMock).toHaveBeenCalledWith( - "what did i say about the budget?", - expect.objectContaining({ - destination: SLACK_DESTINATION, - toolChannelId: "CFRESH", - conversationContext: expect.stringContaining( - "Fresh MCP context loaded after the lock.", - ), - }), - ); - const resumeContext = generateAssistantReplyMock.mock.calls[0]?.[1] as { - conversationContext?: string; - }; - expect(resumeContext.conversationContext).not.toContain( - "Old MCP context that should not be used.", - ); - expect(getCapturedSlackApiCalls("reactions.add")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - timestamp: "1700000000.0052", - name: "eyes", - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - timestamp: "1700000000.0052", - name: "white_check_mark", - }), - }), - ]); - }); - - it("does not resume a stale MCP-blocked request after a newer thread message", async () => { - const sessionId = "turn_user-4"; - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId: "conversation-4", - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [], - resumeReason: "auth", - resumedFromSliceId: 1, - }); - await stateAdapterModule - .getStateAdapter() - .set("thread-state:slack:C123:1700000000.004", { - conversation: { - messages: [ - { - id: "user-4", - role: "user", - text: "what did i say about the budget?", - createdAtMs: 1, - author: { - userId: "U123", - userName: "dcramer", - }, - }, - { - id: "user-5", - role: "user", - text: "never mind, I'll handle it", - 
createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - }); - - const authProvider = await createPendingAuthSession({ - conversationId: "conversation-4", - sessionId, - userMessage: "what did i say about the budget?", - channelId: "C123", - threadTs: "1700000000.004", - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).not.toHaveBeenCalled(); - expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); - - const persistedState = await stateAdapterModule - .getStateAdapter() - .get>("thread-state:slack:C123:1700000000.004"); - const conversation = - conversationStateModule.coerceThreadConversationState(persistedState); - expect(conversation.processing.pendingAuth).toBeUndefined(); - - const sessionRecord = - await turnSessionStoreModule.getAgentTurnSessionRecord( - "conversation-4", - sessionId, - ); - expect(sessionRecord?.state).toBe("abandoned"); - }); - - it("does not resume MCP OAuth without an awaiting turn-session record", async () => { - const sessionId = "turn_missing_record"; - await stateAdapterModule - .getStateAdapter() - .set("thread-state:slack:C123:1700000000.006", { - conversation: { - messages: [ - { - id: "user-6", - role: "user", - text: "list mcp data", - createdAtMs: 1, - author: { - userId: "U123", - userName: "dcramer", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - }); - - const authProvider = await createPendingAuthSession({ - conversationId: 
"conversation-missing-record", - sessionId, - userMessage: "list mcp data", - channelId: "C123", - threadTs: "1700000000.006", - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).not.toHaveBeenCalled(); - expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); - }); - - it("does not resume MCP OAuth with a mismatched stored requester", async () => { - const sessionId = "turn_user-7"; - await stateAdapterModule - .getStateAdapter() - .set("thread-state:slack:C123:1700000000.007", { - conversation: { - messages: [ - { - id: "user-7", - role: "user", - text: "list mcp data", - createdAtMs: 1, - author: { - userId: "U123", - userName: "dcramer", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - }); - await createAwaitingMcpTurnRecord({ - conversationId: "conversation-mismatched-requester", - requester: { - slackUserId: "U999", - slackUserName: "wrong-user", - }, - sessionId, - text: "list mcp data", - }); - - const authProvider = await createPendingAuthSession({ - conversationId: "conversation-mismatched-requester", - sessionId, - userMessage: "list mcp data", - channelId: "C123", - threadTs: "1700000000.007", - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).not.toHaveBeenCalled(); - expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); - await expect( - turnSessionStoreModule.getAgentTurnSessionRecord( - "conversation-mismatched-requester", - sessionId, 
- ), - ).resolves.toMatchObject({ - state: "failed", - errorMessage: - "Stored Slack requester identity did not match OAuth requester", - }); - }); - - it("uploads resumed reply files without posting an extra thread message for empty inline text", async () => { - generateAssistantReplyMock.mockResolvedValueOnce({ - text: "", - files: [ - { - data: Buffer.from("hello"), - filename: "resume.txt", - }, - ], - deliveryPlan: { - mode: "thread", - postThreadText: true, - attachFiles: "inline", - }, - diagnostics: { - outcome: "success", - toolCalls: [], - }, - }); - await stateAdapterModule - .getStateAdapter() - .set("thread-state:slack:C123:1700000000.002", { - conversation: { - messages: [ - { - id: "msg.2", - role: "user", - text: "/demo upload", - createdAtMs: 1, - author: { - userId: "U123", - userName: "dcramer", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId: "turn_msg_2", - linkSentAtMs: 1, - }, - }, - }, - }); - await createAwaitingMcpTurnRecord({ - conversationId: "conversation-2", - sessionId: "turn_msg_2", - text: "/demo upload", - }); - - const authProvider = await createPendingAuthSession({ - conversationId: "conversation-2", - sessionId: "turn_msg_2", - userMessage: "/demo upload", - channelId: "C123", - threadTs: "1700000000.002", - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); - expect(getCapturedSlackApiCalls("files.getUploadURLExternal")).toHaveLength( - 1, - ); - expect(getCapturedSlackApiCalls("files.completeUploadExternal")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1700000000.002", - }), - }), - ]); - 
expect(getCapturedSlackFileUploadCalls()).toHaveLength(1); - }); - - it("uploads resumed reply files even when thread text delivery is suppressed", async () => { - generateAssistantReplyMock.mockResolvedValueOnce({ - text: "👍", - files: [ - { - data: Buffer.from("hello"), - filename: "resume.txt", - }, - ], - deliveryPlan: { - mode: "thread", - postThreadText: false, - attachFiles: "inline", - }, - diagnostics: { - outcome: "success", - toolCalls: [], - }, - }); - await stateAdapterModule - .getStateAdapter() - .set("thread-state:slack:C123:1700000000.003", { - conversation: { - messages: [ - { - id: "msg.3", - role: "user", - text: "/demo upload", - createdAtMs: 1, - author: { - userId: "U123", - userName: "dcramer", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "mcp", - provider: EVAL_MCP_AUTH_PROVIDER, - requesterId: "U123", - sessionId: "turn_msg_3", - linkSentAtMs: 1, - }, - }, - }, - }); - await createAwaitingMcpTurnRecord({ - conversationId: "conversation-3", - sessionId: "turn_msg_3", - text: "/demo upload", - }); - - const authProvider = await createPendingAuthSession({ - conversationId: "conversation-3", - sessionId: "turn_msg_3", - userMessage: "/demo upload", - channelId: "C123", - threadTs: "1700000000.003", - }); - - const response = - await mcpOauthCallbackHarnessModule.runMcpOauthCallbackRoute({ - provider: EVAL_MCP_AUTH_PROVIDER, - state: authProvider.authSessionId, - code: EVAL_MCP_AUTH_CODE, - }); - - expect(response.status).toBe(200); - expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); - expect(getCapturedSlackApiCalls("files.getUploadURLExternal")).toHaveLength( - 1, - ); - expect(getCapturedSlackApiCalls("files.completeUploadExternal")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1700000000.003", - }), - }), - ]); - expect(getCapturedSlackFileUploadCalls()).toHaveLength(1); - }); -}); diff --git 
a/packages/junior/tests/integration/oauth-callback-slack.test.ts b/packages/junior/tests/integration/oauth-callback-slack.test.ts deleted file mode 100644 index 7f3f819b6..000000000 --- a/packages/junior/tests/integration/oauth-callback-slack.test.ts +++ /dev/null @@ -1,770 +0,0 @@ -import path from "node:path"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - getCapturedSlackApiCalls, - resetSlackApiMockState, -} from "../msw/handlers/slack-api"; -import { - createPluginAppFixture, - type PluginAppFixture, -} from "../fixtures/plugin-app"; - -const { generateAssistantReplyMock } = vi.hoisted(() => ({ - generateAssistantReplyMock: vi.fn(), -})); - -vi.mock("@/chat/respond", () => ({ - generateAssistantReply: generateAssistantReplyMock, -})); - -const ORIGINAL_ENV = { ...process.env }; -const EVAL_OAUTH_PLUGIN_ROOT = path.resolve( - import.meta.dirname, - "../fixtures/plugins/eval-oauth", -); -const SLACK_DESTINATION = { - platform: "slack", - teamId: "T123", - channelId: "C123", -} as const; - -type StateAdapterModule = typeof import("@/chat/state/adapter"); -type OAuthCallbackHarnessModule = - typeof import("../fixtures/oauth-callback-harness"); -type TurnSessionStoreModule = typeof import("@/chat/state/turn-session"); - -let stateAdapterModule: StateAdapterModule; -let oauthCallbackHarnessModule: OAuthCallbackHarnessModule; -let turnSessionStoreModule: TurnSessionStoreModule; -let pluginApp: PluginAppFixture | undefined; - -describe("oauth callback slack integration", () => { - beforeEach(async () => { - generateAssistantReplyMock.mockReset(); - generateAssistantReplyMock.mockResolvedValue({ - text: "Here are your Sentry issues.", - diagnostics: { - outcome: "success", - toolCalls: [], - }, - }); - resetSlackApiMockState(); - process.env = { - ...ORIGINAL_ENV, - JUNIOR_STATE_ADAPTER: "memory", - JUNIOR_BASE_URL: "https://junior.example.com", - }; - pluginApp = await createPluginAppFixture([EVAL_OAUTH_PLUGIN_ROOT]); - 
vi.resetModules(); - stateAdapterModule = await import("@/chat/state/adapter"); - oauthCallbackHarnessModule = - await import("../fixtures/oauth-callback-harness"); - turnSessionStoreModule = await import("@/chat/state/turn-session"); - await stateAdapterModule.disconnectStateAdapter(); - await stateAdapterModule.getStateAdapter().connect(); - }, 45_000); - - afterEach(async () => { - await stateAdapterModule?.disconnectStateAdapter(); - await pluginApp?.cleanup(); - pluginApp = undefined; - process.env = { ...ORIGINAL_ENV }; - }, 45_000); - - it("publishes app home through the Slack MSW harness after generic OAuth callback", async () => { - await stateAdapterModule - .getStateAdapter() - .set("oauth-state:eval-oauth-state", { - userId: "U123", - provider: "eval-oauth", - }); - - const response = await oauthCallbackHarnessModule.runOauthCallbackRoute({ - provider: "eval-oauth", - state: "eval-oauth-state", - code: "eval-oauth-code", - }); - - expect(response.status).toBe(200); - expect(getCapturedSlackApiCalls("views.publish")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - user_id: "U123", - view: expect.objectContaining({ - type: "home", - }), - }), - }), - ]); - }, 20_000); - - it("resumes a pending OAuth request with persisted thread context", async () => { - await stateAdapterModule - .getStateAdapter() - .set("oauth-state:eval-oauth-resume-state", { - userId: "U123", - provider: "eval-oauth", - channelId: "C123", - destination: SLACK_DESTINATION, - threadTs: "1700000000.001", - pendingMessage: "list my sentry issues", - }); - await stateAdapterModule - .getStateAdapter() - .set("thread-state:slack:C123:1700000000.001", { - conversation: { - messages: [ - { - id: "assistant-1", - role: "assistant", - text: "You need the budget by Friday.", - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - { - id: "user-1", - role: "user", - text: "list my sentry issues", - createdAtMs: 2, - author: { - userId: "U123", - 
userName: "dcramer", - }, - }, - ], - }, - }); - - const response = await oauthCallbackHarnessModule.runOauthCallbackRoute({ - provider: "eval-oauth", - state: "eval-oauth-resume-state", - code: "eval-oauth-code", - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).toHaveBeenCalledWith( - "list my sentry issues", - expect.objectContaining({ - destination: SLACK_DESTINATION, - conversationContext: expect.stringContaining( - "You need the budget by Friday.", - ), - }), - ); - const resumeContext = generateAssistantReplyMock.mock.calls[0]?.[1] as { - conversationContext?: string; - }; - expect(resumeContext.conversationContext).not.toContain( - "list my sentry issues", - ); - - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.001", - text: "Here are your Sentry issues.", - }), - }), - ]), - ); - }, 20_000); - - it("resumes a session-recorded OAuth turn with persisted thread state", async () => { - const conversationId = "slack:C123:1700000000.009"; - const sessionId = "turn_msg_9"; - - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [ - { - role: "user", - content: [{ type: "text", text: "list my sentry issues" }], - timestamp: 1, - }, - ], - resumeReason: "auth", - resumedFromSliceId: 1, - requester: { - platform: "slack", - teamId: "T123", - slackUserId: "U123", - slackUserName: "stored-user", - fullName: "Stored User", - email: "stored@example.com", - }, - }); - - await stateAdapterModule - .getStateAdapter() - .set("oauth-state:eval-oauth-session-record-state", { - userId: "U123", - provider: "eval-oauth", - channelId: "C123", - destination: SLACK_DESTINATION, - threadTs: "1700000000.009", - pendingMessage: "list my sentry issues", - resumeConversationId: 
conversationId, - resumeSessionId: sessionId, - scope: "read", - }); - await stateAdapterModule - .getStateAdapter() - .set(`thread-state:${conversationId}`, { - conversation: { - messages: [ - { - id: "assistant-1", - role: "assistant", - text: "You need the budget by Friday.", - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - { - id: "msg.9", - role: "user", - text: "list my sentry issues", - createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - meta: { - slackTs: "1700000000.010", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "plugin", - provider: "eval-oauth", - requesterId: "U123", - scope: "read", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - artifacts: { - assistantContextChannelId: "C999", - listColumnMap: {}, - }, - }); - - const response = await oauthCallbackHarnessModule.runOauthCallbackRoute({ - provider: "eval-oauth", - state: "eval-oauth-session-record-state", - code: "eval-oauth-code", - }); - - expect(response.status).toBe(200); - const sessionRecordAfterAuth = - await turnSessionStoreModule.getAgentTurnSessionRecord( - conversationId, - sessionId, - ); - expect(sessionRecordAfterAuth?.piMessages).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - role: "user", - content: [ - { - type: "text", - text: 'Authorization completed for provider "eval-oauth". 
Continue the blocked request and retry the provider operation if needed.', - }, - ], - }), - ]), - ); - expect(generateAssistantReplyMock).toHaveBeenCalledWith( - "list my sentry issues", - expect.objectContaining({ - requester: expect.objectContaining({ - email: "stored@example.com", - fullName: "Stored User", - platform: "slack", - teamId: "T123", - userId: "U123", - userName: "stored-user", - }), - destination: SLACK_DESTINATION, - correlation: expect.objectContaining({ - channelId: "C123", - threadTs: "1700000000.009", - requesterId: "U123", - }), - toolChannelId: "C999", - conversationContext: expect.stringContaining( - "You need the budget by Friday.", - ), - }), - ); - const resumeContext = generateAssistantReplyMock.mock.calls[0]?.[1] as { - conversationContext?: string; - }; - expect(resumeContext.conversationContext).not.toContain( - "list my sentry issues", - ); - - const persistedState = await stateAdapterModule - .getStateAdapter() - .get>(`thread-state:${conversationId}`); - const conversation = - (persistedState?.conversation as { - messages?: Array<{ role?: string; text?: string }>; - processing?: { activeTurnId?: string }; - }) ?? 
{}; - expect(conversation.processing?.activeTurnId).toBeUndefined(); - expect(conversation.messages?.at(-1)).toMatchObject({ - role: "assistant", - text: "Here are your Sentry issues.", - }); - - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.009", - text: "Here are your Sentry issues.", - }), - }), - ]), - ); - expect(getCapturedSlackApiCalls("reactions.add")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - timestamp: "1700000000.010", - name: "eyes", - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - timestamp: "1700000000.010", - name: "white_check_mark", - }), - }), - ]); - expect(getCapturedSlackApiCalls("reactions.remove")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - timestamp: "1700000000.010", - name: "eyes", - }), - }), - ]); - }); - - it("fails a session-recorded OAuth resume with mismatched requester team", async () => { - const conversationId = "slack:C123:1700000000.012"; - const sessionId = "turn_msg_12"; - - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [], - resumeReason: "auth", - resumedFromSliceId: 1, - requester: { - platform: "slack", - teamId: "T999", - slackUserId: "U123", - }, - }); - await stateAdapterModule - .getStateAdapter() - .set("oauth-state:eval-oauth-mismatched-requester-state", { - userId: "U123", - provider: "eval-oauth", - channelId: "C123", - destination: SLACK_DESTINATION, - threadTs: "1700000000.012", - pendingMessage: "list my sentry issues", - resumeConversationId: conversationId, - resumeSessionId: sessionId, - scope: "read", - }); - await stateAdapterModule - .getStateAdapter() - 
.set(`thread-state:${conversationId}`, { - conversation: { - messages: [ - { - id: "msg.12", - role: "user", - text: "list my sentry issues", - createdAtMs: 2, - author: { userId: "U123" }, - meta: { slackTs: "1700000000.0121" }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "plugin", - provider: "eval-oauth", - requesterId: "U123", - scope: "read", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - }); - - const response = await oauthCallbackHarnessModule.runOauthCallbackRoute({ - provider: "eval-oauth", - state: "eval-oauth-mismatched-requester-state", - code: "eval-oauth-code", - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).not.toHaveBeenCalled(); - await expect( - turnSessionStoreModule.getAgentTurnSessionRecord( - conversationId, - sessionId, - ), - ).resolves.toMatchObject({ - state: "failed", - errorMessage: - "Stored Slack requester identity did not match OAuth requester", - }); - }); - - it("rebuilds session-recorded OAuth resume context from state loaded under the thread lock", async () => { - const conversationId = "slack:C123:1700000000.011"; - const sessionId = "turn_msg_11"; - const staleState = { - conversation: { - messages: [ - { - id: "assistant-old", - role: "assistant", - text: "Old context that should not be used.", - createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - { - id: "msg.11", - role: "user", - text: "list my sentry issues", - createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - meta: { - slackTs: "1700000000.0111", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "plugin", - provider: "eval-oauth", - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - artifacts: { - assistantContextChannelId: "COLD", - }, - }; - const freshState = { - conversation: { - messages: [ - { - id: "assistant-fresh", - role: "assistant", - text: "Fresh context loaded after the lock.", - 
createdAtMs: 1, - author: { - userName: "junior", - isBot: true, - }, - }, - { - id: "msg.11", - role: "user", - text: "list my sentry issues", - createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - meta: { - slackTs: "1700000000.0112", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "plugin", - provider: "eval-oauth", - requesterId: "U123", - sessionId, - linkSentAtMs: 1, - }, - }, - }, - artifacts: { - assistantContextChannelId: "CFRESH", - }, - }; - - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [], - resumeReason: "auth", - resumedFromSliceId: 1, - requester: { slackUserId: "U123" }, - }); - await stateAdapterModule - .getStateAdapter() - .set("oauth-state:eval-oauth-locked-state", { - userId: "U123", - provider: "eval-oauth", - channelId: "C123", - destination: SLACK_DESTINATION, - threadTs: "1700000000.011", - pendingMessage: "list my sentry issues", - resumeConversationId: conversationId, - resumeSessionId: sessionId, - }); - await stateAdapterModule - .getStateAdapter() - .set(`thread-state:${conversationId}`, freshState); - - const adapter = stateAdapterModule.getStateAdapter(); - const originalGet = adapter.get.bind(adapter); - let threadReadCount = 0; - const getSpy = vi.spyOn(adapter, "get"); - getSpy.mockImplementation((async (key: string) => { - if (key === `thread-state:${conversationId}` && threadReadCount++ === 0) { - return structuredClone(staleState); - } - return await originalGet(key); - }) as typeof adapter.get); - - try { - const response = await oauthCallbackHarnessModule.runOauthCallbackRoute({ - provider: "eval-oauth", - state: "eval-oauth-locked-state", - code: "eval-oauth-code", - }); - - expect(response.status).toBe(200); - } finally { - getSpy.mockRestore(); - } - - expect(generateAssistantReplyMock).toHaveBeenCalledWith( - "list my sentry 
issues", - expect.objectContaining({ - toolChannelId: "CFRESH", - destination: SLACK_DESTINATION, - conversationContext: expect.stringContaining( - "Fresh context loaded after the lock.", - ), - }), - ); - const resumeContext = generateAssistantReplyMock.mock.calls[0]?.[1] as { - conversationContext?: string; - }; - expect(resumeContext.conversationContext).not.toContain( - "Old context that should not be used.", - ); - expect(getCapturedSlackApiCalls("reactions.add")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - timestamp: "1700000000.0112", - name: "eyes", - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - timestamp: "1700000000.0112", - name: "white_check_mark", - }), - }), - ]); - }); - - it("resumes the latest pending OAuth session when a reused link points at an abandoned session", async () => { - const conversationId = "slack:C123:1700000000.012"; - const oldSessionId = "turn_msg_old_12"; - const newSessionId = "turn_msg_new_12"; - - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId: oldSessionId, - sliceId: 2, - state: "abandoned", - destination: SLACK_DESTINATION, - piMessages: [], - resumeReason: "auth", - resumedFromSliceId: 1, - }); - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId: newSessionId, - sliceId: 2, - state: "awaiting_resume", - destination: SLACK_DESTINATION, - piMessages: [], - resumeReason: "auth", - resumedFromSliceId: 1, - }); - - await stateAdapterModule - .getStateAdapter() - .set("oauth-state:eval-oauth-reused-link-state", { - userId: "U123", - provider: "eval-oauth", - channelId: "C123", - destination: SLACK_DESTINATION, - threadTs: "1700000000.012", - pendingMessage: "old request", - resumeConversationId: conversationId, - resumeSessionId: oldSessionId, - }); - await stateAdapterModule - .getStateAdapter() - .set(`thread-state:${conversationId}`, { - conversation: { - messages: [ - { - id: 
"msg.old.12", - role: "user", - text: "old request", - createdAtMs: 1, - author: { - userId: "U123", - userName: "dcramer", - }, - }, - { - id: "msg.new.12", - role: "user", - text: "new request", - createdAtMs: 2, - author: { - userId: "U123", - userName: "dcramer", - }, - meta: { - slackTs: "1700000000.0123", - }, - }, - ], - processing: { - activeTurnId: undefined, - pendingAuth: { - kind: "plugin", - provider: "eval-oauth", - requesterId: "U123", - sessionId: newSessionId, - linkSentAtMs: 1, - }, - }, - }, - }); - - const response = await oauthCallbackHarnessModule.runOauthCallbackRoute({ - provider: "eval-oauth", - state: "eval-oauth-reused-link-state", - code: "eval-oauth-code", - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).toHaveBeenCalledWith( - "new request", - expect.objectContaining({ - correlation: expect.objectContaining({ - turnId: newSessionId, - }), - }), - ); - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.012", - text: "Here are your Sentry issues.", - }), - }), - ]), - ); - }); - - it("does not re-post the pending message when the session record is already abandoned", async () => { - const conversationId = "slack:C123:1700000000.010"; - const sessionId = "turn_msg_10"; - - await turnSessionStoreModule.upsertAgentTurnSessionRecord({ - conversationId, - sessionId, - sliceId: 2, - state: "abandoned", - destination: SLACK_DESTINATION, - piMessages: [], - resumeReason: "auth", - resumedFromSliceId: 1, - requester: { slackUserId: "U123" }, - }); - - await stateAdapterModule - .getStateAdapter() - .set("oauth-state:eval-oauth-abandoned-state", { - userId: "U123", - provider: "eval-oauth", - channelId: "C123", - destination: SLACK_DESTINATION, - threadTs: "1700000000.010", - pendingMessage: "list my sentry issues", - resumeConversationId: conversationId, - 
resumeSessionId: sessionId, - }); - - const response = await oauthCallbackHarnessModule.runOauthCallbackRoute({ - provider: "eval-oauth", - state: "eval-oauth-abandoned-state", - code: "eval-oauth-code", - }); - - expect(response.status).toBe(200); - expect(generateAssistantReplyMock).not.toHaveBeenCalled(); - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([]); - }); -}); diff --git a/packages/junior/tests/integration/oauth-resume-slack.test.ts b/packages/junior/tests/integration/oauth-resume-slack.test.ts deleted file mode 100644 index da96acc14..000000000 --- a/packages/junior/tests/integration/oauth-resume-slack.test.ts +++ /dev/null @@ -1,428 +0,0 @@ -import { Buffer } from "node:buffer"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - getSlackContinuationMarker, - getSlackInterruptionMarker, -} from "@/chat/slack/output"; -import { disconnectStateAdapter } from "@/chat/state/adapter"; -import { - getCapturedSlackApiCalls, - getCapturedSlackFileUploadCalls, - queueSlackApiError, -} from "../msw/handlers/slack-api"; - -function makeDiagnostics( - outcome: "success" | "execution_failure" | "provider_error" = "success", - extras: Record = {}, -) { - return { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - ...extras, - }; -} - -const TEST_SLACK_DESTINATION = { - platform: "slack", - teamId: "T123", - channelId: "C123", -} as const; - -describe("oauth resume slack integration", () => { - beforeEach(async () => { - process.env.JUNIOR_STATE_ADAPTER = "memory"; - vi.resetModules(); - await disconnectStateAdapter(); - }); - - afterEach(async () => { - await disconnectStateAdapter(); - delete process.env.JUNIOR_STATE_ADAPTER; - }); - - it("posts resumed status updates through the Slack MSW harness", async () => { - const { resumeAuthorizedRequest } = - await import("@/chat/runtime/slack-resume"); - await 
resumeAuthorizedRequest({ - messageText: "What budget deadline did I mention earlier?", - channelId: "C123", - threadTs: "1700000000.001", - connectedText: - "Your eval-auth MCP access is now connected. Continuing the original request...", - replyContext: { - credentialContext: { - actor: { type: "user", userId: "U123" }, - }, - destination: TEST_SLACK_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - }, - generateReply: async () => - ({ - text: "The budget deadline you mentioned earlier was Friday.", - diagnostics: makeDiagnostics("success", { - durationMs: 842, - usage: { - totalTokens: 1234, - }, - }), - }) as any, - }); - - expect(getCapturedSlackApiCalls("assistant.threads.setStatus")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1700000000.001", - status: expect.any(String), - loading_messages: expect.arrayContaining([expect.any(String)]), - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1700000000.001", - status: "", - }), - }), - ]); - - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.001", - text: "Your eval-auth MCP access is now connected. 
Continuing the original request...", - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - blocks: [ - { - type: "markdown", - text: "The budget deadline you mentioned earlier was Friday.", - }, - { - type: "context", - elements: [ - expect.objectContaining({ - type: "mrkdwn", - text: expect.stringContaining( - "*ID:* slack:C123:1700000000.001", - ), - }), - ], - }, - ], - channel: "C123", - thread_ts: "1700000000.001", - text: "The budget deadline you mentioned earlier was Friday.", - }), - }), - ]); - }, 10_000); - - it("uses correlation IDs for resumed reply footers", async () => { - const { resumeAuthorizedRequest } = - await import("@/chat/runtime/slack-resume"); - const { upsertAgentTurnSessionRecord } = - await import("@/chat/state/turn-session"); - - await upsertAgentTurnSessionRecord({ - conversationId: "conversation-1", - sessionId: "turn-1", - sliceId: 2, - state: "awaiting_resume", - piMessages: [], - resumeReason: "timeout", - cumulativeDurationMs: 1_000, - cumulativeUsage: { - totalTokens: 1_000, - }, - }); - - await resumeAuthorizedRequest({ - messageText: "continue this turn", - channelId: "C123", - threadTs: "1700000000.007", - connectedText: "", - replyContext: { - credentialContext: { - actor: { type: "user", userId: "U123" }, - }, - destination: TEST_SLACK_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - correlation: { - conversationId: "conversation-1", - turnId: "turn-1", - }, - }, - generateReply: async () => - ({ - text: "done", - diagnostics: makeDiagnostics("success", { - durationMs: 500, - usage: { - outputTokens: 7, - }, - }), - }) as any, - }); - - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.007", - text: "done", - blocks: [ - { - type: "markdown", - text: "done", - }, - { - type: "context", - elements: [ - { - type: "mrkdwn", - text: "*ID:* 
conversation-1", - }, - ], - }, - ], - }), - }), - ]); - }); - - it("chunks long resumed replies into explicit continuation messages", async () => { - const { resumeAuthorizedRequest } = - await import("@/chat/runtime/slack-resume"); - const longReply = Array.from( - { length: 80 }, - (_, i) => `line ${i + 1}`, - ).join("\n"); - - await resumeAuthorizedRequest({ - messageText: "Continue the original request", - channelId: "C123", - threadTs: "1700000000.002", - connectedText: "Connected. Continuing...", - replyContext: { - credentialContext: { - actor: { type: "user", userId: "U123" }, - }, - destination: TEST_SLACK_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - }, - generateReply: async () => - ({ - text: longReply, - diagnostics: makeDiagnostics(), - }) as any, - }); - - const postCalls = getCapturedSlackApiCalls("chat.postMessage"); - expect(postCalls).toHaveLength(5); - expect(postCalls[0]?.params).toMatchObject({ - channel: "C123", - thread_ts: "1700000000.002", - text: "Connected. Continuing...", - }); - expect(postCalls[1]?.params.text).toContain(getSlackContinuationMarker()); - expect(postCalls[2]?.params.text).toContain(getSlackContinuationMarker()); - expect(postCalls[3]?.params.text).toContain(getSlackContinuationMarker()); - expect(postCalls[4]?.params.text).not.toContain( - getSlackContinuationMarker(), - ); - expect(postCalls[4]?.params.text).toContain("line 80"); - }); - - it("marks resumed provider-error partial replies as interrupted", async () => { - const { resumeAuthorizedRequest } = - await import("@/chat/runtime/slack-resume"); - - await resumeAuthorizedRequest({ - messageText: "Continue the original request", - channelId: "C123", - threadTs: "1700000000.003", - connectedText: "Connected. 
Continuing...", - replyContext: { - credentialContext: { - actor: { type: "user", userId: "U123" }, - }, - destination: TEST_SLACK_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - }, - generateReply: async () => - ({ - text: "Partial output", - diagnostics: makeDiagnostics("provider_error"), - }) as any, - }); - - const postCalls = getCapturedSlackApiCalls("chat.postMessage"); - expect(postCalls).toHaveLength(2); - expect(postCalls[1]?.params).toMatchObject({ - channel: "C123", - thread_ts: "1700000000.003", - }); - expect(postCalls[1]?.params.text).toContain("Partial output"); - expect(postCalls[1]?.params.text).toContain( - getSlackInterruptionMarker().trim(), - ); - expect(postCalls[1]?.params.text).not.toContain("event_id="); - }); - - it("replaces resumed execution-failure replies before Slack planning", async () => { - const { resumeAuthorizedRequest } = - await import("@/chat/runtime/slack-resume"); - - await resumeAuthorizedRequest({ - messageText: "Continue the original request", - channelId: "C123", - threadTs: "1700000000.006", - connectedText: "Connected. Continuing...", - replyContext: { - credentialContext: { - actor: { type: "user", userId: "U123" }, - }, - destination: TEST_SLACK_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - }, - generateReply: async () => - ({ - text: "", - diagnostics: makeDiagnostics("execution_failure", { - assistantMessageCount: 0, - usedPrimaryText: false, - }), - }) as any, - }); - - const postCalls = getCapturedSlackApiCalls("chat.postMessage"); - expect(postCalls).toHaveLength(2); - expect(postCalls[1]?.params).toMatchObject({ - channel: "C123", - thread_ts: "1700000000.006", - }); - expect(postCalls[1]?.params.text).toContain( - "I ran into an internal error while processing that. 
Reference: `event_id=", - ); - }); - - it("delivers resumed reply files through the shared reply planner", async () => { - const { resumeAuthorizedRequest } = - await import("@/chat/runtime/slack-resume"); - - await resumeAuthorizedRequest({ - messageText: "Continue the original request", - channelId: "C123", - threadTs: "1700000000.004", - connectedText: "Connected. Continuing...", - replyContext: { - credentialContext: { - actor: { type: "user", userId: "U123" }, - }, - destination: TEST_SLACK_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - }, - generateReply: async () => - ({ - text: "Here is the resumed artifact.", - files: [ - { - data: Buffer.from("resume-file"), - filename: "resume.txt", - }, - ], - diagnostics: makeDiagnostics(), - }) as any, - }); - - const postCalls = getCapturedSlackApiCalls("chat.postMessage"); - expect(postCalls).toHaveLength(2); - expect(postCalls[0]?.params).toMatchObject({ - channel: "C123", - thread_ts: "1700000000.004", - text: "Connected. Continuing...", - }); - expect(postCalls[1]?.params).toMatchObject({ - channel: "C123", - thread_ts: "1700000000.004", - text: "Here is the resumed artifact.", - }); - expect(getCapturedSlackApiCalls("files.getUploadURLExternal")).toHaveLength( - 1, - ); - expect(getCapturedSlackApiCalls("files.completeUploadExternal")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: "C123", - thread_ts: "1700000000.004", - }), - }), - ]); - expect(getCapturedSlackFileUploadCalls()).toHaveLength(1); - }); - - it("keeps the resumed reply visible when file upload followups fail", async () => { - const { resumeAuthorizedRequest } = - await import("@/chat/runtime/slack-resume"); - queueSlackApiError("files.completeUploadExternal", { - error: "upload_failed", - }); - - await resumeAuthorizedRequest({ - messageText: "Continue the original request", - channelId: "C123", - threadTs: "1700000000.005", - connectedText: "Connected. 
Continuing...", - replyContext: { - credentialContext: { - actor: { type: "user", userId: "U123" }, - }, - destination: TEST_SLACK_DESTINATION, - requester: { platform: "slack", teamId: "T123", userId: "U123" }, - }, - generateReply: async () => - ({ - text: "Here is the resumed artifact.", - files: [ - { - data: Buffer.from("resume-file"), - filename: "resume.txt", - }, - ], - diagnostics: makeDiagnostics(), - }) as any, - }); - - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.005", - text: "Connected. Continuing...", - }), - }), - expect.objectContaining({ - params: expect.objectContaining({ - channel: "C123", - thread_ts: "1700000000.005", - text: "Here is the resumed artifact.", - }), - }), - ]); - expect(getCapturedSlackApiCalls("files.getUploadURLExternal")).toHaveLength( - 1, - ); - expect( - getCapturedSlackApiCalls("files.completeUploadExternal"), - ).toHaveLength(1); - }); -}); diff --git a/packages/junior/tests/integration/oauth/callback-app-home.test.ts b/packages/junior/tests/integration/oauth/callback-app-home.test.ts new file mode 100644 index 000000000..27b99312e --- /dev/null +++ b/packages/junior/tests/integration/oauth/callback-app-home.test.ts @@ -0,0 +1,39 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + EVAL_OAUTH_PROVIDER, + createOauthCallbackRouteFixture, +} from "../../fixtures/oauth/callback-route"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth callback app home", () => { + beforeEach(async () => { + testbed = await createOauthCallbackRouteFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("publishes app home through the Slack MSW harness after generic OAuth callback", async () => { + await testbed.storeOAuthState("eval-oauth-state"); + + const response = 
await testbed.runRoute({ + provider: EVAL_OAUTH_PROVIDER, + state: "eval-oauth-state", + }); + + expect(response.status).toBe(200); + expect(getCapturedSlackApiCalls("views.publish")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + user_id: "U123", + view: expect.objectContaining({ + type: "home", + }), + }), + }), + ]); + }); +}); diff --git a/packages/junior/tests/integration/oauth/callback-resume-context.test.ts b/packages/junior/tests/integration/oauth/callback-resume-context.test.ts new file mode 100644 index 000000000..ecb3c0f4b --- /dev/null +++ b/packages/junior/tests/integration/oauth/callback-resume-context.test.ts @@ -0,0 +1,253 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + EVAL_OAUTH_PROVIDER, + SLACK_DESTINATION, + createOauthCallbackRouteFixture, +} from "../../fixtures/oauth/callback-route"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth callback resume context", () => { + beforeEach(async () => { + testbed = await createOauthCallbackRouteFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("resumes a pending OAuth request with persisted thread context", async () => { + await testbed.storeOAuthState("eval-oauth-resume-state", { + channelId: "C123", + threadTs: "1700000000.001", + pendingMessage: "list my sentry issues", + }); + await testbed.stateAdapter + .getStateAdapter() + .set("thread-state:slack:C123:1700000000.001", { + conversation: { + messages: [ + { + id: "assistant-1", + role: "assistant", + text: "You need the budget by Friday.", + createdAtMs: 1, + author: { + userName: "junior", + isBot: true, + }, + }, + { + id: "user-1", + role: "user", + text: "list my sentry issues", + createdAtMs: 2, + author: { + userId: "U123", + userName: "dcramer", + }, + }, + ], + }, + }); + + const response = await testbed.runRoute({ + provider: EVAL_OAUTH_PROVIDER, + 
state: "eval-oauth-resume-state", + }); + + expect(response.status).toBe(200); + expect(testbed.generateAssistantReplyMock).toHaveBeenCalledWith( + "list my sentry issues", + expect.objectContaining({ + destination: SLACK_DESTINATION, + conversationContext: expect.stringContaining( + "You need the budget by Friday.", + ), + }), + ); + const resumeContext = testbed.generateAssistantReplyMock.mock + .calls[0]?.[1] as { + conversationContext?: string; + }; + expect(resumeContext.conversationContext).not.toContain( + "list my sentry issues", + ); + + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + thread_ts: "1700000000.001", + text: "Here are your Sentry issues.", + }), + }), + ]), + ); + }); + + it("resumes a session-recorded OAuth turn with persisted thread state", async () => { + const conversationId = "slack:C123:1700000000.009"; + const sessionId = "turn_msg_9"; + + await testbed.createAwaitingOauthTurnRecord({ + conversationId, + sessionId, + text: "list my sentry issues", + }); + + await testbed.storeOAuthState("eval-oauth-session-record-state", { + channelId: "C123", + threadTs: "1700000000.009", + pendingMessage: "list my sentry issues", + resumeConversationId: conversationId, + resumeSessionId: sessionId, + }); + await testbed.stateAdapter + .getStateAdapter() + .set(`thread-state:${conversationId}`, { + conversation: { + messages: [ + { + id: "assistant-1", + role: "assistant", + text: "You need the budget by Friday.", + createdAtMs: 1, + author: { + userName: "junior", + isBot: true, + }, + }, + { + id: "msg.9", + role: "user", + text: "list my sentry issues", + createdAtMs: 2, + author: { + userId: "U123", + userName: "dcramer", + }, + meta: { + slackTs: "1700000000.010", + }, + }, + ], + processing: { + activeTurnId: undefined, + pendingAuth: { + kind: "plugin", + provider: EVAL_OAUTH_PROVIDER, + requesterId: "U123", + sessionId, 
+ linkSentAtMs: 1, + }, + }, + }, + artifacts: { + assistantContextChannelId: "C999", + listColumnMap: {}, + }, + }); + + const response = await testbed.runRoute({ + provider: EVAL_OAUTH_PROVIDER, + state: "eval-oauth-session-record-state", + }); + + expect(response.status).toBe(200); + const sessionRecordAfterAuth = + await testbed.turnSessionStore.getAgentTurnSessionRecord( + conversationId, + sessionId, + ); + expect(sessionRecordAfterAuth?.piMessages).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + role: "user", + content: [ + { + type: "text", + text: 'Authorization completed for provider "eval-oauth". Continue the blocked request and retry the provider operation if needed.', + }, + ], + }), + ]), + ); + expect(testbed.generateAssistantReplyMock).toHaveBeenCalledWith( + "list my sentry issues", + expect.objectContaining({ + requester: expect.objectContaining({ userId: "U123" }), + destination: SLACK_DESTINATION, + correlation: expect.objectContaining({ + channelId: "C123", + threadTs: "1700000000.009", + requesterId: "U123", + }), + toolChannelId: "C999", + conversationContext: expect.stringContaining( + "You need the budget by Friday.", + ), + }), + ); + const resumeContext = testbed.generateAssistantReplyMock.mock + .calls[0]?.[1] as { + conversationContext?: string; + }; + expect(resumeContext.conversationContext).not.toContain( + "list my sentry issues", + ); + + const persistedState = await testbed.stateAdapter + .getStateAdapter() + .get>(`thread-state:${conversationId}`); + const conversation = + (persistedState?.conversation as { + messages?: Array<{ role?: string; text?: string }>; + processing?: { activeTurnId?: string }; + }) ?? 
{}; + expect(conversation.processing?.activeTurnId).toBeUndefined(); + expect(conversation.messages?.at(-1)).toMatchObject({ + role: "assistant", + text: "Here are your Sentry issues.", + }); + + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + thread_ts: "1700000000.009", + text: "Here are your Sentry issues.", + }), + }), + ]), + ); + expect(getCapturedSlackApiCalls("reactions.add")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + timestamp: "1700000000.010", + name: "eyes", + }), + }), + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + timestamp: "1700000000.010", + name: "white_check_mark", + }), + }), + ]); + expect(getCapturedSlackApiCalls("reactions.remove")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + timestamp: "1700000000.010", + name: "eyes", + }), + }), + ]); + }); +}); diff --git a/packages/junior/tests/integration/oauth/callback-resume-guards.test.ts b/packages/junior/tests/integration/oauth/callback-resume-guards.test.ts new file mode 100644 index 000000000..19f8bb6f4 --- /dev/null +++ b/packages/junior/tests/integration/oauth/callback-resume-guards.test.ts @@ -0,0 +1,154 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + EVAL_OAUTH_PROVIDER, + SLACK_DESTINATION, + createOauthCallbackRouteFixture, +} from "../../fixtures/oauth/callback-route"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth callback resume guards", () => { + beforeEach(async () => { + testbed = await createOauthCallbackRouteFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("does not re-post the pending message when the session record is already abandoned", async () => { + const 
conversationId = "slack:C123:1700000000.010"; + const sessionId = "turn_msg_10"; + + await testbed.turnSessionStore.upsertAgentTurnSessionRecord({ + conversationId, + sessionId, + sliceId: 2, + state: "abandoned", + destination: SLACK_DESTINATION, + piMessages: [], + resumeReason: "auth", + resumedFromSliceId: 1, + }); + + await testbed.storeOAuthState("eval-oauth-abandoned-state", { + channelId: "C123", + threadTs: "1700000000.010", + pendingMessage: "list my sentry issues", + resumeConversationId: conversationId, + resumeSessionId: sessionId, + }); + + const response = await testbed.runRoute({ + provider: EVAL_OAUTH_PROVIDER, + state: "eval-oauth-abandoned-state", + }); + + expect(response.status).toBe(200); + expect(testbed.generateAssistantReplyMock).not.toHaveBeenCalled(); + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([]); + }); + + it("resumes the latest pending OAuth session when a reused link points at an abandoned session", async () => { + const conversationId = "slack:C123:1700000000.012"; + const oldSessionId = "turn_msg_old_12"; + const newSessionId = "turn_msg_new_12"; + + await testbed.turnSessionStore.upsertAgentTurnSessionRecord({ + conversationId, + sessionId: oldSessionId, + sliceId: 2, + state: "abandoned", + destination: SLACK_DESTINATION, + piMessages: [], + resumeReason: "auth", + resumedFromSliceId: 1, + }); + await testbed.turnSessionStore.upsertAgentTurnSessionRecord({ + conversationId, + sessionId: newSessionId, + sliceId: 2, + state: "awaiting_resume", + destination: SLACK_DESTINATION, + piMessages: [], + resumeReason: "auth", + resumedFromSliceId: 1, + }); + + await testbed.storeOAuthState("eval-oauth-reused-link-state", { + channelId: "C123", + threadTs: "1700000000.012", + pendingMessage: "old request", + resumeConversationId: conversationId, + resumeSessionId: oldSessionId, + }); + await testbed.stateAdapter + .getStateAdapter() + .set(`thread-state:${conversationId}`, { + conversation: { + messages: [ + { + id: 
"msg.old.12", + role: "user", + text: "old request", + createdAtMs: 1, + author: { + userId: "U123", + userName: "dcramer", + }, + }, + { + id: "msg.new.12", + role: "user", + text: "new request", + createdAtMs: 2, + author: { + userId: "U123", + userName: "dcramer", + }, + meta: { + slackTs: "1700000000.0123", + }, + }, + ], + processing: { + activeTurnId: undefined, + pendingAuth: { + kind: "plugin", + provider: EVAL_OAUTH_PROVIDER, + requesterId: "U123", + sessionId: newSessionId, + linkSentAtMs: 1, + }, + }, + }, + }); + + const response = await testbed.runRoute({ + provider: EVAL_OAUTH_PROVIDER, + state: "eval-oauth-reused-link-state", + }); + + expect(response.status).toBe(200); + expect(testbed.generateAssistantReplyMock).toHaveBeenCalledWith( + "new request", + expect.objectContaining({ + correlation: expect.objectContaining({ + turnId: newSessionId, + }), + }), + ); + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + thread_ts: "1700000000.012", + text: "Here are your Sentry issues.", + }), + }), + ]), + ); + }); +}); diff --git a/packages/junior/tests/integration/oauth/callback-resume-lock.test.ts b/packages/junior/tests/integration/oauth/callback-resume-lock.test.ts new file mode 100644 index 000000000..f5eebe0c3 --- /dev/null +++ b/packages/junior/tests/integration/oauth/callback-resume-lock.test.ts @@ -0,0 +1,177 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + EVAL_OAUTH_PROVIDER, + SLACK_DESTINATION, + createOauthCallbackRouteFixture, +} from "../../fixtures/oauth/callback-route"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth callback resume lock", () => { + beforeEach(async () => { + testbed = await createOauthCallbackRouteFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); 
+ + it("rebuilds session-recorded OAuth resume context from state loaded under the thread lock", async () => { + const conversationId = "slack:C123:1700000000.011"; + const sessionId = "turn_msg_11"; + const staleState = { + conversation: { + messages: [ + { + id: "assistant-old", + role: "assistant", + text: "Old context that should not be used.", + createdAtMs: 1, + author: { + userName: "junior", + isBot: true, + }, + }, + { + id: "msg.11", + role: "user", + text: "list my sentry issues", + createdAtMs: 2, + author: { + userId: "U123", + userName: "dcramer", + }, + meta: { + slackTs: "1700000000.0111", + }, + }, + ], + processing: { + activeTurnId: undefined, + pendingAuth: { + kind: "plugin", + provider: EVAL_OAUTH_PROVIDER, + requesterId: "U123", + sessionId, + linkSentAtMs: 1, + }, + }, + }, + artifacts: { + assistantContextChannelId: "COLD", + }, + }; + const freshState = { + conversation: { + messages: [ + { + id: "assistant-fresh", + role: "assistant", + text: "Fresh context loaded after the lock.", + createdAtMs: 1, + author: { + userName: "junior", + isBot: true, + }, + }, + { + id: "msg.11", + role: "user", + text: "list my sentry issues", + createdAtMs: 2, + author: { + userId: "U123", + userName: "dcramer", + }, + meta: { + slackTs: "1700000000.0112", + }, + }, + ], + processing: { + activeTurnId: undefined, + pendingAuth: { + kind: "plugin", + provider: EVAL_OAUTH_PROVIDER, + requesterId: "U123", + sessionId, + linkSentAtMs: 1, + }, + }, + }, + artifacts: { + assistantContextChannelId: "CFRESH", + }, + }; + + await testbed.createAwaitingOauthTurnRecord({ + conversationId, + sessionId, + }); + await testbed.storeOAuthState("eval-oauth-locked-state", { + channelId: "C123", + threadTs: "1700000000.011", + pendingMessage: "list my sentry issues", + resumeConversationId: conversationId, + resumeSessionId: sessionId, + }); + await testbed.stateAdapter + .getStateAdapter() + .set(`thread-state:${conversationId}`, freshState); + + const adapter = 
testbed.stateAdapter.getStateAdapter(); + const originalGet = adapter.get.bind(adapter); + let threadReadCount = 0; + const getSpy = vi.spyOn(adapter, "get"); + getSpy.mockImplementation((async (key: string) => { + if (key === `thread-state:${conversationId}` && threadReadCount++ === 0) { + return structuredClone(staleState); + } + return await originalGet(key); + }) as typeof adapter.get); + + try { + const response = await testbed.runRoute({ + provider: EVAL_OAUTH_PROVIDER, + state: "eval-oauth-locked-state", + }); + + expect(response.status).toBe(200); + } finally { + getSpy.mockRestore(); + } + + expect(testbed.generateAssistantReplyMock).toHaveBeenCalledWith( + "list my sentry issues", + expect.objectContaining({ + destination: SLACK_DESTINATION, + toolChannelId: "CFRESH", + conversationContext: expect.stringContaining( + "Fresh context loaded after the lock.", + ), + }), + ); + const resumeContext = testbed.generateAssistantReplyMock.mock + .calls[0]?.[1] as { + conversationContext?: string; + }; + expect(resumeContext.conversationContext).not.toContain( + "Old context that should not be used.", + ); + expect(getCapturedSlackApiCalls("reactions.add")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + timestamp: "1700000000.0112", + name: "eyes", + }), + }), + expect.objectContaining({ + params: expect.objectContaining({ + timestamp: "1700000000.0112", + name: "white_check_mark", + }), + }), + ]); + }); +}); diff --git a/packages/junior/tests/integration/oauth/callback-route-guards.test.ts b/packages/junior/tests/integration/oauth/callback-route-guards.test.ts new file mode 100644 index 000000000..d3ad2774f --- /dev/null +++ b/packages/junior/tests/integration/oauth/callback-route-guards.test.ts @@ -0,0 +1,69 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + EVAL_OAUTH_CODE, + EVAL_OAUTH_PROVIDER, + createOauthCallbackRouteFixture, +} from "../../fixtures/oauth/callback-route"; + +let testbed: 
Awaited>; + +describe("oauth callback route guards", () => { + beforeEach(async () => { + testbed = await createOauthCallbackRouteFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("returns styled HTML 404 for unknown providers", async () => { + const response = await testbed.runCallbackUrl({ + provider: "unknown", + url: "https://junior.example.com/api/oauth/callback/unknown?code=abc&state=xyz", + }); + + expect(response.status).toBe(404); + const body = await response.text(); + expect(body).toContain(""); + expect(body).toContain("Unknown provider"); + }); + + it("returns styled HTML 400 when code or state is missing", async () => { + const response = await testbed.runCallbackUrl({ + url: `https://junior.example.com/api/oauth/callback/${EVAL_OAUTH_PROVIDER}`, + }); + + expect(response.status).toBe(400); + const body = await response.text(); + expect(body).toContain(""); + expect(body).toContain("missing required parameters"); + }); + + it("returns styled HTML 400 for expired state", async () => { + const response = await testbed.runCallbackUrl({ + url: `https://junior.example.com/api/oauth/callback/${EVAL_OAUTH_PROVIDER}?code=${EVAL_OAUTH_CODE}&state=missing-state`, + }); + + expect(response.status).toBe(400); + const body = await response.text(); + expect(body).toContain(""); + expect(body).toContain("expired"); + expect(body).toContain("connect your"); + expect(body).toContain("account again"); + }); + + it("returns styled HTML 400 for provider mismatch", async () => { + await testbed.storeOAuthState("provider-mismatch", { + provider: "different-provider", + }); + + const response = await testbed.runRoute({ + state: "provider-mismatch", + }); + + expect(response.status).toBe(400); + const body = await response.text(); + expect(body).toContain(""); + expect(body).toContain("mismatch"); + }); +}); diff --git a/packages/junior/tests/integration/oauth/callback-route-provider-errors.test.ts 
b/packages/junior/tests/integration/oauth/callback-route-provider-errors.test.ts new file mode 100644 index 000000000..a5c5267e1 --- /dev/null +++ b/packages/junior/tests/integration/oauth/callback-route-provider-errors.test.ts @@ -0,0 +1,46 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + EVAL_OAUTH_PROVIDER, + createOauthCallbackRouteFixture, +} from "../../fixtures/oauth/callback-route"; + +let testbed: Awaited>; + +describe("oauth callback route provider errors", () => { + beforeEach(async () => { + testbed = await createOauthCallbackRouteFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("deletes callback state and returns declined HTML when the user denies authorization", async () => { + await testbed.storeOAuthState("denied-state"); + + const response = await testbed.runCallbackUrl({ + url: `https://junior.example.com/api/oauth/callback/${EVAL_OAUTH_PROVIDER}?error=access_denied&state=denied-state`, + }); + + expect(response.status).toBe(400); + const body = await response.text(); + expect(body).toContain(""); + expect(body).toContain("declined"); + expect(body).toContain("ask Junior to connect your"); + expect(body).toContain("account again if you change your mind"); + expect(body).not.toContain("auth command"); + expect(await testbed.getOAuthState("denied-state")).toBeFalsy(); + }); + + it("escapes provider-returned error text in the HTML response", async () => { + const response = await testbed.runCallbackUrl({ + url: `https://junior.example.com/api/oauth/callback/${EVAL_OAUTH_PROVIDER}?error=%3Cscript%3Ealert(1)%3C/script%3E&state=xss-state`, + }); + + expect(response.status).toBe(400); + const body = await response.text(); + expect(body).toContain(""); + expect(body).not.toContain(""); + }); + + it("returns HTML 400 when the code parameter is missing", async () => { + const response = await testbed.runCallbackUrl({ + url: 
`https://junior.example.com/api/oauth/callback/mcp/${EVAL_MCP_AUTH_PROVIDER}?state=state-123`, + }); + + expect(response.status).toBe(400); + expect(await response.text()).toContain("Missing code parameter"); + }); + + it("does not reflect callback exception text in the HTML response", async () => { + const response = await testbed.runRoute({ + provider: EVAL_MCP_AUTH_PROVIDER, + state: "", + code: EVAL_MCP_AUTH_CODE, + }); + + expect(response.status).toBe(500); + const body = await response.text(); + expect(body).toContain( + "Junior could not finish the authorization callback. Return to Slack and retry the original request.", + ); + expect(body).not.toContain(""); + }); +}); diff --git a/packages/junior/tests/integration/scheduler-heartbeat-behavior.test.ts b/packages/junior/tests/integration/scheduler-heartbeat-behavior.test.ts new file mode 100644 index 000000000..ae9b2cfbd --- /dev/null +++ b/packages/junior/tests/integration/scheduler-heartbeat-behavior.test.ts @@ -0,0 +1,364 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + createSchedulerStore, + schedulerPlugin, + type ScheduledTask, +} from "@sentry/junior-scheduler"; +import { createPluginState } from "@/chat/plugins/state"; +import { + getDispatchRecord, + getDispatchStorageKey, + updateDispatchRecord, + withDispatchLock, +} from "@/chat/agent-dispatch/store"; +import type { DispatchRecord } from "@/chat/agent-dispatch/types"; +import { getStateAdapter } from "@/chat/state/adapter"; +import { setAgentPlugins } from "@/chat/plugins/agent-hooks"; +import { GET as heartbeat } from "@/handlers/heartbeat"; +import { + createDailyTask, + createTask, + heartbeatRequest, + mockDispatchCallbackFetch, + resetHeartbeatTestEnv, + schedulerStore, + setupHeartbeatTestEnv, + TEST_RUN_AT_MS, + TEST_NOW_MS, +} from "../fixtures/heartbeat"; +import { createWaitUntilCollector } from "../fixtures/wait-until"; +import { getCapturedSlackApiCalls } from "../msw/handlers/slack-api"; + 
+vi.hoisted(() => { + process.env.JUNIOR_STATE_ADAPTER = "memory"; +}); + +describe("scheduler heartbeat behavior", () => { + const originalFetch = global.fetch; + + beforeEach(async () => { + await setupHeartbeatTestEnv(); + }); + + afterEach(async () => { + await resetHeartbeatTestEnv(originalFetch); + }); + + it("claims scheduled tasks from the scheduler legacy state namespace", async () => { + const task = createTask({ id: "sched_legacy" }); + const state = getStateAdapter(); + await state.connect(); + await state.set("junior:scheduler:tasks", [task.id]); + await state.set("junior:scheduler:team:T123:tasks", [task.id]); + await state.set("junior:scheduler:task:sched_legacy", task); + + const store = createSchedulerStore( + createPluginState("scheduler", { + legacyStatePrefixes: ["junior:scheduler"], + }), + ); + + await expect(store.listTasksForTeam("T123")).resolves.toMatchObject([ + { id: task.id }, + ]); + await expect( + store.claimDueRun({ nowMs: TEST_NOW_MS }), + ).resolves.toMatchObject({ + taskId: task.id, + }); + }); + + it("dispatches and reconciles scheduled runs from the scheduler plugin", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([schedulerPlugin()]); + const store = schedulerStore(); + await store.saveTask(createTask()); + + const firstWaitUntil = createWaitUntilCollector(); + const firstResponse = await heartbeat( + heartbeatRequest(), + firstWaitUntil.fn, + ); + expect(firstResponse.status).toBe(202); + await firstWaitUntil.flush(); + + const running = await store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); + expect(running).toMatchObject({ + status: "running", + dispatchId: expect.any(String), + }); + expect(fetchMock).toHaveBeenCalledTimes(1); + + await withDispatchLock(running!.dispatchId!, async (state) => { + const record = await state.get( + getDispatchStorageKey(running!.dispatchId!), + ); + if (!record) { + 
throw new Error("Expected dispatch record to exist"); + } + await updateDispatchRecord(state, { + ...record, + resultMessageTs: "1700000000.000001", + status: "completed", + }); + }); + + const secondWaitUntil = createWaitUntilCollector(); + const secondResponse = await heartbeat( + heartbeatRequest(), + secondWaitUntil.fn, + ); + expect(secondResponse.status).toBe(202); + await secondWaitUntil.flush(); + + await expect(store.getRun(running!.id)).resolves.toMatchObject({ + status: "completed", + resultMessageTs: "1700000000.000001", + }); + await expect(store.getTask("sched_plugin_1")).resolves.toMatchObject({ + lastRunAtMs: Date.parse("2026-05-26T12:00:00.000Z"), + status: "paused", + }); + }); + + it("carries scheduled task credential subjects into dispatch records", async () => { + mockDispatchCallbackFetch(originalFetch); + setAgentPlugins([schedulerPlugin()]); + const store = schedulerStore(); + await store.saveTask( + createTask({ + destination: { + platform: "slack", + teamId: "T123", + channelId: "D123", + }, + credentialSubject: { + type: "user", + userId: "U123", + allowedWhen: "private-direct-conversation", + }, + }), + ); + + const waitUntil = createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn); + expect(response.status).toBe(202); + await waitUntil.flush(); + + const running = await store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); + expect(running?.dispatchId).toEqual(expect.any(String)); + await expect( + getDispatchRecord(running!.dispatchId!), + ).resolves.toMatchObject({ + credentialSubject: { + type: "user", + userId: "U123", + allowedWhen: "private-direct-conversation", + binding: { + type: "slack-direct-conversation", + teamId: "T123", + channelId: "D123", + signature: expect.any(String), + }, + }, + }); + expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); + }); + + it("fails scheduled runs when their dispatch record disappeared", async () => { + const fetchMock = vi.fn(async () 
=> { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([schedulerPlugin()]); + const store = schedulerStore(); + await store.saveTask(createTask()); + + const firstWaitUntil = createWaitUntilCollector(); + const firstResponse = await heartbeat( + heartbeatRequest(), + firstWaitUntil.fn, + ); + expect(firstResponse.status).toBe(202); + await firstWaitUntil.flush(); + + const running = await store.getRun(`sched_plugin_1:${TEST_RUN_AT_MS}`); + expect(running).toMatchObject({ + status: "running", + dispatchId: expect.any(String), + }); + const state = getStateAdapter(); + await state.connect(); + await state.delete(getDispatchStorageKey(running!.dispatchId!)); + + const secondWaitUntil = createWaitUntilCollector(); + const secondResponse = await heartbeat( + heartbeatRequest(), + secondWaitUntil.fn, + ); + expect(secondResponse.status).toBe(202); + await secondWaitUntil.flush(); + + await expect(store.getRun(running!.id)).resolves.toMatchObject({ + status: "failed", + errorMessage: "Scheduled task dispatch record is missing.", + }); + await expect(store.getTask("sched_plugin_1")).resolves.toMatchObject({ + status: "paused", + }); + }); + + it("blocks malformed scheduled tasks without stopping the scheduler plugin heartbeat", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([schedulerPlugin()]); + const store = schedulerStore(); + await store.saveTask({ + ...createTask(), + id: "sched_plugin_malformed", + task: { + text: undefined, + } as unknown as ScheduledTask["task"], + }); + + const waitUntil = createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn); + expect(response.status).toBe(202); + await waitUntil.flush(); + + await expect( + store.getRun(`sched_plugin_malformed:${TEST_RUN_AT_MS}`), + ).resolves.toMatchObject({ + status: 
"blocked", + errorMessage: expect.stringContaining( + "Scheduled task prompt could not be built", + ), + }); + await expect( + store.getTask("sched_plugin_malformed"), + ).resolves.toMatchObject({ + status: "blocked", + statusReason: expect.stringContaining( + "Scheduled task prompt could not be built", + ), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("blocks scheduled runs with invalid credential routing without stopping the heartbeat", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([schedulerPlugin()]); + const store = schedulerStore(); + await store.saveTask({ + ...createTask(), + id: "sched_plugin_bad_credential_route", + credentialSubject: { + type: "user", + userId: "U123", + allowedWhen: "private-direct-conversation", + }, + }); + + const waitUntil = createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn); + expect(response.status).toBe(202); + await waitUntil.flush(); + + await expect( + store.getRun(`sched_plugin_bad_credential_route:${TEST_RUN_AT_MS}`), + ).resolves.toMatchObject({ + status: "blocked", + errorMessage: expect.stringContaining( + "Scheduled task dispatch could not be created", + ), + }); + await expect( + store.getTask("sched_plugin_bad_credential_route"), + ).resolves.toMatchObject({ + status: "blocked", + statusReason: expect.stringContaining( + "Scheduled task dispatch could not be created", + ), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("skips old recurring occurrences and advances to the next future run", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([schedulerPlugin()]); + const store = schedulerStore(); + const task = createDailyTask(); + await store.saveTask(task); + + const waitUntil = 
createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn); + expect(response.status).toBe(202); + await waitUntil.flush(); + + await expect( + store.getRun(`${task.id}:${task.nextRunAtMs}`), + ).resolves.toMatchObject({ + status: "skipped", + errorMessage: expect.stringContaining("more than 24 hours late"), + }); + await expect(store.getTask(task.id)).resolves.toMatchObject({ + status: "active", + nextRunAtMs: Date.parse("2026-05-27T12:00:00.000Z"), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("dedupes equivalent old recurring tasks during heartbeat recovery", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + setAgentPlugins([schedulerPlugin()]); + const store = schedulerStore(); + const first = createDailyTask({ + id: "sched_plugin_duplicate_a", + createdAtMs: Date.parse("2026-05-24T12:00:00.000Z"), + }); + const duplicate = createDailyTask({ + id: "sched_plugin_duplicate_b", + createdAtMs: Date.parse("2026-05-24T12:00:01.000Z"), + }); + await store.saveTask(first); + await store.saveTask(duplicate); + + const waitUntil = createWaitUntilCollector(); + const response = await heartbeat(heartbeatRequest(), waitUntil.fn); + expect(response.status).toBe(202); + await waitUntil.flush(); + + await expect( + store.getRun(`${duplicate.id}:${duplicate.nextRunAtMs}`), + ).resolves.toMatchObject({ + status: "skipped", + errorMessage: expect.stringContaining( + "Duplicate stale scheduled task was skipped", + ), + }); + await expect(store.getTask(first.id)).resolves.toMatchObject({ + status: "active", + nextRunAtMs: Date.parse("2026-05-27T12:00:00.000Z"), + }); + await expect(store.getTask(duplicate.id)).resolves.toMatchObject({ + status: "paused", + nextRunAtMs: undefined, + statusReason: expect.stringContaining(first.id), + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); +}); diff --git 
a/packages/junior/tests/integration/slack-schedule-plugin-wiring.test.ts b/packages/junior/tests/integration/slack-schedule-plugin-wiring.test.ts new file mode 100644 index 000000000..e40279402 --- /dev/null +++ b/packages/junior/tests/integration/slack-schedule-plugin-wiring.test.ts @@ -0,0 +1,71 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + createSchedulerStore, + schedulerPlugin, +} from "@sentry/junior-scheduler"; +import { + getAgentPluginTools, + setAgentPlugins, +} from "@/chat/plugins/agent-hooks"; +import { createPluginState } from "@/chat/plugins/state"; +import { + cleanupSlackScheduleToolTest, + executeTool, + setupSlackScheduleToolTest, +} from "../fixtures/slack/schedule-tools"; + +describe("Slack schedule plugin wiring", () => { + beforeEach(setupSlackScheduleToolTest); + + afterEach(async () => { + setAgentPlugins([]); + await cleanupSlackScheduleToolTest(); + }); + + it("binds scheduler tasks to the active Slack conversation", async () => { + const previous = setAgentPlugins([schedulerPlugin()]); + try { + const teamId = `TWIRING${Date.now()}`; + const tools = getAgentPluginTools({ + destination: { + platform: "slack", + teamId, + channelId: "DDM", + }, + source: { + platform: "slack", + teamId, + channelId: "CASSISTANT", + }, + requester: { + platform: "slack", + teamId, + userId: "U123", + userName: "alice", + fullName: "Alice", + }, + sandbox: {} as Parameters[0]["sandbox"], + }); + + expect(tools).toHaveProperty("slackScheduleCreateTask"); + + const result = await executeTool(tools.slackScheduleCreateTask, { + task: "Wiring test: post a weekly digest.", + schedule: "Every Monday at 9am", + timezone: "America/Los_Angeles", + next_run_at: "2026-06-09T16:00:00.000Z", + recurrence: "weekly", + }); + + expect(result).toMatchObject({ ok: true }); + const taskId = (result as { task: { id: string } }).task.id; + await expect( + createSchedulerStore(createPluginState("scheduler")).getTask(taskId), + 
).resolves.toMatchObject({ + destination: { channelId: "CASSISTANT", teamId }, + }); + } finally { + setAgentPlugins(previous); + } + }); +}); diff --git a/packages/junior/tests/integration/slack-schedule-tools.test.ts b/packages/junior/tests/integration/slack-schedule-tools.test.ts deleted file mode 100644 index 109c6eded..000000000 --- a/packages/junior/tests/integration/slack-schedule-tools.test.ts +++ /dev/null @@ -1,1190 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { - AgentPluginToolInputError, - type AgentPluginToolDefinition, -} from "@sentry/junior-plugin-api"; -import { - createSchedulerStore, - createSlackScheduleCreateTaskTool, - createSlackScheduleDeleteTaskTool, - createSlackScheduleListTasksTool, - createSlackScheduleRunTaskNowTool, - createSlackScheduleUpdateTaskTool, - type ScheduledTask, - type SchedulerToolContext, -} from "@sentry/junior-scheduler"; -import { createSlackDirectCredentialSubject } from "@/chat/credentials/subject"; -import { - getAgentPluginTools, - setAgentPlugins, -} from "@/chat/plugins/agent-hooks"; -import { createPluginState } from "@/chat/plugins/state"; -import { disconnectStateAdapter } from "@/chat/state/adapter"; -import { schedulerPlugin } from "@sentry/junior-scheduler"; - -vi.hoisted(() => { - process.env.JUNIOR_STATE_ADAPTER = "memory"; -}); - -const TEST_TEAM_ID = `TSCHEDULE${Date.now()}`; - -function createContext( - overrides: Partial & { - channelId?: string; - teamId?: string; - } = {}, -): SchedulerToolContext { - const channelId = overrides.channelId ?? "C123"; - const teamId = overrides.teamId ?? 
TEST_TEAM_ID; - const contextOverrides = { ...overrides }; - delete contextOverrides.channelId; - delete contextOverrides.teamId; - const context: SchedulerToolContext = { - source: { - platform: "slack", - teamId, - channelId, - }, - requester: { - platform: "slack", - teamId, - userId: "U123", - userName: "dcramer", - fullName: "David Cramer", - }, - userText: "schedule this weekly", - state: createPluginState("scheduler"), - ...contextOverrides, - }; - const credentialSubject = - context.credentialSubject ?? - createSlackDirectCredentialSubject({ - channelId: context.source?.channelId, - teamId: context.source?.teamId, - userId: context.requester?.userId, - }); - return { - ...context, - ...(credentialSubject ? { credentialSubject } : {}), - }; -} - -async function executeTool( - tool: AgentPluginToolDefinition, - input: TInput, -) { - if (typeof tool?.execute !== "function") { - throw new Error("tool execute function missing"); - } - return await tool.execute(input, {}); -} - -function schedulerStore() { - return createSchedulerStore(createPluginState("scheduler")); -} - -async function createTask( - context = createContext(), - overrides: Record = {}, -) { - const tool = createSlackScheduleCreateTaskTool(context); - return await executeTool(tool, { - task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", - schedule: "Every Monday at 9am", - timezone: "America/Los_Angeles", - next_run_at: "2026-05-25T16:00:00.000Z", - recurrence: "weekly", - ...overrides, - }); -} - -describe("Slack schedule tools", () => { - beforeEach(async () => { - await disconnectStateAdapter(); - }); - - afterEach(async () => { - vi.useRealTimers(); - delete process.env.JUNIOR_TIMEZONE; - await disconnectStateAdapter(); - }); - - it("creates and lists tasks only for the active Slack conversation", async () => { - const created = await createTask(); - expect(created).toMatchObject({ - ok: true, - task: { - conversation_access: { - audience: "channel", - 
visibility: "unknown", - }, - credential_subject: null, - status: "active", - task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", - recurrence: { - frequency: "weekly", - interval: 1, - weekdays: [1], - }, - next_run_at: "2026-05-25T16:00:00.000Z", - }, - }); - - const listed = await executeTool( - createSlackScheduleListTasksTool(createContext()), - {}, - ); - expect(listed).toMatchObject({ - ok: true, - tasks: [ - { - task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", - schedule: "Every Monday at 9am", - }, - ], - }); - - const sameChannelOtherThread = await executeTool( - createSlackScheduleListTasksTool(createContext()), - {}, - ); - expect(sameChannelOtherThread).toMatchObject({ - ok: true, - tasks: [ - { - task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", - schedule: "Every Monday at 9am", - }, - ], - }); - }); - - it("creates clear recurring tasks without a second confirmation", async () => { - const result = await executeTool( - createSlackScheduleCreateTaskTool(createContext()), - { - task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", - schedule: "Every Monday at 9am", - timezone: "America/Los_Angeles", - next_run_at: "2026-05-25T16:00:00.000Z", - recurrence: "weekly", - }, - ); - - expect(result).toMatchObject({ - ok: true, - task: { - schedule: "Every Monday at 9am", - status: "active", - task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", - }, - }); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toMatchObject([ - { - destination: { channelId: "C123" }, - status: "active", - }, - ]); - }); - - it("does not store Slack ids as creator display identity", async () => { - const created = (await createTask( - createContext({ - requester: { - platform: "slack", - teamId: TEST_TEAM_ID, - userId: "U039RR91S", - userName: "unknown", - 
fullName: "W039RR91S", - }, - }), - )) as { task: { id: string } }; - - await expect(schedulerStore().getTask(created.task.id)).resolves.toEqual( - expect.objectContaining({ - createdBy: { - slackUserId: "U039RR91S", - }, - }), - ); - }); - - it("rejects synthetic unknown requester ids before creating a task", async () => { - const rejected = createTask( - createContext({ - requester: { - platform: "slack", - teamId: TEST_TEAM_ID, - userId: "unknown", - userName: "unknown", - fullName: "unknown", - }, - }), - ); - - await expect(rejected).rejects.toThrow(AgentPluginToolInputError); - await expect(rejected).rejects.toThrow( - "No active Slack requester context is available.", - ); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("rejects invalid Slack source before creating a task", async () => { - const rejected = executeTool( - createSlackScheduleCreateTaskTool(createContext({ teamId: "D123" })), - { - task: "Reminder: Remind David to wash his hands.", - schedule: "In 1 minute", - next_run_at: "2026-05-27T00:25:23.000Z", - }, - ); - - await expect(rejected).rejects.toThrow(AgentPluginToolInputError); - await expect(rejected).rejects.toThrow( - "Active Slack conversation workspace is invalid.", - ); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("rejects non-canonical Slack source context before creating a task", async () => { - const rejected = createTask( - createContext({ - source: { - platform: "slack", - teamId: TEST_TEAM_ID, - channelId: "C123", - threadTs: "1700000000.000", - } as SchedulerToolContext["source"], - }), - ); - - await expect(rejected).rejects.toThrow(AgentPluginToolInputError); - await expect(rejected).rejects.toThrow( - "Active Slack conversation must not include unknown fields.", - ); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("rejects invalid Slack credential 
subject context before creating a task", async () => { - const rejected = createTask( - createContext({ - channelId: "D123", - credentialSubject: { - type: "user", - userId: "U123", - allowedWhen: "private-direct-conversation", - binding: { - type: "slack-direct-conversation", - teamId: TEST_TEAM_ID, - channelId: "D123", - signature: "v1=test", - }, - } as SchedulerToolContext["credentialSubject"], - }), - ); - - await expect(rejected).rejects.toThrow(AgentPluginToolInputError); - await expect(rejected).rejects.toThrow( - "Active Slack credential subject is invalid.", - ); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("rejects invalid scheduled task routing context at the store boundary", async () => { - await createTask(); - const task = (await schedulerStore().listTasks()).at(0); - if (!task) { - throw new Error("Expected scheduled task to be created"); - } - - await expect( - schedulerStore().saveTask({ - ...task, - id: "sched_bad_destination", - destination: { - platform: "slack", - teamId: "D_BAD_TEAM", - channelId: "D123", - }, - }), - ).rejects.toThrow("Scheduled task routing context is invalid."); - await expect( - schedulerStore().getTask("sched_bad_destination"), - ).resolves.toBe(undefined); - - await expect( - schedulerStore().saveTask({ - ...task, - id: "sched_bad_credential_subject", - destination: { - platform: "slack", - teamId: TEST_TEAM_ID, - channelId: "D123", - }, - credentialSubject: { - type: "user", - userId: "U123", - allowedWhen: "private-direct-conversation", - binding: { - type: "slack-direct-conversation", - teamId: TEST_TEAM_ID, - channelId: "D123", - signature: "v1=test", - }, - } as ScheduledTask["credentialSubject"], - }), - ).rejects.toThrow("Scheduled task routing context is invalid."); - await expect( - schedulerStore().getTask("sched_bad_credential_subject"), - ).resolves.toBe(undefined); - }); - - it("creates explicit one-off reminders without a second confirmation", 
async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-05-27T00:24:23.000Z")); - - const result = await executeTool( - createSlackScheduleCreateTaskTool( - createContext({ - channelId: "D123", - userText: "remind me in 1 minute to wash my hands", - }), - ), - { - task: "Wash hands reminder: Remind David to wash his hands.", - schedule: "In 1 minute", - next_run_at: "2026-05-27T00:25:23.000Z", - }, - ); - - expect(result).toMatchObject({ - ok: true, - task: { - next_run_at: "2026-05-27T00:25:23.000Z", - schedule: "In 1 minute", - status: "active", - task: "Wash hands reminder: Remind David to wash his hands.", - }, - }); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toMatchObject([ - { - conversationAccess: { - audience: "direct", - visibility: "private", - }, - credentialSubject: { - type: "user", - userId: "U123", - allowedWhen: "private-direct-conversation", - }, - destination: { channelId: "D123" }, - nextRunAtMs: Date.parse("2026-05-27T00:25:23.000Z"), - status: "active", - }, - ]); - }); - - it("creates short imperative one-off reminders without channel confirmation", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-05-27T00:24:23.000Z")); - - const result = await executeTool( - createSlackScheduleCreateTaskTool( - createContext({ - userText: "drink water in 1 minute in this conversation", - }), - ), - { - task: "Drink water reminder: Remind David to drink water.", - schedule: "In 1 minute", - next_run_at: "2026-05-27T00:25:23.000Z", - }, - ); - - expect(result).toMatchObject({ - ok: true, - task: { - next_run_at: "2026-05-27T00:25:23.000Z", - schedule: "In 1 minute", - status: "active", - task: "Drink water reminder: Remind David to drink water.", - }, - }); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toMatchObject([ - { - destination: { channelId: "C123" }, - nextRunAtMs: Date.parse("2026-05-27T00:25:23.000Z"), - status: "active", - }, - ]); - }); - - 
it("creates one-off reminders by omitting recurrence", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-05-28T02:17:48.005Z")); - - const result = await executeTool( - createSlackScheduleCreateTaskTool( - createContext({ - userText: "remind greg to drink water in 1m", - }), - ), - { - task: "Remind Greg to drink water.", - schedule: "In 1 minute", - next_run_at: "2026-05-28T02:18:48.005Z", - }, - ); - - expect(result).toMatchObject({ - ok: true, - task: { - next_run_at: "2026-05-28T02:18:48.005Z", - recurrence: null, - schedule: "In 1 minute", - status: "active", - task: "Remind Greg to drink water.", - }, - }); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toMatchObject([ - { - nextRunAtMs: Date.parse("2026-05-28T02:18:48.005Z"), - schedule: { - kind: "one_off", - recurrence: undefined, - }, - status: "active", - }, - ]); - }); - - it("rejects parseable non-ISO next run timestamps", async () => { - await expect( - createTask(createContext(), { - next_run_at: "05/25/2026 09:00", - }), - ).rejects.toThrow("Provide next_run_at as a valid ISO timestamp."); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("rejects missing next run timestamps with a tool error", async () => { - await expect( - createTask(createContext(), { - next_run_at: undefined, - }), - ).rejects.toThrow("Provide next_run_at as a valid ISO timestamp."); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("rejects recurring schedules that can run more than once per day", async () => { - await expect( - createTask(createContext(), { - schedule: "Every hour", - recurrence: "hourly", - }), - ).rejects.toThrow( - "Recurring scheduled tasks can run at most once per day.", - ); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("edits and deletes a task from the same Slack destination", 
async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - const taskId = created.task.id; - - const updated = await executeTool( - createSlackScheduleUpdateTaskTool(context), - { - task_id: taskId, - task: "Daily scheduler digest: Summarize open scheduler issues.", - schedule: "Every day at 9am", - recurrence: "daily", - }, - ); - expect(updated).toMatchObject({ - ok: true, - task: { - id: taskId, - task: "Daily scheduler digest: Summarize open scheduler issues.", - schedule: "Every day at 9am", - version: 2, - }, - }); - - const deleted = await executeTool( - createSlackScheduleDeleteTaskTool(context), - { - task_id: taskId, - }, - ); - expect(deleted).toMatchObject({ - ok: true, - task: { - id: taskId, - status: "deleted", - }, - }); - - const listed = await executeTool( - createSlackScheduleListTasksTool(context), - {}, - ); - expect(listed).toMatchObject({ ok: true, tasks: [] }); - }); - - it("rejects edits that make a recurring task run more than once per day", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - - await expect( - executeTool(createSlackScheduleUpdateTaskTool(context), { - task_id: created.task.id, - schedule: "Every hour", - recurrence: "hourly", - }), - ).rejects.toThrow( - "Recurring scheduled tasks can run at most once per day.", - ); - await expect( - schedulerStore().getTask(created.task.id), - ).resolves.toMatchObject({ - schedule: { - description: "Every Monday at 9am", - }, - version: 1, - }); - }); - - it("converts recurring tasks to one-off tasks with recurrence null", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - - const updated = await executeTool( - createSlackScheduleUpdateTaskTool(context), - { - task_id: created.task.id, - schedule: "On June 1 at 9am", - next_run_at: "2026-06-01T16:00:00.000Z", 
- recurrence: null, - }, - ); - - expect(updated).toMatchObject({ - ok: true, - task: { - id: created.task.id, - next_run_at: "2026-06-01T16:00:00.000Z", - recurrence: null, - schedule: "On June 1 at 9am", - }, - }); - await expect( - schedulerStore().getTask(created.task.id), - ).resolves.toMatchObject({ - schedule: { - kind: "one_off", - recurrence: undefined, - }, - }); - }); - - it("rejects edits from another active Slack conversation", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - - await expect( - executeTool( - createSlackScheduleUpdateTaskTool(createContext({ channelId: "C999" })), - { - task_id: created.task.id, - task: "Wrong channel edit.", - }, - ), - ).rejects.toThrow( - "Scheduled task can only be managed from the Slack destination where it was created.", - ); - }); - - it("binds tasks to the raw conversation channel, not the assistant context channel", async () => { - // The scheduler receives an active Source built from the raw conversation - // channel by runtime wiring. Management works from any context with the - // same source conversation. - // - // In practice: a DM opened via Slack’s “Ask Junior” panel from #js-alerts - // has getAgentPluginTools build source.channelId = DDM rather than using - // the outbound assistant-context channel. Both creation and management - // from that DM use DDM, so the stored task destination never drifts. - const dmCtx = createContext({ channelId: "DDM" }); - const created = (await createTask(dmCtx)) as { task: { id: string } }; - const taskId = created.task.id; - - // Task is bound to the DM channel, not any assistant source channel. - await expect(schedulerStore().getTask(taskId)).resolves.toMatchObject({ - destination: { channelId: "DDM" }, - }); - - // Any context that resolves to the same DM channel can list and manage. 
- const listed = await executeTool( - createSlackScheduleListTasksTool(createContext({ channelId: "DDM" })), - {}, - ); - expect(listed).toMatchObject({ - ok: true, - tasks: [{ id: taskId }], - }); - - const deleted = await executeTool( - createSlackScheduleDeleteTaskTool(createContext({ channelId: "DDM" })), - { task_id: taskId }, - ); - expect(deleted).toMatchObject({ - ok: true, - task: { id: taskId, status: "deleted" }, - }); - }); - - it("rejects management from a different conversation channel", async () => { - // A task created in Alice’s DM cannot be managed from Bob’s DM. - const created = (await createTask( - createContext({ channelId: "DALICE" }), - )) as { task: { id: string } }; - - await expect( - executeTool( - createSlackScheduleDeleteTaskTool(createContext({ channelId: "DBOB" })), - { task_id: created.task.id }, - ), - ).rejects.toThrow( - "Scheduled task can only be managed from the Slack destination where it was created.", - ); - }); - - it("allows another requester to manage tasks in the same Slack destination", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - const otherRequester = createContext({ - requester: { - platform: "slack", - teamId: TEST_TEAM_ID, - userId: "U999", - userName: "alice", - fullName: "Alice Reviewer", - }, - }); - - const updated = await executeTool( - createSlackScheduleUpdateTaskTool(otherRequester), - { - task_id: created.task.id, - task: "Team-owned digest: Summarize open scheduler issues.", - }, - ); - const deleted = await executeTool( - createSlackScheduleDeleteTaskTool(otherRequester), - { - task_id: created.task.id, - }, - ); - - expect(updated).toMatchObject({ - ok: true, - task: { - id: created.task.id, - task: "Team-owned digest: Summarize open scheduler issues.", - version: 2, - }, - }); - expect(deleted).toMatchObject({ - ok: true, - task: { - id: created.task.id, - status: "deleted", - }, - }); - await expect( - 
schedulerStore().getTask(created.task.id), - ).resolves.toMatchObject({ - status: "deleted", - executionActor: { - type: "system", - id: "scheduled-task", - }, - task: { - text: "Team-owned digest: Summarize open scheduler issues.", - }, - version: 3, - }); - }); - - it("does not delegate user credentials in private group conversations", async () => { - const result = await createTask(createContext({ channelId: "G123" })); - - expect(result).toMatchObject({ - ok: true, - task: { - conversation_access: { - audience: "group", - visibility: "private", - }, - credential_subject: null, - }, - }); - const tasks = await schedulerStore().listTasksForTeam(TEST_TEAM_ID); - expect(tasks).toMatchObject([ - { - conversationAccess: { - audience: "group", - visibility: "private", - }, - destination: { channelId: "G123" }, - }, - ]); - expect(tasks[0]?.credentialSubject).toBeUndefined(); - }); - - it("rejects non-canonical Slack sources before storing tasks", async () => { - const context = createContext({ channelId: "D123" }); - await expect( - createTask( - { - ...context, - source: { - platform: "slack", - teamId: TEST_TEAM_ID, - channelId: "slack:D123:1700000000.000", - }, - }, - { - schedule: "In 1 minute", - next_run_at: "2026-05-27T00:25:23.000Z", - recurrence: undefined, - }, - ), - ).rejects.toThrow("Active Slack conversation channel is invalid."); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("stores canonical Slack destinations directly", async () => { - const result = await createTask(createContext({ channelId: "D123" }), { - schedule: "In 1 minute", - next_run_at: "2026-05-27T00:25:23.000Z", - recurrence: undefined, - }); - - expect(result).toMatchObject({ - ok: true, - task: { - conversation_access: { - audience: "direct", - visibility: "private", - }, - }, - }); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toMatchObject([ - { - destination: { channelId: "D123" }, - }, - 
]); - }); - - it("creates one-off tasks with an exact timestamp using the default Pacific timezone", async () => { - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-05-25T12:00:00.000Z")); - - const created = await createTask(createContext(), { - schedule: "On May 26 at 9am", - next_run_at: "2026-05-26T16:00:00.000Z", - recurrence: undefined, - timezone: undefined, - }); - - expect(created).toMatchObject({ - ok: true, - task: { - next_run_at: "2026-05-26T16:00:00.000Z", - recurrence: null, - timezone: "America/Los_Angeles", - }, - }); - }); - - it("uses JUNIOR_TIMEZONE as the default schedule timezone", async () => { - process.env.JUNIOR_TIMEZONE = "America/New_York"; - vi.useFakeTimers(); - vi.setSystemTime(new Date("2026-05-25T12:00:00.000Z")); - - const created = await createTask(createContext(), { - schedule: "On May 26 at 9am", - next_run_at: "2026-05-26T13:00:00.000Z", - recurrence: undefined, - timezone: undefined, - }); - - expect(created).toMatchObject({ - ok: true, - task: { - next_run_at: "2026-05-26T13:00:00.000Z", - recurrence: null, - timezone: "America/New_York", - }, - }); - }); - - it("rejects invalid default timezones", async () => { - process.env.JUNIOR_TIMEZONE = "not/a-zone"; - - await expect( - createTask(createContext(), { - timezone: undefined, - }), - ).rejects.toThrow("timezone must be a valid IANA time zone."); - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("preserves a recurring task calendar anchor on content-only edits", async () => { - const context = createContext(); - const created = (await createTask(context, { - recurrence: "weekly", - })) as { - task: { id: string }; - }; - const store = schedulerStore(); - const task = await store.getTask(created.task.id); - expect(task?.schedule.recurrence).toMatchObject({ - interval: 1, - startDate: "2026-05-25", - }); - await store.saveTask({ - ...task!, - nextRunAtMs: Date.parse("2026-06-08T16:00:00.000Z"), - updatedAtMs: 
Date.parse("2026-05-26T16:00:00.000Z"), - version: task!.version + 1, - }); - - const updated = await executeTool( - createSlackScheduleUpdateTaskTool(context), - { - task_id: created.task.id, - task: "Renamed issue digest: Summarize open scheduler issues.", - }, - ); - - expect(updated).toMatchObject({ - ok: true, - task: { - task: "Renamed issue digest: Summarize open scheduler issues.", - }, - }); - await expect(store.getTask(created.task.id)).resolves.toMatchObject({ - nextRunAtMs: Date.parse("2026-06-08T16:00:00.000Z"), - schedule: { - recurrence: { - interval: 1, - startDate: "2026-05-25", - }, - }, - }); - }); - - it("clears stale block reasons when resuming a task", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - const store = schedulerStore(); - const task = await store.getTask(created.task.id); - expect(task).toBeDefined(); - await store.saveTask({ - ...task!, - status: "blocked", - statusReason: "Missing GitHub credentials.", - updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), - version: task!.version + 1, - }); - - const updated = await executeTool( - createSlackScheduleUpdateTaskTool(context), - { - task_id: created.task.id, - status: "active", - }, - ); - - expect(updated).toMatchObject({ - ok: true, - task: { - id: created.task.id, - status: "active", - }, - }); - const resumed = await store.getTask(created.task.id); - expect(resumed).toMatchObject({ - status: "active", - }); - expect(resumed?.statusReason).toBeUndefined(); - }); - - it("marks an active task due immediately without changing its scheduled next run", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - const store = schedulerStore(); - const task = await store.getTask(created.task.id); - expect(task).toBeDefined(); - const scheduledNextRunAtMs = Date.parse("2026-06-01T16:00:00.000Z"); - await store.saveTask({ - ...task!, - 
nextRunAtMs: scheduledNextRunAtMs, - updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), - version: task!.version + 1, - }); - - const beforeMs = Date.now(); - const result = await executeTool( - createSlackScheduleRunTaskNowTool(context), - { - task_id: created.task.id, - }, - ); - const afterMs = Date.now(); - - expect(result).toMatchObject({ - ok: true, - task: { - id: created.task.id, - status: "active", - next_run_at: "2026-06-01T16:00:00.000Z", - }, - }); - const due = await store.getTask(created.task.id); - expect(due).toMatchObject({ - status: "active", - nextRunAtMs: scheduledNextRunAtMs, - destination: { - teamId: context.source?.teamId, - channelId: context.source?.channelId, - }, - createdBy: { - slackUserId: context.requester?.userId, - }, - }); - expect(due?.statusReason).toBeUndefined(); - expect(due?.runNowAtMs).toBeGreaterThanOrEqual(beforeMs); - expect(due?.runNowAtMs).toBeLessThanOrEqual(afterMs); - - await expect(store.claimDueRun({ nowMs: afterMs })).resolves.toMatchObject({ - taskId: created.task.id, - scheduledForMs: due?.runNowAtMs, - status: "pending", - }); - }); - - it("does not run-now a paused task without an explicit resume", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - const store = schedulerStore(); - const task = await store.getTask(created.task.id); - expect(task).toBeDefined(); - await store.saveTask({ - ...task!, - status: "paused", - statusReason: "Paused by user.", - updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), - version: task!.version + 1, - }); - - await expect( - executeTool(createSlackScheduleRunTaskNowTool(context), { - task_id: created.task.id, - }), - ).rejects.toThrow( - "Scheduled task must be active before it can be run now. 
Resume the task first if you want it to run.", - ); - const paused = await store.getTask(created.task.id); - expect(paused).toMatchObject({ - status: "paused", - statusReason: "Paused by user.", - }); - expect(paused?.runNowAtMs).toBeUndefined(); - }); - - it("removes deleted tasks from scheduler listings", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - - await executeTool(createSlackScheduleDeleteTaskTool(context), { - task_id: created.task.id, - }); - - await expect( - schedulerStore().listTasksForTeam(TEST_TEAM_ID), - ).resolves.toEqual([]); - }); - - it("claims due runs idempotently", async () => { - const context = createContext(); - const created = (await createTask(context)) as { - task: { id: string }; - }; - const store = schedulerStore(); - const task = await store.getTask(created.task.id); - expect(task).toBeDefined(); - await store.saveTask({ - ...task!, - nextRunAtMs: 1000, - updatedAtMs: 1000, - }); - - const first = await store.claimDueRun({ nowMs: 2000 }); - const second = await store.claimDueRun({ nowMs: 2000 }); - - expect(first).toMatchObject({ - taskId: created.task.id, - scheduledForMs: 1000, - status: "pending", - }); - expect(second).toBeUndefined(); - }); -}); - -describe("Slack schedule tool wiring via getAgentPluginTools", () => { - // These tests exercise the real agent-hooks.ts path where the runtime-owned - // Destination is passed through to the scheduler plugin. - - beforeEach(async () => { - await disconnectStateAdapter(); - }); - - afterEach(async () => { - await disconnectStateAdapter(); - }); - - it("scheduler tools bind to the runtime-owned source", async () => { - // Verifies that real getAgentPluginTools wiring passes Source through to - // the scheduler, which stores it as the task destination. 
- const previous = setAgentPlugins([schedulerPlugin()]); - try { - const TEAM_ID = `TWIRING${Date.now()}`; - const tools = getAgentPluginTools({ - source: { - platform: "slack", - teamId: TEAM_ID, - channelId: "DDM", - }, - destination: { - platform: "slack", - teamId: TEAM_ID, - channelId: "DDM", - }, - requester: { - platform: "slack", - teamId: TEAM_ID, - userId: "U123", - userName: "alice", - fullName: "Alice", - }, - sandbox: {} as Parameters[0]["sandbox"], - }); - - expect(tools).toHaveProperty("slackScheduleCreateTask"); - - // Create a task through the real wired tool. - const result = await executeTool(tools.slackScheduleCreateTask, { - task: "Wiring test: post a weekly digest.", - schedule: "Every Monday at 9am", - timezone: "America/Los_Angeles", - next_run_at: "2026-06-09T16:00:00.000Z", - recurrence: "weekly", - }); - - expect(result).toMatchObject({ ok: true }); - const taskId = (result as { task: { id: string } }).task.id; - - // Task destination must be the raw DM channel, NOT the assistant context. - const stored = await createSchedulerStore( - createPluginState("scheduler"), - ).getTask(taskId); - expect(stored).toMatchObject({ - destination: { channelId: "DDM", teamId: TEAM_ID }, - conversationAccess: { audience: "direct", visibility: "private" }, - }); - // DM-based task gets a credential subject (private-direct exception). 
- expect(stored?.credentialSubject).toMatchObject({ - type: "user", - userId: "U123", - allowedWhen: "private-direct-conversation", - }); - } finally { - setAgentPlugins(previous); - } - }); -}); - -describe("Slack schedule tool execution modes", () => { - it("all write tools have executionMode sequential", () => { - const context = createContext(); - - const createTool = createSlackScheduleCreateTaskTool(context); - const listTool = createSlackScheduleListTasksTool(context); - const updateTool = createSlackScheduleUpdateTaskTool(context); - const deleteTool = createSlackScheduleDeleteTaskTool(context); - const runNowTool = createSlackScheduleRunTaskNowTool(context); - - // Write tools must force sequential execution so a same-turn - // slackScheduleListTasks call cannot race ahead of a preceding - // slackScheduleCreateTask / update / delete write. - expect(createTool.executionMode).toBe("sequential"); - expect(updateTool.executionMode).toBe("sequential"); - expect(deleteTool.executionMode).toBe("sequential"); - expect(runNowTool.executionMode).toBe("sequential"); - - // List is read-only; it inherits the sequential batch gate from any - // write tool it shares a turn with (pi-agent-core makes the whole - // batch sequential when any tool in it is sequential). 
- expect(listTool.executionMode).not.toBe("sequential"); - }); -}); diff --git a/packages/junior/tests/integration/slack/app-home-webhook.test.ts b/packages/junior/tests/integration/slack/app-home-webhook-behavior.test.ts similarity index 98% rename from packages/junior/tests/integration/slack/app-home-webhook.test.ts rename to packages/junior/tests/integration/slack/app-home-webhook-behavior.test.ts index ff15ee392..913b84e5a 100644 --- a/packages/junior/tests/integration/slack/app-home-webhook.test.ts +++ b/packages/junior/tests/integration/slack/app-home-webhook-behavior.test.ts @@ -14,7 +14,7 @@ import { createNoopSlackWebhookRuntime, deferred, } from "../../fixtures/conversation-work"; -import { slackApiOutbox } from "../../fixtures/slack-api-outbox"; +import { slackApiOutbox } from "../../fixtures/slack/api-outbox"; import { createSlackWebhookTestClient } from "../../fixtures/slack/webhook-client"; const SIGNING_SECRET = "test-signing-secret"; @@ -58,7 +58,7 @@ function createTokenStore( }; } -describe("Slack webhook: App Home events", () => { +describe("Slack behavior: App Home webhook", () => { beforeEach(() => { process.env = { ...ORIGINAL_ENV, diff --git a/packages/junior/tests/integration/slack/assistant-context-canvas-routing.test.ts b/packages/junior/tests/integration/slack/assistant-context-canvas-routing.test.ts index 49f4fc307..0f680188a 100644 --- a/packages/junior/tests/integration/slack/assistant-context-canvas-routing.test.ts +++ b/packages/junior/tests/integration/slack/assistant-context-canvas-routing.test.ts @@ -10,7 +10,7 @@ import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; +} from "../../fixtures/slack/harness"; import { getCapturedSlackApiCalls, queueSlackApiResponse, @@ -39,27 +39,25 @@ describe("Slack behavior: assistant context canvas routing", () => { }); const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, 
context) => { - await createCanvas({ - title: "Shared update", - markdown: "Context-aware update", - channelId: context?.toolChannelId, - }); - return { - text: "Shared canvas created.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + adapters: { + generateAssistantReply: async (_prompt, context) => { + await createCanvas({ + title: "Shared update", + markdown: "Context-aware update", + channelId: context?.toolChannelId, + }); + return { + text: "Shared canvas created.", + diagnostics: { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + }, + }; }, }, }); diff --git a/packages/junior/tests/integration/slack/assistant-context-channel-behavior.test.ts b/packages/junior/tests/integration/slack/assistant-context-channel-behavior.test.ts index 21e6dee84..00bbbee81 100644 --- a/packages/junior/tests/integration/slack/assistant-context-channel-behavior.test.ts +++ b/packages/junior/tests/integration/slack/assistant-context-channel-behavior.test.ts @@ -4,30 +4,28 @@ import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; +} from "../../fixtures/slack/harness"; describe("Slack behavior: assistant context channel routing", () => { it("prefers assistantContextChannelId over DM channel for tool execution context", async () => { const capturedToolChannelIds: Array = []; const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - capturedToolChannelIds.push(context?.toolChannelId); - return { - text: "Canvas draft prepared.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - 
usedPrimaryText: true, - }, - }; - }, + adapters: { + generateAssistantReply: async (_prompt, context) => { + capturedToolChannelIds.push(context?.toolChannelId); + return { + text: "Canvas draft prepared.", + diagnostics: { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + }, + }; }, }, }); diff --git a/packages/junior/tests/integration/slack/assistant-lifecycle-behavior.test.ts b/packages/junior/tests/integration/slack/assistant-lifecycle-behavior.test.ts index 98f050a93..3f1ebd956 100644 --- a/packages/junior/tests/integration/slack/assistant-lifecycle-behavior.test.ts +++ b/packages/junior/tests/integration/slack/assistant-lifecycle-behavior.test.ts @@ -6,7 +6,7 @@ import { import { coerceThreadArtifactsState } from "@/chat/state/artifacts"; import { disconnectStateAdapter } from "@/chat/state/adapter"; import { createTestChatRuntime } from "../../fixtures/chat-runtime"; -import { FakeSlackAdapter } from "../../fixtures/slack-harness"; +import { FakeSlackAdapter } from "../../fixtures/slack/harness"; describe("Slack behavior: assistant lifecycle", () => { afterEach(async () => { diff --git a/packages/junior/tests/integration/slack/assistant-status-behavior.test.ts b/packages/junior/tests/integration/slack/assistant-status-behavior.test.ts new file mode 100644 index 000000000..a7f2a37d2 --- /dev/null +++ b/packages/junior/tests/integration/slack/assistant-status-behavior.test.ts @@ -0,0 +1,262 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; +import { makeAssistantStatus } from "@/chat/slack/assistant-thread/status"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { + FakeSlackAdapter, + createTestMessage, + createTestThread, +} from 
"../../fixtures/slack/harness"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; + +const emptyThreadReplies = async () => []; + +function createRuntime( + args: { + adapters?: JuniorRuntimeScenarioAdapters; + slackAdapter?: FakeSlackAdapter; + } = {}, +) { + const adapters = args.adapters ?? {}; + return createTestChatRuntime({ + slackAdapter: args.slackAdapter, + adapters: { + listThreadReplies: emptyThreadReplies, + ...adapters, + }, + }); +} + +describe("Slack behavior: assistant status", () => { + afterEach(async () => { + vi.restoreAllMocks(); + await disconnectStateAdapter(); + }); + + it("clears assistant status after successful reply", async () => { + const slackAdapter = new FakeSlackAdapter(); + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateAssistantReply: async (_prompt, context) => { + await context?.onStatus?.(makeAssistantStatus("running", "bash")); + return successfulAssistantReply("Done.", { + diagnostics: { + toolCalls: ["bash"], + toolResultCount: 1, + }, + }); + }, + }, + }); + + const thread = createTestThread({ + id: "slack:C_STATUS:1700002000.000", + }); + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "m-status-clear", + text: "<@U_APP> run a command", + isMention: true, + threadId: thread.id, + }), + ); + + expect(slackAdapter.statusCalls.length).toBeGreaterThan(0); + expect(slackAdapter.statusCalls.at(-1)).toEqual({ + channelId: "C_STATUS", + threadTs: "1700002000.000", + text: "", + loadingMessages: undefined, + }); + }); + + it("deletes redundant reply and clears status for reaction-only turn", async () => { + const slackAdapter = new FakeSlackAdapter(); + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateAssistantReply: async (_prompt, context) => { + await context?.onStatus?.(makeAssistantStatus("drafting", "reply")); + return successfulAssistantReply("Done!", { + deliveryMode: "thread", + diagnostics: { + toolCalls: 
["slackMessageAddReaction"], + toolResultCount: 1, + }, + }); + }, + }, + }); + + const thread = createTestThread({ + id: "slack:C_STATUS:1700004000.000", + }); + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "m-reaction-only", + text: "<@U_APP> add a reaction to this message", + isMention: true, + threadId: thread.id, + }), + ); + + expect(thread.posts).toHaveLength(0); + expect(slackAdapter.statusCalls.length).toBeGreaterThan(0); + expect(slackAdapter.statusCalls.at(-1)).toEqual({ + channelId: "C_STATUS", + threadTs: "1700004000.000", + text: "", + loadingMessages: undefined, + }); + }); + + it("clears assistant status after agent error", async () => { + const slackAdapter = new FakeSlackAdapter(); + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateAssistantReply: async () => { + throw new Error("model exploded"); + }, + }, + }); + + const thread = createTestThread({ + id: "slack:C_STATUS:1700003000.000", + }); + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "m-status-error", + text: "<@U_APP> do something", + isMention: true, + threadId: thread.id, + }), + ); + + expect(slackAdapter.statusCalls.length).toBeGreaterThan(0); + expect(slackAdapter.statusCalls.at(-1)).toEqual({ + channelId: "C_STATUS", + threadTs: "1700003000.000", + text: "", + loadingMessages: undefined, + }); + }); + + it("emits assistant status updates in shared channel threads", async () => { + const slackAdapter = new FakeSlackAdapter(); + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateAssistantReply: async (_prompt, context) => { + await context?.onStatus?.( + makeAssistantStatus("reading", "channel messages"), + ); + return successfulAssistantReply("Done."); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_STATUS:1700000000.000" }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-status", + threadId: thread.id, + 
text: "show the channel", + isMention: true, + }), + ); + + expect(slackAdapter.statusCalls.length).toBeGreaterThan(0); + expect(slackAdapter.statusCalls[0]).toEqual( + expect.objectContaining({ + channelId: "C_STATUS", + threadTs: "1700000000.000", + }), + ); + expect(slackAdapter.statusCalls.at(-1)).toEqual({ + channelId: "C_STATUS", + threadTs: "1700000000.000", + text: "", + loadingMessages: undefined, + }); + }); + + it("posts the final reply even while the initial assistant status write is pending", async () => { + const slackAdapter = new FakeSlackAdapter(); + let releaseFirstStatus: (() => void) | undefined; + let statusCallCount = 0; + slackAdapter.setAssistantStatus = async ( + channelId, + threadTs, + text, + loadingMessages, + ) => { + statusCallCount += 1; + if (statusCallCount === 1) { + await new Promise((resolve) => { + releaseFirstStatus = resolve; + }); + } + slackAdapter.statusCalls.push({ + channelId, + threadTs, + text, + loadingMessages, + }); + }; + + let replyStarted = false; + const thread = createTestThread({ + id: "slack:D_STATUSORDER:1700000001.000", + }); + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateThreadTitleText: async () => + ({ text: "Status thread" }) as never, + generateAssistantReply: async () => { + replyStarted = true; + return successfulAssistantReply( + "Reply lands after the pending status is drained.", + ); + }, + }, + }); + + let settled = false; + const turnPromise = slackRuntime + .handleNewMention( + thread, + createTestMessage({ + id: "msg-status-order", + threadId: thread.id, + text: "answer quickly", + isMention: true, + }), + ) + .then(() => { + settled = true; + }); + + await vi.waitFor(() => { + expect(replyStarted).toBe(true); + expect(thread.posts).toEqual([ + expect.objectContaining({ + markdown: "Reply lands after the pending status is drained.", + }), + ]); + }); + + expect(settled).toBe(false); + + releaseFirstStatus!(); + await turnPromise; + }); +}); diff --git 
a/packages/junior/tests/integration/slack/assistant-thread-contract.test.ts b/packages/junior/tests/integration/slack/assistant-thread-contract.test.ts index 83fbf5e7d..0e4a9ce14 100644 --- a/packages/junior/tests/integration/slack/assistant-thread-contract.test.ts +++ b/packages/junior/tests/integration/slack/assistant-thread-contract.test.ts @@ -1,11 +1,12 @@ -import { createTestDestination } from "../../fixtures/slack-harness"; +import { createTestDestination } from "../../fixtures/slack/harness"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { createMemoryState } from "@chat-adapter/state-memory"; import type { SlackAdapter } from "@chat-adapter/slack"; import { slackEventsApiEnvelope } from "../../fixtures/slack/factories/events"; import { resetSlackApiMockState } from "../../msw/handlers/slack-api"; -import { slackApiOutbox } from "../../fixtures/slack-api-outbox"; +import { slackApiOutbox } from "../../fixtures/slack/api-outbox"; import { createSlackWebhookTestClient } from "../../fixtures/slack/webhook-client"; +import { piAssistantMessage } from "../../fixtures/pi-stream"; import { createSlackRuntime } from "@/chat/app/factory"; import { JuniorChat } from "@/chat/ingress/junior-chat"; import { makeAssistantStatus } from "@/chat/slack/assistant-thread/status"; @@ -66,8 +67,17 @@ function makeDiagnostics() { }; } +function completeTextResult( + text: string, +): Awaited> { + return { + text, + message: piAssistantMessage([{ type: "text", text }]), + }; +} + async function createDirectMessageBot(args: { - completeText?: ConversationMemoryDeps["completeText"]; + generateThreadTitleText?: ConversationMemoryDeps["completeText"]; generateAssistantReply: ReplyExecutorServices["generateAssistantReply"]; }) { const bot = new JuniorChat<{ slack: SlackAdapter }>({ @@ -83,18 +93,14 @@ async function createDirectMessageBot(args: { }); const slackRuntime = createSlackRuntime({ getSlackAdapter: () => bot.getAdapter("slack"), - services: 
{ - ...(args.completeText + adapters: { + ...(args.generateThreadTitleText ? { - conversationMemory: { - completeText: - args.completeText as ConversationMemoryDeps["completeText"], - }, + generateThreadTitleText: + args.generateThreadTitleText as ConversationMemoryDeps["completeText"], } : {}), - replyExecutor: { - generateAssistantReply: args.generateAssistantReply, - }, + generateAssistantReply: args.generateAssistantReply, }, }); @@ -123,10 +129,8 @@ async function createMentionBot(args: { }); const slackRuntime = createSlackRuntime({ getSlackAdapter: () => bot.getAdapter("slack"), - services: { - replyExecutor: { - generateAssistantReply: args.generateAssistantReply, - }, + adapters: { + generateAssistantReply: args.generateAssistantReply, }, }); @@ -259,55 +263,17 @@ describe("Slack contract: assistant-thread delivery", () => { ); }); - it("posts assistant titles with a raw DM channel id when thread_ts is present", async () => { - const bot = await createDirectMessageBot({ - completeText: async () => - ({ - text: "Debugging Node.js Memory Leaks", - message: { role: "assistant", content: "" }, - }) as any, - generateAssistantReply: async () => ({ - text: "Here is how to debug memory leaks.", - diagnostics: makeDiagnostics(), - }), - }); - const waitUntil = slackWebhookClient.waitUntil(); - - const response = await handlePlatformWebhook( - createDirectMessageRequest("How do I debug memory leaks in Node?", { - threadTs: DM_THREAD_TS, - }), - "slack", - waitUntil.fn, - bot, - ); - - expect(response.status).toBe(200); - await waitUntil.flush(); - - expect(slackApiOutbox.calls("assistant.threads.setTitle")).toEqual([ - expect.objectContaining({ - params: expect.objectContaining({ - channel_id: DM_CHANNEL_ID, - thread_ts: DM_THREAD_TS, - title: "Debugging Node.js Memory Leaks", - }), - }), - ]); - }); - - it("lets the awaited webhook turn task finish before slow title generation", async () => { - let resolveTitle: (() => void) | undefined; + it("sets the assistant 
title after the webhook turn posts its reply", async () => { const bot = await createDirectMessageBot({ - completeText: async () => - await new Promise((resolve) => { - resolveTitle = () => { - resolve({ - text: "Debugging Node.js Memory Leaks", - message: { role: "assistant", content: "" }, - } as any); - }; - }), + generateThreadTitleText: async () => + await new Promise< + Awaited> + >((resolve) => + setTimeout( + () => resolve(completeTextResult("Debugging Node.js Memory Leaks")), + 10, + ), + ), generateAssistantReply: async () => ({ text: "Here is how to debug memory leaks.", diagnostics: makeDiagnostics(), @@ -326,9 +292,6 @@ describe("Slack contract: assistant-thread delivery", () => { expect(response.status).toBe(200); await waitUntil.flush(); - expect(slackApiOutbox.calls("assistant.threads.setTitle")).toEqual([]); - - resolveTitle!(); await vi.waitFor(() => { expect(slackApiOutbox.calls("assistant.threads.setTitle")).toEqual([ expect.objectContaining({ @@ -344,11 +307,8 @@ describe("Slack contract: assistant-thread delivery", () => { it("does not post assistant titles when the DM message omits thread_ts", async () => { const bot = await createDirectMessageBot({ - completeText: async () => - ({ - text: "Debugging Node.js Memory Leaks", - message: { role: "assistant", content: "" }, - }) as any, + generateThreadTitleText: async () => + completeTextResult("Debugging Node.js Memory Leaks"), generateAssistantReply: async () => ({ text: "Here is how to debug memory leaks.", diagnostics: makeDiagnostics(), diff --git a/packages/junior/tests/integration/slack/attachment-behavior.test.ts b/packages/junior/tests/integration/slack/attachment-behavior.test.ts index 6440c87b9..1fd4625ab 100644 --- a/packages/junior/tests/integration/slack/attachment-behavior.test.ts +++ b/packages/junior/tests/integration/slack/attachment-behavior.test.ts @@ -1,48 +1,20 @@ import { afterEach, describe, expect, it, vi } from "vitest"; import type { Message } from "chat"; +import { 
successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { + createSlackImageRuntime, + resetSlackImageRuntimeEnv, +} from "../../fixtures/slack/image-runtime"; +import { toPostedText } from "../../fixtures/slack/posts"; import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; - -const ORIGINAL_ENV = { ...process.env }; - -async function createRuntime( - args: Parameters< - typeof import("../../fixtures/chat-runtime").createTestChatRuntime - >[0], -) { - process.env = { - ...ORIGINAL_ENV, - AI_VISION_MODEL: "openai/gpt-5.4", - SLACK_BOT_TOKEN: "", - SLACK_BOT_USER_TOKEN: "", - }; - vi.resetModules(); - const { createTestChatRuntime } = await import("../../fixtures/chat-runtime"); - return createTestChatRuntime(args); -} - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - - return String(value); -} +} from "../../fixtures/slack/harness"; describe("Slack behavior: attachment handling", () => { afterEach(() => { - process.env = { ...ORIGINAL_ENV }; - vi.resetModules(); + resetSlackImageRuntimeEnv(); }); it("rehydrates attachment data and forwards it to the agent context", async () => { @@ -54,12 +26,10 @@ describe("Slack behavior: attachment handling", () => { const capturedAttachmentCounts: number[] = []; const capturedAttachmentMediaTypes: string[] = []; - const { slackRuntime } = await createRuntime({ - services: { - visionContext: { - completeText: completeTextMock, - }, - replyExecutor: { + const { slackRuntime } = await createSlackImageRuntime( + { + adapters: { + describeImagesText: completeTextMock, generateAssistantReply: async (_prompt, context) => { const attachments = context?.userAttachments ?? 
[]; capturedAttachmentCounts.push(attachments.length); @@ -67,22 +37,16 @@ describe("Slack behavior: attachment handling", () => { capturedAttachmentMediaTypes.push(attachments[0].mediaType); } - return { - text: "Image received. The chart trend is upward.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; + return successfulAssistantReply( + "Image received. The chart trend is upward.", + ); }, }, }, - }); + { + AI_VISION_MODEL: "openai/gpt-5.4", + }, + ); const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700004000.000" }); const message = createTestMessage({ @@ -112,36 +76,28 @@ describe("Slack behavior: attachment handling", () => { expect(capturedAttachmentMediaTypes).toEqual(["image/png"]); expect(thread.posts).toHaveLength(1); expect(toPostedText(thread.posts[0])).toContain("chart trend is upward"); - }, 10_000); + }); it("posts a fallback error reply when required image analysis fails", async () => { const attachmentFetch = vi.fn(async () => Buffer.from("image-bytes")); const completeTextMock = vi.fn(async () => { throw new Error("vision unavailable"); }); - const generateAssistantReply = vi.fn(async () => ({ - text: "should not post", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - })); + const generateAssistantReply = vi.fn(async () => + successfulAssistantReply("should not post"), + ); - const { slackRuntime } = await createRuntime({ - services: { - visionContext: { - completeText: completeTextMock, - }, - replyExecutor: { + const { slackRuntime } = await createSlackImageRuntime( + { + adapters: { + describeImagesText: completeTextMock, generateAssistantReply, }, }, - }); + { + AI_VISION_MODEL: "openai/gpt-5.4", + }, + ); const thread = createTestThread({ id: 
"slack:C_BEHAVIOR:1700004001.000" }); const message = createTestMessage({ diff --git a/packages/junior/tests/integration/slack/attachment-media-behavior.test.ts b/packages/junior/tests/integration/slack/attachment-media-behavior.test.ts index 14d9f6efc..55da5e3fb 100644 --- a/packages/junior/tests/integration/slack/attachment-media-behavior.test.ts +++ b/packages/junior/tests/integration/slack/attachment-media-behavior.test.ts @@ -1,50 +1,20 @@ import { afterEach, describe, expect, it, vi } from "vitest"; import type { Message } from "chat"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { + createSlackImageRuntime, + resetSlackImageRuntimeEnv, +} from "../../fixtures/slack/image-runtime"; +import { toPostedText } from "../../fixtures/slack/posts"; import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; - -const ORIGINAL_ENV = { ...process.env }; - -async function createRuntime( - args: Parameters< - typeof import("../../fixtures/chat-runtime").createTestChatRuntime - >[0], - env: NodeJS.ProcessEnv = {}, -) { - process.env = { - ...ORIGINAL_ENV, - AI_VISION_MODEL: "", - SLACK_BOT_TOKEN: "", - SLACK_BOT_USER_TOKEN: "", - ...env, - }; - vi.resetModules(); - const { createTestChatRuntime } = await import("../../fixtures/chat-runtime"); - return createTestChatRuntime(args); -} - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - - return String(value); -} +} from "../../fixtures/slack/harness"; describe("Slack behavior: mixed attachment media", () => { afterEach(() => { - process.env = { ...ORIGINAL_ENV }; - vi.resetModules(); + resetSlackImageRuntimeEnv(); }); it("keeps valid attachments while skipping oversized and failed fetch attachments", async () => 
{ @@ -61,34 +31,19 @@ describe("Slack behavior: mixed attachment media", () => { const capturedAttachmentMediaTypes: string[][] = []; const capturedAttachmentNames: string[][] = []; - const { slackRuntime } = await createRuntime( + const { slackRuntime } = await createSlackImageRuntime( { - services: { - visionContext: { - completeText: completeTextMock, - }, - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - const attachments = context?.userAttachments ?? []; - capturedAttachmentMediaTypes.push( - attachments.map((attachment) => attachment.mediaType), - ); - capturedAttachmentNames.push( - attachments.map((attachment) => attachment.filename ?? ""), - ); - return { - text: "Processed attachments.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + adapters: { + describeImagesText: completeTextMock, + generateAssistantReply: async (_prompt, context) => { + const attachments = context?.userAttachments ?? []; + capturedAttachmentMediaTypes.push( + attachments.map((attachment) => attachment.mediaType), + ); + capturedAttachmentNames.push( + attachments.map((attachment) => attachment.filename ?? 
""), + ); + return successfulAssistantReply("Processed attachments."); }, }, }, @@ -150,7 +105,7 @@ describe("Slack behavior: mixed attachment media", () => { ["image/png", "application/pdf"], ]); expect(capturedAttachmentNames).toEqual([["chart.png", "incident.pdf"]]); - }, 20_000); + }); it("drops image attachments when AI_VISION_MODEL is unset", async () => { const imageFetch = vi.fn(async () => Buffer.from("image-bytes")); @@ -159,33 +114,20 @@ describe("Slack behavior: mixed attachment media", () => { const capturedAttachmentNames: string[][] = []; const capturedOmittedImageCounts: number[] = []; - const { slackRuntime } = await createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - const attachments = context?.userAttachments ?? []; - capturedAttachmentMediaTypes.push( - attachments.map((attachment) => attachment.mediaType), - ); - capturedAttachmentNames.push( - attachments.map((attachment) => attachment.filename ?? ""), - ); - capturedOmittedImageCounts.push( - context?.omittedImageAttachmentCount ?? 0, - ); - return { - text: "Processed attachments.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + const { slackRuntime } = await createSlackImageRuntime({ + adapters: { + generateAssistantReply: async (_prompt, context) => { + const attachments = context?.userAttachments ?? []; + capturedAttachmentMediaTypes.push( + attachments.map((attachment) => attachment.mediaType), + ); + capturedAttachmentNames.push( + attachments.map((attachment) => attachment.filename ?? ""), + ); + capturedOmittedImageCounts.push( + context?.omittedImageAttachmentCount ?? 
0, + ); + return successfulAssistantReply("Processed attachments."); }, }, }); @@ -228,31 +170,19 @@ describe("Slack behavior: mixed attachment media", () => { const imageFetch = vi.fn(async () => Buffer.from("image-bytes")); const capturedOmittedImageCounts: number[] = []; const generateAssistantReply = vi.fn( - async (_prompt?: string, _context?: unknown) => { - return { - text: "I can’t inspect the attached image in this runtime, but I do see that an image was included.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + async (_prompt?: string, _context?: unknown) => + successfulAssistantReply( + "I can’t inspect the attached image in this runtime, but I do see that an image was included.", + ), ); - const { slackRuntime } = await createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (prompt, context) => { - capturedOmittedImageCounts.push( - context?.omittedImageAttachmentCount ?? 0, - ); - return generateAssistantReply(prompt, context); - }, + const { slackRuntime } = await createSlackImageRuntime({ + adapters: { + generateAssistantReply: async (prompt, context) => { + capturedOmittedImageCounts.push( + context?.omittedImageAttachmentCount ?? 
0, + ); + return generateAssistantReply(prompt, context); }, }, }); diff --git a/packages/junior/tests/integration/slack/auth-pause-behavior.test.ts b/packages/junior/tests/integration/slack/auth-pause-behavior.test.ts new file mode 100644 index 000000000..1f58f7a1d --- /dev/null +++ b/packages/junior/tests/integration/slack/auth-pause-behavior.test.ts @@ -0,0 +1,176 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { RetryableTurnError } from "@/chat/runtime/turn"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; +import { createSlackBehaviorRuntime } from "../../fixtures/slack/behavior"; +import { + createAwaitingSlackTurnState, + createPiUserTurn, +} from "../../fixtures/slack/turn-state"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +interface AuthPauseConversationState { + processing?: { activeTurnId?: string }; + messages?: Array<{ + id?: string; + meta?: { replied?: boolean; skippedReason?: string }; + role?: string; + text?: string; + }>; +} + +function conversationState(thread: ReturnType) { + return (thread.getState() as { conversation?: AuthPauseConversationState }) + .conversation; +} + +function expectAuthPauseParked( + thread: ReturnType, + messageId: string, +): void { + expect(thread.posts).toEqual([ + expect.objectContaining({ + markdown: expect.stringContaining("I sent you a link"), + }), + ]); + const conversation = conversationState(thread); + expect(conversation?.processing?.activeTurnId).toBeUndefined(); + expect(conversation?.messages).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + role: "assistant", + text: expect.stringContaining("Click here to link"), + }), + ]), + ); + expect( + conversation?.messages?.find((message) => message.id === messageId), + ).toMatchObject({ + meta: { + replied: true, + skippedReason: undefined, + }, + }); +} + 
+describe("Slack behavior: auth-pause turns", () => { + beforeEach(async () => { + await disconnectStateAdapter(); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + await disconnectStateAdapter(); + }); + + it.each([ + { + name: "MCP", + threadId: "slack:C_AUTH:1700000000.000", + messageId: "msg-auth-pause", + text: "please use notion", + resumeReason: "mcp_auth_resume", + authKind: "mcp", + authProvider: "notion", + }, + { + name: "plugin", + threadId: "slack:C_PLUGIN_AUTH:1700000000.000", + messageId: "msg-plugin-auth-pause", + text: "please use github", + resumeReason: "plugin_auth_resume", + authKind: "plugin", + authProvider: "github", + }, + ] as const)( + "parks $name auth resume turns without rethrowing to the queue", + async ({ + authKind, + authProvider, + messageId, + resumeReason, + text, + threadId, + }) => { + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply: async () => { + throw new RetryableTurnError(resumeReason, "simulated auth pause", { + authDisposition: "link_sent", + authKind, + authProvider, + authProviderDisplayName: + authProvider === "github" ? 
"GitHub" : "Notion", + }); + }, + }, + }); + + const thread = createTestThread({ id: threadId }); + await expect( + slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: messageId, + threadId, + text, + isMention: true, + }), + ), + ).resolves.toBeUndefined(); + + expectAuthPauseParked(thread, messageId); + }, + ); + + it("parks auth-paused active turns without starting a new follow-up turn", async () => { + const conversationId = "slack:C_AUTH_PARKED:1700000000.000"; + const activeSessionId = "turn_msg-auth-original"; + const generateAssistantReply = vi.fn(); + const onTurnStatePersisted = vi.fn(); + await upsertAgentTurnSessionRecord({ + conversationId, + sessionId: activeSessionId, + sliceId: 1, + state: "awaiting_resume", + resumeReason: "auth", + piMessages: createPiUserTurn("please use notion"), + }); + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply, + }, + }); + + const thread = createTestThread({ + id: conversationId, + state: createAwaitingSlackTurnState({ activeSessionId }), + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-auth-follow-up", + threadId: conversationId, + text: "any update?", + isMention: true, + }), + { onTurnStatePersisted }, + ); + + expect(generateAssistantReply).not.toHaveBeenCalled(); + expect(onTurnStatePersisted).toHaveBeenCalledOnce(); + expect(thread.posts).toEqual([]); + const conversation = conversationState(thread); + expect(conversation?.processing?.activeTurnId).toBe(activeSessionId); + const followUp = conversation?.messages?.find( + (message) => message.id === "msg-auth-follow-up", + ); + expect(followUp).toBeDefined(); + expect(followUp?.meta?.replied).toBeUndefined(); + expect(followUp?.meta?.skippedReason).toBeUndefined(); + }); +}); diff --git a/packages/junior/tests/integration/slack/bot-handlers.test.ts b/packages/junior/tests/integration/slack/bot-handlers.test.ts deleted file mode 100644 index 002b823d4..000000000 
--- a/packages/junior/tests/integration/slack/bot-handlers.test.ts +++ /dev/null @@ -1,2328 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import type { Destination } from "@sentry/junior-plugin-api"; -import type { JuniorRuntimeServiceOverrides } from "@/chat/app/services"; -import type { ReplyRequestContext } from "@/chat/respond"; -import { makeAssistantStatus } from "@/chat/slack/assistant-thread/status"; -import { getSlackInterruptionMarker } from "@/chat/slack/output"; -import { RetryableTurnError } from "@/chat/runtime/turn"; -import { disconnectStateAdapter } from "@/chat/state/adapter"; -import { - getAgentTurnSessionRecord, - upsertAgentTurnSessionRecord, -} from "@/chat/state/turn-session"; -import { - getCapturedSlackApiCalls, - resetSlackApiMockState, -} from "../../msw/handlers/slack-api"; -import { - FakeSlackAdapter, - createTestThread, - createTestMessage, - createTestDestination, -} from "../../fixtures/slack-harness"; -import { createTestChatRuntime } from "../../fixtures/chat-runtime"; - -const emptyThreadReplies = async () => []; - -function postIncludes(thread: { posts: unknown[] }, text: string): boolean { - return thread.posts.some((post) => { - if (typeof post === "string") { - return post.includes(text); - } - if ( - post && - typeof post === "object" && - "markdown" in (post as Record) - ) { - return String((post as { markdown: string }).markdown).includes(text); - } - return false; - }); -} - -function createRuntime( - args: { - services?: JuniorRuntimeServiceOverrides; - slackAdapter?: FakeSlackAdapter; - } = {}, -) { - const services = args.services ?? {}; - return createTestChatRuntime({ - slackAdapter: args.slackAdapter, - services: { - ...services, - visionContext: { - listThreadReplies: emptyThreadReplies, - ...(services.visionContext ?? 
{}), - }, - }, - }); -} - -function slackDestination(channelId: string) { - return { - platform: "slack", - teamId: "T123", - channelId, - } satisfies Destination; -} - -function rawSlackMessage( - conversationId: string, - destination: Destination, -): Record { - if (destination.platform !== "slack") { - throw new Error("Expected Slack destination"); - } - const [, , threadTs = "1700000000.000"] = conversationId.split(":"); - return { - channel: destination.channelId, - team_id: destination.teamId, - ts: threadTs, - thread_ts: threadTs, - }; -} - -function createAwaitingContinuationState(args: { - activeSessionId: string; - replied?: boolean; - userMessageId?: string; - userText?: string; -}) { - return { - conversation: { - schemaVersion: 1, - backfill: { - completedAtMs: 1, - source: "recent_messages", - }, - compactions: [], - piMessages: [], - messages: [ - { - id: args.userMessageId ?? "msg-original", - role: "user", - text: args.userText ?? "please keep working", - createdAtMs: 1, - author: { - userId: "U-test", - }, - ...(args.replied === undefined - ? 
{} - : { meta: { replied: args.replied } }), - }, - ], - processing: { - activeTurnId: args.activeSessionId, - }, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 1, - updatedAtMs: 1, - }, - vision: { - byFileId: {}, - }, - }, - }; -} - -function turnPiMessages(text: string) { - return [ - { - role: "user" as const, - content: [{ type: "text" as const, text }], - timestamp: 1, - }, - ]; -} - -// ── Tests ──────────────────────────────────────────────────────────── - -describe("bot handlers (integration)", () => { - beforeEach(async () => { - await disconnectStateAdapter(); - }); - - afterEach(async () => { - resetSlackApiMockState(); - vi.restoreAllMocks(); - await disconnectStateAdapter(); - }); - - it("handleNewMention: posts reply from generateAssistantReply", async () => { - const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Hello from the bot!", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_INT:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-new-mention", - threadId: "slack:C_INT:1700000000.000", - text: "hey bot", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(thread.posts.length).toBeGreaterThan(0); - const hasReply = thread.posts.some((p) => { - if (typeof p === "string") return p.includes("Hello from the bot!"); - if ( - p && - typeof p === "object" && - "markdown" in (p as Record) - ) { - return String((p as { markdown: string }).markdown).includes( - "Hello from the bot!", - ); - } - return false; - }); - expect(hasReply).toBe(true); - }); - - it("does not replay 
a message that already has a delivered reply", async () => { - const conversationId = "slack:C_REPLAY:1700000000.000"; - const generateAssistantReply = vi.fn(); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - }, - }, - }); - const thread = createTestThread({ - id: conversationId, - state: { - conversation: { - schemaVersion: 1, - backfill: { - completedAtMs: 1, - source: "recent_messages", - }, - compactions: [], - piMessages: [], - messages: [ - { - id: "msg-replayed", - role: "user", - text: "please answer once", - createdAtMs: 1, - author: { - userId: "U-test", - }, - meta: { - replied: true, - slackTs: "1700000000.000", - }, - }, - { - id: "assistant-reply", - role: "assistant", - text: "Already answered.", - createdAtMs: 2, - author: { - isBot: true, - userName: "Junior", - }, - meta: { - replied: true, - }, - }, - ], - processing: {}, - stats: { - compactedMessageCount: 0, - estimatedContextTokens: 0, - totalMessageCount: 2, - updatedAtMs: 2, - }, - vision: { - byFileId: {}, - }, - }, - }, - }); - - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-replayed", - threadId: conversationId, - text: "please answer once", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ), - ).resolves.toBeUndefined(); - - expect(generateAssistantReply).not.toHaveBeenCalled(); - expect(thread.posts).toEqual([]); - }); - - it("handleSubscribedMessage with explicit mention: replies when should_reply is true", async () => { - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => - ({ - object: { - should_reply: true, - confidence: 1, - reason: "explicit mention", - }, - text: '{"should_reply":true,"confidence":1,"reason":"explicit mention"}', - }) as any, - }, - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Replying to mention", - diagnostics: { - assistantMessageCount: 1, - 
modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_SUB:1700000000.000" }); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "msg-sub-mention", - threadId: "slack:C_SUB:1700000000.000", - text: "<@UBOT> check this", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(thread.posts.length).toBeGreaterThan(0); - }); - - it("handleSubscribedMessage skip: does not reply when should_reply is false", async () => { - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => - ({ - object: { - should_reply: false, - confidence: 0, - reason: "passive conversation", - }, - text: '{"should_reply":false,"confidence":0,"reason":"passive conversation"}', - }) as any, - }, - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_SKIP:1700000000.000" }); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "msg-sub-skip", - threadId: "slack:C_SKIP:1700000000.000", - text: "just chatting among ourselves", - }), - { destination: createTestDestination(thread) }, - ); - - // Should not have posted a reply (no generateAssistantReply call) - const hasReply = thread.posts.some((p) => { - if (typeof p === "string") return !p.startsWith("Error:"); - if ( - p && - typeof p === "object" && - "markdown" in (p as Record) - ) - return true; - return false; - }); - expect(hasReply).toBe(false); - - // Verify state was persisted with replied: false - const state = thread.getState(); - const conversation = ( - state as { - conversation?: { messages?: Array<{ meta?: { replied?: boolean } }> }; - } - ).conversation; - const lastMsg = 
conversation?.messages?.[conversation.messages.length - 1]; - expect(lastMsg?.meta?.replied).toBe(false); - }); - - it("handleAssistantThreadStarted: sets title and suggested prompts via adapter", async () => { - const fakeAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createTestChatRuntime({ - slackAdapter: fakeAdapter, - }); - - await slackRuntime.handleAssistantThreadStarted({ - threadId: "slack:C_ASSIST:1700000000.000", - channelId: "C_ASSIST", - threadTs: "1700000000.000", - userId: "U-starter", - }); - - expect(fakeAdapter.titleCalls.length).toBe(1); - expect(fakeAdapter.titleCalls[0].title).toBe("Junior"); - expect(fakeAdapter.titleCalls[0].channelId).toBe("C_ASSIST"); - expect(fakeAdapter.promptCalls.length).toBe(1); - expect(fakeAdapter.promptCalls[0].prompts.length).toBe(3); - }); - - it("error recovery: posts safe error message when generateAssistantReply throws", async () => { - const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => { - throw new Error("LLM unavailable"); - }, - }, - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_ERR:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-err", - threadId: "slack:C_ERR:1700000000.000", - text: "trigger an error", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - const errorPost = thread.posts.find( - (p) => - typeof p === "string" && - p.includes("I ran into an internal error while processing that."), - ); - expect(errorPost).toBeDefined(); - expect(String(errorPost)).not.toContain("LLM unavailable"); - }); - - it("does not persist an assistant message when final Slack delivery fails", async () => { - const finalText = "This reply never reaches Slack."; - const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => 
({ - text: finalText, - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - visionContext: { - listThreadReplies: async () => [], - }, - }, - }); - const thread = createTestThread({ - id: "slack:C_DELIVERY_FAIL:1700000000.000", - }); - thread.post = vi.fn(async () => { - throw new Error("Slack unavailable"); - }) as typeof thread.post; - - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-delivery-fail", - threadId: "slack:C_DELIVERY_FAIL:1700000000.000", - text: "please answer", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ), - ).rejects.toThrow("Slack unavailable"); - - const conversation = ( - thread.getState() as { - conversation?: { - messages?: Array<{ - id?: string; - meta?: { replied?: boolean; skippedReason?: string }; - role?: string; - text?: string; - }>; - processing?: { activeTurnId?: string }; - }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBeUndefined(); - expect(conversation?.messages).not.toEqual( - expect.arrayContaining([ - expect.objectContaining({ - role: "assistant", - text: finalText, - }), - ]), - ); - expect( - conversation?.messages?.find( - (message) => message.id === "msg-delivery-fail", - ), - ).toMatchObject({ - meta: { - replied: false, - skippedReason: "reply failed", - }, - }); - }); - - it("passes conversation and turn correlation IDs into assistant reply context", async () => { - const capturedCorrelation: Array<{ - conversationId?: string; - threadId?: string; - turnId?: string; - runId?: string; - }> = []; - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - capturedCorrelation.push({ - conversationId: context?.correlation?.conversationId, - threadId: context?.correlation?.threadId, - turnId: 
context?.correlation?.turnId, - runId: context?.correlation?.runId, - }); - return { - text: "Done.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ - id: "slack:C_CORRELATION:1700000000.000", - runId: "run-123", - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-correlation", - threadId: "slack:C_CORRELATION:1700000000.000", - text: "trace this turn", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(capturedCorrelation).toHaveLength(1); - expect(capturedCorrelation[0]).toEqual( - expect.objectContaining({ - conversationId: "slack:C_CORRELATION:1700000000.000", - threadId: "slack:C_CORRELATION:1700000000.000", - runId: "run-123", - }), - ); - expect(capturedCorrelation[0].turnId).toBe("turn_msg-correlation"); - }); - - it("parks MCP auth resume turns without rethrowing to the queue", async () => { - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => { - throw new RetryableTurnError( - "mcp_auth_resume", - "simulated auth pause", - { - authDisposition: "link_sent", - authKind: "mcp", - authProvider: "notion", - authProviderDisplayName: "Notion", - }, - ); - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_AUTH:1700000000.000" }); - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-auth-pause", - threadId: "slack:C_AUTH:1700000000.000", - text: "please use notion", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ), - ).resolves.toBeUndefined(); - - expect(thread.posts).toEqual([ - expect.objectContaining({ - markdown: expect.stringContaining( - "<@U-test> I'll need you to authorize Notion. 
I sent you a link.", - ), - }), - ]); - const state = thread.getState(); - const conversation = ( - state as { - conversation?: { - processing?: { activeTurnId?: string }; - messages?: Array<{ - id?: string; - meta?: { replied?: boolean; skippedReason?: string }; - role?: string; - text?: string; - }>; - }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBeUndefined(); - expect(conversation?.messages).not.toEqual( - expect.arrayContaining([ - expect.objectContaining({ - role: "assistant", - text: expect.stringContaining("authorize Notion"), - }), - ]), - ); - expect( - conversation?.messages?.find( - (message) => message.id === "msg-auth-pause", - ), - ).toMatchObject({ - meta: { - replied: true, - skippedReason: undefined, - }, - }); - }); - - it("parks plugin auth resume turns without rethrowing to the queue", async () => { - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => { - throw new RetryableTurnError( - "plugin_auth_resume", - "simulated plugin auth pause", - { - authDisposition: "link_sent", - authKind: "plugin", - authProvider: "github", - authProviderDisplayName: "GitHub", - }, - ); - }, - }, - }, - }); - - const thread = createTestThread({ - id: "slack:C_PLUGIN_AUTH:1700000000.000", - }); - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-plugin-auth-pause", - threadId: "slack:C_PLUGIN_AUTH:1700000000.000", - text: "please use github", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ), - ).resolves.toBeUndefined(); - - expect(thread.posts).toEqual([ - expect.objectContaining({ - markdown: expect.stringContaining( - "<@U-test> I'll need you to authorize GitHub. 
I sent you a link.", - ), - }), - ]); - const state = thread.getState(); - const conversation = ( - state as { - conversation?: { - processing?: { activeTurnId?: string }; - messages?: Array<{ - id?: string; - meta?: { replied?: boolean; skippedReason?: string }; - role?: string; - text?: string; - }>; - }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBeUndefined(); - expect(conversation?.messages).not.toEqual( - expect.arrayContaining([ - expect.objectContaining({ - role: "assistant", - text: expect.stringContaining("authorize GitHub"), - }), - ]), - ); - expect( - conversation?.messages?.find( - (message) => message.id === "msg-plugin-auth-pause", - ), - ).toMatchObject({ - meta: { - replied: true, - skippedReason: undefined, - }, - }); - }); - - it("schedules durable continuation without posting a notice", async () => { - const scheduleAgentContinue = vi.fn().mockResolvedValue(undefined); - const conversationId = "slack:C9TIMEOUT:1700000000.000"; - const destination = slackDestination("C9TIMEOUT"); - const sessionId = "turn_msg-timeout"; - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - scheduleAgentContinue, - generateAssistantReply: async () => { - throw new RetryableTurnError( - "agent_continue", - "simulated timeout continuation", - { - conversationId, - sessionId, - version: 3, - sliceId: 2, - }, - ); - }, - }, - }, - }); - - const thread = createTestThread({ id: conversationId }); - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-timeout", - threadId: conversationId, - text: "please keep working", - isMention: true, - raw: rawSlackMessage(conversationId, destination), - }), - { destination }, - ), - ).resolves.toBeUndefined(); - - expect(scheduleAgentContinue).toHaveBeenCalledWith({ - conversationId, - destination, - sessionId, - expectedVersion: 3, - }); - expect(thread.posts).toEqual([]); - - const state = thread.getState(); - const conversation = ( - state 
as { - conversation?: { - processing?: { activeTurnId?: string }; - }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBe(sessionId); - }); - - it("schedules agent continuations with the provided destination", async () => { - const scheduleAgentContinue = vi.fn().mockResolvedValue(undefined); - const conversationId = "slack:C9TIMECTX:1700000000.000"; - const destination = slackDestination("C9TIMECTX"); - const sessionId = "turn_msg-timeout-context"; - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - scheduleAgentContinue, - generateAssistantReply: async () => { - throw new RetryableTurnError( - "agent_continue", - "simulated timeout continuation", - { - conversationId, - sessionId, - version: 4, - sliceId: 2, - }, - ); - }, - }, - }, - }); - - const thread = createTestThread({ id: conversationId }); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-timeout-context", - threadId: conversationId, - text: "please keep working", - isMention: true, - raw: rawSlackMessage(conversationId, { - ...destination, - teamId: "TWRONG", - }), - }), - { - destination, - }, - ); - - expect(scheduleAgentContinue).toHaveBeenCalledWith({ - conversationId, - destination, - sessionId, - expectedVersion: 4, - }); - }); - - it("does not post a Slack continuation notice when a live turn times out", async () => { - resetSlackApiMockState(); - const scheduleAgentContinue = vi.fn().mockResolvedValue(undefined); - const conversationId = "slack:C9TIMEAPI:1700000000.000"; - const destination = slackDestination("C9TIMEAPI"); - const sessionId = "turn_msg-timeout-api"; - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - scheduleAgentContinue, - generateAssistantReply: async () => { - throw new RetryableTurnError( - "agent_continue", - "simulated timeout continuation", - { - conversationId, - sessionId, - version: 3, - sliceId: 2, - }, - ); - }, - }, - }, - }); - - const thread = 
createTestThread({ id: conversationId }); - (thread.adapter as { name?: string }).name = "slack"; - - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-timeout-api", - threadId: conversationId, - text: "please keep working", - isMention: true, - raw: rawSlackMessage(conversationId, destination), - }), - { destination }, - ), - ).resolves.toBeUndefined(); - - expect(scheduleAgentContinue).toHaveBeenCalledWith({ - conversationId, - destination, - sessionId, - expectedVersion: 3, - }); - expect(thread.posts).toEqual([]); - expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([]); - }); - - it("reschedules an awaiting agent continuation without replying to the follow-up", async () => { - const conversationId = "slack:C9TIMERTY:1700000000.000"; - const destination = slackDestination("C9TIMERTY"); - const activeSessionId = "turn_msg-original"; - const scheduleAgentContinue = vi.fn().mockResolvedValue(undefined); - const getAwaitingAgentContinueRequest = vi.fn().mockResolvedValue({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - const generateAssistantReply = vi.fn(); - const onInputCommitted = vi.fn(); - const onTurnStatePersisted = vi.fn(); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - getAwaitingAgentContinueRequest, - scheduleAgentContinue, - }, - }, - }); - - const thread = createTestThread({ - id: conversationId, - state: createAwaitingContinuationState({ activeSessionId }), - }); - - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-retry", - threadId: conversationId, - text: "what happened?", - isMention: true, - }), - { - destination, - onInputCommitted, - onTurnStatePersisted, - }, - ), - ).resolves.toBeUndefined(); - - expect(getAwaitingAgentContinueRequest).toHaveBeenCalledWith({ - conversationId, - sessionId: activeSessionId, - }); - 
expect(scheduleAgentContinue).toHaveBeenCalledWith({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - expect(generateAssistantReply).not.toHaveBeenCalled(); - expect(onTurnStatePersisted).toHaveBeenCalledOnce(); - expect(onInputCommitted).toHaveBeenCalledOnce(); - expect(thread.posts).toEqual([]); - - const state = thread.getState(); - const conversation = ( - state as { - conversation?: { - messages?: Array<{ - id?: string; - meta?: { replied?: boolean; skippedReason?: string }; - }>; - processing?: { activeTurnId?: string }; - }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBe(activeSessionId); - const followUp = conversation?.messages?.find( - (message) => message.id === "msg-retry", - ); - expect(followUp).toBeDefined(); - expect(followUp?.meta?.replied).toBeUndefined(); - expect(followUp?.meta?.skippedReason).toBeUndefined(); - }); - - it("parks auth-paused active turns without starting a new follow-up turn", async () => { - const conversationId = "slack:C_AUTH_PARKED:1700000000.000"; - const activeSessionId = "turn_msg-auth-original"; - const generateAssistantReply = vi.fn(); - const onTurnStatePersisted = vi.fn(); - await upsertAgentTurnSessionRecord({ - conversationId, - sessionId: activeSessionId, - sliceId: 1, - state: "awaiting_resume", - resumeReason: "auth", - piMessages: turnPiMessages("please use notion"), - }); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - }, - }, - }); - - const thread = createTestThread({ - id: conversationId, - state: createAwaitingContinuationState({ activeSessionId }), - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-auth-follow-up", - threadId: conversationId, - text: "any update?", - isMention: true, - }), - { - destination: createTestDestination(thread), - onTurnStatePersisted, - }, - ); - - expect(generateAssistantReply).not.toHaveBeenCalled(); - 
expect(onTurnStatePersisted).toHaveBeenCalledOnce(); - expect(thread.posts).toEqual([]); - const state = thread.getState(); - const conversation = ( - state as { - conversation?: { - messages?: Array<{ - id?: string; - meta?: { replied?: boolean; skippedReason?: string }; - }>; - processing?: { activeTurnId?: string }; - }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBe(activeSessionId); - const followUp = conversation?.messages?.find( - (message) => message.id === "msg-auth-follow-up", - ); - expect(followUp).toBeDefined(); - expect(followUp?.meta?.replied).toBeUndefined(); - expect(followUp?.meta?.skippedReason).toBeUndefined(); - }); - - it("fails malformed awaiting continuations before handling the follow-up", async () => { - const conversationId = "slack:C_BAD_CONTINUATION:1700000000.000"; - const activeSessionId = "turn_msg-timeout-original"; - const generateAssistantReply = vi.fn().mockResolvedValue({ - text: "Recovered.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }); - await upsertAgentTurnSessionRecord({ - conversationId, - sessionId: activeSessionId, - sliceId: 1, - state: "awaiting_resume", - resumeReason: "timeout", - piMessages: turnPiMessages("please keep working"), - }); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - }, - }, - }); - - const thread = createTestThread({ - id: conversationId, - state: createAwaitingContinuationState({ activeSessionId }), - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-timeout-follow-up", - threadId: conversationId, - text: "what happened?", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(generateAssistantReply).toHaveBeenCalledOnce(); - expect(postIncludes(thread, "Recovered.")).toBe(true); - const 
failedRecord = await getAgentTurnSessionRecord( - conversationId, - activeSessionId, - ); - expect(failedRecord?.state).toBe("failed"); - expect(failedRecord?.errorMessage).toBe( - "Awaiting agent continuation metadata could not be materialized", - ); - const state = thread.getState(); - const conversation = ( - state as { - conversation?: { processing?: { activeTurnId?: string } }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBeUndefined(); - }); - - it("reschedules an awaiting continuation for repeated delivery of the active message", async () => { - const conversationId = "slack:C9TIMEDUP:1700000000.000"; - const destination = slackDestination("C9TIMEDUP"); - const activeSessionId = "turn_msg-duplicate"; - const scheduleAgentContinue = vi.fn().mockResolvedValue(undefined); - const getAwaitingAgentContinueRequest = vi.fn().mockResolvedValue({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - const generateAssistantReply = vi.fn(); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - getAwaitingAgentContinueRequest, - scheduleAgentContinue, - }, - }, - }); - - const thread = createTestThread({ - id: conversationId, - state: createAwaitingContinuationState({ - activeSessionId, - userMessageId: "msg-duplicate", - }), - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-duplicate", - threadId: conversationId, - text: "please keep working", - isMention: true, - }), - { destination }, - ); - - expect(scheduleAgentContinue).toHaveBeenCalledWith({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - expect(generateAssistantReply).not.toHaveBeenCalled(); - }); - - it("does not reschedule an awaiting continuation for an already-replied duplicate", async () => { - const conversationId = "slack:C9TIMEREPD:1700000000.000"; - const destination = slackDestination("C9TIMEREPD"); - 
const activeSessionId = "turn_msg-replied-duplicate"; - const scheduleAgentContinue = vi.fn().mockResolvedValue(undefined); - const getAwaitingAgentContinueRequest = vi.fn().mockResolvedValue({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - const generateAssistantReply = vi.fn(); - const onTurnStatePersisted = vi.fn(); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - getAwaitingAgentContinueRequest, - scheduleAgentContinue, - }, - }, - }); - - const thread = createTestThread({ - id: conversationId, - state: createAwaitingContinuationState({ - activeSessionId, - replied: true, - userMessageId: "msg-replied-duplicate", - }), - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-replied-duplicate", - threadId: conversationId, - text: "please keep working", - isMention: true, - }), - { - destination, - onTurnStatePersisted, - }, - ); - - expect(getAwaitingAgentContinueRequest).not.toHaveBeenCalled(); - expect(scheduleAgentContinue).not.toHaveBeenCalled(); - expect(generateAssistantReply).not.toHaveBeenCalled(); - expect(onTurnStatePersisted).toHaveBeenCalledOnce(); - expect(thread.posts).toEqual([]); - }); - - it("keeps awaiting continuation state without a visible acknowledgement", async () => { - const conversationId = "slack:C9TIMENOTI:1700000000.000"; - const destination = slackDestination("C9TIMENOTI"); - const activeSessionId = "turn_msg-original"; - const scheduleAgentContinue = vi.fn().mockResolvedValue(undefined); - const getAwaitingAgentContinueRequest = vi.fn().mockResolvedValue({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - const generateAssistantReply = vi.fn(); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - getAwaitingAgentContinueRequest, - scheduleAgentContinue, - }, - }, - }); - - const thread = createTestThread({ 
- id: conversationId, - state: createAwaitingContinuationState({ activeSessionId }), - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-retry-notice-fail", - threadId: conversationId, - text: "what happened?", - isMention: true, - }), - { destination }, - ); - - expect(scheduleAgentContinue).toHaveBeenCalledWith({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - expect(generateAssistantReply).not.toHaveBeenCalled(); - expect(thread.posts).toEqual([]); - - const state = thread.getState(); - const conversation = ( - state as { - conversation?: { - processing?: { activeTurnId?: string }; - }; - } - ).conversation; - expect(conversation?.processing?.activeTurnId).toBe(activeSessionId); - }); - - it("does not start a new turn when rescheduling an active continuation fails", async () => { - const conversationId = "slack:C9TIMEFAIL:1700000000.000"; - const destination = slackDestination("C9TIMEFAIL"); - const activeSessionId = "turn_msg-original"; - const scheduleAgentContinue = vi - .fn() - .mockRejectedValue(new Error("resume callback unavailable")); - const getAwaitingAgentContinueRequest = vi.fn().mockResolvedValue({ - conversationId, - destination, - sessionId: activeSessionId, - expectedVersion: 4, - }); - const generateAssistantReply = vi.fn(); - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - getAwaitingAgentContinueRequest, - scheduleAgentContinue, - }, - }, - }); - - const thread = createTestThread({ - id: conversationId, - state: createAwaitingContinuationState({ activeSessionId }), - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-retry-fail", - threadId: conversationId, - text: "what happened?", - isMention: true, - }), - { destination }, - ); - - expect(generateAssistantReply).not.toHaveBeenCalled(); - expect(thread.posts).toEqual([ - expect.stringContaining( - "I ran into an internal 
error while processing that.", - ), - ]); - }); - - it("posts an interruption marker on the finalized provider-error reply", async () => { - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - await context?.onTextDelta?.("Partial output..."); - return { - text: "Partial output...", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "provider_error" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ - id: "slack:C_STREAM_FAIL:1700000000.000", - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-stream-fail", - threadId: "slack:C_STREAM_FAIL:1700000000.000", - text: "do work", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(thread.posts).toHaveLength(1); - const postText = - typeof thread.posts[0] === "string" - ? thread.posts[0] - : ((thread.posts[0] as { markdown?: string }).markdown ?? 
""); - expect(postText).toContain("Partial output..."); - expect(postText).toContain(getSlackInterruptionMarker().trim()); - expect(postText).not.toContain("event_id="); - }); - - it("emits assistant status updates in shared channel threads", async () => { - const fakeAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - await context?.onStatus?.( - makeAssistantStatus("reading", "channel messages"), - ); - return { - text: "Done.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_STATUS:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-status", - threadId: "slack:C_STATUS:1700000000.000", - text: "show the channel", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(fakeAdapter.statusCalls.length).toBeGreaterThan(0); - expect(fakeAdapter.statusCalls[0]).toEqual( - expect.objectContaining({ - channelId: "C_STATUS", - threadTs: "1700000000.000", - }), - ); - expect(fakeAdapter.statusCalls.at(-1)).toEqual({ - channelId: "C_STATUS", - threadTs: "1700000000.000", - text: "", - loadingMessages: undefined, - }); - }); - - it("does not block assistant reply generation on slow assistant status writes", async () => { - const fakeAdapter = new FakeSlackAdapter(); - let releaseFirstStatus: (() => void) | undefined; - let statusCallCount = 0; - fakeAdapter.setAssistantStatus = async () => { - statusCallCount += 1; - if (statusCallCount !== 1) { - return; - } - await new Promise((resolve) => { - releaseFirstStatus = resolve; - }); - }; - - let replyStarted = false; - const { slackRuntime } = createRuntime({ - slackAdapter: 
fakeAdapter, - services: { - conversationMemory: { - completeText: async () => ({ text: "Status thread" }) as never, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyStarted = true; - return { - text: "Still replied while status was pending.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - let settled = false; - const thread = createTestThread({ - id: "slack:D_STATUSBLOCK:1700000000.000", - }); - const turnPromise = slackRuntime - .handleNewMention( - thread, - createTestMessage({ - id: "msg-status-block", - threadId: "slack:D_STATUSBLOCK:1700000000.000", - text: "show the channel", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ) - .then(() => { - settled = true; - }); - - await vi.waitFor(() => { - expect(replyStarted).toBe(true); - }); - - expect(settled).toBe(false); - - releaseFirstStatus!(); - await turnPromise; - }); - - it("posts the final reply even while the initial assistant status write is pending", async () => { - const fakeAdapter = new FakeSlackAdapter(); - let releaseFirstStatus: (() => void) | undefined; - let statusCallCount = 0; - fakeAdapter.setAssistantStatus = async ( - channelId, - threadTs, - text, - loadingMessages, - ) => { - statusCallCount += 1; - if (statusCallCount === 1) { - await new Promise((resolve) => { - releaseFirstStatus = resolve; - }); - } - fakeAdapter.statusCalls.push({ - channelId, - threadTs, - text, - loadingMessages, - }); - }; - - let replyStarted = false; - const thread = createTestThread({ - id: "slack:D_STATUSORDER:1700000001.000", - }); - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => ({ text: "Status thread" }) as never, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyStarted = 
true; - return { - text: "Reply lands after the pending status is drained.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - let settled = false; - const turnPromise = slackRuntime - .handleNewMention( - thread, - createTestMessage({ - id: "msg-status-order", - threadId: thread.id, - text: "answer quickly", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ) - .then(() => { - settled = true; - }); - - await vi.waitFor(() => { - expect(replyStarted).toBe(true); - expect(thread.posts).toEqual([ - expect.objectContaining({ - markdown: "Reply lands after the pending status is drained.", - }), - ]); - }); - - expect(settled).toBe(false); - - releaseFirstStatus!(); - await turnPromise; - }); - - it("thread title: generates and sets title after first assistant reply", async () => { - const fakeAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => - ({ - text: "Debugging Node.js Memory Leaks", - message: { role: "assistant", content: "" }, - }) as any, - }, - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Here is how to debug memory leaks.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - }, - }); - - const thread = createTestThread({ id: "slack:D_TITLE:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-title-1", - threadId: "slack:D_TITLE:1700000000.000", - text: "How do I debug memory leaks in Node?", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - await new Promise((r) => setTimeout(r, 0)); - - 
const generatedTitleCall = fakeAdapter.titleCalls.find( - (c) => c.title !== "Junior", - ); - expect(generatedTitleCall).toBeDefined(); - expect(generatedTitleCall!.title).toBe("Debugging Node.js Memory Leaks"); - expect(generatedTitleCall!.channelId).toBe("D_TITLE"); - expect(generatedTitleCall!.threadTs).toBe("1700000000.000"); - }); - - it("thread title: uses the first human message we know about in the thread", async () => { - const fakeAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async (params) => { - const prompt = - typeof params.messages[0]?.content === "string" - ? params.messages[0].content - : ""; - return { - text: prompt.includes("Original production issue summary") - ? "Production Issue Summary" - : "Follow-up Clarification", - message: { role: "assistant", content: "" }, - } as any; - }, - }, - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Here is the updated answer.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - }, - }); - - const thread = createTestThread({ id: "slack:D_TITLE4:1700000000.000" }); - const earlierMessage = createTestMessage({ - id: "msg-title4-earlier", - threadId: "slack:D_TITLE4:1700000000.000", - text: "Original production issue summary", - author: { userId: "U-title4", isBot: false }, - }); - earlierMessage.metadata.dateSent = new Date(1_700_000_000_000); - thread.recentMessages = [earlierMessage]; - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-title4-current", - threadId: "slack:D_TITLE4:1700000000.000", - text: "Can you also include the regression window?", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - await new Promise((r) => setTimeout(r, 0)); - - const 
generatedTitleCall = fakeAdapter.titleCalls.find( - (c) => c.title !== "Junior", - ); - expect(generatedTitleCall).toBeDefined(); - expect(generatedTitleCall!.title).toBe("Production Issue Summary"); - }); - - it("thread title: still generates for a new thread with starter assistant content", async () => { - const fakeAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => - ({ - text: "Today's Date", - message: { role: "assistant", content: "" }, - }) as any, - }, - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Today is April 16, 2026.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - }, - }); - - const thread = createTestThread({ - id: "slack:D_TITLE5:1700000000.000", - }); - const starterMessage = createTestMessage({ - id: "msg-title5-starter", - threadId: "slack:D_TITLE5:1700000000.000", - text: "How can I help?", - author: { - isBot: true, - isMe: true, - userId: "B-title5", - userName: "junior", - }, - }); - starterMessage.metadata.dateSent = new Date(1_700_000_000_000); - thread.recentMessages = [starterMessage]; - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-title5-user", - threadId: "slack:D_TITLE5:1700000000.000", - text: "what's today's date", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - await new Promise((r) => setTimeout(r, 0)); - - const generatedTitleCall = fakeAdapter.titleCalls.find( - (c) => c.title !== "Junior", - ); - expect(generatedTitleCall).toBeDefined(); - expect(generatedTitleCall!.title).toBe("Today's Date"); - }); - - it("thread title: does not block reply delivery when generation is slow", async () => { - const fakeAdapter = new FakeSlackAdapter(); - let resolveTitle: (() => 
void) | undefined; - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => - await new Promise((resolve) => { - resolveTitle = () => - resolve({ - text: "Today's Date", - message: { role: "assistant", content: "" }, - } as any); - }), - }, - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Today is April 16, 2026.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - }, - }); - - const thread = createTestThread({ id: "slack:D_TITLE6:1700000000.000" }); - let settled = false; - const turnPromise = slackRuntime - .handleNewMention( - thread, - createTestMessage({ - id: "msg-title-6", - threadId: "slack:D_TITLE6:1700000000.000", - text: "what's today's date", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ) - .then(() => { - settled = true; - }); - - await vi.waitFor(() => { - expect(postIncludes(thread, "Today is April 16, 2026.")).toBe(true); - }); - await vi.waitFor(() => { - expect(settled).toBe(true); - }); - expect( - fakeAdapter.titleCalls.some((call) => call.title === "Today's Date"), - ).toBe(false); - - resolveTitle!(); - await turnPromise; - await vi.waitFor(() => { - expect( - fakeAdapter.titleCalls.some((call) => call.title === "Today's Date"), - ).toBe(true); - }); - }); - - it("thread title: preserves artifact updates when title resolves before completion", async () => { - const fakeAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => - ({ - text: "Today's Date", - message: { role: "assistant", content: "" }, - }) as any, - }, - replyExecutor: { - generateAssistantReply: async ( - _text: string, - context?: ReplyRequestContext, - ) => { - await vi.waitFor(() 
=> { - expect( - fakeAdapter.titleCalls.some( - (call) => call.title === "Today's Date", - ), - ).toBe(true); - }); - await context?.onArtifactStateUpdated?.({ - lastCanvasId: "F_CANVAS", - }); - return { - text: "Today is April 16, 2026.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:D_TITLE7:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-title-7", - threadId: "slack:D_TITLE7:1700000000.000", - text: "what's today's date", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(thread.getState()).toMatchObject({ - artifacts: { - assistantTitle: "Today's Date", - lastCanvasId: "F_CANVAS", - }, - }); - }); - - it("thread title: does not generate title on subsequent replies", async () => { - const fakeAdapter = new FakeSlackAdapter(); - let turnCount = 0; - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => - ({ - text: "Some Title", - message: { role: "assistant", content: "" }, - }) as any, - }, - replyExecutor: { - generateAssistantReply: async () => { - turnCount += 1; - return { - text: `reply-${turnCount}`, - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:D_TITLE2:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-t2-1", - threadId: "slack:D_TITLE2:1700000000.000", - text: "first message", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - await new Promise((r) => 
setTimeout(r, 0)); - - const titleCallsAfterFirst = fakeAdapter.titleCalls.filter( - (c) => c.title !== "Junior", - ).length; - expect(titleCallsAfterFirst).toBe(1); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-t2-2", - threadId: "slack:D_TITLE2:1700000000.000", - text: "second message", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - await new Promise((r) => setTimeout(r, 0)); - - const titleCallsAfterSecond = fakeAdapter.titleCalls.filter( - (c) => c.title !== "Junior", - ).length; - expect(titleCallsAfterSecond).toBe(1); - }); - - it("thread title: ignores Slack permission errors when setting title", async () => { - const fakeAdapter = new FakeSlackAdapter(); - fakeAdapter.setAssistantTitle = async () => { - const error = new Error( - "An API error occurred: no_permission", - ) as Error & { - data?: { error?: string }; - }; - error.data = { error: "no_permission" }; - throw error; - }; - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => - ({ - text: "Permission Safe Title", - message: { role: "assistant", content: "" }, - }) as any, - }, - replyExecutor: { - generateAssistantReply: async () => ({ - text: "This reply should still succeed.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - }, - }); - - const thread = createTestThread({ id: "slack:D_TITLE3:1700000000.000" }); - - await expect( - slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-title-3", - threadId: "slack:D_TITLE3:1700000000.000", - text: "title this thread please", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ), - ).resolves.toBeUndefined(); - await new Promise((r) => setTimeout(r, 0)); - 
expect(thread.posts.length).toBeGreaterThan(0); - }); - - it("thread title: does not regenerate after stable Slack permission failures", async () => { - const fakeAdapter = new FakeSlackAdapter(); - fakeAdapter.setAssistantTitle = async () => { - const error = new Error( - "An API error occurred: no_permission", - ) as Error & { - data?: { error?: string }; - }; - error.data = { error: "no_permission" }; - throw error; - }; - - let titleGenerationCount = 0; - const { slackRuntime } = createRuntime({ - slackAdapter: fakeAdapter, - services: { - conversationMemory: { - completeText: async () => { - titleGenerationCount += 1; - return { - text: "Stable Permission Title", - message: { role: "assistant", content: "" }, - } as any; - }, - }, - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Reply still succeeds.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }), - }, - }, - }); - - const thread = createTestThread({ id: "slack:D_TITLE7:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-title7-1", - threadId: "slack:D_TITLE7:1700000000.000", - text: "first message", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-title7-2", - threadId: "slack:D_TITLE7:1700000000.000", - text: "second message", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(titleGenerationCount).toBe(1); - }); - - it("new mention first turn has no conversation context without prior thread messages", async () => { - const capturedContexts: Array = []; - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - capturedContexts.push(context?.conversationContext); - 
return { - text: "First reply.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const threadId = "slack:C_FIRST_EMPTY:1700000000.000"; - const thread = createTestThread({ id: threadId }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-first-current", - threadId, - text: "Can you summarize this?", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - expect(capturedContexts).toEqual([undefined]); - }); - - it("new mention first turn uses pre-existing thread transcript without the current message", async () => { - const capturedContexts: Array = []; - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - capturedContexts.push(context?.conversationContext); - return { - text: "Follow-up reply.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const threadId = "slack:C_FIRST_EXISTING:1700000000.000"; - const thread = createTestThread({ id: threadId }); - const priorMessage = createTestMessage({ - id: "msg-first-prior", - threadId, - text: "Original production issue summary.", - author: { userId: "U-prior", userName: "alice", isBot: false }, - }); - priorMessage.metadata.dateSent = new Date(1_700_000_000_000); - const currentMessage = createTestMessage({ - id: "msg-first-current", - threadId, - text: "Can you include the regression window?", - isMention: true, - author: { userId: "U-current", userName: "bob", isBot: false }, - }); - currentMessage.metadata.dateSent = new Date(1_700_000_001_000); - thread.recentMessages = [priorMessage, currentMessage]; - - await 
slackRuntime.handleNewMention(thread, currentMessage, { - destination: createTestDestination(thread), - }); - - expect(capturedContexts).toHaveLength(1); - expect(capturedContexts[0]).toContain(""); - expect(capturedContexts[0]).toContain("Original production issue summary."); - expect(capturedContexts[0]).not.toContain( - "Can you include the regression window?", - ); - }); - - it("subscribed message: does not include newer thread messages in turn context", async () => { - const capturedContexts: Array = []; - const { slackRuntime } = createRuntime({ - services: { - conversationMemory: { - completeText: async () => ({ text: "Context thread" }) as never, - }, - subscribedReplyPolicy: { - completeObject: async () => - ({ - object: { - should_reply: true, - confidence: 1, - reason: "follow-up", - }, - text: '{"should_reply":true,"confidence":1,"reason":"follow-up"}', - }) as any, - }, - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - capturedContexts.push(context?.conversationContext); - return { - text: "Responding to first message only.", - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const threadId = "slack:D_ORDER:1700000000.000"; - const thread = createTestThread({ id: threadId }); - const firstMessage = createTestMessage({ - id: "1700000000.100", - threadId, - text: "you work now?", - isMention: false, - }); - const laterMessage = createTestMessage({ - id: "1700000000.200", - threadId, - text: "hello", - isMention: false, - }); - - Object.defineProperty(thread, "messages", { - configurable: true, - get() { - return (async function* () { - // Chat SDK thread iterators are newest-first. 
- yield laterMessage; - yield firstMessage; - })(); - }, - }); - - await slackRuntime.handleSubscribedMessage(thread, firstMessage, { - destination: createTestDestination(thread), - }); - - expect(capturedContexts).toHaveLength(1); - expect(capturedContexts[0]).toBeUndefined(); - }); - - it("multi-turn state continuity: second turn sees first turn's conversation state", async () => { - let turnCount = 0; - const { slackRuntime } = createRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => { - turnCount += 1; - return { - text: `reply-${turnCount}`, - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_MULTI:1700000000.000" }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-t1", - threadId: "slack:C_MULTI:1700000000.000", - text: "first turn", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - const stateAfterFirstTurn = thread.getState(); - const conv1 = ( - stateAfterFirstTurn as { conversation?: { messages?: unknown[] } } - ).conversation; - expect(conv1).toBeDefined(); - const messageCountAfterFirst = conv1?.messages?.length ?? 0; - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "msg-t2", - threadId: "slack:C_MULTI:1700000000.000", - text: "second turn", - isMention: true, - }), - { destination: createTestDestination(thread) }, - ); - - const stateAfterSecondTurn = thread.getState(); - const conv2 = ( - stateAfterSecondTurn as { conversation?: { messages?: unknown[] } } - ).conversation; - expect(conv2).toBeDefined(); - expect(conv2?.messages?.length ?? 
0).toBeGreaterThan( - messageCountAfterFirst, - ); - }); -}); diff --git a/packages/junior/tests/integration/slack/bot-image-hydration.test.ts b/packages/junior/tests/integration/slack/bot-image-hydration.test.ts deleted file mode 100644 index 7ebfa9669..000000000 --- a/packages/junior/tests/integration/slack/bot-image-hydration.test.ts +++ /dev/null @@ -1,1126 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import type { Thread } from "chat"; -import { - createTestMessage, - createTestThread, - createTestDestination, -} from "../../fixtures/slack-harness"; - -const listThreadRepliesMock = vi.fn(); -const ORIGINAL_ENV = { ...process.env }; - -async function createRuntime( - args: Parameters< - typeof import("../../fixtures/chat-runtime").createTestChatRuntime - >[0], - env: NodeJS.ProcessEnv = {}, -) { - process.env = { - ...ORIGINAL_ENV, - AI_VISION_MODEL: "", - SLACK_BOT_TOKEN: "", - SLACK_BOT_USER_TOKEN: "", - ...env, - }; - vi.resetModules(); - const { createTestChatRuntime } = await import("../../fixtures/chat-runtime"); - return createTestChatRuntime(args); -} - -function makeSuccessReply(text = "ok") { - return { - text, - diagnostics: { - assistantMessageCount: 1, - modelId: "test-model", - outcome: "success" as const, - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; -} - -function extractImageAttachmentSummary( - promptText: string | undefined, -): string | undefined { - if (!promptText) { - return undefined; - } - - const match = promptText.match(/\n([\s\S]*)\n<\/summary>/); - return match?.[1]; -} - -describe("bot image hydration", () => { - beforeEach(() => { - listThreadRepliesMock.mockReset(); - }); - afterEach(() => { - vi.restoreAllMocks(); - process.env = { ...ORIGINAL_ENV }; - vi.resetModules(); - }); - - it("hydrates thread image backfill once across agent instances with shared state", async () => { - listThreadRepliesMock.mockResolvedValue([ - { - ts: 
"1700000000.100", - files: [], - }, - ]); - - const { slackRuntime } = await createRuntime( - { - services: { - visionContext: { - listThreadReplies: listThreadRepliesMock, - }, - replyExecutor: { - generateAssistantReply: async () => makeSuccessReply(), - }, - }, - }, - { - AI_VISION_MODEL: "openai/gpt-5.4", - }, - ); - const firstThread = createTestThread({ - id: "slack:C_IMAGE:1700000000.000", - state: { - conversation: { - schemaVersion: 1, - messages: [ - { - id: "1700000000.100", - role: "user", - text: "candidate profile image posted earlier", - createdAtMs: 1700000000100, - meta: { - slackTs: "1700000000.100", - }, - author: { - userId: "U-user", - userName: "user", - }, - }, - ], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 1, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }); - - await slackRuntime.handleNewMention( - firstThread, - createTestMessage({ - id: "1700000000.200", - text: "/brief on this candidate", - threadId: "slack:C_IMAGE:1700000000.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - }), - { destination: createTestDestination(firstThread) }, - ); - - const persisted = firstThread.getState(); - const secondThread = createTestThread({ - id: "slack:C_IMAGE:1700000000.000", - state: persisted, - }); - - await slackRuntime.handleNewMention( - secondThread, - createTestMessage({ - id: "1700000000.300", - text: "follow up without new images", - threadId: "slack:C_IMAGE:1700000000.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - }), - { destination: createTestDestination(secondThread) }, - ); - - expect(listThreadRepliesMock).toHaveBeenCalledTimes(1); - }, 20_000); - - 
it("does not hydrate thread images when AI_VISION_MODEL is unset", async () => { - const { slackRuntime } = await createRuntime({ - services: { - visionContext: { - listThreadReplies: listThreadRepliesMock, - }, - replyExecutor: { - generateAssistantReply: async () => makeSuccessReply(), - }, - }, - }); - const thread = createTestThread({ - id: "slack:C_IMAGE:1700000001.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "1700000001.200", - text: "", - threadId: "slack:C_IMAGE:1700000001.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - attachments: [ - { - type: "image", - mimeType: "image/png", - name: "screen.png", - data: Buffer.from("fake-image"), - }, - ], - }), - { destination: createTestDestination(thread) }, - ); - - expect(listThreadRepliesMock).not.toHaveBeenCalled(); - const persistedState = thread.getState() as { - conversation: { - messages: Array<{ - author?: { - isBot?: boolean; - }; - text: string; - meta?: { - attachmentCount?: number; - imageAttachmentCount?: number; - imagesHydrated?: boolean; - slackTs?: string; - }; - }>; - vision: { - backfillCompletedAtMs?: number; - }; - }; - }; - expect( - persistedState.conversation.vision.backfillCompletedAtMs, - ).toBeUndefined(); - const persistedMessage = persistedState.conversation.messages.find( - (entry) => entry.meta?.slackTs === "1700000001.200", - ); - expect(persistedMessage).toMatchObject({ - author: { - isBot: false, - }, - text: "[non-text message]", - meta: { - attachmentCount: 1, - imageAttachmentCount: 1, - 
imagesHydrated: false, - slackTs: "1700000001.200", - }, - }); - }, 20_000); - - it("backfills older image messages after vision is enabled later", async () => { - const firstRuntime = await createRuntime({ - services: { - visionContext: { - listThreadReplies: listThreadRepliesMock, - }, - replyExecutor: { - generateAssistantReply: async () => makeSuccessReply(), - }, - }, - }); - const firstThread = createTestThread({ - id: "slack:C_IMAGE:1700000002.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }); - - await firstRuntime.slackRuntime.handleNewMention( - firstThread, - createTestMessage({ - id: "1700000002.100", - text: "what is in this screenshot?", - threadId: "slack:C_IMAGE:1700000002.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - attachments: [ - { - type: "image", - mimeType: "image/png", - name: "screen.png", - data: Buffer.from("fake-image"), - }, - ], - }), - { destination: createTestDestination(firstThread) }, - ); - - listThreadRepliesMock.mockResolvedValue([ - { - ts: "1700000002.100", - files: [ - { - id: "F_OLD", - mimetype: "image/png", - url_private_download: "https://files.slack.com/private/old.png", - }, - ], - }, - ]); - const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); - const completeTextMock = vi.fn(async () => ({ - text: "Recovered screenshot context", - message: {} as never, - })); - - const secondRuntime = await createRuntime( - { - services: { - visionContext: { - listThreadReplies: listThreadRepliesMock, - downloadFile: downloadFileMock, - completeText: completeTextMock, - }, - replyExecutor: { - 
generateAssistantReply: async () => makeSuccessReply(), - }, - }, - }, - { - AI_VISION_MODEL: "openai/gpt-5.4", - }, - ); - const secondThread = createTestThread({ - id: "slack:C_IMAGE:1700000002.000", - state: firstThread.getState(), - }); - - await secondRuntime.slackRuntime.handleNewMention( - secondThread, - createTestMessage({ - id: "1700000002.200", - text: "follow up without new uploads", - threadId: "slack:C_IMAGE:1700000002.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - }), - { destination: createTestDestination(secondThread) }, - ); - - expect(listThreadRepliesMock).toHaveBeenCalledTimes(1); - expect(downloadFileMock).toHaveBeenCalledTimes(1); - expect(completeTextMock).toHaveBeenCalledTimes(1); - const persistedState = secondThread.getState() as { - conversation: { - messages: Array<{ - id: string; - meta?: { - imagesHydrated?: boolean; - imageFileIds?: string[]; - }; - }>; - vision: { - backfillCompletedAtMs?: number; - byFileId: Record; - }; - }; - }; - expect( - persistedState.conversation.messages.find( - (message) => message.id === "1700000002.100", - )?.meta, - ).toEqual( - expect.objectContaining({ - imagesHydrated: true, - imageFileIds: ["F_OLD"], - }), - ); - expect(persistedState.conversation.vision.byFileId.F_OLD?.summary).toBe( - "Recovered screenshot context", - ); - expect(persistedState.conversation.vision.backfillCompletedAtMs).toBeTypeOf( - "number", - ); - }); - - it("hydrates skipped passive screenshots when a later explicit mention needs them", async () => { - listThreadRepliesMock.mockResolvedValue([ - { - ts: "1700000002.100", - files: [ - { - id: "F_PASSIVE", - mimetype: "image/png", - url_private_download: "https://files.slack.com/private/passive.png", - }, - ], - }, - ]); - const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); - const completeTextMock = vi.fn(async () => ({ - text: "Passive screenshot summary", - 
message: {} as never, - })); - const generateAssistantReply = vi.fn( - async (_text: string, context: any) => { - expect(context?.conversationContext).toContain( - "Passive screenshot summary", - ); - return makeSuccessReply(); - }, - ); - - const { slackRuntime } = await createRuntime( - { - services: { - subscribedReplyPolicy: { - completeObject: async () => { - throw new Error( - "classifier should not run for messages addressed to another bot", - ); - }, - }, - visionContext: { - listThreadReplies: listThreadRepliesMock, - downloadFile: downloadFileMock, - completeText: completeTextMock, - }, - replyExecutor: { - generateAssistantReply, - }, - }, - }, - { - AI_VISION_MODEL: "openai/gpt-5.4", - }, - ); - const thread = createTestThread({ - id: "slack:C_IMAGE:1700000006.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "1700000002.100", - text: "@Cursor can you look at this?", - threadId: "slack:C_IMAGE:1700000006.000", - isMention: false, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - attachments: [ - { - type: "image", - mimeType: "image/png", - name: "passive.png", - url: "https://files.slack.com/private/passive.png", - }, - ], - }), - { destination: createTestDestination(thread) }, - ); - - expect(generateAssistantReply).not.toHaveBeenCalled(); - expect(listThreadRepliesMock).not.toHaveBeenCalled(); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "1700000002.200", - text: "<@U_APP> what is in the screenshot above?", - threadId: "slack:C_IMAGE:1700000006.000", - 
isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - }), - { destination: createTestDestination(thread) }, - ); - - expect(listThreadRepliesMock).toHaveBeenCalledTimes(1); - expect(downloadFileMock).toHaveBeenCalledTimes(1); - expect(completeTextMock).toHaveBeenCalledTimes(1); - expect(generateAssistantReply).toHaveBeenCalledTimes(1); - - const persistedState = thread.getState() as { - conversation: { - messages: Array<{ - id: string; - meta?: { - imagesHydrated?: boolean; - imageFileIds?: string[]; - }; - }>; - vision: { - byFileId: Record; - }; - }; - }; - expect( - persistedState.conversation.messages.find( - (message) => message.id === "1700000002.100", - )?.meta, - ).toEqual( - expect.objectContaining({ - imagesHydrated: true, - imageFileIds: ["F_PASSIVE"], - }), - ); - expect(persistedState.conversation.vision.byFileId.F_PASSIVE?.summary).toBe( - "Passive screenshot summary", - ); - }); - - it("reuses the thread image summary instead of re-analyzing the same upload", async () => { - listThreadRepliesMock.mockResolvedValue([ - { - ts: "1700000003.100", - files: [ - { - id: "F_CUR", - mimetype: "image/png", - url_private_download: "https://files.slack.com/private/current.png", - }, - ], - }, - ]); - const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); - const completeTextMock = vi.fn(async () => ({ - text: "Current screenshot summary", - message: {} as never, - })); - const attachmentFetch = vi.fn(async () => Buffer.from("attachment-image")); - const generateAssistantReply = vi.fn( - async (_text: string, context: any) => { - expect(context?.userAttachments).toEqual([ - expect.objectContaining({ - mediaType: "image/png", - filename: "screen.png", - promptText: expect.stringContaining("Current screenshot summary"), - }), - ]); - return makeSuccessReply(); - }, - ); - - const { slackRuntime } = await createRuntime( - { - services: { - visionContext: { - 
listThreadReplies: listThreadRepliesMock, - downloadFile: downloadFileMock, - completeText: completeTextMock, - }, - replyExecutor: { - generateAssistantReply, - }, - }, - }, - { - AI_VISION_MODEL: "openai/gpt-5.4", - }, - ); - - await slackRuntime.handleNewMention( - createTestThread({ - id: "slack:C_IMAGE:1700000003.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }), - createTestMessage({ - id: "1700000003.100", - text: "explain this screenshot", - threadId: "slack:C_IMAGE:1700000003.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - attachments: [ - { - type: "image", - mimeType: "image/png", - name: "screen.png", - fetchData: attachmentFetch, - }, - ], - }), - { - destination: createTestDestination( - createTestThread({ - id: "slack:C_IMAGE:1700000003.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }), - ), - }, - ); - - expect(downloadFileMock).toHaveBeenCalledTimes(1); - expect(completeTextMock).toHaveBeenCalledTimes(1); - expect(attachmentFetch).not.toHaveBeenCalled(); - expect(generateAssistantReply).toHaveBeenCalledTimes(1); - }); - - it("keeps cached image summaries aligned with attachment positions", async () => { - listThreadRepliesMock.mockResolvedValue([ - { - ts: "1700000004.100", - files: [ - { - id: "F_MISSING", - mimetype: "image/png", - 
url_private_download: "https://files.slack.com/private/missing.png", - }, - { - id: "F_CACHED", - mimetype: "image/png", - url_private_download: "https://files.slack.com/private/cached.png", - }, - ], - }, - ]); - const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); - let completeTextCallCount = 0; - const completeTextMock = vi.fn(async () => { - completeTextCallCount += 1; - if (completeTextCallCount === 1) { - return { - text: "", - message: {} as never, - }; - } - if (completeTextCallCount === 2) { - return { - text: "Second cached summary", - message: {} as never, - }; - } - return { - text: "First attachment summary", - message: {} as never, - }; - }); - const firstAttachmentFetch = vi.fn(async () => Buffer.from("first-image")); - const secondAttachmentFetch = vi.fn(async () => - Buffer.from("second-image"), - ); - const generateAssistantReply = vi.fn( - async (_text: string, context: any) => { - expect(context?.userAttachments).toEqual([ - expect.objectContaining({ - filename: "first.png", - promptText: expect.stringContaining("First attachment summary"), - }), - expect.objectContaining({ - filename: "second.png", - promptText: expect.stringContaining("Second cached summary"), - }), - ]); - return makeSuccessReply(); - }, - ); - - const { slackRuntime } = await createRuntime( - { - services: { - visionContext: { - listThreadReplies: listThreadRepliesMock, - downloadFile: downloadFileMock, - completeText: completeTextMock, - }, - replyExecutor: { - generateAssistantReply, - }, - }, - }, - { - AI_VISION_MODEL: "openai/gpt-5.4", - }, - ); - - await slackRuntime.handleNewMention( - createTestThread({ - id: "slack:C_IMAGE:1700000004.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - 
}, - vision: { - byFileId: {}, - }, - }, - }, - }), - createTestMessage({ - id: "1700000004.100", - text: "compare these screenshots", - threadId: "slack:C_IMAGE:1700000004.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - attachments: [ - { - type: "image", - mimeType: "image/png", - name: "first.png", - fetchData: firstAttachmentFetch, - }, - { - type: "image", - mimeType: "image/png", - name: "second.png", - fetchData: secondAttachmentFetch, - }, - ], - }), - { - destination: createTestDestination( - createTestThread({ - id: "slack:C_IMAGE:1700000004.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }), - ), - }, - ); - - expect(downloadFileMock).toHaveBeenCalledTimes(2); - expect(completeTextMock).toHaveBeenCalledTimes(3); - expect(firstAttachmentFetch).toHaveBeenCalledTimes(1); - expect(secondAttachmentFetch).not.toHaveBeenCalled(); - expect(generateAssistantReply).toHaveBeenCalledTimes(1); - }); - - it("truncates inline image summaries to the cached summary limit", async () => { - listThreadRepliesMock.mockResolvedValue([]); - const longSummary = "A".repeat(550); - const completeTextMock = vi.fn(async () => ({ - text: longSummary, - message: {} as never, - })); - const generateAssistantReply = vi.fn( - async (_text: string, context: any) => { - const promptText = context?.userAttachments?.[0]?.promptText; - const summary = extractImageAttachmentSummary(promptText); - expect(summary).toBe(longSummary.slice(0, 500)); - expect(summary).toHaveLength(500); - return makeSuccessReply(); - }, - ); - - const { slackRuntime } = await createRuntime( - { - services: { - 
visionContext: { - listThreadReplies: listThreadRepliesMock, - completeText: completeTextMock, - }, - replyExecutor: { - generateAssistantReply, - }, - }, - }, - { - AI_VISION_MODEL: "openai/gpt-5.4", - }, - ); - - await slackRuntime.handleNewMention( - createTestThread({ - id: "slack:C_IMAGE:1700000005.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }), - createTestMessage({ - id: "1700000005.100", - text: "summarize this screenshot", - threadId: "slack:C_IMAGE:1700000005.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - attachments: [ - { - type: "image", - mimeType: "image/png", - name: "long.png", - data: Buffer.from("image-bytes"), - }, - ], - }), - { - destination: createTestDestination( - createTestThread({ - id: "slack:C_IMAGE:1700000005.000", - state: { - conversation: { - schemaVersion: 1, - messages: [], - compactions: [], - backfill: { - completedAtMs: 1700000000000, - source: "recent_messages", - }, - processing: {}, - stats: { - estimatedContextTokens: 0, - totalMessageCount: 0, - compactedMessageCount: 0, - updatedAtMs: 1700000000000, - }, - vision: { - byFileId: {}, - }, - }, - }, - }), - ), - }, - ); - - expect(completeTextMock).toHaveBeenCalledTimes(1); - expect(generateAssistantReply).toHaveBeenCalledTimes(1); - }); - - it("includes generated files in thread.post via SDK file upload", async () => { - const generatedFile = { - data: Buffer.from("fake-png"), - filename: "generated.png", - mimeType: "image/png", - }; - - const { slackRuntime } = await createRuntime({ - services: { - visionContext: { - listThreadReplies: 
listThreadRepliesMock.mockResolvedValue([]), - }, - replyExecutor: { - generateAssistantReply: async () => ({ - ...makeSuccessReply("Here is your image"), - files: [generatedFile], - }), - }, - }, - }); - - const postSpy = vi.fn().mockResolvedValue(undefined); - const thread = createTestThread({ - id: "slack:C_UPLOAD:1700000000.000", - state: {}, - }); - thread.post = postSpy as unknown as Thread["post"]; - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "1700000000.200", - text: "generate an image", - threadId: "slack:C_UPLOAD:1700000000.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - }), - { destination: createTestDestination(thread) }, - ); - - const filePost = postSpy.mock.calls.find( - (call: unknown[]) => - typeof call[0] === "object" && - call[0] !== null && - "files" in (call[0] as Record) && - Array.isArray((call[0] as { files?: unknown[] }).files) && - (call[0] as { files: unknown[] }).files.length > 0, - ); - expect(filePost).toBeDefined(); - expect( - (filePost![0] as { files: Array<{ filename: string }> }).files[0] - .filename, - ).toBe("generated.png"); - }); - - it("attaches files inline on the finalized reply post", async () => { - const { slackRuntime } = await createRuntime({ - services: { - visionContext: { - listThreadReplies: listThreadRepliesMock.mockResolvedValue([]), - }, - replyExecutor: { - generateAssistantReply: async (_text: string, _context: any) => { - return { - ...makeSuccessReply("finalized content"), - files: [ - { - data: Buffer.from("fake-png"), - filename: "generated.png", - mimeType: "image/png", - }, - ], - }; - }, - }, - }, - }); - - const postSpy = vi.fn().mockResolvedValue(undefined); - const thread = createTestThread({ - id: "slack:C_STREAM:1700000000.000", - state: {}, - }); - thread.post = postSpy as unknown as Thread["post"]; - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ 
- id: "1700000000.200", - text: "generate an image", - threadId: "slack:C_STREAM:1700000000.000", - isMention: true, - author: { - userId: "U-user", - userName: "user", - fullName: "User Example", - isBot: false, - isMe: false, - }, - }), - { destination: createTestDestination(thread) }, - ); - - expect(postSpy.mock.calls).toHaveLength(1); - - const filePost = postSpy.mock.calls.find( - (call: unknown[]) => - typeof call[0] === "object" && - call[0] !== null && - "files" in (call[0] as Record) && - Array.isArray((call[0] as { files?: unknown[] }).files) && - (call[0] as { files: unknown[] }).files.length > 0, - ); - expect(filePost).toBeDefined(); - const filePostArg = filePost![0] as Record; - expect(filePostArg).toHaveProperty("markdown", "finalized content"); - expect((filePostArg.files as Array<{ filename: string }>)[0].filename).toBe( - "generated.png", - ); - }); -}); diff --git a/packages/junior/tests/integration/slack-canvases.test.ts b/packages/junior/tests/integration/slack/canvas-create-tool.test.ts similarity index 98% rename from packages/junior/tests/integration/slack-canvases.test.ts rename to packages/junior/tests/integration/slack/canvas-create-tool.test.ts index c6f22f51b..3fb70eb3c 100644 --- a/packages/junior/tests/integration/slack-canvases.test.ts +++ b/packages/junior/tests/integration/slack/canvas-create-tool.test.ts @@ -4,13 +4,13 @@ import { canvasesAccessSetOk, canvasesCreateOk, filesInfoOk, -} from "../fixtures/slack/factories/api"; +} from "../../fixtures/slack/factories/api"; import { getCapturedSlackApiCalls, queueSlackApiError, queueSlackApiResponse, queueSlackRateLimit, -} from "../msw/handlers/slack-api"; +} from "../../msw/handlers/slack-api"; describe("createCanvas", () => { beforeEach(() => { diff --git a/packages/junior/tests/integration/slack-canvas-edit.test.ts b/packages/junior/tests/integration/slack/canvas-edit-tool.test.ts similarity index 98% rename from packages/junior/tests/integration/slack-canvas-edit.test.ts rename 
to packages/junior/tests/integration/slack/canvas-edit-tool.test.ts index 81e8be3ad..2ccbbff2d 100644 --- a/packages/junior/tests/integration/slack-canvas-edit.test.ts +++ b/packages/junior/tests/integration/slack/canvas-edit-tool.test.ts @@ -4,12 +4,15 @@ import { createSlackCanvasWriteTool, } from "@/chat/tools/slack/canvas-tools"; import type { ToolState } from "@/chat/tools/types"; -import { canvasesEditOk, filesInfoOk } from "../fixtures/slack/factories/api"; +import { + canvasesEditOk, + filesInfoOk, +} from "../../fixtures/slack/factories/api"; import { getCapturedSlackApiCalls, queueSlackApiResponse, queueSlackPrivateFileDownload, -} from "../msw/handlers/slack-api"; +} from "../../msw/handlers/slack-api"; function createState( options: { diff --git a/packages/junior/tests/integration/slack/canvas-failure-recovery-behavior.test.ts b/packages/junior/tests/integration/slack/canvas-failure-recovery-behavior.test.ts index 512a7af65..f6c437a61 100644 --- a/packages/junior/tests/integration/slack/canvas-failure-recovery-behavior.test.ts +++ b/packages/junior/tests/integration/slack/canvas-failure-recovery-behavior.test.ts @@ -1,24 +1,12 @@ import { describe, expect, it, vi } from "vitest"; import type { ReplyRequestContext } from "@/chat/respond"; import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { toPostedText } from "../../fixtures/slack/posts"; import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - return String(value); -} +} from "../../fixtures/slack/harness"; describe("Slack behavior: canvas failure recovery", () => { it("points to a created canvas when reply generation fails before final text", async 
() => { @@ -40,10 +28,8 @@ describe("Slack behavior: canvas failure recovery", () => { }, ); const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - }, + adapters: { + generateAssistantReply, }, }); const thread = createTestThread({ @@ -78,10 +64,8 @@ describe("Slack behavior: canvas failure recovery", () => { throw new Error("forced unrelated failure"); }); const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - }, + adapters: { + generateAssistantReply, }, }); const thread = createTestThread({ diff --git a/packages/junior/tests/integration/slack-canvas-read.test.ts b/packages/junior/tests/integration/slack/canvas-read-tool.test.ts similarity index 98% rename from packages/junior/tests/integration/slack-canvas-read.test.ts rename to packages/junior/tests/integration/slack/canvas-read-tool.test.ts index 5cde24ace..95d1e02c2 100644 --- a/packages/junior/tests/integration/slack-canvas-read.test.ts +++ b/packages/junior/tests/integration/slack/canvas-read-tool.test.ts @@ -1,12 +1,12 @@ import { beforeEach, describe, expect, it } from "vitest"; import { createSlackCanvasReadTool } from "@/chat/tools/slack/canvas-tools"; -import { filesInfoOk } from "../fixtures/slack/factories/api"; +import { filesInfoOk } from "../../fixtures/slack/factories/api"; import { getCapturedSlackApiCalls, queueSlackApiError, queueSlackApiResponse, queueSlackPrivateFileDownload, -} from "../msw/handlers/slack-api"; +} from "../../msw/handlers/slack-api"; describe("createSlackCanvasReadTool", () => { beforeEach(() => { diff --git a/packages/junior/tests/integration/slack-channel-tools.test.ts b/packages/junior/tests/integration/slack/channel-tools.test.ts similarity index 80% rename from packages/junior/tests/integration/slack-channel-tools.test.ts rename to packages/junior/tests/integration/slack/channel-tools.test.ts index 6fe6629ea..cb4e9be5f 100644 --- 
a/packages/junior/tests/integration/slack-channel-tools.test.ts +++ b/packages/junior/tests/integration/slack/channel-tools.test.ts @@ -2,70 +2,34 @@ import { describe, expect, it } from "vitest"; import { createSlackChannelListMessagesTool } from "@/chat/tools/slack/channel-list-messages"; import { createSlackChannelPostMessageTool } from "@/chat/tools/slack/channel-post-message"; import { createSlackMessageAddReactionTool } from "@/chat/tools/slack/message-add-reaction"; -import type { SlackToolContext } from "@/chat/tools/slack/context"; -import type { ToolState } from "@/chat/tools/types"; +import { + createTestToolRuntimeContext, + createTestToolState, + executeTestTool, + type TestToolRuntimeOverrides, +} from "../../fixtures/tool-runtime"; import { chatGetPermalinkOk, chatPostMessageOk, conversationsHistoryPage, reactionsAddOk, -} from "../fixtures/slack/factories/api"; +} from "../../fixtures/slack/factories/api"; import { getCapturedSlackApiCalls, queueSlackApiError, queueSlackApiResponse, -} from "../msw/handlers/slack-api"; - -function createToolState(): ToolState { - const operationResultCache = new Map(); - const artifactState: Record = { - listColumnMap: {}, - }; - - return { - artifactState: artifactState as ToolState["artifactState"], - patchArtifactState: () => undefined, - getCurrentListId: () => undefined, - getOperationResult: (operationKey: string): T | undefined => - operationResultCache.get(operationKey) as T | undefined, - setOperationResult: (operationKey, result) => { - operationResultCache.set(operationKey, result); - }, - }; -} +} from "../../msw/handlers/slack-api"; function createContext( - _userText: string, - overrides: Partial = {}, -): SlackToolContext { - const sourceChannelId = overrides.sourceChannelId ?? "C123"; - const destinationChannelId = - overrides.destinationChannelId ?? 
sourceChannelId; - return { - destination: { - platform: "slack", - teamId: "T123", - channelId: destinationChannelId, - }, - source: { - platform: "slack", - teamId: "T123", - channelId: sourceChannelId, - messageTs: "1700000000.321", - }, - destinationChannelId, + userText: string, + overrides: TestToolRuntimeOverrides = {}, +) { + return createTestToolRuntimeContext({ + channelId: "C123", messageTs: "1700000000.321", - sourceChannelId, - teamId: "T123", + userText, ...overrides, - }; -} - -async function executeTool(tool: any, input: TInput) { - if (typeof tool?.execute !== "function") { - throw new Error("tool execute function missing"); - } - return await tool.execute(input, {} as any); + }); } describe("slack channel tools", () => { @@ -83,9 +47,9 @@ describe("slack channel tools", () => { }); const tool = createSlackChannelPostMessageTool( createContext("summarize this thread"), - createToolState(), + createTestToolState(), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { text: "Posting this update", }); @@ -119,11 +83,11 @@ describe("slack channel tools", () => { }), }); - await executeTool( - createSlackChannelPostMessageTool(context, createToolState()), + await executeTestTool( + createSlackChannelPostMessageTool(context, createTestToolState()), { text: "Shared update" }, ); - await executeTool(createSlackChannelListMessagesTool(context), { + await executeTestTool(createSlackChannelListMessagesTool(context), { limit: 10, }); @@ -154,13 +118,13 @@ describe("slack channel tools", () => { }); const tool = createSlackChannelPostMessageTool( createContext("please post this in #eng channel"), - createToolState(), + createTestToolState(), ); - const first = await executeTool(tool, { + const first = await executeTestTool(tool, { text: "Incident resolved.", }); - const second = await executeTool(tool, { + const second = await executeTestTool(tool, { text: "Incident resolved.", }); @@ -193,7 +157,7 @@ describe("slack 
channel tools", () => { createContext("list channel messages"), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { limit: 150, oldest: "1690000000.000", latest: "1710000000.000", @@ -233,10 +197,10 @@ describe("slack channel tools", () => { }); const tool = createSlackChannelPostMessageTool( createContext("please post this in #eng channel"), - createToolState(), + createTestToolState(), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { text: "Heads-up update", }); @@ -266,7 +230,7 @@ describe("slack channel tools", () => { createContext("list channel messages"), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { limit: 2, max_pages: 3, }); @@ -301,7 +265,7 @@ describe("slack channel tools", () => { createContext("list channel messages"), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { cursor: "expired-cursor", limit: 10, }); @@ -326,10 +290,10 @@ describe("slack channel tools", () => { }); const tool = createSlackMessageAddReactionTool( createContext("yep"), - createToolState(), + createTestToolState(), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { emoji: ":wave:", }); @@ -354,10 +318,10 @@ describe("slack channel tools", () => { }); const tool = createSlackMessageAddReactionTool( createContext("yep"), - createToolState(), + createTestToolState(), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { emoji: ":wave:", }); @@ -376,10 +340,10 @@ describe("slack channel tools", () => { }); const tool = createSlackMessageAddReactionTool( createContext("yep"), - createToolState(), + createTestToolState(), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { emoji: ":thumbsup::skin-tone-6:", }); @@ -400,13 +364,13 @@ describe("slack channel tools", () => { }); const tool = 
createSlackMessageAddReactionTool( createContext("ack"), - createToolState(), + createTestToolState(), ); - const first = await executeTool(tool, { + const first = await executeTestTool(tool, { emoji: "thumbsup", }); - const second = await executeTool(tool, { + const second = await executeTestTool(tool, { emoji: "thumbsup", }); diff --git a/packages/junior/tests/integration/slack/context-compaction-behavior.test.ts b/packages/junior/tests/integration/slack/context-compaction-behavior.test.ts new file mode 100644 index 000000000..180e0cc68 --- /dev/null +++ b/packages/junior/tests/integration/slack/context-compaction-behavior.test.ts @@ -0,0 +1,191 @@ +import { afterEach, describe, expect, it } from "vitest"; +import type { PiMessage } from "@/chat/pi/messages"; +import { persistThreadState } from "@/chat/runtime/thread-state"; +import { coerceThreadConversationState } from "@/chat/state/conversation"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { commitMessages } from "@/chat/state/session-log"; +import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { + createTestDestination, + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +interface RuntimeCall { + piMessages?: PiMessage[]; +} + +describe("Slack behavior: context compaction", () => { + afterEach(async () => { + await disconnectStateAdapter(); + }); + + it("auto compacts oversized reusable Pi history before the next turn", async () => { + const calls: RuntimeCall[] = []; + const priorMessages: PiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "\nbootstrap instructions that must be replaced after compaction\n", + }, + { type: "text", text: "old context ".repeat(5_000) }, + ], + timestamp: 1, + }, + { + role: "assistant", + content: [{ type: "text", text: 
"old answer ".repeat(1_000) }], + timestamp: 2, + }, + ] as PiMessage[]; + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005005.000" }); + await commitMessages({ + conversationId: thread.id, + messages: priorMessages, + ttlMs: 60_000, + }); + const conversation = coerceThreadConversationState({}); + await persistThreadState(thread, { conversation }); + + const { slackAdapter, slackRuntime } = createTestChatRuntime({ + adapters: { + compactConversationText: async () => + ({ + text: "Compacted summary: old context is still relevant.", + }) as never, + autoCompactionTriggerTokens: 100, + generateAssistantReply: async (_prompt, context) => { + calls.push({ + piMessages: context?.piMessages, + }); + return successfulAssistantReply("Done."); + }, + }, + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "m-content-auto-compact", + text: "<@U_APP> continue", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }), + { destination: createTestDestination(thread) }, + ); + + expect(calls).toHaveLength(1); + const compactingStatusIndex = slackAdapter.statusCalls.findIndex((call) => + call.loadingMessages?.includes("Compacting context"), + ); + expect(compactingStatusIndex).toBeGreaterThanOrEqual(0); + expect( + slackAdapter.statusCalls.findIndex( + (call, index) => + index > compactingStatusIndex && + Boolean(call.text) && + !call.loadingMessages?.includes("Compacting context"), + ), + ).toBeGreaterThan(compactingStatusIndex); + expect(calls[0]?.piMessages?.length).toBeLessThan(priorMessages.length + 1); + expect(JSON.stringify(calls[0]?.piMessages)).toContain( + "Context handoff summary", + ); + expect(JSON.stringify(calls[0]?.piMessages)).toContain( + "old context is still relevant", + ); + expect(JSON.stringify(calls[0]?.piMessages)).not.toContain( + "bootstrap instructions", + ); + expect(JSON.stringify(calls[0]?.piMessages)).not.toContain( + "", + ); + }); + + it("keeps active-turn Pi history instead 
of compacting older completed history", async () => { + const calls: RuntimeCall[] = []; + const activeMessages: PiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "\nstale active turn bootstrap\n", + }, + { type: "text", text: "active session record tool context" }, + ], + timestamp: 3, + }, + ] as PiMessage[]; + const expectedActiveMessages: PiMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "active session record tool context" }], + timestamp: 3, + }, + ] as PiMessage[]; + const priorMessages: PiMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "older context ".repeat(5_000) }], + timestamp: 1, + }, + { + role: "assistant", + content: [{ type: "text", text: "older answer ".repeat(1_000) }], + timestamp: 2, + }, + ] as PiMessage[]; + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005006.000" }); + await commitMessages({ + conversationId: thread.id, + messages: priorMessages, + ttlMs: 60_000, + }); + await upsertAgentTurnSessionRecord({ + conversationId: thread.id, + sessionId: "turn-active-crashed", + sliceId: 1, + state: "running", + piMessages: activeMessages, + }); + const conversation = coerceThreadConversationState({}); + conversation.processing.activeTurnId = "turn-active-crashed"; + await persistThreadState(thread, { conversation }); + + const { slackRuntime } = createTestChatRuntime({ + adapters: { + compactConversationText: async () => { + throw new Error("active session record history should not compact"); + }, + autoCompactionTriggerTokens: 100, + generateAssistantReply: async (_prompt, context) => { + calls.push({ + piMessages: context?.piMessages, + }); + return successfulAssistantReply("Done."); + }, + }, + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "m-content-active-session-record", + text: "<@U_APP> continue", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }), + { destination: 
createTestDestination(thread) }, + ); + + expect(calls).toHaveLength(1); + expect(calls[0]?.piMessages).toEqual(expectedActiveMessages); + }); +}); diff --git a/packages/junior/tests/integration/slack/conversation-turn-steering-behavior.test.ts b/packages/junior/tests/integration/slack/conversation-turn-steering-behavior.test.ts index ea2a60cdd..6ae569070 100644 --- a/packages/junior/tests/integration/slack/conversation-turn-steering-behavior.test.ts +++ b/packages/junior/tests/integration/slack/conversation-turn-steering-behavior.test.ts @@ -9,10 +9,10 @@ import { slackEnvelope, slackWebhookRequest, } from "../../fixtures/conversation-work"; -import { slackApiOutbox } from "../../fixtures/slack-api-outbox"; +import { slackApiOutbox } from "../../fixtures/slack/api-outbox"; import { resetSlackApiMockState } from "../../msw/handlers/slack-api"; import { createSlackRuntime } from "@/chat/app/factory"; -import type { JuniorRuntimeServiceOverrides } from "@/chat/app/services"; +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; import type { ReplyExecutorServices } from "@/chat/runtime/reply-executor"; import type { ReplySteeringMessage } from "@/chat/respond"; import { createJuniorSlackAdapter } from "@/chat/slack/adapter"; @@ -78,8 +78,8 @@ function reactionTargetsByName(name: string) { } type CompleteObjectOverride = NonNullable< - JuniorRuntimeServiceOverrides["subscribedReplyPolicy"] ->["completeObject"]; + JuniorRuntimeScenarioAdapters["classifySubscribedReply"] +>; interface RouterDecision { confidence: number; @@ -103,7 +103,6 @@ function completeObjectWithDecision( function createTurnHarness(args: { completeObject?: CompleteObjectOverride; generateAssistantReply: ReplyExecutorServices["generateAssistantReply"]; - services?: Parameters[0]["services"]; state: StateAdapter; }) { const queue = createConversationWorkQueueTestAdapter(); @@ -114,21 +113,11 @@ function createTurnHarness(args: { }); const runtime = createSlackRuntime({ 
getSlackAdapter: () => adapter, - services: { - ...(args.services ?? {}), - replyExecutor: { - ...(args.services?.replyExecutor ?? {}), - generateAssistantReply: args.generateAssistantReply, - }, - subscribedReplyPolicy: { - completeObject: - args.completeObject ?? - completeObjectWithDecision(() => ({ - should_reply: true, - confidence: 1, - reason: "steering follow-up", - })), - }, + adapters: { + ...(args.completeObject + ? { classifySubscribedReply: args.completeObject } + : {}), + generateAssistantReply: args.generateAssistantReply, }, }); const services = { diff --git a/packages/junior/tests/integration/slack/file-delivery-behavior.test.ts b/packages/junior/tests/integration/slack/file-delivery-behavior.test.ts index a1caf5743..81d6590ef 100644 --- a/packages/junior/tests/integration/slack/file-delivery-behavior.test.ts +++ b/packages/junior/tests/integration/slack/file-delivery-behavior.test.ts @@ -1,52 +1,39 @@ -import { describe, expect, it } from "vitest"; +import { Buffer } from "node:buffer"; +import { describe, expect, it, vi } from "vitest"; +import type { Thread } from "chat"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { postedText } from "../../fixtures/slack/behavior"; import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; +} from "../../fixtures/slack/harness"; -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - - return String(value); +function findFilePost(calls: unknown[][]): unknown[] | undefined { + return calls.find( + (call) => + typeof call[0] === "object" && + call[0] !== null && + "files" in (call[0] as Record) && + Array.isArray((call[0] as { files?: 
unknown[] }).files) && + (call[0] as { files: unknown[] }).files.length > 0, + ); } describe("Slack behavior: file delivery", () => { it("ignores file followup plans when the assistant reply has no files", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - await context?.onTextDelta?.("Preview is ready."); - return { - text: "Preview is ready.", - deliveryPlan: { - mode: "thread", - - postThreadText: true, - attachFiles: "followup", - }, - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + adapters: { + generateAssistantReply: async (_prompt, context) => { + await context?.onTextDelta?.("Preview is ready."); + return successfulAssistantReply("Preview is ready.", { + deliveryPlan: { + mode: "thread", + postThreadText: true, + attachFiles: "followup", + }, + }); }, }, }); @@ -64,6 +51,58 @@ describe("Slack behavior: file delivery", () => { destination: createTestDestination(thread), }); - expect(thread.posts.map(toPostedText)).toEqual(["Preview is ready."]); + expect(thread.posts.map(postedText)).toEqual(["Preview is ready."]); + }); + + it("attaches generated files inline on the finalized reply post", async () => { + const { slackRuntime } = createTestChatRuntime({ + adapters: { + generateAssistantReply: async () => { + return successfulAssistantReply("finalized content", { + files: [ + { + data: Buffer.from("fake-png"), + filename: "generated.png", + mimeType: "image/png", + }, + ], + }); + }, + }, + }); + + const postSpy = vi.fn().mockResolvedValue(undefined); + const thread = createTestThread({ + id: "slack:C_STREAM:1700000000.000", + state: {}, + }); + thread.post = postSpy as unknown as Thread["post"]; + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "1700000000.200", + text: "generate 
an image", + threadId: "slack:C_STREAM:1700000000.000", + isMention: true, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + isMe: false, + }, + }), + ); + + expect(postSpy.mock.calls).toHaveLength(1); + + const filePost = findFilePost(postSpy.mock.calls); + expect(filePost).toBeDefined(); + const filePostArg = filePost![0] as Record; + expect(filePostArg).toHaveProperty("markdown", "finalized content"); + expect((filePostArg.files as Array<{ filename: string }>)[0].filename).toBe( + "generated.png", + ); }); }); diff --git a/packages/junior/tests/integration/slack-file-upload.test.ts b/packages/junior/tests/integration/slack/file-upload-contract.test.ts similarity index 98% rename from packages/junior/tests/integration/slack-file-upload.test.ts rename to packages/junior/tests/integration/slack/file-upload-contract.test.ts index 3f6a90b73..d1fec65a8 100644 --- a/packages/junior/tests/integration/slack-file-upload.test.ts +++ b/packages/junior/tests/integration/slack/file-upload-contract.test.ts @@ -3,14 +3,14 @@ import { uploadFilesToThread } from "@/chat/slack/outbound"; import { filesCompleteUploadOk, filesGetUploadUrlOk, -} from "../fixtures/slack/factories/api"; +} from "../../fixtures/slack/factories/api"; import { getCapturedSlackApiCalls, getCapturedSlackFileUploadCalls, queueSlackApiError, queueSlackApiResponse, queueSlackRateLimit, -} from "../msw/handlers/slack-api"; +} from "../../msw/handlers/slack-api"; describe("uploadFilesToThread", () => { beforeEach(() => { diff --git a/packages/junior/tests/integration/slack/finalized-reply-behavior.test.ts b/packages/junior/tests/integration/slack/finalized-reply-behavior.test.ts index 44e6edc24..7585f58fc 100644 --- a/packages/junior/tests/integration/slack/finalized-reply-behavior.test.ts +++ b/packages/junior/tests/integration/slack/finalized-reply-behavior.test.ts @@ -10,29 +10,8 @@ import { createTestMessage, createTestThread, createTestDestination, -} from 
"../../fixtures/slack-harness"; - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - const raw = (value as { raw?: unknown }).raw; - if (typeof raw === "string") { - return raw; - } - if ("files" in value) { - return ""; - } - } - - return String(value); -} +} from "../../fixtures/slack/harness"; +import { toPostedText } from "../../fixtures/slack/posts"; function toPostedFiles(value: unknown): Array<{ filename: string }> { if ( @@ -67,16 +46,14 @@ function makeDiagnostics( describe("Slack behavior: finalized thread replies", () => { it("posts only the finalized assistant reply even when deltas were emitted", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - await context?.onTextDelta?.("Hello "); - await context?.onTextDelta?.("world"); - return { - text: "Hello world", - diagnostics: makeDiagnostics(), - }; - }, + adapters: { + generateAssistantReply: async (_prompt, context) => { + await context?.onTextDelta?.("Hello "); + await context?.onTextDelta?.("world"); + return { + text: "Hello world", + diagnostics: makeDiagnostics(), + }; }, }, }); @@ -101,17 +78,15 @@ describe("Slack behavior: finalized thread replies", () => { const finalReply = "I checked five outlets. 
The dominant story is the escalating US-Iran conflict."; const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - await context?.onTextDelta?.("Fetching sources now..."); - await context?.onAssistantMessageStart?.(); - await context?.onTextDelta?.(finalReply); - return { - text: finalReply, - diagnostics: makeDiagnostics({ toolCalls: ["webSearch"] }), - }; - }, + adapters: { + generateAssistantReply: async (_prompt, context) => { + await context?.onTextDelta?.("Fetching sources now..."); + await context?.onAssistantMessageStart?.(); + await context?.onTextDelta?.(finalReply); + return { + text: finalReply, + diagnostics: makeDiagnostics({ toolCalls: ["webSearch"] }), + }; }, }, }); @@ -134,14 +109,12 @@ describe("Slack behavior: finalized thread replies", () => { it("keeps file-only replies on the inline post path", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => ({ - text: "", - files: [{ data: Buffer.from("hello"), filename: "hello.txt" }], - diagnostics: makeDiagnostics(), - }), - }, + adapters: { + generateAssistantReply: async () => ({ + text: "", + files: [{ data: Buffer.from("hello"), filename: "hello.txt" }], + diagnostics: makeDiagnostics(), + }), }, }); @@ -166,19 +139,17 @@ describe("Slack behavior: finalized thread replies", () => { it("still delivers files when thread text is suppressed", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => ({ - text: "Posted it in channel.", - files: [{ data: Buffer.from("report"), filename: "report.txt" }], - deliveryPlan: { - mode: "channel_only", - postThreadText: false, - attachFiles: "inline", - }, - diagnostics: makeDiagnostics(), - }), - }, + adapters: { + generateAssistantReply: async () => ({ + text: "Posted it in channel.", + files: [{ data: 
Buffer.from("report"), filename: "report.txt" }], + deliveryPlan: { + mode: "channel_only", + postThreadText: false, + attachFiles: "inline", + }, + diagnostics: makeDiagnostics(), + }), }, }); @@ -203,16 +174,14 @@ describe("Slack behavior: finalized thread replies", () => { it("does not delete an ack reply when it also carries files", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => ({ - text: "ok", - files: [{ data: Buffer.from("report"), filename: "report.txt" }], - diagnostics: makeDiagnostics({ - toolCalls: ["slackMessageAddReaction"], - }), + adapters: { + generateAssistantReply: async () => ({ + text: "ok", + files: [{ data: Buffer.from("report"), filename: "report.txt" }], + diagnostics: makeDiagnostics({ + toolCalls: ["slackMessageAddReaction"], }), - }, + }), }, }); @@ -241,13 +210,11 @@ describe("Slack behavior: finalized thread replies", () => { (_, i) => `line ${i + 1}`, ).join("\n"); const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => ({ - text: longReply, - diagnostics: makeDiagnostics(), - }), - }, + adapters: { + generateAssistantReply: async () => ({ + text: longReply, + diagnostics: makeDiagnostics(), + }), }, }); @@ -277,13 +244,11 @@ describe("Slack behavior: finalized thread replies", () => { const repeated = "console.log('hello');\n".repeat(200); const longReply = `Here is the script:\n\`\`\`ts\n${repeated}\`\`\``; const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => ({ - text: longReply, - diagnostics: makeDiagnostics(), - }), - }, + adapters: { + generateAssistantReply: async () => ({ + text: longReply, + diagnostics: makeDiagnostics(), + }), }, }); @@ -314,12 +279,13 @@ describe("Slack behavior: finalized thread replies", () => { const partialEnd = "This should continue into a second post."; const longReply = 
`${partialStart} ${"A".repeat(slackOutputPolicy.maxInlineChars)}\n\n${partialEnd}`; const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => ({ + adapters: { + generateAssistantReply: async (_prompt, context) => { + await context?.onTextDelta?.(partialStart); + return { text: longReply, diagnostics: makeDiagnostics({ outcome: "provider_error" }), - }), + }; }, }, }); diff --git a/packages/junior/tests/integration/slack/image-cache-behavior.test.ts b/packages/junior/tests/integration/slack/image-cache-behavior.test.ts new file mode 100644 index 000000000..1ad11b2a3 --- /dev/null +++ b/packages/junior/tests/integration/slack/image-cache-behavior.test.ts @@ -0,0 +1,233 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { + createSlackImageConversationState, + createSlackImageRuntime, + resetSlackImageRuntimeEnv, +} from "../../fixtures/slack/image-runtime"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +const listThreadRepliesMock = vi.fn(); + +describe("Slack behavior: image cache", () => { + beforeEach(() => { + listThreadRepliesMock.mockReset(); + }); + + afterEach(() => { + resetSlackImageRuntimeEnv(); + }); + + it("reuses the thread image summary instead of re-analyzing the same upload", async () => { + listThreadRepliesMock.mockResolvedValue([ + { + ts: "1700000003.100", + files: [ + { + id: "F_CUR", + mimetype: "image/png", + url_private_download: "https://files.slack.com/private/current.png", + }, + ], + }, + ]); + const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); + const completeTextMock = vi.fn(async () => ({ + text: "Current screenshot summary", + message: {} as never, + })); + const attachmentFetch = vi.fn(async () => Buffer.from("attachment-image")); + const generateAssistantReply = vi.fn( + async ( + _text: 
string, + context: + | { + userAttachments?: Array<{ + filename?: string; + mediaType?: string; + }>; + } + | undefined, + ) => { + expect(context?.userAttachments).toEqual([ + expect.objectContaining({ + mediaType: "image/png", + filename: "screen.png", + }), + ]); + return successfulAssistantReply("ok"); + }, + ); + + const { slackRuntime } = await createSlackImageRuntime( + { + adapters: { + listThreadReplies: listThreadRepliesMock, + downloadSlackFile: downloadFileMock, + describeImagesText: completeTextMock, + generateAssistantReply, + }, + }, + { + AI_VISION_MODEL: "openai/gpt-5.4", + }, + ); + + await slackRuntime.handleNewMention( + createTestThread({ + id: "slack:C_IMAGE:1700000003.000", + state: createSlackImageConversationState(), + }), + createTestMessage({ + id: "1700000003.100", + text: "explain this screenshot", + threadId: "slack:C_IMAGE:1700000003.000", + isMention: true, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + isMe: false, + }, + attachments: [ + { + type: "image", + mimeType: "image/png", + name: "screen.png", + fetchData: attachmentFetch, + }, + ], + }), + ); + + expect(downloadFileMock).toHaveBeenCalledTimes(1); + expect(completeTextMock).toHaveBeenCalledTimes(1); + expect(attachmentFetch).not.toHaveBeenCalled(); + expect(generateAssistantReply).toHaveBeenCalledTimes(1); + }); + + it("keeps cached image summaries aligned with attachment positions", async () => { + listThreadRepliesMock.mockResolvedValue([ + { + ts: "1700000004.100", + files: [ + { + id: "F_MISSING", + mimetype: "image/png", + url_private_download: "https://files.slack.com/private/missing.png", + }, + { + id: "F_CACHED", + mimetype: "image/png", + url_private_download: "https://files.slack.com/private/cached.png", + }, + ], + }, + ]); + const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); + let completeTextCallCount = 0; + const completeTextMock = vi.fn(async () => { + completeTextCallCount += 1; + 
if (completeTextCallCount === 1) { + return { + text: "", + message: {} as never, + }; + } + if (completeTextCallCount === 2) { + return { + text: "Second cached summary", + message: {} as never, + }; + } + return { + text: "First attachment summary", + message: {} as never, + }; + }); + const firstAttachmentFetch = vi.fn(async () => Buffer.from("first-image")); + const secondAttachmentFetch = vi.fn(async () => + Buffer.from("second-image"), + ); + const generateAssistantReply = vi.fn( + async ( + _text: string, + context: + | { + userAttachments?: Array<{ + filename?: string; + }>; + } + | undefined, + ) => { + expect(context?.userAttachments).toEqual([ + expect.objectContaining({ + filename: "first.png", + }), + expect.objectContaining({ + filename: "second.png", + }), + ]); + return successfulAssistantReply("ok"); + }, + ); + + const { slackRuntime } = await createSlackImageRuntime( + { + adapters: { + listThreadReplies: listThreadRepliesMock, + downloadSlackFile: downloadFileMock, + describeImagesText: completeTextMock, + generateAssistantReply, + }, + }, + { + AI_VISION_MODEL: "openai/gpt-5.4", + }, + ); + + await slackRuntime.handleNewMention( + createTestThread({ + id: "slack:C_IMAGE:1700000004.000", + state: createSlackImageConversationState(), + }), + createTestMessage({ + id: "1700000004.100", + text: "compare these screenshots", + threadId: "slack:C_IMAGE:1700000004.000", + isMention: true, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + isMe: false, + }, + attachments: [ + { + type: "image", + mimeType: "image/png", + name: "first.png", + fetchData: firstAttachmentFetch, + }, + { + type: "image", + mimeType: "image/png", + name: "second.png", + fetchData: secondAttachmentFetch, + }, + ], + }), + ); + + expect(downloadFileMock).toHaveBeenCalledTimes(2); + expect(completeTextMock).toHaveBeenCalledTimes(3); + expect(firstAttachmentFetch).toHaveBeenCalledTimes(1); + 
expect(secondAttachmentFetch).not.toHaveBeenCalled(); + expect(generateAssistantReply).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/junior/tests/integration/slack/image-hydration-behavior.test.ts b/packages/junior/tests/integration/slack/image-hydration-behavior.test.ts new file mode 100644 index 000000000..ea8adbe4a --- /dev/null +++ b/packages/junior/tests/integration/slack/image-hydration-behavior.test.ts @@ -0,0 +1,358 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { + createSlackImageConversationState, + createSlackImageRuntime, + resetSlackImageRuntimeEnv, +} from "../../fixtures/slack/image-runtime"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +const listThreadRepliesMock = vi.fn(); + +describe("Slack behavior: image hydration", () => { + beforeEach(() => { + listThreadRepliesMock.mockReset(); + }); + + afterEach(() => { + resetSlackImageRuntimeEnv(); + }); + + it("does not hydrate thread images when AI_VISION_MODEL is unset", async () => { + const { slackRuntime } = await createSlackImageRuntime({ + adapters: { + listThreadReplies: listThreadRepliesMock, + generateAssistantReply: async () => successfulAssistantReply("ok"), + }, + }); + const thread = createTestThread({ + id: "slack:C_IMAGE:1700000001.000", + state: createSlackImageConversationState(), + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "1700000001.200", + text: "", + threadId: "slack:C_IMAGE:1700000001.000", + isMention: true, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + isMe: false, + }, + attachments: [ + { + type: "image", + mimeType: "image/png", + name: "screen.png", + data: Buffer.from("fake-image"), + }, + ], + }), + ); + + expect(listThreadRepliesMock).not.toHaveBeenCalled(); + const persistedState = thread.getState() as { 
+ conversation: { + messages: Array<{ + author?: { + isBot?: boolean; + }; + text: string; + meta?: { + attachmentCount?: number; + imageAttachmentCount?: number; + imagesHydrated?: boolean; + slackTs?: string; + }; + }>; + vision: { + backfillCompletedAtMs?: number; + }; + }; + }; + expect( + persistedState.conversation.vision.backfillCompletedAtMs, + ).toBeUndefined(); + const persistedMessage = persistedState.conversation.messages.find( + (entry) => entry.meta?.slackTs === "1700000001.200", + ); + expect(persistedMessage).toMatchObject({ + author: { + isBot: false, + }, + text: "[non-text message]", + meta: { + attachmentCount: 1, + imageAttachmentCount: 1, + imagesHydrated: false, + slackTs: "1700000001.200", + }, + }); + }); + + it("backfills older image messages after vision is enabled later", async () => { + const firstRuntime = await createSlackImageRuntime({ + adapters: { + listThreadReplies: listThreadRepliesMock, + generateAssistantReply: async () => successfulAssistantReply("ok"), + }, + }); + const firstThread = createTestThread({ + id: "slack:C_IMAGE:1700000002.000", + state: createSlackImageConversationState(), + }); + + await firstRuntime.slackRuntime.handleNewMention( + firstThread, + createTestMessage({ + id: "1700000002.100", + text: "what is in this screenshot?", + threadId: "slack:C_IMAGE:1700000002.000", + isMention: true, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + isMe: false, + }, + attachments: [ + { + type: "image", + mimeType: "image/png", + name: "screen.png", + data: Buffer.from("fake-image"), + }, + ], + }), + ); + + listThreadRepliesMock.mockResolvedValue([ + { + ts: "1700000002.100", + files: [ + { + id: "F_OLD", + mimetype: "image/png", + url_private_download: "https://files.slack.com/private/old.png", + }, + ], + }, + ]); + const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); + const completeTextMock = vi.fn(async () => ({ + text: "Recovered screenshot 
context", + message: {} as never, + })); + + const secondRuntime = await createSlackImageRuntime( + { + adapters: { + listThreadReplies: listThreadRepliesMock, + downloadSlackFile: downloadFileMock, + describeImagesText: completeTextMock, + generateAssistantReply: async () => successfulAssistantReply("ok"), + }, + }, + { + AI_VISION_MODEL: "openai/gpt-5.4", + }, + ); + const secondThread = createTestThread({ + id: "slack:C_IMAGE:1700000002.000", + state: firstThread.getState(), + }); + + await secondRuntime.slackRuntime.handleNewMention( + secondThread, + createTestMessage({ + id: "1700000002.200", + text: "follow up without new uploads", + threadId: "slack:C_IMAGE:1700000002.000", + isMention: true, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + isMe: false, + }, + }), + ); + + expect(listThreadRepliesMock).toHaveBeenCalledTimes(1); + expect(downloadFileMock).toHaveBeenCalledTimes(1); + expect(completeTextMock).toHaveBeenCalledTimes(1); + const persistedState = secondThread.getState() as { + conversation: { + messages: Array<{ + id: string; + meta?: { + imagesHydrated?: boolean; + imageFileIds?: string[]; + }; + }>; + vision: { + backfillCompletedAtMs?: number; + byFileId: Record; + }; + }; + }; + expect( + persistedState.conversation.messages.find( + (message) => message.id === "1700000002.100", + )?.meta, + ).toEqual( + expect.objectContaining({ + imagesHydrated: true, + imageFileIds: ["F_OLD"], + }), + ); + expect(persistedState.conversation.vision.byFileId.F_OLD?.summary).toBe( + "Recovered screenshot context", + ); + expect(persistedState.conversation.vision.backfillCompletedAtMs).toBeTypeOf( + "number", + ); + }); + + it("hydrates skipped passive screenshots when a later explicit mention needs them", async () => { + listThreadRepliesMock.mockResolvedValue([ + { + ts: "1700000002.100", + files: [ + { + id: "F_PASSIVE", + mimetype: "image/png", + url_private_download: 
"https://files.slack.com/private/passive.png", + }, + ], + }, + ]); + const downloadFileMock = vi.fn(async () => Buffer.from("downloaded-image")); + const completeTextMock = vi.fn(async () => ({ + text: "Passive screenshot summary", + message: {} as never, + })); + const generateAssistantReply = vi.fn( + async ( + _text: string, + context: { conversationContext?: string } | undefined, + ) => { + expect(context?.conversationContext).toContain( + "Passive screenshot summary", + ); + return successfulAssistantReply("ok"); + }, + ); + + const { slackRuntime } = await createSlackImageRuntime( + { + adapters: { + classifySubscribedReply: async () => { + throw new Error( + "classifier should not run for messages addressed to another bot", + ); + }, + listThreadReplies: listThreadRepliesMock, + downloadSlackFile: downloadFileMock, + describeImagesText: completeTextMock, + generateAssistantReply, + }, + }, + { + AI_VISION_MODEL: "openai/gpt-5.4", + }, + ); + const thread = createTestThread({ + id: "slack:C_IMAGE:1700000006.000", + state: createSlackImageConversationState(), + }); + + await slackRuntime.handleSubscribedMessage( + thread, + createTestMessage({ + id: "1700000002.100", + text: "@Cursor can you look at this?", + threadId: "slack:C_IMAGE:1700000006.000", + isMention: false, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + isMe: false, + }, + attachments: [ + { + type: "image", + mimeType: "image/png", + name: "passive.png", + url: "https://files.slack.com/private/passive.png", + }, + ], + }), + ); + + expect(generateAssistantReply).not.toHaveBeenCalled(); + expect(listThreadRepliesMock).not.toHaveBeenCalled(); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "1700000002.200", + text: "<@U_APP> what is in the screenshot above?", + threadId: "slack:C_IMAGE:1700000006.000", + isMention: true, + author: { + userId: "U-user", + userName: "user", + fullName: "User Example", + isBot: false, + 
isMe: false, + }, + }), + ); + + expect(listThreadRepliesMock).toHaveBeenCalledTimes(1); + expect(downloadFileMock).toHaveBeenCalledTimes(1); + expect(completeTextMock).toHaveBeenCalledTimes(1); + expect(generateAssistantReply).toHaveBeenCalledTimes(1); + + const persistedState = thread.getState() as { + conversation: { + messages: Array<{ + id: string; + meta?: { + imagesHydrated?: boolean; + imageFileIds?: string[]; + }; + }>; + vision: { + byFileId: Record; + }; + }; + }; + expect( + persistedState.conversation.messages.find( + (message) => message.id === "1700000002.100", + )?.meta, + ).toEqual( + expect.objectContaining({ + imagesHydrated: true, + imageFileIds: ["F_PASSIVE"], + }), + ); + expect(persistedState.conversation.vision.byFileId.F_PASSIVE?.summary).toBe( + "Passive screenshot summary", + ); + }); +}); diff --git a/packages/junior/tests/integration/slack-list-create-update.test.ts b/packages/junior/tests/integration/slack/list-create-update-tools.test.ts similarity index 65% rename from packages/junior/tests/integration/slack-list-create-update.test.ts rename to packages/junior/tests/integration/slack/list-create-update-tools.test.ts index 2700567a5..780c55bab 100644 --- a/packages/junior/tests/integration/slack-list-create-update.test.ts +++ b/packages/junior/tests/integration/slack/list-create-update-tools.test.ts @@ -1,49 +1,15 @@ import { describe, expect, it } from "vitest"; import { createSlackListCreateTool } from "@/chat/tools/slack/list-tools"; import { createSlackListUpdateItemTool } from "@/chat/tools/slack/list-tools"; -import type { ToolState } from "@/chat/tools/types"; -import { slackListsCreateOk } from "../fixtures/slack/factories/api"; +import { slackListsCreateOk } from "../../fixtures/slack/factories/api"; +import { + createTestToolState, + executeTestTool, +} from "../../fixtures/tool-runtime"; import { getCapturedSlackApiCalls, queueSlackApiResponse, -} from "../msw/handlers/slack-api"; - -function createToolState( - options: { - 
currentListId?: string; - listColumnMap?: { - titleColumnId?: string; - completedColumnId?: string; - assigneeColumnId?: string; - dueDateColumnId?: string; - }; - } = {}, -): ToolState { - const operationResultCache = new Map(); - const artifactState: Record = { - listColumnMap: options.listColumnMap ?? {}, - }; - - return { - artifactState: artifactState as ToolState["artifactState"], - patchArtifactState: (patch) => { - Object.assign(artifactState, patch); - }, - getCurrentListId: () => options.currentListId, - getOperationResult: (operationKey: string): T | undefined => - operationResultCache.get(operationKey) as T | undefined, - setOperationResult: (operationKey, result) => { - operationResultCache.set(operationKey, result); - }, - }; -} - -async function executeTool(tool: any, input: TInput) { - if (typeof tool?.execute !== "function") { - throw new Error("tool execute function missing"); - } - return await tool.execute(input, {} as any); -} +} from "../../msw/handlers/slack-api"; describe("slack list create/update tools", () => { it("creates a list, persists thread artifact state, and deduplicates repeated create calls", async () => { @@ -60,11 +26,11 @@ describe("slack list create/update tools", () => { }, }); - const state = createToolState(); + const state = createTestToolState(); const tool = createSlackListCreateTool(state); - const first = await executeTool(tool, { name: "Incident checklist" }); - const second = await executeTool(tool, { name: "Incident checklist" }); + const first = await executeTestTool(tool, { name: "Incident checklist" }); + const second = await executeTestTool(tool, { name: "Incident checklist" }); expect(first).toMatchObject({ ok: true, @@ -93,16 +59,18 @@ describe("slack list create/update tools", () => { body: { ok: true }, }); - const state = createToolState({ + const state = createTestToolState({ currentListId: "LIST_ABC", - listColumnMap: { - titleColumnId: "COL_TITLE", - completedColumnId: "COL_DONE", + artifactState: { + 
listColumnMap: { + titleColumnId: "COL_TITLE", + completedColumnId: "COL_DONE", + }, }, }); const tool = createSlackListUpdateItemTool(state); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { item_id: "ROW_77", completed: true, title: "Ship durable workflow rollout", @@ -148,14 +116,13 @@ describe("slack list create/update tools", () => { }); it("fails fast when update fields cannot be mapped to list columns", async () => { - const state = createToolState({ + const state = createTestToolState({ currentListId: "LIST_ABC", - listColumnMap: {}, }); const tool = createSlackListUpdateItemTool(state); await expect( - executeTool(tool, { + executeTestTool(tool, { item_id: "ROW_77", completed: true, }), diff --git a/packages/junior/tests/integration/slack-list-tools.test.ts b/packages/junior/tests/integration/slack/list-read-tools.test.ts similarity index 65% rename from packages/junior/tests/integration/slack-list-tools.test.ts rename to packages/junior/tests/integration/slack/list-read-tools.test.ts index 64d8db8ea..230ccee83 100644 --- a/packages/junior/tests/integration/slack-list-tools.test.ts +++ b/packages/junior/tests/integration/slack/list-read-tools.test.ts @@ -1,43 +1,19 @@ import { describe, expect, it } from "vitest"; import { createSlackListGetItemsTool } from "@/chat/tools/slack/list-tools"; -import type { ToolState } from "@/chat/tools/types"; -import { slackListsItemsListPage } from "../fixtures/slack/factories/api"; +import { slackListsItemsListPage } from "../../fixtures/slack/factories/api"; +import { + createTestToolState, + executeTestTool, +} from "../../fixtures/tool-runtime"; import { getCapturedSlackApiCalls, queueSlackApiError, queueSlackApiResponse, -} from "../msw/handlers/slack-api"; - -function createToolState(options: { currentListId?: string } = {}): ToolState { - const operationResultCache = new Map(); - const artifactState: Record = { - listColumnMap: {}, - }; - - return { - artifactState: 
artifactState as ToolState["artifactState"], - patchArtifactState: (patch) => { - Object.assign(artifactState, patch); - }, - getCurrentListId: () => options.currentListId, - getOperationResult: (operationKey: string): T | undefined => - operationResultCache.get(operationKey) as T | undefined, - setOperationResult: (operationKey, result) => { - operationResultCache.set(operationKey, result); - }, - }; -} - -async function executeTool(tool: any, input: TInput) { - if (typeof tool?.execute !== "function") { - throw new Error("tool execute function missing"); - } - return await tool.execute(input, {} as any); -} +} from "../../msw/handlers/slack-api"; describe("slack list tools", () => { it("does not expose model-selectable list_id in schema", () => { - const tool = createSlackListGetItemsTool(createToolState()); + const tool = createSlackListGetItemsTool(createTestToolState()); expect(tool.inputSchema).toMatchObject({ properties: { limit: expect.any(Object), @@ -50,9 +26,9 @@ describe("slack list tools", () => { }); it("returns an actionable error when list context is unavailable", async () => { - const tool = createSlackListGetItemsTool(createToolState()); + const tool = createSlackListGetItemsTool(createTestToolState()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { limit: 10, }); @@ -76,12 +52,12 @@ describe("slack list tools", () => { }), }); const tool = createSlackListGetItemsTool( - createToolState({ + createTestToolState({ currentListId: "LIST_123", }), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { limit: 2, }); @@ -114,13 +90,13 @@ describe("slack list tools", () => { provided: "chat:write", }); const tool = createSlackListGetItemsTool( - createToolState({ + createTestToolState({ currentListId: "LIST_123", }), ); await expect( - executeTool(tool, { + executeTestTool(tool, { limit: 1, }), ).rejects.toMatchObject({ diff --git 
a/packages/junior/tests/integration/slack/mcp-auth-runtime-direct-provider.test.ts b/packages/junior/tests/integration/slack/mcp-auth-runtime-direct-provider.test.ts new file mode 100644 index 000000000..7ec392c25 --- /dev/null +++ b/packages/junior/tests/integration/slack/mcp-auth-runtime-direct-provider.test.ts @@ -0,0 +1,140 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + EVAL_MCP_AUTH_PROVIDER, + MCP_TOOL_NAME, + assistantReplyWithContext, + createMcpAuthRuntimeSlackFixture, + priorBudgetContext, +} from "../../fixtures/mcp/auth-runtime-slack"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("mcp auth runtime direct provider activation", () => { + beforeEach(async () => { + testbed = await createMcpAuthRuntimeSlackFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("parks and resumes an MCP auth challenge from direct provider activation", async () => { + testbed.agentProbe.directProviderSearch = true; + const threadId = "slack:C125:1700000000.003"; + const turnId = "turn_user-3"; + const generateAssistantReply = testbed.createMcpAuthReplyGenerator(); + const { slackRuntime } = testbed.chatRuntime.createTestChatRuntime({ + adapters: { + generateAssistantReply, + listThreadReplies: async () => [], + }, + }); + + const destination = { + platform: "slack" as const, + teamId: "T123", + channelId: "C125", + }; + const thread = createTestThread({ + id: threadId, + state: { + conversation: { + messages: [ + { + id: "assistant-1", + role: "assistant", + text: priorBudgetContext, + createdAtMs: 1, + author: { + userName: "junior", + isBot: true, + }, + }, + ], + }, + }, + }); + await testbed.mirrorThreadStateToAdapter(thread); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "user-3", + threadId, + text: 
"use eval-auth directly for the budget answer", + isMention: true, + author: { + userId: "U123", + userName: "dcramer", + }, + raw: { + channel: "C125", + team_id: "T123", + ts: "1700000000.004", + thread_ts: "1700000000.003", + }, + }), + { destination }, + ); + + const pendingCheckpoint = + await testbed.turnSessionStore.getAgentTurnSessionRecord( + threadId, + turnId, + ); + expect(pendingCheckpoint).toMatchObject({ + conversationId: threadId, + sessionId: turnId, + sliceId: 2, + state: "awaiting_resume", + resumeReason: "auth", + }); + + const pendingAuthSession = + await testbed.mcpAuthStore.getLatestMcpAuthSessionForUserProvider( + "U123", + EVAL_MCP_AUTH_PROVIDER, + ); + expect(pendingAuthSession).toMatchObject({ + provider: EVAL_MCP_AUTH_PROVIDER, + conversationId: threadId, + sessionId: turnId, + userId: "U123", + destination, + }); + + const response = await testbed.runMcpOauthCallback({ + state: pendingAuthSession!.authSessionId, + generateReply: generateAssistantReply, + }); + + expect(response.status).toBe(200); + expect(testbed.agentProbe.searchToolNames).toEqual([[MCP_TOOL_NAME]]); + + const completedCheckpoint = + await testbed.turnSessionStore.getAgentTurnSessionRecord( + threadId, + turnId, + ); + expect(completedCheckpoint).toMatchObject({ + conversationId: threadId, + sessionId: turnId, + state: "completed", + }); + + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C125", + thread_ts: "1700000000.003", + text: assistantReplyWithContext, + }), + }), + ]); + }); +}); diff --git a/packages/junior/tests/integration/slack/mcp-auth-runtime-mention-resume.test.ts b/packages/junior/tests/integration/slack/mcp-auth-runtime-mention-resume.test.ts new file mode 100644 index 000000000..c07024214 --- /dev/null +++ b/packages/junior/tests/integration/slack/mcp-auth-runtime-mention-resume.test.ts @@ -0,0 +1,265 @@ +import { afterEach, beforeEach, describe, expect, it } 
from "vitest"; +import { + EVAL_MCP_AUTH_PROVIDER, + MCP_TOOL_NAME, + assistantReplyWithContext, + createMcpAuthRuntimeSlackFixture, + expectProcessingReactionLifecycles, + priorBudgetContext, +} from "../../fixtures/mcp/auth-runtime-slack"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("mcp auth runtime mention resume", () => { + beforeEach(async () => { + testbed = await createMcpAuthRuntimeSlackFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("parks an MCP auth challenge from the real Slack runtime and resumes after OAuth callback", async () => { + const threadId = "slack:C123:1700000000.001"; + const turnId = "turn_user-1"; + const generateAssistantReply = testbed.createMcpAuthReplyGenerator(); + const { slackRuntime } = testbed.chatRuntime.createTestChatRuntime({ + adapters: { + generateAssistantReply, + listThreadReplies: async () => [], + }, + }); + + const destination = { + platform: "slack" as const, + teamId: "T123", + channelId: "C123", + }; + const thread = createTestThread({ + id: threadId, + state: { + conversation: { + messages: [ + { + id: "assistant-1", + role: "assistant", + text: priorBudgetContext, + createdAtMs: 1, + author: { + userName: "junior", + isBot: true, + }, + }, + ], + }, + }, + }); + await testbed.mirrorThreadStateToAdapter(thread); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "user-1", + threadId, + text: "what did i say about the budget?", + isMention: true, + author: { + userId: "U123", + userName: "dcramer", + }, + raw: { + channel: "C123", + team_id: "T123", + ts: "1700000000.002", + thread_ts: "1700000000.001", + }, + }), + { destination }, + ); + + expect(getCapturedSlackApiCalls("chat.postEphemeral")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: 
"C123", + user: "U123", + thread_ts: "1700000000.001", + text: expect.stringContaining( + "Click here to link your Eval Auth MCP access", + ), + }), + }), + ]); + expect(thread.posts).toEqual([ + expect.objectContaining({ + markdown: expect.stringContaining("I sent you a link"), + }), + ]); + expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); + expectProcessingReactionLifecycles({ + channel: "C123", + timestamp: "1700000000.002", + count: 1, + }); + + const pendingAuthSession = + await testbed.mcpAuthStore.getLatestMcpAuthSessionForUserProvider( + "U123", + EVAL_MCP_AUTH_PROVIDER, + ); + expect(pendingAuthSession).toMatchObject({ + provider: EVAL_MCP_AUTH_PROVIDER, + conversationId: threadId, + sessionId: turnId, + userId: "U123", + userMessage: "what did i say about the budget?", + channelId: "C123", + destination, + threadTs: "1700000000.001", + authorizationUrl: expect.stringContaining( + "https://eval-auth.example.test/oauth/authorize", + ), + }); + const parkedAuthSessionId = pendingAuthSession!.authSessionId; + + const pendingCheckpoint = + await testbed.turnSessionStore.getAgentTurnSessionRecord( + threadId, + turnId, + ); + expect(pendingCheckpoint).toMatchObject({ + conversationId: threadId, + sessionId: turnId, + sliceId: 2, + state: "awaiting_resume", + resumeReason: "auth", + resumedFromSliceId: 1, + }); + + const parkedState = + await testbed.threadState.getPersistedThreadState(threadId); + expect(parkedState).toMatchObject({ + conversation: { + processing: { + activeTurnId: undefined, + pendingAuth: { + kind: "mcp", + provider: EVAL_MCP_AUTH_PROVIDER, + requesterId: "U123", + sessionId: turnId, + linkSentAtMs: expect.any(Number), + }, + }, + }, + }); + + const response = await testbed.runMcpOauthCallback({ + state: pendingAuthSession!.authSessionId, + generateReply: generateAssistantReply, + }); + + expect(response.status).toBe(200); + const sessionRecordAfterAuth = + await testbed.turnSessionStore.getAgentTurnSessionRecord( + 
threadId, + turnId, + ); + expect(sessionRecordAfterAuth?.piMessages).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + role: "user", + content: [ + { + type: "text", + text: `MCP authorization completed for provider "${EVAL_MCP_AUTH_PROVIDER}". Continue the blocked request and retry the provider operation if needed.`, + }, + ], + }), + ]), + ); + expect(testbed.agentProbe.searchToolNames).toEqual([[MCP_TOOL_NAME]]); + + const latestReusableSession = + await testbed.mcpAuthStore.getLatestMcpAuthSessionForUserProvider( + "U123", + EVAL_MCP_AUTH_PROVIDER, + ); + expect(latestReusableSession).toMatchObject({ + provider: EVAL_MCP_AUTH_PROVIDER, + conversationId: threadId, + sessionId: turnId, + userId: "U123", + userMessage: "what did i say about the budget?", + }); + expect(latestReusableSession?.authSessionId).not.toBe(parkedAuthSessionId); + expect(latestReusableSession?.authorizationUrl).toBeUndefined(); + expect(latestReusableSession?.codeVerifier).toBeUndefined(); + expect( + await testbed.mcpAuthStore.getMcpStoredOAuthCredentials( + "U123", + EVAL_MCP_AUTH_PROVIDER, + ), + ).toMatchObject({ + tokens: { + access_token: "eval-auth-access-token", + refresh_token: "eval-auth-refresh-token", + }, + }); + + const completedCheckpoint = + await testbed.turnSessionStore.getAgentTurnSessionRecord( + threadId, + turnId, + ); + expect(completedCheckpoint).toMatchObject({ + conversationId: threadId, + sessionId: turnId, + sliceId: 2, + state: "completed", + }); + + const resumedState = + await testbed.threadState.getPersistedThreadState(threadId); + expect(resumedState).toMatchObject({ + conversation: { + processing: { + activeTurnId: undefined, + pendingAuth: undefined, + }, + messages: expect.arrayContaining([ + expect.objectContaining({ + id: "user-1", + role: "user", + meta: expect.objectContaining({ + replied: true, + }), + }), + expect.objectContaining({ + role: "assistant", + text: assistantReplyWithContext, + }), + ]), + }, + }); + + 
expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + thread_ts: "1700000000.001", + text: assistantReplyWithContext, + }), + }), + ]); + expectProcessingReactionLifecycles({ + channel: "C123", + timestamp: "1700000000.002", + count: 2, + completedCount: 1, + }); + }); +}); diff --git a/packages/junior/tests/integration/slack/mcp-auth-runtime-subscribed-parking.test.ts b/packages/junior/tests/integration/slack/mcp-auth-runtime-subscribed-parking.test.ts new file mode 100644 index 000000000..943f16f67 --- /dev/null +++ b/packages/junior/tests/integration/slack/mcp-auth-runtime-subscribed-parking.test.ts @@ -0,0 +1,127 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + EVAL_MCP_AUTH_PROVIDER, + createMcpAuthRuntimeSlackFixture, + priorBudgetContext, +} from "../../fixtures/mcp/auth-runtime-slack"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +let testbed: Awaited>; + +describe("mcp auth runtime subscribed parking", () => { + beforeEach(async () => { + testbed = await createMcpAuthRuntimeSlackFixture(); + }, 45_000); + + afterEach(async () => { + await testbed.cleanup(); + }, 45_000); + + it("parks a subscribed-thread MCP auth challenge with the same pending-auth state", async () => { + const threadId = "slack:C124:1700000000.002"; + const turnId = "turn_user-2"; + const generateAssistantReply = testbed.createMcpAuthReplyGenerator(); + const { slackRuntime } = testbed.chatRuntime.createTestChatRuntime({ + adapters: { + generateAssistantReply, + classifySubscribedReply: async () => + ({ + object: { + should_reply: true, + confidence: 1, + reason: "requires thread follow-up", + }, + text: '{"should_reply":true,"confidence":1,"reason":"requires thread follow-up"}', + }) as never, + listThreadReplies: async () => [], + }, + }); + + const destination = { + platform: "slack" as const, + teamId: 
"T123", + channelId: "C124", + }; + const thread = createTestThread({ + id: threadId, + state: { + conversation: { + messages: [ + { + id: "assistant-1", + role: "assistant", + text: priorBudgetContext, + createdAtMs: 1, + author: { + userName: "junior", + isBot: true, + }, + }, + ], + }, + }, + }); + await testbed.mirrorThreadStateToAdapter(thread); + + await slackRuntime.handleSubscribedMessage( + thread, + createTestMessage({ + id: "user-2", + threadId, + text: "what did i say about the budget?", + isMention: false, + author: { + userId: "U123", + userName: "dcramer", + }, + raw: { + channel: "C124", + team_id: "T123", + ts: "1700000000.004", + thread_ts: "1700000000.002", + }, + }), + { destination }, + ); + + expect(thread.posts).toEqual([ + expect.objectContaining({ + markdown: expect.stringContaining("I sent you a link"), + }), + ]); + + const pendingCheckpoint = + await testbed.turnSessionStore.getAgentTurnSessionRecord( + threadId, + turnId, + ); + expect(pendingCheckpoint).toMatchObject({ + conversationId: threadId, + sessionId: turnId, + sliceId: 2, + state: "awaiting_resume", + resumeReason: "auth", + resumedFromSliceId: 1, + }); + + const parkedState = + await testbed.threadState.getPersistedThreadState(threadId); + expect(parkedState).toMatchObject({ + conversation: { + processing: { + activeTurnId: undefined, + pendingAuth: { + kind: "mcp", + provider: EVAL_MCP_AUTH_PROVIDER, + requesterId: "U123", + sessionId: turnId, + linkSentAtMs: expect.any(Number), + }, + }, + }, + }); + }); +}); diff --git a/packages/junior/tests/integration/slack/message-changed-behavior.test.ts b/packages/junior/tests/integration/slack/message-changed-behavior.test.ts index f92455eeb..4fc7b40ed 100644 --- a/packages/junior/tests/integration/slack/message-changed-behavior.test.ts +++ b/packages/junior/tests/integration/slack/message-changed-behavior.test.ts @@ -1,14 +1,11 @@ -import { - createTestDestination, - TEST_SLACK_TEAM_ID, -} from "../../fixtures/slack-harness"; 
+import { createTestDestination } from "../../fixtures/slack/harness"; import { http, HttpResponse } from "msw"; import { afterEach, describe, expect, it } from "vitest"; import { createMemoryState } from "@chat-adapter/state-memory"; import type { SlackAdapter } from "@chat-adapter/slack"; import type { Message } from "chat"; import { slackEventsApiEnvelope } from "../../fixtures/slack/factories/events"; -import { slackApiOutbox } from "../../fixtures/slack-api-outbox"; +import { slackApiOutbox } from "../../fixtures/slack/api-outbox"; import { createSlackWebhookTestClient } from "../../fixtures/slack/webhook-client"; import { mswServer } from "../../msw/server"; import { createSlackRuntime } from "@/chat/app/factory"; @@ -255,28 +252,25 @@ describe("Slack behavior: message_changed webhook ingress", () => { }); const slackRuntime = createSlackRuntime({ getSlackAdapter: () => bot.getAdapter("slack"), - services: { - replyExecutor: { - lookupSlackUser: async () => ({ + adapters: { + lookupSlackUser: async () => ({ + email: "david@example.com", + fullName: "David Cramer", + userName: "dcramer", + }), + generateAssistantReply: async (_prompt, context) => { + expect(context?.requester).toMatchObject({ email: "david@example.com", fullName: "David Cramer", + platform: "slack", + userId: "U123", userName: "dcramer", - }), - generateAssistantReply: async (_prompt, context) => { - expect(context?.requester).toEqual({ - email: "david@example.com", - fullName: "David Cramer", - platform: "slack", - teamId: TEST_SLACK_TEAM_ID, - userId: "U123", - userName: "dcramer", - }); - await context?.onTextDelta?.("Hello world"); - return { - text: "Hello world", - diagnostics: makeDiagnostics(), - }; - }, + }); + await context?.onTextDelta?.("Hello world"); + return { + text: "Hello world", + diagnostics: makeDiagnostics(), + }; }, }, }); diff --git a/packages/junior/tests/integration/slack/message-changed-reply-contract.test.ts 
b/packages/junior/tests/integration/slack/message-changed-reply-contract.test.ts index 90d10aa3f..3d531d718 100644 --- a/packages/junior/tests/integration/slack/message-changed-reply-contract.test.ts +++ b/packages/junior/tests/integration/slack/message-changed-reply-contract.test.ts @@ -1,9 +1,9 @@ -import { createTestDestination } from "../../fixtures/slack-harness"; +import { createTestDestination } from "../../fixtures/slack/harness"; import { describe, expect, it } from "vitest"; import { createMemoryState } from "@chat-adapter/state-memory"; import type { SlackAdapter } from "@chat-adapter/slack"; import { slackEventsApiEnvelope } from "../../fixtures/slack/factories/events"; -import { slackApiOutbox } from "../../fixtures/slack-api-outbox"; +import { slackApiOutbox } from "../../fixtures/slack/api-outbox"; import { createSlackWebhookTestClient } from "../../fixtures/slack/webhook-client"; import { createSlackRuntime } from "@/chat/app/factory"; import { JuniorChat } from "@/chat/ingress/junior-chat"; @@ -80,10 +80,8 @@ async function createEditedDmBot(args: { }); const slackRuntime = createSlackRuntime({ getSlackAdapter: () => bot.getAdapter("slack"), - services: { - replyExecutor: { - generateAssistantReply: args.generateAssistantReply, - }, + adapters: { + generateAssistantReply: args.generateAssistantReply, }, }); diff --git a/packages/junior/tests/integration/slack/message-content-behavior.test.ts b/packages/junior/tests/integration/slack/message-content-behavior.test.ts deleted file mode 100644 index e4379a194..000000000 --- a/packages/junior/tests/integration/slack/message-content-behavior.test.ts +++ /dev/null @@ -1,540 +0,0 @@ -import { afterEach, describe, expect, it } from "vitest"; -import type { PiMessage } from "@/chat/pi/messages"; -import { - getPersistedThreadState, - persistThreadState, - persistThreadStateById, -} from "@/chat/runtime/thread-state"; -import { coerceThreadConversationState } from "@/chat/state/conversation"; -import { 
disconnectStateAdapter } from "@/chat/state/adapter"; -import { commitMessages } from "@/chat/state/session-log"; -import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; -import { createTestChatRuntime } from "../../fixtures/chat-runtime"; -import { - createTestMessage, - createTestThread, - createTestDestination, -} from "../../fixtures/slack-harness"; - -interface CapturedCall { - contextConversation?: string; - piMessages?: PiMessage[]; - prompt: string; -} - -describe("Slack behavior: message content", () => { - afterEach(async () => { - await disconnectStateAdapter(); - }); - - it("strips leading Slack mention token before invoking the agent", async () => { - const calls: CapturedCall[] = []; - - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - return { - object: { - should_reply: true, - confidence: 1, - reason: "direct mention follow-up", - }, - text: '{"should_reply":true,"confidence":1,"reason":"direct mention follow-up"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async (prompt, context) => { - calls.push({ - prompt, - contextConversation: context?.conversationContext, - }); - return { - text: "Summary sent.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005000.000" }); - const message = createTestMessage({ - id: "m-content-strip", - text: "<@U_APP> please summarize the deploy status", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }); - - await slackRuntime.handleNewMention(thread, message, { - destination: createTestDestination(thread), - }); - - expect(calls).toHaveLength(1); - expect(calls[0]?.prompt).toBe("please summarize the deploy status"); - }); - - it("preserves 
non-leading mention tokens in user content", async () => { - const calls: CapturedCall[] = []; - - const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (prompt) => { - calls.push({ prompt }); - return { - text: "Done.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005001.000" }); - const message = createTestMessage({ - id: "m-content-preserve", - text: "<@U_APP> remind me to message <@U_ONCALL> after deploy", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }); - - await slackRuntime.handleNewMention(thread, message, { - destination: createTestDestination(thread), - }); - - expect(calls).toHaveLength(1); - expect(calls[0]?.prompt).toContain("message <@U_ONCALL> after deploy"); - }); - - it("passes legacy attachment text into the current turn prompt", async () => { - const calls: CapturedCall[] = []; - - const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (prompt, context) => { - calls.push({ - prompt, - contextConversation: context?.conversationContext, - }); - return { - text: "Alert reviewed.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005002.500" }); - const message = createTestMessage({ - id: "m-content-legacy-attachment", - text: "<@U_APP>", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - raw: { - channel: "C_BEHAVIOR", - ts: "1700005002.500", - thread_ts: "1700005002.500", - attachments: [ - { - fallback: "Deploy failed 
on production", - title: "Production deploy", - text: "OOM on pod-42", - fields: [{ title: "Service", value: "checkout" }], - footer: "Datadog Monitor", - }, - ], - }, - }); - - await slackRuntime.handleNewMention(thread, message, { - destination: createTestDestination(thread), - }); - - expect(calls).toHaveLength(1); - expect(calls[0]?.prompt).toContain("Production deploy"); - expect(calls[0]?.prompt).toContain("OOM on pod-42"); - expect(calls[0]?.prompt).toContain("Service: checkout"); - }); - - it("does not invoke the agent for self-authored mention messages", async () => { - let replyCalled = false; - - const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => { - replyCalled = true; - return { - text: "Should not happen", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005002.000" }); - const message = createTestMessage({ - id: "m-content-self", - text: "<@U_APP> do not respond", - isMention: true, - threadId: thread.id, - author: { - userId: "U_BOT", - isMe: true, - }, - }); - - await slackRuntime.handleNewMention(thread, message, { - destination: createTestDestination(thread), - }); - - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(0); - }); - - it("passes durable Pi history into the next turn", async () => { - const calls: CapturedCall[] = []; - const storedFirstTurnHistory: PiMessage[] = [ - { - role: "user", - content: [ - { - type: "text", - text: "\nold runtime facts\n", - }, - { type: "text", text: "I need the budget by Friday" }, - ], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "text", text: "First response." 
}], - timestamp: 2, - }, - ] as PiMessage[]; - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - return { - object: { - should_reply: true, - confidence: 1, - reason: "direct mention follow-up", - }, - text: '{"should_reply":true,"confidence":1,"reason":"direct mention follow-up"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async (prompt, context) => { - calls.push({ - prompt, - contextConversation: context?.conversationContext, - piMessages: context?.piMessages, - }); - if ( - calls.length === 1 && - context?.correlation?.conversationId && - context.correlation.turnId - ) { - await upsertAgentTurnSessionRecord({ - conversationId: context.correlation.conversationId, - sessionId: context.correlation.turnId, - sliceId: 1, - state: "completed", - piMessages: storedFirstTurnHistory, - }); - } - return { - text: calls.length === 1 ? "First response." : "Second response.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005003.000" }); - const first = createTestMessage({ - id: "m-content-context-1", - text: "<@U_APP> I need the budget by Friday", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }); - const second = createTestMessage({ - id: "m-content-context-2", - text: "<@U_APP> what did I just ask?", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }); - - await slackRuntime.handleNewMention(thread, first, { - destination: createTestDestination(thread), - }); - - const persistedState = await getPersistedThreadState(thread.id); - const conversation = coerceThreadConversationState(persistedState); - conversation.processing.activeTurnId = "missing-active-turn"; - await 
persistThreadStateById(thread.id, { conversation }); - - await slackRuntime.handleSubscribedMessage(thread, second, { - destination: createTestDestination(thread), - }); - - expect(calls).toHaveLength(2); - expect(calls[1]?.contextConversation ?? "").toContain("budget by Friday"); - expect(calls[1]?.piMessages).toEqual(storedFirstTurnHistory); - }); - - it("auto compacts oversized reusable Pi history before the next turn", async () => { - const calls: CapturedCall[] = []; - const priorMessages: PiMessage[] = [ - { - role: "user", - content: [ - { - type: "text", - text: "\nbootstrap instructions that must be replaced after compaction\n", - }, - { type: "text", text: "old context ".repeat(5_000) }, - ], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "text", text: "old answer ".repeat(1_000) }], - timestamp: 2, - }, - ] as PiMessage[]; - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005005.000" }); - await commitMessages({ - conversationId: thread.id, - messages: priorMessages, - ttlMs: 60_000, - }); - const conversation = coerceThreadConversationState({}); - await persistThreadState(thread, { conversation }); - - const { slackAdapter, slackRuntime } = createTestChatRuntime({ - services: { - contextCompactor: { - completeText: async () => - ({ - text: "Compacted summary: old context is still relevant.", - }) as never, - autoCompactionTriggerTokens: 100, - }, - replyExecutor: { - generateAssistantReply: async (prompt, context) => { - calls.push({ - prompt, - contextConversation: context?.conversationContext, - piMessages: context?.piMessages, - }); - return { - text: "Done.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-content-auto-compact", - text: "<@U_APP> continue", - isMention: true, 
- threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - - expect(calls).toHaveLength(1); - const compactingStatusIndex = slackAdapter.statusCalls.findIndex((call) => - call.loadingMessages?.includes("Compacting context"), - ); - expect(compactingStatusIndex).toBeGreaterThanOrEqual(0); - expect( - slackAdapter.statusCalls.findIndex( - (call, index) => - index > compactingStatusIndex && - Boolean(call.text) && - !call.loadingMessages?.includes("Compacting context"), - ), - ).toBeGreaterThan(compactingStatusIndex); - expect(calls[0]?.piMessages?.length).toBeLessThan(priorMessages.length + 1); - expect(JSON.stringify(calls[0]?.piMessages)).toContain( - "Context handoff summary", - ); - expect(JSON.stringify(calls[0]?.piMessages)).toContain( - "old context is still relevant", - ); - expect(JSON.stringify(calls[0]?.piMessages)).not.toContain( - "bootstrap instructions", - ); - expect(JSON.stringify(calls[0]?.piMessages)).not.toContain( - "", - ); - }); - - it("keeps active-turn Pi history instead of compacting older completed history", async () => { - const calls: CapturedCall[] = []; - const activeMessages: PiMessage[] = [ - { - role: "user", - content: [ - { - type: "text", - text: "\nstale active turn bootstrap\n", - }, - { type: "text", text: "active session record tool context" }, - ], - timestamp: 3, - }, - ] as PiMessage[]; - const expectedActiveMessages: PiMessage[] = [ - { - role: "user", - content: [{ type: "text", text: "active session record tool context" }], - timestamp: 3, - }, - ] as PiMessage[]; - const priorMessages: PiMessage[] = [ - { - role: "user", - content: [{ type: "text", text: "older context ".repeat(5_000) }], - timestamp: 1, - }, - { - role: "assistant", - content: [{ type: "text", text: "older answer ".repeat(1_000) }], - timestamp: 2, - }, - ] as PiMessage[]; - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005006.000" }); - await commitMessages({ - conversationId: 
thread.id, - messages: priorMessages, - ttlMs: 60_000, - }); - await upsertAgentTurnSessionRecord({ - conversationId: thread.id, - sessionId: "turn-active-crashed", - sliceId: 1, - state: "running", - piMessages: activeMessages, - }); - const conversation = coerceThreadConversationState({}); - conversation.processing.activeTurnId = "turn-active-crashed"; - await persistThreadState(thread, { conversation }); - - const { slackRuntime } = createTestChatRuntime({ - services: { - contextCompactor: { - completeText: async () => { - throw new Error("active session record history should not compact"); - }, - autoCompactionTriggerTokens: 100, - }, - replyExecutor: { - generateAssistantReply: async (prompt, context) => { - calls.push({ - prompt, - contextConversation: context?.conversationContext, - piMessages: context?.piMessages, - }); - return { - text: "Done.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-content-active-session-record", - text: "<@U_APP> continue", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - - expect(calls).toHaveLength(1); - expect(calls[0]?.piMessages).toEqual(expectedActiveMessages); - }); -}); diff --git a/packages/junior/tests/integration/slack/message-im-attachment-contract.test.ts b/packages/junior/tests/integration/slack/message-im-attachment-contract.test.ts index 7cd3af50b..1e6b75306 100644 --- a/packages/junior/tests/integration/slack/message-im-attachment-contract.test.ts +++ b/packages/junior/tests/integration/slack/message-im-attachment-contract.test.ts @@ -1,4 +1,4 @@ -import { createTestDestination } from "../../fixtures/slack-harness"; +import { createTestDestination } from 
"../../fixtures/slack/harness"; import { http, HttpResponse } from "msw"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { createMemoryState } from "@chat-adapter/state-memory"; @@ -29,7 +29,7 @@ function makeDiagnostics() { } async function createDirectMessageBot(args: { - completeText: () => Promise<{ text: string; message: never }>; + describeImagesText: () => Promise<{ text: string; message: never }>; generateAssistantReply: ReplyExecutorServices["generateAssistantReply"]; }) { const [{ createSlackRuntime }, { JuniorChat }, { createJuniorSlackAdapter }] = @@ -51,13 +51,9 @@ async function createDirectMessageBot(args: { }); const slackRuntime = createSlackRuntime({ getSlackAdapter: () => bot.getAdapter("slack"), - services: { - visionContext: { - completeText: args.completeText, - }, - replyExecutor: { - generateAssistantReply: args.generateAssistantReply, - }, + adapters: { + describeImagesText: args.describeImagesText, + generateAssistantReply: args.generateAssistantReply, }, }); @@ -98,7 +94,7 @@ describe("Slack contract: message.im attachment ingress", () => { const capturedAttachmentMediaTypes: string[][] = []; const capturedAttachmentNames: string[][] = []; const bot = await createDirectMessageBot({ - completeText: async () => ({ + describeImagesText: async () => ({ text: "Screenshot shows the current incident chart.", message: {} as never, }), @@ -155,5 +151,5 @@ describe("Slack contract: message.im attachment ingress", () => { expect(capturedAttachmentMediaTypes).toEqual([["image/png"]]); expect(capturedAttachmentNames).toEqual([["current.png"]]); - }, 20_000); + }); }); diff --git a/packages/junior/tests/integration/slack/message-normalization-behavior.test.ts b/packages/junior/tests/integration/slack/message-normalization-behavior.test.ts new file mode 100644 index 000000000..734790c20 --- /dev/null +++ b/packages/junior/tests/integration/slack/message-normalization-behavior.test.ts @@ -0,0 +1,169 @@ +import { 
afterEach, describe, expect, it } from "vitest"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { conversationMessages } from "../../fixtures/slack/behavior"; +import { + createTestDestination, + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +describe("Slack behavior: message normalization", () => { + afterEach(async () => { + await disconnectStateAdapter(); + }); + + it("strips leading Slack mention token before invoking the agent", async () => { + let replyCallCount = 0; + + const { slackRuntime } = createTestChatRuntime({ + adapters: { + classifySubscribedReply: async () => { + return { + object: { + should_reply: true, + confidence: 1, + reason: "direct mention follow-up", + }, + text: '{"should_reply":true,"confidence":1,"reason":"direct mention follow-up"}', + } as never; + }, + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply("Summary sent."); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005000.000" }); + const message = createTestMessage({ + id: "m-content-strip", + text: "<@U_APP> please summarize the deploy status", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }); + + await slackRuntime.handleNewMention(thread, message, { + destination: createTestDestination(thread), + }); + + expect(replyCallCount).toBe(1); + expect(conversationMessages(thread)[0]?.text).toBe( + "please summarize the deploy status", + ); + }); + + it("preserves non-leading mention tokens in user content", async () => { + let replyCallCount = 0; + + const { slackRuntime } = createTestChatRuntime({ + adapters: { + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply("Done."); + }, + }, + }); + + const thread = createTestThread({ id: 
"slack:C_BEHAVIOR:1700005001.000" }); + const message = createTestMessage({ + id: "m-content-preserve", + text: "<@U_APP> remind me to message <@U_ONCALL> after deploy", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }); + + await slackRuntime.handleNewMention(thread, message, { + destination: createTestDestination(thread), + }); + + expect(replyCallCount).toBe(1); + expect(conversationMessages(thread)[0]?.text).toContain( + "message <@U_ONCALL> after deploy", + ); + }); + + it("passes legacy attachment text into the current turn prompt", async () => { + let replyCallCount = 0; + + const { slackRuntime } = createTestChatRuntime({ + adapters: { + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply("Alert reviewed."); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005002.500" }); + const message = createTestMessage({ + id: "m-content-legacy-attachment", + text: "<@U_APP>", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + raw: { + channel: "C_BEHAVIOR", + ts: "1700005002.500", + thread_ts: "1700005002.500", + attachments: [ + { + fallback: "Deploy failed on production", + title: "Production deploy", + text: "OOM on pod-42", + fields: [{ title: "Service", value: "checkout" }], + footer: "Datadog Monitor", + }, + ], + }, + }); + + await slackRuntime.handleNewMention(thread, message, { + destination: createTestDestination(thread), + }); + + expect(replyCallCount).toBe(1); + expect(conversationMessages(thread)[0]?.text).toContain( + "Production deploy", + ); + expect(conversationMessages(thread)[0]?.text).toContain("OOM on pod-42"); + expect(conversationMessages(thread)[0]?.text).toContain( + "Service: checkout", + ); + }); + + it("does not invoke the agent for self-authored mention messages", async () => { + let replyCalled = false; + + const { slackRuntime } = createTestChatRuntime({ + adapters: { + generateAssistantReply: async () => { 
+ replyCalled = true; + return successfulAssistantReply("Should not happen"); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005002.000" }); + const message = createTestMessage({ + id: "m-content-self", + text: "<@U_APP> do not respond", + isMention: true, + threadId: thread.id, + author: { + userId: "U_BOT", + isMe: true, + }, + }); + + await slackRuntime.handleNewMention(thread, message, { + destination: createTestDestination(thread), + }); + + expect(replyCalled).toBe(false); + expect(thread.posts).toHaveLength(0); + }); +}); diff --git a/packages/junior/tests/integration/slack-server.test.ts b/packages/junior/tests/integration/slack/msw-server-contract.test.ts similarity index 100% rename from packages/junior/tests/integration/slack-server.test.ts rename to packages/junior/tests/integration/slack/msw-server-contract.test.ts diff --git a/packages/junior/tests/integration/slack/new-mention-behavior.test.ts b/packages/junior/tests/integration/slack/new-mention-behavior.test.ts index 207f8b630..9c1189bf6 100644 --- a/packages/junior/tests/integration/slack/new-mention-behavior.test.ts +++ b/packages/junior/tests/integration/slack/new-mention-behavior.test.ts @@ -1,57 +1,33 @@ import type { Message } from "chat"; import { describe, expect, it } from "vitest"; -import { makeAssistantStatus } from "@/chat/slack/assistant-thread/status"; -import { - FakeSlackAdapter, - createTestDestination, -} from "../../fixtures/slack-harness"; import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { conversationMessages } from "../../fixtures/slack/behavior"; +import { toPostedText } from "../../fixtures/slack/posts"; import { createTestMessage, createTestThread, -} from "../../fixtures/slack-harness"; - -interface FakeReplyCall { - prompt: string; -} - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === "object") { - const markdown = (value 
as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - - return String(value); -} +} from "../../fixtures/slack/harness"; describe("Slack behavior: new mention", () => { it("handles a mention with real runtime wiring and fake agent response", async () => { - const fakeReplyCalls: FakeReplyCall[] = []; + let replyCallCount = 0; const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (prompt) => { - fakeReplyCalls.push({ prompt }); - return { - text: "Acknowledged. Rollback is complete and error rates are stable.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + adapters: { + generateAssistantReply: async () => { + replyCallCount += 1; + return { + text: "Acknowledged. Rollback is complete and error rates are stable.", + diagnostics: { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + }, + }; }, }, }); @@ -70,38 +46,33 @@ describe("Slack behavior: new mention", () => { }, }); - await slackRuntime.handleNewMention(thread, message, { - destination: createTestDestination(thread), - }); + await slackRuntime.handleNewMention(thread, message); - expect(fakeReplyCalls).toHaveLength(1); - expect(fakeReplyCalls[0]?.prompt).toContain("give me a status update"); + expect(replyCallCount).toBe(1); expect(thread.subscribeCalls).toBe(1); expect(thread.posts).toHaveLength(1); expect(toPostedText(thread.posts[0])).toContain("Rollback is complete"); }); - it("includes queued SDK messages in the assistant prompt", async () => { - const fakeReplyCalls: FakeReplyCall[] = []; + it("records queued SDK messages before the latest mention", async () => { + let replyCallCount = 0; const { slackRuntime } = 
createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (prompt) => { - fakeReplyCalls.push({ prompt }); - return { - text: "Handled both updates.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + adapters: { + generateAssistantReply: async () => { + replyCallCount += 1; + return { + text: "Handled both updates.", + diagnostics: { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + }, + }; }, }, }); @@ -123,26 +94,15 @@ describe("Slack behavior: new mention", () => { }); await slackRuntime.handleNewMention(thread, latest, { - destination: createTestDestination(thread), messageContext: { skipped: [queued], totalSinceLastHandler: 2, }, }); - expect(fakeReplyCalls).toHaveLength(1); - expect(fakeReplyCalls[0]?.prompt).toContain("first queued request"); - expect(fakeReplyCalls[0]?.prompt).toContain("latest request"); - expect( - fakeReplyCalls[0]?.prompt.indexOf("first queued request"), - ).toBeLessThan(fakeReplyCalls[0]?.prompt.indexOf("latest request") ?? -1); - const state = thread.getState() as { - conversation?: { - messages?: Array<{ id: string; text: string }>; - }; - }; + expect(replyCallCount).toBe(1); expect( - state.conversation?.messages + conversationMessages(thread) ?.filter( (message) => message.id === "m-queued" || message.id === "m-latest", ) @@ -160,35 +120,31 @@ describe("Slack behavior: new mention", () => { attachmentText?: string; filenames: string[]; inboundAttachmentCount?: number; - prompt: string; }> = []; const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (prompt, context) => { - const attachments = context?.userAttachments ?? 
[]; - fakeReplyCalls.push({ - prompt, - inboundAttachmentCount: context?.inboundAttachmentCount, - filenames: attachments.map( - (attachment) => attachment.filename ?? "", - ), - attachmentText: attachments[0]?.data?.toString("utf8"), - }); - return { - text: "Handled queued attachment.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + adapters: { + generateAssistantReply: async (_prompt, context) => { + const attachments = context?.userAttachments ?? []; + fakeReplyCalls.push({ + inboundAttachmentCount: context?.inboundAttachmentCount, + filenames: attachments.map( + (attachment) => attachment.filename ?? "", + ), + attachmentText: attachments[0]?.data?.toString("utf8"), + }); + return { + text: "Handled queued attachment.", + diagnostics: { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: [], + toolErrorCount: 0, + toolResultCount: 0, + usedPrimaryText: true, + }, + }; }, }, }); @@ -218,7 +174,6 @@ describe("Slack behavior: new mention", () => { }); await slackRuntime.handleNewMention(thread, latest, { - destination: createTestDestination(thread), messageContext: { skipped: [queued], totalSinceLastHandler: 2, @@ -227,173 +182,45 @@ describe("Slack behavior: new mention", () => { expect(fakeReplyCalls).toEqual([ expect.objectContaining({ - prompt: expect.stringContaining("review this file first"), inboundAttachmentCount: 1, filenames: ["queued-notes.txt"], attachmentText: "queued attachment notes", }), ]); + expect( + conversationMessages(thread) + .filter( + (message) => + message.id === "m-queued-file" || message.id === "m-latest-file", + ) + .map((message) => ({ id: message.id, text: message.text })), + ).toEqual([ + { id: "m-queued-file", text: "review this file first" }, + { id: "m-latest-file", text: "then answer now" }, + ]); expect(thread.posts).toHaveLength(1); 
expect(toPostedText(thread.posts[0])).toContain( "Handled queued attachment.", ); }); - it("clears assistant status after successful reply", async () => { - const slackAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createTestChatRuntime({ - slackAdapter, - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - await context?.onStatus?.(makeAssistantStatus("running", "bash")); - return { - text: "Done.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: ["bash"], - toolErrorCount: 0, - toolResultCount: 1, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ - id: "slack:C_STATUS:1700002000.000", - }); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-status-clear", - text: "<@U_APP> run a command", - isMention: true, - threadId: thread.id, - }), - { destination: createTestDestination(thread) }, - ); - - expect(slackAdapter.statusCalls.length).toBeGreaterThan(0); - expect(slackAdapter.statusCalls.at(-1)).toEqual({ - channelId: "C_STATUS", - threadTs: "1700002000.000", - text: "", - loadingMessages: undefined, - }); - }); - - it("deletes redundant reply and clears status for reaction-only turn", async () => { - const slackAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createTestChatRuntime({ - slackAdapter, - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - await context?.onStatus?.(makeAssistantStatus("drafting", "reply")); - return { - text: "Done!", - deliveryMode: "thread", - - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: ["slackMessageAddReaction"], - toolErrorCount: 0, - toolResultCount: 1, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ - id: "slack:C_STATUS:1700004000.000", - }); - await slackRuntime.handleNewMention( - 
thread, - createTestMessage({ - id: "m-reaction-only", - text: "<@U_APP> add a reaction to this message", - isMention: true, - threadId: thread.id, - }), - { destination: createTestDestination(thread) }, - ); - - // Reply posted then deleted to complete Slack's response cycle without visible noise - expect(thread.posts).toHaveLength(0); - expect(slackAdapter.statusCalls.length).toBeGreaterThan(0); - expect(slackAdapter.statusCalls.at(-1)).toEqual({ - channelId: "C_STATUS", - threadTs: "1700004000.000", - text: "", - loadingMessages: undefined, - }); - }); - - it("clears assistant status after agent error", async () => { - const slackAdapter = new FakeSlackAdapter(); - const { slackRuntime } = createTestChatRuntime({ - slackAdapter, - services: { - replyExecutor: { - generateAssistantReply: async () => { - throw new Error("model exploded"); - }, - }, - }, - }); - - const thread = createTestThread({ - id: "slack:C_STATUS:1700003000.000", - }); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-status-error", - text: "<@U_APP> do something", - isMention: true, - threadId: thread.id, - }), - { destination: createTestDestination(thread) }, - ); - - expect(slackAdapter.statusCalls.length).toBeGreaterThan(0); - expect(slackAdapter.statusCalls.at(-1)).toEqual({ - channelId: "C_STATUS", - threadTs: "1700003000.000", - text: "", - loadingMessages: undefined, - }); - }); - it("suppresses thread reply when assistant marks delivery as channel_only", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => { - return { - text: "Posted in channel.", - deliveryMode: "channel_only", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: ["slackChannelPostMessage"], - toolErrorCount: 0, - toolResultCount: 1, - usedPrimaryText: true, - }, - }; - }, + adapters: { + generateAssistantReply: async () => { + return { + text: 
"Posted in channel.", + deliveryMode: "channel_only", + diagnostics: { + assistantMessageCount: 1, + modelId: "fake-agent-model", + outcome: "success", + toolCalls: ["slackChannelPostMessage"], + toolErrorCount: 0, + toolResultCount: 1, + usedPrimaryText: true, + }, + }; }, }, }); @@ -412,9 +239,7 @@ describe("Slack behavior: new mention", () => { }, }); - await slackRuntime.handleNewMention(thread, message, { - destination: createTestDestination(thread), - }); + await slackRuntime.handleNewMention(thread, message); expect(thread.subscribeCalls).toBe(1); expect(thread.posts).toHaveLength(0); diff --git a/packages/junior/tests/integration/slack/oauth-resume-slack-chunking.test.ts b/packages/junior/tests/integration/slack/oauth-resume-slack-chunking.test.ts new file mode 100644 index 000000000..7899250a8 --- /dev/null +++ b/packages/junior/tests/integration/slack/oauth-resume-slack-chunking.test.ts @@ -0,0 +1,61 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { getSlackContinuationMarker } from "@/chat/slack/output"; +import { + createOauthResumeSlackFixture, + makeResumeDiagnostics, +} from "../../fixtures/oauth/resume-slack"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { TEST_SLACK_DESTINATION } from "../../fixtures/reply-context"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth resume slack chunking", () => { + beforeEach(async () => { + testbed = await createOauthResumeSlackFixture(); + }); + + afterEach(async () => { + await testbed.cleanup(); + }); + + it("chunks long resumed replies into explicit continuation messages", async () => { + const longReply = Array.from( + { length: 80 }, + (_, i) => `line ${i + 1}`, + ).join("\n"); + + await testbed.resumeAuthorizedRequest({ + messageText: "Continue the original request", + channelId: "C123", + threadTs: "1700000000.002", + connectedText: "Connected. 
Continuing...", + replyContext: { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + destination: TEST_SLACK_DESTINATION, + requester: { platform: "slack", teamId: "T123", userId: "U123" }, + }, + generateReply: async () => + successfulAssistantReply(longReply, { + diagnostics: makeResumeDiagnostics(), + }), + }); + + const postCalls = getCapturedSlackApiCalls("chat.postMessage"); + expect(postCalls).toHaveLength(5); + expect(postCalls[0]?.params).toMatchObject({ + channel: "C123", + thread_ts: "1700000000.002", + text: "Connected. Continuing...", + }); + expect(postCalls[1]?.params.text).toContain(getSlackContinuationMarker()); + expect(postCalls[2]?.params.text).toContain(getSlackContinuationMarker()); + expect(postCalls[3]?.params.text).toContain(getSlackContinuationMarker()); + expect(postCalls[4]?.params.text).not.toContain( + getSlackContinuationMarker(), + ); + expect(postCalls[4]?.params.text).toContain("line 80"); + }); +}); diff --git a/packages/junior/tests/integration/slack/oauth-resume-slack-delivery.test.ts b/packages/junior/tests/integration/slack/oauth-resume-slack-delivery.test.ts new file mode 100644 index 000000000..0c90ddc27 --- /dev/null +++ b/packages/junior/tests/integration/slack/oauth-resume-slack-delivery.test.ts @@ -0,0 +1,90 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { createOauthResumeSlackFixture } from "../../fixtures/oauth/resume-slack"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { TEST_SLACK_DESTINATION } from "../../fixtures/reply-context"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth resume slack delivery", () => { + beforeEach(async () => { + testbed = await createOauthResumeSlackFixture(); + }); + + afterEach(async () => { + await testbed.cleanup(); + }); + + it("posts resumed status updates through the Slack MSW harness", async () => { + await 
testbed.resumeAuthorizedRequest({ + messageText: "What budget deadline did I mention earlier?", + channelId: "C123", + threadTs: "1700000000.001", + connectedText: + "Your eval-auth MCP access is now connected. Continuing the original request...", + replyContext: { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + destination: TEST_SLACK_DESTINATION, + requester: { platform: "slack", teamId: "T123", userId: "U123" }, + }, + generateReply: async () => + successfulAssistantReply( + "The budget deadline you mentioned earlier was Friday.", + ), + }); + + expect(getCapturedSlackApiCalls("assistant.threads.setStatus")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel_id: "C123", + thread_ts: "1700000000.001", + status: expect.any(String), + loading_messages: expect.arrayContaining([expect.any(String)]), + }), + }), + expect.objectContaining({ + params: expect.objectContaining({ + channel_id: "C123", + thread_ts: "1700000000.001", + status: "", + }), + }), + ]); + + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + thread_ts: "1700000000.001", + text: "Your eval-auth MCP access is now connected. 
Continuing the original request...", + }), + }), + expect.objectContaining({ + params: expect.objectContaining({ + blocks: [ + { + type: "markdown", + text: "The budget deadline you mentioned earlier was Friday.", + }, + { + type: "context", + elements: expect.arrayContaining([ + expect.objectContaining({ + type: "mrkdwn", + text: expect.stringContaining( + "*ID:* slack:C123:1700000000.001", + ), + }), + ]), + }, + ], + channel: "C123", + thread_ts: "1700000000.001", + text: "The budget deadline you mentioned earlier was Friday.", + }), + }), + ]); + }); +}); diff --git a/packages/junior/tests/integration/slack/oauth-resume-slack-failure-markers.test.ts b/packages/junior/tests/integration/slack/oauth-resume-slack-failure-markers.test.ts new file mode 100644 index 000000000..877262947 --- /dev/null +++ b/packages/junior/tests/integration/slack/oauth-resume-slack-failure-markers.test.ts @@ -0,0 +1,86 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { getSlackInterruptionMarker } from "@/chat/slack/output"; +import { + createOauthResumeSlackFixture, + makeResumeDiagnostics, +} from "../../fixtures/oauth/resume-slack"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { TEST_SLACK_DESTINATION } from "../../fixtures/reply-context"; +import { getCapturedSlackApiCalls } from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth resume slack failure markers", () => { + beforeEach(async () => { + testbed = await createOauthResumeSlackFixture(); + }); + + afterEach(async () => { + await testbed.cleanup(); + }); + + it("marks resumed provider-error partial replies as interrupted", async () => { + await testbed.resumeAuthorizedRequest({ + messageText: "Continue the original request", + channelId: "C123", + threadTs: "1700000000.003", + connectedText: "Connected. 
Continuing...", + replyContext: { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + destination: TEST_SLACK_DESTINATION, + requester: { platform: "slack", teamId: "T123", userId: "U123" }, + }, + generateReply: async () => + successfulAssistantReply("Partial output", { + diagnostics: makeResumeDiagnostics("provider_error"), + }), + }); + + const postCalls = getCapturedSlackApiCalls("chat.postMessage"); + expect(postCalls).toHaveLength(2); + expect(postCalls[1]?.params).toMatchObject({ + channel: "C123", + thread_ts: "1700000000.003", + }); + expect(postCalls[1]?.params.text).toContain("Partial output"); + expect(postCalls[1]?.params.text).toContain( + getSlackInterruptionMarker().trim(), + ); + expect(postCalls[1]?.params.text).not.toContain("event_id="); + }); + + it("replaces resumed execution-failure replies before Slack planning", async () => { + await testbed.resumeAuthorizedRequest({ + messageText: "Continue the original request", + channelId: "C123", + threadTs: "1700000000.006", + connectedText: "Connected. Continuing...", + replyContext: { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + destination: TEST_SLACK_DESTINATION, + requester: { platform: "slack", teamId: "T123", userId: "U123" }, + }, + generateReply: async () => + successfulAssistantReply("", { + diagnostics: makeResumeDiagnostics("execution_failure", { + assistantMessageCount: 0, + usedPrimaryText: false, + }), + }), + }); + + const postCalls = getCapturedSlackApiCalls("chat.postMessage"); + expect(postCalls).toHaveLength(2); + expect(postCalls[1]?.params).toMatchObject({ + channel: "C123", + thread_ts: "1700000000.006", + }); + expect(postCalls[1]?.params.text).toContain( + "I ran into an internal error while processing that. 
Reference: `event_id=", + ); + }); +}); diff --git a/packages/junior/tests/integration/slack/oauth-resume-slack-file-delivery.test.ts b/packages/junior/tests/integration/slack/oauth-resume-slack-file-delivery.test.ts new file mode 100644 index 000000000..21993fbd6 --- /dev/null +++ b/packages/junior/tests/integration/slack/oauth-resume-slack-file-delivery.test.ts @@ -0,0 +1,129 @@ +import { Buffer } from "node:buffer"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + createOauthResumeSlackFixture, + makeResumeDiagnostics, +} from "../../fixtures/oauth/resume-slack"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { TEST_SLACK_DESTINATION } from "../../fixtures/reply-context"; +import { + getCapturedSlackApiCalls, + getCapturedSlackFileUploadCalls, + queueSlackApiError, +} from "../../msw/handlers/slack-api"; + +let testbed: Awaited>; + +describe("oauth resume slack file delivery", () => { + beforeEach(async () => { + testbed = await createOauthResumeSlackFixture(); + }); + + afterEach(async () => { + await testbed.cleanup(); + }); + + it("delivers resumed reply files through the shared reply planner", async () => { + await testbed.resumeAuthorizedRequest({ + messageText: "Continue the original request", + channelId: "C123", + threadTs: "1700000000.004", + connectedText: "Connected. 
Continuing...", + replyContext: { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + destination: TEST_SLACK_DESTINATION, + requester: { platform: "slack", teamId: "T123", userId: "U123" }, + }, + generateReply: async () => + successfulAssistantReply("Here is the resumed artifact.", { + files: [ + { + data: Buffer.from("resume-file"), + filename: "resume.txt", + }, + ], + diagnostics: makeResumeDiagnostics(), + }), + }); + + const postCalls = getCapturedSlackApiCalls("chat.postMessage"); + expect(postCalls).toHaveLength(2); + expect(postCalls[0]?.params).toMatchObject({ + channel: "C123", + thread_ts: "1700000000.004", + text: "Connected. Continuing...", + }); + expect(postCalls[1]?.params).toMatchObject({ + channel: "C123", + thread_ts: "1700000000.004", + text: "Here is the resumed artifact.", + }); + expect(getCapturedSlackApiCalls("files.getUploadURLExternal")).toHaveLength( + 1, + ); + expect(getCapturedSlackApiCalls("files.completeUploadExternal")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel_id: "C123", + thread_ts: "1700000000.004", + }), + }), + ]); + expect(getCapturedSlackFileUploadCalls()).toHaveLength(1); + }); + + it("keeps the resumed reply visible when file upload followups fail", async () => { + queueSlackApiError("files.completeUploadExternal", { + error: "upload_failed", + }); + + await testbed.resumeAuthorizedRequest({ + messageText: "Continue the original request", + channelId: "C123", + threadTs: "1700000000.005", + connectedText: "Connected. 
Continuing...", + replyContext: { + credentialContext: { + actor: { type: "user", userId: "U123" }, + }, + destination: TEST_SLACK_DESTINATION, + requester: { platform: "slack", teamId: "T123", userId: "U123" }, + }, + generateReply: async () => + successfulAssistantReply("Here is the resumed artifact.", { + files: [ + { + data: Buffer.from("resume-file"), + filename: "resume.txt", + }, + ], + diagnostics: makeResumeDiagnostics(), + }), + }); + + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + thread_ts: "1700000000.005", + text: "Connected. Continuing...", + }), + }), + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + thread_ts: "1700000000.005", + text: "Here is the resumed artifact.", + }), + }), + ]); + expect(getCapturedSlackApiCalls("files.getUploadURLExternal")).toHaveLength( + 1, + ); + expect( + getCapturedSlackApiCalls("files.completeUploadExternal"), + ).toHaveLength(1); + }); +}); diff --git a/packages/junior/tests/integration/slack/pi-history-behavior.test.ts b/packages/junior/tests/integration/slack/pi-history-behavior.test.ts new file mode 100644 index 000000000..98e6a5062 --- /dev/null +++ b/packages/junior/tests/integration/slack/pi-history-behavior.test.ts @@ -0,0 +1,118 @@ +import { afterEach, describe, expect, it } from "vitest"; +import type { PiMessage } from "@/chat/pi/messages"; +import { + getPersistedThreadState, + persistThreadStateById, +} from "@/chat/runtime/thread-state"; +import { coerceThreadConversationState } from "@/chat/state/conversation"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { upsertAgentTurnSessionRecord } from "@/chat/state/turn-session"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { + createTestDestination, + createTestMessage, + createTestThread, +} 
from "../../fixtures/slack/harness"; + +interface RuntimeCall { + contextConversation?: string; + piMessages?: PiMessage[]; +} + +describe("Slack behavior: Pi history", () => { + afterEach(async () => { + await disconnectStateAdapter(); + }); + + it("passes durable Pi history into the next turn", async () => { + const calls: RuntimeCall[] = []; + const storedFirstTurnHistory: PiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "\nold runtime facts\n", + }, + { type: "text", text: "I need the budget by Friday" }, + ], + timestamp: 1, + }, + { + role: "assistant", + content: [{ type: "text", text: "First response." }], + timestamp: 2, + }, + ] as PiMessage[]; + const { slackRuntime } = createTestChatRuntime({ + adapters: { + classifySubscribedReply: async () => { + return { + object: { + should_reply: true, + confidence: 1, + reason: "direct mention follow-up", + }, + text: '{"should_reply":true,"confidence":1,"reason":"direct mention follow-up"}', + } as never; + }, + generateAssistantReply: async (_prompt, context) => { + calls.push({ + contextConversation: context?.conversationContext, + piMessages: context?.piMessages, + }); + if ( + calls.length === 1 && + context?.correlation?.conversationId && + context.correlation.turnId + ) { + await upsertAgentTurnSessionRecord({ + conversationId: context.correlation.conversationId, + sessionId: context.correlation.turnId, + sliceId: 1, + state: "completed", + piMessages: storedFirstTurnHistory, + }); + } + return successfulAssistantReply( + calls.length === 1 ? "First response." 
: "Second response.", + ); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700005003.000" }); + const first = createTestMessage({ + id: "m-content-context-1", + text: "<@U_APP> I need the budget by Friday", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }); + const second = createTestMessage({ + id: "m-content-context-2", + text: "<@U_APP> what did I just ask?", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }); + + await slackRuntime.handleNewMention(thread, first, { + destination: createTestDestination(thread), + }); + + const persistedState = await getPersistedThreadState(thread.id); + const conversation = coerceThreadConversationState(persistedState); + conversation.processing.activeTurnId = "missing-active-turn"; + await persistThreadStateById(thread.id, { conversation }); + + await slackRuntime.handleSubscribedMessage(thread, second, { + destination: createTestDestination(thread), + }); + + expect(calls).toHaveLength(2); + expect(calls[1]?.contextConversation ?? 
"").toContain("budget by Friday"); + expect(calls[1]?.piMessages).toEqual(storedFirstTurnHistory); + }); +}); diff --git a/packages/junior/tests/integration/slack/processing-reaction-behavior.test.ts b/packages/junior/tests/integration/slack/processing-reaction-behavior.test.ts index 7bb314048..07d77ad9a 100644 --- a/packages/junior/tests/integration/slack/processing-reaction-behavior.test.ts +++ b/packages/junior/tests/integration/slack/processing-reaction-behavior.test.ts @@ -4,8 +4,8 @@ import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; -import { slackApiOutbox } from "../../fixtures/slack-api-outbox"; +} from "../../fixtures/slack/harness"; +import { slackApiOutbox } from "../../fixtures/slack/api-outbox"; function successDiagnostics(toolCalls: string[] = []) { return { @@ -19,29 +19,33 @@ function successDiagnostics(toolCalls: string[] = []) { }; } -function reactionCall(name: string, timestamp: string) { - return expect.objectContaining({ - params: expect.objectContaining({ - channel: "C_PROCESSING", - timestamp, - name, - }), - }); +function reactionEvents(calls: ReturnType) { + return calls.map((call) => ({ + channel: call.params.channel, + name: call.params.name, + timestamp: call.params.timestamp, + })); +} + +function processingReaction(name: string, timestamp: string) { + return { + channel: "C_PROCESSING", + name, + timestamp, + }; } describe("Slack behavior: processing reaction", () => { it("adds eyes before mention work and marks the message complete after the reply", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async () => { - expect(slackApiOutbox.reactionAdds()).toHaveLength(1); - expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); - return { - text: "Done.", - diagnostics: successDiagnostics(), - }; - }, + adapters: { + generateAssistantReply: async () => { + 
expect(slackApiOutbox.reactionAdds()).toHaveLength(1); + expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); + return { + text: "Done.", + diagnostics: successDiagnostics(), + }; }, }, }); @@ -65,36 +69,32 @@ describe("Slack behavior: processing reaction", () => { { destination: createTestDestination(thread) }, ); - expect(slackApiOutbox.reactionAdds()).toEqual([ - reactionCall("eyes", "1700007001.000000"), - reactionCall("white_check_mark", "1700007001.000000"), + expect(reactionEvents(slackApiOutbox.reactionAdds())).toEqual([ + processingReaction("eyes", "1700007001.000000"), + processingReaction("white_check_mark", "1700007001.000000"), ]); - expect(slackApiOutbox.reactionRemovals()).toEqual([ - reactionCall("eyes", "1700007001.000000"), + expect(reactionEvents(slackApiOutbox.reactionRemovals())).toEqual([ + processingReaction("eyes", "1700007001.000000"), ]); }); it("does not add eyes when a subscribed message is skipped", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - expect(slackApiOutbox.reactionAdds()).toHaveLength(0); - expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); - return { - object: { - should_reply: false, - confidence: 0, - reason: "side conversation", - }, - text: '{"should_reply":false,"confidence":0,"reason":"side conversation"}', - } as never; - }, + adapters: { + classifySubscribedReply: async () => { + expect(slackApiOutbox.reactionAdds()).toHaveLength(0); + expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); + return { + object: { + should_reply: false, + confidence: 0, + reason: "side conversation", + }, + text: '{"should_reply":false,"confidence":0,"reason":"side conversation"}', + } as never; }, - replyExecutor: { - generateAssistantReply: async () => { - throw new Error("assistant should not run for skipped message"); - }, + generateAssistantReply: async () => { + throw new Error("assistant should not run for skipped 
message"); }, }, }); @@ -125,30 +125,26 @@ describe("Slack behavior: processing reaction", () => { it("adds eyes after a subscribed message is approved and marks the message complete after the reply", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - expect(slackApiOutbox.reactionAdds()).toHaveLength(0); - expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); - return { - object: { - should_reply: true, - confidence: 1, - reason: "direct follow-up", - }, - text: '{"should_reply":true,"confidence":1,"reason":"direct follow-up"}', - } as never; - }, + adapters: { + classifySubscribedReply: async () => { + expect(slackApiOutbox.reactionAdds()).toHaveLength(0); + expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); + return { + object: { + should_reply: true, + confidence: 1, + reason: "direct follow-up", + }, + text: '{"should_reply":true,"confidence":1,"reason":"direct follow-up"}', + } as never; }, - replyExecutor: { - generateAssistantReply: async () => { - expect(slackApiOutbox.reactionAdds()).toHaveLength(1); - expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); - return { - text: "Done.", - diagnostics: successDiagnostics(), - }; - }, + generateAssistantReply: async () => { + expect(slackApiOutbox.reactionAdds()).toHaveLength(1); + expect(slackApiOutbox.reactionRemovals()).toHaveLength(0); + return { + text: "Done.", + diagnostics: successDiagnostics(), + }; }, }, }); @@ -172,29 +168,27 @@ describe("Slack behavior: processing reaction", () => { { destination: createTestDestination(thread) }, ); - expect(slackApiOutbox.reactionAdds()).toEqual([ - reactionCall("eyes", "1700007151.000000"), - reactionCall("white_check_mark", "1700007151.000000"), + expect(reactionEvents(slackApiOutbox.reactionAdds())).toEqual([ + processingReaction("eyes", "1700007151.000000"), + processingReaction("white_check_mark", "1700007151.000000"), ]); - 
expect(slackApiOutbox.reactionRemovals()).toEqual([ - reactionCall("eyes", "1700007151.000000"), + expect(reactionEvents(slackApiOutbox.reactionRemovals())).toEqual([ + processingReaction("eyes", "1700007151.000000"), ]); }); it("keeps eyes when the assistant explicitly adds an eyes reaction", async () => { const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply: async (_prompt, context) => { - context?.onToolInvocation?.({ - toolName: "slackMessageAddReaction", - params: { emoji: ":eyes:" }, - }); - return { - text: "Done.", - diagnostics: successDiagnostics(["slackMessageAddReaction"]), - }; - }, + adapters: { + generateAssistantReply: async (_prompt, context) => { + context?.onToolInvocation?.({ + toolName: "slackMessageAddReaction", + params: { emoji: ":eyes:" }, + }); + return { + text: "Done.", + diagnostics: successDiagnostics(["slackMessageAddReaction"]), + }; }, }, }); diff --git a/packages/junior/tests/integration/slack/provider-default-config-behavior.test.ts b/packages/junior/tests/integration/slack/provider-default-config-behavior.test.ts index 97827b2a3..3977d0324 100644 --- a/packages/junior/tests/integration/slack/provider-default-config-behavior.test.ts +++ b/packages/junior/tests/integration/slack/provider-default-config-behavior.test.ts @@ -1,32 +1,18 @@ import { describe, expect, it, vi } from "vitest"; import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { toPostedText } from "../../fixtures/slack/posts"; import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - return String(value); -} +} from "../../fixtures/slack/harness"; describe("Slack 
behavior: provider default configuration", () => { it("sets an explicit default GitHub repo without starting an agent turn", async () => { const generateAssistantReply = vi.fn(); const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - }, + adapters: { + generateAssistantReply, }, }); const channelStateRef = { value: {} }; @@ -82,10 +68,8 @@ describe("Slack behavior: provider default configuration", () => { }, })); const { slackRuntime } = createTestChatRuntime({ - services: { - replyExecutor: { - generateAssistantReply, - }, + adapters: { + generateAssistantReply, }, }); const channelStateRef = { value: {} }; diff --git a/packages/junior/tests/integration/slack/runtime-turn-behavior.test.ts b/packages/junior/tests/integration/slack/runtime-turn-behavior.test.ts new file mode 100644 index 000000000..81d45a5e1 --- /dev/null +++ b/packages/junior/tests/integration/slack/runtime-turn-behavior.test.ts @@ -0,0 +1,233 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { createSlackBehaviorRuntime } from "../../fixtures/slack/behavior"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +describe("Slack behavior: runtime turns", () => { + beforeEach(async () => { + await disconnectStateAdapter(); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + await disconnectStateAdapter(); + }); + + it("does not replay a message that already has a delivered reply", async () => { + const conversationId = "slack:C_REPLAY:1700000000.000"; + const generateAssistantReply = vi.fn(); + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply, + }, + }); + const thread = createTestThread({ + id: conversationId, + state: { + conversation: { + schemaVersion: 1, + backfill: { + 
completedAtMs: 1, + source: "recent_messages", + }, + compactions: [], + piMessages: [], + messages: [ + { + id: "msg-replayed", + role: "user", + text: "please answer once", + createdAtMs: 1, + author: { + userId: "U-test", + }, + meta: { + replied: true, + slackTs: "1700000000.000", + }, + }, + { + id: "assistant-reply", + role: "assistant", + text: "Already answered.", + createdAtMs: 2, + author: { + isBot: true, + userName: "Junior", + }, + meta: { + replied: true, + }, + }, + ], + processing: {}, + stats: { + compactedMessageCount: 0, + estimatedContextTokens: 0, + totalMessageCount: 2, + updatedAtMs: 2, + }, + vision: { + byFileId: {}, + }, + }, + }, + }); + + await expect( + slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-replayed", + threadId: conversationId, + text: "please answer once", + isMention: true, + }), + ), + ).resolves.toBeUndefined(); + + expect(generateAssistantReply).not.toHaveBeenCalled(); + expect(thread.posts).toEqual([]); + }); + + it("posts a safe error message when assistant reply generation throws", async () => { + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply: async () => { + throw new Error("LLM unavailable"); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_ERR:1700000000.000" }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-err", + threadId: "slack:C_ERR:1700000000.000", + text: "trigger an error", + isMention: true, + }), + ); + + const errorPost = thread.posts.find( + (p) => + typeof p === "string" && + p.includes("I ran into an internal error while processing that."), + ); + expect(errorPost).toBeDefined(); + expect(String(errorPost)).not.toContain("LLM unavailable"); + }); + + it("does not persist an assistant message when final Slack delivery fails", async () => { + const finalText = "This reply never reaches Slack."; + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + 
generateAssistantReply: async () => successfulAssistantReply(finalText), + }, + }); + const thread = createTestThread({ + id: "slack:C_DELIVERY_FAIL:1700000000.000", + }); + thread.post = vi.fn(async () => { + throw new Error("Slack unavailable"); + }) as typeof thread.post; + + await expect( + slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-delivery-fail", + threadId: "slack:C_DELIVERY_FAIL:1700000000.000", + text: "please answer", + isMention: true, + }), + ), + ).rejects.toThrow("Slack unavailable"); + + const conversation = ( + thread.getState() as { + conversation?: { + messages?: Array<{ + id?: string; + meta?: { replied?: boolean; skippedReason?: string }; + role?: string; + text?: string; + }>; + processing?: { activeTurnId?: string }; + }; + } + ).conversation; + expect(conversation?.processing?.activeTurnId).toBeUndefined(); + expect(conversation?.messages).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + role: "assistant", + text: finalText, + }), + ]), + ); + expect( + conversation?.messages?.find( + (message) => message.id === "msg-delivery-fail", + ), + ).toMatchObject({ + meta: { + replied: false, + skippedReason: "reply failed", + }, + }); + }); + + it("passes conversation and turn correlation IDs into assistant reply context", async () => { + const capturedCorrelation: Array<{ + conversationId?: string; + threadId?: string; + turnId?: string; + runId?: string; + }> = []; + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply: async (_prompt, context) => { + capturedCorrelation.push({ + conversationId: context?.correlation?.conversationId, + threadId: context?.correlation?.threadId, + turnId: context?.correlation?.turnId, + runId: context?.correlation?.runId, + }); + return successfulAssistantReply("Done."); + }, + }, + }); + + const thread = createTestThread({ + id: "slack:C_CORRELATION:1700000000.000", + runId: "run-123", + }); + + await 
slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-correlation", + threadId: "slack:C_CORRELATION:1700000000.000", + text: "trace this turn", + isMention: true, + }), + ); + + expect(capturedCorrelation).toHaveLength(1); + expect(capturedCorrelation[0]).toEqual( + expect.objectContaining({ + conversationId: "slack:C_CORRELATION:1700000000.000", + threadId: "slack:C_CORRELATION:1700000000.000", + runId: "run-123", + }), + ); + expect(capturedCorrelation[0].turnId).toBe("turn_msg-correlation"); + }); +}); diff --git a/packages/junior/tests/integration/slack/schedule-create-tools.test.ts b/packages/junior/tests/integration/slack/schedule-create-tools.test.ts new file mode 100644 index 000000000..a4f4e10b2 --- /dev/null +++ b/packages/junior/tests/integration/slack/schedule-create-tools.test.ts @@ -0,0 +1,302 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + cleanupSlackScheduleToolTest, + createContext, + createSlackScheduleCreateTaskTool, + createSlackScheduleListTasksTool, + createTask, + executeTool, + schedulerStore, + setupSlackScheduleToolTest, + TEST_TEAM_ID, +} from "../../fixtures/slack/schedule-tools"; +import { mockTestClock } from "../../fixtures/vitest"; + +describe("Slack schedule create tools", () => { + beforeEach(setupSlackScheduleToolTest); + afterEach(cleanupSlackScheduleToolTest); + + it("creates and lists tasks only for the active Slack destination", async () => { + const created = await createTask(); + expect(created).toMatchObject({ + ok: true, + task: { + conversation_access: { + audience: "channel", + visibility: "unknown", + }, + credential_subject: null, + status: "active", + task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", + recurrence: { + frequency: "weekly", + interval: 1, + weekdays: [1], + }, + next_run_at: "2026-05-25T16:00:00.000Z", + }, + }); + + const listed = await executeTool( + createSlackScheduleListTasksTool(createContext()), + 
{}, + ); + expect(listed).toMatchObject({ + ok: true, + tasks: [ + { + task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", + schedule: "Every Monday at 9am", + }, + ], + }); + + const otherChannel = await executeTool( + createSlackScheduleListTasksTool(createContext({ channelId: "C999" })), + {}, + ); + expect(otherChannel).toMatchObject({ + ok: true, + tasks: [], + }); + }); + + it("creates clear recurring tasks without a second confirmation", async () => { + const result = await executeTool( + createSlackScheduleCreateTaskTool(createContext()), + { + task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", + schedule: "Every Monday at 9am", + timezone: "America/Los_Angeles", + next_run_at: "2026-05-25T16:00:00.000Z", + recurrence: "weekly", + }, + ); + + expect(result).toMatchObject({ + ok: true, + task: { + schedule: "Every Monday at 9am", + status: "active", + task: "Weekly issue digest: Summarize open scheduler issues and post a concise summary.", + }, + }); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toMatchObject([ + { + destination: { channelId: "C123" }, + status: "active", + }, + ]); + }); + + it("does not store Slack ids as creator display identity", async () => { + const created = (await createTask( + createContext({ + requester: { + userId: "U039RR91S", + userName: "unknown", + fullName: "W039RR91S", + }, + }), + )) as { task: { id: string } }; + + await expect(schedulerStore().getTask(created.task.id)).resolves.toEqual( + expect.objectContaining({ + createdBy: { + slackUserId: "U039RR91S", + }, + }), + ); + }); + + it("creates explicit one-off reminders without a second confirmation", async () => { + mockTestClock("2026-05-27T00:24:23.000Z"); + + const result = await executeTool( + createSlackScheduleCreateTaskTool( + createContext({ + channelId: "D123", + userText: "remind me in 1 minute to wash my hands", + }), + ), + { + task: "Wash hands 
reminder: Remind David to wash his hands.", + schedule: "In 1 minute", + next_run_at: "2026-05-27T00:25:23.000Z", + }, + ); + + expect(result).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-27T00:25:23.000Z", + schedule: "In 1 minute", + status: "active", + task: "Wash hands reminder: Remind David to wash his hands.", + }, + }); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toMatchObject([ + { + conversationAccess: { + audience: "direct", + visibility: "private", + }, + credentialSubject: { + type: "user", + userId: "U123", + allowedWhen: "private-direct-conversation", + }, + destination: { channelId: "D123" }, + nextRunAtMs: Date.parse("2026-05-27T00:25:23.000Z"), + status: "active", + }, + ]); + }); + + it("creates short imperative one-off reminders without channel confirmation", async () => { + mockTestClock("2026-05-27T00:24:23.000Z"); + + const result = await executeTool( + createSlackScheduleCreateTaskTool( + createContext({ + userText: "drink water in 1 minute in this conversation", + }), + ), + { + task: "Drink water reminder: Remind David to drink water.", + schedule: "In 1 minute", + next_run_at: "2026-05-27T00:25:23.000Z", + }, + ); + + expect(result).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-27T00:25:23.000Z", + schedule: "In 1 minute", + status: "active", + task: "Drink water reminder: Remind David to drink water.", + }, + }); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toMatchObject([ + { + destination: { channelId: "C123" }, + nextRunAtMs: Date.parse("2026-05-27T00:25:23.000Z"), + status: "active", + }, + ]); + }); + + it("creates one-off reminders by omitting recurrence", async () => { + mockTestClock("2026-05-28T02:17:48.005Z"); + + const result = await executeTool( + createSlackScheduleCreateTaskTool( + createContext({ + userText: "remind greg to drink water in 1m", + }), + ), + { + task: "Remind Greg to drink water.", + schedule: "In 1 
minute", + next_run_at: "2026-05-28T02:18:48.005Z", + }, + ); + + expect(result).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-28T02:18:48.005Z", + recurrence: null, + schedule: "In 1 minute", + status: "active", + task: "Remind Greg to drink water.", + }, + }); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toMatchObject([ + { + nextRunAtMs: Date.parse("2026-05-28T02:18:48.005Z"), + schedule: { + kind: "one_off", + recurrence: undefined, + }, + status: "active", + }, + ]); + }); + + it("does not delegate user credentials in private group conversations", async () => { + const result = await createTask(createContext({ channelId: "G123" })); + + expect(result).toMatchObject({ + ok: true, + task: { + conversation_access: { + audience: "group", + visibility: "private", + }, + credential_subject: null, + }, + }); + const tasks = await schedulerStore().listTasksForTeam(TEST_TEAM_ID); + expect(tasks).toMatchObject([ + { + conversationAccess: { + audience: "group", + visibility: "private", + }, + destination: { channelId: "G123" }, + }, + ]); + expect(tasks[0]?.credentialSubject).toBeUndefined(); + }); + + it("creates one-off tasks with an exact timestamp using the default Pacific timezone", async () => { + mockTestClock("2026-05-25T12:00:00.000Z"); + + const created = await createTask(createContext(), { + schedule: "On May 26 at 9am", + next_run_at: "2026-05-26T16:00:00.000Z", + recurrence: undefined, + timezone: undefined, + }); + + expect(created).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-26T16:00:00.000Z", + recurrence: null, + timezone: "America/Los_Angeles", + }, + }); + }); + + it("uses JUNIOR_TIMEZONE as the default schedule timezone", async () => { + process.env.JUNIOR_TIMEZONE = "America/New_York"; + mockTestClock("2026-05-25T12:00:00.000Z"); + + const created = await createTask(createContext(), { + schedule: "On May 26 at 9am", + next_run_at: "2026-05-26T13:00:00.000Z", + recurrence: undefined, 
+ timezone: undefined, + }); + + expect(created).toMatchObject({ + ok: true, + task: { + next_run_at: "2026-05-26T13:00:00.000Z", + recurrence: null, + timezone: "America/New_York", + }, + }); + }); +}); diff --git a/packages/junior/tests/integration/slack/schedule-execution-mode.test.ts b/packages/junior/tests/integration/slack/schedule-execution-mode.test.ts new file mode 100644 index 000000000..114b27f13 --- /dev/null +++ b/packages/junior/tests/integration/slack/schedule-execution-mode.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, it } from "vitest"; +import { + createContext, + createSlackScheduleCreateTaskTool, + createSlackScheduleDeleteTaskTool, + createSlackScheduleListTasksTool, + createSlackScheduleRunTaskNowTool, + createSlackScheduleUpdateTaskTool, +} from "../../fixtures/slack/schedule-tools"; + +describe("Slack schedule tool execution modes", () => { + it("all write tools have executionMode sequential", () => { + const context = createContext(); + + const createTool = createSlackScheduleCreateTaskTool(context); + const listTool = createSlackScheduleListTasksTool(context); + const updateTool = createSlackScheduleUpdateTaskTool(context); + const deleteTool = createSlackScheduleDeleteTaskTool(context); + const runNowTool = createSlackScheduleRunTaskNowTool(context); + + // Write tools must force sequential execution so a same-turn + // slackScheduleListTasks call cannot race ahead of a preceding + // slackScheduleCreateTask / update / delete write. + expect(createTool.executionMode).toBe("sequential"); + expect(updateTool.executionMode).toBe("sequential"); + expect(deleteTool.executionMode).toBe("sequential"); + expect(runNowTool.executionMode).toBe("sequential"); + + // List is read-only; it inherits the sequential batch gate from any + // write tool it shares a turn with (pi-agent-core makes the whole + // batch sequential when any tool in it is sequential). 
+ expect(listTool.executionMode).not.toBe("sequential"); + }); +}); diff --git a/packages/junior/tests/integration/slack/schedule-run-tools.test.ts b/packages/junior/tests/integration/slack/schedule-run-tools.test.ts new file mode 100644 index 000000000..892e298f8 --- /dev/null +++ b/packages/junior/tests/integration/slack/schedule-run-tools.test.ts @@ -0,0 +1,144 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + cleanupSlackScheduleToolTest, + createContext, + createSlackScheduleDeleteTaskTool, + createSlackScheduleRunTaskNowTool, + createTask, + executeTool, + schedulerStore, + setupSlackScheduleToolTest, + TEST_TEAM_ID, +} from "../../fixtures/slack/schedule-tools"; + +describe("Slack schedule run tools", () => { + beforeEach(setupSlackScheduleToolTest); + afterEach(cleanupSlackScheduleToolTest); + + it("marks an active task due immediately without changing its scheduled next run", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = schedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + const scheduledNextRunAtMs = Date.parse("2026-06-01T16:00:00.000Z"); + await store.saveTask({ + ...task!, + nextRunAtMs: scheduledNextRunAtMs, + updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), + version: task!.version + 1, + }); + + const beforeMs = Date.now(); + const result = await executeTool( + createSlackScheduleRunTaskNowTool(context), + { + task_id: created.task.id, + }, + ); + const afterMs = Date.now(); + + expect(result).toMatchObject({ + ok: true, + task: { + id: created.task.id, + status: "active", + next_run_at: "2026-06-01T16:00:00.000Z", + }, + }); + const due = await store.getTask(created.task.id); + expect(due).toMatchObject({ + status: "active", + nextRunAtMs: scheduledNextRunAtMs, + destination: { + teamId: context.source?.teamId, + channelId: context.source?.channelId, + }, + 
createdBy: { + slackUserId: context.requester?.userId, + }, + }); + expect(due?.statusReason).toBeUndefined(); + expect(due?.runNowAtMs).toBeGreaterThanOrEqual(beforeMs); + expect(due?.runNowAtMs).toBeLessThanOrEqual(afterMs); + + await expect(store.claimDueRun({ nowMs: afterMs })).resolves.toMatchObject({ + taskId: created.task.id, + scheduledForMs: due?.runNowAtMs, + status: "pending", + }); + }); + + it("does not run-now a paused task without an explicit resume", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = schedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + await store.saveTask({ + ...task!, + status: "paused", + statusReason: "Paused by user.", + updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), + version: task!.version + 1, + }); + + await expect( + executeTool(createSlackScheduleRunTaskNowTool(context), { + task_id: created.task.id, + }), + ).rejects.toThrow( + "Scheduled task must be active before it can be run now. 
Resume the task first if you want it to run.", + ); + const paused = await store.getTask(created.task.id); + expect(paused).toMatchObject({ + status: "paused", + statusReason: "Paused by user.", + }); + expect(paused?.runNowAtMs).toBeUndefined(); + }); + + it("removes deleted tasks from scheduler listings", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + + await executeTool(createSlackScheduleDeleteTaskTool(context), { + task_id: created.task.id, + }); + + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("claims due runs idempotently", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = schedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + await store.saveTask({ + ...task!, + nextRunAtMs: 1000, + updatedAtMs: 1000, + }); + + const first = await store.claimDueRun({ nowMs: 2000 }); + const second = await store.claimDueRun({ nowMs: 2000 }); + + expect(first).toMatchObject({ + taskId: created.task.id, + scheduledForMs: 1000, + status: "pending", + }); + expect(second).toBeUndefined(); + }); +}); diff --git a/packages/junior/tests/integration/slack/schedule-update-tools.test.ts b/packages/junior/tests/integration/slack/schedule-update-tools.test.ts new file mode 100644 index 000000000..6af8dcad0 --- /dev/null +++ b/packages/junior/tests/integration/slack/schedule-update-tools.test.ts @@ -0,0 +1,283 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + cleanupSlackScheduleToolTest, + createContext, + createSlackScheduleDeleteTaskTool, + createSlackScheduleListTasksTool, + createSlackScheduleUpdateTaskTool, + createTask, + executeTool, + schedulerStore, + setupSlackScheduleToolTest, +} from "../../fixtures/slack/schedule-tools"; + +describe("Slack schedule update 
tools", () => { + beforeEach(setupSlackScheduleToolTest); + afterEach(cleanupSlackScheduleToolTest); + + it("edits and deletes a task from the same Slack destination", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const taskId = created.task.id; + + const updated = await executeTool( + createSlackScheduleUpdateTaskTool(context), + { + task_id: taskId, + task: "Daily scheduler digest: Summarize open scheduler issues.", + schedule: "Every day at 9am", + recurrence: "daily", + }, + ); + expect(updated).toMatchObject({ + ok: true, + task: { + id: taskId, + task: "Daily scheduler digest: Summarize open scheduler issues.", + schedule: "Every day at 9am", + version: 2, + }, + }); + + const deleted = await executeTool( + createSlackScheduleDeleteTaskTool(context), + { + task_id: taskId, + }, + ); + expect(deleted).toMatchObject({ + ok: true, + task: { + id: taskId, + status: "deleted", + }, + }); + + const listed = await executeTool( + createSlackScheduleListTasksTool(context), + {}, + ); + expect(listed).toMatchObject({ ok: true, tasks: [] }); + }); + + it("rejects edits that make a recurring task run more than once per day", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + + await expect( + executeTool(createSlackScheduleUpdateTaskTool(context), { + task_id: created.task.id, + schedule: "Every hour", + recurrence: "hourly", + }), + ).rejects.toThrow( + "Recurring scheduled tasks can run at most once per day.", + ); + await expect( + schedulerStore().getTask(created.task.id), + ).resolves.toMatchObject({ + schedule: { + description: "Every Monday at 9am", + }, + version: 1, + }); + }); + + it("converts recurring tasks to one-off tasks with recurrence null", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + + const updated = 
await executeTool( + createSlackScheduleUpdateTaskTool(context), + { + task_id: created.task.id, + schedule: "On June 1 at 9am", + next_run_at: "2026-06-01T16:00:00.000Z", + recurrence: null, + }, + ); + + expect(updated).toMatchObject({ + ok: true, + task: { + id: created.task.id, + next_run_at: "2026-06-01T16:00:00.000Z", + recurrence: null, + schedule: "On June 1 at 9am", + }, + }); + await expect( + schedulerStore().getTask(created.task.id), + ).resolves.toMatchObject({ + schedule: { + kind: "one_off", + recurrence: undefined, + }, + }); + }); + + it("rejects edits from another active Slack destination", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + + await expect( + executeTool( + createSlackScheduleUpdateTaskTool(createContext({ channelId: "C999" })), + { + task_id: created.task.id, + task: "Wrong channel edit.", + }, + ), + ).rejects.toThrow( + "Scheduled task can only be managed from the Slack destination where it was created.", + ); + }); + + it("allows another requester to manage tasks in the same Slack destination", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const otherRequester = createContext({ + requester: { + userId: "U999", + userName: "alice", + fullName: "Alice Reviewer", + }, + }); + + const updated = await executeTool( + createSlackScheduleUpdateTaskTool(otherRequester), + { + task_id: created.task.id, + task: "Team-owned digest: Summarize open scheduler issues.", + }, + ); + const deleted = await executeTool( + createSlackScheduleDeleteTaskTool(otherRequester), + { + task_id: created.task.id, + }, + ); + + expect(updated).toMatchObject({ + ok: true, + task: { + id: created.task.id, + task: "Team-owned digest: Summarize open scheduler issues.", + version: 2, + }, + }); + expect(deleted).toMatchObject({ + ok: true, + task: { + id: created.task.id, + status: "deleted", + }, + 
}); + await expect( + schedulerStore().getTask(created.task.id), + ).resolves.toMatchObject({ + status: "deleted", + executionActor: { + type: "system", + id: "scheduled-task", + }, + task: { + text: "Team-owned digest: Summarize open scheduler issues.", + }, + version: 3, + }); + }); + + it("preserves a recurring task calendar anchor on content-only edits", async () => { + const context = createContext(); + const created = (await createTask(context, { + recurrence: "weekly", + })) as { + task: { id: string }; + }; + const store = schedulerStore(); + const task = await store.getTask(created.task.id); + expect(task?.schedule.recurrence).toMatchObject({ + interval: 1, + startDate: "2026-05-25", + }); + await store.saveTask({ + ...task!, + nextRunAtMs: Date.parse("2026-06-08T16:00:00.000Z"), + updatedAtMs: Date.parse("2026-05-26T16:00:00.000Z"), + version: task!.version + 1, + }); + + const updated = await executeTool( + createSlackScheduleUpdateTaskTool(context), + { + task_id: created.task.id, + task: "Renamed issue digest: Summarize open scheduler issues.", + }, + ); + + expect(updated).toMatchObject({ + ok: true, + task: { + task: "Renamed issue digest: Summarize open scheduler issues.", + }, + }); + await expect(store.getTask(created.task.id)).resolves.toMatchObject({ + nextRunAtMs: Date.parse("2026-06-08T16:00:00.000Z"), + schedule: { + recurrence: { + interval: 1, + startDate: "2026-05-25", + }, + }, + }); + }); + + it("clears stale block reasons when resuming a task", async () => { + const context = createContext(); + const created = (await createTask(context)) as { + task: { id: string }; + }; + const store = schedulerStore(); + const task = await store.getTask(created.task.id); + expect(task).toBeDefined(); + await store.saveTask({ + ...task!, + status: "blocked", + statusReason: "Missing GitHub credentials.", + updatedAtMs: Date.parse("2026-05-25T16:01:00.000Z"), + version: task!.version + 1, + }); + + const updated = await executeTool( + 
createSlackScheduleUpdateTaskTool(context), + { + task_id: created.task.id, + status: "active", + }, + ); + + expect(updated).toMatchObject({ + ok: true, + task: { + id: created.task.id, + status: "active", + }, + }); + const resumed = await store.getTask(created.task.id); + expect(resumed).toMatchObject({ + status: "active", + }); + expect(resumed?.statusReason).toBeUndefined(); + }); +}); diff --git a/packages/junior/tests/integration/slack/schedule-validation-tools.test.ts b/packages/junior/tests/integration/slack/schedule-validation-tools.test.ts new file mode 100644 index 000000000..c862b62d6 --- /dev/null +++ b/packages/junior/tests/integration/slack/schedule-validation-tools.test.ts @@ -0,0 +1,179 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { type SchedulerToolContext } from "@sentry/junior-scheduler"; +import { + AgentPluginToolInputError, + cleanupSlackScheduleToolTest, + createContext, + createSlackScheduleCreateTaskTool, + createTask, + executeTool, + schedulerStore, + setupSlackScheduleToolTest, + TEST_TEAM_ID, +} from "../../fixtures/slack/schedule-tools"; + +describe("Slack schedule create validation", () => { + beforeEach(setupSlackScheduleToolTest); + afterEach(cleanupSlackScheduleToolTest); + + it("rejects invalid Slack workspace context before creating a task", async () => { + const rejected = executeTool( + createSlackScheduleCreateTaskTool(createContext({ teamId: "D123" })), + { + task: "Reminder: Remind David to wash his hands.", + schedule: "In 1 minute", + next_run_at: "2026-05-27T00:25:23.000Z", + }, + ); + + await expect(rejected).rejects.toThrow(AgentPluginToolInputError); + await expect(rejected).rejects.toThrow( + "Active Slack conversation workspace is invalid.", + ); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects synthetic unknown requester ids before creating a task", async () => { + const rejected = createTask( + createContext({ 
+ requester: { + userId: "unknown", + userName: "unknown", + fullName: "unknown", + }, + }), + ); + + await expect(rejected).rejects.toThrow(AgentPluginToolInputError); + await expect(rejected).rejects.toThrow( + "No active Slack requester context is available.", + ); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects conversation contexts with non-canonical fields", async () => { + const rejected = createTask( + createContext({ + destination: { + platform: "slack", + teamId: TEST_TEAM_ID, + channelId: "C123", + threadTs: "1700000000.000", + } as SchedulerToolContext["source"], + }), + ); + + await expect(rejected).rejects.toThrow(AgentPluginToolInputError); + await expect(rejected).rejects.toThrow( + "Active Slack conversation must not include unknown fields.", + ); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects non-canonical Slack channel ids before creating a task", async () => { + const rejected = createTask( + createContext({ + destination: { + platform: "slack", + teamId: TEST_TEAM_ID, + channelId: "slack:D123:1700000000.000", + } as SchedulerToolContext["source"], + }), + { + schedule: "In 1 minute", + next_run_at: "2026-05-27T00:25:23.000Z", + recurrence: undefined, + }, + ); + + await expect(rejected).rejects.toThrow(AgentPluginToolInputError); + await expect(rejected).rejects.toThrow( + "Active Slack conversation channel is invalid.", + ); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects invalid Slack credential subject context before creating a task", async () => { + const rejected = createTask( + createContext({ + channelId: "D123", + credentialSubject: { + type: "user", + userId: "U123", + allowedWhen: "private-direct-conversation", + binding: { + type: "slack-direct-conversation", + teamId: TEST_TEAM_ID, + channelId: "D123", + signature: "v1=test", + }, + } 
as SchedulerToolContext["credentialSubject"], + }), + ); + + await expect(rejected).rejects.toThrow(AgentPluginToolInputError); + await expect(rejected).rejects.toThrow( + "Active Slack credential subject is invalid.", + ); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects parseable non-ISO next run timestamps", async () => { + await expect( + createTask(createContext(), { + next_run_at: "05/25/2026 09:00", + }), + ).rejects.toThrow("Provide next_run_at as a valid ISO timestamp."); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects missing next run timestamps with a tool error", async () => { + await expect( + createTask(createContext(), { + next_run_at: undefined, + }), + ).rejects.toThrow("Provide next_run_at as a valid ISO timestamp."); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects recurring schedules that can run more than once per day", async () => { + await expect( + createTask(createContext(), { + schedule: "Every hour", + recurrence: "hourly", + }), + ).rejects.toThrow( + "Recurring scheduled tasks can run at most once per day.", + ); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); + + it("rejects invalid default timezones", async () => { + process.env.JUNIOR_TIMEZONE = "not/a-zone"; + + await expect( + createTask(createContext(), { + timezone: undefined, + }), + ).rejects.toThrow("timezone must be a valid IANA time zone."); + await expect( + schedulerStore().listTasksForTeam(TEST_TEAM_ID), + ).resolves.toEqual([]); + }); +}); diff --git a/packages/junior/tests/integration/slack/slash-command-behavior.test.ts b/packages/junior/tests/integration/slack/slash-command-behavior.test.ts new file mode 100644 index 000000000..ea92e74f2 --- /dev/null +++ 
b/packages/junior/tests/integration/slack/slash-command-behavior.test.ts @@ -0,0 +1,206 @@ +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + createPluginAppFixture, + type PluginAppFixture, +} from "../../fixtures/plugin-app"; +import { + createConversationWorkQueueTestAdapter, + createNoopSlackWebhookRuntime, +} from "../../fixtures/conversation-work"; +import { slackApiOutbox } from "../../fixtures/slack/api-outbox"; +import { createSlackWebhookTestClient } from "../../fixtures/slack/webhook-client"; +import { resetSlackApiMockState } from "../../msw/handlers/slack-api"; + +const SIGNING_SECRET = "test-signing-secret"; +const BOT_USER_ID = "U_BOT"; +const ORIGINAL_ENV = { ...process.env }; +const EVAL_OAUTH_PROVIDER = "eval-oauth"; +const EVAL_OAUTH_PLUGIN_ROOT = path.resolve( + import.meta.dirname, + "../../fixtures/plugins/eval-oauth", +); + +async function loadSlackWebhookModules() { + vi.resetModules(); + const [ + { handleSlackWebhook }, + { createJuniorSlackAdapter }, + { createUserTokenStore }, + { disconnectStateAdapter, getStateAdapter }, + ] = await Promise.all([ + import("@/chat/ingress/slack-webhook"), + import("@/chat/slack/adapter"), + import("@/chat/capabilities/factory"), + import("@/chat/state/adapter"), + ]); + + await disconnectStateAdapter(); + const state = getStateAdapter(); + await state.connect(); + + return { + createJuniorSlackAdapter, + createUserTokenStore, + getStateAdapter, + handleSlackWebhook, + state, + }; +} + +function slashCommandRequest(text: string): Request { + return createSlackWebhookTestClient({ signingSecret: SIGNING_SECRET }).form( + new URLSearchParams({ + command: "/team", + team_id: "T123", + channel_id: "C123", + user_id: "U123", + user_name: "alice", + text, + trigger_id: "trigger-123", + }), + ); +} + +async function createSlashCommandHarness() { + const loaded = await loadSlackWebhookModules(); + const waitUntil = createSlackWebhookTestClient({ + 
signingSecret: SIGNING_SECRET, + }).waitUntil(); + + return { + ...loaded, + waitUntil, + async run(text: string): Promise { + return await loaded.handleSlackWebhook({ + request: slashCommandRequest(text), + waitUntil: waitUntil.fn, + services: { + getSlackAdapter: () => + loaded.createJuniorSlackAdapter({ + botToken: "xoxb-test-token", + botUserId: BOT_USER_ID, + signingSecret: SIGNING_SECRET, + }), + queue: createConversationWorkQueueTestAdapter(), + runtime: createNoopSlackWebhookRuntime(), + state: loaded.state, + }, + }); + }, + }; +} + +describe("Slack behavior: slash commands", () => { + let pluginApp: PluginAppFixture | undefined; + + beforeEach(async () => { + process.env = { + ...ORIGINAL_ENV, + EVAL_OAUTH_CLIENT_ID: "eval-oauth-client", + JUNIOR_BASE_URL: "https://junior.example.com", + JUNIOR_SLASH_COMMAND: "/team", + JUNIOR_STATE_ADAPTER: "memory", + SLACK_BOT_TOKEN: "xoxb-test-token", + }; + resetSlackApiMockState(); + pluginApp = await createPluginAppFixture([EVAL_OAUTH_PLUGIN_ROOT]); + }); + + afterEach(async () => { + const { disconnectStateAdapter } = await import("@/chat/state/adapter"); + await disconnectStateAdapter(); + await pluginApp?.cleanup(); + pluginApp = undefined; + resetSlackApiMockState(); + process.env = { ...ORIGINAL_ENV }; + vi.resetModules(); + }); + + it("acknowledges usage errors and posts the configured command syntax", async () => { + const harness = await createSlashCommandHarness(); + const response = await harness.run("help"); + + expect(response.status).toBe(200); + expect(harness.waitUntil.pendingCount()).toBe(1); + await harness.waitUntil.flush(); + + expect(slackApiOutbox.calls("chat.postEphemeral")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + user: "U123", + text: "Usage: `/team link ` or `/team unlink `", + }), + }), + ]); + }); + + it("starts OAuth linking through Slack private delivery and persisted state", async () => { + const harness = await 
createSlashCommandHarness(); + const response = await harness.run(`link ${EVAL_OAUTH_PROVIDER}`); + + expect(response.status).toBe(200); + expect(harness.waitUntil.pendingCount()).toBe(1); + await harness.waitUntil.flush(); + + const [delivery] = slackApiOutbox.calls("chat.postEphemeral"); + expect(delivery).toEqual( + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + user: "U123", + text: expect.stringContaining( + `Click here to link your Eval OAuth account`, + ), + }), + }), + ); + const text = delivery?.params.text; + if (typeof text !== "string") { + throw new Error("expected OAuth delivery text"); + } + const authUrl = text.match(/^<([^|]+)\|/)?.[1]; + if (!authUrl) { + throw new Error("expected Slack link-formatted OAuth URL"); + } + const stateValue = new URL(authUrl).searchParams.get("state"); + expect(stateValue).toBeTruthy(); + await expect( + harness.getStateAdapter().get(`oauth-state:${stateValue}`), + ).resolves.toMatchObject({ + userId: "U123", + provider: EVAL_OAUTH_PROVIDER, + channelId: "C123", + scope: "read", + }); + }); + + it("unlinks OAuth credentials from the real token store", async () => { + const harness = await createSlashCommandHarness(); + const tokenStore = harness.createUserTokenStore(); + await tokenStore.set("U123", EVAL_OAUTH_PROVIDER, { + accessToken: "old-access-token", + refreshToken: "old-refresh-token", + scope: "read", + }); + const response = await harness.run(`unlink ${EVAL_OAUTH_PROVIDER}`); + + expect(response.status).toBe(200); + expect(harness.waitUntil.pendingCount()).toBe(1); + await harness.waitUntil.flush(); + + await expect(tokenStore.get("U123", EVAL_OAUTH_PROVIDER)).resolves.toBe( + undefined, + ); + expect(slackApiOutbox.calls("chat.postEphemeral")).toEqual([ + expect.objectContaining({ + params: expect.objectContaining({ + channel: "C123", + user: "U123", + text: "Your Eval OAuth account has been unlinked.", + }), + }), + ]); + }); +}); diff --git 
a/packages/junior/tests/integration/slack/subscribed-message-behavior.test.ts b/packages/junior/tests/integration/slack/subscribed-message-behavior.test.ts index 44eca6c6f..5478b40f1 100644 --- a/packages/junior/tests/integration/slack/subscribed-message-behavior.test.ts +++ b/packages/junior/tests/integration/slack/subscribed-message-behavior.test.ts @@ -1,73 +1,37 @@ -import { describe, expect, it, vi } from "vitest"; -import { TurnInputCommitLostError } from "@/chat/runtime/turn"; -import type { JuniorRuntimeServiceOverrides } from "@/chat/app/services"; +import { describe, expect, it } from "vitest"; import { createProviderError } from "@/chat/services/provider-retry"; -import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { + conversationMessages, + createSlackBehaviorRuntime, + postedText, +} from "../../fixtures/slack/behavior"; import { createTestMessage, createTestThread, - createTestDestination, -} from "../../fixtures/slack-harness"; - -const emptyThreadReplies = async () => []; - -function createRuntime( - args: { - services?: JuniorRuntimeServiceOverrides; - } = {}, -) { - const services = args.services ?? {}; - return createTestChatRuntime({ - services: { - ...services, - visionContext: { - listThreadReplies: emptyThreadReplies, - ...(services.visionContext ?? 
{}), - }, - }, - }); -} - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - - return String(value); -} +} from "../../fixtures/slack/harness"; describe("Slack behavior: subscribed messages", () => { it("skips reply when classifier says not to reply", async () => { - const classifierCalls: string[] = []; - - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async (params: { prompt?: unknown }) => { - classifierCalls.push(String(params.prompt)); - return { - object: { - should_reply: false, - confidence: 0, - reason: "side conversation", - }, - text: '{"should_reply":false,"confidence":0,"reason":"side conversation"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async () => { - throw new Error( - "generateAssistantReply should not run when classifier skips reply", - ); - }, + let classifierCallCount = 0; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCallCount += 1; + return { + object: { + should_reply: false, + confidence: 0, + reason: "side conversation", + }, + text: '{"should_reply":false,"confidence":0,"reason":"side conversation"}', + } as never; + }, + generateAssistantReply: async () => { + throw new Error( + "generateAssistantReply should not run when classifier skips reply", + ); }, }, }); @@ -81,11 +45,9 @@ describe("Slack behavior: subscribed messages", () => { author: { userId: "U_TESTER" }, }); - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); + await slackRuntime.handleSubscribedMessage(thread, message); - expect(classifierCalls).toHaveLength(1); + expect(classifierCallCount).toBe(1); 
expect(thread.posts).toHaveLength(0); }); @@ -94,17 +56,13 @@ describe("Slack behavior: subscribed messages", () => { new Error("Anthropic stream ended before message_stop"), ); - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - throw providerError; - }, + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + throw providerError; }, - replyExecutor: { - generateAssistantReply: async () => { - throw new Error("generateAssistantReply should not run"); - }, + generateAssistantReply: async () => { + throw new Error("generateAssistantReply should not run"); }, }, }); @@ -119,48 +77,33 @@ describe("Slack behavior: subscribed messages", () => { }); await expect( - slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }), + slackRuntime.handleSubscribedMessage(thread, message), ).rejects.toBe(providerError); expect(thread.posts).toHaveLength(0); }); it("replies when classifier approves a subscribed-thread message", async () => { - const classifierCalls: string[] = []; - const replyCalls: string[] = []; - - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async (params: { prompt?: unknown }) => { - classifierCalls.push(String(params.prompt)); - return { - object: { - should_reply: true, - confidence: 1, - reason: "explicit ask", - }, - text: '{"should_reply":true,"confidence":1,"reason":"explicit ask"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async (prompt) => { - replyCalls.push(prompt); - return { - text: "Action item captured: monitor dashboards for 30 minutes.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + let classifierCallCount = 0; + let 
replyCallCount = 0; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCallCount += 1; + return { + object: { + should_reply: true, + confidence: 1, + reason: "explicit ask", + }, + text: '{"should_reply":true,"confidence":1,"reason":"explicit ask"}', + } as never; + }, + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply( + "Action item captured: monitor dashboards for 30 minutes.", + ); }, }, }); @@ -174,51 +117,29 @@ describe("Slack behavior: subscribed messages", () => { author: { userId: "U_TESTER" }, }); - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); + await slackRuntime.handleSubscribedMessage(thread, message); - expect(classifierCalls).toHaveLength(1); - expect(replyCalls).toHaveLength(1); + expect(classifierCallCount).toBe(1); + expect(replyCallCount).toBe(1); expect(thread.posts).toHaveLength(1); - expect(toPostedText(thread.posts[0])).toContain("monitor dashboards"); + expect(postedText(thread.posts[0])).toContain("monitor dashboards"); }); it("replies directly to explicit mentions in subscribed threads", async () => { let classifierCalled = false; - const replyCalls: string[] = []; + let replyCallCount = 0; - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - return { - object: { - should_reply: true, - confidence: 1, - reason: "direct mention asking junior for status", - }, - text: '{"should_reply":true,"confidence":1,"reason":"direct mention asking junior for status"}', - } as never; - }, + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + throw new Error( + "classifier should be bypassed for explicit mentions", + ); }, - replyExecutor: { - generateAssistantReply: async (prompt) => { 
- replyCalls.push(prompt); - return { - text: "Yes. Shipping status is green.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply("Yes. Shipping status is green."); }, }, }); @@ -232,46 +153,29 @@ describe("Slack behavior: subscribed messages", () => { author: { userId: "U_TESTER" }, }); - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); + await slackRuntime.handleSubscribedMessage(thread, message); expect(classifierCalled).toBe(false); - expect(replyCalls).toHaveLength(1); + expect(replyCallCount).toBe(1); expect(thread.posts).toHaveLength(1); - expect(toPostedText(thread.posts[0])).toContain("Shipping status is green"); + expect(postedText(thread.posts[0])).toContain("Shipping status is green"); }); it("treats queued explicit mentions as part of the subscribed turn", async () => { let classifierCalled = false; - const replyCalls: string[] = []; + let replyCallCount = 0; - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - throw new Error( - "classifier should be bypassed for queued explicit mentions", - ); - }, + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + throw new Error( + "classifier should be bypassed for queued explicit mentions", + ); }, - replyExecutor: { - generateAssistantReply: async (prompt) => { - replyCalls.push(prompt); - return { - text: "Handled queued subscribed turn.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - 
}, - }; - }, + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply("Handled queued subscribed turn."); }, }, }); @@ -294,7 +198,6 @@ describe("Slack behavior: subscribed messages", () => { }); await slackRuntime.handleSubscribedMessage(thread, latest, { - destination: createTestDestination(thread), messageContext: { skipped: [queued], totalSinceLastHandler: 2, @@ -302,55 +205,54 @@ describe("Slack behavior: subscribed messages", () => { }); expect(classifierCalled).toBe(false); - expect(replyCalls).toHaveLength(1); - expect(replyCalls[0]).toContain("first queued request"); - expect(replyCalls[0]).toContain("latest follow-up"); + expect(replyCallCount).toBe(1); + expect( + conversationMessages(thread) + .filter( + (message) => + message.id === "m-subscribed-queued-mention" || + message.id === "m-subscribed-queued-latest", + ) + .map((message) => ({ id: message.id, text: message.text })), + ).toEqual([ + { + id: "m-subscribed-queued-mention", + text: "first queued request", + }, + { id: "m-subscribed-queued-latest", text: "latest follow-up" }, + ]); expect(thread.posts).toHaveLength(1); - expect(toPostedText(thread.posts[0])).toContain( + expect(postedText(thread.posts[0])).toContain( "Handled queued subscribed turn.", ); }); it("unsubscribes on explicit stop-thread instructions and only re-engages on a later direct mention", async () => { let classifierCalled = false; - const replyCalls: string[] = []; - - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - return { - object: { - should_reply: false, - should_unsubscribe: true, - confidence: 1, - reason: - "user explicitly asked junior to stop participating in the thread", - }, - text: '{"should_reply":false,"should_unsubscribe":true,"confidence":1,"reason":"user explicitly asked junior to stop participating in the thread"}', - } as never; - }, - }, - replyExecutor: { - 
generateAssistantReply: async (prompt) => { - replyCalls.push(prompt); - return { - text: - replyCalls.length === 1 - ? "I can help with this thread." - : "I'm back because you mentioned me again.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + let replyCallCount = 0; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + return { + object: { + should_reply: false, + should_unsubscribe: true, + confidence: 1, + reason: + "user explicitly asked junior to stop participating in the thread", + }, + text: '{"should_reply":false,"should_unsubscribe":true,"confidence":1,"reason":"user explicitly asked junior to stop participating in the thread"}', + } as never; + }, + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply( + replyCallCount === 1 + ? "I can help with this thread." 
+ : "I'm back because you mentioned me again.", + ); }, }, }); @@ -366,7 +268,6 @@ describe("Slack behavior: subscribed messages", () => { threadId: thread.id, author: { userId: "U_TESTER" }, }), - { destination: createTestDestination(thread) }, ); expect(thread.subscribed).toBe(true); @@ -380,13 +281,12 @@ describe("Slack behavior: subscribed messages", () => { threadId: thread.id, author: { userId: "U_TESTER" }, }), - { destination: createTestDestination(thread) }, ); expect(classifierCalled).toBe(false); - expect(replyCalls).toHaveLength(1); + expect(replyCallCount).toBe(1); expect(thread.subscribed).toBe(false); - expect(toPostedText(thread.posts[1])).toContain( + expect(postedText(thread.posts[1])).toContain( "I'll stay out of this thread unless someone @mentions me again.", ); @@ -399,777 +299,12 @@ describe("Slack behavior: subscribed messages", () => { threadId: thread.id, author: { userId: "U_TESTER" }, }), - { destination: createTestDestination(thread) }, ); - expect(replyCalls).toHaveLength(2); + expect(replyCallCount).toBe(2); expect(thread.subscribed).toBe(true); - expect(toPostedText(thread.posts[2])).toContain( + expect(postedText(thread.posts[2])).toContain( "I'm back because you mentioned me again.", ); }); - - it("short-circuits acknowledgment messages without calling the classifier", async () => { - let classifierCalled = false; - let replyCalled = false; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - throw new Error( - "classifier should be bypassed for acknowledgments", - ); - }, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyCalled = true; - return { - text: "This should never be posted.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = 
createTestThread({ id: "slack:C_BEHAVIOR:1700002003.000" }); - const message = createTestMessage({ - id: "m-subscribed-ack", - text: "thanks!", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }); - - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); - - expect(classifierCalled).toBe(false); - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(0); - }); - - it("routes acknowledgment text with attachments through the classifier", async () => { - let classifierCalled = false; - let replyCalled = false; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - return { - object: { - should_reply: false, - confidence: 0.95, - reason: "attachment acknowledgment", - }, - text: '{"should_reply":false,"confidence":0.95,"reason":"attachment acknowledgment"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyCalled = true; - return { - text: "This should never be posted.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.125" }); - const message = createTestMessage({ - id: "m-subscribed-ack-attachment", - text: "thanks!", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - attachments: [ - { - type: "image", - url: "https://example.com/chart.png", - }, - ], - }); - - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); - - expect(classifierCalled).toBe(true); - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(0); - }); - - it("routes attachment-only passive messages through the classifier", async () 
=> { - let classifierCalled = false; - let replyCalled = false; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - return { - object: { - should_reply: false, - confidence: 0.95, - reason: "passive attachment", - }, - text: '{"should_reply":false,"confidence":0.95,"reason":"passive attachment"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyCalled = true; - return { - text: "This should never be posted.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.250" }); - const message = createTestMessage({ - id: "m-subscribed-attachment-only", - text: "", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - attachments: [ - { - type: "image", - url: "https://example.com/chart.png", - }, - ], - }); - - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); - - expect(classifierCalled).toBe(true); - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(0); - }); - - it("routes legacy attachment-only passive messages through the classifier", async () => { - let classifierCalled = false; - let replyCalled = false; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async (args) => { - classifierCalled = true; - expect(args.prompt).toContain("Deploy failed"); - expect(args.prompt).toContain("Service: checkout"); - return { - object: { - should_reply: false, - confidence: 0.95, - reason: "passive legacy attachment", - }, - text: '{"should_reply":false,"confidence":0.95,"reason":"passive legacy attachment"}', - } as never; - }, - }, - replyExecutor: { - 
generateAssistantReply: async () => { - replyCalled = true; - return { - text: "This should never be posted.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.275" }); - const message = createTestMessage({ - id: "m-subscribed-legacy-attachment-only", - text: "", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - raw: { - channel: "C_BEHAVIOR", - ts: "1700002003.275", - thread_ts: "1700002003.275", - attachments: [ - { - fallback: "Deploy failed", - fields: [{ title: "Service", value: "checkout" }], - }, - ], - }, - }); - - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); - - expect(classifierCalled).toBe(true); - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(0); - }); - - it("short-circuits generic immediate side-conversation questions without calling the classifier", async () => { - let classifierCalled = false; - let replyCalled = false; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - throw new Error( - "classifier should be bypassed for generic immediate side conversation", - ); - }, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyCalled = true; - return { - text: "This should never be posted.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.300" }); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-subscribed-generic-side-1", - 
text: "<@U_APP> summarize the deploy", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - replyCalled = false; - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-subscribed-generic-side-2", - text: "can you check on this?", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - - expect(classifierCalled).toBe(false); - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(1); - }); - - it("routes generic immediate attachment follow-ups through the classifier", async () => { - let classifierCalled = false; - let replyCalled = false; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - return { - object: { - should_reply: false, - confidence: 0.95, - reason: "attachment follow-up", - }, - text: '{"should_reply":false,"confidence":0.95,"reason":"attachment follow-up"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyCalled = true; - return { - text: "This should never be posted.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.350" }); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-subscribed-generic-side-attachment-1", - text: "<@U_APP> summarize the deploy", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - replyCalled = false; - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-subscribed-generic-side-attachment-2", - 
text: "can you check on this?", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - attachments: [ - { - type: "image", - url: "https://example.com/screenshot.png", - }, - ], - }), - { destination: createTestDestination(thread) }, - ); - - expect(classifierCalled).toBe(true); - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(1); - }); - - it("stays silent when a subscribed message is clearly directed at another bot", async () => { - let classifierCalled = false; - let replyCalled = false; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - throw new Error( - "classifier should be bypassed for messages addressed to another bot", - ); - }, - }, - replyExecutor: { - generateAssistantReply: async () => { - replyCalled = true; - return { - text: "This should never be posted.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.500" }); - const message = createTestMessage({ - id: "m-subscribed-other-bot", - text: "@Cursor can you help address issue 87?", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }); - - await slackRuntime.handleSubscribedMessage(thread, message, { - destination: createTestDestination(thread), - }); - - expect(classifierCalled).toBe(false); - expect(replyCalled).toBe(false); - expect(thread.posts).toHaveLength(0); - const state = (await thread.state) ?? {}; - const conversation = (state.conversation ?? 
{}) as { - messages?: Array<{ - id: string; - text: string; - meta?: { replied?: boolean; skippedReason?: string }; - }>; - processing?: { lastCompletedAtMs?: number }; - }; - expect(conversation.messages).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - id: "m-subscribed-other-bot", - text: "@Cursor can you help address issue 87?", - meta: expect.objectContaining({ - replied: false, - skippedReason: "directed_to_other_party:named_mention:Cursor", - }), - }), - ]), - ); - expect(conversation.processing?.lastCompletedAtMs).toEqual( - expect.any(Number), - ); - }); - - it("replies immediately to directed follow-up questions after junior just spoke", async () => { - let classifierCalled = false; - const replyCalls: string[] = []; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - return { - object: { - should_reply: true, - confidence: 0.95, - reason: "follow-up directed at assistant's previous response", - }, - text: '{"should_reply":true,"confidence":0.95,"reason":"follow-up directed at assistant\'s previous response"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async (prompt) => { - replyCalls.push(prompt); - return { - text: "You asked for the budget by Friday.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002004.000" }); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-subscribed-followup-1", - text: "<@U_APP> I need the budget by Friday", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: 
"m-subscribed-followup-2", - text: "what did you just say about the budget?", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - - expect(classifierCalled).toBe(false); - expect(replyCalls).toContain("what did you just say about the budget?"); - expect(thread.posts).toHaveLength(2); - expect(toPostedText(thread.posts[1])).toContain("budget by Friday"); - }); - - it("replies immediately to terse clarifications after junior just spoke", async () => { - let classifierCalled = false; - const replyCalls: string[] = []; - - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - classifierCalled = true; - return { - object: { - should_reply: true, - confidence: 0.65, - reason: "brief clarification after assistant reply", - }, - text: '{"should_reply":true,"confidence":0.65,"reason":"brief clarification after assistant reply"}', - } as never; - }, - }, - replyExecutor: { - generateAssistantReply: async (prompt) => { - replyCalls.push(prompt); - return { - text: - replyCalls.length === 1 - ? "The deploy changed billing, auth, and the API gateway." 
- : "The three services were billing, auth, and the API gateway.", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, - }, - }, - }); - - const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002004.500" }); - await slackRuntime.handleNewMention( - thread, - createTestMessage({ - id: "m-subscribed-low-confidence-followup-1", - text: "<@U_APP> what changed in the deploy?", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-subscribed-low-confidence-followup-2", - text: "which one?", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread) }, - ); - - expect(classifierCalled).toBe(false); - expect(replyCalls).toContain("which one?"); - expect(thread.posts).toHaveLength(2); - expect(toPostedText(thread.posts[1])).toContain( - "billing, auth, and the API gateway", - ); - }); - - // Regression: skipped subscribed messages must commit inbound input so the - // durable mailbox does not re-enqueue them forever. 
- it("calls onInputCommitted when preflight skips a message directed at another user", async () => { - const { slackRuntime } = createRuntime(); - const onInputCommitted = vi.fn().mockResolvedValue(undefined); - const thread = createTestThread({ id: "slack:C_REGRESS:1700010000.001" }); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-preflight-skip", - text: "@Alice can you take a look at this?", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread), onInputCommitted }, - ); - - expect(onInputCommitted).toHaveBeenCalledTimes(1); - expect(thread.posts).toHaveLength(0); - }); - - it("preserves an unrelated active continuation when preflight skips a message", async () => { - const { slackRuntime } = createRuntime(); - const onInputCommitted = vi.fn().mockResolvedValue(undefined); - const activeTurnId = "turn_existing_resume"; - const thread = createTestThread({ - id: "slack:C_REGRESS:1700010000.005", - state: { - conversation: { - processing: { - activeTurnId, - }, - }, - }, - }); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-preflight-skip-while-resuming", - text: "@Alice can you take this one?", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread), onInputCommitted }, - ); - - const state = (await thread.state) ?? 
{}; - const conversation = state.conversation as { - processing?: { activeTurnId?: string }; - }; - expect(onInputCommitted).toHaveBeenCalledTimes(1); - expect(conversation.processing?.activeTurnId).toBe(activeTurnId); - expect(thread.posts).toHaveLength(0); - }); - - it("calls onInputCommitted when the classifier decides not to reply", async () => { - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => - ({ - object: { - should_reply: false, - confidence: 0.9, - reason: "side conversation", - }, - text: '{"should_reply":false,"confidence":0.9,"reason":"side conversation"}', - }) as never, - }, - }, - }); - const onInputCommitted = vi.fn().mockResolvedValue(undefined); - const thread = createTestThread({ id: "slack:C_REGRESS:1700010000.002" }); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-classifier-skip", - text: "sounds good, let's ship it", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread), onInputCommitted }, - ); - - expect(onInputCommitted).toHaveBeenCalledTimes(1); - expect(thread.posts).toHaveLength(0); - }); - - it("calls onInputCommitted on the opt-out skip path", async () => { - const { slackRuntime } = createRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => - ({ - object: { - should_reply: false, - should_unsubscribe: true, - confidence: 1, - reason: "explicit stop", - }, - text: '{"should_reply":false,"should_unsubscribe":true,"confidence":1,"reason":"explicit stop"}', - }) as never, - }, - }, - }); - const onInputCommitted = vi.fn().mockResolvedValue(undefined); - const thread = createTestThread({ id: "slack:C_REGRESS:1700010000.003" }); - // Subscribe first so opt-out has something to unsubscribe from. 
- thread.subscribe(); - - await slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-optout-skip", - text: "<@U_APP> please stop watching this thread", - isMention: true, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread), onInputCommitted }, - ); - - expect(onInputCommitted).toHaveBeenCalledTimes(1); - }); - - it("propagates TurnInputCommitLostError when onInputCommitted fails on skip", async () => { - const { slackRuntime } = createRuntime(); - const commitError = new TurnInputCommitLostError( - "lease lost during skip commit", - ); - const onInputCommitted = vi.fn().mockRejectedValue(commitError); - const thread = createTestThread({ id: "slack:C_REGRESS:1700010000.004" }); - - await expect( - slackRuntime.handleSubscribedMessage( - thread, - createTestMessage({ - id: "m-commit-lost", - text: "@Alice handle this please", - isMention: false, - threadId: thread.id, - author: { userId: "U_TESTER" }, - }), - { destination: createTestDestination(thread), onInputCommitted }, - ), - ).rejects.toThrow(TurnInputCommitLostError); - }); }); diff --git a/packages/junior/tests/integration/slack/subscribed-reply-policy-behavior.test.ts b/packages/junior/tests/integration/slack/subscribed-reply-policy-behavior.test.ts new file mode 100644 index 000000000..39ba3f5f9 --- /dev/null +++ b/packages/junior/tests/integration/slack/subscribed-reply-policy-behavior.test.ts @@ -0,0 +1,297 @@ +import { describe, expect, it } from "vitest"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { + conversationMessages, + createSlackBehaviorRuntime, + postedText, +} from "../../fixtures/slack/behavior"; +import { + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +describe("Slack behavior: subscribed reply policy", () => { + it("routes acknowledgment text with attachments through the classifier", async () => { + let classifierCalled = false; + 
let replyCalled = false; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + return { + object: { + should_reply: false, + confidence: 0.95, + reason: "attachment acknowledgment", + }, + text: '{"should_reply":false,"confidence":0.95,"reason":"attachment acknowledgment"}', + } as never; + }, + generateAssistantReply: async () => { + replyCalled = true; + return successfulAssistantReply("This should never be posted."); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.125" }); + const message = createTestMessage({ + id: "m-subscribed-ack-attachment", + text: "thanks!", + isMention: false, + threadId: thread.id, + author: { userId: "U_TESTER" }, + attachments: [ + { + type: "image", + url: "https://example.com/chart.png", + }, + ], + }); + + await slackRuntime.handleSubscribedMessage(thread, message); + + expect(classifierCalled).toBe(true); + expect(replyCalled).toBe(false); + expect(thread.posts).toHaveLength(0); + }); + + it("routes legacy attachment-only passive messages through the classifier", async () => { + let classifierCalled = false; + let replyCalled = false; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + return { + object: { + should_reply: false, + confidence: 0.95, + reason: "passive legacy attachment", + }, + text: '{"should_reply":false,"confidence":0.95,"reason":"passive legacy attachment"}', + } as never; + }, + generateAssistantReply: async () => { + replyCalled = true; + return successfulAssistantReply("This should never be posted."); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.275" }); + const message = createTestMessage({ + id: "m-subscribed-legacy-attachment-only", + text: "", + isMention: false, + threadId: thread.id, + author: { userId: "U_TESTER" }, + raw: { + channel: 
"C_BEHAVIOR", + ts: "1700002003.275", + thread_ts: "1700002003.275", + attachments: [ + { + fallback: "Deploy failed", + fields: [{ title: "Service", value: "checkout" }], + }, + ], + }, + }); + + await slackRuntime.handleSubscribedMessage(thread, message); + + expect(classifierCalled).toBe(true); + expect(replyCalled).toBe(false); + expect(conversationMessages(thread)[0]?.text).toContain("Deploy failed"); + expect(conversationMessages(thread)[0]?.text).toContain( + "Service: checkout", + ); + expect(thread.posts).toHaveLength(0); + }); + + it("routes generic immediate attachment follow-ups through the classifier", async () => { + let classifierCalled = false; + let replyCalled = false; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + return { + object: { + should_reply: false, + confidence: 0.95, + reason: "attachment follow-up", + }, + text: '{"should_reply":false,"confidence":0.95,"reason":"attachment follow-up"}', + } as never; + }, + generateAssistantReply: async () => { + replyCalled = true; + return successfulAssistantReply("This should never be posted."); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.350" }); + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "m-subscribed-generic-side-attachment-1", + text: "<@U_APP> summarize the deploy", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }), + ); + replyCalled = false; + + await slackRuntime.handleSubscribedMessage( + thread, + createTestMessage({ + id: "m-subscribed-generic-side-attachment-2", + text: "can you check on this?", + isMention: false, + threadId: thread.id, + author: { userId: "U_TESTER" }, + attachments: [ + { + type: "image", + url: "https://example.com/screenshot.png", + }, + ], + }), + ); + + expect(classifierCalled).toBe(true); + expect(replyCalled).toBe(false); + expect(thread.posts).toHaveLength(1); + 
}); + + it("stays silent when a subscribed message is clearly directed at another bot", async () => { + let classifierCalled = false; + let replyCalled = false; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + throw new Error( + "classifier should be bypassed for messages addressed to another bot", + ); + }, + generateAssistantReply: async () => { + replyCalled = true; + return successfulAssistantReply("This should never be posted."); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002003.500" }); + const message = createTestMessage({ + id: "m-subscribed-other-bot", + text: "@Cursor can you help address issue 87?", + isMention: false, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }); + + await slackRuntime.handleSubscribedMessage(thread, message); + + expect(classifierCalled).toBe(false); + expect(replyCalled).toBe(false); + expect(thread.posts).toHaveLength(0); + const state = (await thread.state) ?? {}; + const conversation = (state.conversation ?? 
{}) as { + messages?: Array<{ + id: string; + text: string; + meta?: { replied?: boolean; skippedReason?: string }; + }>; + processing?: { lastCompletedAtMs?: number }; + }; + expect(conversation.messages).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + id: "m-subscribed-other-bot", + text: "@Cursor can you help address issue 87?", + meta: expect.objectContaining({ + replied: false, + skippedReason: "directed_to_other_party:named_mention:Cursor", + }), + }), + ]), + ); + expect(conversation.processing?.lastCompletedAtMs).toEqual( + expect.any(Number), + ); + }); + + it("replies immediately to directed follow-up questions after junior just spoke", async () => { + let classifierCalled = false; + let replyCallCount = 0; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + classifierCalled = true; + throw new Error( + "classifier should be bypassed for directed follow-ups", + ); + }, + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply( + replyCallCount === 1 + ? "Budget noted." 
+ : "You asked for the budget by Friday.", + ); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_BEHAVIOR:1700002004.000" }); + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "m-subscribed-followup-1", + text: "<@U_APP> I need the budget by Friday", + isMention: true, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }), + ); + + await slackRuntime.handleSubscribedMessage( + thread, + createTestMessage({ + id: "m-subscribed-followup-2", + text: "what did you just say about the budget?", + isMention: false, + threadId: thread.id, + author: { userId: "U_TESTER" }, + }), + ); + + expect(classifierCalled).toBe(false); + expect(replyCallCount).toBe(2); + expect( + conversationMessages(thread).map((message) => ({ + id: message.id, + text: message.text, + })), + ).toEqual( + expect.arrayContaining([ + { + id: "m-subscribed-followup-2", + text: "what did you just say about the budget?", + }, + ]), + ); + expect(thread.posts).toHaveLength(2); + expect(postedText(thread.posts[1])).toContain("budget by Friday"); + }); +}); diff --git a/packages/junior/tests/integration/slack/thread-continuity-behavior.test.ts b/packages/junior/tests/integration/slack/thread-continuity-behavior.test.ts index 316cf1d2b..ad2457efe 100644 --- a/packages/junior/tests/integration/slack/thread-continuity-behavior.test.ts +++ b/packages/junior/tests/integration/slack/thread-continuity-behavior.test.ts @@ -1,25 +1,14 @@ import { describe, expect, it } from "vitest"; -import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { + createSlackBehaviorRuntime, + postedText, +} from "../../fixtures/slack/behavior"; import { createTestMessage, createTestThread, createTestDestination, -} from "../../fixtures/slack-harness"; - -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === 
"object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - - return String(value); -} +} from "../../fixtures/slack/harness"; describe("Slack behavior: thread continuity", () => { it("keeps same-thread replies in arrival order for rapid follow-up messages", async () => { @@ -27,39 +16,25 @@ describe("Slack behavior: thread continuity", () => { "Rollback complete. Error rates are back to baseline.", "Next step: monitor dashboards for 30 minutes.", ]; - const prompts: string[] = []; - - const { slackRuntime } = createTestChatRuntime({ - services: { - subscribedReplyPolicy: { - completeObject: async () => { - return { - object: { - should_reply: true, - confidence: 1, - reason: "direct mention follow-up", - }, - text: '{"should_reply":true,"confidence":1,"reason":"direct mention follow-up"}', - } as never; - }, + let replyCallCount = 0; + + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + classifySubscribedReply: async () => { + return { + object: { + should_reply: true, + confidence: 1, + reason: "direct mention follow-up", + }, + text: '{"should_reply":true,"confidence":1,"reason":"direct mention follow-up"}', + } as never; }, - replyExecutor: { - generateAssistantReply: async (prompt) => { - prompts.push(prompt); - return { - text: - scriptedReplies[prompts.length - 1] ?? "Unexpected extra reply", - diagnostics: { - assistantMessageCount: 1, - modelId: "fake-agent-model", - outcome: "success", - toolCalls: [], - toolErrorCount: 0, - toolResultCount: 0, - usedPrimaryText: true, - }, - }; - }, + generateAssistantReply: async () => { + replyCallCount += 1; + return successfulAssistantReply( + scriptedReplies[replyCallCount - 1] ?? 
"Unexpected extra reply", + ); }, }, }); @@ -87,11 +62,182 @@ describe("Slack behavior: thread continuity", () => { destination: createTestDestination(thread), }); - expect(prompts).toHaveLength(2); + expect(replyCallCount).toBe(2); expect(thread.posts).toHaveLength(2); - expect(toPostedText(thread.posts[0])).toContain("Rollback complete"); - expect(toPostedText(thread.posts[1])).toContain( + expect(postedText(thread.posts[0])).toContain("Rollback complete"); + expect(postedText(thread.posts[1])).toContain( "Next step: monitor dashboards", ); }); + + it("omits prior conversation context for a brand-new mention", async () => { + const capturedContexts: Array = []; + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply: async (_prompt, context) => { + capturedContexts.push(context?.conversationContext); + return successfulAssistantReply("First reply."); + }, + }, + }); + + const threadId = "slack:C_FIRST_EMPTY:1700000000.000"; + const thread = createTestThread({ id: threadId }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-first-current", + threadId, + text: "Can you summarize this?", + isMention: true, + }), + ); + + expect(capturedContexts).toEqual([undefined]); + }); + + it("builds first-turn context from the prior thread transcript only", async () => { + const capturedContexts: Array = []; + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply: async (_prompt, context) => { + capturedContexts.push(context?.conversationContext); + return successfulAssistantReply("Follow-up reply."); + }, + }, + }); + + const threadId = "slack:C_FIRST_EXISTING:1700000000.000"; + const thread = createTestThread({ id: threadId }); + const priorMessage = createTestMessage({ + id: "msg-first-prior", + threadId, + text: "Original production issue summary.", + author: { userId: "U-prior", userName: "alice", isBot: false }, + }); + priorMessage.metadata.dateSent = new 
Date(1_700_000_000_000); + const currentMessage = createTestMessage({ + id: "msg-first-current", + threadId, + text: "Can you include the regression window?", + isMention: true, + author: { userId: "U-current", userName: "bob", isBot: false }, + }); + currentMessage.metadata.dateSent = new Date(1_700_000_001_000); + thread.recentMessages = [priorMessage, currentMessage]; + + await slackRuntime.handleNewMention(thread, currentMessage); + + expect(capturedContexts).toHaveLength(1); + expect(capturedContexts[0]).toContain(""); + expect(capturedContexts[0]).toContain("Original production issue summary."); + expect(capturedContexts[0]).not.toContain( + "Can you include the regression window?", + ); + }); + + it("does not include newer thread messages in subscribed-message context", async () => { + const capturedContexts: Array = []; + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateThreadTitleText: async () => + ({ text: "Context thread" }) as never, + classifySubscribedReply: async () => + ({ + object: { + should_reply: true, + confidence: 1, + reason: "follow-up", + }, + text: '{"should_reply":true,"confidence":1,"reason":"follow-up"}', + }) as never, + generateAssistantReply: async (_prompt, context) => { + capturedContexts.push(context?.conversationContext); + return successfulAssistantReply("Responding to first message only."); + }, + }, + }); + + const threadId = "slack:D_ORDER:1700000000.000"; + const thread = createTestThread({ id: threadId }); + const firstMessage = createTestMessage({ + id: "1700000000.100", + threadId, + text: "you work now?", + isMention: false, + }); + const laterMessage = createTestMessage({ + id: "1700000000.200", + threadId, + text: "hello", + isMention: false, + }); + + Object.defineProperty(thread, "messages", { + configurable: true, + get() { + return (async function* () { + // Chat SDK thread iterators are newest-first. 
+ yield laterMessage; + yield firstMessage; + })(); + }, + }); + + await slackRuntime.handleSubscribedMessage(thread, firstMessage); + + expect(capturedContexts).toHaveLength(1); + expect(capturedContexts[0]).toBeUndefined(); + }); + + it("preserves persisted conversation state across multiple turns", async () => { + let turnCount = 0; + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply: async () => { + turnCount += 1; + return successfulAssistantReply(`reply-${turnCount}`); + }, + }, + }); + + const thread = createTestThread({ id: "slack:C_MULTI:1700000000.000" }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-t1", + threadId: "slack:C_MULTI:1700000000.000", + text: "first turn", + isMention: true, + }), + ); + + const stateAfterFirstTurn = thread.getState(); + const conv1 = ( + stateAfterFirstTurn as { conversation?: { messages?: unknown[] } } + ).conversation; + expect(conv1).toBeDefined(); + const messageCountAfterFirst = conv1?.messages?.length ?? 0; + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-t2", + threadId: "slack:C_MULTI:1700000000.000", + text: "second turn", + isMention: true, + }), + ); + + const stateAfterSecondTurn = thread.getState(); + const conv2 = ( + stateAfterSecondTurn as { conversation?: { messages?: unknown[] } } + ).conversation; + expect(conv2).toBeDefined(); + expect(conv2?.messages?.length ?? 
0).toBeGreaterThan( + messageCountAfterFirst, + ); + }); }); diff --git a/packages/junior/tests/integration/slack-thread-read.test.ts b/packages/junior/tests/integration/slack/thread-read-tool.test.ts similarity index 81% rename from packages/junior/tests/integration/slack-thread-read.test.ts rename to packages/junior/tests/integration/slack/thread-read-tool.test.ts index f31781545..a731b89cb 100644 --- a/packages/junior/tests/integration/slack-thread-read.test.ts +++ b/packages/junior/tests/integration/slack/thread-read-tool.test.ts @@ -1,42 +1,22 @@ import { describe, expect, it } from "vitest"; import { createSlackThreadReadTool } from "@/chat/tools/slack/thread-read"; -import type { SlackToolContext } from "@/chat/tools/slack/context"; -import { conversationsRepliesPage } from "../fixtures/slack/factories/api"; +import { conversationsRepliesPage } from "../../fixtures/slack/factories/api"; +import { + createTestToolRuntimeContext, + executeTestTool, + type TestToolRuntimeOverrides, +} from "../../fixtures/tool-runtime"; import { getCapturedSlackApiCalls, queueSlackApiError, queueSlackApiResponse, -} from "../msw/handlers/slack-api"; - -function createContext( - overrides: Partial = {}, -): SlackToolContext { - const sourceChannelId = overrides.sourceChannelId ?? "C_CURRENT"; - const destinationChannelId = - overrides.destinationChannelId ?? sourceChannelId; - return { - destination: overrides.destination ?? { - platform: "slack", - teamId: "T123", - channelId: destinationChannelId, - }, - source: overrides.source ?? 
{ - platform: "slack", - teamId: "T123", - channelId: sourceChannelId, - }, - destinationChannelId, - sourceChannelId, - teamId: "T123", - ...overrides, - }; -} +} from "../../msw/handlers/slack-api"; -async function executeTool(tool: any, input: TInput) { - if (typeof tool?.execute !== "function") { - throw new Error("tool execute function missing"); - } - return await tool.execute(input, {} as any); +function createContext(overrides: TestToolRuntimeOverrides = {}) { + return createTestToolRuntimeContext({ + channelId: "C_CURRENT", + ...overrides, + }); } describe("slackThreadRead", () => { @@ -62,7 +42,7 @@ describe("slackThreadRead", () => { }); const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { url: "https://sentry.slack.com/archives/C0AHB7N2JCR/p1700000000123456", }); @@ -79,7 +59,9 @@ describe("slackThreadRead", () => { expect(result.messages[0].text).toBe("root message"); expect(result.messages[1].text).toBe("reply message"); - // No conversations.info call — access determined by channel prefix + // Public-channel URLs should read the thread directly without broader + // history or channel-info calls. 
+ expect(getCapturedSlackApiCalls("conversations.history")).toHaveLength(0); expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); expect(getCapturedSlackApiCalls("conversations.replies")).toHaveLength(1); }); @@ -106,7 +88,7 @@ describe("slackThreadRead", () => { }); const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { url: "https://sentry.slack.com/archives/C123/p1700000000999999?thread_ts=1700000000.000000&cid=C123", }); @@ -142,7 +124,7 @@ describe("slackThreadRead", () => { }); const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "C_MANUAL", ts: "1700000000.500000", }); @@ -173,7 +155,7 @@ describe("slackThreadRead", () => { const tool = createSlackThreadReadTool( createContext({ sourceChannelId: "G_PRIVATE" }), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "G_PRIVATE", ts: "1700000000.100000", }); @@ -210,7 +192,7 @@ describe("slackThreadRead", () => { destinationChannelId: "G_PRIVATE", }), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "G_PRIVATE", ts: "1700000000.100000", }); @@ -227,7 +209,7 @@ describe("slackThreadRead", () => { const tool = createSlackThreadReadTool( createContext({ sourceChannelId: "D_DM" }), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "G_PRIVATE", ts: "1700000000.100000", }); @@ -244,7 +226,7 @@ describe("slackThreadRead", () => { const tool = createSlackThreadReadTool( createContext({ sourceChannelId: "C_CURRENT" }), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { url: "https://sentry.slack.com/archives/G0OTHER/p1700000000100000", }); @@ -262,7 +244,7 @@ describe("slackThreadRead", () => { it("blocks 
reading a DM channel that is not the current channel", async () => { const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "D_SOMEONE", ts: "1700000000.100000", }); @@ -281,7 +263,7 @@ describe("slackThreadRead", () => { }); const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "C_FLAKY", ts: "1700000000.100000", }); @@ -296,7 +278,7 @@ describe("slackThreadRead", () => { it("returns an error for invalid URL input", async () => { const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { url: "not a valid url", }); @@ -308,7 +290,7 @@ describe("slackThreadRead", () => { it("returns an error when neither url nor channel_id+ts are provided", async () => { const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, {}); + const result = await executeTestTool(tool, {}); expect(result).toMatchObject({ ok: false, @@ -318,7 +300,7 @@ describe("slackThreadRead", () => { it("rejects invalid explicit ts format", async () => { const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "C123", ts: "not-a-timestamp", }); @@ -366,7 +348,7 @@ describe("slackThreadRead", () => { }); const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "C_PAGED", ts: "1700000000.000000", }); @@ -412,7 +394,7 @@ describe("slackThreadRead", () => { }); const tool = createSlackThreadReadTool(createContext()); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { channel_id: "C123", ts: "1700000000.100000", }); @@ -428,29 +410,4 @@ 
describe("slackThreadRead", () => { expect(file).not.toHaveProperty("url_private"); expect(file).not.toHaveProperty("url_private_download"); }); - - it("does not call conversations.history — only conversations.replies", async () => { - queueSlackApiResponse("conversations.replies", { - body: conversationsRepliesPage({ - threadTs: "1700000000.100000", - messages: [ - { - ts: "1700000000.100000", - thread_ts: "1700000000.100000", - user: "U1", - text: "msg", - }, - ], - }), - }); - - const tool = createSlackThreadReadTool(createContext()); - await executeTool(tool, { - url: "https://sentry.slack.com/archives/C123/p1700000000100000", - }); - - expect(getCapturedSlackApiCalls("conversations.history")).toHaveLength(0); - expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); - expect(getCapturedSlackApiCalls("conversations.replies")).toHaveLength(1); - }); }); diff --git a/packages/junior/tests/integration/slack/thread-title-behavior.test.ts b/packages/junior/tests/integration/slack/thread-title-behavior.test.ts new file mode 100644 index 000000000..f478ea73e --- /dev/null +++ b/packages/junior/tests/integration/slack/thread-title-behavior.test.ts @@ -0,0 +1,401 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import type { JuniorRuntimeScenarioAdapters } from "@/chat/app/services"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { + FakeSlackAdapter, + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import { conversationMessages } from "../../fixtures/slack/behavior"; + +const emptyThreadReplies = async () => []; + +function postIncludes(thread: { posts: unknown[] }, text: string): boolean { + return thread.posts.some((post) => { + if (typeof post === "string") { + return post.includes(text); + } + if ( + post && + typeof post === 
"object" && + "markdown" in (post as Record) + ) { + return String((post as { markdown: string }).markdown).includes(text); + } + return false; + }); +} + +function createRuntime(args: { + adapters?: JuniorRuntimeScenarioAdapters; + slackAdapter: FakeSlackAdapter; +}) { + const adapters = args.adapters ?? {}; + return createTestChatRuntime({ + slackAdapter: args.slackAdapter, + adapters: { + listThreadReplies: emptyThreadReplies, + ...adapters, + }, + }); +} + +async function flushTitleWork(): Promise { + await new Promise((resolve) => setTimeout(resolve, 0)); +} + +function generatedTitleCall(adapter: FakeSlackAdapter) { + return adapter.titleCalls.find((call) => call.title !== "Junior"); +} + +describe("Slack behavior: thread title", () => { + afterEach(async () => { + vi.restoreAllMocks(); + await disconnectStateAdapter(); + }); + + it("generates and sets title after first assistant reply", async () => { + const slackAdapter = new FakeSlackAdapter(); + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateThreadTitleText: async () => + ({ + text: "Debugging Node.js Memory Leaks", + message: { role: "assistant", content: "" }, + }) as never, + generateAssistantReply: async () => + successfulAssistantReply("Here is how to debug memory leaks."), + }, + }); + + const thread = createTestThread({ id: "slack:D_TITLE:1700000000.000" }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-title-1", + threadId: thread.id, + text: "How do I debug memory leaks in Node?", + isMention: true, + }), + ); + + await flushTitleWork(); + + expect(generatedTitleCall(slackAdapter)).toEqual( + expect.objectContaining({ + channelId: "D_TITLE", + threadTs: "1700000000.000", + title: "Debugging Node.js Memory Leaks", + }), + ); + }); + + it("hydrates earlier human thread messages before generating a title", async () => { + const slackAdapter = new FakeSlackAdapter(); + const { slackRuntime } = createRuntime({ + slackAdapter, + 
adapters: { + generateThreadTitleText: async () => + ({ + text: "Production Issue Summary", + message: { role: "assistant", content: "" }, + }) as never, + generateAssistantReply: async () => + successfulAssistantReply("Here is the updated answer."), + }, + }); + + const thread = createTestThread({ id: "slack:D_TITLE4:1700000000.000" }); + const earlierMessage = createTestMessage({ + id: "msg-title4-earlier", + threadId: thread.id, + text: "Original production issue summary", + author: { userId: "U-title4", isBot: false }, + }); + earlierMessage.metadata.dateSent = new Date(1_700_000_000_000); + thread.recentMessages = [earlierMessage]; + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-title4-current", + threadId: thread.id, + text: "Can you also include the regression window?", + isMention: true, + }), + ); + + await flushTitleWork(); + + expect( + conversationMessages(thread) + .filter( + (message) => + message.id === "msg-title4-earlier" || + message.id === "msg-title4-current", + ) + .map((message) => ({ id: message.id, text: message.text })), + ).toEqual([ + { + id: "msg-title4-earlier", + text: "Original production issue summary", + }, + { + id: "msg-title4-current", + text: "Can you also include the regression window?", + }, + ]); + expect(generatedTitleCall(slackAdapter)).toEqual( + expect.objectContaining({ + title: "Production Issue Summary", + }), + ); + }); + + it("still generates for a new thread with starter assistant content", async () => { + const slackAdapter = new FakeSlackAdapter(); + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateThreadTitleText: async () => + ({ + text: "Today's Date", + message: { role: "assistant", content: "" }, + }) as never, + generateAssistantReply: async () => + successfulAssistantReply("Today is April 16, 2026."), + }, + }); + + const thread = createTestThread({ + id: "slack:D_TITLE5:1700000000.000", + }); + const starterMessage = createTestMessage({ + 
id: "msg-title5-starter", + threadId: thread.id, + text: "How can I help?", + author: { + isBot: true, + isMe: true, + userId: "B-title5", + userName: "junior", + }, + }); + starterMessage.metadata.dateSent = new Date(1_700_000_000_000); + thread.recentMessages = [starterMessage]; + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-title5-user", + threadId: thread.id, + text: "what's today's date", + isMention: true, + }), + ); + + await flushTitleWork(); + + expect(generatedTitleCall(slackAdapter)).toEqual( + expect.objectContaining({ + title: "Today's Date", + }), + ); + }); + + it("runs in parallel with reply delivery when generation is slow", async () => { + const slackAdapter = new FakeSlackAdapter(); + let resolveTitle: (() => void) | undefined; + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateThreadTitleText: async () => + await new Promise((resolve) => { + resolveTitle = () => + resolve({ + text: "Today's Date", + message: { role: "assistant", content: "" }, + } as never); + }), + generateAssistantReply: async () => + successfulAssistantReply("Today is April 16, 2026."), + }, + }); + + const thread = createTestThread({ id: "slack:D_TITLE6:1700000000.000" }); + let settled = false; + const turnPromise = slackRuntime + .handleNewMention( + thread, + createTestMessage({ + id: "msg-title-6", + threadId: thread.id, + text: "what's today's date", + isMention: true, + }), + ) + .then(() => { + settled = true; + }); + + await vi.waitFor(() => { + expect(postIncludes(thread, "Today is April 16, 2026.")).toBe(true); + }); + await turnPromise; + expect(settled).toBe(true); + + resolveTitle!(); + await flushTitleWork(); + expect(generatedTitleCall(slackAdapter)).toMatchObject({ + title: "Today's Date", + }); + }); + + it("does not generate title on subsequent replies", async () => { + const slackAdapter = new FakeSlackAdapter(); + let turnCount = 0; + const { slackRuntime } = createRuntime({ + 
slackAdapter, + adapters: { + generateThreadTitleText: async () => + ({ + text: "Some Title", + message: { role: "assistant", content: "" }, + }) as never, + generateAssistantReply: async () => { + turnCount += 1; + return successfulAssistantReply(`reply-${turnCount}`); + }, + }, + }); + + const thread = createTestThread({ id: "slack:D_TITLE2:1700000000.000" }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-t2-1", + threadId: thread.id, + text: "first message", + isMention: true, + }), + ); + await flushTitleWork(); + + expect( + slackAdapter.titleCalls.filter((call) => call.title !== "Junior"), + ).toHaveLength(1); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-t2-2", + threadId: thread.id, + text: "second message", + isMention: true, + }), + ); + await flushTitleWork(); + + expect( + slackAdapter.titleCalls.filter((call) => call.title !== "Junior"), + ).toHaveLength(1); + }); + + it("ignores Slack permission errors when setting title", async () => { + const slackAdapter = new FakeSlackAdapter(); + slackAdapter.setAssistantTitle = async () => { + const error = new Error( + "An API error occurred: no_permission", + ) as Error & { + data?: { error?: string }; + }; + error.data = { error: "no_permission" }; + throw error; + }; + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateThreadTitleText: async () => + ({ + text: "Permission Safe Title", + message: { role: "assistant", content: "" }, + }) as never, + generateAssistantReply: async () => + successfulAssistantReply("This reply should still succeed."), + }, + }); + + const thread = createTestThread({ id: "slack:D_TITLE3:1700000000.000" }); + + await expect( + slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-title-3", + threadId: thread.id, + text: "title this thread please", + isMention: true, + }), + ), + ).resolves.toBeUndefined(); + await flushTitleWork(); + 
expect(thread.posts.length).toBeGreaterThan(0); + }); + + it("does not regenerate after stable Slack permission failures", async () => { + const slackAdapter = new FakeSlackAdapter(); + slackAdapter.setAssistantTitle = async () => { + const error = new Error( + "An API error occurred: no_permission", + ) as Error & { + data?: { error?: string }; + }; + error.data = { error: "no_permission" }; + throw error; + }; + + let titleGenerationCount = 0; + const { slackRuntime } = createRuntime({ + slackAdapter, + adapters: { + generateThreadTitleText: async () => { + titleGenerationCount += 1; + return { + text: "Stable Permission Title", + message: { role: "assistant", content: "" }, + } as never; + }, + generateAssistantReply: async () => + successfulAssistantReply("Reply still succeeds."), + }, + }); + + const thread = createTestThread({ id: "slack:D_TITLE7:1700000000.000" }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-title7-1", + threadId: thread.id, + text: "first message", + isMention: true, + }), + ); + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-title7-2", + threadId: thread.id, + text: "second message", + isMention: true, + }), + ); + + expect(titleGenerationCount).toBe(1); + }); +}); diff --git a/packages/junior/tests/integration/slack/turn-continuation-behavior.test.ts b/packages/junior/tests/integration/slack/turn-continuation-behavior.test.ts new file mode 100644 index 000000000..c5e9626f9 --- /dev/null +++ b/packages/junior/tests/integration/slack/turn-continuation-behavior.test.ts @@ -0,0 +1,346 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { RetryableTurnError } from "@/chat/runtime/turn"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { + getAgentTurnSessionRecord, + upsertAgentTurnSessionRecord, +} from "@/chat/state/turn-session"; +import { successfulAssistantReply } from "../../fixtures/assistant-reply"; +import 
{ + createSlackBehaviorRuntime, + threadHasPostText, +} from "../../fixtures/slack/behavior"; +import { + createAwaitingSlackTurnState, + createPiUserTurn, +} from "../../fixtures/slack/turn-state"; +import { + createTestDestination, + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +describe("Slack behavior: turn continuation", () => { + beforeEach(async () => { + await disconnectStateAdapter(); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + await disconnectStateAdapter(); + }); + + it("parks the active session when live execution yields to timeout resume", async () => { + const scheduleTurnTimeoutResume = vi.fn().mockResolvedValue(undefined); + const conversationId = "slack:C_TIMEOUT:1700000000.000"; + const sessionId = "turn_msg-timeout"; + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + scheduleTurnTimeoutResume, + generateAssistantReply: async () => { + throw new RetryableTurnError( + "turn_timeout_resume", + "simulated timeout continuation", + { + conversationId, + sessionId, + version: 3, + sliceId: 2, + }, + ); + }, + }, + }); + + const thread = createTestThread({ id: conversationId }); + await expect( + slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-timeout", + threadId: conversationId, + text: "please keep working", + isMention: true, + }), + ), + ).resolves.toBeUndefined(); + + expect(scheduleTurnTimeoutResume).toHaveBeenCalledWith({ + conversationId, + destination: createTestDestination(thread), + sessionId, + expectedVersion: 3, + }); + expect(thread.posts).toEqual([]); + + const state = thread.getState(); + const conversation = ( + state as { + conversation?: { + processing?: { activeTurnId?: string }; + }; + } + ).conversation; + expect(conversation?.processing?.activeTurnId).toBe(sessionId); + }); + + it("reschedules an awaiting turn continuation without replying to the follow-up", async () => { + const conversationId = 
"slack:C_TIMEOUT_RETRY:1700000000.000"; + const activeSessionId = "turn_msg-original"; + const scheduleTurnTimeoutResume = vi.fn().mockResolvedValue(undefined); + const getAwaitingTurnContinuationRequest = vi.fn().mockResolvedValue({ + conversationId, + sessionId: activeSessionId, + expectedVersion: 4, + }); + const generateAssistantReply = vi.fn(); + const onInputCommitted = vi.fn(); + const onTurnStatePersisted = vi.fn(); + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply, + getAwaitingTurnContinuationRequest, + scheduleTurnTimeoutResume, + }, + }); + + const thread = createTestThread({ + id: conversationId, + state: createAwaitingSlackTurnState({ activeSessionId }), + }); + + await expect( + slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-retry", + threadId: conversationId, + text: "what happened?", + isMention: true, + }), + { onInputCommitted, onTurnStatePersisted }, + ), + ).resolves.toBeUndefined(); + + expect(getAwaitingTurnContinuationRequest).toHaveBeenCalledWith({ + conversationId, + sessionId: activeSessionId, + }); + expect(scheduleTurnTimeoutResume).toHaveBeenCalledWith({ + conversationId, + sessionId: activeSessionId, + expectedVersion: 4, + }); + expect(generateAssistantReply).not.toHaveBeenCalled(); + expect(onTurnStatePersisted).toHaveBeenCalledOnce(); + expect(onInputCommitted).toHaveBeenCalledOnce(); + expect(thread.posts).toEqual([]); + + const state = thread.getState(); + const conversation = ( + state as { + conversation?: { + messages?: Array<{ + id?: string; + meta?: { replied?: boolean; skippedReason?: string }; + }>; + processing?: { activeTurnId?: string }; + }; + } + ).conversation; + expect(conversation?.processing?.activeTurnId).toBe(activeSessionId); + const followUp = conversation?.messages?.find( + (message) => message.id === "msg-retry", + ); + expect(followUp).toBeDefined(); + expect(followUp?.meta?.replied).toBeUndefined(); + 
expect(followUp?.meta?.skippedReason).toBeUndefined(); + }); + + it("terminalizes malformed awaiting continuations before handling the follow-up", async () => { + const conversationId = "slack:C_BAD_CONTINUATION:1700000000.000"; + const activeSessionId = "turn_msg-timeout-original"; + const generateAssistantReply = vi + .fn() + .mockResolvedValue(successfulAssistantReply("Recovered.")); + await upsertAgentTurnSessionRecord({ + conversationId, + sessionId: activeSessionId, + sliceId: 1, + state: "awaiting_resume", + resumeReason: "timeout", + piMessages: createPiUserTurn("please keep working"), + }); + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply, + }, + }); + + const thread = createTestThread({ + id: conversationId, + state: createAwaitingSlackTurnState({ activeSessionId }), + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-timeout-follow-up", + threadId: conversationId, + text: "what happened?", + isMention: true, + }), + ); + + expect(generateAssistantReply).toHaveBeenCalledOnce(); + expect(threadHasPostText(thread, "Recovered.")).toBe(true); + const failedRecord = await getAgentTurnSessionRecord( + conversationId, + activeSessionId, + ); + expect(failedRecord?.state).toBe("failed"); + expect(failedRecord?.errorMessage).toBe( + "Awaiting turn continuation metadata could not be materialized", + ); + const state = thread.getState(); + const conversation = ( + state as { + conversation?: { processing?: { activeTurnId?: string } }; + } + ).conversation; + expect(conversation?.processing?.activeTurnId).toBeUndefined(); + }); + + it("reschedules an awaiting continuation for repeated delivery of the active message", async () => { + const conversationId = "slack:C_TIMEOUT_DUPLICATE:1700000000.000"; + const activeSessionId = "turn_msg-duplicate"; + const scheduleTurnTimeoutResume = vi.fn().mockResolvedValue(undefined); + const getAwaitingTurnContinuationRequest = 
vi.fn().mockResolvedValue({ + conversationId, + sessionId: activeSessionId, + expectedVersion: 4, + }); + const generateAssistantReply = vi.fn(); + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply, + getAwaitingTurnContinuationRequest, + scheduleTurnTimeoutResume, + }, + }); + + const thread = createTestThread({ + id: conversationId, + state: createAwaitingSlackTurnState({ + activeSessionId, + userMessageId: "msg-duplicate", + }), + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-duplicate", + threadId: conversationId, + text: "please keep working", + isMention: true, + }), + ); + + expect(scheduleTurnTimeoutResume).toHaveBeenCalledWith({ + conversationId, + sessionId: activeSessionId, + expectedVersion: 4, + }); + expect(generateAssistantReply).not.toHaveBeenCalled(); + }); + + it("does not reschedule an already-replied duplicate continuation message", async () => { + const conversationId = "slack:C_TIMEOUT_REPLIED_DUP:1700000000.000"; + const activeSessionId = "turn_msg-replied-duplicate"; + const scheduleTurnTimeoutResume = vi.fn().mockResolvedValue(undefined); + const getAwaitingTurnContinuationRequest = vi.fn().mockResolvedValue({ + conversationId, + sessionId: activeSessionId, + expectedVersion: 4, + }); + const generateAssistantReply = vi.fn(); + const onTurnStatePersisted = vi.fn(); + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply, + getAwaitingTurnContinuationRequest, + scheduleTurnTimeoutResume, + }, + }); + + const thread = createTestThread({ + id: conversationId, + state: createAwaitingSlackTurnState({ + activeSessionId, + replied: true, + userMessageId: "msg-replied-duplicate", + }), + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-replied-duplicate", + threadId: conversationId, + text: "please keep working", + isMention: true, + }), + { onTurnStatePersisted }, + ); + + 
expect(getAwaitingTurnContinuationRequest).not.toHaveBeenCalled(); + expect(scheduleTurnTimeoutResume).not.toHaveBeenCalled(); + expect(generateAssistantReply).not.toHaveBeenCalled(); + expect(onTurnStatePersisted).toHaveBeenCalledOnce(); + expect(thread.posts).toEqual([]); + }); + + it("does not start a new turn when rescheduling an active continuation fails", async () => { + const conversationId = "slack:C_TIMEOUT_RETRY_FAIL:1700000000.000"; + const activeSessionId = "turn_msg-original"; + const scheduleTurnTimeoutResume = vi + .fn() + .mockRejectedValue(new Error("resume callback unavailable")); + const getAwaitingTurnContinuationRequest = vi.fn().mockResolvedValue({ + conversationId, + sessionId: activeSessionId, + expectedVersion: 4, + }); + const generateAssistantReply = vi.fn(); + const { slackRuntime } = createSlackBehaviorRuntime({ + adapters: { + generateAssistantReply, + getAwaitingTurnContinuationRequest, + scheduleTurnTimeoutResume, + }, + }); + + const thread = createTestThread({ + id: conversationId, + state: createAwaitingSlackTurnState({ activeSessionId }), + }); + + await slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-retry-fail", + threadId: conversationId, + text: "what happened?", + isMention: true, + }), + ); + + expect(generateAssistantReply).not.toHaveBeenCalled(); + expect(thread.posts).toEqual([ + expect.stringContaining( + "I ran into an internal error while processing that.", + ), + ]); + }); +}); diff --git a/packages/junior/tests/integration/slack/turn-continuation-contract.test.ts b/packages/junior/tests/integration/slack/turn-continuation-contract.test.ts new file mode 100644 index 000000000..387772b4a --- /dev/null +++ b/packages/junior/tests/integration/slack/turn-continuation-contract.test.ts @@ -0,0 +1,67 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { RetryableTurnError } from "@/chat/runtime/turn"; +import { + getCapturedSlackApiCalls, + resetSlackApiMockState, +} from 
"../../msw/handlers/slack-api"; +import { createTestChatRuntime } from "../../fixtures/chat-runtime"; +import { + createTestDestination, + createTestMessage, + createTestThread, +} from "../../fixtures/slack/harness"; + +describe("Slack contract: turn continuation", () => { + afterEach(() => { + resetSlackApiMockState(); + vi.restoreAllMocks(); + }); + + it("does not post a Slack continuation notice when a live turn times out", async () => { + const scheduleTurnTimeoutResume = vi.fn().mockResolvedValue(undefined); + const conversationId = "slack:C_TIMEOUT_API:1700000000.000"; + const sessionId = "turn_msg-timeout-api"; + const { slackRuntime } = createTestChatRuntime({ + adapters: { + listThreadReplies: async () => [], + scheduleTurnTimeoutResume, + generateAssistantReply: async () => { + throw new RetryableTurnError( + "turn_timeout_resume", + "simulated timeout continuation", + { + conversationId, + sessionId, + version: 3, + sliceId: 2, + }, + ); + }, + }, + }); + + const thread = createTestThread({ id: conversationId }); + (thread.adapter as { name?: string }).name = "slack"; + + await expect( + slackRuntime.handleNewMention( + thread, + createTestMessage({ + id: "msg-timeout-api", + threadId: conversationId, + text: "please keep working", + isMention: true, + }), + ), + ).resolves.toBeUndefined(); + + expect(scheduleTurnTimeoutResume).toHaveBeenCalledWith({ + conversationId, + destination: createTestDestination(thread), + sessionId, + expectedVersion: 3, + }); + expect(thread.posts).toEqual([]); + expect(getCapturedSlackApiCalls("chat.postMessage")).toEqual([]); + }); +}); diff --git a/packages/junior/tests/integration/slack-user-lookup.test.ts b/packages/junior/tests/integration/slack/user-lookup-tool.test.ts similarity index 79% rename from packages/junior/tests/integration/slack-user-lookup.test.ts rename to packages/junior/tests/integration/slack/user-lookup-tool.test.ts index d0261cfb4..a4e18e608 100644 --- 
a/packages/junior/tests/integration/slack-user-lookup.test.ts +++ b/packages/junior/tests/integration/slack/user-lookup-tool.test.ts @@ -1,18 +1,15 @@ import { describe, expect, it } from "vitest"; import { createSlackUserLookupTool } from "@/chat/tools/slack/user-lookup"; -import { usersInfoOk, usersListPage } from "../fixtures/slack/factories/api"; +import { usersInfoOk, usersListPage } from "../../fixtures/slack/factories/api"; +import { + createTestToolRuntimeContext, + executeTestTool, +} from "../../fixtures/tool-runtime"; import { getCapturedSlackApiCalls, queueSlackApiResponse, queueSlackApiError, -} from "../msw/handlers/slack-api"; - -async function executeTool(tool: any, input: TInput) { - if (typeof tool?.execute !== "function") { - throw new Error("tool execute function missing"); - } - return await tool.execute(input, {} as any); -} +} from "../../msw/handlers/slack-api"; describe("slackUserLookup", () => { describe("user_id mode", () => { @@ -36,7 +33,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { user_id: "U039RR91S" }); + const result = await executeTestTool(tool, { user_id: "U039RR91S" }); expect(result).toMatchObject({ ok: true, @@ -73,7 +70,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { user_id: "U_BASIC" }); + const result = await executeTestTool(tool, { user_id: "U_BASIC" }); expect(result).toMatchObject({ ok: true, @@ -92,7 +89,7 @@ describe("slackUserLookup", () => { queueSlackApiError("users.info", { error: "user_not_found" }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { user_id: "U_NONEXISTENT" }); + const result = await executeTestTool(tool, { user_id: "U_NONEXISTENT" }); expect(result.ok).toBe(false); expect(result.slack_error).toBe("user_not_found"); @@ -111,7 +108,9 @@ describe("slackUserLookup", () => { }); const tool = 
createSlackUserLookupTool(); - const result = await executeTool(tool, { email: "emailuser@sentry.io" }); + const result = await executeTestTool(tool, { + email: "emailuser@sentry.io", + }); expect(result).toMatchObject({ ok: true, @@ -132,7 +131,9 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { email: "nobody@example.com" }); + const result = await executeTestTool(tool, { + email: "nobody@example.com", + }); expect(result).toMatchObject({ ok: false, @@ -161,7 +162,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { query: "markus" }); + const result = await executeTestTool(tool, { query: "markus" }); expect(result).toMatchObject({ ok: true, @@ -186,7 +187,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { query: "zzzzzz" }); + const result = await executeTestTool(tool, { query: "zzzzzz" }); expect(result).toMatchObject({ ok: true, @@ -207,7 +208,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { query: "junior" }); + const result = await executeTestTool(tool, { query: "junior" }); expect(result.users).toHaveLength(1); expect(result.users[0].id).toBe("U2"); @@ -224,7 +225,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { query: "junior", include_bots: true, }); @@ -247,7 +248,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { query: "alice", max_pages: 2, }); @@ -274,7 +275,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { + 
const result = await executeTestTool(tool, { query: "alice", max_pages: 3, }); @@ -303,7 +304,7 @@ describe("slackUserLookup", () => { }); const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { query: "user" }); + const result = await executeTestTool(tool, { query: "user" }); expect(result.users).toHaveLength(1); expect(result.users[0].id).toBe("U2"); @@ -313,7 +314,7 @@ describe("slackUserLookup", () => { describe("input validation", () => { it("rejects when no input provided", async () => { const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, {}); + const result = await executeTestTool(tool, {}); expect(result).toMatchObject({ ok: false, @@ -323,7 +324,7 @@ describe("slackUserLookup", () => { it("rejects when multiple inputs provided", async () => { const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { user_id: "U123", query: "alice", }); @@ -341,52 +342,13 @@ describe("slackUserLookup", () => { const tools = createTools( [], {}, - { - source: { - platform: "slack", - teamId: "T_TEST", - channelId: "C_TEST", - }, - destination: { - platform: "slack", - teamId: "T_TEST", - channelId: "C_TEST", - }, - sandbox: {} as any, - }, + createTestToolRuntimeContext({ + channelId: "C_TEST", + }), ); expect(tools).toHaveProperty("slackUserLookup"); expect(tools.slackUserLookup.description).toContain("Slack user"); }); }); - - describe("custom profile fields", () => { - it("returns custom profile fields as-is", async () => { - queueSlackApiResponse("users.info", { - body: usersInfoOk({ - userId: "U_GH", - userName: "untitaker", - realName: "Markus Unterwaditzer", - fields: { - Xf042GITHUB: { - value: "https://github.com/untitaker", - alt: "untitaker", - label: "GitHub", - }, - }, - }), - }); - - const tool = createSlackUserLookupTool(); - const result = await executeTool(tool, { user_id: "U_GH" }); - - 
expect(result.user.profile_fields).toHaveLength(1); - expect(result.user.profile_fields[0]).toMatchObject({ - id: "Xf042GITHUB", - label: "GitHub", - value: "https://github.com/untitaker", - }); - }); - }); }); diff --git a/packages/junior/tests/integration/tool-idempotency.test.ts b/packages/junior/tests/integration/tool-idempotency.test.ts index f4cb56a34..3c940e35e 100644 --- a/packages/junior/tests/integration/tool-idempotency.test.ts +++ b/packages/junior/tests/integration/tool-idempotency.test.ts @@ -3,8 +3,11 @@ import { createSlackCanvasCreateTool } from "@/chat/tools/slack/canvas-tools"; import { createOperationKey } from "@/chat/tools/idempotency"; import { createSlackListAddItemsTool } from "@/chat/tools/slack/list-tools"; import { SlackActionError } from "@/chat/slack/client"; -import type { ToolState } from "@/chat/tools/types"; -import type { SlackToolContext } from "@/chat/tools/slack/context"; +import { + createTestToolRuntimeContext, + createTestToolState, + executeTestTool, +} from "../fixtures/tool-runtime"; import { canvasesAccessSetOk, canvasesCreateOk, @@ -17,71 +20,6 @@ import { queueSlackApiResponse, } from "../msw/handlers/slack-api"; -function createToolState( - options: { - currentListId?: string; - listColumnMap?: { - titleColumnId?: string; - completedColumnId?: string; - assigneeColumnId?: string; - dueDateColumnId?: string; - }; - } = {}, -): ToolState { - const operationResultCache = new Map(); - const artifactState: Record = { - listColumnMap: options.listColumnMap ?? 
{}, - }; - - return { - artifactState: artifactState as ToolState["artifactState"], - patchArtifactState: (patch) => { - Object.assign(artifactState, patch); - }, - getCurrentListId: () => options.currentListId, - getOperationResult: (operationKey: string): T | undefined => - operationResultCache.get(operationKey) as T | undefined, - setOperationResult: (operationKey, result) => { - operationResultCache.set(operationKey, result); - }, - }; -} - -const noopSandbox = {} as any; - -function slackContext(channelId: string): SlackToolContext { - return { - destination: { - platform: "slack" as const, - teamId: "T123", - channelId, - }, - source: { - platform: "slack" as const, - teamId: "T123", - channelId, - }, - destinationChannelId: channelId, - sourceChannelId: channelId, - teamId: "T123", - }; -} - -const LOCAL_CONTEXT = { - destination: { - platform: "local", - conversationId: "local:test:tool-idempotency", - }, - sandbox: noopSandbox, -} as const; - -async function executeTool(tool: any, input: TInput) { - if (typeof tool?.execute !== "function") { - throw new Error("tool execute function missing"); - } - return await tool.execute(input, {} as any); -} - describe("tool idempotency", () => { it("creates deterministic operation keys regardless of object key order", () => { const a = createOperationKey("slack_canvas_create", { @@ -111,14 +49,19 @@ describe("tool idempotency", () => { permalink: "https://example.invalid/canvas-1", }), }); - const state = createToolState(); - const tool = createSlackCanvasCreateTool(slackContext("C123"), state); + const state = createTestToolState(); + const tool = createSlackCanvasCreateTool( + createTestToolRuntimeContext({ + channelId: "C123", + }), + state, + ); - const first = await executeTool(tool, { + const first = await executeTestTool(tool, { title: "Weekly plan", markdown: "- item one", }); - const second = await executeTool(tool, { + const second = await executeTestTool(tool, { title: "Weekly plan", markdown: "- item one", 
}); @@ -157,10 +100,15 @@ describe("tool idempotency", () => { }), }); - const state = createToolState(); - const tool = createSlackCanvasCreateTool(slackContext("D123"), state); + const state = createTestToolState(); + const tool = createSlackCanvasCreateTool( + createTestToolRuntimeContext({ + channelId: "D123", + }), + state, + ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { title: "DM brief", markdown: "Body", }); @@ -198,19 +146,14 @@ describe("tool idempotency", () => { }); const tool = createSlackCanvasCreateTool( - { - ...slackContext("D123"), - destination: { - platform: "slack" as const, - teamId: "T123", - channelId: "C_SHARED", - }, - destinationChannelId: "C_SHARED", - }, - createToolState(), + createTestToolRuntimeContext({ + channelId: "D123", + deliveryChannelId: "C_SHARED", + }), + createTestToolState(), ); - const result = await executeTool(tool, { + const result = await executeTestTool(tool, { title: "Shared brief", markdown: "Body", }); @@ -229,14 +172,27 @@ describe("tool idempotency", () => { }); it("throws when creating a canvas without assistant channel context", async () => { - const state = createToolState(); + const state = createTestToolState(); const tool = createSlackCanvasCreateTool( - LOCAL_CONTEXT as unknown as SlackToolContext, + { + requester: { + platform: "slack", + teamId: "T123", + userId: "U123", + }, + source: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + sourceChannelId: "C123", + teamId: "T123", + }, state, ); await expect( - executeTool(tool, { + executeTestTool(tool, { title: "No context", markdown: "Body", }), @@ -257,18 +213,20 @@ describe("tool idempotency", () => { queueSlackApiResponse("slackLists.items.create", { body: slackListsItemsCreateOk({ itemId: "item-2" }), }); - const state = createToolState({ + const state = createTestToolState({ currentListId: "list-1", - listColumnMap: { - titleColumnId: "col-title", + artifactState: { + listColumnMap: 
{ + titleColumnId: "col-title", + }, }, }); const tool = createSlackListAddItemsTool(state); - const first = await executeTool(tool, { + const first = await executeTestTool(tool, { items: ["Ship patch", "Run test"], }); - const second = await executeTool(tool, { + const second = await executeTestTool(tool, { items: ["Ship patch", "Run test"], }); @@ -296,11 +254,16 @@ describe("tool idempotency", () => { queueSlackApiError("canvases.create", { error: "internal_error", }); - const state = createToolState(); - const tool = createSlackCanvasCreateTool(slackContext("C123"), state); + const state = createTestToolState(); + const tool = createSlackCanvasCreateTool( + createTestToolRuntimeContext({ + channelId: "C123", + }), + state, + ); await expect( - executeTool(tool, { + executeTestTool(tool, { title: "Incident plan", markdown: "placeholder", }), diff --git a/packages/junior/tests/integration/tools/web-fetch-tool-contract.test.ts b/packages/junior/tests/integration/tools/web-fetch-tool-contract.test.ts new file mode 100644 index 000000000..6021cea98 --- /dev/null +++ b/packages/junior/tests/integration/tools/web-fetch-tool-contract.test.ts @@ -0,0 +1,104 @@ +import type { FileUpload } from "chat"; +import { http, HttpResponse } from "msw"; +import { describe, expect, it } from "vitest"; +import { createWebFetchTool } from "@/chat/tools/web/fetch-tool"; +import { mswServer } from "../../msw/server"; + +const PUBLIC_TEST_ORIGIN = "http://93.184.216.34"; + +describe("webFetch tool contract", () => { + it("fetches a public page and returns extracted readable content", async () => { + mswServer.use( + http.get(`${PUBLIC_TEST_ORIGIN}/docs`, () => + HttpResponse.html( + [ + "Agent Docs", + "", + "

Streaming agents

Use deltas for progress.

", + "", + ].join(""), + ), + ), + ); + const tool = createWebFetchTool({}); + + const result = (await tool.execute?.( + { url: `${PUBLIC_TEST_ORIGIN}/docs`, max_chars: 1000 }, + {}, + )) as { + content: string; + title?: string; + url: string; + }; + + expect(result).toMatchObject({ + url: `${PUBLIC_TEST_ORIGIN}/docs`, + title: "Agent Docs", + }); + expect(result.content).toContain("# Streaming agents"); + expect(result.content).toContain("Use deltas for progress."); + expect(result.content).not.toContain("Pricing Login"); + }); + + it("attaches fetched images through the generated-file outbox", async () => { + mswServer.use( + http.get( + `${PUBLIC_TEST_ORIGIN}/hero.png`, + () => + new HttpResponse(Buffer.from("png-bytes"), { + headers: { "content-type": "image/png" }, + }), + ), + ); + const generatedFiles: FileUpload[] = []; + const tool = createWebFetchTool({ + onGeneratedFiles(files) { + generatedFiles.push(...files); + }, + }); + + const result = await tool.execute?.( + { url: `${PUBLIC_TEST_ORIGIN}/hero.png` }, + {}, + ); + + expect(result).toEqual({ + ok: true, + url: `${PUBLIC_TEST_ORIGIN}/hero.png`, + media_type: "image/png", + bytes: Buffer.byteLength("png-bytes"), + delivery: + "Fetched image will be attached to the Slack response as a file.", + }); + expect(generatedFiles).toEqual([ + { + data: Buffer.from("png-bytes"), + filename: "hero.png", + mimeType: "image/png", + }, + ]); + }); + + it("marks client HTTP failures as non-retryable tool results", async () => { + mswServer.use( + http.get( + `${PUBLIC_TEST_ORIGIN}/missing`, + () => new HttpResponse("missing", { status: 404 }), + ), + ); + const tool = createWebFetchTool({}); + + const result = await tool.execute?.( + { url: `${PUBLIC_TEST_ORIGIN}/missing` }, + {}, + ); + + expect(result).toEqual({ + ok: false, + url: `${PUBLIC_TEST_ORIGIN}/missing`, + error: "fetch failed: 404", + status: 404, + retryable: false, + }); + }); +}); diff --git 
a/packages/junior/tests/integration/trusted-plugin-dispatch-recovery.test.ts b/packages/junior/tests/integration/trusted-plugin-dispatch-recovery.test.ts new file mode 100644 index 000000000..60e2a5b54 --- /dev/null +++ b/packages/junior/tests/integration/trusted-plugin-dispatch-recovery.test.ts @@ -0,0 +1,148 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { recoverStaleDispatches } from "@/chat/agent-dispatch/heartbeat"; +import { + createOrGetDispatch, + getDispatchRecord, + getDispatchStorageKey, + listIncompleteDispatchIds, + updateDispatchRecord, + withDispatchLock, +} from "@/chat/agent-dispatch/store"; +import type { DispatchRecord } from "@/chat/agent-dispatch/types"; +import { + resetHeartbeatTestEnv, + setupHeartbeatTestEnv, +} from "../fixtures/heartbeat"; + +vi.hoisted(() => { + process.env.JUNIOR_STATE_ADAPTER = "memory"; +}); + +describe("trusted plugin dispatch recovery", () => { + const originalFetch = global.fetch; + + beforeEach(async () => { + await setupHeartbeatTestEnv(); + }); + + afterEach(async () => { + await resetHeartbeatTestEnv(originalFetch); + }); + + it("fails stale dispatches that exceed retry attempts", async () => { + const created = await createOrGetDispatch({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + options: { + idempotencyKey: "run-exhausted", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }, + }); + await withDispatchLock(created.record.id, async (state) => { + const record = await state.get( + getDispatchStorageKey(created.record.id), + ); + if (!record) { + throw new Error("Expected dispatch record to exist"); + } + await updateDispatchRecord(state, { + ...record, + attempt: record.maxAttempts, + lastCallbackAtMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + }); + + await expect( + recoverStaleDispatches({ + nowMs: Date.parse("2026-05-26T12:05:00.000Z"), + }), + 
).resolves.toBe(0); + await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({ + status: "failed", + errorMessage: "Dispatch exceeded retry attempts.", + }); + }); + + it("removes terminal dispatches from the recovery index", async () => { + const created = await createOrGetDispatch({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + options: { + idempotencyKey: "run-terminal-index", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }, + }); + + await expect(listIncompleteDispatchIds()).resolves.toContain( + created.record.id, + ); + + await withDispatchLock(created.record.id, async (state) => { + const record = await state.get( + getDispatchStorageKey(created.record.id), + ); + if (!record) { + throw new Error("missing dispatch record"); + } + await updateDispatchRecord(state, { + ...record, + status: "completed", + }); + }); + + await expect(listIncompleteDispatchIds()).resolves.not.toContain( + created.record.id, + ); + }); + + it("does not fail an active leased dispatch that reached max attempts", async () => { + const created = await createOrGetDispatch({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + options: { + idempotencyKey: "run-active-max-attempts", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }, + }); + await withDispatchLock(created.record.id, async (state) => { + const record = await state.get( + getDispatchStorageKey(created.record.id), + ); + if (!record) { + throw new Error("Expected dispatch record to exist"); + } + await updateDispatchRecord(state, { + ...record, + attempt: record.maxAttempts, + lastCallbackAtMs: Date.parse("2026-05-26T12:00:00.000Z"), + leaseExpiresAtMs: Date.parse("2026-05-26T12:10:00.000Z"), + status: "running", + }); + }); + + await expect( + recoverStaleDispatches({ + nowMs: 
Date.parse("2026-05-26T12:05:00.000Z"), + }), + ).resolves.toBe(0); + await expect(getDispatchRecord(created.record.id)).resolves.toMatchObject({ + status: "running", + attempt: created.record.maxAttempts, + }); + }); +}); diff --git a/packages/junior/tests/integration/trusted-plugin-heartbeat-context.test.ts b/packages/junior/tests/integration/trusted-plugin-heartbeat-context.test.ts new file mode 100644 index 000000000..ce797a8d5 --- /dev/null +++ b/packages/junior/tests/integration/trusted-plugin-heartbeat-context.test.ts @@ -0,0 +1,226 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { createHeartbeatContext } from "@/chat/agent-dispatch/context"; +import { + getDispatchRecord, + listIncompleteDispatchIds, +} from "@/chat/agent-dispatch/store"; +import { + createCredentialSubject, + mockDispatchCallbackFetch, + resetHeartbeatTestEnv, + setupHeartbeatTestEnv, +} from "../fixtures/heartbeat"; +import { getCapturedSlackApiCalls } from "../msw/handlers/slack-api"; + +vi.hoisted(() => { + process.env.JUNIOR_STATE_ADAPTER = "memory"; +}); + +describe("trusted plugin heartbeat context", () => { + const originalFetch = global.fetch; + + beforeEach(async () => { + await setupHeartbeatTestEnv(); + }); + + afterEach(async () => { + await resetHeartbeatTestEnv(originalFetch); + }); + + it("scopes dispatch lookup to the plugin that created it", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + + const schedulerCtx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + const result = await schedulerCtx.agent.dispatch({ + idempotencyKey: "run-1", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + metadata: { runId: "run-1" }, + }); + + await expect(schedulerCtx.agent.get(result.id)).resolves.toEqual({ + id: 
result.id, + status: "pending", + }); + await expect( + createHeartbeatContext({ + plugin: "other-plugin", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }).agent.get(result.id), + ).resolves.toBeUndefined(); + + await expect(getDispatchRecord(result.id)).resolves.toMatchObject({ + input: "Run the scheduled task.", + destination: { channelId: "C123" }, + metadata: { runId: "run-1" }, + }); + }); + + it("keeps plugin state isolated when plugin names and keys contain delimiters", async () => { + const first = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + const second = createHeartbeatContext({ + plugin: "scheduler:run", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + + await first.state.set("run:1", "first"); + await second.state.set("1", "second"); + + await expect(first.state.get("run:1")).resolves.toBe("first"); + await expect(second.state.get("1")).resolves.toBe("second"); + }); + + it("bounds dispatch fanout from one heartbeat context", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); + }); + global.fetch = fetchMock as typeof fetch; + + const ctx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + + for (let index = 0; index < 25; index += 1) { + await ctx.agent.dispatch({ + idempotencyKey: `run-${index}`, + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }); + } + + await expect( + ctx.agent.dispatch({ + idempotencyKey: "run-over-limit", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }), + ).rejects.toThrow("Plugin heartbeat exceeded the dispatch limit"); + }); + + it("does not count invalid dispatch requests against heartbeat fanout", async () => { + const fetchMock = vi.fn(async () => { + return new Response("Accepted", { status: 202 }); 
+ }); + global.fetch = fetchMock as typeof fetch; + + const ctx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + + for (let index = 0; index < 25; index += 1) { + await expect( + ctx.agent.dispatch({ + idempotencyKey: `invalid-${index}`, + destination: { + platform: "slack", + teamId: "not-a-team", + channelId: "C123", + }, + input: "Run the scheduled task.", + }), + ).rejects.toThrow("Dispatch destination teamId must be a Slack team id"); + } + + await expect( + ctx.agent.dispatch({ + idempotencyKey: "valid-after-invalid", + destination: { + platform: "slack", + teamId: "T123", + channelId: "C123", + }, + input: "Run the scheduled task.", + }), + ).resolves.toMatchObject({ status: "created" }); + }); + + it("rejects plugin credential subjects that include runtime bindings", async () => { + mockDispatchCallbackFetch(originalFetch); + + const ctx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + const credentialSubjectWithRuntimeBinding = { + ...createCredentialSubject(), + binding: { + type: "slack-direct-conversation", + teamId: "T123", + channelId: "D999", + signature: "v1=test", + }, + }; + + await expect( + ctx.agent.dispatch({ + idempotencyKey: "run-delegated-mismatch", + credentialSubject: credentialSubjectWithRuntimeBinding, + destination: { + platform: "slack", + teamId: "T123", + channelId: "D123", + }, + input: "Run the scheduled task.", + }), + ).rejects.toThrow("Dispatch credentialSubject binding is runtime-owned"); + expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); + await expect(listIncompleteDispatchIds()).resolves.toEqual([]); + }); + + it("binds delegated credential subjects before persistence", async () => { + mockDispatchCallbackFetch(originalFetch); + const ctx = createHeartbeatContext({ + plugin: "scheduler", + nowMs: Date.parse("2026-05-26T12:00:00.000Z"), + }); + + const result = await ctx.agent.dispatch({ 
+ idempotencyKey: "run-delegated", + credentialSubject: createCredentialSubject(), + destination: { + platform: "slack", + teamId: "T123", + channelId: "D123", + }, + input: "Run the scheduled task.", + }); + + await expect(getDispatchRecord(result.id)).resolves.toMatchObject({ + credentialSubject: { + type: "user", + userId: "U123", + allowedWhen: "private-direct-conversation", + binding: { + type: "slack-direct-conversation", + teamId: "T123", + channelId: "D123", + signature: expect.any(String), + }, + }, + }); + expect(getCapturedSlackApiCalls("conversations.info")).toHaveLength(0); + }); +}); diff --git a/packages/junior/tests/msw/captured-slack-api-calls.ts b/packages/junior/tests/msw/captured-slack-api-calls.ts deleted file mode 100644 index 178e4cce0..000000000 --- a/packages/junior/tests/msw/captured-slack-api-calls.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { getCapturedSlackApiCalls, type CapturedSlackApiCall } from "./handlers/slack-api"; - -export function readCapturedSlackApiCalls(): CapturedSlackApiCall[] { - return getCapturedSlackApiCalls(); -} - -export type { CapturedSlackApiCall }; diff --git a/packages/junior/tests/msw/handlers/eval-mcp-auth.ts b/packages/junior/tests/msw/handlers/eval-mcp-auth.ts index 22b333ccd..fa328ee2c 100644 --- a/packages/junior/tests/msw/handlers/eval-mcp-auth.ts +++ b/packages/junior/tests/msw/handlers/eval-mcp-auth.ts @@ -1,4 +1,5 @@ import { http, HttpResponse } from "msw"; +import { DEFAULT_TEST_NOW_MS } from "../../fixtures/vitest"; export const EVAL_MCP_AUTH_PROVIDER = "eval-auth"; export const EVAL_MCP_AUTH_CODE = "eval-auth-code"; @@ -269,7 +270,7 @@ export const evalMcpAuthHandlers = [ const body = (await request.json()) as Record; return HttpResponse.json({ client_id: "eval-auth-client-id", - client_id_issued_at: Math.floor(Date.now() / 1000), + client_id_issued_at: Math.floor(DEFAULT_TEST_NOW_MS / 1000), ...(Array.isArray(body.redirect_uris) ? 
{ redirect_uris: body.redirect_uris } : { diff --git a/packages/junior/tests/msw/handlers/eval-oauth.ts b/packages/junior/tests/msw/handlers/eval-oauth.ts index 466235c45..67091cea6 100644 --- a/packages/junior/tests/msw/handlers/eval-oauth.ts +++ b/packages/junior/tests/msw/handlers/eval-oauth.ts @@ -6,7 +6,24 @@ export const EVAL_OAUTH_ORIGIN = "https://example.com"; const EVAL_OAUTH_TOKEN_ENDPOINT = `${EVAL_OAUTH_ORIGIN}/junior-eval-oauth/oauth/token`; const EVAL_OAUTH_ACCESS_TOKEN = "eval-oauth-access-token"; -export function resetEvalOAuthMockState(): void {} +interface QueuedEvalOAuthTokenResponse { + body: Record; + status?: number; +} + +const queuedTokenResponses: QueuedEvalOAuthTokenResponse[] = []; + +/** Queue the next eval OAuth token response returned by the MSW provider. */ +export function queueEvalOAuthTokenResponse( + response: QueuedEvalOAuthTokenResponse, +): void { + queuedTokenResponses.push(response); +} + +/** Reset queued eval OAuth provider responses between tests. */ +export function resetEvalOAuthMockState(): void { + queuedTokenResponses.length = 0; +} export const evalOAuthHandlers = [ http.post(EVAL_OAUTH_TOKEN_ENDPOINT, async ({ request }) => { @@ -22,6 +39,12 @@ export const evalOAuthHandlers = [ { status: 400 }, ); } + const queuedResponse = queuedTokenResponses.shift(); + if (queuedResponse) { + return HttpResponse.json(queuedResponse.body, { + status: queuedResponse.status ?? 
200, + }); + } return HttpResponse.json({ access_token: EVAL_OAUTH_ACCESS_TOKEN, diff --git a/packages/junior/tests/msw/handlers/github-api.ts b/packages/junior/tests/msw/handlers/github-api.ts index e18405d96..bd8fa0735 100644 --- a/packages/junior/tests/msw/handlers/github-api.ts +++ b/packages/junior/tests/msw/handlers/github-api.ts @@ -1,4 +1,5 @@ import { http, HttpResponse } from "msw"; +import { DEFAULT_TEST_EXPIRES_AT_ISO } from "../../fixtures/vitest"; export const GITHUB_API_ORIGIN = "https://api.github.com"; @@ -10,7 +11,7 @@ export const githubApiHandlers = [ () => HttpResponse.json({ token: "eval-github-installation-token", - expires_at: new Date(Date.now() + 60 * 60 * 1000).toISOString(), + expires_at: DEFAULT_TEST_EXPIRES_AT_ISO, }), ), ]; diff --git a/packages/junior/tests/unit/build/nitro-plugin-module.test.ts b/packages/junior/tests/unit/build/nitro-plugin-module.test.ts index fd2a1ccd9..805312b34 100644 --- a/packages/junior/tests/unit/build/nitro-plugin-module.test.ts +++ b/packages/junior/tests/unit/build/nitro-plugin-module.test.ts @@ -47,6 +47,12 @@ type TestRollupBeforeHook = ( config: TestBuildConfig, ) => Promise | void; +interface TestNitroFixtureOptions { + rootDir?: string; + serverDir?: string; + vercel?: TestVercelOptions; +} + async function makeTempDir(): Promise { const tempDir = await fs.mkdtemp( path.join(os.tmpdir(), "junior-nitro-plugin-module-"), @@ -61,6 +67,30 @@ function getVercelOptions(nitro: { return nitro.options.vercel as TestVercelOptions; } +function createNitroFixture(options: TestNitroFixtureOptions = {}) { + const rollupBeforeHooks: TestRollupBeforeHook[] = []; + const virtual: Record Promise) | string> = {}; + const nitro = { + hooks: { + hook(name: string, callback: TestRollupBeforeHook) { + if (name === "rollup:before") { + rollupBeforeHooks.push(callback); + } + }, + }, + options: { + output: { + serverDir: options.serverDir ?? "/tmp/junior-output", + }, + rootDir: options.rootDir ?? 
"/tmp/junior-app", + vercel: options.vercel ?? {}, + virtual, + }, + }; + + return { nitro, rollupBeforeHooks, virtual }; +} + afterEach(async () => { for (const tempDir of tempDirs.splice(0)) { await fs.rm(tempDir, { recursive: true, force: true }); @@ -69,20 +99,7 @@ afterEach(async () => { describe("juniorNitro plugin modules", () => { it("configures Vercel build output for heartbeat and conversation work", () => { - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook() {}, - }, - options: { - output: { - serverDir: "/tmp/junior-output", - }, - rootDir: "/tmp/junior-app", - vercel: {}, - virtual, - }, - }; + const { nitro } = createNitroFixture(); juniorNitro().nitro.setup(nitro); const vercel = getVercelOptions(nitro); @@ -113,45 +130,34 @@ describe("juniorNitro plugin modules", () => { }); it("preserves existing Vercel route function settings", () => { - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook() {}, - }, - options: { - output: { - serverDir: "/tmp/junior-output", + const { nitro } = createNitroFixture({ + vercel: { + config: { + version: 3, + crons: [ + { + path: JUNIOR_HEARTBEAT_ROUTE, + schedule: "*/5 * * * *", + }, + ], + }, + functions: { + maxDuration: 120, + memory: 1024, }, - rootDir: "/tmp/junior-app", - vercel: { - config: { - version: 3, - crons: [ + functionRules: { + [JUNIOR_CONVERSATION_WORK_CALLBACK_ROUTE]: { + memory: 2048, + experimentalTriggers: [ { - path: JUNIOR_HEARTBEAT_ROUTE, - schedule: "*/5 * * * *", + type: "queue/v2beta", + topic: DEFAULT_CONVERSATION_WORK_QUEUE_TOPIC, }, ], }, - functions: { - maxDuration: 120, - memory: 1024, - }, - functionRules: { - [JUNIOR_CONVERSATION_WORK_CALLBACK_ROUTE]: { - memory: 2048, - experimentalTriggers: [ - { - type: "queue/v2beta", - topic: DEFAULT_CONVERSATION_WORK_QUEUE_TOPIC, - }, - ], - }, - }, }, - virtual, }, - }; + }); juniorNitro({ maxDuration: 300 }).nitro.setup(nitro); const vercel = getVercelOptions(nitro); @@ 
-181,20 +187,7 @@ describe("juniorNitro plugin modules", () => { }); it("uses a custom Vercel conversation work queue topic", () => { - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook() {}, - }, - options: { - output: { - serverDir: "/tmp/junior-output", - }, - rootDir: "/tmp/junior-app", - vercel: {}, - virtual, - }, - }; + const { nitro } = createNitroFixture(); juniorNitro({ conversationWorkQueueTopic: "custom_work" }).nitro.setup( nitro, @@ -213,31 +206,20 @@ describe("juniorNitro plugin modules", () => { }); it("replaces a stale queue trigger when the topic changes", () => { - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook() {}, - }, - options: { - output: { - serverDir: "/tmp/junior-output", - }, - rootDir: "/tmp/junior-app", - vercel: { - functionRules: { - [JUNIOR_CONVERSATION_WORK_CALLBACK_ROUTE]: { - experimentalTriggers: [ - { - type: "queue/v2beta", - topic: "old_topic", - }, - ], - }, + const { nitro } = createNitroFixture({ + vercel: { + functionRules: { + [JUNIOR_CONVERSATION_WORK_CALLBACK_ROUTE]: { + experimentalTriggers: [ + { + type: "queue/v2beta", + topic: "old_topic", + }, + ], }, }, - virtual, }, - }; + }); juniorNitro({ conversationWorkQueueTopic: "new_topic" }).nitro.setup(nitro); const vercel = getVercelOptions(nitro); @@ -254,24 +236,13 @@ describe("juniorNitro plugin modules", () => { }); it("preserves Vercel max function duration settings", () => { - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook() {}, - }, - options: { - output: { - serverDir: "/tmp/junior-output", + const { nitro } = createNitroFixture({ + vercel: { + functions: { + maxDuration: "max" as const, }, - rootDir: "/tmp/junior-app", - vercel: { - functions: { - maxDuration: "max" as const, - }, - }, - virtual, }, - }; + }); juniorNitro().nitro.setup(nitro); const vercel = getVercelOptions(nitro); @@ -301,20 +272,10 @@ describe("juniorNitro plugin modules", () => { }; 
delete globalState.__juniorNitroPluginModuleImports; - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook() {}, - }, - options: { - output: { - serverDir: path.join(tempRoot, ".output", "server"), - }, - rootDir: tempRoot, - vercel: {}, - virtual, - }, - }; + const { nitro, virtual } = createNitroFixture({ + rootDir: tempRoot, + serverDir: path.join(tempRoot, ".output", "server"), + }); juniorNitro({ plugins: "./plugins" }).nitro.setup(nitro); await new Promise((resolve) => setTimeout(resolve, 25)); @@ -329,21 +290,8 @@ describe("juniorNitro plugin modules", () => { delete globalState.__juniorNitroPluginModuleImports; }); - it("rejects direct plugin sets with hooks because hooks need a runtime import", () => { - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook() {}, - }, - options: { - output: { - serverDir: "/tmp/junior-output", - }, - rootDir: "/tmp/junior-app", - vercel: {}, - virtual, - }, - }; + it("rejects direct trusted plugin sets because hooks need a runtime import", () => { + const { nitro } = createNitroFixture(); expect(() => juniorNitro({ @@ -378,25 +326,10 @@ describe("juniorNitro plugin modules", () => { "utf8", ); - const rollupBeforeHooks: TestRollupBeforeHook[] = []; - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook(name: string, callback: TestRollupBeforeHook) { - if (name === "rollup:before") { - rollupBeforeHooks.push(callback); - } - }, - }, - options: { - output: { - serverDir: path.join(tempRoot, ".output", "server"), - }, - rootDir: tempRoot, - vercel: {}, - virtual, - }, - }; + const { nitro, rollupBeforeHooks, virtual } = createNitroFixture({ + rootDir: tempRoot, + serverDir: path.join(tempRoot, ".output", "server"), + }); juniorNitro({ plugins: "./plugins" }).nitro.setup(nitro); @@ -459,25 +392,10 @@ describe("juniorNitro plugin modules", () => { ); await fs.mkdir(serverDir, { recursive: true }); - const rollupBeforeHooks: 
TestRollupBeforeHook[] = []; - const virtual: Record Promise) | string> = {}; - const nitro = { - hooks: { - hook(name: string, callback: TestRollupBeforeHook) { - if (name === "rollup:before") { - rollupBeforeHooks.push(callback); - } - }, - }, - options: { - output: { - serverDir, - }, - rootDir: tempRoot, - vercel: {}, - virtual, - }, - }; + const { nitro, rollupBeforeHooks } = createNitroFixture({ + rootDir: tempRoot, + serverDir, + }); juniorNitro({ cwd: tempRoot, diff --git a/packages/junior/tests/unit/capabilities/capability-factory.test.ts b/packages/junior/tests/unit/capabilities/capability-factory.test.ts index 9abc47f2f..34c10acbc 100644 --- a/packages/junior/tests/unit/capabilities/capability-factory.test.ts +++ b/packages/junior/tests/unit/capabilities/capability-factory.test.ts @@ -1,139 +1,114 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; -import type { PluginDefinition } from "@/chat/plugins/types"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { issueProviderCredentialLease } from "@/chat/capabilities/factory"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { stubTestEnv } from "../../fixtures/vitest"; -const createPluginBrokerMock = vi.fn(); -const getPluginProvidersMock = vi.fn<() => PluginDefinition[]>(); const USER_CREDENTIAL_CONTEXT = { actor: { type: "user" as const, userId: "U123" }, }; -vi.mock("@/chat/capabilities/catalog", () => ({ - logCapabilityCatalogLoadedOnce: vi.fn(), -})); - -vi.mock("@/chat/plugins/registry", () => ({ - createPluginBroker: (...args: unknown[]) => createPluginBrokerMock(...args), - getPluginProviders: () => getPluginProvidersMock(), -})); - -vi.mock("@/chat/state/adapter", () => ({ - getStateAdapter: () => ({ - get: vi.fn(), - set: vi.fn(), - delete: vi.fn(), - }), -})); - describe("capability factory", () => { - afterEach(() => { - createPluginBrokerMock.mockReset(); 
- getPluginProvidersMock.mockReset(); - vi.resetModules(); + beforeEach(async () => { + stubTestEnv({ JUNIOR_STATE_ADAPTER: "memory" }); + await disconnectStateAdapter(); }); - it("uses normal plugin brokers for credential providers", async () => { - const broker = { - issue: vi.fn(async () => ({ - id: "lease-1", - provider: "example", - env: {}, - expiresAt: new Date(Date.now() + 60_000).toISOString(), - })), - }; - createPluginBrokerMock.mockReturnValue(broker); - getPluginProvidersMock.mockReturnValue([ - { - manifest: { - name: "example", - displayName: "Example", - description: "Example", - capabilities: ["example.api"], - configKeys: [], - domains: ["api.example.com"], - apiHeaders: { - Authorization: "Bearer ${EXAMPLE_API_HEADER}", - "X-Api-Version": "2026-01-01", - }, - commandEnv: { - EXAMPLE_API_KEY: "host_managed_credential", + afterEach(async () => { + setPluginCatalogConfig(undefined); + await disconnectStateAdapter(); + vi.unstubAllEnvs(); + }); + + it("issues provider credential leases through the registered plugin broker", async () => { + stubTestEnv({ EXAMPLE_API_HEADER: "secret-header" }); + setPluginCatalogConfig({ + inlineManifests: [ + { + manifest: { + name: "example", + displayName: "Example", + description: "Example", + capabilities: ["example.api"], + configKeys: [], + domains: ["api.example.com"], + envVars: { + EXAMPLE_API_HEADER: {}, + }, + apiHeaders: { + Authorization: "Bearer ${EXAMPLE_API_HEADER}", + "X-Api-Version": "2026-01-01", + }, + commandEnv: { + EXAMPLE_API_KEY: "host_managed_credential", + }, }, }, - dir: "/tmp/example", - skillsDir: "/tmp/example/skills", - }, - ]); + ], + }); - const { issueProviderCredentialLease } = - await import("@/chat/capabilities/factory"); const lease = await issueProviderCredentialLease({ context: USER_CREDENTIAL_CONTEXT, provider: "example", reason: "test:api-headers", }); - expect(createPluginBrokerMock).toHaveBeenCalledWith("example", { - userTokenStore: expect.any(Object), - }); - 
expect(broker.issue).toHaveBeenCalledWith({ - context: USER_CREDENTIAL_CONTEXT, - reason: "test:api-headers", + expect(lease).toMatchObject({ + provider: "example", + env: { + EXAMPLE_API_KEY: "host_managed_credential", + }, + headerTransforms: [ + { + domain: "api.example.com", + headers: { + Authorization: "Bearer secret-header", + "X-Api-Version": "2026-01-01", + }, + }, + ], + metadata: { + reason: "test:api-headers", + }, }); - expect(lease.provider).toBe("example"); }); it("skips domain-only providers in the generic credential router", async () => { - const broker = { - issue: vi.fn(async () => ({ - id: "lease-1", - provider: "sentry", - env: {}, - expiresAt: new Date(Date.now() + 60_000).toISOString(), - })), - }; - createPluginBrokerMock.mockReturnValue(broker); - getPluginProvidersMock.mockReturnValue([ - { - manifest: { - name: "github", - displayName: "GitHub", - description: "GitHub", - capabilities: ["github.api"], - configKeys: [], - domains: ["api.github.com"], + setPluginCatalogConfig({ + inlineManifests: [ + { + manifest: { + name: "github", + displayName: "GitHub", + description: "GitHub", + capabilities: ["github.api"], + configKeys: [], + domains: ["api.github.com"], + }, }, - dir: "/tmp/github", - skillsDir: "/tmp/github/skills", - }, - { - manifest: { - name: "sentry", - displayName: "Sentry", - description: "Sentry", - capabilities: ["sentry.api"], - configKeys: [], - credentials: { - type: "oauth-bearer", - domains: ["sentry.io"], - authTokenEnv: "SENTRY_AUTH_TOKEN", + { + manifest: { + name: "sentry", + displayName: "Sentry", + description: "Sentry", + capabilities: ["sentry.api"], + configKeys: [], + credentials: { + type: "oauth-bearer", + domains: ["sentry.io"], + authTokenEnv: "SENTRY_AUTH_TOKEN", + }, }, }, - dir: "/tmp/sentry", - skillsDir: "/tmp/sentry/skills", - }, - ]); - - const { issueProviderCredentialLease } = - await import("@/chat/capabilities/factory"); - - await issueProviderCredentialLease({ - context: 
USER_CREDENTIAL_CONTEXT, - provider: "sentry", - reason: "test:oauth", + ], }); - expect(createPluginBrokerMock).toHaveBeenCalledTimes(1); - expect(createPluginBrokerMock).toHaveBeenCalledWith("sentry", { - userTokenStore: expect.any(Object), - }); + await expect( + issueProviderCredentialLease({ + context: USER_CREDENTIAL_CONTEXT, + provider: "github", + reason: "test:domain-only", + }), + ).rejects.toThrow("No credential broker registered for provider: github"); }); }); diff --git a/packages/junior/tests/unit/capabilities/capability-router.test.ts b/packages/junior/tests/unit/capabilities/capability-router.test.ts index b242f69dc..fb9366e83 100644 --- a/packages/junior/tests/unit/capabilities/capability-router.test.ts +++ b/packages/junior/tests/unit/capabilities/capability-router.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it, vi } from "vitest"; import { ProviderCredentialRouter } from "@/chat/capabilities/router"; import type { CredentialBroker } from "@/chat/credentials/broker"; +import { DEFAULT_TEST_EXPIRES_AT_ISO } from "../../fixtures/vitest"; const USER_CREDENTIAL_CONTEXT = { actor: { type: "user" as const, userId: "U123" }, @@ -13,7 +14,7 @@ describe("provider credential router", () => { id: "lease-1", provider: "github", env: {}, - expiresAt: new Date(Date.now() + 60_000).toISOString(), + expiresAt: DEFAULT_TEST_EXPIRES_AT_ISO, })), }; const router = new ProviderCredentialRouter({ @@ -43,7 +44,7 @@ describe("provider credential router", () => { id: "lease-1", provider: "github", env: {}, - expiresAt: new Date(Date.now() + 60_000).toISOString(), + expiresAt: DEFAULT_TEST_EXPIRES_AT_ISO, })), }; const router = new ProviderCredentialRouter({ diff --git a/packages/junior/tests/unit/capabilities/catalog.test.ts b/packages/junior/tests/unit/capabilities/catalog.test.ts index b8156d3d9..d94c6c922 100644 --- a/packages/junior/tests/unit/capabilities/catalog.test.ts +++ b/packages/junior/tests/unit/capabilities/catalog.test.ts @@ -1,106 +1,92 @@ -import 
{ afterEach, describe, expect, it, vi } from "vitest"; -import type { CapabilityProviderDefinition } from "@/chat/capabilities/catalog"; +import { afterEach, describe, expect, it } from "vitest"; +import { + getCapabilityProvider, + isKnownCapability, + listCapabilityProviders, +} from "@/chat/capabilities/catalog"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; +import type { PluginManifest } from "@/chat/plugins/types"; -let currentSignature = "sig-1"; -let currentProviders: CapabilityProviderDefinition[] = []; - -function cloneProviderDefinition( - provider: CapabilityProviderDefinition, -): CapabilityProviderDefinition { - return { - ...provider, - capabilities: [...provider.capabilities], - configKeys: [...provider.configKeys], - ...(provider.target ? { target: { ...provider.target } } : {}), - }; -} - -async function loadCatalogModule() { - vi.resetModules(); - vi.doMock("@/chat/logging", () => ({ - logInfo: () => undefined, - })); - vi.doMock("@/chat/plugins/registry", () => ({ - getPluginCatalogSignature: () => currentSignature, - getPluginCapabilityProviders: () => - currentProviders.map(cloneProviderDefinition), - })); - return await import("@/chat/capabilities/catalog"); +function configureCatalog(manifests: PluginManifest[]): void { + setPluginCatalogConfig({ + inlineManifests: manifests.map((manifest) => ({ manifest })), + }); } afterEach(() => { - currentSignature = "sig-1"; - currentProviders = []; - vi.resetModules(); - vi.doUnmock("@/chat/logging"); - vi.doUnmock("@/chat/plugins/registry"); + setPluginCatalogConfig(undefined); }); describe("capability catalog", () => { - it("refreshes cached providers when the plugin catalog signature changes", async () => { - currentProviders = [ + it("refreshes cached providers when the plugin catalog signature changes", () => { + configureCatalog([ { - provider: "demo", + name: "demo", + displayName: "Demo", + description: "Demo plugin", capabilities: ["demo.read"], configKeys: 
["demo.token"], }, - ]; - - const catalog = await loadCatalogModule(); + ]); - expect(catalog.getCapabilityProvider("demo.read")).toMatchObject({ + expect(getCapabilityProvider("demo.read")).toMatchObject({ provider: "demo", }); - currentSignature = "sig-2"; - currentProviders = [ + configureCatalog([ { - provider: "other", + name: "other", + displayName: "Other", + description: "Other plugin", capabilities: ["other.read"], configKeys: ["other.token"], }, - ]; + ]); - expect(catalog.getCapabilityProvider("demo.read")).toBeUndefined(); - expect(catalog.isKnownCapability("other.read")).toBe(true); + expect(getCapabilityProvider("demo.read")).toBeUndefined(); + expect(isKnownCapability("other.read")).toBe(true); }); - it("returns defensive copies from provider accessors", async () => { - currentProviders = [ + it("returns defensive copies from provider accessors", () => { + configureCatalog([ { - provider: "demo", + name: "demo", + displayName: "Demo", + description: "Demo plugin", capabilities: ["demo.read"], - configKeys: ["demo.token"], + configKeys: ["demo.token", "demo.repo"], target: { type: "repo", - configKey: "demo.repo", + configKey: "repo", commandFlags: ["--repo", "-R"], }, }, - ]; + ]); - const catalog = await loadCatalogModule(); - const listed = catalog.listCapabilityProviders(); - const direct = catalog.getCapabilityProvider("demo.read"); + const listed = listCapabilityProviders(); + const direct = getCapabilityProvider("demo.read"); expect(direct).toBeDefined(); + if (!direct) { + throw new Error("Expected demo.read provider"); + } listed[0]!.provider = "mutated"; listed[0]!.capabilities.push("demo.write"); listed[0]!.configKeys.push("demo.extra"); listed[0]!.target!.configKey = "mutated.repo"; listed[0]!.target!.commandFlags!.push("--mutated"); - direct!.provider = "direct-mutation"; - direct!.capabilities.push("direct.write"); - direct!.configKeys.push("direct.extra"); - direct!.target!.configKey = "direct.repo"; - 
direct!.target!.commandFlags!.push("--direct"); + direct.provider = "direct-mutation"; + direct.capabilities.push("direct.write"); + direct.configKeys.push("direct.extra"); + direct.target!.configKey = "direct.repo"; + direct.target!.commandFlags!.push("--direct"); - expect(catalog.listCapabilityProviders()).toEqual([ + expect(listCapabilityProviders()).toEqual([ { provider: "demo", capabilities: ["demo.read"], - configKeys: ["demo.token"], + configKeys: ["demo.token", "demo.repo"], target: { type: "repo", configKey: "demo.repo", @@ -108,10 +94,10 @@ describe("capability catalog", () => { }, }, ]); - expect(catalog.getCapabilityProvider("demo.read")).toEqual({ + expect(getCapabilityProvider("demo.read")).toEqual({ provider: "demo", capabilities: ["demo.read"], - configKeys: ["demo.token"], + configKeys: ["demo.token", "demo.repo"], target: { type: "repo", configKey: "demo.repo", diff --git a/packages/junior/tests/unit/chat/pi/traced-stream.test.ts b/packages/junior/tests/unit/chat/pi/traced-stream.test.ts deleted file mode 100644 index 23e178245..000000000 --- a/packages/junior/tests/unit/chat/pi/traced-stream.test.ts +++ /dev/null @@ -1,376 +0,0 @@ -import type { StreamFn } from "@earendil-works/pi-agent-core"; -import { afterEach, describe, expect, it, vi } from "vitest"; -import { - createAssistantMessageEventStream, - type AssistantMessage, - type Model, -} from "@earendil-works/pi-ai"; - -const { startInactiveSpan, withActiveSpan } = vi.hoisted(() => { - const span = { - setAttribute: vi.fn(), - setAttributes: vi.fn(), - setStatus: vi.fn(), - end: vi.fn(), - }; - return { - startInactiveSpan: vi.fn((_options: unknown) => span), - withActiveSpan: vi.fn((_s: unknown, cb: () => T) => cb()), - }; -}); - -vi.mock("@/chat/sentry", () => ({ - startInactiveSpan, - withActiveSpan, -})); - -function fakeModel(id: string): Model<"anthropic-messages"> { - return { id } as unknown as Model<"anthropic-messages">; -} - -function fakeMessage(): AssistantMessage { - return { - 
role: "assistant", - content: [{ type: "text", text: "hi" }], - api: "anthropic-messages", - provider: "vercel-ai-gateway", - model: "openai/gpt-5.4", - usage: { - input: 100, - output: 5, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 105, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: "stop", - timestamp: Date.now(), - }; -} - -type SpanMock = { - setAttribute: ReturnType; - setAttributes: ReturnType; - setStatus: ReturnType; - end: ReturnType; -}; - -function getSpan(): SpanMock { - return startInactiveSpan.mock.results[0]!.value as SpanMock; -} - -describe("createTracedStreamFn", () => { - afterEach(() => { - vi.clearAllMocks(); - vi.resetModules(); - }); - - it("opens a gen_ai.chat span when invoked", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - const returned = await traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ); - - expect(returned).toBe(stream); - expect(startInactiveSpan).toHaveBeenCalledTimes(1); - const opts = startInactiveSpan.mock.calls[0]?.[0] as unknown as { - name: string; - op: string; - }; - expect(opts.op).toBe("gen_ai.chat"); - expect(opts.name).toBe("chat openai/gpt-5.4"); - }); - - it("sets metadata-only input messages and system instructions when privacy is unknown", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - await traced( - fakeModel("openai/gpt-5.4"), - { - systemPrompt: "you are junior", - messages: [{ role: "user", content: "hello", timestamp: 0 }], - }, - undefined, - ); - - const opts = 
startInactiveSpan.mock.calls[0]?.[0] as unknown as { - attributes: Record; - }; - expect(opts.attributes["gen_ai.provider.name"]).toBe("vercel-ai-gateway"); - expect(opts.attributes["server.address"]).toBe("ai-gateway.vercel.sh"); - expect(opts.attributes["server.port"]).toBe(443); - expect(opts.attributes["gen_ai.request.stream"]).toBe(true); - expect(opts.attributes["gen_ai.output.type"]).toBe("text"); - expect(opts.attributes["app.ai.input.message_count"]).toBe(1); - expect(opts.attributes["app.ai.input.content_chars"]).toBe(5); - expect(opts.attributes["app.ai.input.roles"]).toEqual(["user"]); - expect(opts.attributes["app.ai.system_instructions.content_chars"]).toBe( - 14, - ); - expect(typeof opts.attributes["gen_ai.input.messages"]).toBe("string"); - expect(opts.attributes["app.conversation.privacy"]).toBe("private"); - expect(opts.attributes["gen_ai.input.messages"]).toContain('"chars"'); - expect(opts.attributes["gen_ai.input.messages"]).not.toContain("hello"); - expect(typeof opts.attributes["gen_ai.system_instructions"]).toBe("string"); - expect(opts.attributes["gen_ai.system_instructions"]).toContain('"chars"'); - expect(opts.attributes["gen_ai.system_instructions"]).not.toContain( - "you are junior", - ); - expect(opts.attributes["gen_ai.operation.name"]).toBe("chat"); - expect(opts.attributes["gen_ai.request.model"]).toBe("openai/gpt-5.4"); - }); - - it("uses message metadata for private conversation chat spans", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - const privatePrompt = - "private prompt\nslack.conversation.type: private_channel\nslack.conversation.name: #private-roadmap"; - - const traced = createTracedStreamFn({ - base: base as unknown as StreamFn, - conversationPrivacy: "private", - }); - await traced( - fakeModel("openai/gpt-5.4"), - { - systemPrompt: "private system", - messages: [{ role: "user", 
content: privatePrompt, timestamp: 0 }], - }, - undefined, - ); - - const opts = startInactiveSpan.mock.calls[0]?.[0] as unknown as { - attributes: Record; - }; - expect(opts.attributes["app.conversation.privacy"]).toBe("private"); - expect(opts.attributes["app.ai.input.message_count"]).toBe(1); - expect(opts.attributes["app.ai.input.content_chars"]).toBe( - privatePrompt.length, - ); - expect(opts.attributes["gen_ai.input.messages"]).toContain('"chars"'); - expect(opts.attributes["gen_ai.input.messages"]).not.toContain( - "private prompt", - ); - expect(opts.attributes["gen_ai.input.messages"]).not.toContain( - "slack.conversation.name", - ); - expect(opts.attributes["gen_ai.input.messages"]).not.toContain( - "#private-roadmap", - ); - expect(opts.attributes["gen_ai.system_instructions"]).toContain('"chars"'); - expect(opts.attributes["gen_ai.system_instructions"]).not.toContain( - "private system", - ); - - stream.end({ - ...fakeMessage(), - content: [{ type: "text", text: "secret" }], - }); - await stream.result(); - await new Promise((r) => setImmediate(r)); - - const span = getSpan(); - const endAttributes = Object.fromEntries( - span.setAttribute.mock.calls.map((c) => [c[0], c[1]]), - ); - expect(endAttributes["app.ai.output.message_count"]).toBe(1); - expect(endAttributes["app.ai.output.content_chars"]).toBe(6); - expect(endAttributes["gen_ai.output.messages"]).toContain('"chars"'); - expect(endAttributes["gen_ai.output.messages"]).not.toContain("secret"); - }); - - it("sets output.messages, usage tokens, finish_reasons, response.model after stream completion", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - const returned = await traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ); - - 
expect(returned).toBe(stream); - - // Resolve the stream's terminal Promise to trigger end-attribute population. - const finalMessage = fakeMessage(); - stream.end(finalMessage); - await stream.result(); - // Allow the .then callback to flush. - await new Promise((r) => setImmediate(r)); - - const span = getSpan(); - const endAttributes = Object.fromEntries( - span.setAttribute.mock.calls.map((c) => [c[0], c[1]]), - ); - expect(typeof endAttributes["gen_ai.output.messages"]).toBe("string"); - expect(endAttributes["gen_ai.usage.input_tokens"]).toBe(100); - expect(endAttributes["gen_ai.usage.output_tokens"]).toBe(5); - expect(endAttributes["gen_ai.response.finish_reasons"]).toEqual(["stop"]); - expect(endAttributes["gen_ai.response.model"]).toBe("openai/gpt-5.4"); - expect(span.end).toHaveBeenCalledTimes(1); - }); - - it("normalizes Pi toolUse finish reasons for telemetry", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - await traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ); - - stream.end({ ...fakeMessage(), stopReason: "toolUse" }); - await stream.result(); - await new Promise((r) => setImmediate(r)); - - const span = getSpan(); - const endAttributes = Object.fromEntries( - span.setAttribute.mock.calls.map((c) => [c[0], c[1]]), - ); - expect(endAttributes["gen_ai.response.finish_reasons"]).toEqual([ - "tool_use", - ]); - }); - - it("inherits LogContext attributes (e.g. 
gen_ai.conversation.id) onto the chat span", async () => { - const { withLogContext } = await import("@/chat/logging"); - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - const traced = createTracedStreamFn(base as unknown as StreamFn); - - await withLogContext( - { conversationId: "conv_123", runId: "run_456" }, - async () => { - await traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ); - }, - ); - - const opts = startInactiveSpan.mock.calls[0]?.[0] as { - attributes: Record; - }; - expect(opts.attributes["gen_ai.conversation.id"]).toBe("conv_123"); - expect(opts.attributes["app.run.id"]).toBe("run_456"); - // wrapper-supplied attributes still present - expect(opts.attributes["gen_ai.operation.name"]).toBe("chat"); - expect(opts.attributes["gen_ai.request.model"]).toBe("openai/gpt-5.4"); - }); - - it("ends the span when the stream errors", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - await traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ); - - // pi-ai's AssistantMessageEventStream resolves `result()` with the carrier - // AssistantMessage on `error` events instead of rejecting, so the wrapper's - // `.then` success arm runs on the error path. The load-bearing invariant - // is that the span ends exactly once. 
- const errorMessage = { ...fakeMessage(), stopReason: "error" as const }; - stream.push({ type: "error", reason: "error", error: errorMessage }); - await stream.result(); - await new Promise((r) => setImmediate(r)); - - const span = getSpan(); - expect(span.end).toHaveBeenCalledTimes(1); - // End attributes are still populated because the success arm runs. - const endAttributeKeys = span.setAttribute.mock.calls.map((c) => c[0]); - expect(endAttributeKeys).toContain("gen_ai.output.messages"); - }); - - it("sets error status and ends the span when base() throws", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const base = vi.fn(() => { - throw new Error("gateway down"); - }); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - await expect( - traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ), - ).rejects.toThrow("gateway down"); - - const span = getSpan(); - expect(span.setStatus).toHaveBeenCalledWith({ - code: 2, - message: "LLM call failed", - }); - expect(span.end).toHaveBeenCalledTimes(1); - }); - - it("sets error status and ends the span when stream.result() rejects", async () => { - const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); - const fakeStream = { - result: () => Promise.reject(new Error("stream failure")), - }; - const base = vi.fn(() => fakeStream); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - await traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ); - - await new Promise((r) => setImmediate(r)); - - const span = getSpan(); - expect(span.setStatus).toHaveBeenCalledWith({ - code: 2, - message: "LLM stream failed", - }); - expect(span.end).toHaveBeenCalledTimes(1); - }); - - it("ends the span even when setAttribute throws in the success callback", async () => { - const { createTracedStreamFn } = await 
import("@/chat/pi/traced-stream"); - const stream = createAssistantMessageEventStream(); - const base = vi.fn(() => stream); - - const traced = createTracedStreamFn(base as unknown as StreamFn); - await traced( - fakeModel("openai/gpt-5.4"), - { messages: [{ role: "user", content: "hi", timestamp: 0 }] }, - undefined, - ); - - const span = getSpan(); - span.setAttribute.mockImplementation(() => { - throw new Error("setAttribute exploded"); - }); - - stream.end(fakeMessage()); - await stream.result(); - await new Promise((r) => setImmediate(r)); - - expect(span.end).toHaveBeenCalledTimes(1); - }); -}); diff --git a/packages/junior/tests/unit/cli/check-cli-app-config.test.ts b/packages/junior/tests/unit/cli/check-cli-app-config.test.ts new file mode 100644 index 000000000..879283942 --- /dev/null +++ b/packages/junior/tests/unit/cli/check-cli-app-config.test.ts @@ -0,0 +1,148 @@ +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + cleanupCheckCliTempRoots, + makeTempDir, + writeFile, + expectCheckFailure, + mkdir, + runCheckAndCollect, +} from "../../fixtures/check-cli"; + +afterEach(cleanupCheckCliTempRoots); + +describe("check cli app config", () => { + it("fails when app source uses the removed pluginPackages option", async () => { + const repoRoot = makeTempDir("junior-validate-plugin-packages-option-"); + writeFile( + path.join(repoRoot, "server.ts"), + [ + 'import { createApp } from "@sentry/junior";', + "", + "export default await createApp({", + ' pluginPackages: ["@acme/junior-demo"],', + "});", + "", + ].join("\n"), + ); + + const lines = await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", + ); + + expect( + lines.some((line) => + line.includes( + "pluginPackages is no longer supported. Export a defineJuniorPlugins(...) 
set", + ), + ), + ).toBe(true); + }); + + it("fails when app source uses the removed plugins.packages option", async () => { + const repoRoot = makeTempDir("junior-validate-plugins-packages-option-"); + writeFile( + path.join(repoRoot, "nitro.config.ts"), + [ + 'import { juniorNitro } from "@sentry/junior/nitro";', + "", + "export default {", + " modules: [", + " juniorNitro({", + " plugins: { packages: ['@acme/junior-demo'] },", + " }),", + " ],", + "};", + "", + ].join("\n"), + ); + + const lines = await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", + ); + + expect( + lines.some((line) => + line.includes( + "plugins.packages is no longer supported. Export a defineJuniorPlugins(...) set", + ), + ), + ).toBe(true); + }); + + it("fails when app configDefaults references an unregistered plugin key", async () => { + const repoRoot = makeTempDir("junior-validate-config-defaults-"); + writeFile( + path.join(repoRoot, "package.json"), + JSON.stringify( + { + dependencies: { + "@acme/junior-demo": "1.0.0", + }, + }, + null, + 2, + ), + ); + const packageRoot = path.join( + repoRoot, + "node_modules", + "@acme", + "junior-demo", + ); + writeFile( + path.join(packageRoot, "package.json"), + JSON.stringify({ name: "@acme/junior-demo", version: "1.0.0" }), + ); + writeFile( + path.join(packageRoot, "plugin.yaml"), + [ + "name: demo", + "description: Demo packaged plugin", + "config-keys:", + " - org", + "", + ].join("\n"), + ); + writeFile( + path.join(repoRoot, "server.ts"), + [ + 'import { createApp } from "@sentry/junior";', + "", + "export default await createApp({", + " configDefaults: {", + ' "sentry.org": "sentry",', + " },", + "});", + "", + ].join("\n"), + ); + + const lines = await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 1 plugin manifest, 0 skill directories checked).", + ); + + expect( + lines.some((line) => + line.includes( + 'configDefaults key "sentry.org" is not a 
registered plugin config key', + ), + ), + ).toBe(true); + }); + + it("skips app file validation for unrelated app directories", async () => { + const repoRoot = makeTempDir("junior-validate-empty-app-"); + mkdir(path.join(repoRoot, "app")); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toEqual([ + `Checking ${repoRoot}`, + "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", + ]); + }); +}); diff --git a/packages/junior/tests/unit/cli/check-cli-deployment-config.test.ts b/packages/junior/tests/unit/cli/check-cli-deployment-config.test.ts new file mode 100644 index 000000000..85a551ee7 --- /dev/null +++ b/packages/junior/tests/unit/cli/check-cli-deployment-config.test.ts @@ -0,0 +1,161 @@ +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + cleanupCheckCliTempRoots, + makeTempDir, + writeFile, + expectCheckFailure, + runCheckAndCollect, +} from "../../fixtures/check-cli"; + +afterEach(cleanupCheckCliTempRoots); + +describe("check cli deployment config", () => { + it("fails when a Junior Nitro app does not install juniorNitro", async () => { + const repoRoot = makeTempDir("junior-validate-missing-nitro-module-"); + writeFile( + path.join(repoRoot, "package.json"), + JSON.stringify( + { + dependencies: { + "@sentry/junior": "1.0.0", + }, + }, + null, + 2, + ), + ); + writeFile( + path.join(repoRoot, "nitro.config.ts"), + [ + 'import { defineConfig } from "nitro";', + "", + "export default defineConfig({", + ' preset: "vercel",', + "});", + "", + ].join("\n"), + ); + + const lines = await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", + ); + + expect(lines).toContain("✖ deployment config"); + expect( + lines.some((line) => + line.includes( + "missing juniorNitro(). 
The Nitro module emits Junior's Vercel queue trigger and heartbeat cron", + ), + ), + ).toBe(true); + }); + + it("fails when Vercel config targets the legacy queue source file", async () => { + const repoRoot = makeTempDir("junior-validate-legacy-vercel-function-"); + writeFile( + path.join(repoRoot, "vercel.json"), + JSON.stringify( + { + framework: "nitro", + functions: { + "api/internal/agent/continue.ts": { + maxDuration: 300, + experimentalTriggers: [ + { + type: "queue/v2beta", + topic: "junior_conversation_work", + }, + ], + }, + }, + }, + null, + 2, + ), + ); + + const lines = await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", + ); + + expect(lines).toContain("✖ deployment config"); + expect( + lines.some((line) => + line.includes( + "functions.api/internal/agent/continue.ts targets a source file that Nitro does not deploy", + ), + ), + ).toBe(true); + }); + + it("warns when Vercel config still declares the root heartbeat cron", async () => { + const repoRoot = makeTempDir("junior-validate-root-heartbeat-cron-"); + writeFile( + path.join(repoRoot, "package.json"), + JSON.stringify( + { + dependencies: { + "@sentry/junior": "1.0.0", + }, + }, + null, + 2, + ), + ); + writeFile( + path.join(repoRoot, "nitro.config.ts"), + [ + 'import { defineConfig } from "nitro";', + 'import { juniorNitro } from "@sentry/junior/nitro";', + "", + "export default defineConfig({", + ' preset: "vercel",', + " modules: [juniorNitro()],", + "});", + "", + ].join("\n"), + ); + writeFile( + path.join(repoRoot, "vercel.json"), + JSON.stringify( + { + framework: "nitro", + crons: [ + { + path: "/api/internal/heartbeat", + schedule: "* * * * *", + }, + ], + }, + null, + 2, + ), + ); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toContain("⚠ deployment config"); + expect( + lines.some((line) => + line.includes( + "/api/internal/heartbeat cron is now emitted by juniorNitro()", + ), + ), + 
).toBe(true); + }); + + it("skips deployment config validation for unrelated Vercel projects", async () => { + const repoRoot = makeTempDir("junior-validate-unrelated-vercel-"); + writeFile(path.join(repoRoot, "vercel.json"), "{ invalid"); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toEqual([ + `Checking ${repoRoot}`, + "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", + ]); + }); +}); diff --git a/packages/junior/tests/unit/cli/check-cli-packages.test.ts b/packages/junior/tests/unit/cli/check-cli-packages.test.ts new file mode 100644 index 000000000..ff36e7343 --- /dev/null +++ b/packages/junior/tests/unit/cli/check-cli-packages.test.ts @@ -0,0 +1,149 @@ +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + cleanupCheckCliTempRoots, + makeTempDir, + writeFile, + mkdir, + runCheckAndCollect, +} from "../../fixtures/check-cli"; + +afterEach(cleanupCheckCliTempRoots); + +describe("check cli packaged plugins", () => { + it("accepts configDefaults from JS-defined packaged plugin manifests", async () => { + const repoRoot = makeTempDir("junior-validate-js-plugin-defaults-"); + writeFile( + path.join(repoRoot, "package.json"), + JSON.stringify( + { + dependencies: { + "@acme/junior-github": "1.0.0", + "@acme/junior-sentry": "1.0.0", + }, + }, + null, + 2, + ), + ); + const githubPackageRoot = path.join( + repoRoot, + "node_modules", + "@acme", + "junior-github", + ); + writeFile( + path.join(githubPackageRoot, "package.json"), + JSON.stringify({ + name: "@acme/junior-github", + version: "1.0.0", + type: "module", + exports: { ".": { default: "./index.js" } }, + }), + ); + writeFile( + path.join(githubPackageRoot, "index.js"), + [ + "export function githubPlugin() {", + " return {", + ' name: "github",', + " manifest: {", + ' name: "github",', + ' displayName: "GitHub",', + ' description: "GitHub plugin",', + ' configKeys: ["org", "repo"],', + " },", + " };", + "}", + "", + 
].join("\n"), + ); + mkdir(path.join(githubPackageRoot, "skills")); + + const sentryPackageRoot = path.join( + repoRoot, + "node_modules", + "@acme", + "junior-sentry", + ); + writeFile( + path.join(sentryPackageRoot, "package.json"), + JSON.stringify({ name: "@acme/junior-sentry", version: "1.0.0" }), + ); + writeFile( + path.join(sentryPackageRoot, "plugin.yaml"), + [ + "name: sentry", + "description: Sentry plugin", + "config-keys:", + " - org", + "", + ].join("\n"), + ); + writeFile( + path.join(repoRoot, "server.ts"), + [ + 'import { createApp } from "@sentry/junior";', + "", + "export default await createApp({", + " configDefaults: {", + ' "github.org": "getsentry",', + ' "sentry.org": "sentry",', + " },", + "});", + "", + ].join("\n"), + ); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toEqual([ + `Checking ${repoRoot}`, + "✓ packaged plugin github (@acme/junior-github)", + "✓ packaged plugin sentry (@acme/junior-sentry)", + "✓ Validation passed (2 plugin manifests, 0 skill directories checked).", + ]); + }); + + it("warns when official plugin package versions differ from core", async () => { + const repoRoot = makeTempDir("junior-validate-version-skew-"); + writeFile( + path.join(repoRoot, "package.json"), + JSON.stringify( + { + dependencies: { + "@sentry/junior": "^0.43.0", + "@sentry/junior-github": "^0.42.0", + }, + }, + null, + 2, + ), + ); + writeFile( + path.join(repoRoot, "node_modules", "@sentry", "junior", "package.json"), + JSON.stringify({ name: "@sentry/junior", version: "0.43.0" }), + ); + writeFile( + path.join( + repoRoot, + "node_modules", + "@sentry", + "junior-github", + "package.json", + ), + JSON.stringify({ name: "@sentry/junior-github", version: "0.42.0" }), + ); + mkdir( + path.join(repoRoot, "node_modules", "@sentry", "junior-github", "skills"), + ); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toEqual([ + `Checking ${repoRoot}`, + `⚠ warning: ${path.join(repoRoot, 
"package.json")}: @sentry/junior-github version 0.42.0 does not match @sentry/junior version 0.43.0`, + "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", + ]); + }); +}); diff --git a/packages/junior/tests/unit/cli/check-cli-plugin-manifests.test.ts b/packages/junior/tests/unit/cli/check-cli-plugin-manifests.test.ts new file mode 100644 index 000000000..6c05e55de --- /dev/null +++ b/packages/junior/tests/unit/cli/check-cli-plugin-manifests.test.ts @@ -0,0 +1,181 @@ +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + cleanupCheckCliTempRoots, + expectCheckFailure, + makeTempDir, + runCheckAndCollect, + writeAppFiles, + writeFile, +} from "../../fixtures/check-cli"; + +afterEach(cleanupCheckCliTempRoots); + +describe("check cli plugin manifests", () => { + it("validates local plugins and skills from an explicit repo root", async () => { + const repoRoot = makeTempDir("junior-validate-"); + writeAppFiles(repoRoot); + writeFile( + path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), + [ + "name: demo", + "description: Demo plugin", + "capabilities:", + " - issues.read", + "config-keys:", + " - repo", + "target:", + " type: repo", + " config-key: repo", + "", + ].join("\n"), + ); + writeFile( + path.join( + repoRoot, + "app", + "plugins", + "demo", + "skills", + "demo-helper", + "SKILL.md", + ), + [ + "---", + "name: demo-helper", + "description: Help with demo tasks.", + "---", + "", + "Use this skill.", + "", + ].join("\n"), + ); + writeFile( + path.join(repoRoot, "app", "skills", "repo-local", "SKILL.md"), + [ + "---", + "name: repo-local", + "description: Help with repo-local tasks.", + "---", + "", + "Use this skill.", + "", + ].join("\n"), + ); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toEqual([ + `Checking ${repoRoot}`, + "✓ app files", + "✓ plugin demo", + " └─ ✓ skill demo-helper", + "✓ app skills", + " └─ ✓ skill repo-local", + "✓ Validation passed 
(1 plugin manifest, 2 skill directories checked).", + ]); + }); + + it("ignores plugin manifests outside app/plugins", async () => { + const repoRoot = makeTempDir("junior-validate-invalid-plugin-"); + writeFile( + path.join(repoRoot, "plugins", "demo", "plugin.yaml"), + "name: Demo\n", + ); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toEqual([ + `Checking ${repoRoot}`, + "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", + ]); + }); + + it("validates installed packaged plugin manifests and skills", async () => { + const repoRoot = makeTempDir("junior-validate-packaged-plugin-"); + writeFile( + path.join(repoRoot, "package.json"), + JSON.stringify( + { + dependencies: { + "@acme/junior-demo": "1.0.0", + }, + }, + null, + 2, + ), + ); + const packageRoot = path.join( + repoRoot, + "node_modules", + "@acme", + "junior-demo", + ); + writeFile( + path.join(packageRoot, "package.json"), + JSON.stringify({ name: "@acme/junior-demo", version: "1.0.0" }), + ); + writeFile( + path.join(packageRoot, "plugin.yaml"), + [ + "name: demo", + "description: Demo packaged plugin", + "capabilities:", + " - issues.read", + "", + ].join("\n"), + ); + writeFile( + path.join(packageRoot, "skills", "demo-helper", "SKILL.md"), + [ + "---", + "name: demo-helper", + "description: Help with packaged demo tasks.", + "---", + "", + "Use this skill.", + "", + ].join("\n"), + ); + + const lines = await runCheckAndCollect(repoRoot); + + expect(lines).toEqual([ + `Checking ${repoRoot}`, + "✓ packaged plugin demo (@acme/junior-demo)", + " └─ ✓ skill demo-helper", + "✓ Validation passed (1 plugin manifest, 1 skill directory checked).", + ]); + }); + + it("fails when local plugins share a provider domain", async () => { + const repoRoot = makeTempDir("junior-validate-duplicate-domain-"); + writeAppFiles(repoRoot); + for (const pluginName of ["alpha", "beta"]) { + writeFile( + path.join(repoRoot, "app", "plugins", pluginName, "plugin.yaml"), + [ + 
`name: ${pluginName}`, + `${pluginName === "alpha" ? "description: Alpha" : "description: Beta"} plugin`, + "credentials:", + " type: oauth-bearer", + " domains:", + " - api.example.com", + ` auth-token-env: ${pluginName.toUpperCase()}_AUTH_TOKEN`, + "", + ].join("\n"), + ); + } + + const lines = await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 2 plugin manifests, 0 skill directories checked).", + ); + + expect( + lines.some((line) => + line.includes('duplicate provider domain "api.example.com"'), + ), + ).toBe(true); + }); +}); diff --git a/packages/junior/tests/unit/cli/check-cli-skills.test.ts b/packages/junior/tests/unit/cli/check-cli-skills.test.ts new file mode 100644 index 000000000..3c90894d5 --- /dev/null +++ b/packages/junior/tests/unit/cli/check-cli-skills.test.ts @@ -0,0 +1,142 @@ +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + cleanupCheckCliTempRoots, + makeTempDir, + writeFile, + expectCheckFailure, + runCheckAndCollect, + writeAppFiles, +} from "../../fixtures/check-cli"; + +afterEach(cleanupCheckCliTempRoots); + +describe("check cli skills", () => { + it("only checks skill directories under app and plugin skill roots", async () => { + const repoRoot = makeTempDir("junior-validate-duplicate-skill-"); + writeAppFiles(repoRoot); + writeFile( + path.join(repoRoot, "skills", "shared-skill", "SKILL.md"), + [ + "---", + "name: shared-skill", + "description: Shared skill.", + "---", + "", + "Use this skill.", + "", + ].join("\n"), + ); + writeFile( + path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), + ["name: demo", "description: Demo plugin", ""].join("\n"), + ); + writeFile( + path.join( + repoRoot, + "app", + "plugins", + "demo", + "skills", + "shared-skill", + "SKILL.md", + ), + [ + "---", + "name: shared-skill", + "description: Shared skill again.", + "---", + "", + "Use this skill.", + "", + ].join("\n"), + ); + + const lines = await runCheckAndCollect(repoRoot); + 
+ expect(lines).toEqual([ + `Checking ${repoRoot}`, + "✓ app files", + "✓ plugin demo", + " └─ ✓ skill shared-skill", + "✓ Validation passed (1 plugin manifest, 1 skill directory checked).", + ]); + }); + + it("fails when skill uses-config frontmatter is present", async () => { + const repoRoot = makeTempDir("junior-validate-uses-config-"); + writeAppFiles(repoRoot); + writeFile( + path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), + ["name: demo", "description: Demo plugin", ""].join("\n"), + ); + writeFile( + path.join(repoRoot, "app", "skills", "repo-local", "SKILL.md"), + [ + "---", + "name: repo-local", + "description: Help with repo-local tasks.", + "uses-config: demo.repo", + "---", + "", + "Use this skill.", + "", + ].join("\n"), + ); + + await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 1 plugin manifest, 1 skill directory checked).", + ); + }); + + it("fails when skill instructions reference harness tool mechanics", async () => { + const repoRoot = makeTempDir("junior-validate-use-tool-"); + writeAppFiles(repoRoot); + writeFile( + path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), + [ + "name: demo", + "description: Demo plugin", + "mcp:", + " url: https://mcp.example.test/mcp", + " allowed-tools:", + " - demo-search", + "", + ].join("\n"), + ); + writeFile( + path.join( + repoRoot, + "app", + "plugins", + "demo", + "skills", + "demo-helper", + "SKILL.md", + ), + [ + "---", + "name: demo-helper", + "description: Help with demo tasks.", + "---", + "", + "Use available_tools, then callMcpTool with the disclosed MCP tool name.", + "", + ].join("\n"), + ); + + const lines = await expectCheckFailure( + repoRoot, + "Validation failed (1 error, 1 plugin manifest, 1 skill directory checked).", + ); + + expect( + lines.some((line) => + line.includes( + "skill instructions must not hardcode harness tool-discovery or MCP dispatcher mechanics", + ), + ), + ).toBe(true); + }); +}); diff --git 
a/packages/junior/tests/unit/cli/check-cli.test.ts b/packages/junior/tests/unit/cli/check-cli.test.ts deleted file mode 100644 index f2331cbc3..000000000 --- a/packages/junior/tests/unit/cli/check-cli.test.ts +++ /dev/null @@ -1,857 +0,0 @@ -import fs from "node:fs"; -import os from "node:os"; -import path from "node:path"; -import { afterEach, describe, expect, it } from "vitest"; -import { runCheck } from "@/cli/check"; - -const tempRoots: string[] = []; - -function makeTempDir(prefix: string): string { - const dir = fs.mkdtempSync(path.join(os.tmpdir(), prefix)); - tempRoots.push(dir); - return dir; -} - -function writeFile(targetPath: string, contents: string): void { - fs.mkdirSync(path.dirname(targetPath), { recursive: true }); - fs.writeFileSync(targetPath, contents, "utf8"); -} - -function writeAppFiles(repoRoot: string): void { - const appDir = path.join(repoRoot, "app"); - fs.mkdirSync(appDir, { recursive: true }); - writeFile(path.join(appDir, "SOUL.md"), "soul"); - writeFile(path.join(appDir, "WORLD.md"), "world"); - writeFile(path.join(appDir, "DESCRIPTION.md"), "description"); -} - -afterEach(() => { - for (const root of tempRoots.splice(0)) { - fs.rmSync(root, { recursive: true, force: true }); - } -}); - -describe("check cli", () => { - it("validates local plugins and skills from an explicit repo root", async () => { - const repoRoot = makeTempDir("junior-validate-"); - writeAppFiles(repoRoot); - writeFile( - path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), - [ - "name: demo", - "display-name: Demo", - "description: Demo plugin", - "capabilities:", - " - issues.read", - "config-keys:", - " - repo", - "target:", - " type: repo", - " config-key: repo", - "", - ].join("\n"), - ); - writeFile( - path.join( - repoRoot, - "app", - "plugins", - "demo", - "skills", - "demo-helper", - "SKILL.md", - ), - [ - "---", - "name: demo-helper", - "display-name: Demo Helper", - "description: Help with demo tasks.", - "---", - "", - "Use this skill.", - 
"", - ].join("\n"), - ); - writeFile( - path.join(repoRoot, "app", "skills", "repo-local", "SKILL.md"), - [ - "---", - "name: repo-local", - "display-name: Repo Local", - "description: Help with repo-local tasks.", - "---", - "", - "Use this skill.", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - "✓ app files", - "✓ plugin demo", - " └─ ✓ skill demo-helper", - "✓ app skills", - " └─ ✓ skill repo-local", - "✓ Validation passed (1 plugin manifest, 2 skill directories checked).", - ]); - }); - - it("ignores plugin manifests outside app/plugins", async () => { - const repoRoot = makeTempDir("junior-validate-invalid-plugin-"); - writeFile( - path.join(repoRoot, "plugins", "demo", "plugin.yaml"), - "name: Demo\n", - ); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", - ]); - }); - - it("validates installed packaged plugin manifests and skills", async () => { - const repoRoot = makeTempDir("junior-validate-packaged-plugin-"); - writeFile( - path.join(repoRoot, "package.json"), - JSON.stringify( - { - dependencies: { - "@acme/junior-demo": "1.0.0", - }, - }, - null, - 2, - ), - ); - const packageRoot = path.join( - repoRoot, - "node_modules", - "@acme", - "junior-demo", - ); - writeFile( - path.join(packageRoot, "package.json"), - JSON.stringify({ name: "@acme/junior-demo", version: "1.0.0" }), - ); - writeFile( - path.join(packageRoot, "plugin.yaml"), - [ - "name: demo", - "display-name: Demo", - "description: Demo packaged plugin", - "capabilities:", - " - issues.read", - "", - ].join("\n"), - ); - writeFile( - 
path.join(packageRoot, "skills", "demo-helper", "SKILL.md"), - [ - "---", - "name: demo-helper", - "display-name: Demo Helper", - "description: Help with packaged demo tasks.", - "---", - "", - "Use this skill.", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - "✓ packaged plugin demo (@acme/junior-demo)", - " └─ ✓ skill demo-helper", - "✓ Validation passed (1 plugin manifest, 1 skill directory checked).", - ]); - }); - - it("fails when app source uses the removed pluginPackages option", async () => { - const repoRoot = makeTempDir("junior-validate-plugin-packages-option-"); - writeFile( - path.join(repoRoot, "server.ts"), - [ - 'import { createApp } from "@sentry/junior";', - "", - "export default await createApp({", - ' pluginPackages: ["@acme/junior-demo"],', - "});", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await expect( - runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }), - ).rejects.toThrow( - "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", - ); - - expect( - lines.some((line) => - line.includes( - "pluginPackages is no longer supported. Export a defineJuniorPlugins(...) 
set", - ), - ), - ).toBe(true); - }); - - it("fails when app source uses the removed plugins.packages option", async () => { - const repoRoot = makeTempDir("junior-validate-plugins-packages-option-"); - writeFile( - path.join(repoRoot, "nitro.config.ts"), - [ - 'import { juniorNitro } from "@sentry/junior/nitro";', - "", - "export default {", - " modules: [", - " juniorNitro({", - " plugins: { packages: ['@acme/junior-demo'] },", - " }),", - " ],", - "};", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await expect( - runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }), - ).rejects.toThrow( - "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", - ); - - expect( - lines.some((line) => - line.includes( - "plugins.packages is no longer supported. Export a defineJuniorPlugins(...) set", - ), - ), - ).toBe(true); - }); - - it("fails when a Junior Nitro app does not install juniorNitro", async () => { - const repoRoot = makeTempDir("junior-validate-missing-nitro-module-"); - writeFile( - path.join(repoRoot, "package.json"), - JSON.stringify( - { - dependencies: { - "@sentry/junior": "1.0.0", - }, - }, - null, - 2, - ), - ); - writeFile( - path.join(repoRoot, "nitro.config.ts"), - [ - 'import { defineConfig } from "nitro";', - "", - "export default defineConfig({", - ' preset: "vercel",', - "});", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await expect( - runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }), - ).rejects.toThrow( - "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", - ); - - expect(lines).toContain("✖ deployment config"); - expect( - lines.some((line) => - line.includes( - "missing juniorNitro(). 
The Nitro module emits Junior's Vercel queue trigger and heartbeat cron", - ), - ), - ).toBe(true); - }); - - it("fails when Vercel config targets the legacy queue source file", async () => { - const repoRoot = makeTempDir("junior-validate-legacy-vercel-function-"); - writeFile( - path.join(repoRoot, "vercel.json"), - JSON.stringify( - { - framework: "nitro", - functions: { - "api/internal/agent/continue.ts": { - maxDuration: 300, - experimentalTriggers: [ - { - type: "queue/v2beta", - topic: "junior_conversation_work", - }, - ], - }, - }, - }, - null, - 2, - ), - ); - - const lines: string[] = []; - await expect( - runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }), - ).rejects.toThrow( - "Validation failed (1 error, 0 plugin manifests, 0 skill directories checked).", - ); - - expect(lines).toContain("✖ deployment config"); - expect( - lines.some((line) => - line.includes( - "functions.api/internal/agent/continue.ts targets a source file that Nitro does not deploy", - ), - ), - ).toBe(true); - }); - - it("warns when Vercel config still declares the root heartbeat cron", async () => { - const repoRoot = makeTempDir("junior-validate-root-heartbeat-cron-"); - writeFile( - path.join(repoRoot, "package.json"), - JSON.stringify( - { - dependencies: { - "@sentry/junior": "1.0.0", - }, - }, - null, - 2, - ), - ); - writeFile( - path.join(repoRoot, "nitro.config.ts"), - [ - 'import { defineConfig } from "nitro";', - 'import { juniorNitro } from "@sentry/junior/nitro";', - "", - "export default defineConfig({", - ' preset: "vercel",', - " modules: [juniorNitro()],", - "});", - "", - ].join("\n"), - ); - writeFile( - path.join(repoRoot, "vercel.json"), - JSON.stringify( - { - framework: "nitro", - crons: [ - { - path: "/api/internal/heartbeat", - schedule: "* * * * *", - }, - ], - }, - null, - 2, - ), - ); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => 
lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toContain("⚠ deployment config"); - expect( - lines.some((line) => - line.includes( - "/api/internal/heartbeat cron is now emitted by juniorNitro()", - ), - ), - ).toBe(true); - }); - - it("fails when app configDefaults references an unregistered plugin key", async () => { - const repoRoot = makeTempDir("junior-validate-config-defaults-"); - writeFile( - path.join(repoRoot, "package.json"), - JSON.stringify( - { - dependencies: { - "@acme/junior-demo": "1.0.0", - }, - }, - null, - 2, - ), - ); - const packageRoot = path.join( - repoRoot, - "node_modules", - "@acme", - "junior-demo", - ); - writeFile( - path.join(packageRoot, "package.json"), - JSON.stringify({ name: "@acme/junior-demo", version: "1.0.0" }), - ); - writeFile( - path.join(packageRoot, "plugin.yaml"), - [ - "name: demo", - "display-name: Demo", - "description: Demo packaged plugin", - "config-keys:", - " - org", - "", - ].join("\n"), - ); - writeFile( - path.join(repoRoot, "server.ts"), - [ - 'import { createApp } from "@sentry/junior";', - "", - "export default await createApp({", - " configDefaults: {", - ' "sentry.org": "sentry",', - " },", - "});", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await expect( - runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }), - ).rejects.toThrow( - "Validation failed (1 error, 1 plugin manifest, 0 skill directories checked).", - ); - - expect( - lines.some((line) => - line.includes( - 'configDefaults key "sentry.org" is not a registered plugin config key', - ), - ), - ).toBe(true); - }); - - it("accepts configDefaults from JS-defined packaged plugin manifests", async () => { - const repoRoot = makeTempDir("junior-validate-js-plugin-defaults-"); - writeFile( - path.join(repoRoot, "package.json"), - JSON.stringify( - { - dependencies: { - 
"@acme/junior-github": "1.0.0", - "@acme/junior-sentry": "1.0.0", - }, - }, - null, - 2, - ), - ); - const githubPackageRoot = path.join( - repoRoot, - "node_modules", - "@acme", - "junior-github", - ); - writeFile( - path.join(githubPackageRoot, "package.json"), - JSON.stringify({ - name: "@acme/junior-github", - version: "1.0.0", - type: "module", - exports: { ".": { default: "./index.js" } }, - }), - ); - writeFile( - path.join(githubPackageRoot, "index.js"), - [ - "export function githubPlugin() {", - " return {", - ' name: "github",', - " manifest: {", - ' name: "github",', - ' displayName: "GitHub",', - ' description: "GitHub plugin",', - ' configKeys: ["org", "repo"],', - " },", - " };", - "}", - "", - ].join("\n"), - ); - fs.mkdirSync(path.join(githubPackageRoot, "skills"), { recursive: true }); - - const sentryPackageRoot = path.join( - repoRoot, - "node_modules", - "@acme", - "junior-sentry", - ); - writeFile( - path.join(sentryPackageRoot, "package.json"), - JSON.stringify({ name: "@acme/junior-sentry", version: "1.0.0" }), - ); - writeFile( - path.join(sentryPackageRoot, "plugin.yaml"), - [ - "name: sentry", - "display-name: Sentry", - "description: Sentry plugin", - "config-keys:", - " - org", - "", - ].join("\n"), - ); - writeFile( - path.join(repoRoot, "server.ts"), - [ - 'import { createApp } from "@sentry/junior";', - "", - "export default await createApp({", - " configDefaults: {", - ' "github.org": "getsentry",', - ' "sentry.org": "sentry",', - " },", - "});", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - "✓ packaged plugin github (@acme/junior-github)", - "✓ packaged plugin sentry (@acme/junior-sentry)", - "✓ Validation passed (2 plugin manifests, 0 skill directories checked).", - ]); - }); - - it("warns when official plugin package 
versions differ from core", async () => { - const repoRoot = makeTempDir("junior-validate-version-skew-"); - writeFile( - path.join(repoRoot, "package.json"), - JSON.stringify( - { - dependencies: { - "@sentry/junior": "^0.43.0", - "@sentry/junior-github": "^0.42.0", - }, - }, - null, - 2, - ), - ); - writeFile( - path.join(repoRoot, "node_modules", "@sentry", "junior", "package.json"), - JSON.stringify({ name: "@sentry/junior", version: "0.43.0" }), - ); - writeFile( - path.join( - repoRoot, - "node_modules", - "@sentry", - "junior-github", - "package.json", - ), - JSON.stringify({ name: "@sentry/junior-github", version: "0.42.0" }), - ); - fs.mkdirSync( - path.join(repoRoot, "node_modules", "@sentry", "junior-github", "skills"), - { recursive: true }, - ); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - `⚠ warning: ${path.join(repoRoot, "package.json")}: @sentry/junior-github version 0.42.0 does not match @sentry/junior version 0.43.0`, - "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", - ]); - }); - - it("skips app file validation for unrelated app directories", async () => { - const repoRoot = makeTempDir("junior-validate-empty-app-"); - fs.mkdirSync(path.join(repoRoot, "app"), { recursive: true }); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", - ]); - }); - - it("skips deployment config validation for unrelated Vercel projects", async () => { - const repoRoot = makeTempDir("junior-validate-unrelated-vercel-"); - writeFile(path.join(repoRoot, "vercel.json"), "{ invalid"); - - const lines: string[] = 
[]; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - "✓ Validation passed (0 plugin manifests, 0 skill directories checked).", - ]); - }); - - it("only checks skill directories under app and plugin skill roots", async () => { - const repoRoot = makeTempDir("junior-validate-duplicate-skill-"); - writeAppFiles(repoRoot); - writeFile( - path.join(repoRoot, "skills", "shared-skill", "SKILL.md"), - [ - "---", - "name: shared-skill", - "display-name: Shared Skill", - "description: Shared skill.", - "---", - "", - "Use this skill.", - "", - ].join("\n"), - ); - writeFile( - path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), - ["name: demo", "display-name: Demo", "description: Demo plugin", ""].join( - "\n", - ), - ); - writeFile( - path.join( - repoRoot, - "app", - "plugins", - "demo", - "skills", - "shared-skill", - "SKILL.md", - ), - [ - "---", - "name: shared-skill", - "display-name: Shared Skill", - "description: Shared skill again.", - "---", - "", - "Use this skill.", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }); - - expect(lines).toEqual([ - `Checking ${repoRoot}`, - "✓ app files", - "✓ plugin demo", - " └─ ✓ skill shared-skill", - "✓ Validation passed (1 plugin manifest, 1 skill directory checked).", - ]); - }); - - it("fails when skill uses-config frontmatter is present", async () => { - const repoRoot = makeTempDir("junior-validate-uses-config-"); - writeAppFiles(repoRoot); - writeFile( - path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), - ["name: demo", "display-name: Demo", "description: Demo plugin", ""].join( - "\n", - ), - ); - writeFile( - path.join(repoRoot, "app", "skills", "repo-local", "SKILL.md"), - [ - "---", - "name: 
repo-local", - "display-name: Repo Local", - "description: Help with repo-local tasks.", - "uses-config: demo.repo", - "---", - "", - "Use this skill.", - "", - ].join("\n"), - ); - - await expect( - runCheck(repoRoot, { - info: () => undefined, - warn: () => undefined, - error: () => undefined, - }), - ).rejects.toThrow( - "Validation failed (1 error, 1 plugin manifest, 1 skill directory checked).", - ); - }); - - it("fails when skill instructions reference harness tool mechanics", async () => { - const repoRoot = makeTempDir("junior-validate-use-tool-"); - writeAppFiles(repoRoot); - writeFile( - path.join(repoRoot, "app", "plugins", "demo", "plugin.yaml"), - [ - "name: demo", - "display-name: Demo", - "description: Demo plugin", - "mcp:", - " url: https://mcp.example.test/mcp", - " allowed-tools:", - " - demo-search", - "", - ].join("\n"), - ); - writeFile( - path.join( - repoRoot, - "app", - "plugins", - "demo", - "skills", - "demo-helper", - "SKILL.md", - ), - [ - "---", - "name: demo-helper", - "display-name: Demo Helper", - "description: Help with demo tasks.", - "---", - "", - "Use available_tools, then callMcpTool with the disclosed MCP tool name.", - "", - ].join("\n"), - ); - - const lines: string[] = []; - await expect( - runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }), - ).rejects.toThrow( - "Validation failed (1 error, 1 plugin manifest, 1 skill directory checked).", - ); - - expect( - lines.some((line) => - line.includes( - "skill instructions must not hardcode harness tool-discovery or MCP dispatcher mechanics", - ), - ), - ).toBe(true); - }); - - it("fails when local plugins share a provider domain", async () => { - const repoRoot = makeTempDir("junior-validate-duplicate-domain-"); - writeAppFiles(repoRoot); - for (const pluginName of ["alpha", "beta"]) { - writeFile( - path.join(repoRoot, "app", "plugins", pluginName, "plugin.yaml"), - [ - `name: ${pluginName}`, 
- `display-name: ${pluginName === "alpha" ? "Alpha" : "Beta"}`, - `${pluginName === "alpha" ? "description: Alpha" : "description: Beta"} plugin`, - "credentials:", - " type: oauth-bearer", - " domains:", - " - api.example.com", - ` auth-token-env: ${pluginName.toUpperCase()}_AUTH_TOKEN`, - "", - ].join("\n"), - ); - } - - const lines: string[] = []; - await expect( - runCheck(repoRoot, { - info: (line) => lines.push(line), - warn: (line) => lines.push(line), - error: (line) => lines.push(line), - }), - ).rejects.toThrow( - "Validation failed (1 error, 2 plugin manifests, 0 skill directories checked).", - ); - - expect( - lines.some((line) => - line.includes('duplicate provider domain "api.example.com"'), - ), - ).toBe(true); - }); -}); diff --git a/packages/junior/tests/unit/cli/snapshot-warmup-cli.test.ts b/packages/junior/tests/unit/cli/snapshot-warmup-cli.test.ts index bea36e502..5bb0e76d4 100644 --- a/packages/junior/tests/unit/cli/snapshot-warmup-cli.test.ts +++ b/packages/junior/tests/unit/cli/snapshot-warmup-cli.test.ts @@ -1,53 +1,80 @@ -import { beforeEach, describe, expect, it, vi } from "vitest"; - -const { - getPluginProvidersMock, - getPluginRuntimeDependenciesMock, - getPluginRuntimePostinstallMock, - resolveRuntimeDependencySnapshotMock, -} = vi.hoisted(() => ({ - getPluginProvidersMock: vi.fn(), - getPluginRuntimeDependenciesMock: vi.fn(), - getPluginRuntimePostinstallMock: vi.fn(), - resolveRuntimeDependencySnapshotMock: vi.fn(), -})); - -vi.mock("@/chat/plugins/registry", () => ({ - getPluginProviders: getPluginProvidersMock, - getPluginRuntimeDependencies: getPluginRuntimeDependenciesMock, - getPluginRuntimePostinstall: getPluginRuntimePostinstallMock, -})); - -vi.mock("@/chat/sandbox/runtime-dependency-snapshots", () => ({ - resolveRuntimeDependencySnapshot: resolveRuntimeDependencySnapshotMock, -})); - +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { runSnapshotCreate } from "@/cli/snapshot-warmup"; +import { 
setPluginCatalogConfig } from "@/chat/plugins/registry"; +import type { + PluginManifest, + PluginRuntimeDependency, + PluginRuntimePostinstallCommand, +} from "@/chat/plugins/types"; +import { disconnectStateAdapter } from "@/chat/state/adapter"; +import { stubTestEnv, useMemoryStateAdapter } from "../../fixtures/vitest"; + +type SnapshotResolver = NonNullable< + Parameters[1] +>["resolveRuntimeDependencySnapshot"]; + +function createPluginManifest( + name: string, + options: { + runtimeDependencies?: PluginRuntimeDependency[]; + runtimePostinstall?: PluginRuntimePostinstallCommand[]; + } = {}, +): PluginManifest { + return { + name, + displayName: name, + description: `${name} plugin`, + capabilities: [], + configKeys: [], + ...(options.runtimeDependencies + ? { runtimeDependencies: options.runtimeDependencies } + : {}), + ...(options.runtimePostinstall + ? { runtimePostinstall: options.runtimePostinstall } + : {}), + }; +} + +function configurePlugins(manifests: PluginManifest[]): void { + setPluginCatalogConfig({ + inlineManifests: manifests.map((manifest) => ({ manifest })), + }); +} describe("snapshot create cli", () => { - beforeEach(() => { - getPluginProvidersMock.mockReset(); - getPluginRuntimeDependenciesMock.mockReset(); - getPluginRuntimePostinstallMock.mockReset(); - resolveRuntimeDependencySnapshotMock.mockReset(); - - getPluginProvidersMock.mockReturnValue([]); - getPluginRuntimeDependenciesMock.mockReturnValue([]); - getPluginRuntimePostinstallMock.mockReturnValue([]); + useMemoryStateAdapter(); + + let resolveRuntimeDependencySnapshot: ReturnType< + typeof vi.fn + >; + + beforeEach(async () => { + stubTestEnv({ JUNIOR_STATE_ADAPTER: "memory" }); + await disconnectStateAdapter(); + configurePlugins([]); + resolveRuntimeDependencySnapshot = vi.fn(); + }); + + afterEach(async () => { + setPluginCatalogConfig(undefined); + await disconnectStateAdapter(); + vi.unstubAllEnvs(); }); it("uses default runtime and timeout", async () => { - 
resolveRuntimeDependencySnapshotMock.mockResolvedValue({ + resolveRuntimeDependencySnapshot.mockResolvedValue({ dependencyCount: 0, cacheHit: false, resolveOutcome: "no_profile", }); const logs: string[] = []; - await runSnapshotCreate((line) => logs.push(line)); + await runSnapshotCreate((line) => logs.push(line), { + resolveRuntimeDependencySnapshot, + }); - expect(resolveRuntimeDependencySnapshotMock).toHaveBeenCalledTimes(1); - expect(resolveRuntimeDependencySnapshotMock).toHaveBeenCalledWith({ + expect(resolveRuntimeDependencySnapshot).toHaveBeenCalledTimes(1); + expect(resolveRuntimeDependencySnapshot).toHaveBeenCalledWith({ runtime: "node22", timeoutMs: 10 * 60 * 1000, onProgress: expect.any(Function), @@ -56,9 +83,11 @@ describe("snapshot create cli", () => { expect(logs).toContain( "Sandbox snapshot inputs: plugins=0 system_dependencies=0 npm_dependencies=0 postinstall_commands=0", ); - await resolveRuntimeDependencySnapshotMock.mock.calls[0][0].onProgress( - "resolve_start", - ); + const resolveParams = resolveRuntimeDependencySnapshot.mock.calls[0]?.[0]; + if (!resolveParams?.onProgress) { + throw new Error("Expected snapshot resolver to be called"); + } + await resolveParams.onProgress("resolve_start"); expect(logs).toContain("Resolving sandbox snapshot profile..."); expect( logs.some((line) => line.includes("resolve_outcome=no_profile")), @@ -66,33 +95,17 @@ describe("snapshot create cli", () => { }); it("logs plugin and dependency inputs before snapshot resolution", async () => { - getPluginProvidersMock.mockReturnValue([ - { - manifest: { - name: "agent-browser", - displayName: "Agent Browser", - runtimeDependencies: [ - { type: "npm", package: "agent-browser", version: "latest" }, - { type: "system", package: "gtk3" }, - ], - runtimePostinstall: [{ cmd: "agent-browser", args: ["install"] }], - }, - }, - { - manifest: { - name: "notion", - displayName: "Notion", - }, - }, - ]); - getPluginRuntimeDependenciesMock.mockReturnValue([ - { type: "system", 
package: "gtk3" }, - { type: "npm", package: "agent-browser", version: "latest" }, + configurePlugins([ + createPluginManifest("agent-browser", { + runtimeDependencies: [ + { type: "npm", package: "agent-browser", version: "latest" }, + { type: "system", package: "gtk3" }, + ], + runtimePostinstall: [{ cmd: "agent-browser", args: ["install"] }], + }), + createPluginManifest("notion"), ]); - getPluginRuntimePostinstallMock.mockReturnValue([ - { cmd: "agent-browser", args: ["install"] }, - ]); - resolveRuntimeDependencySnapshotMock.mockResolvedValue({ + resolveRuntimeDependencySnapshot.mockResolvedValue({ snapshotId: "snap_123", profileHash: "abc", dependencyCount: 2, @@ -102,7 +115,9 @@ describe("snapshot create cli", () => { }); const logs: string[] = []; - await runSnapshotCreate((line) => logs.push(line)); + await runSnapshotCreate((line) => logs.push(line), { + resolveRuntimeDependencySnapshot, + }); expect(logs).toContain("Loaded plugins (2): agent-browser, notion"); expect(logs).toContain( @@ -115,7 +130,7 @@ describe("snapshot create cli", () => { }); it("logs cache hit metadata", async () => { - resolveRuntimeDependencySnapshotMock.mockResolvedValue({ + resolveRuntimeDependencySnapshot.mockResolvedValue({ snapshotId: "snap_123", profileHash: "abc", dependencyCount: 3, @@ -124,7 +139,9 @@ describe("snapshot create cli", () => { }); const logs: string[] = []; - await runSnapshotCreate((line) => logs.push(line)); + await runSnapshotCreate((line) => logs.push(line), { + resolveRuntimeDependencySnapshot, + }); const summary = logs[logs.length - 1]; expect(summary).toContain("resolve_outcome=cache_hit"); @@ -135,10 +152,12 @@ describe("snapshot create cli", () => { }); it("rethrows resolver errors", async () => { - resolveRuntimeDependencySnapshotMock.mockRejectedValue( + resolveRuntimeDependencySnapshot.mockRejectedValue( new Error("OIDC missing"), ); - await expect(runSnapshotCreate()).rejects.toThrow("OIDC missing"); + await expect( + 
runSnapshotCreate(undefined, { resolveRuntimeDependencySnapshot }), + ).rejects.toThrow("OIDC missing"); }); }); diff --git a/packages/junior/tests/unit/app-config.test.ts b/packages/junior/tests/unit/config/app-config.test.ts similarity index 87% rename from packages/junior/tests/unit/app-config.test.ts rename to packages/junior/tests/unit/config/app-config.test.ts index 47fb2046d..fdc8f3d32 100644 --- a/packages/junior/tests/unit/app-config.test.ts +++ b/packages/junior/tests/unit/config/app-config.test.ts @@ -56,6 +56,21 @@ async function writePluginPackage( ); } +async function writeAppPackage( + root: string, + dependencies: Record, +): Promise { + await fs.writeFile( + path.join(root, "package.json"), + JSON.stringify({ + name: "temp-junior-app", + private: true, + dependencies, + }), + "utf8", + ); +} + afterEach(async () => { process.chdir(originalCwd); setAgentPlugins([]); @@ -144,17 +159,9 @@ describe("createApp plugin config", () => { it("loads package plugins with runtime hook plugins", async () => { const tempRoot = await makeTempDir(); await writePluginPackage(tempRoot, "@acme/env-plugin", "env"); - await fs.writeFile( - path.join(tempRoot, "package.json"), - JSON.stringify({ - name: "temp-junior-app", - private: true, - dependencies: { - "@acme/env-plugin": "1.0.0", - }, - }), - "utf8", - ); + await writeAppPackage(tempRoot, { + "@acme/env-plugin": "1.0.0", + }); process.chdir(tempRoot); await createApp({ @@ -193,18 +200,10 @@ describe("createApp plugin config", () => { const tempRoot = await makeTempDir(); await writePluginPackage(tempRoot, "@acme/base-plugin", "base"); await writePluginPackage(tempRoot, "@acme/next-plugin", "next"); - await fs.writeFile( - path.join(tempRoot, "package.json"), - JSON.stringify({ - name: "temp-junior-app", - private: true, - dependencies: { - "@acme/base-plugin": "1.0.0", - "@acme/next-plugin": "1.0.0", - }, - }), - "utf8", - ); + await writeAppPackage(tempRoot, { + "@acme/base-plugin": "1.0.0", + 
"@acme/next-plugin": "1.0.0", + }); process.chdir(tempRoot); await createApp({ @@ -230,17 +229,9 @@ describe("createApp plugin config", () => { it("fails startup and rolls back config when a configured plugin package is missing", async () => { const tempRoot = await makeTempDir(); await writePluginPackage(tempRoot, "@acme/base-plugin", "base"); - await fs.writeFile( - path.join(tempRoot, "package.json"), - JSON.stringify({ - name: "temp-junior-app", - private: true, - dependencies: { - "@acme/base-plugin": "1.0.0", - }, - }), - "utf8", - ); + await writeAppPackage(tempRoot, { + "@acme/base-plugin": "1.0.0", + }); process.chdir(tempRoot); await createApp({ @@ -583,17 +574,9 @@ describe("createApp plugin config", () => { it("loads manifest-only package plugins by package name", async () => { const tempRoot = await makeTempDir(); await writePluginPackage(tempRoot, "@acme/full-plugin", "full"); - await fs.writeFile( - path.join(tempRoot, "package.json"), - JSON.stringify({ - name: "temp-junior-app", - private: true, - dependencies: { - "@acme/full-plugin": "1.0.0", - }, - }), - "utf8", - ); + await writeAppPackage(tempRoot, { + "@acme/full-plugin": "1.0.0", + }); process.chdir(tempRoot); await createApp({ @@ -606,57 +589,7 @@ describe("createApp plugin config", () => { ]); }); - it("rejects duplicate plugin names before mutating app config", async () => { - await createApp({ - plugins: defineJuniorPlugins([]), - }); - - expect(() => - defineJuniorPlugins([ - defineJuniorPlugin({ - manifest: { - name: "dupe", - displayName: "Dupe", - description: "Duplicate plugin", - }, - }), - defineJuniorPlugin({ - manifest: { - name: "dupe", - displayName: "Dupe", - description: "Duplicate plugin", - }, - }), - ]), - ).toThrow('Duplicate plugin registration name "dupe"'); - - expect(getAgentPlugins().map((plugin) => plugin.name)).toEqual([]); - expect(getPluginProviders()).toEqual([]); - }); - - it("rejects invalid plugin names before mutating app config", async () => { - await 
createApp({ - plugins: defineJuniorPlugins([]), - }); - - expect(() => - defineJuniorPlugin({ - manifest: { - name: "GitHub", - displayName: "GitHub", - description: "Invalid plugin", - }, - hooks: {}, - }), - ).toThrow( - 'Junior plugin registration name "GitHub" must be a lowercase plugin identifier', - ); - - expect(getAgentPlugins().map((plugin) => plugin.name)).toEqual([]); - expect(getPluginProviders()).toEqual([]); - }); - - it("rejects legacy state prefixes outside the plugin namespace", async () => { + it("rejects legacy state prefixes outside the trusted plugin namespace", async () => { await createApp({ plugins: defineJuniorPlugins([]), }); diff --git a/packages/junior/tests/unit/config/config-defaults.test.ts b/packages/junior/tests/unit/config/config-defaults.test.ts index c4af6432d..ef8dabaa8 100644 --- a/packages/junior/tests/unit/config/config-defaults.test.ts +++ b/packages/junior/tests/unit/config/config-defaults.test.ts @@ -1,17 +1,39 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; - -vi.mock("@/chat/plugins/registry", () => ({ - isPluginConfigKey: (key: string) => - ["sentry.org", "sentry.project", "github.org", "github.repo"].includes(key), -})); +import { afterEach, describe, expect, it } from "vitest"; import { getConfigDefaults, setConfigDefaults, } from "@/chat/configuration/defaults"; +import { setPluginCatalogConfig } from "@/chat/plugins/registry"; + +function registerConfigKeys(): void { + setPluginCatalogConfig({ + inlineManifests: [ + { + manifest: { + name: "sentry", + displayName: "Sentry", + description: "Sentry", + capabilities: [], + configKeys: ["sentry.org", "sentry.project"], + }, + }, + { + manifest: { + name: "github", + displayName: "GitHub", + description: "GitHub", + capabilities: [], + configKeys: ["github.org", "github.repo"], + }, + }, + ], + }); +} afterEach(() => { setConfigDefaults(undefined); + setPluginCatalogConfig(undefined); }); describe("install config defaults", () => { @@ -20,6 +42,7 @@ 
describe("install config defaults", () => { }); it("stores and retrieves defaults", () => { + registerConfigKeys(); setConfigDefaults({ "sentry.org": "sentry", "github.repo": "myorg/repo" }); expect(getConfigDefaults()).toEqual({ "sentry.org": "sentry", @@ -28,6 +51,7 @@ describe("install config defaults", () => { }); it("clears defaults when called with undefined", () => { + registerConfigKeys(); setConfigDefaults({ "sentry.org": "sentry" }); setConfigDefaults(undefined); expect(getConfigDefaults()).toEqual({}); @@ -52,6 +76,7 @@ describe("install config defaults", () => { }); it("does not mutate the input object", () => { + registerConfigKeys(); const input = { "sentry.org": "sentry" }; setConfigDefaults(input); input["sentry.org"] = "changed"; @@ -59,6 +84,7 @@ describe("install config defaults", () => { }); it("does not share nested input values", () => { + registerConfigKeys(); const input = { "sentry.org": { slug: "sentry" }, }; @@ -68,12 +94,14 @@ describe("install config defaults", () => { }); it("does not expose mutable defaults", () => { + registerConfigKeys(); setConfigDefaults({ "sentry.org": "sentry" }); getConfigDefaults()["sentry.org"] = "changed"; expect(getConfigDefaults()["sentry.org"]).toBe("sentry"); }); it("does not expose nested mutable defaults", () => { + registerConfigKeys(); setConfigDefaults({ "sentry.org": { slug: "sentry" } }); (getConfigDefaults()["sentry.org"] as { slug: string }).slug = "changed"; expect(getConfigDefaults()["sentry.org"]).toEqual({ slug: "sentry" }); diff --git a/packages/junior/tests/unit/config/plugin-set.test.ts b/packages/junior/tests/unit/config/plugin-set.test.ts new file mode 100644 index 000000000..44ad21941 --- /dev/null +++ b/packages/junior/tests/unit/config/plugin-set.test.ts @@ -0,0 +1,47 @@ +import { defineJuniorPlugin } from "@sentry/junior-plugin-api"; +import { describe, expect, it } from "vitest"; +import { defineJuniorPlugins } from "@/plugins"; + +describe("defineJuniorPlugin", () => { + 
it("rejects invalid registration names", () => { + expect(() => + defineJuniorPlugin({ + manifest: { + name: "GitHub", + displayName: "GitHub", + description: "Invalid plugin", + }, + hooks: {}, + }), + ).toThrow( + 'Junior plugin registration name "GitHub" must be a lowercase plugin identifier', + ); + }); +}); + +describe("defineJuniorPlugins", () => { + it("rejects duplicate package and registration names", () => { + expect(() => defineJuniorPlugins(["@acme/plugin", "@acme/plugin"])).toThrow( + 'Duplicate plugin package name "@acme/plugin"', + ); + + expect(() => + defineJuniorPlugins([ + defineJuniorPlugin({ + manifest: { + name: "dupe", + displayName: "Dupe", + description: "Duplicate plugin", + }, + }), + defineJuniorPlugin({ + manifest: { + name: "dupe", + displayName: "Dupe", + description: "Duplicate plugin", + }, + }), + ]), + ).toThrow('Duplicate plugin registration name "dupe"'); + }); +}); diff --git a/packages/junior/tests/unit/vercel.test.ts b/packages/junior/tests/unit/deployment/vercel-config.test.ts similarity index 68% rename from packages/junior/tests/unit/vercel.test.ts rename to packages/junior/tests/unit/deployment/vercel-config.test.ts index 28d9bbacf..a2f6e9e75 100644 --- a/packages/junior/tests/unit/vercel.test.ts +++ b/packages/junior/tests/unit/deployment/vercel-config.test.ts @@ -2,12 +2,10 @@ import fs from "node:fs"; import path from "node:path"; import { fileURLToPath } from "node:url"; import { describe, expect, it } from "vitest"; -import { resolveConversationWorkVisibilityTimeoutSeconds } from "@/chat/task-execution/vercel-callback"; -import { resolveConversationWorkQueueTopic } from "@/chat/task-execution/vercel-queue"; import { juniorVercelConfig } from "@/vercel"; const TEST_DIR = path.dirname(fileURLToPath(import.meta.url)); -const WORKSPACE_ROOT = path.resolve(TEST_DIR, "../../../.."); +const WORKSPACE_ROOT = path.resolve(TEST_DIR, "../../../../.."); describe("juniorVercelConfig", () => { it("returns config with default 
buildCommand", () => { @@ -51,17 +49,3 @@ describe("juniorVercelConfig", () => { expect(config.functions).toBeUndefined(); }); }); - -describe("resolveConversationWorkVisibilityTimeoutSeconds", () => { - it("keeps queue redelivery past the function timeout boundary", () => { - expect(resolveConversationWorkVisibilityTimeoutSeconds(300)).toBe(330); - }); -}); - -describe("resolveConversationWorkQueueTopic", () => { - it("normalizes explicit queue topics", () => { - expect(resolveConversationWorkQueueTopic({ topic: " local_work " })).toBe( - "local_work", - ); - }); -}); diff --git a/packages/junior/tests/unit/misc/home.test.ts b/packages/junior/tests/unit/discovery/home-paths.test.ts similarity index 100% rename from packages/junior/tests/unit/misc/home.test.ts rename to packages/junior/tests/unit/discovery/home-paths.test.ts diff --git a/packages/junior/tests/unit/handlers/handlers-webhooks-lazy-load.test.ts b/packages/junior/tests/unit/handlers/handlers-webhooks-lazy-load.test.ts index 9b536f8c2..496e1090a 100644 --- a/packages/junior/tests/unit/handlers/handlers-webhooks-lazy-load.test.ts +++ b/packages/junior/tests/unit/handlers/handlers-webhooks-lazy-load.test.ts @@ -27,5 +27,5 @@ describe("handlers webhooks module loading", () => { it("loads without requiring runtime env on module load", async () => { const mod = await import("@/handlers/webhooks"); expect(typeof mod.POST).toBe("function"); - }, 15_000); + }); }); diff --git a/packages/junior/tests/unit/handlers/mcp-oauth-callback.test.ts b/packages/junior/tests/unit/handlers/mcp-oauth-callback.test.ts deleted file mode 100644 index 6fdcff99d..000000000 --- a/packages/junior/tests/unit/handlers/mcp-oauth-callback.test.ts +++ /dev/null @@ -1,83 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; - -const { finalizeMcpAuthorizationMock } = vi.hoisted(() => ({ - finalizeMcpAuthorizationMock: vi.fn(), -})); - -vi.mock("@/chat/mcp/oauth", () => ({ - finalizeMcpAuthorization: 
finalizeMcpAuthorizationMock, -})); - -import { GET } from "@/handlers/mcp-oauth-callback"; -import { - createWaitUntilCollector, - type WaitUntilCollector, -} from "../../fixtures/wait-until"; - -let waitUntil: WaitUntilCollector; - -function makeRequest(url: string): Request { - return new Request(url, { method: "GET" }); -} - -describe("mcp oauth callback handler", () => { - beforeEach(() => { - finalizeMcpAuthorizationMock.mockReset(); - waitUntil = createWaitUntilCollector(); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - it("returns HTML 400 when the state parameter is missing", async () => { - const response = await GET( - makeRequest("https://example.com/api/oauth/callback/mcp/demo?code=abc"), - "demo", - waitUntil.fn, - ); - - expect(response.status).toBe(400); - expect(await response.text()).toContain("Missing state parameter"); - expect(finalizeMcpAuthorizationMock).not.toHaveBeenCalled(); - expect(waitUntil.pendingCount()).toBe(0); - }); - - it("does not reflect provider error text in the HTML response", async () => { - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/mcp/demo?state=state-123&error=%3Cscript%3Ealert(1)%3C%2Fscript%3E", - ), - "demo", - waitUntil.fn, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).toContain("The provider returned an authorization error."); - expect(body).not.toContain(""); - expect(waitUntil.pendingCount()).toBe(0); - }); - - it("does not reflect callback exception text in the HTML response", async () => { - finalizeMcpAuthorizationMock.mockRejectedValueOnce( - new Error(""), - ); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/mcp/demo?code=auth-code&state=state-123", - ), - "demo", - waitUntil.fn, - ); - - expect(response.status).toBe(500); - const body = await response.text(); - expect(body).toContain( - "Junior could not finish the authorization callback. 
Return to Slack and retry the original request.", - ); - expect(body).not.toContain(""); - expect(waitUntil.pendingCount()).toBe(0); - }); -}); diff --git a/packages/junior/tests/unit/handlers/oauth-callback.test.ts b/packages/junior/tests/unit/handlers/oauth-callback.test.ts deleted file mode 100644 index 693640e10..000000000 --- a/packages/junior/tests/unit/handlers/oauth-callback.test.ts +++ /dev/null @@ -1,682 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import { http, HttpResponse } from "msw"; -import { mswServer } from "../../msw/server"; - -const { - BASE_URL, - EXAMPLE_OAUTH_CONFIG, - GITHUB_OAUTH_CONFIG, - SENTRY_OAUTH_CONFIG, - lookupSlackRequesterMock, - resolvePluginOAuthAccountMock, - waitUntilCallbacks, -} = vi.hoisted(() => ({ - BASE_URL: "https://example.com", - SENTRY_OAUTH_CONFIG: { - clientIdEnv: "SENTRY_CLIENT_ID", - clientSecretEnv: "SENTRY_CLIENT_SECRET", - authorizeEndpoint: "https://sentry.io/oauth/authorize/", - tokenEndpoint: "https://sentry.io/oauth/token/", - scope: "event:read org:read project:read team:read", - callbackPath: "/api/oauth/callback/sentry", - }, - EXAMPLE_OAUTH_CONFIG: { - clientIdEnv: "EXAMPLE_CLIENT_ID", - clientSecretEnv: "EXAMPLE_CLIENT_SECRET", - authorizeEndpoint: "https://api.example.com/v1/oauth/authorize", - tokenEndpoint: "https://api.example.com/v1/oauth/token", - authorizeParams: { audience: "workspace" }, - tokenAuthMethod: "basic", - tokenExtraHeaders: { "Content-Type": "application/json" }, - callbackPath: "/api/oauth/callback/example", - }, - GITHUB_OAUTH_CONFIG: { - clientIdEnv: "GITHUB_APP_CLIENT_ID", - clientSecretEnv: "GITHUB_APP_CLIENT_SECRET", - authorizeEndpoint: "https://github.com/login/oauth/authorize", - tokenEndpoint: "https://github.com/login/oauth/access_token", - treatEmptyScopeAsUnreported: true, - callbackPath: "/api/oauth/callback/github", - }, - lookupSlackRequesterMock: vi.fn(), - resolvePluginOAuthAccountMock: vi.fn(), - waitUntilCallbacks: [] as 
Array<() => Promise | void>, -})); - -vi.mock("@/chat/plugins/registry", () => ({ - getPluginDisplayName: (provider: string) => { - if (provider === "sentry") { - return "Sentry"; - } - if (provider === "example") { - return "Example"; - } - if (provider === "github") { - return "GitHub"; - } - return undefined; - }, - getPluginOAuthConfig: (provider: string) => { - if (provider === "sentry") { - return SENTRY_OAUTH_CONFIG; - } - if (provider === "example") { - return EXAMPLE_OAUTH_CONFIG; - } - if (provider === "github") { - return GITHUB_OAUTH_CONFIG; - } - return undefined; - }, - isPluginProvider: (provider: string) => - provider === "sentry" || provider === "example" || provider === "github", - getPluginCapabilityProviders: () => [], - isPluginCapability: () => false, - isPluginConfigKey: () => false, - getPluginProviders: () => [], - getPluginSkillRoots: () => [], - createPluginBroker: () => { - throw new Error("not implemented in test"); - }, -})); - -vi.mock("@/chat/config", async (importOriginal) => { - const original = await importOriginal(); - const memoryConfig = original.readChatConfig({ - ...process.env, - JUNIOR_STATE_ADAPTER: "memory", - }); - return { - ...original, - botConfig: { - ...memoryConfig.bot, - userName: "junior", - }, - getChatConfig: () => memoryConfig, - }; -}); - -vi.mock("@/chat/slack/user", () => ({ - lookupSlackRequester: lookupSlackRequesterMock, -})); - -vi.mock("@/chat/plugins/credential-hooks", () => ({ - resolvePluginOAuthAccount: resolvePluginOAuthAccountMock, -})); - -import { createUserTokenStore } from "@/chat/capabilities/factory"; -import { disconnectStateAdapter, getStateAdapter } from "@/chat/state/adapter"; -import { GET } from "@/handlers/oauth-callback"; -import type { WaitUntilFn } from "@/handlers/types"; - -const ORIGINAL_ENV = { ...process.env }; - -const testWaitUntil: WaitUntilFn = (task) => { - waitUntilCallbacks.push(typeof task === "function" ? 
task : () => task); -}; - -beforeEach(async () => { - process.env.JUNIOR_STATE_ADAPTER = "memory"; - await disconnectStateAdapter(); - await getStateAdapter().connect(); - lookupSlackRequesterMock.mockReset(); - lookupSlackRequesterMock.mockResolvedValue({ - platform: "slack", - teamId: "T777", - userId: "U777", - userName: "requester", - }); - resolvePluginOAuthAccountMock.mockReset(); - resolvePluginOAuthAccountMock.mockResolvedValue(undefined); - waitUntilCallbacks.length = 0; -}); - -afterEach(async () => { - process.env = { ...ORIGINAL_ENV }; - vi.restoreAllMocks(); - await disconnectStateAdapter(); -}); - -function makeRequest(url: string): Request { - return new Request(url, { method: "GET" }); -} - -async function putStoredState(key: string, value: unknown): Promise { - await getStateAdapter().set(key, value); -} - -async function getStoredState(key: string): Promise { - return await getStateAdapter().get(key); -} - -async function getStoredTokens(userId: string, provider: string) { - return await createUserTokenStore().get(userId, provider); -} - -function configureSentryOAuthEnv() { - process.env.SENTRY_CLIENT_ID = "client-id"; - process.env.SENTRY_CLIENT_SECRET = "client-secret"; - process.env.JUNIOR_BASE_URL = BASE_URL; -} - -function configureExampleOAuthEnv() { - process.env.EXAMPLE_CLIENT_ID = "example-client-id"; - process.env.EXAMPLE_CLIENT_SECRET = "example-client-secret"; - process.env.JUNIOR_BASE_URL = BASE_URL; -} - -function configureGitHubOAuthEnv() { - process.env.GITHUB_APP_CLIENT_ID = "github-client-id"; - process.env.GITHUB_APP_CLIENT_SECRET = "github-client-secret"; - process.env.JUNIOR_BASE_URL = BASE_URL; -} - -type CapturedTokenRequest = { - body: string; - headers: Record; - method: string; - url: string; -}; - -async function captureTokenRequest( - request: Request, -): Promise { - return { - url: request.url, - method: request.method, - headers: Object.fromEntries(request.headers.entries()), - body: await request.text(), - }; -} - 
-const TOKEN_ENDPOINTS = [ - SENTRY_OAUTH_CONFIG.tokenEndpoint, - EXAMPLE_OAUTH_CONFIG.tokenEndpoint, - GITHUB_OAUTH_CONFIG.tokenEndpoint, -]; - -function mockJsonFetch( - payload: Record, -): CapturedTokenRequest[] { - const requests: CapturedTokenRequest[] = []; - mswServer.use( - ...TOKEN_ENDPOINTS.map((endpoint) => - http.post(endpoint, async ({ request }) => { - requests.push(await captureTokenRequest(request)); - return HttpResponse.json(payload); - }), - ), - ); - return requests; -} - -function mockFailedFetch(status: number): CapturedTokenRequest[] { - const requests: CapturedTokenRequest[] = []; - mswServer.use( - ...TOKEN_ENDPOINTS.map((endpoint) => - http.post(endpoint, async ({ request }) => { - requests.push(await captureTokenRequest(request)); - return HttpResponse.text("failed", { status }); - }), - ), - ); - return requests; -} - -function mockInvalidJsonFetch(): CapturedTokenRequest[] { - const requests: CapturedTokenRequest[] = []; - mswServer.use( - ...TOKEN_ENDPOINTS.map((endpoint) => - http.post(endpoint, async ({ request }) => { - requests.push(await captureTokenRequest(request)); - return HttpResponse.text("not-json", { - headers: { "Content-Type": "application/json" }, - }); - }), - ), - ); - return requests; -} - -describe("oauth callback handler", () => { - it("returns styled HTML 404 for unknown provider", async () => { - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/unknown?code=abc&state=xyz", - ), - "unknown", - testWaitUntil, - ); - - expect(response.status).toBe(404); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("Unknown provider"); - }); - - it("returns styled HTML 400 when code or state is missing", async () => { - const response = await GET( - makeRequest("https://example.com/api/oauth/callback/sentry"), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).toContain(""); - 
expect(body).toContain("missing required parameters"); - }); - - it("returns styled HTML 400 for expired state", async () => { - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?code=abc&state=nonexistent", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("expired"); - expect(body).toContain( - "ask Junior to connect your Sentry account again to get a new link", - ); - }); - - it("returns styled HTML 400 for provider mismatch", async () => { - const stateKey = "oauth-state:test-state-123"; - await putStoredState(stateKey, { - userId: "U123", - provider: "github", // mismatch with sentry - }); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?code=abc&state=test-state-123", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("mismatch"); - }); - - it("deletes state key after reading (one-time use)", async () => { - const stateKey = "oauth-state:test-state-456"; - await putStoredState(stateKey, { - userId: "U123", - provider: "sentry", - }); - - configureSentryOAuthEnv(); - mockJsonFetch({ - access_token: "new-access", - refresh_token: "new-refresh", - expires_in: 3600, - }); - - await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?code=auth-code&state=test-state-456", - ), - "sentry", - testWaitUntil, - ); - - expect(await getStoredState(stateKey)).toBeFalsy(); - }); - - it("returns styled HTML 500 when client credentials are missing", async () => { - const stateKey = "oauth-state:test-state-789"; - await putStoredState(stateKey, { - userId: "U123", - provider: "sentry", - }); - delete process.env.SENTRY_CLIENT_ID; - delete process.env.SENTRY_CLIENT_SECRET; - - const response = await GET( - makeRequest( - 
"https://example.com/api/oauth/callback/sentry?code=abc&state=test-state-789", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(500); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("credentials"); - }); - - it("exchanges code for tokens and stores them", async () => { - const stateKey = "oauth-state:exchange-test"; - await putStoredState(stateKey, { - userId: "U456", - provider: "sentry", - channelId: "C123", - threadTs: "123.456", - }); - - configureSentryOAuthEnv(); - mockJsonFetch({ - access_token: "new-access-token", - refresh_token: "new-refresh-token", - expires_in: 7200, - }); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?code=valid-code&state=exchange-test", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(200); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("Sentry account connected"); - - const stored = (await getStoredTokens("U456", "sentry")) as { - accessToken: string; - refreshToken: string; - scope?: string; - }; - expect(stored).toBeDefined(); - expect(stored.accessToken).toBe("new-access-token"); - expect(stored.refreshToken).toBe("new-refresh-token"); - expect(stored.scope).toBe("event:read org:read project:read team:read"); - }); - - it("uses basic auth and json body for token exchange without expires_in", async () => { - const stateKey = "oauth-state:example-exchange"; - await putStoredState(stateKey, { - userId: "U999", - provider: "example", - }); - - configureExampleOAuthEnv(); - const requests = mockJsonFetch({ - access_token: "example-access-token", - refresh_token: "example-refresh-token", - }); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/example?code=valid-code&state=example-exchange", - ), - "example", - testWaitUntil, - ); - - expect(response.status).toBe(200); - expect(requests).toHaveLength(1); - 
expect(requests[0]).toMatchObject({ - url: "https://api.example.com/v1/oauth/token", - method: "POST", - headers: expect.objectContaining({ - accept: "application/json", - authorization: `Basic ${Buffer.from("example-client-id:example-client-secret").toString("base64")}`, - "content-type": "application/json", - }), - }); - expect(JSON.parse(requests[0]!.body)).toEqual({ - grant_type: "authorization_code", - code: "valid-code", - redirect_uri: `${BASE_URL}/api/oauth/callback/example`, - }); - - const stored = (await getStoredTokens("U999", "example")) as { - accessToken: string; - refreshToken: string; - expiresAt?: number; - }; - expect(stored).toMatchObject({ - accessToken: "example-access-token", - refreshToken: "example-refresh-token", - }); - expect(stored.expiresAt).toBeUndefined(); - }); - - it("stores GitHub App user tokens when GitHub returns an empty OAuth scope", async () => { - const stateKey = "oauth-state:github-exchange"; - await putStoredState(stateKey, { - userId: "U777", - provider: "github", - }); - - configureGitHubOAuthEnv(); - resolvePluginOAuthAccountMock.mockResolvedValue({ - id: "12345", - label: "requester", - url: "https://github.com/requester", - }); - mockJsonFetch({ - access_token: "github-user-token", - refresh_token: "github-refresh-token", - expires_in: 28_800, - scope: "", - }); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/github?code=valid-code&state=github-exchange", - ), - "github", - testWaitUntil, - ); - - expect(response.status).toBe(200); - const stored = (await getStoredTokens("U777", "github")) as { - account?: { id: string; label?: string; url?: string }; - accessToken: string; - refreshToken: string; - scope?: string; - }; - expect(stored).toMatchObject({ - accessToken: "github-user-token", - account: { - id: "12345", - label: "requester", - url: "https://github.com/requester", - }, - refreshToken: "github-refresh-token", - }); - expect(stored.scope).toBeUndefined(); - 
expect(resolvePluginOAuthAccountMock).toHaveBeenCalledWith({ - provider: "github", - tokens: expect.objectContaining({ - accessToken: "github-user-token", - refreshToken: "github-refresh-token", - }), - }); - }); - - it("rejects callback grants whose explicit scope is missing required access", async () => { - const stateKey = "oauth-state:missing-scope"; - await putStoredState(stateKey, { - userId: "U456", - provider: "sentry", - channelId: "C123", - threadTs: "123.456", - }); - - configureSentryOAuthEnv(); - mockJsonFetch({ - access_token: "new-access-token", - refresh_token: "new-refresh-token", - expires_in: 7200, - scope: "event:read org:read project:read", - }); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?code=valid-code&state=missing-scope", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).toContain("did not grant the access Junior requires"); - expect(await getStoredTokens("U456", "sentry")).toBeUndefined(); - expect(waitUntilCallbacks).toHaveLength(0); - }); - - it("returns styled HTML 500 when token exchange fails", async () => { - const stateKey = "oauth-state:fail-exchange"; - await putStoredState(stateKey, { - userId: "U789", - provider: "sentry", - }); - - configureSentryOAuthEnv(); - mockFailedFetch(400); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?code=bad-code&state=fail-exchange", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(500); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("failed"); - }); - - it("returns styled HTML 500 when token exchange returns invalid JSON", async () => { - const stateKey = "oauth-state:invalid-token-json"; - await putStoredState(stateKey, { - userId: "U789", - provider: "sentry", - }); - - configureSentryOAuthEnv(); - mockInvalidJsonFetch(); - - const response = await 
GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?code=bad-code&state=invalid-token-json", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(500); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("incomplete token response"); - expect(await getStoredTokens("U789", "sentry")).toBeUndefined(); - }); - - it("returns styled HTML 400 when user denies authorization", async () => { - const stateKey = "oauth-state:deny-test"; - await putStoredState(stateKey, { - userId: "U999", - provider: "sentry", - }); - - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?error=access_denied&state=deny-test", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("declined"); - expect(body).toContain( - "ask Junior to connect your Sentry account again if you change your mind", - ); - expect(body).not.toContain("auth command"); - expect(await getStoredState(stateKey)).toBeFalsy(); - }); - - it("returns styled HTML 400 for provider-returned errors", async () => { - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?error=server_error&state=some-state", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).toContain(""); - expect(body).toContain("server_error"); - }); - - it("escapes HTML in provider error parameter to prevent XSS", async () => { - const response = await GET( - makeRequest( - "https://example.com/api/oauth/callback/sentry?error=%3Cscript%3Ealert(1)%3C/script%3E&state=xss-test", - ), - "sentry", - testWaitUntil, - ); - - expect(response.status).toBe(400); - const body = await response.text(); - expect(body).not.toContain("