Skip to content

Commit a08d8db

Browse files
waleedlatif1claude
andcommitted
improvement(mcp): per-server tool queries + negative cache so one slow server can't block the workspace
Move MCP tool discovery off the workspace-aggregated `Promise.all` fan-out and onto per-server React Query keys, matching how Cursor and Claude Code render remote MCP. `useMcpToolsQuery` is now a `useQueries` combiner: each server has its own cache entry, its own loading state, and a slow neighbor never gates the others. Public shape stays compatible with existing consumers. Add a short-TTL negative cache: when `listTools` fails (timeout, connection error, etc.) we mark the server unhealthy for 30s so subsequent discovery calls short-circuit instead of re-paying the timeout. OAuth-required errors are exempt so re-auth retries immediately. Drop `LIST_TOOLS_TIMEOUT_MS` from 30s to 10s to bound the worst-case first failure. Invalidations are per-server where the action is per-server (OAuth popup, per-server SSE event, refresh, update, delete). Bulk operations stay workspace-broad. Adds tests for the negative-cache behavior and the OAuth exemption. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 952eb12 commit a08d8db

6 files changed

Lines changed: 291 additions & 33 deletions

File tree

apps/sim/hooks/mcp/use-mcp-oauth-popup.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,15 @@ export function useMcpOauthPopup({ workspaceId }: UseMcpOauthPopupProps) {
8080
}
8181
if (data.ok) {
8282
queryClient.invalidateQueries({ queryKey: mcpKeys.serversList(workspaceId) })
83-
queryClient.invalidateQueries({ queryKey: mcpKeys.toolsList(workspaceId) })
83+
if (data.serverId) {
84+
// Only the freshly-authorized server needs a refetch; every other
85+
// server's cached tools stay warm so a slow neighbor can't gate the UI.
86+
queryClient.invalidateQueries({
87+
queryKey: mcpKeys.serverToolsList(workspaceId, data.serverId),
88+
})
89+
} else {
90+
queryClient.invalidateQueries({ queryKey: mcpKeys.serverTools() })
91+
}
8492
queryClient.invalidateQueries({ queryKey: mcpKeys.storedToolsList(workspaceId) })
8593
toast.success('Server authorized')
8694
} else {

apps/sim/hooks/mcp/use-mcp-tools.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ export interface UseMcpToolsResult {
3939
export function useMcpTools(workspaceId: string): UseMcpToolsResult {
4040
const queryClient = useQueryClient()
4141

42-
const { data: mcpToolsData = [], isLoading, error: queryError } = useMcpToolsQuery(workspaceId)
42+
const { data: mcpToolsData, isLoading, error: queryError } = useMcpToolsQuery(workspaceId)
4343

4444
const mcpTools = useMemo<McpToolForUI[]>(() => {
4545
return mcpToolsData.map((tool) => ({
@@ -65,7 +65,7 @@ export function useMcpTools(workspaceId: string): UseMcpToolsResult {
6565
logger.info('Refreshing MCP tools', { forceRefresh, workspaceId })
6666

6767
await queryClient.invalidateQueries({
68-
queryKey: mcpKeys.toolsList(workspaceId),
68+
queryKey: mcpKeys.serverTools(),
6969
refetchType: forceRefresh ? 'active' : 'all',
7070
})
7171
},

apps/sim/hooks/queries/mcp.ts

Lines changed: 121 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1-
import { useEffect } from 'react'
1+
import { useEffect, useMemo } from 'react'
22
import { createLogger } from '@sim/logger'
33
import { getErrorMessage } from '@sim/utils/errors'
4-
import { keepPreviousData, useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
4+
import {
5+
keepPreviousData,
6+
useMutation,
7+
useQueries,
8+
useQuery,
9+
useQueryClient,
10+
} from '@tanstack/react-query'
511
import { ApiClientError } from '@/lib/api/client/errors'
612
import { requestJson } from '@/lib/api/client/request'
713
import {
@@ -41,6 +47,9 @@ export const mcpKeys = {
4147
serversList: (workspaceId?: string) => [...mcpKeys.servers(), workspaceId ?? ''] as const,
4248
tools: () => [...mcpKeys.all, 'tools'] as const,
4349
toolsList: (workspaceId?: string) => [...mcpKeys.tools(), workspaceId ?? ''] as const,
50+
serverTools: () => [...mcpKeys.all, 'serverTools'] as const,
51+
serverToolsList: (workspaceId?: string, serverId?: string) =>
52+
[...mcpKeys.serverTools(), workspaceId ?? '', serverId ?? ''] as const,
4453
storedTools: () => [...mcpKeys.all, 'storedTools'] as const,
4554
storedToolsList: (workspaceId?: string) => [...mcpKeys.storedTools(), workspaceId ?? ''] as const,
4655
allowedDomains: () => [...mcpKeys.all, 'allowedDomains'] as const,
@@ -92,11 +101,16 @@ export function useMcpServers(workspaceId: string) {
92101
async function fetchMcpTools(
93102
workspaceId: string,
94103
forceRefresh = false,
95-
signal?: AbortSignal
104+
signal?: AbortSignal,
105+
serverId?: string
96106
): Promise<McpTool[]> {
97107
try {
98108
const data = await requestJson(discoverMcpToolsContract, {
99-
query: { workspaceId, refresh: forceRefresh || undefined },
109+
query: {
110+
workspaceId,
111+
refresh: forceRefresh || undefined,
112+
...(serverId ? { serverId } : {}),
113+
},
100114
signal,
101115
})
102116
return data.data.tools
@@ -108,24 +122,93 @@ async function fetchMcpTools(
108122
}
109123
}
110124

111-
export function useMcpToolsQuery(workspaceId: string) {
125+
/**
126+
* Per-server tools query. Each server has its own React Query entry, so a slow
127+
* or hung server can never block another server's load — same model Cursor and
128+
* Claude Code use for remote MCP.
129+
*/
130+
export function useMcpServerTools(workspaceId: string, serverId?: string) {
112131
return useQuery({
113-
queryKey: mcpKeys.toolsList(workspaceId),
114-
queryFn: ({ signal }) => fetchMcpTools(workspaceId, false, signal),
115-
enabled: !!workspaceId,
132+
queryKey: mcpKeys.serverToolsList(workspaceId, serverId),
133+
queryFn: ({ signal }) => fetchMcpTools(workspaceId, false, signal, serverId),
134+
enabled: !!workspaceId && !!serverId,
116135
retry: false,
117136
staleTime: 30 * 1000,
118-
placeholderData: keepPreviousData,
119137
})
120138
}
121139

140+
/**
141+
* Workspace-level aggregate, derived from N parallel per-server queries via
142+
* `useQueries`. Public shape stays compatible with the prior workspace-keyed
143+
* query (flat `McpTool[]`, single `isLoading`, single error) so existing
144+
* consumers don't change. A slow server only flips its own per-server state;
145+
* fast servers populate immediately.
146+
*/
147+
export function useMcpToolsQuery(workspaceId: string) {
148+
const { data: servers } = useMcpServers(workspaceId)
149+
150+
const serverIds = useMemo(() => (servers ? servers.map((s) => s.id).sort() : []), [servers])
151+
152+
const results = useQueries({
153+
queries: serverIds.map((serverId) => ({
154+
queryKey: mcpKeys.serverToolsList(workspaceId, serverId),
155+
queryFn: ({ signal }: { signal?: AbortSignal }) =>
156+
fetchMcpTools(workspaceId, false, signal, serverId),
157+
enabled: !!workspaceId,
158+
retry: false,
159+
staleTime: 30 * 1000,
160+
})),
161+
})
162+
163+
return useMemo(() => {
164+
const tools: McpTool[] = []
165+
let hasData = false
166+
let isLoading = false
167+
let firstError: Error | null = null
168+
for (const result of results) {
169+
if (result.data) {
170+
tools.push(...result.data)
171+
hasData = true
172+
}
173+
if (result.isLoading) isLoading = true
174+
if (!firstError && result.error instanceof Error) firstError = result.error
175+
}
176+
return {
177+
data: tools,
178+
// Match the prior semantics: "loading" means we have nothing to show yet.
179+
// Once any server has returned, the UI can render that and let slow
180+
// neighbors fill in incrementally without keeping the spinner up.
181+
isLoading: isLoading && !hasData,
182+
isFetching: results.some((r) => r.isFetching),
183+
error: firstError,
184+
perServer: results,
185+
}
186+
}, [results])
187+
}
188+
122189
export function useForceRefreshMcpTools() {
123190
const queryClient = useQueryClient()
124191

125192
return useMutation({
126-
mutationFn: (workspaceId: string) => fetchMcpTools(workspaceId, true),
193+
mutationFn: async (workspaceId: string) => {
194+
// Fetch each server's tools in parallel with `refresh=true`, populating
195+
// the per-server query cache directly. A slow server only blocks its own
196+
// card — fast servers light up as soon as they return.
197+
const servers = queryClient.getQueryData<McpServer[]>(mcpKeys.serversList(workspaceId)) ?? []
198+
const results = await Promise.allSettled(
199+
servers.map(async (server) => {
200+
const tools = await fetchMcpTools(workspaceId, true, undefined, server.id)
201+
queryClient.setQueryData(mcpKeys.serverToolsList(workspaceId, server.id), tools)
202+
return tools
203+
})
204+
)
205+
return results
206+
.filter((r): r is PromiseFulfilledResult<McpTool[]> => r.status === 'fulfilled')
207+
.flatMap((r) => r.value)
208+
},
127209
onSettled: (_data, _error, workspaceId) => {
128-
queryClient.invalidateQueries({ queryKey: mcpKeys.toolsList(workspaceId) })
210+
// Per-server caches were already populated inside `mutationFn` via
211+
// `setQueryData`, so we only need to refresh the dependent views.
129212
queryClient.invalidateQueries({ queryKey: mcpKeys.serversList(workspaceId) })
130213
queryClient.invalidateQueries({ queryKey: mcpKeys.storedToolsList(workspaceId) })
131214
},
@@ -175,7 +258,7 @@ export function useCreateMcpServer() {
175258
},
176259
onSettled: (_data, _error, variables) => {
177260
queryClient.invalidateQueries({ queryKey: mcpKeys.serversList(variables.workspaceId) })
178-
queryClient.invalidateQueries({ queryKey: mcpKeys.toolsList(variables.workspaceId) })
261+
queryClient.invalidateQueries({ queryKey: mcpKeys.serverTools() })
179262
},
180263
})
181264
}
@@ -237,7 +320,9 @@ export function useDeleteMcpServer() {
237320
},
238321
onSettled: (_data, _error, variables) => {
239322
queryClient.invalidateQueries({ queryKey: mcpKeys.serversList(variables.workspaceId) })
240-
queryClient.invalidateQueries({ queryKey: mcpKeys.toolsList(variables.workspaceId) })
323+
queryClient.removeQueries({
324+
queryKey: mcpKeys.serverToolsList(variables.workspaceId, variables.serverId),
325+
})
241326
queryClient.invalidateQueries({ queryKey: mcpKeys.storedToolsList(variables.workspaceId) })
242327
},
243328
})
@@ -304,7 +389,9 @@ export function useUpdateMcpServer() {
304389
},
305390
onSettled: (_data, _error, variables) => {
306391
queryClient.invalidateQueries({ queryKey: mcpKeys.serversList(variables.workspaceId) })
307-
queryClient.invalidateQueries({ queryKey: mcpKeys.toolsList(variables.workspaceId) })
392+
queryClient.invalidateQueries({
393+
queryKey: mcpKeys.serverToolsList(variables.workspaceId, variables.serverId),
394+
})
308395
},
309396
})
310397
}
@@ -334,7 +421,9 @@ export function useRefreshMcpServer() {
334421
},
335422
onSettled: (_data, _error, variables) => {
336423
queryClient.invalidateQueries({ queryKey: mcpKeys.serversList(variables.workspaceId) })
337-
queryClient.invalidateQueries({ queryKey: mcpKeys.toolsList(variables.workspaceId) })
424+
queryClient.invalidateQueries({
425+
queryKey: mcpKeys.serverToolsList(variables.workspaceId, variables.serverId),
426+
})
338427
queryClient.invalidateQueries({ queryKey: mcpKeys.storedToolsList(variables.workspaceId) })
339428
},
340429
})
@@ -386,8 +475,14 @@ export function useMcpToolsEvents(workspaceId: string) {
386475
useEffect(() => {
387476
if (!workspaceId) return
388477

389-
const invalidate = () => {
390-
queryClient.invalidateQueries({ queryKey: mcpKeys.toolsList(workspaceId) })
478+
const invalidate = (serverId?: string) => {
479+
if (serverId) {
480+
queryClient.invalidateQueries({
481+
queryKey: mcpKeys.serverToolsList(workspaceId, serverId),
482+
})
483+
} else {
484+
queryClient.invalidateQueries({ queryKey: mcpKeys.serverTools() })
485+
}
391486
queryClient.invalidateQueries({ queryKey: mcpKeys.serversList(workspaceId) })
392487
queryClient.invalidateQueries({ queryKey: mcpKeys.storedToolsList(workspaceId) })
393488
queryClient.invalidateQueries({ queryKey: workflowMcpServerKeys.all })
@@ -398,8 +493,15 @@ export function useMcpToolsEvents(workspaceId: string) {
398493
if (!entry) {
399494
const source = new EventSource(`/api/mcp/events?workspaceId=${workspaceId}`)
400495

401-
source.addEventListener('tools_changed', () => {
402-
invalidate()
496+
source.addEventListener('tools_changed', (e) => {
497+
let serverId: string | undefined
498+
try {
499+
const parsed = JSON.parse((e as MessageEvent).data) as { serverId?: string }
500+
serverId = parsed.serverId
501+
} catch {
502+
// Older event payload or non-JSON — fall back to workspace-wide invalidation.
503+
}
504+
invalidate(serverId)
403505
})
404506

405507
source.onerror = () => {

apps/sim/lib/mcp/service.test.ts

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ function tool(name: string, serverId: string) {
124124
describe('McpService.discoverTools per-server caching', () => {
125125
beforeEach(async () => {
126126
vi.clearAllMocks()
127+
// `clearAllMocks` does not drain `.mockResolvedValueOnce` queues; reset
128+
// listTools so a previous test's unconsumed mock doesn't leak into the next.
129+
mockListTools.mockReset()
127130
mockIsDomainAllowed.mockReturnValue(true)
128131
mockValidateSsrf.mockResolvedValue('1.2.3.4')
129132
mockValidateDomain.mockImplementation(() => undefined)
@@ -162,11 +165,14 @@ describe('McpService.discoverTools per-server caching', () => {
162165
expect(first.map((t) => t.name)).toEqual(['a1'])
163166

164167
mockListTools.mockClear()
165-
mockListTools.mockResolvedValueOnce([tool('b1', 'mcp-b')])
166168

169+
// a1's positive cache is intact (the failure didn't poison it). b is now
170+
// negative-cached so it's skipped instead of re-blocking — see
171+
// "negative-caches a failed server so the next discoverTools skips it"
172+
// below for the full assertion.
167173
const second = await mcpService.discoverTools(USER_ID, WORKSPACE_ID)
168-
expect(second.map((t) => t.name).sort()).toEqual(['a1', 'b1'])
169-
expect(mockListTools).toHaveBeenCalledTimes(1)
174+
expect(second.map((t) => t.name)).toEqual(['a1'])
175+
expect(mockListTools).not.toHaveBeenCalled()
170176
})
171177

172178
it("forceRefresh bypasses every server's cache", async () => {
@@ -259,4 +265,60 @@ describe('McpService.discoverTools per-server caching', () => {
259265
expect(second.map((t) => t.name)).toEqual(['a1'])
260266
expect(mockListTools).not.toHaveBeenCalled()
261267
})
268+
269+
it('negative-caches a failed server so the next discoverTools skips it', async () => {
270+
mockGetWorkspaceServersRows.mockResolvedValue([dbRow('mcp-a', 'A'), dbRow('mcp-b', 'B')])
271+
mockListTools
272+
.mockResolvedValueOnce([tool('a1', 'mcp-a')])
273+
.mockRejectedValueOnce(new Error('Request timed out'))
274+
275+
await mcpService.discoverTools(USER_ID, WORKSPACE_ID)
276+
expect(mockListTools).toHaveBeenCalledTimes(2)
277+
278+
mockListTools.mockClear()
279+
// Second call: a1 is success-cached, b is failure-cached. Neither should
280+
// hit the live transport — the slow server no longer blocks the response.
281+
const second = await mcpService.discoverTools(USER_ID, WORKSPACE_ID)
282+
expect(second.map((t) => t.name)).toEqual(['a1'])
283+
expect(mockListTools).not.toHaveBeenCalled()
284+
})
285+
286+
it('successful discoverServerTools clears the negative cache', async () => {
287+
mockGetWorkspaceServersRows.mockResolvedValue([dbRow('mcp-a', 'A')])
288+
mockListTools.mockRejectedValueOnce(new Error('Request timed out'))
289+
290+
await expect(mcpService.discoverServerTools(USER_ID, 'mcp-a', WORKSPACE_ID)).rejects.toThrow(
291+
'Request timed out'
292+
)
293+
294+
// Following the failure, the negative cache would short-circuit discoverTools.
295+
// Reconnecting the server (e.g. after OAuth) should bring it back to live.
296+
mockListTools.mockClear()
297+
mockListTools.mockResolvedValueOnce([tool('a1', 'mcp-a')])
298+
const tools = await mcpService.discoverServerTools(USER_ID, 'mcp-a', WORKSPACE_ID)
299+
expect(tools.map((t) => t.name)).toEqual(['a1'])
300+
301+
// discoverTools now sees the cleared negative cache + primed positive cache.
302+
mockListTools.mockClear()
303+
const after = await mcpService.discoverTools(USER_ID, WORKSPACE_ID)
304+
expect(after.map((t) => t.name)).toEqual(['a1'])
305+
expect(mockListTools).not.toHaveBeenCalled()
306+
})
307+
308+
it('does not negative-cache OAuth-required errors', async () => {
309+
mockGetWorkspaceServersRows.mockResolvedValue([dbRow('mcp-a', 'A')])
310+
mockListTools.mockRejectedValueOnce(new McpOauthAuthorizationRequiredError('mcp-a', 'A'))
311+
312+
await mcpService.discoverTools(USER_ID, WORKSPACE_ID)
313+
expect(mockListTools).toHaveBeenCalledTimes(1)
314+
315+
// Second call must still attempt the live transport — OAuth re-auth has
316+
// its own pathway and a stale negative cache would make reconnects
317+
// silently fail until the TTL expired.
318+
mockListTools.mockClear()
319+
mockListTools.mockResolvedValueOnce([tool('a1', 'mcp-a')])
320+
const after = await mcpService.discoverTools(USER_ID, WORKSPACE_ID)
321+
expect(after.map((t) => t.name)).toEqual(['a1'])
322+
expect(mockListTools).toHaveBeenCalledTimes(1)
323+
})
262324
})

0 commit comments

Comments
 (0)