From e238f7fb83b7945c7cddd0d5cc7e26b2d11588e8 Mon Sep 17 00:00:00 2001
From: Colby McHenry <me@colbymchenry.com>
Date: Thu, 7 May 2026 21:33:16 -0500
Subject: [PATCH] feat(mcp): emit server-level instructions in initialize
 response

Adds a universal tool-selection playbook to the `instructions` field of
the MCP initialize response. Clients that honor the field (Claude Code,
Cursor, opencode, LangChain, OpenAI Agent SDK) can surface it in the
agent's system prompt. Without it, agents have to infer tool composition
from individual tool descriptions and tend to, for example, walk callers
manually instead of reaching for codegraph_impact.

Scoped tight: only the 9 tools that exist on main today
(search/context/callers/callees/impact/node/explore/files/status), no
"(when present)" references to unmerged tools, no per-language
guidance. ~40 lines of useful guidance.

Salvaged from #121, which bundled the instructions with #117's MCP
tool-registry refactor and referenced many tools that don't exist on
main.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/mcp/index.ts               |  7 ++++-
 src/mcp/server-instructions.ts | 55 ++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 src/mcp/server-instructions.ts

diff --git a/src/mcp/index.ts b/src/mcp/index.ts
index bc3552ae..e516631a 100644
--- a/src/mcp/index.ts
+++ b/src/mcp/index.ts
@@ -19,6 +19,7 @@ import * as path from 'path';
 import CodeGraph, { findNearestCodeGraphRoot } from '../index';
 import { StdioTransport, JsonRpcRequest, JsonRpcNotification, ErrorCodes } from './transport';
 import { tools, ToolHandler } from './tools';
+import { SERVER_INSTRUCTIONS } from './server-instructions';
 
 /**
  * Convert a file:// URI to a filesystem path.
@@ -268,13 +269,17 @@ export class MCPServer {
     // Try to initialize the default project (non-fatal if it fails)
     await this.tryInitializeDefault(projectPath);
 
-    // We accept the client's protocol version but respond with our supported version
+    // We accept the client's protocol version but respond with our supported version.
+    // The `instructions` field is a hint that MCP clients may surface in the
+    // agent's system prompt, which makes it the right place for the universal
+    // tool-selection playbook that complements the individual tool descriptions.
     this.transport.sendResult(request.id, {
       protocolVersion: PROTOCOL_VERSION,
       capabilities: {
         tools: {},
       },
       serverInfo: SERVER_INFO,
+      instructions: SERVER_INSTRUCTIONS,
     });
   }
 
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
new file mode 100644
index 00000000..0c715ea8
--- /dev/null
+++ b/src/mcp/server-instructions.ts
@@ -0,0 +1,55 @@
+/**
+ * Server-level instructions emitted in the MCP `initialize` response.
+ *
+ * MCP clients that honor the `instructions` field (Claude Code, Cursor,
+ * opencode, LangChain, OpenAI Agent SDK, …) may surface this text in the
+ * agent's system prompt, giving the agent a high-level playbook for the
+ * codegraph toolset alongside the individual tool descriptions.
+ *
+ * Sections to maintain when editing this:
+ *   - Tool selection by intent (which tool for which question)
+ *   - Common chains (refactor planning = X then Y)
+ *   - Anti-patterns (don't grep when codegraph_search is faster)
+ *   - Limitations (index lag, best-effort resolution, no validation)
+ *
+ * Keep it tight: long instructions burn tokens in every session. Reference
+ * only tools that exist on `main`; gate conditional tools behind feature checks.
+ */
+export const SERVER_INSTRUCTIONS = `# Codegraph — code intelligence over an indexed knowledge graph
+
+Codegraph is a SQLite knowledge graph of every symbol, edge, and file
+in the workspace. Reads are sub-millisecond; the index lags writes by
+about a second through the file watcher. Consult it BEFORE writing or
+editing code, not during.
+
+## Tool selection by intent
+
+- **"What is the symbol named X?"** → \`codegraph_search\`
+- **"What's the deal with this task / feature / area?"** → \`codegraph_context\` (PRIMARY — composes search + node + callers + callees in one call)
+- **"What calls this?"** → \`codegraph_callers\`
+- **"What does this call?"** → \`codegraph_callees\`
+- **"What would changing this break?"** → \`codegraph_impact\`
+- **"Show me this symbol's source / signature / docstring."** → \`codegraph_node\`
+- **"Survey an unfamiliar topic / pattern / module."** → \`codegraph_explore\` (heavier; deep dive)
+- **"What's in directory X?"** → \`codegraph_files\`
+- **"Is the index ready / what's its size?"** → \`codegraph_status\`
+
+## Common chains
+
+- **Onboarding**: \`codegraph_context\` first. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols.
+- **Refactor planning**: \`codegraph_search\` → \`codegraph_callers\` → \`codegraph_impact\`. The blast-radius answer comes from impact, not from walking callers manually.
+- **Debugging a regression**: \`codegraph_callers\` of the suspected symbol; widen with \`codegraph_impact\` if an unexpected call appears.
+
+## Anti-patterns
+
+- **Don't grep first** when looking up a symbol by name — \`codegraph_search\` is faster and returns kind + location + signature.
+- **Don't chain \`codegraph_search\` + \`codegraph_node\`** when you just want context — \`codegraph_context\` is one round-trip.
+- **Don't use \`codegraph_explore\` for narrow questions** — it's a multi-call deep dive, expensive in tokens. Save it for genuine "I'm new here" surveys.
+- **Don't query the index immediately after editing a file** — the watcher needs ~500ms to debounce + sync. Wait for the next turn.
+
+## Limitations
+
+- Index lags file writes by ~1 second.
+- Cross-file resolution is best-effort name matching; ambiguous calls may return multiple candidates.
+- No live correctness validation — that's still the TypeScript compiler / test suite / linter's job. Codegraph supplements those with structural context they don't have.
+`;