From dff6b4497170bfaf0d6bfe148ee3460d6f39a3ca Mon Sep 17 00:00:00 2001 From: grandmaster451 Date: Tue, 16 Jun 2026 08:22:30 +0300 Subject: [PATCH 1/2] fix(mcp): sanitize tool input schemas into Moonshot Flavored JSON Schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MCP servers advertise tool input schemas as standard JSON Schema, which permits properties that omit the `type` keyword and freely uses combinators (`anyOf`, `oneOf`, `allOf`) and `` indirection. Moonshot's API validator is stricter — every property must carry an explicit `type`, and unresolved `` pointers are rejected. Without sanitization the API returns HTTP 400: tools.function.parameters is not a valid moonshot flavored json schema This adds a sanitization layer (ported from the Python kimi-cli's kosong/utils/jsonschema.py) that: 1. Resolves local `` pointers and strips definition buckets 2. Fills in missing `type` on every property schema — inferred from enum/const values, structural keywords, or defaulting to "string" Combinator branches (anyOf/oneOf/allOf/not/if/then/else/$ref) are left alone since they legitimately describe shape without type. Closes #792 --- .changeset/mfjs-schema-sanitize.md | 5 + .../agent-core/src/mcp/connection-manager.ts | 14 +- packages/agent-core/src/mcp/index.ts | 1 + .../agent-core/src/mcp/schema-sanitize.ts | 266 ++++++++++++++ .../test/mcp/schema-sanitize.test.ts | 324 ++++++++++++++++++ 5 files changed, 605 insertions(+), 5 deletions(-) create mode 100644 .changeset/mfjs-schema-sanitize.md create mode 100644 packages/agent-core/src/mcp/schema-sanitize.ts create mode 100644 packages/agent-core/test/mcp/schema-sanitize.test.ts diff --git a/.changeset/mfjs-schema-sanitize.md b/.changeset/mfjs-schema-sanitize.md new file mode 100644 index 000000000..0ad729226 --- /dev/null +++ b/.changeset/mfjs-schema-sanitize.md @@ -0,0 +1,5 @@ +--- +'@moonshot-ai/kimi-code': patch +--- + +Fix MCP tool schema rejection by sanitizing standard JSON Schema into Moonshot Flavored JSON Schema before sending to the API. Resolves #792. diff --git a/packages/agent-core/src/mcp/connection-manager.ts b/packages/agent-core/src/mcp/connection-manager.ts index 7d3c9c1f3..291a4932c 100644 --- a/packages/agent-core/src/mcp/connection-manager.ts +++ b/packages/agent-core/src/mcp/connection-manager.ts @@ -11,6 +11,7 @@ import { SseMcpClient } from './client-sse'; import type { UnexpectedCloseReason } from './client-shared'; import { StdioMcpClient } from './client-stdio'; import type { McpOAuthService } from './oauth'; +import { sanitizeMcpSchema } from './schema-sanitize'; import { assertMcpInputSchema, type MCPClient } from './types'; export type McpServerStatus = 'pending' | 'connected' | 'failed' | 'disabled' | 'needs-auth'; @@ -378,11 +379,14 @@ export class McpConnectionManager { private async connectAndDiscoverTools(client: RuntimeMcpClient): Promise { await client.connect(); const mcpTools = await client.listTools(); - return mcpTools.map((mcpTool) => ({ - name: mcpTool.name, - description: mcpTool.description, - parameters: assertMcpInputSchema(mcpTool.name, mcpTool.inputSchema), - })); + return mcpTools.map((mcpTool) => { + const validated = assertMcpInputSchema(mcpTool.name, mcpTool.inputSchema); + return { + name: mcpTool.name, + description: mcpTool.description, + parameters: sanitizeMcpSchema(validated), + }; + }); } private async closeClient(entry: InternalEntry): Promise { diff --git a/packages/agent-core/src/mcp/index.ts b/packages/agent-core/src/mcp/index.ts index d0fef23f6..e6d835c8b 100644 --- a/packages/agent-core/src/mcp/index.ts +++ b/packages/agent-core/src/mcp/index.ts @@ -1,5 +1,6 @@ export * from './connection-manager'; export * from './oauth'; +export * from './schema-sanitize'; export * from './session-config'; export * from './tool-naming'; export * from './types'; diff --git a/packages/agent-core/src/mcp/schema-sanitize.ts b/packages/agent-core/src/mcp/schema-sanitize.ts new file mode 100644 index 000000000..58206e239 --- /dev/null +++ b/packages/agent-core/src/mcp/schema-sanitize.ts @@ -0,0 +1,266 @@ +/** + * Sanitize standard JSON Schemas emitted by MCP servers into the stricter + * "Moonshot Flavored JSON Schema" (MFJS) the Kimi API validator expects. + * + * ## Background + * + * MCP servers advertise tool input schemas as standard JSON Schema objects. + * Standard JSON Schema permits property schemas that omit the `type` keyword + * (e.g. `{"enum": ["a", "b"]}`) and freely uses combinators (`anyOf`, + * `oneOf`, `allOf`) and `$ref` indirection. Most LLM providers (OpenAI, + * Anthropic) accept these without issue. + * + * Moonshot's validator is stricter: every property must carry an explicit + * `type`, and `$ref` pointers must be resolved inline. Without sanitization + * the API returns HTTP 400: + * + * > tools.function.parameters is not a valid moonshot flavored json schema, + * > details: + * + * This module is a TypeScript port of the original kosong interceptor that + * shipped in the Python-based kimi-cli (`kosong/utils/jsonschema.py`). + * + * ## What it does + * + * 1. **Dereferences local `$ref`** entries (`#/$defs/...`) so the resolved + * schema contains no indirection, then strips the definition buckets. + * 2. **Fills in missing `type`** on every property schema — inferred from + * `enum`/`const` values, from structural keywords (`properties` → + * `"object"`, `items` → `"array"`, etc.), or defaulting to `"string"`. + * + * Combinator branches (`anyOf`/`oneOf`/`allOf`/`$ref`/`not`/`if`/`then`/ + * `else`) are left alone because they legitimately describe shape without + * `type`. + */ + +type Json = string | number | boolean | null | Json[] | { [key: string]: Json }; +type JsonRecord = Record; + +/** + * JSON Schema keywords that describe a property's shape without (or in + * addition to) a `type` keyword. When any of these are present we skip the + * type-filling step so we don't distort the schema's meaning. + */ +const COMBINATOR_KEYS = [ + 'anyOf', + 'oneOf', + 'allOf', + 'not', + 'if', + 'then', + 'else', + '$ref', +] as const; + +const OBJECT_KEYWORDS = [ + 'properties', + 'additionalProperties', + 'patternProperties', + 'propertyNames', + 'required', + 'minProperties', + 'maxProperties', +] as const; + +const ARRAY_KEYWORDS = [ + 'items', + 'prefixItems', + 'minItems', + 'maxItems', + 'uniqueItems', + 'contains', +] as const; + +const STRING_KEYWORDS = ['minLength', 'maxLength', 'pattern', 'format'] as const; + +const NUMERIC_KEYWORDS = [ + 'minimum', + 'maximum', + 'multipleOf', + 'exclusiveMinimum', + 'exclusiveMaximum', +] as const; + +/** + * Resolve local `$ref` entries inside a JSON Schema, then return a deep copy + * with every reference inlined and the definition buckets removed. + * + * Only local references (those starting with `#`) are resolved; remote + * references (e.g. `https://...`) are left untouched. + * + * @throws if a local `$ref` cannot be resolved or resolves to a non-object. + */ +function derefJsonSchema(schema: JsonRecord): JsonRecord { + const root = structuredClone(schema); + + function resolvePointer(pointer: string): Json { + const parts = pointer.replace(/^#\/?/, '').split('/'); + let current: Json = root; + for (const part of parts) { + if (typeof current !== 'object' || current === null || Array.isArray(current)) { + throw new Error(`Unable to resolve reference path: ${pointer}`); + } + current = (current as JsonRecord)[part] ?? null; + if (current === undefined) { + throw new Error(`Unable to resolve reference path: ${pointer}`); + } + } + return current; + } + + function traverse(node: Json): Json { + if (Array.isArray(node)) { + return node.map(traverse); + } + if (typeof node !== 'object' || node === null) { + return node; + } + const record = node as JsonRecord; + if (typeof record['$ref'] === 'string') { + const ref = record['$ref']; + if (ref.startsWith('#')) { + const target = traverse(resolvePointer(ref)); + if (typeof target !== 'object' || target === null || Array.isArray(target)) { + throw new Error('Local $ref must resolve to a JSON object'); + } + const { $ref: _, ...rest } = record; + return { ...(target as JsonRecord), ...rest }; + } + // Remote reference — leave as-is. + return record; + } + const result: JsonRecord = {}; + for (const [key, value] of Object.entries(record)) { + result[key] = traverse(value); + } + return result; + } + + const resolved = traverse(root) as JsonRecord; + delete resolved['$defs']; + delete resolved['definitions']; + return resolved; +} + +/** + * Walk into every property-schema position under `node` and ensure each + * declares a `type`. Mutates the node in place (the caller should pass a + * deep clone). + * + * Property-schema positions are: values under `properties`, entries in + * `items` (object or array form), `additionalProperties` (object form), and + * branches of `anyOf`/`oneOf`/`allOf`. + * + * `node` itself is treated as a container and is not normalized — only the + * property schemas it contains are. + */ +function recurseSchema(node: Json): void { + if (typeof node !== 'object' || node === null || Array.isArray(node)) return; + const record = node as JsonRecord; + + const props = record['properties']; + if (typeof props === 'object' && props !== null && !Array.isArray(props)) { + for (const value of Object.values(props as JsonRecord)) { + normalizeProperty(value); + } + } + + const items = record['items']; + if (typeof items === 'object' && items !== null) { + if (Array.isArray(items)) { + for (const value of items) normalizeProperty(value); + } else { + normalizeProperty(items); + } + } + + const additional = record['additionalProperties']; + if (typeof additional === 'object' && additional !== null && !Array.isArray(additional)) { + normalizeProperty(additional); + } + + for (const key of ['anyOf', 'oneOf', 'allOf'] as const) { + const branches = record[key]; + if (Array.isArray(branches)) { + for (const value of branches) normalizeProperty(value); + } + } +} + +/** + * Ensure `node` (a property schema) declares a `type`, then recurse into it. + */ +function normalizeProperty(node: Json): void { + if (typeof node !== 'object' || node === null || Array.isArray(node)) return; + const record = node as JsonRecord; + + if (!('type' in record) && !COMBINATOR_KEYS.some((key) => key in record)) { + const enumValues = record['enum']; + if (Array.isArray(enumValues) && enumValues.length > 0) { + record['type'] = inferTypeFromValues(enumValues); + } else if ('const' in record) { + record['type'] = inferTypeFromValues([record['const']]); + } else { + record['type'] = inferTypeFromStructure(record); + } + } + + recurseSchema(record); +} + +/** + * Infer a JSON Schema `type` from structural keywords present on `node`. + * + * Falls back to `"string"` only when the node carries no structural hints. + */ +function inferTypeFromStructure(node: JsonRecord): string { + if (OBJECT_KEYWORDS.some((k) => k in node)) return 'object'; + if (ARRAY_KEYWORDS.some((k) => k in node)) return 'array'; + if (STRING_KEYWORDS.some((k) => k in node)) return 'string'; + if (NUMERIC_KEYWORDS.some((k) => k in node)) return 'number'; + return 'string'; +} + +/** + * Infer a JSON Schema `type` string from a list of concrete values. + * + * - Single type → return it. + * - `{integer, number}` → `"number"` (integer is a subset of number). + * - Mixed → `"string"`. + */ +function inferTypeFromValues(values: Json[]): string { + const inferred = new Set(); + for (const value of values) { + if (typeof value === 'boolean') inferred.add('boolean'); + else if (typeof value === 'number') { + inferred.add(Number.isInteger(value) ? 'integer' : 'number'); + } else if (typeof value === 'string') inferred.add('string'); + else if (value === null) inferred.add('null'); + else if (Array.isArray(value)) inferred.add('array'); + else if (typeof value === 'object') inferred.add('object'); + else return 'string'; + } + if (inferred.size === 1) return [...inferred][0]!; + if (inferred.size === 2 && inferred.has('integer') && inferred.has('number')) return 'number'; + return 'string'; +} + +/** + * Sanitize a standard JSON Schema (as emitted by MCP servers) into + * Moonshot Flavored JSON Schema: resolve local `$ref` pointers and fill in + * missing `type` declarations on every property. + * + * Returns a **new** object; the input is never mutated. Non-object inputs + * are returned unchanged so callers can use this as an identity pass-through + * for edge cases (MCP servers occasionally emit `true` or `false` as a + * schema). + */ +export function sanitizeMcpSchema(schema: unknown): Record { + if (typeof schema !== 'object' || schema === null || Array.isArray(schema)) { + return schema as Record; + } + const dereffed = derefJsonSchema(schema as JsonRecord); + const cloned = structuredClone(dereffed); + recurseSchema(cloned); + return cloned; +} diff --git a/packages/agent-core/test/mcp/schema-sanitize.test.ts b/packages/agent-core/test/mcp/schema-sanitize.test.ts new file mode 100644 index 000000000..a185308f6 --- /dev/null +++ b/packages/agent-core/test/mcp/schema-sanitize.test.ts @@ -0,0 +1,324 @@ +import { describe, expect, it } from 'vitest'; + +import { sanitizeMcpSchema } from '../../src/mcp/schema-sanitize'; + +type Schema = Record; + +function props(result: Schema): Record { + return result['properties'] as Record; +} + +function prop(result: Schema, name: string): Schema { + return props(result)[name]!; +} + +describe('sanitizeMcpSchema — non-object inputs', () => { + it('returns null unchanged', () => { + expect(sanitizeMcpSchema(null)).toBe(null); + }); + + it('returns arrays unchanged (invalid schema, but identity)', () => { + expect(sanitizeMcpSchema([1, 2, 3])).toEqual([1, 2, 3]); + }); + + it('returns primitives unchanged', () => { + expect(sanitizeMcpSchema('string')).toBe('string'); + expect(sanitizeMcpSchema(42)).toBe(42); + expect(sanitizeMcpSchema(true)).toBe(true); + }); +}); + +describe('sanitizeMcpSchema — missing type filling', () => { + it('fills in "string" when no type and no structural hints', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + name: { description: 'User name' }, + }, + }); + expect(prop(result, 'name')['type']).toBe('string'); + }); + + it('infers type from enum values', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + mode: { enum: ['fast', 'slow'] }, + count: { enum: [1, 2, 3] }, + enabled: { enum: [true, false] }, + mixed: { enum: [1, 'a'] }, + }, + }); + expect(prop(result, 'mode')['type']).toBe('string'); + expect(prop(result, 'count')['type']).toBe('integer'); + expect(prop(result, 'enabled')['type']).toBe('boolean'); + expect(prop(result, 'mixed')['type']).toBe('string'); + }); + + it('infers type from const value', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + kind: { const: 'user' }, + timeout: { const: 30 }, + }, + }); + expect(prop(result, 'kind')['type']).toBe('string'); + expect(prop(result, 'timeout')['type']).toBe('integer'); + }); + + it('infers type from structural keywords', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + objProp: { properties: { a: {} } }, + arrProp: { items: { type: 'string' } }, + strProp: { pattern: '^\\w+$' }, + numProp: { minimum: 0 }, + }, + }); + expect(prop(result, 'objProp')['type']).toBe('object'); + expect(prop(result, 'arrProp')['type']).toBe('array'); + expect(prop(result, 'strProp')['type']).toBe('string'); + expect(prop(result, 'numProp')['type']).toBe('number'); + }); + + it('does not add type when a combinator key is present', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + any: { anyOf: [{ type: 'string' }, { type: 'null' }] }, + one: { oneOf: [{ type: 'string' }, { type: 'number' }] }, + all: { allOf: [{ type: 'string' }] }, + not: { not: { type: 'string' } }, + }, + }); + expect(prop(result, 'any')['type']).toBeUndefined(); + expect(prop(result, 'one')['type']).toBeUndefined(); + expect(prop(result, 'all')['type']).toBeUndefined(); + expect(prop(result, 'not')['type']).toBeUndefined(); + }); + + it('does not overwrite an existing type', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + name: { type: 'string', enum: ['a', 'b'] }, + }, + }); + expect(prop(result, 'name')['type']).toBe('string'); + }); +}); + +describe('sanitizeMcpSchema — $ref dereferencing', () => { + it('inlines local $ref pointers and strips $defs', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + user: { $ref: '#/$defs/User' }, + }, + $defs: { + User: { + type: 'object', + properties: { + name: {}, + age: {}, + }, + }, + }, + }); + + expect('$defs' in result).toBe(false); + const userProp = prop(result, 'user'); + expect(userProp['type']).toBe('object'); + const userProps = props(userProp); + expect(userProps['name']!['type']).toBe('string'); + expect(userProps['age']!['type']).toBe('string'); + }); + + it('preserves sibling keys alongside $ref', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + item: { $ref: '#/$defs/Item', description: 'overridden' }, + }, + $defs: { + Item: { type: 'string' }, + }, + }); + const itemProp = prop(result, 'item'); + expect(itemProp['type']).toBe('string'); + expect(itemProp['description']).toBe('overridden'); + }); + + it('handles nested $ref chains', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + a: { $ref: '#/$defs/A' }, + }, + $defs: { + A: { $ref: '#/$defs/B' }, + B: { type: 'integer' }, + }, + }); + expect(prop(result, 'a')['type']).toBe('integer'); + }); + + it('leaves remote $ref untouched', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + remote: { $ref: 'https://example.com/schema.json' }, + }, + }); + expect(prop(result, 'remote')['$ref']).toBe('https://example.com/schema.json'); + }); + + it('strips legacy "definitions" bucket', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: {}, + definitions: { Foo: { type: 'string' } }, + }); + expect('definitions' in result).toBe(false); + }); + + it('throws on unresolvable local $ref', () => { + expect(() => + sanitizeMcpSchema({ + type: 'object', + properties: { + broken: { $ref: '#/$defs/Missing' }, + }, + }), + ).toThrow(/Unable to resolve reference path/); + }); +}); + +describe('sanitizeMcpSchema — nested arrays and items', () => { + it('fills type on array items (object form)', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + tags: { type: 'array', items: { description: 'a tag' } }, + }, + }); + const tagsProp = prop(result, 'tags'); + expect((tagsProp['items'] as Schema)['type']).toBe('string'); + }); + + it('fills type on array items (array form / tuple)', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + pair: { type: 'array', items: [{ description: 'first' }, { type: 'number' }] }, + }, + }); + const pairProp = prop(result, 'pair'); + const items = pairProp['items'] as Schema[]; + expect(items[0]!['type']).toBe('string'); + expect(items[1]!['type']).toBe('number'); + }); + + it('fills type on additionalProperties (object form)', () => { + const result = sanitizeMcpSchema({ + type: 'object', + additionalProperties: { description: 'dynamic value' }, + }); + expect((result['additionalProperties'] as Schema)['type']).toBe('string'); + }); +}); + +describe('sanitizeMcpSchema — regression: issue #792 (n8n anyOf)', () => { + // Reproduces the exact schema shape from issue #792 where n8n emits a + // property with `anyOf` whose branches lack explicit `type`. Moonshot's + // validator rejects this because `anyOf` branches must be objects. + it('fills type on anyOf/oneOf/allOf branches', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + pageSetup: { + type: 'object', + properties: { + size: { + anyOf: [ + { description: 'small' }, + { description: 'large', format: 'paper-size' }, + ], + }, + }, + }, + }, + }); + + const pageSetup = prop(result, 'pageSetup'); + const size = prop(pageSetup, 'size'); + const branches = size['anyOf'] as Schema[]; + expect(branches[0]!['type']).toBe('string'); + expect(branches[1]!['type']).toBe('string'); + }); + + it('recurses into anyOf branches that contain nested properties', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + config: { + anyOf: [ + { + properties: { + host: { description: 'hostname' }, + port: { minimum: 0 }, + }, + }, + ], + }, + }, + }); + + const config = prop(result, 'config'); + // The config property uses a combinator (anyOf), so no type is added at + // the config level. Inside the branch, nested properties get filled. + expect(config['type']).toBeUndefined(); + const branch = (config['anyOf'] as Schema[])[0]!; + const branchProps = props(branch); + expect(branchProps['host']!['type']).toBe('string'); + expect(branchProps['port']!['type']).toBe('number'); + }); +}); + +describe('sanitizeMcpSchema — immutability', () => { + it('never mutates the input schema', () => { + const input = { + type: 'object', + properties: { + name: { description: 'test' }, + }, + }; + const inputCopy = structuredClone(input); + sanitizeMcpSchema(input); + expect(input).toEqual(inputCopy); + }); +}); + +describe('sanitizeMcpSchema — integer/number unification', () => { + it('classifies {integer, number} mixed enum as "number"', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + value: { enum: [1, 2.5] }, + }, + }); + expect(prop(result, 'value')['type']).toBe('number'); + }); + + it('classifies pure integer enum as "integer"', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + value: { enum: [1, 2, 3] }, + }, + }); + expect(prop(result, 'value')['type']).toBe('integer'); + }); +}); From fc57b77553d42a1721e34fbcfb2d5d9fa532998c Mon Sep 17 00:00:00 2001 From: grandmaster451 Date: Tue, 16 Jun 2026 22:09:38 +0300 Subject: [PATCH 2/2] fix(mcp): harden schema sanitizer for recursive refs, sibling refs, JSON pointer - Guard recursive local $refs with a visited set to avoid RangeError; preserve the $ref and keep $defs when a cycle is detected. - Traverse sibling schema fields of a $ref so their own local refs are resolved instead of being left dangling after $defs deletion. - Decode JSON Pointer escape sequences (~1 -> /, ~0 -> ~) when resolving local pointer paths. Addresses Codex review feedback. --- .../agent-core/src/mcp/schema-sanitize.ts | 49 +++++++++---- .../test/mcp/schema-sanitize.test.ts | 69 +++++++++++++++++++ 2 files changed, 106 insertions(+), 12 deletions(-) diff --git a/packages/agent-core/src/mcp/schema-sanitize.ts b/packages/agent-core/src/mcp/schema-sanitize.ts index 58206e239..dd4151926 100644 --- a/packages/agent-core/src/mcp/schema-sanitize.ts +++ b/packages/agent-core/src/mcp/schema-sanitize.ts @@ -92,25 +92,36 @@ const NUMERIC_KEYWORDS = [ */ function derefJsonSchema(schema: JsonRecord): JsonRecord { const root = structuredClone(schema); + // Set when a recursive (cyclic) local $ref is encountered. In that case we + // cannot fully inline the reference without recursing forever, so we keep + // the $ref intact and retain the $defs bucket so the schema stays valid. + let hasCyclicRef = false; + // Decode a JSON Pointer fragment into the list of path tokens. + // `#/$defs/a~1b` -> ['\$defs', 'a/b'] (~1 -> /, ~0 -> ~). function resolvePointer(pointer: string): Json { - const parts = pointer.replace(/^#\/?/, '').split('/'); + const parts = pointer + .replace(/^#\/?/, '') + .split('/') + .map((part) => part.replace(/~1/g, '/').replace(/~0/g, '~')); let current: Json = root; for (const part of parts) { + if (part === '') continue; if (typeof current !== 'object' || current === null || Array.isArray(current)) { throw new Error(`Unable to resolve reference path: ${pointer}`); } - current = (current as JsonRecord)[part] ?? null; - if (current === undefined) { + const next = (current as JsonRecord)[part]; + if (next === undefined || next === null) { throw new Error(`Unable to resolve reference path: ${pointer}`); } + current = next; } return current; } - function traverse(node: Json): Json { + function traverse(node: Json, resolving: Set): Json { if (Array.isArray(node)) { - return node.map(traverse); + return node.map((child) => traverse(child, resolving)); } if (typeof node !== 'object' || node === null) { return node; @@ -119,26 +130,40 @@ function derefJsonSchema(schema: JsonRecord): JsonRecord { if (typeof record['$ref'] === 'string') { const ref = record['$ref']; if (ref.startsWith('#')) { - const target = traverse(resolvePointer(ref)); + const { $ref: _, ...rest } = record; + if (resolving.has(ref)) { + // Recursive reference: preserve the $ref and keep $defs so the + // later provider normalizer can still resolve it. + hasCyclicRef = true; + return record; + } + const target = traverse(resolvePointer(ref), new Set(resolving).add(ref)); if (typeof target !== 'object' || target === null || Array.isArray(target)) { throw new Error('Local $ref must resolve to a JSON object'); } - const { $ref: _, ...rest } = record; - return { ...(target as JsonRecord), ...rest }; + // Traverse sibling schema fields so their own local refs are resolved + // before merging (otherwise a sibling $ref is left dangling). + const mergedRest: JsonRecord = {}; + for (const [key, value] of Object.entries(rest)) { + mergedRest[key] = traverse(value, resolving); + } + return { ...(target as JsonRecord), ...mergedRest }; } // Remote reference — leave as-is. return record; } const result: JsonRecord = {}; for (const [key, value] of Object.entries(record)) { - result[key] = traverse(value); + result[key] = traverse(value, resolving); } return result; } - const resolved = traverse(root) as JsonRecord; - delete resolved['$defs']; - delete resolved['definitions']; + const resolved = traverse(root, new Set()) as JsonRecord; + if (!hasCyclicRef) { + delete resolved['$defs']; + delete resolved['definitions']; + } return resolved; } diff --git a/packages/agent-core/test/mcp/schema-sanitize.test.ts b/packages/agent-core/test/mcp/schema-sanitize.test.ts index a185308f6..8bd8903a1 100644 --- a/packages/agent-core/test/mcp/schema-sanitize.test.ts +++ b/packages/agent-core/test/mcp/schema-sanitize.test.ts @@ -194,6 +194,75 @@ describe('sanitizeMcpSchema — $ref dereferencing', () => { }), ).toThrow(/Unable to resolve reference path/); }); + + it('preserves recursive (cyclic) refs instead of infinite-looping', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + node: { $ref: '#/$defs/Node' }, + }, + $defs: { + Node: { + type: 'object', + properties: { + value: { type: 'string' }, + children: { + type: 'array', + items: { $ref: '#/$defs/Node' }, + }, + }, + }, + }, + }); + // The top-level ref resolves; the cycle lives deeper in children.items. + // Cyclic ref is preserved there and $defs is retained so the schema is valid. + expect('$defs' in result).toBe(true); + const nodeProp = prop(result, 'node'); + expect(nodeProp['type']).toBe('object'); + const children = props(nodeProp)['children']!; + const items = children['items'] as Schema; + expect(items['$ref']).toBe('#/$defs/Node'); + }); + + it('resolves local refs nested inside sibling fields of a $ref', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + outer: { + $ref: '#/$defs/Outer', + properties: { + address: { $ref: '#/$defs/Address' }, + }, + }, + }, + $defs: { + Outer: { type: 'object' }, + Address: { type: 'object', properties: { city: {} } }, + }, + }); + expect('$defs' in result).toBe(false); + const outer = prop(result, 'outer'); + expect(outer['type']).toBe('object'); + const address = props(outer)['address']!; + expect(address['type']).toBe('object'); + expect(props(address)['city']!['type']).toBe('string'); + }); + + it('decodes JSON Pointer escape sequences (~1, ~0) in $ref paths', () => { + const result = sanitizeMcpSchema({ + type: 'object', + properties: { + slash: { $ref: '#/$defs/a~1b' }, + tilde: { $ref: '#/$defs/c~0d' }, + }, + $defs: { + 'a/b': { type: 'string' }, + 'c~d': { type: 'integer' }, + }, + }); + expect(prop(result, 'slash')['type']).toBe('string'); + expect(prop(result, 'tilde')['type']).toBe('integer'); + }); }); describe('sanitizeMcpSchema — nested arrays and items', () => {