From 81cb728b57a3d02f032b2b8fe72c55ab458f94de Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 11:50:47 +0200 Subject: [PATCH 1/2] Add AI agent detection to user-agent header Adds detection for 15 AI coding agents (amp, antigravity, augment, claude-code, cline, codex, copilot-cli, copilot-vscode, cursor, gemini-cli, goose, kiro, openclaw, opencode, windsurf) to the core user-agent. The detected product name is emitted as a single agent/<name> segment by createDefault so that Databricks can understand which agents are invoking the SDK. Detection honors the agents.md standard AGENT env var with an unknown fallback, and resolves ambiguity conservatively by emitting no segment when two explicit matchers fire at once. Explicit product env vars always take precedence over AGENT=<name>. Behavior matches the parallel changes in databricks-sdk-go #1637, databricks-sdk-java #768, and databricks-sdk-py #1394. Signed-off-by: simon --- packages/core/src/clientinfo/agent.ts | 124 +++++++++++ packages/core/src/clientinfo/default.ts | 32 +-- packages/core/src/clientinfo/index.ts | 1 + packages/core/tests/clientinfo/agent.test.ts | 195 ++++++++++++++++++ .../core/tests/clientinfo/default.test.ts | 17 +- packages/core/vitest.config.browser.ts | 1 + 6 files changed, 339 insertions(+), 31 deletions(-) create mode 100644 packages/core/src/clientinfo/agent.ts create mode 100644 packages/core/tests/clientinfo/agent.test.ts diff --git a/packages/core/src/clientinfo/agent.ts b/packages/core/src/clientinfo/agent.ts new file mode 100644 index 00000000..f77fa932 --- /dev/null +++ b/packages/core/src/clientinfo/agent.ts @@ -0,0 +1,124 @@ +/** + * Detects the AI coding agent (e.g. Claude Code, Cursor, Gemini CLI) that + * is running the current Node.js process. The detected product name is + * appended to the user-agent header so that Databricks can understand + * which agents are invoking the SDK. 
+ * + * The agent list and precedence rules are kept in sync across the Go, + * Java, Python, and TypeScript SDKs. + * + * @module + */ + +interface KnownAgent { + readonly envVar: string; + readonly product: string; +} + +// Name of the agents.md standard env var. When set to a value that no +// known agent recognizes, detection falls back to "unknown". +const AGENT_ENV_VAR = 'AGENT'; + +// Canonical list of AI coding agents. Keep this list in sync with the +// Go, Java, and Python SDKs. Agents are listed alphabetically by product +// name. +const KNOWN_AGENTS: readonly KnownAgent[] = [ + // The amp agent also sets AGENT=amp, handled by the central fallback. + {envVar: 'AMP_CURRENT_THREAD_ID', product: 'amp'}, + {envVar: 'ANTIGRAVITY_AGENT', product: 'antigravity'}, + {envVar: 'AUGMENT_AGENT', product: 'augment'}, + {envVar: 'CLAUDECODE', product: 'claude-code'}, + {envVar: 'CLINE_ACTIVE', product: 'cline'}, + {envVar: 'CODEX_CI', product: 'codex'}, + {envVar: 'COPILOT_CLI', product: 'copilot-cli'}, + // VS Code Copilot terminal, best-effort heuristic, not officially + // identified. + {envVar: 'COPILOT_MODEL', product: 'copilot-vscode'}, + {envVar: 'CURSOR_AGENT', product: 'cursor'}, + {envVar: 'GEMINI_CLI', product: 'gemini-cli'}, + // The goose agent also sets AGENT=goose, handled by the central + // fallback. + {envVar: 'GOOSE_TERMINAL', product: 'goose'}, + {envVar: 'KIRO', product: 'kiro'}, + {envVar: 'OPENCLAW_SHELL', product: 'openclaw'}, + {envVar: 'OPENCODE', product: 'opencode'}, + {envVar: 'WINDSURF_AGENT', product: 'windsurf'}, +]; + +function agentEnvFallback(): string { + const v = process.env[AGENT_ENV_VAR]; + if (v === undefined || v === '') { + return ''; + } + if (KNOWN_AGENTS.some(a => a.product === v)) { + return v; + } + return 'unknown'; +} + +/** + * Checks environment variables for known AI agents and returns the + * detected product name. 
+ * + * Explicit product-specific env vars always take precedence over the + * generic agents.md `AGENT` env var. `AGENT` is consulted only as a + * fallback when no explicit matcher fires, so that an explicit signal + * (e.g. `CLAUDECODE=1`) always wins over a conflicting `AGENT=<name>` + * value. + * + * Returns: + * + * - The product name when exactly one known env var is set. + * - `""` when multiple known env vars are set (ambiguity). + * - When no known env var is set and `AGENT` is a non-empty value: the + * value itself if it names a known product, otherwise `"unknown"`. + * - `""` when nothing is set. + * + * Unlike CI/CD detection (which returns the first match), agent + * detection uses an ambiguity guard because agent env vars can be + * stacked (e.g. running Cline inside Cursor). + */ +export function lookupAgentProvider(): string { + const matches: string[] = []; + for (const a of KNOWN_AGENTS) { + if (a.envVar in process.env) { + matches.push(a.product); + } + } + if (matches.length === 1) { + return matches[0]; + } + if (matches.length > 1) { + return ''; + } + return agentEnvFallback(); +} + +let cached: string | undefined; + +/** + * Returns the detected AI agent name, cached for the process lifetime. + * + * Returns one of: + * + * - The known product name when exactly one agent is detected via + * explicit env matchers, or when `AGENT` is set to a known product + * name and no explicit matcher fired. + * - `"unknown"` when no explicit matcher fired and `AGENT` is set to a + * value that is not a known product name. + * - `""` when no agent is detected, or when multiple explicit matchers + * fire for different agents (ambiguity). + */ +export function agentProvider(): string { + cached ??= lookupAgentProvider(); + return cached; +} + +/** + * Clears the cached agent detection result so that the next call to + * {@link agentProvider} re-evaluates the environment. Exported for + * testing only. 
+ */ +export function clearAgentCache(): void { + cached = undefined; +} diff --git a/packages/core/src/clientinfo/default.ts b/packages/core/src/clientinfo/default.ts index 00dded94..d849d07d 100644 --- a/packages/core/src/clientinfo/default.ts +++ b/packages/core/src/clientinfo/default.ts @@ -1,10 +1,6 @@ import {ClientInfo, sanitize} from './clientinfo'; import {MODULE_NAME, VERSION, getBase} from './base'; - -interface AgentDef { - readonly envVar: string; - readonly product: string; -} +import {agentProvider} from './agent'; interface EnvCheck { readonly name: string; @@ -16,18 +12,6 @@ interface CicdDef { readonly envVars: readonly EnvCheck[]; } -const KNOWN_AGENTS: readonly AgentDef[] = [ - {envVar: 'ANTIGRAVITY_AGENT', product: 'antigravity'}, - {envVar: 'CLAUDECODE', product: 'claude-code'}, - {envVar: 'CLINE_ACTIVE', product: 'cline'}, - {envVar: 'CODEX_CI', product: 'codex'}, - {envVar: 'COPILOT_CLI', product: 'copilot-cli'}, - {envVar: 'CURSOR_AGENT', product: 'cursor'}, - {envVar: 'GEMINI_CLI', product: 'gemini-cli'}, - {envVar: 'OPENCODE', product: 'opencode'}, - {envVar: 'OPENCLAW_SHELL', product: 'openclaw'}, -]; - const CICD_PROVIDERS: readonly CicdDef[] = [ { name: 'github', @@ -67,17 +51,6 @@ const CICD_PROVIDERS: readonly CicdDef[] = [ {name: 'tf-cloud', envVars: [{name: 'TFC_RUN_ID', expectedValue: ''}]}, ]; -// Returns all detected AI coding agents. 
-function detectAgents(): string[] { - const detected: string[] = []; - for (const a of KNOWN_AGENTS) { - if (process.env[a.envVar] !== undefined) { - detected.push(a.product); - } - } - return detected; -} - function detectCicd(): string { for (const p of CICD_PROVIDERS) { const allMatch = p.envVars.every(ev => { @@ -146,7 +119,8 @@ export function createDefault(): ClientInfo { pairs.push({key: 'runtime', value: sanitize(runtime)}); } - for (const agent of detectAgents()) { + const agent = agentProvider(); + if (agent !== '') { pairs.push({key: 'agent', value: agent}); } diff --git a/packages/core/src/clientinfo/index.ts b/packages/core/src/clientinfo/index.ts index aabe17d9..6401073c 100644 --- a/packages/core/src/clientinfo/index.ts +++ b/packages/core/src/clientinfo/index.ts @@ -2,3 +2,4 @@ export type {ClientInfoErrorCode} from './clientinfo'; export {ClientInfo, ClientInfoError} from './clientinfo'; export {addToDefault, setPartner, setProduct} from './base'; export {createDefault} from './default'; +export {agentProvider} from './agent'; diff --git a/packages/core/tests/clientinfo/agent.test.ts b/packages/core/tests/clientinfo/agent.test.ts new file mode 100644 index 00000000..0f9a779f --- /dev/null +++ b/packages/core/tests/clientinfo/agent.test.ts @@ -0,0 +1,195 @@ +import {describe, it, expect, beforeEach, afterEach} from 'vitest'; +import {agentProvider} from '../../src/clientinfo'; +import {clearAgentCache, lookupAgentProvider} from '../../src/clientinfo/agent'; + +describe('lookupAgentProvider', () => { + let savedEnv: NodeJS.ProcessEnv; + + beforeEach(() => { + clearAgentCache(); + savedEnv = process.env; + process.env = {}; + }); + + afterEach(() => { + process.env = savedEnv; + clearAgentCache(); + }); + + const testCases: { + name: string; + env: Record<string, string>; + want: string; + }[] = [ + { + name: 'no agent', + env: {}, + want: '', + }, + { + name: 'amp via AMP_CURRENT_THREAD_ID', + env: {AMP_CURRENT_THREAD_ID: 'abc123'}, + want: 'amp', + }, + { + name: 
'antigravity', + env: {ANTIGRAVITY_AGENT: '1'}, + want: 'antigravity', + }, + { + name: 'augment', + env: {AUGMENT_AGENT: '1'}, + want: 'augment', + }, + { + name: 'claude code', + env: {CLAUDECODE: '1'}, + want: 'claude-code', + }, + { + name: 'cline', + env: {CLINE_ACTIVE: '1'}, + want: 'cline', + }, + { + name: 'codex', + env: {CODEX_CI: '1'}, + want: 'codex', + }, + { + name: 'copilot cli', + env: {COPILOT_CLI: '1'}, + want: 'copilot-cli', + }, + { + name: 'copilot vscode', + env: {COPILOT_MODEL: 'gpt-4'}, + want: 'copilot-vscode', + }, + { + name: 'cursor', + env: {CURSOR_AGENT: '1'}, + want: 'cursor', + }, + { + name: 'gemini cli', + env: {GEMINI_CLI: '1'}, + want: 'gemini-cli', + }, + { + name: 'goose via GOOSE_TERMINAL', + env: {GOOSE_TERMINAL: '1'}, + want: 'goose', + }, + { + name: 'kiro', + env: {KIRO: '1'}, + want: 'kiro', + }, + { + name: 'openclaw', + env: {OPENCLAW_SHELL: 'exec'}, + want: 'openclaw', + }, + { + name: 'opencode', + env: {OPENCODE: '1'}, + want: 'opencode', + }, + { + name: 'windsurf', + env: {WINDSURF_AGENT: '1'}, + want: 'windsurf', + }, + { + name: 'empty value still counts as set', + env: {CLAUDECODE: ''}, + want: 'claude-code', + }, + { + name: 'multiple agents are ambiguous', + env: {CLAUDECODE: '1', CURSOR_AGENT: '1'}, + want: '', + }, + { + name: 'goose via AGENT', + env: {AGENT: 'goose'}, + want: 'goose', + }, + { + name: 'amp via AGENT', + env: {AGENT: 'amp'}, + want: 'amp', + }, + { + name: 'cursor via AGENT', + env: {AGENT: 'cursor'}, + want: 'cursor', + }, + { + name: 'AGENT with unknown value falls back to unknown', + env: {AGENT: 'somethingweird'}, + want: 'unknown', + }, + { + name: 'AGENT empty string does not trigger fallback', + env: {AGENT: ''}, + want: '', + }, + { + name: 'goose via both GOOSE_TERMINAL and AGENT is not ambiguous', + env: {GOOSE_TERMINAL: '1', AGENT: 'goose'}, + want: 'goose', + }, + { + name: 'explicit GOOSE_TERMINAL wins over AGENT=cursor', + env: {GOOSE_TERMINAL: '1', AGENT: 'cursor'}, + want: 
'goose', + }, + { + name: 'explicit CLAUDECODE wins over AGENT=goose', + env: {AGENT: 'goose', CLAUDECODE: '1'}, + want: 'claude-code', + }, + { + name: 'known matcher wins over AGENT fallback to unknown', + env: {AGENT: 'somethingunknown', CLAUDECODE: '1'}, + want: 'claude-code', + }, + { + name: 'COPILOT_CLI and COPILOT_MODEL together is ambiguous', + env: {COPILOT_CLI: '1', COPILOT_MODEL: 'gpt-4'}, + want: '', + }, + ]; + + it.each(testCases)('$name', ({env, want}) => { + process.env = env; + expect(lookupAgentProvider()).toBe(want); + }); +}); + +describe('agentProvider', () => { + let savedEnv: NodeJS.ProcessEnv; + + beforeEach(() => { + clearAgentCache(); + savedEnv = process.env; + process.env = {}; + }); + + afterEach(() => { + process.env = savedEnv; + clearAgentCache(); + }); + + it('caches the detection result for the process lifetime', () => { + process.env = {CURSOR_AGENT: '1'}; + expect(agentProvider()).toBe('cursor'); + + // Changing the environment after the first call must not change the + // cached result. 
+ process.env = {CLAUDECODE: '1'}; + expect(agentProvider()).toBe('cursor'); + }); +}); diff --git a/packages/core/tests/clientinfo/default.test.ts b/packages/core/tests/clientinfo/default.test.ts index 689af045..95b8cbb5 100644 --- a/packages/core/tests/clientinfo/default.test.ts +++ b/packages/core/tests/clientinfo/default.test.ts @@ -17,18 +17,21 @@ import { CACHED_NODE_VERSION, normalizeNodeVersion, } from '../../src/clientinfo/default'; +import {clearAgentCache} from '../../src/clientinfo/agent'; describe('createDefault', () => { let savedEnv: NodeJS.ProcessEnv; beforeEach(() => { resetBase(); + clearAgentCache(); savedEnv = process.env; process.env = {...savedEnv}; }); afterEach(() => { process.env = savedEnv; + clearAgentCache(); }); const prefix = `${MODULE_NAME}/${VERSION} node/${CACHED_NODE_VERSION} os/${process.platform}`; @@ -69,9 +72,19 @@ describe('createDefault', () => { want: `${prefix} agent/claude-code`, }, { - name: 'multiple agents all reported', + name: 'multiple agents are ambiguous and omit the agent segment', env: {CLAUDECODE: '1', CURSOR_AGENT: '1'}, - want: `${prefix} agent/claude-code agent/cursor`, + want: prefix, + }, + { + name: 'AGENT fallback to known product', + env: {AGENT: 'goose'}, + want: `${prefix} agent/goose`, + }, + { + name: 'AGENT fallback to unknown', + env: {AGENT: 'somethingweird'}, + want: `${prefix} agent/unknown`, }, { name: 'databricks runtime', diff --git a/packages/core/vitest.config.browser.ts b/packages/core/vitest.config.browser.ts index d31bf584..0a89d223 100644 --- a/packages/core/vitest.config.browser.ts +++ b/packages/core/vitest.config.browser.ts @@ -12,6 +12,7 @@ export default defineConfig({ exclude: [ 'tests/profiles/resolve.test.ts', 'tests/clientinfo/default.test.ts', + 'tests/clientinfo/agent.test.ts', ], }, }); From 15818235228e9236b0b5717db483b499f0369d57 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 14:51:23 +0200 Subject: [PATCH 2/2] Report agent/multiple when agents are stacked 
Nested agents (e.g. a Cursor CLI subagent spawned by Claude Code) set multiple agent env vars on the same process. The previous ambiguity guard silently dropped the signal in that case. Report "multiple" instead so the stacked case is visible in telemetry. Also collapse the known BYOK false positive where Copilot CLI users have COPILOT_MODEL set alongside COPILOT_CLI: that pair now reports "copilot-cli" rather than "multiple". Co-authored-by: Isaac Signed-off-by: simon --- packages/core/src/clientinfo/agent.ts | 25 ++++++++++++------- packages/core/tests/clientinfo/agent.test.ts | 18 ++++++++++--- .../core/tests/clientinfo/default.test.ts | 4 +-- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/packages/core/src/clientinfo/agent.ts b/packages/core/src/clientinfo/agent.ts index f77fa932..0b43af7f 100644 --- a/packages/core/src/clientinfo/agent.ts +++ b/packages/core/src/clientinfo/agent.ts @@ -69,27 +69,32 @@ function agentEnvFallback(): string { * Returns: * * - The product name when exactly one known env var is set. - * - `""` when multiple known env vars are set (ambiguity). + * - `"multiple"` when multiple known env vars are set. Agent env vars + * can be stacked when one agent invokes another as a subagent (e.g. + * Claude Code spawning a Cursor CLI subprocess), so the child process + * inherits env vars from multiple layers. * - When no known env var is set and `AGENT` is a non-empty value: the * value itself if it names a known product, otherwise `"unknown"`. * - `""` when nothing is set. - * - * Unlike CI/CD detection (which returns the first match), agent - * detection uses an ambiguity guard because agent env vars can be - * stacked (e.g. running Cline inside Cursor). 
*/ export function lookupAgentProvider(): string { - const matches: string[] = []; + let matches: string[] = []; for (const a of KNOWN_AGENTS) { if (a.envVar in process.env) { matches.push(a.product); } } + // Known BYOK false positive: Copilot CLI users often set COPILOT_MODEL + // alongside COPILOT_CLI. Treat the pair as a single copilot-cli signal + // rather than a stacked multi-agent setup. + if (matches.includes('copilot-cli') && matches.includes('copilot-vscode')) { + matches = matches.filter(m => m !== 'copilot-vscode'); + } if (matches.length === 1) { return matches[0]; } if (matches.length > 1) { - return ''; + return 'multiple'; } return agentEnvFallback(); } @@ -104,10 +109,12 @@ let cached: string | undefined; * - The known product name when exactly one agent is detected via * explicit env matchers, or when `AGENT` is set to a known product * name and no explicit matcher fired. + * - `"multiple"` when multiple explicit matchers fire for different + * agents (typically nested agents, e.g. Cursor CLI running as a + * Claude Code subagent). * - `"unknown"` when no explicit matcher fired and `AGENT` is set to a * value that is not a known product name. - * - `""` when no agent is detected, or when multiple explicit matchers - * fire for different agents (ambiguity). + * - `""` when no agent is detected. */ export function agentProvider(): string { cached ??= lookupAgentProvider(); diff --git a/packages/core/tests/clientinfo/agent.test.ts b/packages/core/tests/clientinfo/agent.test.ts index 0f9a779f..fbcffc8e 100644 --- a/packages/core/tests/clientinfo/agent.test.ts +++ b/packages/core/tests/clientinfo/agent.test.ts @@ -107,9 +107,14 @@ describe('lookupAgentProvider', () => { want: 'claude-code', }, { - name: 'multiple agents are ambiguous', + name: 'multiple agents stacked (e.g. 
Cursor CLI subagent invoked by Claude Code)', env: {CLAUDECODE: '1', CURSOR_AGENT: '1'}, - want: '', + want: 'multiple', + }, + { + name: 'three stacked agents also report multiple', + env: {CLAUDECODE: '1', CURSOR_AGENT: '1', AUGMENT_AGENT: '1'}, + want: 'multiple', }, { name: 'goose via AGENT', @@ -157,9 +162,14 @@ describe('lookupAgentProvider', () => { want: 'claude-code', }, { - name: 'COPILOT_CLI and COPILOT_MODEL together is ambiguous', + name: 'COPILOT_CLI + COPILOT_MODEL collapses to copilot-cli (BYOK)', env: {COPILOT_CLI: '1', COPILOT_MODEL: 'gpt-4'}, - want: '', + want: 'copilot-cli', + }, + { + name: 'COPILOT_CLI + COPILOT_MODEL + CLAUDECODE still reports multiple after BYOK collapse', + env: {COPILOT_CLI: '1', COPILOT_MODEL: 'gpt-4', CLAUDECODE: '1'}, + want: 'multiple', }, ]; diff --git a/packages/core/tests/clientinfo/default.test.ts b/packages/core/tests/clientinfo/default.test.ts index 95b8cbb5..b691bb92 100644 --- a/packages/core/tests/clientinfo/default.test.ts +++ b/packages/core/tests/clientinfo/default.test.ts @@ -72,9 +72,9 @@ describe('createDefault', () => { want: `${prefix} agent/claude-code`, }, { - name: 'multiple agents are ambiguous and omit the agent segment', + name: 'multiple agents report the multiple sentinel', env: {CLAUDECODE: '1', CURSOR_AGENT: '1'}, - want: prefix, + want: `${prefix} agent/multiple`, }, { name: 'AGENT fallback to known product',