From ceec20576623e0794f2d8f161a4ce5303b21c181 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 5 Jun 2026 08:58:37 +0000 Subject: [PATCH] Honor AI_AGENT and pass raw values through Adds the Vercel @vercel/detect-agent AI_AGENT= env var as a secondary fallback after the agents.md AGENT= standard. AGENT wins when both are non-empty; empty is treated as unset for both. Also changes the fallback behavior to pass the raw value through (sanitized via sanitize() and capped at 64 chars) instead of coercing unrecognized names to the literal "unknown". Bucketing arbitrary tool names is an ETL concern, not the SDK's; the prior coercion buried useful signal such as versioned variants like "claude-code_2-1-141". Mirrors databricks-sdk-go#1683, databricks-sdk-py#1454, and databricks-sdk-java#815. --- packages/core/src/clientinfo/agent.ts | 52 ++++++++++------ packages/core/tests/clientinfo/agent.test.ts | 60 +++++++++++++++++-- .../core/tests/clientinfo/default.test.ts | 4 +- 3 files changed, 92 insertions(+), 24 deletions(-) diff --git a/packages/core/src/clientinfo/agent.ts b/packages/core/src/clientinfo/agent.ts index 2ea806ea..90284b00 100644 --- a/packages/core/src/clientinfo/agent.ts +++ b/packages/core/src/clientinfo/agent.ts @@ -10,18 +10,29 @@ * @module */ +import {sanitize} from './clientinfo'; + interface KnownAgent { readonly envVar: string; readonly product: string; } -// Name of the agents.md standard env var. When set to a value that no -// known agent recognizes, detection falls back to "unknown". +// Name of the agents.md standard env var. const AGENT_ENV_VAR = 'AGENT'; -// Canonical list of AI coding agents. Keep this list in sync with the -// Go, Java, and Python SDKs. Agents are listed alphabetically by product -// name. +// Name of the Vercel @vercel/detect-agent convention env var. It serves +// the same purpose as AGENT_ENV_VAR; agentEnvFallback consults it only when +// AGENT_ENV_VAR is unset or empty. +const AI_AGENT_ENV_VAR = 'AI_AGENT'; + +// Caps fallback values to keep the user-agent bounded. Explicit-matcher +// products are short by construction; only the fallback path can carry +// arbitrary lengths. +const MAX_AGENT_FALLBACK_LEN = 64; + +// Canonical list of AI coding agents. Keep this list, and the AGENT / +// AI_AGENT fallback handling in agentEnvFallback, in sync with the Go, +// Java, and Python SDKs. Agents are listed alphabetically by product name. const KNOWN_AGENTS: readonly KnownAgent[] = [ // The amp agent also sets AGENT=amp, handled by the central fallback. {envVar: 'AMP_CURRENT_THREAD_ID', product: 'amp'}, @@ -45,15 +56,22 @@ const KNOWN_AGENTS: readonly KnownAgent[] = [ {envVar: 'WINDSURF_AGENT', product: 'windsurf'}, ]; +/** + * Returns a sanitized, length-capped name from `AGENT` or `AI_AGENT`, + * preferring `AGENT` when both are non-empty. Empty is treated as unset for + * both. The value is passed through rather than categorized so that new + * names are propagated without the need to update the list of known agents. + */ function agentEnvFallback(): string { - const v = process.env[AGENT_ENV_VAR]; + let v = process.env[AGENT_ENV_VAR]; if (v === undefined || v === '') { - return ''; + v = process.env[AI_AGENT_ENV_VAR]; } - if (KNOWN_AGENTS.some(a => a.product === v)) { - return v; + if (v === undefined || v === '') { + return ''; } - return 'unknown'; + // slice is a no-op when the value is already within the cap. + return sanitize(v).slice(0, MAX_AGENT_FALLBACK_LEN); } /** @@ -61,8 +79,7 @@ function agentEnvFallback(): string { * detected product name. * * Explicit product-specific env vars always take precedence over the - * generic agents.md `AGENT` env var. `AGENT` is consulted only as a - * fallback when no explicit matcher fires, so that an explicit signal + * generic `AGENT` and `AI_AGENT` env vars, so that an explicit signal * (e.g. `CLAUDECODE=1`) always wins over a conflicting `AGENT=` * value. * @@ -73,8 +90,8 @@ function agentEnvFallback(): string { * can be stacked when one agent invokes another as a subagent (e.g. * Claude Code spawning a Cursor CLI subprocess), so the child process * inherits env vars from multiple layers. - * - When no known env var is set and `AGENT` is a non-empty value: the - * value itself if it names a known product, otherwise `"unknown"`. + * - A sanitized, length-capped value from `AGENT` or `AI_AGENT` when no + * known env var is set (see {@link agentEnvFallback}). * - `""` when nothing is set. */ export function lookupAgentProvider(): string { @@ -101,13 +118,12 @@ let cached: string | undefined; * Returns one of: * * - The known product name when exactly one agent is detected via - * explicit env matchers, or when `AGENT` is set to a known product - * name and no explicit matcher fired. + * explicit env matchers. * - `"multiple"` when multiple explicit matchers fire for different * agents (typically nested agents, e.g. Cursor CLI running as a * Claude Code subagent). - * - `"unknown"` when no explicit matcher fired and `AGENT` is set to a - * value that is not a known product name. + * - A sanitized, length-capped value from `AGENT` or `AI_AGENT` when no + * explicit matcher fired (see {@link agentEnvFallback}). * - `""` when no agent is detected. */ export function agentProvider(): string { diff --git a/packages/core/tests/clientinfo/agent.test.ts b/packages/core/tests/clientinfo/agent.test.ts index 967caa06..5290b3f9 100644 --- a/packages/core/tests/clientinfo/agent.test.ts +++ b/packages/core/tests/clientinfo/agent.test.ts @@ -135,9 +135,19 @@ describe('lookupAgentProvider', () => { want: 'cursor', }, { - name: 'AGENT with unknown value falls back to unknown', - env: {AGENT: 'somethingweird'}, - want: 'unknown', + name: 'AGENT with unrecognized value passes through (sanitized)', + env: {AGENT: 'someweirdthing'}, + want: 'someweirdthing', + }, + { + name: 'AGENT with disallowed chars is sanitized to hyphens', + env: {AGENT: 'claude code/agent'}, + want: 'claude-code-agent', + }, + { + name: 'AGENT longer than the cap is truncated', + env: {AGENT: 'a'.repeat(100)}, + want: 'a'.repeat(64), }, { name: 'AGENT empty string does not trigger fallback', @@ -160,7 +170,7 @@ describe('lookupAgentProvider', () => { want: 'claude-code', }, { - name: 'known matcher wins over AGENT fallback to unknown', + name: 'known matcher wins over unrecognized AGENT fallback', env: {AGENT: 'somethingunknown', CLAUDECODE: '1'}, want: 'claude-code', }, @@ -169,6 +179,48 @@ describe('lookupAgentProvider', () => { env: {VSCODE_AGENT: '1', COPILOT_CLI: '1'}, want: 'multiple', }, + // AI_AGENT fallback (Vercel @vercel/detect-agent convention). + { + name: 'AI_AGENT=cursor falls back to cursor', + env: {AI_AGENT: 'cursor'}, + want: 'cursor', + }, + { + name: 'AI_AGENT empty string does not trigger fallback', + env: {AI_AGENT: ''}, + want: '', + }, + { + name: 'known matcher wins over AI_AGENT fallback', + env: {AI_AGENT: 'somethingunknown', CLAUDECODE: '1'}, + want: 'claude-code', + }, + // AGENT vs AI_AGENT precedence: AGENT wins when both are non-empty. + { + name: 'AGENT wins over AI_AGENT when both are set to known products', + env: {AGENT: 'claude-code', AI_AGENT: 'cursor'}, + want: 'claude-code', + }, + { + name: 'AGENT set to unrecognized non-empty value still wins over AI_AGENT', + env: {AGENT: 'somethingunknown', AI_AGENT: 'cursor'}, + want: 'somethingunknown', + }, + { + name: 'AGENT set, AI_AGENT empty: AGENT value is used', + env: {AGENT: 'cursor', AI_AGENT: ''}, + want: 'cursor', + }, + { + name: 'empty AGENT falls through to AI_AGENT', + env: {AGENT: '', AI_AGENT: 'cursor'}, + want: 'cursor', + }, + { + name: 'both AGENT and AI_AGENT empty returns no agent', + env: {AGENT: '', AI_AGENT: ''}, + want: '', + }, ]; it.each(testCases)('$name', ({env, want}) => { diff --git a/packages/core/tests/clientinfo/default.test.ts b/packages/core/tests/clientinfo/default.test.ts index b691bb92..7e2d7cfc 100644 --- a/packages/core/tests/clientinfo/default.test.ts +++ b/packages/core/tests/clientinfo/default.test.ts @@ -82,9 +82,9 @@ describe('createDefault', () => { want: `${prefix} agent/goose`, }, { - name: 'AGENT fallback to unknown', + name: 'AGENT fallback passes unrecognized value through', env: {AGENT: 'somethingweird'}, - want: `${prefix} agent/unknown`, + want: `${prefix} agent/somethingweird`, }, { name: 'databricks runtime',