From 0aed3498fcce4a0b0c3d3d5a30148c0528a57e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BF=AE=E9=9B=A8?= <47820304+PeterGuy326@users.noreply.github.com> Date: Tue, 16 Jun 2026 17:32:26 +0800 Subject: [PATCH] feat(agent): accurate agent_code detection + per-channel agentId for stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tag each MCP request with which agent host is driving dws (agent_code) and a per-(machine × agent_code) instance id, so usage can be sliced by channel and instance in the data warehouse. Root cause it fixes: agent_code was only sent when the host injected DINGTALK_DWS_AGENTCODE (~99.98% empty), so the gateway logged none. Detection ladder (every signature observed on a real host / official docs, not guessed; unknown -> custom): T0 explicit DINGTALK_DWS_AGENTCODE T1 verified env signatures: claudecode (CLAUDECODE), codex (CODEX_SANDBOX), openclaw (OPENCLAW_BUNDLE_ROOT), hermes (HERMES_HOME) T2 VSCODE_BRAND value (covers the whole VS Code fork family) T3 macOS __CFBundleIdentifier map (qoder/cursor/vscode/workbuddy) T4 custom fallback identity.json v2 (machineId + per-agent_code agents map), deterministic dwsa_ derivation, transparent v1 migration. Backward-compatible wiring: x-dws-agent-id stays machine-level; new x-dws-agent-instance-id carries the per-channel id; X-Cli-Version emitted so old/new clients are distinguishable. Trust boundary (docs/agent-code.md): agent_code and the ids are self-reported and spoofable — fit for statistics ONLY, never for auth/limit/billing. Includes unit tests for every tier and the integration doc. 
--- docs/agent-code.md | 93 ++++++++++++ internal/app/runner.go | 23 ++- internal/app/runner_test.go | 52 ++++++- internal/auth/agent_code_detect.go | 161 ++++++++++++++++++++ internal/auth/agent_code_detect_test.go | 187 ++++++++++++++++++++++++ internal/auth/identity.go | 165 +++++++++++++++++++-- internal/auth/identity_agentid_test.go | 111 ++++++++++++++ 7 files changed, 775 insertions(+), 17 deletions(-) create mode 100644 docs/agent-code.md create mode 100644 internal/auth/agent_code_detect.go create mode 100644 internal/auth/agent_code_detect_test.go create mode 100644 internal/auth/identity_agentid_test.go diff --git a/docs/agent-code.md b/docs/agent-code.md new file mode 100644 index 00000000..f993c9fe --- /dev/null +++ b/docs/agent-code.md @@ -0,0 +1,93 @@ +# Agent identification (agent_code & agentId) + +dws tags every MCP request with **which agent host is driving it** and a +**per-instance id**, so usage can be sliced by channel/instance in the data +warehouse. This page is the integration contract. + +## What dws sends on the wire + +| Header | Meaning | Granularity | +|--------|---------|-------------| +| `x-dingtalk-dws-agent-code` | which agent host (claudecode / codex / qoder / cursor / custom …) | channel | +| `x-dws-agent-instance-id` | `dwsa_` + 12 base62 chars, derived from `machineId + agent_code` | machine × channel | +| `x-dws-agent-id` | stable per-install machine id (v1-compatible) | machine | +| `X-Cli-Version` | dws CLI version (segments old vs new clients) | — | + +`x-dws-agent-id` keeps its original machine-level meaning for backward +compatibility; `x-dws-agent-instance-id` is the new per-channel value. Old +clients send no `agent_code` / instance id — treat their absence as +"legacy/unknown", not an error. + +## How `agent_code` is resolved (confidence ladder) + +1. **T0 — explicit declaration:** `DINGTALK_DWS_AGENTCODE=your-agent-code`. **Use this.** +2. 
**T1 — verified env signature:** an agent that auto-sets a distinctive var + (`CLAUDECODE`, `CODEX_SANDBOX`, `OPENCLAW_BUNDLE_ROOT`, `HERMES_HOME`). +3. **T2 — `VSCODE_BRAND`:** every VS Code fork declares its brand — one rule + covers Cursor / Windsurf / Trae / Qoder / Kiro / … incl. future forks. +4. **T3 — macOS `__CFBundleIdentifier`:** known agent app bundles. +5. **T4 — `custom`:** unknown host. Never guessed. + +## Declaring your agent (recommended — the only fully-general path) + +Auto-detection cannot cover every agent: most terminal agents (gemini/ +antigravity, aider, opencode, qwen-code, crush, goose, kimi, amazon-q, +continue, …) expose **no reliable self-identifying env var** — only user-set +API keys, which must not be used as identity. The robust answer is: **the host +sets `DINGTALK_DWS_AGENTCODE` in the env block where it launches dws as an MCP +server.** This is accurate for any agent, on any OS, and is future-proof. + +MCP server config example (JSON-style hosts): +```jsonc +{ + "mcpServers": { + "dingtalk-workspace": { + "command": "dws", + "args": ["mcp", "..."], + "env": { "DINGTALK_DWS_AGENTCODE": "your-agent-code" } + } + } +} +``` + +### Canonical codes + +`claudecode`, `codex`, `cursor`, `vscode`, `qoder`, `windsurf`, `trae`, +`workbuddy`, `openclaw`, `hermes`, `codebuddy`, `comate`, `lingma`, `gemini`, +`aider`, `opencode`, `goose`, `crush`, `kimi`, `amazonq`, `continue`, … +Use a stable lowercase slug; unknown values are kept as-is (lowercased, +spaces stripped), so a new agent name flows through cleanly. + +## Trust & limitations — READ THIS + +**`agent_code` AND the ids (`x-dws-agent-id`, `x-dws-agent-instance-id`) are +self-reported, best-effort signals, NOT an authenticated identity.** + +- `agent_code`: every declaration/auto-detect signal is an env var the + host/user controls — spoofable (`export CLAUDECODE=1` → dws reports + `claudecode`). 
+- The ids are **even easier to forge**: they are generated, stored, and sent + entirely client-side. `machineId` is a random UUID in the plaintext + `~/.dws/identity.json` (which the user owns), and the instance id is just + `sha256(machineId + agent_code)`. Editing that one file — or rewriting the + header — lets anyone mint, split, rotate, or impersonate ids at will. The + `dwsa_` prefix does NOT make it a secure identifier. + +- ✅ **Fit for statistics / observability** (the intended use): there is no + incentive to misreport one's own agent, and real hosts emit real signals, so + aggregate per-channel metrics are reliable in practice. +- ❌ **NOT fit for authentication, authorization, rate-limiting, billing, or + revocation.** Anything where a party benefits from lying must not trust this + field. For control-plane use you need a gateway-issued **authoritative** + agentId bound to a verified credential (clientId / PAT / OAuth) — a separate, + heavier mechanism, deliberately out of scope here. + +Treat `agent_code` / `x-dws-agent-instance-id` as analytics dimensions only. + +## Gateway side (required for the data to land) + +dws sending the headers is necessary but not sufficient. The gateway must: +1. add `x-dingtalk-dws-agent-code`, `x-dws-agent-instance-id`, `X-Cli-Version` + to the upstream-header pass-through allowlist (otherwise they are stripped); +2. log them as fields, and deliver them to the warehouse (alongside the + existing flow-control / execution logs). diff --git a/internal/app/runner.go b/internal/app/runner.go index bc4b48dd..624c06ed 100644 --- a/internal/app/runner.go +++ b/internal/app/runner.go @@ -687,7 +687,28 @@ func resolveIdentityHeaders() map[string]string { if sessionID == "" { sessionID = os.Getenv(envRewindSessionID) } - agentCode, _ := authpkg.AgentCodeFromEnv() + // Resolve the agent_code (accuracy-first; unknown hosts -> custom) and the + // per-(machine × agent_code) instance id. 
This is what makes agent_code + // actually report a value: previously it was sent only when the host + // injected DINGTALK_DWS_AGENTCODE (empty ~99.98% of the time), so the + // gateway logged no agent_code at all. DetectAgentCode always yields a code. + // + // Backward-compat by design (additive, not breaking): + // - x-dws-agent-id keeps its v1 meaning = machine-level install UUID + // (set by id.Headers() above), so old/new clients stay comparable. + // - x-dws-agent-instance-id is NEW: the per-(machine × agent_code) id. + // Old clients don't send it, which is itself a clean old/new signal. + // Note: x-dws-channel (DWS_CHANNEL) is a separate axis, untouched. + agentCode, agentCodeSig := authpkg.DetectAgentCode() + headers["x-dws-agent-instance-id"] = id.ResolveAgentID(defaultConfigDir(), agentCode, agentCodeSig) + + // Emit the CLI version on the wire so the gateway can segment old vs new + // clients (and scope agent_code coverage / adoption). The header constant + // existed but was never set; wire it here. + if version != "" { + headers[transport.HeaderVersion] = version + } + envHeaders := map[string]string{ "x-dingtalk-agent": os.Getenv(envDingtalkAgent), "x-dingtalk-dws-agent-code": agentCode, diff --git a/internal/app/runner_test.go b/internal/app/runner_test.go index 1327e567..e794bc1b 100644 --- a/internal/app/runner_test.go +++ b/internal/app/runner_test.go @@ -328,14 +328,62 @@ func TestResolveIdentityHeadersForwardsAgentCode(t *testing.T) { } } +func TestResolveIdentityHeadersAgentIdentityFields(t *testing.T) { + setupRuntimeCommandTest(t) + t.Setenv(authpkg.AgentCodeEnv, "qoder") + + headers := resolveIdentityHeaders() + + // x-dws-agent-id stays machine-level (v1 install UUID): non-empty and NOT + // the dwsa_ instance form — this is the cross-version continuity anchor. 
+ machineID := headers["x-dws-agent-id"] + if machineID == "" { + t.Fatal("x-dws-agent-id must stay populated (machine-level)") + } + if strings.HasPrefix(machineID, "dwsa_") { + t.Fatalf("x-dws-agent-id must remain machine-level, got instance form %q", machineID) + } + + // x-dws-agent-instance-id is the NEW per-(machine × agent_code) id. + instID := headers["x-dws-agent-instance-id"] + if !strings.HasPrefix(instID, "dwsa_") { + t.Fatalf("x-dws-agent-instance-id must be a derived instance id, got %q", instID) + } + if instID == machineID { + t.Fatal("instance id must differ from machine id") + } + + // CLI version must now be on the wire so the gateway can segment old/new. + if headers[transport.HeaderVersion] == "" { + t.Fatalf("%s must be emitted", transport.HeaderVersion) + } +} + func TestResolveIdentityHeadersIgnoresReversedAgentCodeEnv(t *testing.T) { setupRuntimeCommandTest(t) t.Setenv(authpkg.AgentCodeEnv, "") t.Setenv("DWS_DINGTALK_AGENTCODE", " compat ") + // Isolate from ambient agent-host detection signals so this test asserts + // only the reversed-env-name behavior (the suite itself may run under + // Claude Code / Qoder / VS Code, whose signals would otherwise be detected). + for _, k := range []string{ + "CLAUDECODE", "CLAUDE_CODE_ENTRYPOINT", + "OPENCLAW_BUNDLE_ROOT", "OPENCLAW_RUNTIME_ROLE", "HERMES_HOME", + "CODEX_SANDBOX", "VSCODE_BRAND", "__CFBundleIdentifier", + } { + t.Setenv(k, "") + } headers := resolveIdentityHeaders() - if got := headers["x-dingtalk-dws-agent-code"]; got != "" { - t.Fatalf("x-dingtalk-dws-agent-code = %q, want empty because reversed env is ignored", got) + // The reversed env name must never be consumed. With no canonical + // declaration and no host signature, agent_code resolves to the honest + // "custom" fallback — and crucially is NOT the reversed value. 
+ got := headers["x-dingtalk-dws-agent-code"] + if got == "compat" { + t.Fatalf("x-dingtalk-dws-agent-code = %q, reversed env must be ignored", got) + } + if got != authpkg.AgentCodeCustom { + t.Fatalf("x-dingtalk-dws-agent-code = %q, want %q (fallback)", got, authpkg.AgentCodeCustom) } } diff --git a/internal/auth/agent_code_detect.go b/internal/auth/agent_code_detect.go new file mode 100644 index 00000000..5ffd9a65 --- /dev/null +++ b/internal/auth/agent_code_detect.go @@ -0,0 +1,161 @@ +// Copyright 2026 Alibaba Group +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// agent_code_detect.go resolves the agent_code — which agent HOST is driving +// dws (claudecode / qoder / cursor / vscode / openclaw / hermes / ...). It +// fills the x-dingtalk-dws-agent-code header for per-channel statistics. +// +// SEPARATE axis from DWS_CHANNEL / x-dws-channel (a distribution channel code); +// the two are never conflated here. +// +// Design contract — ACCURACY OVER COVERAGE, but maximize accurate coverage: +// - Prefer generalizable, host-declared signals so one rule covers a whole +// family (VSCODE_BRAND covers every VS Code fork, present and future). +// - Every per-host signature below is OBSERVED on a real host (live process +// env via `ps eww`, or the app bundle Info.plist), not guessed. +// - Anything unidentified falls back to AgentCodeCustom — never guess. +// - Deliberately NOT used: TERM_PROGRAM (reports the terminal, e.g. 
iTerm, +// not the agent host) and fuzzy parent-process name matching. +package auth + +import ( + "os" + "strings" +) + +// AgentCodeCustom is the honest fallback for any host we cannot identify. +const AgentCodeCustom = "custom" + +// hostSignature is a verified env fingerprint for a known agent host. EnvKeys +// match when any listed key is present and non-empty. +type hostSignature struct { + Code string + EnvKeys []string +} + +// knownSignatures: CLI / daemon agents that inject a distinctive env var, which +// the dws subprocess they spawn inherits. All verified on a real machine +// (2026-06-16) via live process env / launch env — not guessed. +var knownSignatures = []hostSignature{ + // Claude Code — verified: CLAUDECODE=1, CLAUDE_CODE_ENTRYPOINT=cli. + {Code: "claudecode", EnvKeys: []string{"CLAUDECODE", "CLAUDE_CODE_ENTRYPOINT"}}, + // OpenClaw — verified on the running daemon: OPENCLAW_BUNDLE_ROOT. + {Code: "openclaw", EnvKeys: []string{"OPENCLAW_BUNDLE_ROOT", "OPENCLAW_RUNTIME_ROLE"}}, + // Hermes — verified on the running gateway: HERMES_HOME. + {Code: "hermes", EnvKeys: []string{"HERMES_HOME"}}, + // OpenAI Codex — CODEX_SANDBOX is auto-set by Codex for the subprocesses it + // spawns (e.g. CODEX_SANDBOX=seatbelt on macOS), and Codex filters this + // CODEX_-prefixed name out of user .env to prevent spoofing — so its + // presence reliably means "running under Codex". + // Source: developers.openai.com/codex/concepts/sandboxing + {Code: "codex", EnvKeys: []string{"CODEX_SANDBOX"}}, +} + +// NOTE on coverage limits (honest, not a TODO to silently ignore): +// Most terminal agents (gemini-cli/antigravity, aider, opencode, qwen-code, +// crush, goose, kimi, amazon-q, continue, ...) expose NO reliable +// self-identifying env marker — only user-set API-key/config vars, which we +// must not key off (a user setting GEMINI_API_KEY is not "running under +// gemini"). They therefore resolve to custom unless they declare themselves. 
+// +// The authoritative, fully-general path to 100% coverage is the T0 declaration +// contract: a host sets DINGTALK_DWS_AGENTCODE= when it launches dws. +// That is accurate for ANY agent (present or future) on ANY OS, and is what an +// integrating host should wire up. Auto-detection (signatures / VSCODE_BRAND / +// bundle id) is a best-effort supplement for hosts that have not declared. + +// bundleIDToCode maps macOS app bundle identifiers to agent codes. The bundle +// id is exposed via __CFBundleIdentifier and inherited by child processes the +// IDE spawns (including dws), so it identifies the host even from an integrated +// terminal. Verified from each app's Info.plist (2026-06-16). Only known agent +// bundles map; everything else (iTerm, Terminal, ...) falls through to custom. +// +// macOS-only signal: __CFBundleIdentifier does not exist on Linux/Windows, so +// this map is simply a no-op there (os.Getenv returns ""). +var bundleIDToCode = map[string]string{ + "com.qoder.ide": "qoder", + "com.todesktop.230313mzl4w4u92": "cursor", // Cursor's ToDesktop bundle id + "com.microsoft.VSCode": "vscode", + "com.workbuddy.workbuddy": "workbuddy", +} + +// DetectAgentCode resolves the agent_code via a confidence ladder and returns +// the normalized code plus the signal that decided it: +// +// T0 explicit host declaration (DINGTALK_DWS_AGENTCODE — dedicated field) +// T1 verified per-agent env signature (CLI/daemon agents) +// T2 VSCODE_BRAND value (every VS Code fork declares its brand) +// T3 macOS app bundle id (known agent bundles only) +// T4 fallback -> custom (never guess) +func DetectAgentCode() (code string, signal string) { + // T0: host explicitly declares its agent_code — highest confidence. + if v, name := AgentCodeFromEnv(); v != "" { + return normalizeAgentCode(v), "env:" + name + } + + // T1: verified per-agent env signature (most specific — wins over the IDE + // it may be running inside). 
+ for _, sig := range knownSignatures { + for _, k := range sig.EnvKeys { + if strings.TrimSpace(os.Getenv(k)) != "" { + return sig.Code, "sig:" + k + } + } + } + + // T2: VS Code fork family. The brand value IS the host's self-declaration, + // so this single rule covers Qoder/Cursor/VS Code/Windsurf/Trae/Kiro/... — + // including forks that don't exist yet. + if b := strings.TrimSpace(os.Getenv("VSCODE_BRAND")); b != "" { + return normalizeAgentCode(b), "env:VSCODE_BRAND" + } + + // T3: macOS app bundle id (known agent bundles only). + if id := strings.TrimSpace(os.Getenv("__CFBundleIdentifier")); id != "" { + if c, ok := bundleIDToCode[id]; ok { + return c, "bundle:" + id + } + } + + // T4: unknown host — honest fallback, no guessing. + return AgentCodeCustom, "fallback" +} + +// normalizeAgentCode maps host-declared names/brands to canonical agent_code +// values. Unrecognized but non-empty input is lowercased, space-stripped and +// kept as-is — still a host declaration, so still accurate (this is what gives +// automatic coverage of new VS Code forks via VSCODE_BRAND). 
+func normalizeAgentCode(raw string) string { + s := strings.ToLower(strings.TrimSpace(raw)) + s = strings.Join(strings.Fields(s), "") // drop ALL whitespace (tab, CR/LF, NBSP), not just ' ': the result is sent as an HTTP header value + switch s { + case "": + return AgentCodeCustom + case "claude", "claude-code", "claude_code", "claudecode": + return "claudecode" + case "qoder", "qoderwork": + return "qoder" + case "workbuddy", "work-buddy": + return "workbuddy" + case "visualstudiocode", "code", "code-oss", "vscode": + return "vscode" + case "cursor": + return "cursor" + case "windsurf": + return "windsurf" + case "trae", "traecn": + return "trae" + default: + return s + } +} diff --git a/internal/auth/agent_code_detect_test.go b/internal/auth/agent_code_detect_test.go new file mode 100644 index 00000000..0c1274ca --- /dev/null +++ b/internal/auth/agent_code_detect_test.go @@ -0,0 +1,187 @@ +// Copyright 2026 Alibaba Group +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "strings" + "testing" +) + +// agentCodeSignalEnvs is every env DetectAgentCode consults. Tests clear them +// all so each case starts clean (the suite itself runs under a real host). 
+var agentCodeSignalEnvs = []string{ + AgentCodeEnv, + "CLAUDECODE", "CLAUDE_CODE_ENTRYPOINT", + "OPENCLAW_BUNDLE_ROOT", "OPENCLAW_RUNTIME_ROLE", + "HERMES_HOME", "CODEX_SANDBOX", + "VSCODE_BRAND", "__CFBundleIdentifier", + "TERM_PROGRAM", "DWS_CHANNEL", +} + +func clearAgentCodeEnv(t *testing.T) { + t.Helper() + for _, k := range agentCodeSignalEnvs { + t.Setenv(k, "") + } +} + +func TestDetectAgentCode_HostDeclaration_T0(t *testing.T) { + clearAgentCodeEnv(t) + t.Setenv(AgentCodeEnv, "Qoder") + code, sig := DetectAgentCode() + if code != "qoder" { + t.Fatalf("want qoder, got %q", code) + } + if !strings.HasPrefix(sig, "env:"+AgentCodeEnv) { + t.Fatalf("want env signal, got %q", sig) + } +} + +func TestDetectAgentCode_VerifiedSignatures_T1(t *testing.T) { + cases := []struct { + env, val, want string + }{ + {"CLAUDECODE", "1", "claudecode"}, + {"CLAUDE_CODE_ENTRYPOINT", "cli", "claudecode"}, + {"OPENCLAW_BUNDLE_ROOT", "/Users/x/.openclaw-bundle", "openclaw"}, + {"HERMES_HOME", "/Users/x/.hermes", "hermes"}, + {"CODEX_SANDBOX", "seatbelt", "codex"}, + } + for _, c := range cases { + t.Run(c.env, func(t *testing.T) { + clearAgentCodeEnv(t) + t.Setenv(c.env, c.val) + code, sig := DetectAgentCode() + if code != c.want { + t.Fatalf("%s=%s: want %q, got %q", c.env, c.val, c.want, code) + } + if !strings.HasPrefix(sig, "sig:") { + t.Fatalf("want sig:* signal, got %q", sig) + } + }) + } +} + +func TestDetectAgentCode_VSCodeBrand_T2(t *testing.T) { + cases := map[string]string{ + "Qoder": "qoder", + "Cursor": "cursor", + "Visual Studio Code": "vscode", + "Windsurf": "windsurf", + "Trae": "trae", + "SomeNewFork": "somenewfork", // generic coverage of future forks + } + for brand, want := range cases { + t.Run(brand, func(t *testing.T) { + clearAgentCodeEnv(t) + t.Setenv("VSCODE_BRAND", brand) + code, sig := DetectAgentCode() + if code != want { + t.Fatalf("VSCODE_BRAND=%q: want %q, got %q", brand, want, code) + } + if sig != "env:VSCODE_BRAND" { + t.Fatalf("want 
env:VSCODE_BRAND signal, got %q", sig) + } + }) + } +} + +func TestDetectAgentCode_BundleID_T3(t *testing.T) { + cases := map[string]string{ + "com.qoder.ide": "qoder", + "com.todesktop.230313mzl4w4u92": "cursor", + "com.microsoft.VSCode": "vscode", + "com.workbuddy.workbuddy": "workbuddy", + } + for id, want := range cases { + t.Run(id, func(t *testing.T) { + clearAgentCodeEnv(t) + t.Setenv("__CFBundleIdentifier", id) + code, sig := DetectAgentCode() + if code != want { + t.Fatalf("bundle %q: want %q, got %q", id, want, code) + } + if !strings.HasPrefix(sig, "bundle:") { + t.Fatalf("want bundle:* signal, got %q", sig) + } + }) + } +} + +// An unknown bundle id (e.g. a plain terminal) must NOT be labeled — falls to +// custom. +func TestDetectAgentCode_UnknownBundleIsCustom(t *testing.T) { + clearAgentCodeEnv(t) + t.Setenv("__CFBundleIdentifier", "com.googlecode.iterm2") + code, _ := DetectAgentCode() + if code != AgentCodeCustom { + t.Fatalf("unknown bundle must be custom, got %q", code) + } +} + +func TestDetectAgentCode_Fallback_Custom(t *testing.T) { + clearAgentCodeEnv(t) + code, sig := DetectAgentCode() + if code != AgentCodeCustom { + t.Fatalf("want custom, got %q", code) + } + if sig != "fallback" { + t.Fatalf("want fallback, got %q", sig) + } +} + +// TERM_PROGRAM and DWS_CHANNEL must never decide agent_code. +func TestDetectAgentCode_IgnoresNoise(t *testing.T) { + clearAgentCodeEnv(t) + t.Setenv("TERM_PROGRAM", "iTerm.app") + t.Setenv("DWS_CHANNEL", "Qoderwork") + code, _ := DetectAgentCode() + if code != AgentCodeCustom { + t.Fatalf("noise must not decide agent_code; want custom, got %q", code) + } +} + +// Precedence: explicit declaration (T0) > env signature (T1) > VSCODE_BRAND +// (T2). A CLI agent running inside an IDE reports the CLI agent. 
+func TestDetectAgentCode_Precedence(t *testing.T) { + clearAgentCodeEnv(t) + t.Setenv("CLAUDECODE", "1") // T1 + t.Setenv("VSCODE_BRAND", "Qoder") // T2 + if code, _ := DetectAgentCode(); code != "claudecode" { + t.Fatalf("T1 must beat T2, got %q", code) + } + t.Setenv(AgentCodeEnv, "workbuddy") // T0 + if code, _ := DetectAgentCode(); code != "workbuddy" { + t.Fatalf("T0 must beat all, got %q", code) + } +} + +func TestNormalizeAgentCode(t *testing.T) { + cases := map[string]string{ + "claude": "claudecode", + "Claude-Code": "claudecode", + "CLAUDECODE": "claudecode", + "Qoderwork": "qoder", + "WorkBuddy": "workbuddy", + "Visual Studio Code": "vscode", + "Cursor": "cursor", + "": AgentCodeCustom, + "some-new-ide": "some-new-ide", + } + for in, want := range cases { + if got := normalizeAgentCode(in); got != want { + t.Errorf("normalizeAgentCode(%q) = %q, want %q", in, got, want) + } + } +} diff --git a/internal/auth/identity.go b/internal/auth/identity.go index c800610d..d93abede 100644 --- a/internal/auth/identity.go +++ b/internal/auth/identity.go @@ -13,17 +13,27 @@ // identity.go manages agent instance identification for tracking. // -// Each agent installation gets a unique agentId (UUID v4) that persists across -// version upgrades but regenerates on reinstall. This identity is transparently -// injected into MCP HTTP headers for gateway-side data collection. +// Identity has two granularities, both injected into MCP HTTP headers for +// gateway-side statistics: +// +// - machineId: a stable per-install UUID v4 (persists across upgrades, +// regenerates on reinstall). Non-PII. +// - agentId: a per-(machine × agentCode) id derived deterministically from +// machineId + agent_code, so one machine running multiple agent hosts +// (e.g. claudecode + cursor) yields a distinct, idempotent agentId per +// agent_code. Computed client-side — no gateway round-trip required. +// +// The agent_code itself is resolved by DetectAgentCode (agent_code_detect.go). 
package auth import ( "crypto/rand" + "crypto/sha256" "encoding/json" - "fmt" + "math/big" "os" "path/filepath" + "time" "github.com/DingTalk-Real-AI/dingtalk-workspace-cli/pkg/config" "github.com/DingTalk-Real-AI/dingtalk-workspace-cli/pkg/edition" @@ -31,14 +41,34 @@ import ( const identityFile = "identity.json" +// identityVersion is the current on-disk schema version. v1 files (no +// machineId/agents) are migrated transparently on load. +const identityVersion = 2 + +// AgentEntry records the derived agentId for a single agent_code on this +// machine. +type AgentEntry struct { + AgentID string `json:"agentId"` + FirstSeen string `json:"firstSeen,omitempty"` + Detect string `json:"detect,omitempty"` // signal that decided the agent_code +} + // Identity holds the agent instance identification fields. +// +// AgentID is retained for backward compatibility with v1 readers: on a fresh +// install it is written equal to MachineID, and a v1 file's agentId is migrated +// into MachineID on load. type Identity struct { - AgentID string `json:"agentId"` // UUID v4, generated at install time - Source string `json:"source"` // data source, default "dws" + Version int `json:"version,omitempty"` + AgentID string `json:"agentId"` // v1 install UUID; == MachineID on v2 installs + MachineID string `json:"machineId,omitempty"` // stable per-install machine seed + Source string `json:"source"` // data source, default "dws" + Agents map[string]*AgentEntry `json:"agents,omitempty"` // agent_code -> derived agentId } // Load reads the identity from /identity.json. // Returns nil if the file does not exist or cannot be parsed. +// v1 files are migrated in-memory (machineId backfilled from agentId). 
func Load(configDir string) *Identity { path := filepath.Join(configDir, identityFile) data, err := os.ReadFile(path) @@ -49,21 +79,43 @@ func Load(configDir string) *Identity { if err := json.Unmarshal(data, &id); err != nil { return nil } - if id.AgentID == "" { + if id.AgentID == "" && id.MachineID == "" { return nil } + id.migrate() return &id } +// migrate backfills v2 fields from a v1 file in-memory (does not persist). +func (id *Identity) migrate() { + if id.MachineID == "" { + id.MachineID = id.AgentID // v1 install UUID becomes the machine seed + } + if id.AgentID == "" { + id.AgentID = id.MachineID + } + if id.Source == "" { + id.Source = "dws" + } + if id.Agents == nil { + id.Agents = make(map[string]*AgentEntry) + } + id.Version = identityVersion +} + // EnsureExists loads existing identity or creates a new one if not present. func EnsureExists(configDir string) *Identity { if id := Load(configDir); id != nil { return id } + u := generateUUID() id := &Identity{ - AgentID: generateUUID(), - Source: "dws", + Version: identityVersion, + AgentID: u, // kept == MachineID for backward-compat + MachineID: u, + Source: "dws", + Agents: make(map[string]*AgentEntry), } // Best-effort persist — don't fail the CLI if write fails. @@ -71,14 +123,51 @@ func EnsureExists(configDir string) *Identity { return id } -// Headers returns the identity as HTTP header key-value pairs. +// machineSeed returns the stable seed used to derive per-channel agentIds. +func (id *Identity) machineSeed() string { + if id.MachineID != "" { + return id.MachineID + } + return id.AgentID +} + +// ResolveAgentID returns the per-(machine × agentCode) agentId, deriving and +// persisting it on first sight of an agentCode. Idempotent: the same machine +// and agentCode always yields the same id, which is what makes cumulative +// per-agent_code statistics possible. An empty agentCode is treated as the +// custom bucket. 
+func (id *Identity) ResolveAgentID(configDir, agentCode, signal string) string { + if agentCode == "" { + agentCode = AgentCodeCustom + } + if id.Agents == nil { + id.Agents = make(map[string]*AgentEntry) + } + if e, ok := id.Agents[agentCode]; ok && e != nil && e.AgentID != "" { // e is nil when the file holds a null entry; re-derive instead of panicking + return e.AgentID + } + aid := deriveAgentID(id.machineSeed(), agentCode) + id.Agents[agentCode] = &AgentEntry{ + AgentID: aid, + FirstSeen: time.Now().UTC().Format(time.RFC3339), + Detect: signal, + } + _ = save(configDir, id) // best-effort cache; recomputable if it fails + return aid +} + +// Headers returns the identity as static HTTP header key-value pairs. +// x-dws-agent-id carries the stable machine-level id (== v1 install UUID), kept +// continuous across versions. The per-(machine × agent_code) instance id is a +// SEPARATE header (x-dws-agent-instance-id) injected by the caller via +// ResolveAgentID — it does not override x-dws-agent-id. func (id *Identity) Headers() map[string]string { if id == nil { return nil } h := make(map[string]string, 5) - if id.AgentID != "" { - h["x-dws-agent-id"] = id.AgentID + if seed := id.machineSeed(); seed != "" { + h["x-dws-agent-id"] = seed } if id.Source != "" { h["x-dws-source"] = id.Source @@ -104,6 +193,38 @@ func save(configDir string, id *Identity) error { return os.WriteFile(filepath.Join(configDir, identityFile), data, config.FilePerm) } +// deriveAgentID computes a stable, client-side agentId for a (machine, +// agentCode) pair: dwsa_<12 base62 chars of sha256(seed|agentCode)>. +// Deterministic and idempotent; no gateway allocation needed for statistics. 
+func deriveAgentID(seed, agentCode string) string { + sum := sha256.Sum256([]byte(seed + "|" + agentCode)) + enc := base62Encode(sum[:]) + for len(enc) < 12 { + enc = "0" + enc + } + return "dwsa_" + enc[:12] +} + +const base62Alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + +func base62Encode(b []byte) string { + n := new(big.Int).SetBytes(b) + if n.Sign() == 0 { + return "0" + } + base := big.NewInt(62) + mod := new(big.Int) + var out []byte + for n.Sign() > 0 { + n.DivMod(n, base, mod) + out = append(out, base62Alphabet[mod.Int64()]) + } + for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 { + out[i], out[j] = out[j], out[i] + } + return string(out) +} + // generateUUID produces a UUID v4 string. func generateUUID() string { var u [16]byte @@ -113,6 +234,22 @@ func generateUUID() string { } u[6] = (u[6] & 0x0f) | 0x40 // version 4 u[8] = (u[8] & 0x3f) | 0x80 // variant 10 - return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x", - u[0:4], u[4:6], u[6:8], u[8:10], u[10:16]) + return fmtUUID(u) +} + +func fmtUUID(u [16]byte) string { + const hexdig = "0123456789abcdef" + // 8-4-4-4-12 with dashes => 36 bytes + buf := make([]byte, 36) + pos := 0 + for i := 0; i < 16; i++ { + if i == 4 || i == 6 || i == 8 || i == 10 { + buf[pos] = '-' + pos++ + } + buf[pos] = hexdig[u[i]>>4] + buf[pos+1] = hexdig[u[i]&0x0f] + pos += 2 + } + return string(buf) } diff --git a/internal/auth/identity_agentid_test.go b/internal/auth/identity_agentid_test.go new file mode 100644 index 00000000..b2f2cb54 --- /dev/null +++ b/internal/auth/identity_agentid_test.go @@ -0,0 +1,111 @@ +// Copyright 2026 Alibaba Group +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package auth + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestDeriveAgentID_Format(t *testing.T) { + id := deriveAgentID("machine-abc", "claudecode") + if !strings.HasPrefix(id, "dwsa_") { + t.Fatalf("want dwsa_ prefix, got %q", id) + } + if len(id) != len("dwsa_")+12 { + t.Fatalf("want 12 base62 chars after prefix, got %q (len %d)", id, len(id)) + } +} + +func TestDeriveAgentID_Deterministic(t *testing.T) { + a := deriveAgentID("seed", "claudecode") + b := deriveAgentID("seed", "claudecode") + if a != b { + t.Fatalf("derivation must be deterministic: %q != %q", a, b) + } +} + +func TestDeriveAgentID_DistinctByChannelAndMachine(t *testing.T) { + m1c1 := deriveAgentID("machine1", "claudecode") + m1c2 := deriveAgentID("machine1", "cursor") + m2c1 := deriveAgentID("machine2", "claudecode") + if m1c1 == m1c2 { + t.Errorf("same machine, different channel must differ: %q", m1c1) + } + if m1c1 == m2c1 { + t.Errorf("different machine, same channel must differ: %q", m1c1) + } +} + +func TestResolveAgentID_IdempotentAndPersisted(t *testing.T) { + dir := t.TempDir() + id := EnsureExists(dir) + + first := id.ResolveAgentID(dir, "claudecode", "sig:CLAUDECODE") + second := id.ResolveAgentID(dir, "claudecode", "sig:CLAUDECODE") + if first != second { + t.Fatalf("ResolveAgentID must be idempotent: %q != %q", first, second) + } + + // Reload from disk — the channel entry must have persisted. 
+ reloaded := Load(dir) + if reloaded == nil { + t.Fatal("expected identity to persist") + } + e, ok := reloaded.Agents["claudecode"] + if !ok || e.AgentID != first { + t.Fatalf("persisted agentId mismatch: %+v", reloaded.Agents) + } + if e.Detect != "sig:CLAUDECODE" { + t.Errorf("want detect signal recorded, got %q", e.Detect) + } +} + +func TestResolveAgentID_EmptyAgentCodeGoesCustom(t *testing.T) { + dir := t.TempDir() + id := EnsureExists(dir) + got := id.ResolveAgentID(dir, "", "fallback") + want := id.ResolveAgentID(dir, AgentCodeCustom, "fallback") + if got != want { + t.Fatalf("empty agent_code must map to custom bucket: %q != %q", got, want) + } +} + +// A v1 file ({agentId, source}) must migrate: machineId backfilled from the +// legacy agentId, and per-channel derivation keyed off that stable seed. +func TestLoad_MigratesV1(t *testing.T) { + dir := t.TempDir() + v1 := `{"agentId":"504ddd36-3acf-45f6-9c1f-82f99260a419","source":"dws"}` + if err := os.WriteFile(filepath.Join(dir, identityFile), []byte(v1), 0o600); err != nil { + t.Fatal(err) + } + + id := Load(dir) + if id == nil { + t.Fatal("v1 file should load") + } + if id.MachineID != "504ddd36-3acf-45f6-9c1f-82f99260a419" { + t.Fatalf("machineId must backfill from legacy agentId, got %q", id.MachineID) + } + if id.machineSeed() != id.MachineID { + t.Fatalf("seed should be machineId, got %q", id.machineSeed()) + } + // Derivation is stable against the migrated seed. + want := deriveAgentID(id.MachineID, "claudecode") + if got := id.ResolveAgentID(dir, "claudecode", "sig:CLAUDECODE"); got != want { + t.Fatalf("post-migration derivation mismatch: %q != %q", got, want) + } +}