diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 2b516b72d..cb712669c 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -57,10 +57,17 @@ import { type FileEnrichmentDeps, } from "../../enrichment/file-enricher"; import type { PostHogAPIConfig } from "../../types"; -import { unreachable, withTimeout } from "../../utils/common"; +import { + isCloudRun, + resolveGithubToken, + unreachable, + withTimeout, +} from "../../utils/common"; import { Logger } from "../../utils/logger"; import { Pushable } from "../../utils/streams"; import { BaseAcpAgent } from "../base-acp-agent"; +import { LOCAL_TOOLS_MCP_NAME } from "../local-tools"; +import { resolveTaskId } from "../session-meta"; import { promptToClaude } from "./conversion/acp-to-sdk"; import { handleResultMessage, @@ -69,6 +76,7 @@ import { handleUserAssistantMessage, } from "./conversion/sdk-to-acp"; import type { EnrichedReadCache } from "./hooks"; +import { createLocalToolsMcpServer } from "./mcp/local-tools"; import { fetchMcpToolMetadata, getConnectedMcpServerNames, @@ -1091,7 +1099,10 @@ export class ClaudeAcpAgent extends BaseAcpAgent { const isResume = !!resume; const meta = params._meta as NewSessionMeta | undefined; - const taskId = meta?.persistence?.taskId; + const taskId = resolveTaskId(meta); + // Gate signed-commit wiring on cloud-run detection so the desktop (which + // signs via CommitSaga) is untouched. + const cloudRun = isCloudRun(meta); const effort = meta?.claudeCode?.options?.effort as EffortLevel | undefined; // We want to create a new session id unless it is resume, @@ -1115,6 +1126,24 @@ export class ClaudeAcpAgent extends BaseAcpAgent { const mcpServers = supportsMcpInjection(earlyModelId) ? parseMcpServers(params) : {}; + + // Register the in-process general local-tools MCP server. Tools self-gate + // via the registry (e.g. 
signed-commit is cloud-only and needs a GH token), + // so adding a tool needs no change here. In cloud runs `git commit`/`git + // push` are blocked by the PreToolUse guard (and the sandbox git shim), so + // the agent commits via the signed-commit tool instead. + const localToolsServer = createLocalToolsMcpServer( + { cwd, token: resolveGithubToken(), taskId }, + meta, + ); + if (localToolsServer) { + mcpServers[LOCAL_TOOLS_MCP_NAME] = localToolsServer; + } else if (cloudRun) { + this.logger.warn( + "Cloud run registered no local tools — missing GH_TOKEN/GITHUB_TOKEN? signed commits unavailable", + ); + } + const systemPrompt = buildSystemPrompt(meta?.systemPrompt); if (meta?.mcpToolApprovals) { @@ -1164,6 +1193,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { effort, enrichmentDeps: this.enrichment?.deps, enrichedReadCache: this.enrichedReadCache, + cloudMode: cloudRun, }); // Use the same abort controller that buildSessionOptions gave to the query diff --git a/packages/agent/src/adapters/claude/hooks.test.ts b/packages/agent/src/adapters/claude/hooks.test.ts index 771aa4399..4bcc7e2e4 100644 --- a/packages/agent/src/adapters/claude/hooks.test.ts +++ b/packages/agent/src/adapters/claude/hooks.test.ts @@ -11,6 +11,7 @@ import { Logger } from "../../utils/logger"; import { createPreToolUseHook, createReadEnrichmentHook, + createSignedCommitGuardHook, type EnrichedReadCache, } from "./hooks"; import type { @@ -311,3 +312,56 @@ describe("createPreToolUseHook", () => { }); }); }); + +describe("createSignedCommitGuardHook", () => { + const logger = new Logger(); + + function bashInput(command: string): HookInput { + return { + session_id: "s", + transcript_path: "/tmp/t", + cwd: "/tmp", + hook_event_name: "PreToolUse", + tool_name: "Bash", + tool_use_id: "toolu_1", + tool_input: { command }, + } as HookInput; + } + + const guard = createSignedCommitGuardHook(logger); + const opts = { signal: new AbortController().signal }; + + test.each([ + "git commit -m x", + 
"git push origin main", + "git add . && git commit -m 'y'", + "git -C /repo commit", + "git --no-pager push", + ])("denies %s", async (command) => { + const result = await guard(bashInput(command), undefined, opts); + expect(result).toMatchObject({ + hookSpecificOutput: { permissionDecision: "deny" }, + }); + }); + + test.each([ + "git status", + "git add .", + "git fetch origin", + "git log --grep=commit", + "git stash push", + "git ls-remote --heads origin x", + ])("allows %s", async (command) => { + const result = await guard(bashInput(command), undefined, opts); + expect(result).toEqual({ continue: true }); + }); + + test("ignores non-Bash tools", async () => { + const result = await guard( + { ...bashInput("git commit"), tool_name: "Read" } as HookInput, + undefined, + opts, + ); + expect(result).toEqual({ continue: true }); + }); +}); diff --git a/packages/agent/src/adapters/claude/hooks.ts b/packages/agent/src/adapters/claude/hooks.ts index 3dc59be89..1df94d720 100644 --- a/packages/agent/src/adapters/claude/hooks.ts +++ b/packages/agent/src/adapters/claude/hooks.ts @@ -4,6 +4,7 @@ import { type FileEnrichmentDeps, } from "../../enrichment/file-enricher"; import type { Logger } from "../../utils/logger"; +import { SIGNED_COMMIT_QUALIFIED_TOOL_NAME } from "../signed-commit-shared"; import { stripCatLineNumbers } from "./conversion/sdk-to-acp"; import { extractPostHogSubTool, @@ -222,6 +223,91 @@ export const createSubagentRewriteHook = }; }; +// git global options that consume the following token as their value, so the +// subcommand detector must skip both (mirrors the sandbox `git` PATH shim). +const GIT_VALUE_FLAGS = new Set([ + "-C", + "-c", + "--git-dir", + "--work-tree", + "--namespace", + "--exec-path", +]); + +function gitSubcommand(segment: string): string | null { + const tokens = segment.trim().split(/\s+/).filter(Boolean); + if (tokens.length === 0) return null; + // Strip a leading path so `/usr/bin/git` is still recognised as git. 
// git global options that consume the following token as their value, so the
// subcommand detector must skip both. `--config-env` is listed because git
// also accepts its value as a separate argument
// (`git --config-env name=ENVVAR push`), which would otherwise be mistaken for
// the subcommand.
const GIT_VALUE_FLAGS = new Set([
  "-C",
  "-c",
  "--git-dir",
  "--work-tree",
  "--namespace",
  "--exec-path",
  "--config-env",
]);

// Words that may legitimately precede the real command in a shell segment.
// Their own flags are not parsed; this is a best-effort in-band check.
const COMMAND_PREFIXES = new Set([
  "command",
  "env",
  "exec",
  "nohup",
  "sudo",
  "time",
]);

// `NAME=value` prefix that sets an environment variable for a single command.
const ENV_ASSIGNMENT = /^[A-Za-z_][A-Za-z0-9_]*=/;

// Top-level shell separators: `&&`, `||`, `;`, newline, pipe, and the single
// `&` that backgrounds the preceding command.
const SHELL_SEGMENT_SEPARATOR = /&&|\|\||[;\n|&]/;

/**
 * Returns the git subcommand invoked by one shell segment, or `null` when the
 * segment does not run git (or names no subcommand).
 *
 * Leading environment assignments (`GIT_AUTHOR_NAME=x git commit`), common
 * command prefixes (`env`, `sudo`, ...), and opening subshell/group characters
 * (`(`, `{`) are skipped before looking for `git`. A leading path is stripped so
 * `/usr/bin/git` is still recognised. Global flags are skipped, together with
 * the value token of any flag in `GIT_VALUE_FLAGS`.
 *
 * @param segment - One command taken from between shell separators.
 * @returns The subcommand token (closing `)`/`}` removed), or `null`.
 */
function gitSubcommand(segment: string): string | null {
  let seenGit = false;
  let skipNext = false;

  for (const raw of segment.trim().split(/\s+/).filter(Boolean)) {
    if (!seenGit) {
      const word = raw.replace(/^[({]+/, "");
      if (
        word === "" ||
        ENV_ASSIGNMENT.test(word) ||
        COMMAND_PREFIXES.has(word)
      ) {
        continue;
      }
      if (word.split("/").pop() !== "git") return null;
      seenGit = true;
      continue;
    }

    if (skipNext) {
      skipNext = false;
      continue;
    }
    if (GIT_VALUE_FLAGS.has(raw)) {
      skipNext = true;
      continue;
    }
    if (raw.startsWith("-")) continue;

    // `(cd repo && git push)` leaves the closing paren glued to the token.
    const sub = raw.replace(/[)}]+$/, "");
    return sub === "" ? null : sub;
  }
  return null;
}

/**
 * True when any top-level shell segment of `command` is a direct `git commit` /
 * `git push` invocation (allowing `git`-level global flags like `-C path` or
 * `--no-pager`, env-assignment prefixes, and backgrounded commands). Does not
 * match subcommands such as `git stash push` or `git log --grep=commit`.
 *
 * The command is split textually, without shell quoting rules, and git reached
 * via command substitution (`$(git push)`) is not caught here — the sandbox
 * `git` PATH shim is the authoritative backstop; this hook is a fast in-band
 * deny with a helpful message.
 *
 * @param command - The raw Bash command string.
 * @returns Whether the command should be blocked.
 */
function blocksUnsignedGit(command: string): boolean {
  // Cheap reject for the overwhelmingly common non-git Bash call before splitting.
  if (!command.includes("git")) return false;
  return command.split(SHELL_SEGMENT_SEPARATOR).some((segment) => {
    const sub = gitSubcommand(segment);
    return sub === "commit" || sub === "push";
  });
}
/**
 * Cloud-only PreToolUse guard. When a Bash tool call contains a raw
 * `git commit` / `git push` (as judged by `blocksUnsignedGit`), the hook logs
 * the command and answers with a `deny` permission decision whose reason
 * points the agent at the `git_signed_commit` tool. Every other event, tool,
 * or command is passed through with `{ continue: true }`.
 *
 * @param logger - Receives an info line for each blocked command.
 * @returns A hook callback for the PreToolUse hook list.
 */
export const createSignedCommitGuardHook =
  (logger: Logger): HookCallback =>
  async (input: HookInput, _toolUseID: string | undefined) => {
    const passThrough = { continue: true };

    if (input.hook_event_name !== "PreToolUse" || input.tool_name !== "Bash") {
      return passThrough;
    }

    const toolInput = input.tool_input as { command?: string } | undefined;
    const command = toolInput?.command;
    const shouldBlock = Boolean(command) && blocksUnsignedGit(command ?? "");
    if (!shouldBlock) {
      return passThrough;
    }

    logger.info(
      `[SignedCommitGuard] Blocking unsigned git command: ${command}`,
    );

    const reason =
      "Commits must be signed: `git commit` and `git push` are disabled here. " +
      "Stage changes with `git add`, then call the `git_signed_commit` tool " +
      `(${SIGNED_COMMIT_QUALIFIED_TOOL_NAME}) with a \`message\` to create a signed ` +
      "commit on the branch.";

    return {
      continue: true,
      hookSpecificOutput: {
        hookEventName: "PreToolUse" as const,
        permissionDecision: "deny" as const,
        permissionDecisionReason: reason,
      },
    };
  };
+ delete process.env.IS_SANDBOX; + }); + + afterEach(() => { + if (savedSandbox === undefined) { + delete process.env.IS_SANDBOX; + } else { + process.env.IS_SANDBOX = savedSandbox; + } + }); + + it("returns undefined when no tool's gate passes (desktop run)", () => { + expect( + createLocalToolsMcpServer({ cwd: "/repo", token: "ghs_x" }, undefined), + ).toBeUndefined(); + }); + + it("exposes git_signed_commit over MCP in a cloud run with a token", async () => { + const server = createLocalToolsMcpServer( + { cwd: "/repo", token: "ghs_x" }, + { taskRunId: "run-1" }, + ); + if (!server) { + throw new Error("expected the local-tools server to be registered"); + } + expect(server.name).toBe("posthog-local"); + + const [clientTransport, serverTransport] = + InMemoryTransport.createLinkedPair(); + await server.instance.connect(serverTransport); + const client = new Client({ name: "test", version: "1.0.0" }); + await client.connect(clientTransport); + + const { tools } = await client.listTools(); + expect(tools.map((t) => t.name)).toContain("git_signed_commit"); + + await client.close(); + }); +}); diff --git a/packages/agent/src/adapters/claude/mcp/local-tools.ts b/packages/agent/src/adapters/claude/mcp/local-tools.ts new file mode 100644 index 000000000..9f6838472 --- /dev/null +++ b/packages/agent/src/adapters/claude/mcp/local-tools.ts @@ -0,0 +1,40 @@ +import { + createSdkMcpServer, + type McpSdkServerConfigWithInstance, + tool, +} from "@anthropic-ai/claude-agent-sdk"; +import { + enabledLocalTools, + LOCAL_TOOLS_MCP_NAME, + type LocalToolCtx, + type LocalToolGateMeta, +} from "../../local-tools"; + +/** + * In-process SDK MCP server exposing the enabled local tools to the Claude + * adapter (see `../../local-tools` for the registry). Returns `undefined` when + * no tool's gate passes, so the caller can skip registering an empty server. + * Registered per session in `claude-agent.ts`. 
/**
 * Builds the in-process SDK MCP server that exposes the enabled local tools to
 * the Claude adapter.
 *
 * @param ctx - Runtime context forwarded to every tool handler.
 * @param meta - Session meta used by each tool's gate.
 * @returns The server config, or `undefined` when no tool's gate passes so the
 *   caller can skip registering an empty server.
 */
export function createLocalToolsMcpServer(
  ctx: LocalToolCtx,
  meta: LocalToolGateMeta | undefined,
): McpSdkServerConfigWithInstance | undefined {
  const enabled = enabledLocalTools(ctx, meta);
  if (enabled.length === 0) return undefined;

  // Wrap each registry entry as an SDK tool; the handler closes over `ctx`.
  const sdkTools = enabled.map((localTool) => {
    const alwaysLoad = localTool.alwaysLoad ?? false;
    return tool(
      localTool.name,
      localTool.description,
      localTool.schema,
      async (args) => localTool.handler(ctx, args),
      { alwaysLoad },
    );
  });

  return createSdkMcpServer({
    name: LOCAL_TOOLS_MCP_NAME,
    version: "1.0.0",
    tools: sdkTools,
  });
}
*/ + cloudMode?: boolean; } export function buildSystemPrompt( @@ -129,6 +132,7 @@ function buildHooks( enrichmentDeps: FileEnrichmentDeps | undefined, enrichedReadCache: EnrichedReadCache | undefined, registeredAgents: ReadonlySet, + cloudMode: boolean, ): Options["hooks"] { const postToolUseHooks = [createPostToolUseHook({ onModeChange })]; if (enrichmentDeps && enrichedReadCache) { @@ -137,21 +141,21 @@ function buildHooks( ); } + const preToolUseHooks = [ + createPreToolUseHook(settingsManager, logger), + createSubagentRewriteHook(logger, registeredAgents), + ]; + if (cloudMode) { + preToolUseHooks.push(createSignedCommitGuardHook(logger)); + } + return { ...userHooks, PostToolUse: [ ...(userHooks?.PostToolUse || []), { hooks: postToolUseHooks }, ], - PreToolUse: [ - ...(userHooks?.PreToolUse || []), - { - hooks: [ - createPreToolUseHook(settingsManager, logger), - createSubagentRewriteHook(logger, registeredAgents), - ], - }, - ], + PreToolUse: [...(userHooks?.PreToolUse || []), { hooks: preToolUseHooks }], }; } @@ -352,6 +356,7 @@ export function buildSessionOptions(params: BuildOptionsParams): Options { params.enrichmentDeps, params.enrichedReadCache, registeredAgentNames, + params.cloudMode ?? 
false, ), outputFormat: params.outputFormat, abortController: getAbortController( diff --git a/packages/agent/src/adapters/claude/types.ts b/packages/agent/src/adapters/claude/types.ts index 1898a2360..f33e35c93 100644 --- a/packages/agent/src/adapters/claude/types.ts +++ b/packages/agent/src/adapters/claude/types.ts @@ -120,6 +120,7 @@ export type SDKMessageFilter = { export type NewSessionMeta = { taskRunId?: string; + taskId?: string; disableBuiltInTools?: boolean; systemPrompt?: unknown; sessionId?: string; diff --git a/packages/agent/src/adapters/codex/codex-agent.ts b/packages/agent/src/adapters/codex/codex-agent.ts index 4313d29c0..1e362151e 100644 --- a/packages/agent/src/adapters/codex/codex-agent.ts +++ b/packages/agent/src/adapters/codex/codex-agent.ts @@ -38,6 +38,7 @@ import { type SetSessionModeRequest, type SetSessionModeResponse, } from "@agentclientprotocol/sdk"; +import { ghTokenEnv } from "@posthog/git/signed-commit"; import packageJson from "../../../package.json" with { type: "json" }; import { isMethod, @@ -56,6 +57,7 @@ import { type PermissionMode, } from "../../execution-mode"; import type { PostHogAPIConfig, ProcessSpawnedCallback } from "../../types"; +import { isCloudRun, resolveGithubToken } from "../../utils/common"; import { Logger } from "../../utils/logger"; import { nodeReadableToWebReadable, @@ -63,6 +65,12 @@ import { } from "../../utils/streams"; import { BaseAcpAgent, type BaseSession } from "../base-acp-agent"; import { classifyAgentError } from "../error-classification"; +import { + enabledLocalTools, + LOCAL_TOOLS_MCP_NAME, + type LocalToolCtx, +} from "../local-tools"; +import { resolveTaskId } from "../session-meta"; import { createCodexClient } from "./codex-client"; import { normalizeCodexConfigOptions } from "./models"; import { @@ -193,8 +201,7 @@ const STRUCTURED_OUTPUT_INSTRUCTIONS = `\n\nWhen you have completed the task, ca * harness/bin.js, etc), `import.meta.dirname` sits at different depths. 
/**
 * Builds the stdio MCP server config exposing the enabled local tools.
 *
 * The context object is JSON-serialised and base64-encoded into
 * `POSTHOG_LOCAL_TOOLS_CTX`; the enabled tool names are comma-joined into
 * `POSTHOG_LOCAL_TOOLS_ENABLED`. When `ctx.token` is set, the entries returned
 * by `ghTokenEnv` are appended to the child's environment as well.
 *
 * @param ctx - Context handed to the child process.
 * @param enabledNames - Names of the tools the child should register.
 * @returns Stdio server config that launches the bundled script with the
 *   current Node executable.
 */
function buildLocalToolsMcpServer(
  ctx: LocalToolCtx,
  enabledNames: string[],
): McpServerStdio {
  const scriptPath = resolveBundledMcpScript(
    "adapters/codex/local-tools-mcp-server.js",
  );
  const encodedCtx = Buffer.from(JSON.stringify(ctx)).toString("base64");

  // Token also goes on the child env so its own git remote ops can
  // authenticate; the variable names come from the shared `ghTokenEnv` helper.
  const tokenEnv = ctx.token
    ? Object.entries(ghTokenEnv(ctx.token)).map(([name, value]) => ({
        name,
        value,
      }))
    : [];

  return {
    name: LOCAL_TOOLS_MCP_NAME,
    command: process.execPath,
    args: [scriptPath],
    env: [
      { name: "POSTHOG_LOCAL_TOOLS_CTX", value: encodedCtx },
      { name: "POSTHOG_LOCAL_TOOLS_ENABLED", value: enabledNames.join(",") },
      ...tokenEnv,
    ],
  };
}
this.sessionState = createSessionState(params.sessionId, params.cwd, { taskRunId: meta?.taskRunId, - taskId: meta?.taskId ?? meta?.persistence?.taskId, + taskId: resolveTaskId(meta), modeId: response.modes?.currentModeId ?? "auto", permissionMode: currentPermissionMode, }); @@ -418,13 +468,16 @@ export class CodexAcpAgent extends BaseAcpAgent { params: ResumeSessionRequest, ): Promise { const meta = params._meta as NewSessionMeta | undefined; - const injectedParams = this.applyStructuredOutput( - { - sessionId: params.sessionId, - cwd: params.cwd, - mcpServers: params.mcpServers ?? [], - _meta: params._meta, - }, + const injectedParams = this.applyLocalTools( + this.applyStructuredOutput( + { + sessionId: params.sessionId, + cwd: params.cwd, + mcpServers: params.mcpServers ?? [], + _meta: params._meta, + }, + meta, + ), meta, ); @@ -439,7 +492,7 @@ export class CodexAcpAgent extends BaseAcpAgent { ); this.sessionState = createSessionState(params.sessionId, params.cwd, { taskRunId: meta?.taskRunId, - taskId: meta?.taskId ?? meta?.persistence?.taskId, + taskId: resolveTaskId(meta), modeId: loadResponse.modes?.currentModeId ?? "auto", permissionMode: currentPermissionMode, }); @@ -465,12 +518,15 @@ export class CodexAcpAgent extends BaseAcpAgent { params: ForkSessionRequest, ): Promise { const meta = params._meta as NewSessionMeta | undefined; - const injectedParams = this.applyStructuredOutput( - { - cwd: params.cwd, - mcpServers: params.mcpServers ?? [], - _meta: params._meta, - }, + const injectedParams = this.applyLocalTools( + this.applyStructuredOutput( + { + cwd: params.cwd, + mcpServers: params.mcpServers ?? [], + _meta: params._meta, + }, + meta, + ), meta, ); @@ -483,7 +539,7 @@ export class CodexAcpAgent extends BaseAcpAgent { const requestedPermissionMode = toCodexPermissionMode(meta?.permissionMode); this.sessionState = createSessionState(newResponse.sessionId, params.cwd, { taskRunId: meta?.taskRunId, - taskId: meta?.taskId ?? 
meta?.persistence?.taskId, + taskId: resolveTaskId(meta), modeId: newResponse.modes?.currentModeId ?? "auto", permissionMode: requestedPermissionMode, }); @@ -531,6 +587,45 @@ export class CodexAcpAgent extends BaseAcpAgent { }; } + /** + * Injects the stdio general local-tools MCP server. Tools self-gate via the + * registry (e.g. signed-commit is cloud-only and needs a GH token), so the + * server is only injected when at least one tool's gate passes. Their + * instructions already live in the shared cloud system prompt, so only the + * server needs injecting here. + */ + private applyLocalTools< + T extends { cwd?: string; mcpServers?: McpServer[]; _meta?: unknown }, + >(request: T, meta: NewSessionMeta | undefined): T { + const cwd = request.cwd; + if (!cwd) { + return request; + } + const ctx: LocalToolCtx = { + cwd, + token: resolveGithubToken(), + taskId: resolveTaskId(meta), + }; + const tools = enabledLocalTools(ctx, meta); + if (tools.length === 0) { + if (isCloudRun(meta)) { + this.logger.warn( + "Cloud run registered no local tools — missing GH_TOKEN/GITHUB_TOKEN? signed commits unavailable", + ); + } + return request; + } + + const mcpServer = buildLocalToolsMcpServer( + ctx, + tools.map((t) => t.name), + ); + return { + ...request, + mcpServers: [...(request.mcpServers ?? []), mcpServer], + }; + } + private async applyInitialPermissionMode( sessionId: string, permissionMode?: string, diff --git a/packages/agent/src/adapters/codex/local-tools-mcp-server.ts b/packages/agent/src/adapters/codex/local-tools-mcp-server.ts new file mode 100644 index 000000000..397f22c20 --- /dev/null +++ b/packages/agent/src/adapters/codex/local-tools-mcp-server.ts @@ -0,0 +1,71 @@ +/** + * Standalone stdio MCP server exposing the general local tools to the Codex + * adapter. Spawned by codex-acp as an MCP server process. 
/** Writes a prefixed message to stderr and exits the process with code 1. */
function die(message: string): never {
  process.stderr.write(`[local-tools-mcp-server] ${message}\n`);
  process.exit(1);
}

/**
 * Decodes POSTHOG_LOCAL_TOOLS_CTX (base64-encoded JSON) into a tool context.
 *
 * The decoded value is validated before use: it must be a JSON object with a
 * non-empty string `cwd`. A missing or empty `token` falls back to
 * `readGithubTokenFromEnv()`; a non-string `taskId` is ignored. Any violation
 * terminates the process via `die`.
 */
function readCtx(encoded: string | undefined): LocalToolCtx {
  if (!encoded) {
    die("POSTHOG_LOCAL_TOOLS_CTX env var is required");
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(Buffer.from(encoded, "base64").toString("utf-8"));
  } catch (err) {
    die(
      `Failed to parse POSTHOG_LOCAL_TOOLS_CTX as base64-encoded JSON: ${err}`,
    );
  }

  // `JSON.parse` happily returns null, numbers, or strings; reject those up
  // front instead of crashing on a property access below.
  if (typeof decoded !== "object" || decoded === null) {
    die("POSTHOG_LOCAL_TOOLS_CTX must decode to a JSON object");
  }

  const { cwd, taskId, token } = decoded as {
    cwd?: unknown;
    taskId?: unknown;
    token?: unknown;
  };
  if (typeof cwd !== "string" || cwd === "") {
    die("POSTHOG_LOCAL_TOOLS_CTX must include cwd");
  }

  return {
    cwd,
    token:
      typeof token === "string" && token !== ""
        ? token
        : readGithubTokenFromEnv(),
    taskId: typeof taskId === "string" ? taskId : undefined,
  };
}

const ctx = readCtx(process.env.POSTHOG_LOCAL_TOOLS_CTX);

// Comma-separated tool names chosen by the parent; tolerate stray whitespace.
const enabledNames = new Set(
  (process.env.POSTHOG_LOCAL_TOOLS_ENABLED ?? "")
    .split(",")
    .map((name) => name.trim())
    .filter(Boolean),
);
const tools = LOCAL_TOOLS.filter((t) => enabledNames.has(t.name));
if (tools.length === 0) {
  die("POSTHOG_LOCAL_TOOLS_ENABLED listed no known tools");
}

const server = new McpServer({
  name: LOCAL_TOOLS_MCP_NAME,
  version: "1.0.0",
});

for (const t of tools) {
  server.tool(t.name, t.description, t.schema, async (args) =>
    t.handler(ctx, args),
  );
}

const transport = new StdioServerTransport();
await server.connect(transport);
/**
 * Selects the registry tools whose `isEnabled` gate passes for the given
 * context and session meta, preserving registry order.
 *
 * @param ctx - Runtime context passed to each gate.
 * @param meta - Session meta passed to each gate.
 * @returns The tools that should actually be exposed.
 */
export function enabledLocalTools(
  ctx: LocalToolCtx,
  meta: LocalToolGateMeta | undefined,
): LocalTool[] {
  const enabled: LocalTool[] = [];
  for (const candidate of LOCAL_TOOLS) {
    if (candidate.isEnabled(ctx, meta)) {
      enabled.push(candidate);
    }
  }
  return enabled;
}
{ taskRunId } : undefined, + ); + const hasSignedCommit = tools.some((t) => t.name === "git_signed_commit"); + expect(hasSignedCommit).toBe(Boolean(taskRunId) && Boolean(token)); + }, + ); +}); diff --git a/packages/agent/src/adapters/local-tools/registry.ts b/packages/agent/src/adapters/local-tools/registry.ts new file mode 100644 index 000000000..1f6ba9db2 --- /dev/null +++ b/packages/agent/src/adapters/local-tools/registry.ts @@ -0,0 +1,81 @@ +import type { z } from "zod"; + +/** + * A single general-purpose local MCP server hosts every tool registered here, + * for both adapters: the Claude in-process SDK server and the Codex stdio + * server. Adding a tool means adding one entry to `LOCAL_TOOLS` (see + * `./index.ts`) — no per-tool server file or adapter wiring. The name appears + * in tool ids as `mcp__posthog-local__`. + */ +export const LOCAL_TOOLS_MCP_NAME = "posthog-local"; + +/** Runtime context handed to every local tool's handler and gate. */ +export interface LocalToolCtx { + cwd: string; + /** GitHub token available to the sandbox, if any. */ + token?: string; + taskId?: string; +} + +/** Minimal session-meta shape needed to gate tools (e.g. cloud-only). */ +export interface LocalToolGateMeta { + taskRunId?: string; +} + +/** + * MCP tool result shape. Carries an open index signature so the value is + * assignable to either SDK's `CallToolResult` (the Claude SDK and the MCP SDK + * both attach an open `_meta`). + */ +export interface LocalToolResult { + content: { type: "text"; text: string }[]; + isError?: true; + [key: string]: unknown; +} + +/** Tool definition with its input schema's type preserved for the handler. */ +export interface LocalToolDef { + name: string; + description: string; + schema: S; + /** + * Keep the tool visible even though MCP tools are offloaded behind ToolSearch + * by default in the Claude adapter (ENABLE_TOOL_SEARCH). Ignored by Codex. 
+ */ + alwaysLoad?: boolean; + isEnabled(ctx: LocalToolCtx, meta: LocalToolGateMeta | undefined): boolean; + handler( + ctx: LocalToolCtx, + args: z.infer>, + ): Promise; +} + +/** Schema-erased tool, the shape stored in the registry array. */ +export interface LocalTool { + name: string; + description: string; + schema: z.ZodRawShape; + alwaysLoad?: boolean; + isEnabled(ctx: LocalToolCtx, meta: LocalToolGateMeta | undefined): boolean; + handler( + ctx: LocalToolCtx, + args: Record, + ): Promise; +} + +/** + * Registers a tool, preserving its schema's inferred type at the definition + * site. The returned value erases the schema generic so tools of different + * shapes can live in one array; the cast is sound because both MCP SDKs + * validate `args` against `schema` before dispatching to the handler. + */ +export function defineLocalTool( + def: LocalToolDef, +): LocalTool { + return def as unknown as LocalTool; +} + +/** The qualified tool id as the model and tool guards see it. */ +export function qualifiedLocalToolName(toolName: string): string { + return `mcp__${LOCAL_TOOLS_MCP_NAME}__${toolName}`; +} diff --git a/packages/agent/src/adapters/local-tools/tools/signed-commit.ts b/packages/agent/src/adapters/local-tools/tools/signed-commit.ts new file mode 100644 index 000000000..2a9df9680 --- /dev/null +++ b/packages/agent/src/adapters/local-tools/tools/signed-commit.ts @@ -0,0 +1,26 @@ +import { isCloudRun } from "../../../utils/common"; +import { + runSignedCommitTool, + SIGNED_COMMIT_TOOL_DESCRIPTION, + SIGNED_COMMIT_TOOL_NAME, + signedCommitToolSchema, +} from "../../signed-commit-shared"; +import { defineLocalTool } from "../registry"; + +/** + * `git_signed_commit` as a local tool. Cloud runs only, and only when a GitHub + * token is available (the commit is created via GitHub's API, which also signs + * it). Committing is core to cloud tasks, so keep it visible past ToolSearch. 
/**
 * `git_signed_commit` as a local tool. Enabled only when `isCloudRun(meta)` is
 * true and a GitHub token is present in the context. `alwaysLoad` is set so the
 * tool stays visible rather than being offloaded behind ToolSearch.
 *
 * The handler re-checks the token: rather than forwarding an empty credential
 * to `runSignedCommitTool`, it returns an explicit error result naming the
 * missing token.
 */
export const signedCommitTool = defineLocalTool({
  name: SIGNED_COMMIT_TOOL_NAME,
  description: SIGNED_COMMIT_TOOL_DESCRIPTION,
  schema: signedCommitToolSchema,
  alwaysLoad: true,
  isEnabled: (ctx, meta) => isCloudRun(meta) && !!ctx.token,
  handler: async (ctx, args) => {
    if (!ctx.token) {
      return {
        content: [
          {
            type: "text" as const,
            text: `${SIGNED_COMMIT_TOOL_NAME} failed: no GitHub token is available to create the commit`,
          },
        ],
        isError: true as const,
      };
    }
    return runSignedCommitTool(
      { cwd: ctx.cwd, token: ctx.token, taskId: ctx.taskId },
      args,
    );
  },
});
+ */ + +export const SIGNED_COMMIT_TOOL_NAME = "git_signed_commit"; +export const SIGNED_COMMIT_QUALIFIED_TOOL_NAME = qualifiedLocalToolName( + SIGNED_COMMIT_TOOL_NAME, +); + +export const SIGNED_COMMIT_TOOL_DESCRIPTION = + "Create a GitHub-signed (Verified) commit on the branch. Stage files with `git add` " + + "first (or pass `paths`), then call this instead of `git commit`/`git push` — those are " + + "blocked because all commits must be signed. The commit is created via GitHub's API and " + + "your local checkout is kept in sync. For a new branch, pass `branch` (prefixed with " + + "`posthog-code/`) and the tool creates it on the remote."; + +export const signedCommitToolSchema = { + message: z.string().describe("Commit headline (first line)."), + body: z.string().optional().describe("Optional extended commit body."), + branch: z + .string() + .optional() + .describe( + "Target branch; defaults to the current branch. Use a posthog-code/ prefix for new branches.", + ), + paths: z + .array(z.string()) + .optional() + .describe( + "Files to stage before committing; defaults to already-staged files.", + ), +}; + +export function formatSignedCommitResult(result: SignedCommitResult): string { + const list = result.commits.map((c) => `- ${c.sha} ${c.url}`).join("\n"); + return `Created ${result.commits.length} signed commit(s) on ${result.branch}:\n${list}`; +} + +export interface SignedCommitToolResult { + content: { type: "text"; text: string }[]; + isError?: true; + // Both SDKs' CallToolResult carries an open `_meta`/index signature; mirror it + // so this shape is assignable to either adapter's tool-handler return type. + [key: string]: unknown; +} + +/** + * Runs `git_signed_commit` and formats the MCP result. Shared by the Claude + * in-process tool and the Codex stdio server so success/error formatting (and + * the error-message prefix) can't drift between adapters. 
+ */ +export async function runSignedCommitTool( + ctx: SignedCommitCtx, + args: SignedCommitInput, +): Promise { + try { + const result = await createSignedCommit(ctx, args); + return { + content: [{ type: "text", text: formatSignedCommitResult(result) }], + }; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return { + content: [ + { type: "text", text: `${SIGNED_COMMIT_TOOL_NAME} failed: ${message}` }, + ], + isError: true, + }; + } +} diff --git a/packages/agent/src/server/agent-server.test.ts b/packages/agent/src/server/agent-server.test.ts index 72784f739..171a26ba5 100644 --- a/packages/agent/src/server/agent-server.test.ts +++ b/packages/agent/src/server/agent-server.test.ts @@ -900,10 +900,8 @@ describe("AgentServer HTTP Mode", () => { expect(prompt).toContain( "gh pr checkout https://github.com/org/repo/pull/1", ); - expect(prompt).toContain( - "Stage and commit all changes with a clear commit message", - ); - expect(prompt).toContain("Push to the existing PR branch"); + expect(prompt).toContain("git_signed_commit"); + expect(prompt).toContain("Committing (signed commits required)"); expect(prompt).not.toContain("Create a draft pull request"); // Review-comment thread handling: reply + resolve expect(prompt).toContain("review thread"); diff --git a/packages/agent/src/server/agent-server.ts b/packages/agent/src/server/agent-server.ts index af4998a4a..7f2fcdb09 100644 --- a/packages/agent/src/server/agent-server.ts +++ b/packages/agent/src/server/agent-server.ts @@ -25,6 +25,7 @@ import { type AgentErrorClassification, classifyAgentError, } from "../adapters/error-classification"; +import { SIGNED_COMMIT_QUALIFIED_TOOL_NAME } from "../adapters/signed-commit-shared"; import type { PermissionMode } from "../execution-mode"; import { DEFAULT_CODEX_MODEL } from "../gateway-models"; import { HandoffCheckpointTracker } from "../handoff-checkpoint"; @@ -949,6 +950,7 @@ export class AgentServer { _meta: { sessionId: 
payload.run_id, taskRunId: payload.run_id, + taskId: payload.task_id, systemPrompt: sessionSystemPrompt, ...(this.config.model && { model: this.config.model }), allowedDomains: this.config.allowedDomains, @@ -1599,24 +1601,21 @@ export class AgentServer { private buildCloudSystemPrompt(prUrl?: string | null): string { const taskId = this.config.taskId; const shouldAutoCreatePr = this.shouldAutoPublishCloudChanges(); - const attributionInstructions = ` -## Attribution -Do NOT use Claude Code's default attribution (no "Co-Authored-By" trailers, no "Generated with [Claude Code]" lines). + const signedCommitInstructions = ` +## Committing (signed commits required) +Commits MUST be signed. \`git commit\` and \`git push\` are blocked in this environment. +To commit: stage your changes with \`git add\`, then call the \`git_signed_commit\` tool (full +name \`${SIGNED_COMMIT_QUALIFIED_TOOL_NAME}\`) with a \`message\` (and optional \`body\`/\`paths\`). +It creates a GitHub-signed ("Verified") commit on the branch and keeps your local checkout in +sync. To start a new branch, pass \`branch\` (prefixed with \`posthog-code/\`) — the tool creates +it on the remote for you. -If you create a commit, add the following trailers to the commit message (after a blank line at the end): +## Attribution +Do NOT add "Co-Authored-By" trailers or "Generated with [Claude Code]" lines to your +commit messages. The \`git_signed_commit\` tool automatically appends the only trailers +we want: Generated-By: PostHog Code - Task-Id: ${taskId} - -Example: -\`\`\` -git commit -m "$(cat <<'EOF' -fix: resolve login redirect loop - -Generated-By: PostHog Code -Task-Id: ${taskId} -EOF -)" -\`\`\``; + Task-Id: ${taskId}`; if (prUrl) { if (!shouldAutoCreatePr) { @@ -1630,7 +1629,7 @@ Do the requested work, but stop with local changes ready for review. Important: - Do NOT create new commits, push to the branch, or update the pull request unless the user explicitly asks. 
- Do NOT create a new branch or a new pull request. -${attributionInstructions} +${signedCommitInstructions} `; } @@ -1641,9 +1640,8 @@ This task already has an open pull request: ${prUrl} After completing the requested changes: 1. Check out the existing PR branch with \`gh pr checkout ${prUrl}\` -2. Stage and commit all changes with a clear commit message -3. Push to the existing PR branch -4. For every PR review comment or review thread you addressed, treat the thread as done only after BOTH of these: +2. Stage your changes with \`git add\`, then call the \`git_signed_commit\` tool with a clear \`message\` (do NOT use \`git commit\`/\`git push\` — they are blocked). This commits to the existing PR branch. +3. For every PR review comment or review thread you addressed, treat the thread as done only after BOTH of these: - Reply on the thread with a short note describing what changed (reference the commit SHA when useful) using \`gh api -X POST /repos/{owner}/{repo}/pulls/{n}/comments/{id}/replies -f body='...'\`. - Resolve the thread via the \`resolveReviewThread\` GraphQL mutation: \`gh api graphql -f query='mutation($id:ID!){resolveReviewThread(input:{threadId:$id}){thread{isResolved}}}' -f id=""\`. List unresolved threads first with \`gh api graphql -f query='{repository(owner:"",name:""){pullRequest(number:){reviewThreads(first:100){nodes{id isResolved comments(first:1){nodes{body}}}}}}}'\` so you can resolve each one you fixed. @@ -1651,7 +1649,7 @@ After completing the requested changes: Important: - Do NOT create a new branch or a new pull request. - Do NOT push fixes for review comments without replying to and resolving each related thread. 
-${attributionInstructions} +${signedCommitInstructions} `; } @@ -1666,7 +1664,7 @@ When the user asks for code changes: When the user explicitly asks to clone or work in a GitHub repository: - Clone the repository into /tmp/workspace/repos// using \`gh repo clone / /tmp/workspace/repos//\` - Work from inside that cloned repository for follow-up code changes -- If the user explicitly asks you to open or update a pull request, create a branch, commit the requested changes, push it, and open a draft pull request from inside the clone. Before opening the PR, check the cloned repo for a PR template at \`.github/pull_request_template.md\` (or variants; fall back to the org's \`.github\` repo via \`gh api\`) and use it as the body structure, and search for matching open issues with \`gh issue list --search\` to include \`Closes #\` / \`Refs #\` links. +- If the user explicitly asks you to open or update a pull request, create a branch, stage your changes with \`git add\` and commit them with the \`git_signed_commit\` tool (do NOT use \`git commit\`/\`git push\` — they are blocked), and open a draft pull request from inside the clone. Before opening the PR, check the cloned repo for a PR template at \`.github/pull_request_template.md\` (or variants; fall back to the org's \`.github\` repo via \`gh api\`) and use it as the body structure, and search for matching open issues with \`gh issue list --search\` to include \`Closes #\` / \`Refs #\` links. - Do NOT create branches, commits, push changes, or open pull requests unless the user explicitly asks for that`; return ` @@ -1686,7 +1684,7 @@ ${publishInstructions} Important: - Prefer using MCP tools to answer questions with real data over giving generic advice. -${attributionInstructions} +${signedCommitInstructions} `; } @@ -1698,7 +1696,7 @@ Do the requested work, but stop with local changes ready for review. Important: - Do NOT create a branch, commit, push, or open a pull request unless the user explicitly asks. 
-${attributionInstructions} +${signedCommitInstructions} `; } @@ -1706,14 +1704,13 @@ ${attributionInstructions} # Cloud Task Execution After completing the requested changes: -1. Create a new branch prefixed with \`posthog-code/\` (e.g. \`posthog-code/fix-login-redirect\`) based on the work done -2. Stage and commit all changes with a clear commit message -3. Push the branch to origin -4. Before opening the PR, prepare the body: +1. Pick a new branch name prefixed with \`posthog-code/\` (e.g. \`posthog-code/fix-login-redirect\`) +2. Stage your changes with \`git add\`, then call the \`git_signed_commit\` tool with \`branch\` set to that name and a clear \`message\` (do NOT use \`git commit\`/\`git push\` — they are blocked). The tool creates the branch on the remote and a signed commit on it. +3. Before opening the PR, prepare the body: - Check the repo for a PR template at \`.github/pull_request_template.md\` (also try \`.github/PULL_REQUEST_TEMPLATE.md\`, \`docs/pull_request_template.md\`, and root variants). If one exists, use its exact section headings as the PR body — do NOT fall back to a generic Summary/Test plan format. - If no repo-level template exists, check the org's \`.github\` repo via \`gh api /repos//.github/contents/.github/pull_request_template.md\` (and other common paths) and use that as a fallback. - Search for matching open issues with \`gh issue list --state open --search ''\` (derive keywords from the branch name, commits, and changed files; \`gh issue view \` to confirm relevance). For every issue this PR would resolve, include a \`Closes #\` line in the body so GitHub auto-links and auto-closes it on merge. For issues that are related but not fully resolved, use \`Refs #\` instead. -5. Create a draft pull request using \`gh pr create --draft${this.config.baseBranch ? ` --base ${this.config.baseBranch}` : ""}\` with a descriptive title and the body prepared above. Add the following footer at the end of the PR description: +4. 
Create a draft pull request using \`gh pr create --draft${this.config.baseBranch ? ` --base ${this.config.baseBranch}` : ""}\` with a descriptive title and the body prepared above. Add the following footer at the end of the PR description: \`\`\` --- *Created with [PostHog Code](https://posthog.com/code?ref=pr)* @@ -1721,7 +1718,7 @@ After completing the requested changes: Important: - Always create the PR as a draft. Do not ask for confirmation. -${attributionInstructions} +${signedCommitInstructions} `; } diff --git a/packages/agent/src/utils/common.ts b/packages/agent/src/utils/common.ts index 3bcf09acd..9f100f8db 100644 --- a/packages/agent/src/utils/common.ts +++ b/packages/agent/src/utils/common.ts @@ -1,3 +1,4 @@ +import { readGithubTokenFromEnv } from "@posthog/git/signed-commit"; import type { Logger } from "./logger"; /** @@ -25,6 +26,19 @@ export const IS_ROOT = export const ALLOW_BYPASS = !IS_ROOT || !!process.env.IS_SANDBOX; +/** + * A cloud sandbox run, as opposed to a local desktop session. Cloud sandboxes + * always set IS_SANDBOX and carry a taskRunId; desktop sessions have neither. + */ +export function isCloudRun(meta: { taskRunId?: string } | undefined): boolean { + return !!process.env.IS_SANDBOX || !!meta?.taskRunId; +} + +/** The GitHub token available to the sandbox, if any. 
*/ +export function resolveGithubToken(): string | undefined { + return readGithubTokenFromEnv(); +} + export function unreachable(value: never, logger: Logger): void { let valueAsString: string; try { diff --git a/packages/agent/tsup.config.ts b/packages/agent/tsup.config.ts index ef30a1681..d17d91e4c 100644 --- a/packages/agent/tsup.config.ts +++ b/packages/agent/tsup.config.ts @@ -88,6 +88,7 @@ export default defineConfig([ "src/adapters/codex/models.ts", "src/adapters/claude/mcp/tool-metadata.ts", "src/adapters/codex/structured-output-mcp-server.ts", + "src/adapters/codex/local-tools-mcp-server.ts", "src/adapters/reasoning-effort.ts", "src/execution-mode.ts", "src/server/schemas.ts",