wusijian007 · wusijian007 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -15,10 +15,10 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
 
       - name: Setup Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v5
         with:
           node-version: 20
           cache: npm

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -25,7 +25,7 @@ The CLI binary entry is `packages/cli/dist/index.js` (exposed as `myagent`). The
 ### Environment
 
 - `ANTHROPIC_API_KEY` — required for real model calls (`chat`, `agent`, `tui`). Read from process env or a local `.env` (parsed by `loadEnvironment` in `packages/cli/src/index.ts`; only an allow-listed set of keys is honored).
-- `ANTHROPIC_BASE_URL`, `MYAGENT_MODEL`, `MYAGENT_PERMISSION_MODE`, `MYAGENT_INPUT_USD_PER_MTOK`, `MYAGENT_OUTPUT_USD_PER_MTOK`, `MYAGENT_CACHE_WRITE_USD_PER_MTOK`, `MYAGENT_CACHE_READ_USD_PER_MTOK` — optional overrides. The two cache-rate vars feed `estimateUsageCostUsd` and surface in `myagent usage <sessionId>` once prompt caching is enabled in M1.5b.
+- `ANTHROPIC_BASE_URL`, `MYAGENT_MODEL`, `MYAGENT_PERMISSION_MODE`, `MYAGENT_INPUT_USD_PER_MTOK`, `MYAGENT_OUTPUT_USD_PER_MTOK`, `MYAGENT_CACHE_WRITE_USD_PER_MTOK`, `MYAGENT_CACHE_READ_USD_PER_MTOK` — optional overrides. The two cache-rate vars feed `estimateUsageCostUsd`. Prompt caching is wired on outbound requests: the agent's system prompt is sent as a single `SystemTextBlock` with `cache_control: ephemeral`, and the tool list's last entry carries a matching marker so the whole tool block is cached. `cacheCreationInputTokens` / `cacheReadInputTokens` flow back through `ModelUsage` → `TokenUsage` → the session record, and appear in the per-turn breakdown of `myagent usage <sessionId>`.
 - Offline tests use `FakeModel` and do not need an API key.
 - Runtime state (sessions, artifacts, profiles, tasks, fork traces, memory) is written under `.myagent/` in the cwd; gitignored.
 

diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
@@ -46,6 +46,7 @@ import {
   type MemoryEntry,
   type SessionCompactionArchiver,
   type SessionEvent,
+  type SystemTextBlock,
   type ModelClient,
   type ModelStreamEvent,
   type PermissionDecision,
@@ -1576,6 +1577,18 @@ async function runAgentTurn(options: RunAgentTurnOptions): Promise<AgentTurnResu
           profile.addMetric("model.output_tokens", event.usage.outputTokens ?? 0, "tokens", {
             requestId: event.requestId
           });
+          profile.addMetric(
+            "model.cache_creation_input_tokens",
+            event.usage.cacheCreationInputTokens ?? 0,
+            "tokens",
+            { requestId: event.requestId }
+          );
+          profile.addMetric(
+            "model.cache_read_input_tokens",
+            event.usage.cacheReadInputTokens ?? 0,
+            "tokens",
+            { requestId: event.requestId }
+          );
           profile.addMetric("model.cost_usd", costDelta, "usd", {
             requestId: event.requestId,
             estimated: true
@@ -1638,6 +1651,16 @@ async function runAgentTurn(options: RunAgentTurnOptions): Promise<AgentTurnResu
     const finalState = getBootstrapState();
     profile.addMetric("session.input_tokens", finalState.tokenUsage.inputTokens, "tokens");
     profile.addMetric("session.output_tokens", finalState.tokenUsage.outputTokens, "tokens");
+    profile.addMetric(
+      "session.cache_creation_input_tokens",
+      finalState.tokenUsage.cacheCreationInputTokens,
+      "tokens"
+    );
+    profile.addMetric(
+      "session.cache_read_input_tokens",
+      finalState.tokenUsage.cacheReadInputTokens,
+      "tokens"
+    );
     profile.addMetric("session.cost_usd", finalState.costUsd, "usd", { estimated: true });
     await profileStore.save(profile.finish("completed")).catch(() => undefined);
     return { exitCode: 0, sessionId: bootstrap.sessionId };
@@ -1856,10 +1879,28 @@ function parseOptionalNumber(value: string | undefined): number | undefined {
   return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined;
 }
 
-function buildAgentSystemPrompt(memoryContext: string, skillContext: string): string {
-  return [READ_ONLY_AGENT_SYSTEM_PROMPT, memoryContext.trim(), skillContext.trim()]
+/**
+ * Builds the agent's system prompt as a structured block array so it
+ * can act as a prompt-cache breakpoint. The base prompt plus the trimmed
+ * memory and skill contexts (empty parts dropped) are joined into one
+ * text block tagged `cache_control: ephemeral`, so Anthropic caches the
+ * whole system prompt. Memory + skill snapshots are captured once at
+ * session start, so the block is reused unchanged on every turn.
+ */
+function buildAgentSystemPrompt(
+  memoryContext: string,
+  skillContext: string
+): readonly SystemTextBlock[] {
+  const combined = [READ_ONLY_AGENT_SYSTEM_PROMPT, memoryContext.trim(), skillContext.trim()]
     .filter((part) => part.length > 0)
     .join("\n\n");
+  return [
+    {
+      type: "text",
+      text: combined,
+      cache_control: { type: "ephemeral" }
+    }
+  ];
 }
 
 const READ_ONLY_AGENT_SYSTEM_PROMPT = `You are myagent Week 18, a safety-first coding agent.

diff --git a/packages/cli/test/cli.test.ts b/packages/cli/test/cli.test.ts
@@ -26,6 +26,12 @@ function captureWriter() {
   };
 }
 
+function systemToText(system: string | ReadonlyArray<{ type: "text"; text: string }> | undefined): string {
+  if (system === undefined) return "";
+  if (typeof system === "string") return system;
+  return system.map((block) => block.text).join("\n\n");
+}
+
 describe("myagent cli", () => {
   it("prints version without starting agent runtime", async () => {
     const stdout = captureWriter();
@@ -150,7 +156,7 @@ describe("myagent cli", () => {
         };
       },
       async *stream(request) {
-        systems.push(request.system ?? "");
+        systems.push(systemToText(request.system));
         yield {
           type: "assistant_message",
           message: {
@@ -227,7 +233,7 @@ describe("myagent cli", () => {
             };
           },
           async *stream(request) {
-            systems.push(request.system ?? "");
+            systems.push(systemToText(request.system));
             yield {
               type: "assistant_message",
               message: { role: "assistant", content: "Use real DB integration fixtures." },
@@ -525,6 +531,48 @@ describe("myagent cli", () => {
     expect(record.events.at(-1)?.type).toBe("compact");
   });
 
+  it("sends the agent's system prompt as a structured block with cache_control", async () => {
+    const cwd = mkdtempSync(join(tmpdir(), "myagent-cli-cache-system-"));
+    let capturedSystem: unknown;
+    const stdout = captureWriter();
+    const stderr = captureWriter();
+
+    const exitCode = await runCli(["agent", "summarize", "fixture"], stdout.writer, stderr.writer, {
+      cwd,
+      env: {},
+      createModelClient: () =>
+        ({
+          async create() {
+            return {
+              message: { role: "assistant", content: "ok" },
+              requestId: "req_cache"
+            };
+          },
+          async *stream(request) {
+            capturedSystem = request.system;
+            yield {
+              type: "assistant_message",
+              message: { role: "assistant", content: "fixture summary" },
+              requestId: "req_cache"
+            };
+          }
+        }) satisfies ModelClient
+    });
+
+    expect(exitCode).toBe(0);
+    expect(stderr.text()).toBe("");
+    expect(Array.isArray(capturedSystem)).toBe(true);
+    const systemBlocks = capturedSystem as Array<{
+      type: string;
+      text: string;
+      cache_control?: { type: string };
+    }>;
+    expect(systemBlocks).toHaveLength(1);
+    expect(systemBlocks[0]?.type).toBe("text");
+    expect(systemBlocks[0]?.text).toContain("safety-first coding agent");
+    expect(systemBlocks[0]?.cache_control).toEqual({ type: "ephemeral" });
+  });
+
   it("prints per-turn token + cost breakdown via myagent usage", async () => {
     const cwd = mkdtempSync(join(tmpdir(), "myagent-cli-usage-"));
     const sessionRootDir = join(cwd, ".myagent", "sessions");

diff --git a/packages/core/src/anthropic.ts b/packages/core/src/anthropic.ts
@@ -161,8 +161,19 @@ export class AnthropicModelClient implements ModelClient {
           stop_reason?: string | null;
           partial_json?: string;
         };
-        message?: { usage?: { input_tokens?: number; output_tokens?: number } };
-        usage?: { output_tokens?: number };
+        message?: {
+          usage?: {
+            input_tokens?: number;
+            output_tokens?: number;
+            cache_creation_input_tokens?: number;
+            cache_read_input_tokens?: number;
+          };
+        };
+        usage?: {
+          output_tokens?: number;
+          cache_creation_input_tokens?: number;
+          cache_read_input_tokens?: number;
+        };
       };
 
       if (typed.type === "message_start") {
@@ -227,7 +238,11 @@ export class AnthropicModelClient implements ModelClient {
         stopReason = typed.delta?.stop_reason;
         usage = {
           ...usage,
-          outputTokens: typed.usage?.output_tokens ?? usage?.outputTokens
+          outputTokens: typed.usage?.output_tokens ?? usage?.outputTokens,
+          cacheCreationInputTokens:
+            typed.usage?.cache_creation_input_tokens ?? usage?.cacheCreationInputTokens,
+          cacheReadInputTokens:
+            typed.usage?.cache_read_input_tokens ?? usage?.cacheReadInputTokens
         };
       }
     }
@@ -403,36 +418,50 @@ function toInternalContent(
   return content;
 }
 
-function toAnthropicTools(
+export function toAnthropicTools(
   tools: readonly ModelToolDefinition[] | undefined
 ): { tools?: Array<Record<string, unknown>> } {
   if (!tools || tools.length === 0) {
     return {};
   }
 
-  return {
-    tools: tools.map((tool) => ({
-      name: tool.name,
-      description: tool.description,
-      input_schema: tool.inputSchema
-    }))
-  };
+  const mapped: Array<Record<string, unknown>> = tools.map((tool) => ({
+    name: tool.name,
+    description: tool.description,
+    input_schema: tool.inputSchema
+  }));
+  // Tag the last tool with cache_control so the whole tool list forms a
+  // single prompt-cache breakpoint. Tools rarely change across turns,
+  // which makes them the largest stable input segment besides the system
+  // prompt. The marker is harmless on uncached calls.
+  const lastIndex = mapped.length - 1;
+  mapped[lastIndex] = { ...mapped[lastIndex], cache_control: { type: "ephemeral" } };
+  return { tools: mapped };
 }
 
 function isRecord(value: unknown): value is Record<string, unknown> {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
 
-function toModelUsage(
-  usage: { input_tokens?: number; output_tokens?: number } | undefined
+export function toModelUsage(
+  usage:
+    | {
+        input_tokens?: number;
+        output_tokens?: number;
+        cache_creation_input_tokens?: number;
+        cache_read_input_tokens?: number;
+      }
+    | undefined
 ): ModelUsage | undefined {
   if (!usage) {
     return undefined;
   }
 
   return {
     inputTokens: usage.input_tokens,
-    outputTokens: usage.output_tokens
+    outputTokens: usage.output_tokens,
+    cacheCreationInputTokens: usage.cache_creation_input_tokens,
+    cacheReadInputTokens: usage.cache_read_input_tokens
   };
 }
 

diff --git a/packages/core/src/fork.ts b/packages/core/src/fork.ts
@@ -1,5 +1,6 @@
 import { createHash } from "node:crypto";
 
+import type { SystemTextBlock } from "./model.js";
 import type { Message, ToolDefinition } from "./types.js";
 
 export type ForkTrace = {
@@ -18,15 +19,25 @@ export type ForkTraceInput = {
   parentDepth: number;
   subagentType: string;
   model: string;
-  systemPrompt?: string;
+  systemPrompt?: string | readonly SystemTextBlock[];
   tools: readonly ToolDefinition[];
   prefixMessages: readonly Message[];
   directive: string;
   previous?: ForkTrace;
 };
 
+function systemPromptToHashable(systemPrompt: ForkTraceInput["systemPrompt"]): string {
+  if (systemPrompt === undefined) {
+    return "";
+  }
+  if (typeof systemPrompt === "string") {
+    return systemPrompt;
+  }
+  return systemPrompt.map((block) => block.text).join("\n\n");
+}
+
 export function createForkTrace(input: ForkTraceInput): ForkTrace {
-  const systemPromptHash = sha256(input.systemPrompt ?? "");
+  const systemPromptHash = sha256(systemPromptToHashable(input.systemPrompt));
   const toolHash = hashToolDefinitions(input.tools);
   const prefixHash = hashMessages(input.prefixMessages);
   const directiveHash = sha256(input.directive);

diff --git a/packages/core/src/model.ts b/packages/core/src/model.ts
@@ -23,11 +23,28 @@ export type ModelUsage = {
   cacheReadInputTokens?: number;
 };
 
+/**
+ * A single text block in a structured system prompt. The optional
+ * `cache_control` marker turns this block into an Anthropic prompt-cache
+ * breakpoint: the cumulative content up to and including this block is
+ * cached and reused across requests that share the same prefix.
+ */
+export type SystemTextBlock = {
+  type: "text";
+  text: string;
+  cache_control?: { type: "ephemeral" };
+};
+
 export type ModelRequest = {
   messages: readonly Message[];
   model?: string;
   maxTokens?: number;
-  system?: string;
+  /**
+   * The system prompt. A plain string preserves the legacy flat form
+   * (no caching). An array of `SystemTextBlock`s enables structured
+   * caching when at least one block carries `cache_control`.
+   */
+  system?: string | readonly SystemTextBlock[];
   requestId?: string;
   timeoutMs?: number;
   signal?: AbortSignal;

diff --git a/packages/core/src/query.ts b/packages/core/src/query.ts
@@ -10,7 +10,8 @@ import {
   type ModelClient,
   type ModelErrorKind,
   type ModelStreamEvent,
-  type ModelUsage
+  type ModelUsage,
+  type SystemTextBlock
 } from "./model.js";
 import { executeToolBatch, partitionToolCalls } from "./scheduler.js";
 import { toModelToolDefinition } from "./tool.js";
@@ -32,7 +33,7 @@ export type QueryOptions = {
   initialMessages: readonly Message[];
   tools: readonly ToolDefinition[];
   toolContext: ToolContext;
-  system?: string;
+  system?: string | readonly SystemTextBlock[];
   modelName?: string;
   maxTokens?: number;
   maxTurns?: number;
@@ -270,7 +271,7 @@ type CollectModelTurnWithRetryOptions = {
   messages: readonly Message[];
   modelName: string;
   maxTokens: number;
-  system?: string;
+  system?: string | readonly SystemTextBlock[];
   signal?: AbortSignal;
   tools: readonly ModelToolDefinition[];
   contextBudgetTokens: number;

diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts
@@ -1,5 +1,5 @@
 import type { z } from "zod";
-import type { ModelClient, ModelUsage } from "./model.js";
+import type { ModelClient, ModelUsage, SystemTextBlock } from "./model.js";
 import type { ForkTrace } from "./fork.js";
 import type { ProfileRecorder } from "./profile.js";
 import type { TaskStore } from "./task.js";
@@ -78,7 +78,7 @@ export type ToolContext = {
   model?: ModelClient;
   modelName?: string;
   maxTokens?: number;
-  system?: string;
+  system?: string | readonly SystemTextBlock[];
   parentMessages?: readonly Message[];
   tools?: readonly ToolDefinition[];
   taskStore?: TaskStore;

diff --git a/packages/core/test/security/README.md b/packages/core/test/security/README.md
@@ -73,6 +73,16 @@ Tests live in two trees because of the package boundary
 | `executeToolBatch` never overlaps two non-concurrency-safe tools | `packages/core/test/security/scheduler-write-serialization.test.ts` |
 | Sibling read tools cancel when a Bash sibling errors with cancel-on-error | `packages/core/test/scheduler.test.ts` |
 
+### Prompt caching plumbing
+
+| Invariant | Test |
+|---|---|
+| `toAnthropicTools` marks the *last* tool with `cache_control: { type: "ephemeral" }` so the full tool list becomes a single cache breakpoint | `packages/core/test/security/prompt-caching.test.ts` |
+| `toAnthropicTools` returns `{}` (no tools, no spurious cache marker) on an empty/undefined input | `packages/core/test/security/prompt-caching.test.ts` |
+| `toModelUsage` extracts `cache_creation_input_tokens` + `cache_read_input_tokens` when the SDK provides them | `packages/core/test/security/prompt-caching.test.ts` |
+| `toModelUsage` leaves cache fields `undefined` on non-cached turns (the SDK omits them) | `packages/core/test/security/prompt-caching.test.ts` |
+| The agent's outbound `request.system` is a `SystemTextBlock[]` (not a string) with `cache_control: ephemeral` on the block | `packages/cli/test/cli.test.ts` |
+
 ### Cache token accounting
 
 | Invariant | Test |