Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ jobs:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Setup Node.js
uses: actions/setup-node@v4
uses: actions/setup-node@v5
with:
node-version: 20
cache: npm
Expand Down
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The CLI binary entry is `packages/cli/dist/index.js` (exposed as `myagent`). The
### Environment

- `ANTHROPIC_API_KEY` — required for real model calls (`chat`, `agent`, `tui`). Read from process env or a local `.env` (parsed by `loadEnvironment` in `packages/cli/src/index.ts`; only an allow-listed set of keys is honored).
- `ANTHROPIC_BASE_URL`, `MYAGENT_MODEL`, `MYAGENT_PERMISSION_MODE`, `MYAGENT_INPUT_USD_PER_MTOK`, `MYAGENT_OUTPUT_USD_PER_MTOK`, `MYAGENT_CACHE_WRITE_USD_PER_MTOK`, `MYAGENT_CACHE_READ_USD_PER_MTOK` — optional overrides. The two cache-rate vars feed `estimateUsageCostUsd` and surface in `myagent usage <sessionId>` once prompt caching is enabled in M1.5b.
- `ANTHROPIC_BASE_URL`, `MYAGENT_MODEL`, `MYAGENT_PERMISSION_MODE`, `MYAGENT_INPUT_USD_PER_MTOK`, `MYAGENT_OUTPUT_USD_PER_MTOK`, `MYAGENT_CACHE_WRITE_USD_PER_MTOK`, `MYAGENT_CACHE_READ_USD_PER_MTOK` — optional overrides. Prompt caching is wired on outbound requests: the agent's system prompt is sent as a single `SystemTextBlock` with `cache_control: ephemeral`, and the tool list's last entry carries a matching marker so the whole tool block is cached. `cacheCreationInputTokens` / `cacheReadInputTokens` flow back through `ModelUsage` → `TokenUsage` → session record → `myagent usage <sessionId>` per-turn breakdown.
- Offline tests use `FakeModel` and do not need an API key.
- Runtime state (sessions, artifacts, profiles, tasks, fork traces, memory) is written under `.myagent/` in the cwd; gitignored.

Expand Down
45 changes: 43 additions & 2 deletions packages/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import {
type MemoryEntry,
type SessionCompactionArchiver,
type SessionEvent,
type SystemTextBlock,
type ModelClient,
type ModelStreamEvent,
type PermissionDecision,
Expand Down Expand Up @@ -1576,6 +1577,18 @@ async function runAgentTurn(options: RunAgentTurnOptions): Promise<AgentTurnResu
profile.addMetric("model.output_tokens", event.usage.outputTokens ?? 0, "tokens", {
requestId: event.requestId
});
profile.addMetric(
"model.cache_creation_input_tokens",
event.usage.cacheCreationInputTokens ?? 0,
"tokens",
{ requestId: event.requestId }
);
profile.addMetric(
"model.cache_read_input_tokens",
event.usage.cacheReadInputTokens ?? 0,
"tokens",
{ requestId: event.requestId }
);
profile.addMetric("model.cost_usd", costDelta, "usd", {
requestId: event.requestId,
estimated: true
Expand Down Expand Up @@ -1638,6 +1651,16 @@ async function runAgentTurn(options: RunAgentTurnOptions): Promise<AgentTurnResu
const finalState = getBootstrapState();
profile.addMetric("session.input_tokens", finalState.tokenUsage.inputTokens, "tokens");
profile.addMetric("session.output_tokens", finalState.tokenUsage.outputTokens, "tokens");
profile.addMetric(
"session.cache_creation_input_tokens",
finalState.tokenUsage.cacheCreationInputTokens,
"tokens"
);
profile.addMetric(
"session.cache_read_input_tokens",
finalState.tokenUsage.cacheReadInputTokens,
"tokens"
);
profile.addMetric("session.cost_usd", finalState.costUsd, "usd", { estimated: true });
await profileStore.save(profile.finish("completed")).catch(() => undefined);
return { exitCode: 0, sessionId: bootstrap.sessionId };
Expand Down Expand Up @@ -1856,10 +1879,28 @@ function parseOptionalNumber(value: string | undefined): number | undefined {
return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined;
}

function buildAgentSystemPrompt(memoryContext: string, skillContext: string): string {
return [READ_ONLY_AGENT_SYSTEM_PROMPT, memoryContext.trim(), skillContext.trim()]
/**
 * Builds the agent's system prompt in structured (block-array) form so it
 * can serve as a prompt-cache breakpoint.
 *
 * The base read-only prompt, the trimmed memory context and the trimmed
 * skill context are concatenated — empty parts dropped, parts separated by
 * a blank line — into one text block tagged `cache_control: ephemeral`.
 *
 * @param memoryContext - Memory text to append; surrounding whitespace is trimmed.
 * @param skillContext - Skill text to append; surrounding whitespace is trimmed.
 * @returns A one-element array holding the combined, cache-marked text block.
 */
function buildAgentSystemPrompt(
  memoryContext: string,
  skillContext: string
): readonly SystemTextBlock[] {
  const sections: string[] = [];
  for (const candidate of [
    READ_ONLY_AGENT_SYSTEM_PROMPT,
    memoryContext.trim(),
    skillContext.trim()
  ]) {
    // Skip empty parts so the join never produces stray blank separators.
    if (candidate.length > 0) {
      sections.push(candidate);
    }
  }

  const cachedBlock: SystemTextBlock = {
    type: "text",
    text: sections.join("\n\n"),
    cache_control: { type: "ephemeral" }
  };
  return [cachedBlock];
}

const READ_ONLY_AGENT_SYSTEM_PROMPT = `You are myagent Week 18, a safety-first coding agent.
Expand Down
52 changes: 50 additions & 2 deletions packages/cli/test/cli.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ function captureWriter() {
};
}

/**
 * Flattens a request's `system` value into plain text for assertions.
 *
 * @param system - Either a flat string, an array of text blocks, or `undefined`.
 * @returns The string as-is, the blocks' `text` fields joined by a blank
 *   line, or `""` when `system` is `undefined` (or an empty array).
 */
function systemToText(
  system: string | ReadonlyArray<{ type: "text"; text: string }> | undefined
): string {
  if (typeof system === "string") {
    return system;
  }

  const pieces: string[] = [];
  // `undefined` is treated like an empty block list, which joins to "".
  for (const { text } of system ?? []) {
    pieces.push(text);
  }
  return pieces.join("\n\n");
}

describe("myagent cli", () => {
it("prints version without starting agent runtime", async () => {
const stdout = captureWriter();
Expand Down Expand Up @@ -150,7 +156,7 @@ describe("myagent cli", () => {
};
},
async *stream(request) {
systems.push(request.system ?? "");
systems.push(systemToText(request.system));
yield {
type: "assistant_message",
message: {
Expand Down Expand Up @@ -227,7 +233,7 @@ describe("myagent cli", () => {
};
},
async *stream(request) {
systems.push(request.system ?? "");
systems.push(systemToText(request.system));
yield {
type: "assistant_message",
message: { role: "assistant", content: "Use real DB integration fixtures." },
Expand Down Expand Up @@ -525,6 +531,48 @@ describe("myagent cli", () => {
expect(record.events.at(-1)?.type).toBe("compact");
});

// Verifies that the `agent` command hands the model client a structured
// system prompt (an array with one text block carrying an ephemeral
// cache_control marker) rather than a flat string.
it("sends the agent's system prompt as a structured block with cache_control", async () => {
// Run in a fresh temporary working directory.
const cwd = mkdtempSync(join(tmpdir(), "myagent-cli-cache-system-"));
// Holds whatever `request.system` the stub model client receives in stream().
let capturedSystem: unknown;
const stdout = captureWriter();
const stderr = captureWriter();

const exitCode = await runCli(["agent", "summarize", "fixture"], stdout.writer, stderr.writer, {
cwd,
env: {},
// Stub model client: returns canned assistant messages and records the system prompt.
createModelClient: () =>
({
async create() {
return {
message: { role: "assistant", content: "ok" },
requestId: "req_cache"
};
},
async *stream(request) {
// Capture the outbound system prompt for the assertions below.
capturedSystem = request.system;
yield {
type: "assistant_message",
message: { role: "assistant", content: "fixture summary" },
requestId: "req_cache"
};
}
}) satisfies ModelClient
});

// The run must succeed without writing anything to stderr.
expect(exitCode).toBe(0);
expect(stderr.text()).toBe("");
// The system prompt must be the structured array form, not a string.
expect(Array.isArray(capturedSystem)).toBe(true);
const systemBlocks = capturedSystem as Array<{
type: string;
text: string;
cache_control?: { type: string };
}>;
// Exactly one text block, containing the base prompt and marked ephemeral.
expect(systemBlocks).toHaveLength(1);
expect(systemBlocks[0]?.type).toBe("text");
expect(systemBlocks[0]?.text).toContain("safety-first coding agent");
expect(systemBlocks[0]?.cache_control).toEqual({ type: "ephemeral" });
});

it("prints per-turn token + cost breakdown via myagent usage", async () => {
const cwd = mkdtempSync(join(tmpdir(), "myagent-cli-usage-"));
const sessionRootDir = join(cwd, ".myagent", "sessions");
Expand Down
57 changes: 43 additions & 14 deletions packages/core/src/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,19 @@ export class AnthropicModelClient implements ModelClient {
stop_reason?: string | null;
partial_json?: string;
};
message?: { usage?: { input_tokens?: number; output_tokens?: number } };
usage?: { output_tokens?: number };
message?: {
usage?: {
input_tokens?: number;
output_tokens?: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
};
};
usage?: {
output_tokens?: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
};
};

if (typed.type === "message_start") {
Expand Down Expand Up @@ -227,7 +238,11 @@ export class AnthropicModelClient implements ModelClient {
stopReason = typed.delta?.stop_reason;
usage = {
...usage,
outputTokens: typed.usage?.output_tokens ?? usage?.outputTokens
outputTokens: typed.usage?.output_tokens ?? usage?.outputTokens,
cacheCreationInputTokens:
typed.usage?.cache_creation_input_tokens ?? usage?.cacheCreationInputTokens,
cacheReadInputTokens:
typed.usage?.cache_read_input_tokens ?? usage?.cacheReadInputTokens
};
}
}
Expand Down Expand Up @@ -403,36 +418,50 @@ function toInternalContent(
return content;
}

function toAnthropicTools(
export function toAnthropicTools(
tools: readonly ModelToolDefinition[] | undefined
): { tools?: Array<Record<string, unknown>> } {
if (!tools || tools.length === 0) {
return {};
}

return {
tools: tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: tool.inputSchema
}))
};
const mapped: Array<Record<string, unknown>> = tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: tool.inputSchema
}));
// Mark the last tool with cache_control so the entire tool list becomes
// a single prompt-cache breakpoint. Tools rarely change across turns,
// so this caches the largest stable input segment after the system
// prompt. The marker is harmless on uncached calls.
const lastIndex = mapped.length - 1;
mapped[lastIndex] = { ...mapped[lastIndex], cache_control: { type: "ephemeral" } };
return { tools: mapped };
}

/**
 * Type guard for plain object-like values.
 *
 * @param value - Any value.
 * @returns `true` when `value` has `typeof "object"` and is neither `null`
 *   nor an array; `false` otherwise.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  // `typeof null` is "object" and arrays are objects too, so rule both out first.
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}

function toModelUsage(
usage: { input_tokens?: number; output_tokens?: number } | undefined
export function toModelUsage(
usage:
| {
input_tokens?: number;
output_tokens?: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
}
| undefined
): ModelUsage | undefined {
if (!usage) {
return undefined;
}

return {
inputTokens: usage.input_tokens,
outputTokens: usage.output_tokens
outputTokens: usage.output_tokens,
cacheCreationInputTokens: usage.cache_creation_input_tokens,
cacheReadInputTokens: usage.cache_read_input_tokens
};
}

Expand Down
15 changes: 13 additions & 2 deletions packages/core/src/fork.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { createHash } from "node:crypto";

import type { SystemTextBlock } from "./model.js";
import type { Message, ToolDefinition } from "./types.js";

export type ForkTrace = {
Expand All @@ -18,15 +19,25 @@ export type ForkTraceInput = {
parentDepth: number;
subagentType: string;
model: string;
systemPrompt?: string;
systemPrompt?: string | readonly SystemTextBlock[];
tools: readonly ToolDefinition[];
prefixMessages: readonly Message[];
directive: string;
previous?: ForkTrace;
};

/**
 * Reduces a fork's system prompt to a single string suitable for hashing.
 *
 * @param systemPrompt - A flat string, an array of text blocks, or `undefined`.
 * @returns The string unchanged, the blocks' `text` fields joined by a blank
 *   line, or `""` when no system prompt was given. Only each block's `text`
 *   contributes to the result.
 */
function systemPromptToHashable(systemPrompt: ForkTraceInput["systemPrompt"]): string {
  if (typeof systemPrompt === "string") {
    return systemPrompt;
  }

  const texts: string[] = [];
  // A missing prompt behaves like an empty block list, which joins to "".
  for (const block of systemPrompt ?? []) {
    texts.push(block.text);
  }
  return texts.join("\n\n");
}

export function createForkTrace(input: ForkTraceInput): ForkTrace {
const systemPromptHash = sha256(input.systemPrompt ?? "");
const systemPromptHash = sha256(systemPromptToHashable(input.systemPrompt));
const toolHash = hashToolDefinitions(input.tools);
const prefixHash = hashMessages(input.prefixMessages);
const directiveHash = sha256(input.directive);
Expand Down
19 changes: 18 additions & 1 deletion packages/core/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,28 @@ export type ModelUsage = {
cacheReadInputTokens?: number;
};

/**
* A single text block in a structured system prompt. The optional
* `cache_control` marker turns this block into an Anthropic prompt-cache
* breakpoint: the cumulative content up to and including this block is
* cached and reused across requests that share the same prefix.
*/
export type SystemTextBlock = {
// Block kind; "text" is the only variant this type allows.
type: "text";
// The prompt text carried by this block.
text: string;
// Optional marker; when present, this block acts as a prompt-cache breakpoint.
cache_control?: { type: "ephemeral" };
};

export type ModelRequest = {
messages: readonly Message[];
model?: string;
maxTokens?: number;
system?: string;
/**
* The system prompt. A plain string preserves the legacy flat form
* (no caching). An array of `SystemTextBlock`s enables structured
* caching when at least one block carries `cache_control`.
*/
system?: string | readonly SystemTextBlock[];
requestId?: string;
timeoutMs?: number;
signal?: AbortSignal;
Expand Down
7 changes: 4 additions & 3 deletions packages/core/src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import {
type ModelClient,
type ModelErrorKind,
type ModelStreamEvent,
type ModelUsage
type ModelUsage,
type SystemTextBlock
} from "./model.js";
import { executeToolBatch, partitionToolCalls } from "./scheduler.js";
import { toModelToolDefinition } from "./tool.js";
Expand All @@ -32,7 +33,7 @@ export type QueryOptions = {
initialMessages: readonly Message[];
tools: readonly ToolDefinition[];
toolContext: ToolContext;
system?: string;
system?: string | readonly SystemTextBlock[];
modelName?: string;
maxTokens?: number;
maxTurns?: number;
Expand Down Expand Up @@ -270,7 +271,7 @@ type CollectModelTurnWithRetryOptions = {
messages: readonly Message[];
modelName: string;
maxTokens: number;
system?: string;
system?: string | readonly SystemTextBlock[];
signal?: AbortSignal;
tools: readonly ModelToolDefinition[];
contextBudgetTokens: number;
Expand Down
4 changes: 2 additions & 2 deletions packages/core/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { z } from "zod";
import type { ModelClient, ModelUsage } from "./model.js";
import type { ModelClient, ModelUsage, SystemTextBlock } from "./model.js";
import type { ForkTrace } from "./fork.js";
import type { ProfileRecorder } from "./profile.js";
import type { TaskStore } from "./task.js";
Expand Down Expand Up @@ -78,7 +78,7 @@ export type ToolContext = {
model?: ModelClient;
modelName?: string;
maxTokens?: number;
system?: string;
system?: string | readonly SystemTextBlock[];
parentMessages?: readonly Message[];
tools?: readonly ToolDefinition[];
taskStore?: TaskStore;
Expand Down
10 changes: 10 additions & 0 deletions packages/core/test/security/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ Tests live in two trees because of the package boundary
| `executeToolBatch` never overlaps two non-concurrency-safe tools | `packages/core/test/security/scheduler-write-serialization.test.ts` |
| Sibling read tools cancel when a Bash sibling errors with cancel-on-error | `packages/core/test/scheduler.test.ts` |

### Prompt caching plumbing

| Invariant | Test |
|---|---|
| `toAnthropicTools` marks the *last* tool with `cache_control: { type: "ephemeral" }` so the full tool list becomes a single cache breakpoint | `packages/core/test/security/prompt-caching.test.ts` |
| `toAnthropicTools` returns `{}` (no tools, no spurious cache marker) on an empty/undefined input | `packages/core/test/security/prompt-caching.test.ts` |
| `toModelUsage` extracts `cache_creation_input_tokens` + `cache_read_input_tokens` when the SDK provides them | `packages/core/test/security/prompt-caching.test.ts` |
| `toModelUsage` leaves cache fields `undefined` on non-cached turns (the SDK omits them) | `packages/core/test/security/prompt-caching.test.ts` |
| The agent's outbound `request.system` is a `SystemTextBlock[]` (not a string) with `cache_control: ephemeral` on the block | `packages/cli/test/cli.test.ts` |

### Cache token accounting

| Invariant | Test |
Expand Down
Loading
Loading