diff --git a/.changeset/code-mode-packages.md b/.changeset/code-mode-packages.md new file mode 100644 index 000000000..0c2a6a7e0 --- /dev/null +++ b/.changeset/code-mode-packages.md @@ -0,0 +1,20 @@ +--- +'@tanstack/ai': minor +'@tanstack/ai-code-mode': minor +'@tanstack/ai-code-mode-skills': minor +'@tanstack/ai-isolate-cloudflare': minor +'@tanstack/ai-isolate-node': minor +'@tanstack/ai-isolate-quickjs': minor +'@tanstack/ai-event-client': minor +'@tanstack/ai-ollama': patch +'@tanstack/ai-openai': patch +'@tanstack/ai-client': patch +'@tanstack/ai-devtools-core': patch +--- + +Add code mode and isolate packages for secure AI code execution + +Also includes fixes for Ollama tool call argument streaming and usage +reporting, OpenAI realtime adapter handling of missing call_id/item_id, +realtime client guards for missing toolCallId, and new DevtoolsChatMiddleware +type export from ai-event-client. diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 96c227977..a4fad3fd2 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -16,6 +16,10 @@ jobs: autofix: name: autofix runs-on: ubuntu-latest + env: + # Run JS actions (e.g. 
autofix-ci/action, still `node20` runtime) on Node 24 per + # https://github.blog/changelog/2025-09-19-deprecation-of-node-20-on-github-actions-runners/ + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true steps: - name: Checkout uses: actions/checkout@v6.0.2 @@ -24,6 +28,7 @@ jobs: - name: Fix formatting run: pnpm format - name: Apply fixes - uses: autofix-ci/action@635ffb0c9798bd160680f18fd73371e355b85f27 + # v1.3.3 — API endpoint reliability (see https://github.com/autofix-ci/action/releases/tag/v1.3.3) + uses: autofix-ci/action@7a166d7532b277f34e16238930461bf77f9d7ed8 with: commit-message: 'ci: apply automated fixes' diff --git a/.gitignore b/.gitignore index 09aca0094..2ba04aa82 100644 --- a/.gitignore +++ b/.gitignore @@ -58,8 +58,16 @@ test-traces playwright-report test-results +solo.yml +.structured-output-skills + STATUS_*.md +# models-eval run artifacts (full candidate/gold text per model) +packages/typescript/ai-code-mode/models-eval/log/ + +.skills # Only .claude.settings.json should be committed .claude/settings.local.json -.claude/worktrees/* \ No newline at end of file +.claude/worktrees/* +solo.yml diff --git a/docs/guides/code-mode-isolates.md b/docs/guides/code-mode-isolates.md new file mode 100644 index 000000000..31b125425 --- /dev/null +++ b/docs/guides/code-mode-isolates.md @@ -0,0 +1,203 @@ +--- +title: Code Mode Isolate Drivers +id: code-mode-isolates +order: 21 +--- + +Isolate drivers provide the secure sandbox runtimes that [Code Mode](./code-mode.md) uses to execute generated TypeScript. All drivers implement the same `IsolateDriver` interface, so you can swap them without changing any other code. 
+ +## Choosing a Driver + +| | Node (`isolated-vm`) | QuickJS (WASM) | Cloudflare Workers | +|---|---|---|---| +| **Best for** | Server-side Node.js apps | Browsers, edge, portability | Edge deployments on Cloudflare | +| **Performance** | Fast (V8 JIT) | Slower (interpreted) | Fast (V8 on Cloudflare edge) | +| **Native deps** | Yes (C++ addon) | None | None | +| **Browser support** | No | Yes | N/A | +| **Memory limit** | Configurable | Configurable | N/A | +| **Stack size limit** | N/A | Configurable | N/A | +| **Setup** | `pnpm add` | `pnpm add` | Deploy a Worker first | + +--- + +## Node.js Driver (`@tanstack/ai-isolate-node`) + +Uses V8 isolates via the [`isolated-vm`](https://github.com/laverdet/isolated-vm) native addon. This is the fastest option for server-side Node.js applications because generated code runs in the same V8 engine as the host, under JIT compilation, with no serialization overhead beyond tool call boundaries. + +### Installation + +```bash +pnpm add @tanstack/ai-isolate-node +``` + +`isolated-vm` is a native C++ addon and must be compiled for your platform. It requires Node.js 18 or later. + +### Usage + +```typescript +import { createNodeIsolateDriver } from '@tanstack/ai-isolate-node' + +const driver = createNodeIsolateDriver({ + memoryLimit: 128, // MB + timeout: 30_000, // ms +}) +``` + +### Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `memoryLimit` | `number` | `128` | Maximum heap size for the V8 isolate, in megabytes. Execution is terminated if this limit is exceeded. | +| `timeout` | `number` | `30000` | Maximum wall-clock time per execution, in milliseconds. | + +### How it works + +Each `execute_typescript` call creates a fresh V8 isolate. 
Your tools are bridged into the isolate as async reference functions — when generated code calls `external_myTool(...)`, the call crosses the isolate boundary back into the host Node.js process, executes your tool implementation, and returns the result. Console output (`log`, `error`, `warn`, `info`) is captured and returned in the execution result. The isolate is destroyed after each call. + +--- + +## QuickJS Driver (`@tanstack/ai-isolate-quickjs`) + +Uses [QuickJS](https://bellard.org/quickjs/) compiled to WebAssembly via Emscripten. Because the sandbox is a WASM module, it has no native dependencies and runs anywhere JavaScript runs: Node.js, browsers, Deno, Bun, and Cloudflare Workers (without deploying a separate Worker). + +### Installation + +```bash +pnpm add @tanstack/ai-isolate-quickjs +``` + +### Usage + +```typescript +import { createQuickJSIsolateDriver } from '@tanstack/ai-isolate-quickjs' + +const driver = createQuickJSIsolateDriver({ + memoryLimit: 128, // MB + timeout: 30_000, // ms + maxStackSize: 524288, // bytes (512 KiB) +}) +``` + +### Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `memoryLimit` | `number` | `128` | Maximum heap memory for the QuickJS VM, in megabytes. | +| `timeout` | `number` | `30000` | Maximum wall-clock time per execution, in milliseconds. | +| `maxStackSize` | `number` | `524288` | Maximum call stack size in bytes (default: 512 KiB). Increase for deeply recursive code; decrease to catch runaway recursion sooner. | + +### How it works + +QuickJS WASM uses an asyncified execution model — the WASM module can pause while awaiting host async functions (your tools). Executions are serialized through a global queue to prevent concurrent WASM calls, which the asyncify model does not support. Fatal errors (memory exhaustion, stack overflow) are detected, the VM is disposed, and a structured error is returned. Console output is captured and returned with the result. 
+ +> **Performance note:** QuickJS interprets JavaScript rather than JIT-compiling it, so compute-heavy scripts run slower than with the Node driver. For typical LLM-generated scripts that are mostly waiting on `external_*` tool calls, this difference is not significant. + +--- + +## Cloudflare Workers Driver (`@tanstack/ai-isolate-cloudflare`) + +Runs generated code inside a [Cloudflare Worker](https://workers.cloudflare.com/) at the edge. Your application server sends code and tool schemas to the Worker via HTTP; the Worker executes the code and calls back when it needs a tool result. This keeps your tool implementations on your server while sandboxed execution happens on Cloudflare's global network. + +### Installation + +```bash +pnpm add @tanstack/ai-isolate-cloudflare +``` + +### Usage + +```typescript +import { createCloudflareIsolateDriver } from '@tanstack/ai-isolate-cloudflare' + +const driver = createCloudflareIsolateDriver({ + workerUrl: 'https://my-code-mode-worker.my-account.workers.dev', + authorization: process.env.CODE_MODE_WORKER_SECRET, + timeout: 30_000, + maxToolRounds: 10, +}) +``` + +### Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `workerUrl` | `string` | — | **Required.** Full URL of the deployed Cloudflare Worker. | +| `authorization` | `string` | — | Optional value sent as the `Authorization` header on every request. Use this to prevent unauthorized access to your Worker. | +| `timeout` | `number` | `30000` | Maximum wall-clock time for the entire execution (including all tool round-trips), in milliseconds. | +| `maxToolRounds` | `number` | `10` | Maximum number of tool-call/result cycles per execution. Prevents infinite loops when generated code calls tools in a loop. | + +### Deploying the Worker + +The package exports a ready-made Worker handler at `@tanstack/ai-isolate-cloudflare/worker`. 
Create a `wrangler.toml` and a worker entry file: + +```toml +# wrangler.toml +name = "code-mode-worker" +main = "src/worker.ts" +compatibility_date = "2024-01-01" + +[unsafe] +bindings = [{ name = "eval", type = "eval" }] +``` + +```typescript +// src/worker.ts +export { default } from '@tanstack/ai-isolate-cloudflare/worker' +``` + +Deploy: + +```bash +wrangler deploy +``` + +### How it works + +The driver implements a request/response loop for tool execution: + +``` +Driver (your server) Worker (Cloudflare edge) +───────────────────── ───────────────────────── +Send: code + tool schemas ──────▶ Execute code + ◀────── Return: needs tool X with args Y +Execute tool X locally +Send: tool result ──────▶ Resume execution + ◀────── Return: final result / needs tool Z +...repeat until done... +``` + +Each round-trip adds network latency, so the `maxToolRounds` limit both prevents runaway scripts and caps the maximum number of cross-continent hops. Console output from all rounds is aggregated and returned in the final result. + +> **Security:** The Worker requires `UNSAFE_EVAL` (local dev) or the `eval` unsafe binding (production) to execute arbitrary code. Restrict access using the `authorization` option or Cloudflare Access policies. + +--- + +## The `IsolateDriver` Interface + +All three drivers satisfy this interface, exported from `@tanstack/ai-code-mode`: + +```typescript +interface IsolateDriver { + createContext(config: IsolateConfig): Promise<IsolateContext> +} + +interface IsolateConfig { + bindings: Record<string, Function> + timeout?: number + memoryLimit?: number +} + +interface IsolateContext { + execute(code: string): Promise<ExecutionResult> + dispose(): Promise<void> +} + +interface ExecutionResult<T = unknown> { + success: boolean + value?: T + logs: Array<string> + error?: NormalizedError +} +``` + +You can implement this interface to build a custom driver — for example, a Docker-based sandbox or a Deno subprocess. 
diff --git a/docs/guides/code-mode-with-skills.md b/docs/guides/code-mode-with-skills.md new file mode 100644 index 000000000..3c869c88e --- /dev/null +++ b/docs/guides/code-mode-with-skills.md @@ -0,0 +1,326 @@ +--- +title: Code Mode with Skills +id: code-mode-with-skills +order: 20 +--- + +Skills extend [Code Mode](./code-mode.md) with a persistent library of reusable TypeScript snippets. When the LLM writes a useful piece of code — say, a function that fetches and ranks NPM packages — it can save that code as a _skill_. On future requests, relevant skills are loaded from storage and made available as first-class tools the LLM can call without re-writing the logic. + +## Overview + +The skills system has two integration paths: + +| Approach | Entry point | Skill selection | Best for | +|----------|-------------|----------------|----------| +| **High-level** | `codeModeWithSkills()` | Automatic (LLM-based) | New projects, turnkey setup | +| **Manual** | Individual functions (`skillsToTools`, `createSkillManagementTools`, etc.) | You decide which skills to load | Full control, existing setups | + +Both paths share the same storage, trust, and execution primitives — they differ only in how skills are selected and assembled. + +## How It Works + +A request with skills enabled goes through these stages: + +``` +┌─────────────────────────────────────────────────────┐ +│ 1. Load skill index (metadata only, no code) │ +├─────────────────────────────────────────────────────┤ +│ 2. Select relevant skills (LLM call — fast model) │ +├─────────────────────────────────────────────────────┤ +│ 3. Build tool registry │ +│ ├── execute_typescript (Code Mode sandbox) │ +│ ├── search_skills / get_skill / register_skill │ +│ └── skill tools (one per selected skill) │ +├─────────────────────────────────────────────────────┤ +│ 4. Generate system prompt │ +│ ├── Code Mode type stubs │ +│ └── Skill library documentation │ +├─────────────────────────────────────────────────────┤ +│ 5. 
Main chat() call (strong model) │ +│ ├── Can call skill tools directly │ +│ ├── Can write code via execute_typescript │ +│ └── Can register new skills for future use │ +└─────────────────────────────────────────────────────┘ +``` + +### LLM calls + +There are **two** LLM interactions per request when using the high-level API: + +1. **Skill selection** (`selectRelevantSkills`) — A single chat call using the adapter you provide. It sends the last 5 conversation messages plus a catalog of skill names/descriptions, and asks the model to return a JSON array of relevant skill names. This should be a cheap/fast model (e.g., `gpt-4o-mini`, `claude-haiku-4-5`). + +2. **Main chat** — The primary `chat()` call with your full model. This is where the LLM reasons, calls tools, writes code, and registers skills. + +The selection call is lightweight — it only sees skill metadata (names, descriptions, usage hints), not full code. If there are no skills in storage or no messages, it short-circuits and skips the LLM call entirely. 
+ +## High-Level API: `codeModeWithSkills()` + +```typescript +import { chat, maxIterations, toServerSentEventsStream } from '@tanstack/ai' +import { createNodeIsolateDriver } from '@tanstack/ai-isolate-node' +import { codeModeWithSkills } from '@tanstack/ai-code-mode-skills' +import { createFileSkillStorage } from '@tanstack/ai-code-mode-skills/storage' +import { openaiText } from '@tanstack/ai-openai' + +const storage = createFileSkillStorage({ directory: './.skills' }) +const driver = createNodeIsolateDriver() + +const { toolsRegistry, systemPrompt, selectedSkills } = await codeModeWithSkills({ + config: { + driver, + tools: [myTool1, myTool2], + timeout: 60_000, + memoryLimit: 128, + }, + adapter: openaiText('gpt-4o-mini'), // cheap model for skill selection + skills: { + storage, + maxSkillsInContext: 5, + }, + messages, // current conversation +}) + +const stream = chat({ + adapter: openaiText('gpt-4o'), // strong model for reasoning + toolRegistry: toolsRegistry, + messages, + systemPrompts: ['You are a helpful assistant.', systemPrompt], + agentLoopStrategy: maxIterations(15), +}) +``` + +`codeModeWithSkills` returns: + +| Property | Type | Description | +|----------|------|-------------| +| `toolsRegistry` | `ToolRegistry` | Mutable registry containing all tools. Pass to `chat()` via `toolRegistry`. | +| `systemPrompt` | `string` | Combined Code Mode + skill library documentation. | +| `selectedSkills` | `Array` | Skills the selection model chose for this conversation. | + +### What goes into the registry + +The registry is populated with: + +- **`execute_typescript`** — The Code Mode sandbox tool. Inside the sandbox, skills are also available as `skill_*` functions (loaded dynamically at execution time). +- **`search_skills`** — Search the skill library by query. Returns matching skill metadata. +- **`get_skill`** — Retrieve full details (including code) for a specific skill. +- **`register_skill`** — Save working code as a new skill. 
Newly registered skills are immediately added to the registry as callable tools. +- **One tool per selected skill** — Each selected skill becomes a direct tool (prefixed with `[SKILL]` in its description) that the LLM can call without going through `execute_typescript`. + +## Manual API + +If you want full control — for example, loading all skills instead of using LLM-based selection — use the lower-level functions directly. This is the approach used in the `ts-code-mode-web` example. + +```typescript +import { chat, maxIterations } from '@tanstack/ai' +import { createCodeMode } from '@tanstack/ai-code-mode' +import { createNodeIsolateDriver } from '@tanstack/ai-isolate-node' +import { + createAlwaysTrustedStrategy, + createSkillManagementTools, + createSkillsSystemPrompt, + skillsToTools, +} from '@tanstack/ai-code-mode-skills' +import { createFileSkillStorage } from '@tanstack/ai-code-mode-skills/storage' + +const trustStrategy = createAlwaysTrustedStrategy() +const storage = createFileSkillStorage({ + directory: './.skills', + trustStrategy, +}) +const driver = createNodeIsolateDriver() + +// 1. Create Code Mode tool + prompt +const { tool: codeModeTool, systemPrompt: codeModePrompt } = + createCodeMode({ + driver, + tools: [myTool1, myTool2], + timeout: 60_000, + memoryLimit: 128, + }) + +// 2. Load all skills and convert to tools +const allSkills = await storage.loadAll() +const skillIndex = await storage.loadIndex() + +const skillTools = allSkills.length > 0 + ? skillsToTools({ + skills: allSkills, + driver, + tools: [myTool1, myTool2], + storage, + timeout: 60_000, + memoryLimit: 128, + }) + : [] + +// 3. Create management tools +const managementTools = createSkillManagementTools({ + storage, + trustStrategy, +}) + +// 4. Generate skill library prompt +const skillsPrompt = createSkillsSystemPrompt({ + selectedSkills: allSkills, + totalSkillCount: skillIndex.length, + skillsAsTools: true, +}) + +// 5. 
Assemble and call chat() +const stream = chat({ + adapter: openaiText('gpt-4o'), + tools: [codeModeTool, ...managementTools, ...skillTools], + messages, + systemPrompts: [BASE_PROMPT, codeModePrompt, skillsPrompt], + agentLoopStrategy: maxIterations(15), +}) +``` + +This approach skips the selection LLM call entirely — you load whichever skills you want and pass them in directly. + +## Skill Storage + +Skills are persisted through the `SkillStorage` interface. Two implementations are provided: + +### File storage (production) + +```typescript +import { createFileSkillStorage } from '@tanstack/ai-code-mode-skills/storage' + +const storage = createFileSkillStorage({ + directory: './.skills', + trustStrategy, // optional, defaults to createDefaultTrustStrategy() +}) +``` + +Creates a directory structure: + +``` +.skills/ + _index.json # Lightweight catalog for fast loading + fetch_github_stats/ + meta.json # Description, schemas, hints, stats + code.ts # TypeScript source + compare_npm_packages/ + meta.json + code.ts +``` + +### Memory storage (testing) + +```typescript +import { createMemorySkillStorage } from '@tanstack/ai-code-mode-skills/storage' + +const storage = createMemorySkillStorage() +``` + +Keeps everything in memory. Useful for tests and demos. + +### Storage interface + +Both implementations satisfy this interface: + +| Method | Description | +|--------|-------------| +| `loadIndex()` | Load lightweight metadata for all skills (no code) | +| `loadAll()` | Load all skills with full details including code | +| `get(name)` | Get a single skill by name | +| `save(skill)` | Create or update a skill | +| `delete(name)` | Remove a skill | +| `search(query, options?)` | Search skills by text query | +| `updateStats(name, success)` | Record an execution result for trust tracking | + +## Trust Strategies + +Skills start untrusted and earn trust through successful executions. The trust level is metadata only — it does not currently gate execution. 
Four built-in strategies are available: + +```typescript +import { + createDefaultTrustStrategy, + createAlwaysTrustedStrategy, + createRelaxedTrustStrategy, + createCustomTrustStrategy, +} from '@tanstack/ai-code-mode-skills' +``` + +| Strategy | Initial level | Provisional | Trusted | +|----------|--------------|-------------|---------| +| **Default** | `untrusted` | 10+ runs, ≥90% success | 100+ runs, ≥95% success | +| **Relaxed** | `untrusted` | 3+ runs, ≥80% success | 10+ runs, ≥90% success | +| **Always trusted** | `trusted` | — | — | +| **Custom** | Configurable | Configurable | Configurable | + +```typescript +const strategy = createCustomTrustStrategy({ + initialLevel: 'untrusted', + provisionalThreshold: { executions: 5, successRate: 0.85 }, + trustedThreshold: { executions: 50, successRate: 0.95 }, +}) +``` + +## Skill Lifecycle + +### Registration + +When the LLM produces useful code via `execute_typescript`, the system prompt instructs it to call `register_skill` with: + +- `name` — snake_case identifier (becomes the tool name) +- `description` — what the skill does +- `code` — TypeScript source that receives an `input` variable +- `inputSchema` / `outputSchema` — JSON Schema strings +- `usageHints` — when to use this skill +- `dependsOn` — other skills this one calls + +The skill is saved to storage and (if a `ToolRegistry` was provided) immediately added as a callable tool in the current session. + +### Execution + +When a skill tool is called, the system: + +1. Wraps the skill code with a `const input = ...;` declaration that binds the tool-call arguments to the `input` variable +2. Strips TypeScript syntax to plain JavaScript +3. Creates a fresh sandbox context with `external_*` bindings +4. Executes the code and returns the result +5. Updates execution stats (success/failure count) asynchronously + +### Selection (high-level API only) + +On each new request, `selectRelevantSkills`: + +1. Takes the last 5 conversation messages as context +2. Builds a catalog from the skill index (name + description + first usage hint) +3. 
Asks the adapter to return a JSON array of relevant skill names (max `maxSkillsInContext`) +4. Loads full skill data for the selected names + +If parsing fails or the model returns invalid JSON, it falls back to an empty selection — the request proceeds without pre-loaded skills, but the LLM can still search and use skills via the management tools. + +## Skills as Tools vs. Sandbox Bindings + +The `skillsAsTools` option (default: `true`) controls how skills are exposed: + +| Mode | How the LLM calls a skill | Pros | Cons | +|------|--------------------------|------|------| +| **As tools** (`true`) | Direct tool call: `skill_name({ ... })` | Simpler for the LLM, shows in tool-call UI, proper input validation | One tool per skill in the tool list | +| **As bindings** (`false`) | Inside `execute_typescript`: `await skill_fetch_data({ ... })` | Skills composable in code, fewer top-level tools | LLM must write code to use them | + +When `skillsAsTools` is enabled, the system prompt documents each skill with its schema, usage hints, and example calls. When disabled, skills appear as typed `skill_*` functions in the sandbox type stubs. + +## Custom Events + +Skill execution emits events through the TanStack AI event system: + +| Event | When | Payload | +|-------|------|---------| +| `code_mode:skill_call` | Skill tool invoked | `{ skill, input, timestamp }` | +| `code_mode:skill_result` | Skill completed successfully | `{ skill, result, duration, timestamp }` | +| `code_mode:skill_error` | Skill execution failed | `{ skill, error, duration, timestamp }` | +| `skill:registered` | New skill saved via `register_skill` | `{ id, name, description, timestamp }` | + +## Tips + +- **Use a cheap model for selection.** The selection call only needs to match skill names to conversation context — `gpt-4o-mini` or `claude-haiku-4-5` work well. 
+- **Start without skills.** Get Code Mode working first, then add `@tanstack/ai-code-mode-skills` once you have tools that produce reusable patterns. +- **Monitor the skill count.** As the library grows, consider increasing `maxSkillsInContext` or switching to the manual API where you control which skills load. +- **Newly registered skills are available on the next message,** not in the current turn's tool list (unless using `ToolRegistry` with the high-level API, which adds them immediately). +- **Skills can call other skills.** Inside the sandbox, both `external_*` and `skill_*` functions are available. Set `dependsOn` when registering to document these relationships. diff --git a/docs/guides/code-mode.md b/docs/guides/code-mode.md new file mode 100644 index 000000000..9336b414f --- /dev/null +++ b/docs/guides/code-mode.md @@ -0,0 +1,315 @@ +--- +title: Code Mode +id: code-mode +order: 19 +--- + +Code Mode lets an LLM write and execute TypeScript programs inside a secure sandbox. Instead of making one tool call at a time, the model writes a short script that orchestrates multiple tools with loops, conditionals, `Promise.all`, and data transformations — then returns a single result. + +## Why Code Mode? + +### Reduced context window usage + +In a traditional agentic loop, every tool call adds a round-trip of messages: the model's tool-call request, the tool result, then the model's next reasoning step. A task that touches five tools can easily consume thousands of tokens in back-and-forth. + +With Code Mode the model emits one `execute_typescript` call containing a complete program. The five tool invocations happen inside the sandbox, and only the final result comes back — one request, one response. + +### The LLM decides how to interpret tool output + +When tools are called individually, the model must decide what to do with each result in a new turn. With Code Mode, the model writes the logic up front: filter, aggregate, compare, branch. 
It can `Promise.all` ten API calls, pick the best result, and return a summary — all in a single execution. + +### Type-safe tool execution + +Tools you pass to Code Mode are converted to typed function stubs that appear in the system prompt. The model sees exact input/output types, so it generates correct calls without guessing parameter names or shapes. TypeScript annotations in the generated code are stripped automatically before execution. + +### Secure sandboxing + +Generated code runs in an isolated environment (V8 isolate, QuickJS WASM, or Cloudflare Worker) with no access to the host file system, network, or process. The sandbox has configurable timeouts and memory limits. + +## Getting Started + +### 1. Install packages + +```bash +pnpm add @tanstack/ai @tanstack/ai-code-mode zod +``` + +Pick an isolate driver: + +```bash +# Node.js — fastest, uses V8 isolates (requires native compilation) +pnpm add @tanstack/ai-isolate-node + +# QuickJS WASM — no native deps, works in browsers and edge runtimes +pnpm add @tanstack/ai-isolate-quickjs + +# Cloudflare Workers — run on the edge +pnpm add @tanstack/ai-isolate-cloudflare +``` + +### 2. Define tools + +Define your tools with `toolDefinition()` and provide a server-side implementation with `.server()`. These become the `external_*` functions available inside the sandbox. + +```typescript +import { toolDefinition } from "@tanstack/ai"; +import { z } from "zod"; + +const fetchWeather = toolDefinition({ + name: "fetchWeather", + description: "Get current weather for a city", + inputSchema: z.object({ location: z.string() }), + outputSchema: z.object({ + temperature: z.number(), + condition: z.string(), + }), +}).server(async ({ location }) => { + const res = await fetch(`https://api.weather.example/v1?city=${location}`); + return res.json(); +}); +``` + +### 3. 
Create the Code Mode tool and system prompt + +```typescript +import { createCodeMode } from "@tanstack/ai-code-mode"; +import { createNodeIsolateDriver } from "@tanstack/ai-isolate-node"; + +const { tool, systemPrompt } = createCodeMode({ + driver: createNodeIsolateDriver(), + tools: [fetchWeather], + timeout: 30_000, +}); +``` + +### 4. Use with `chat()` + +```typescript +import { chat } from "@tanstack/ai"; +import { openaiText } from "@tanstack/ai-openai/adapters"; + +const result = await chat({ + adapter: openaiText(), + model: "gpt-4o", + systemPrompts: [ + "You are a helpful weather assistant.", + systemPrompt, + ], + tools: [tool], + messages: [ + { + role: "user", + content: "Compare the weather in Tokyo, Paris, and New York City", + }, + ], +}); +``` + +The model will generate something like: + +```typescript +const cities = ["Tokyo", "Paris", "New York City"]; +const results = await Promise.all( + cities.map((city) => external_fetchWeather({ location: city })) +); + +const warmest = results.reduce((prev, curr) => + curr.temperature > prev.temperature ? curr : prev +); + +return { + comparison: results.map((r, i) => ({ + city: cities[i], + temperature: r.temperature, + condition: r.condition, + })), + warmest: cities[results.indexOf(warmest)], +}; +``` + +All three API calls happen in parallel inside the sandbox. The model receives one structured result instead of three separate tool-call round-trips. + +## API Reference + +### `createCodeMode(config)` + +Creates both the `execute_typescript` tool and its matching system prompt from a single config object. This is the recommended entry point. 
+ +```typescript +const { tool, systemPrompt } = createCodeMode({ + driver, // IsolateDriver — required + tools, // Array<Tool> — required, at least one + timeout, // number — execution timeout in ms (default: 30000) + memoryLimit, // number — memory limit in MB (default: 128, Node + QuickJS drivers) + getSkillBindings, // () => Promise<Record<string, Function>> — optional dynamic bindings +}); +``` + +**Config properties:** + +| Property | Type | Description | +|----------|------|-------------| +| `driver` | `IsolateDriver` | The sandbox runtime to execute code in | +| `tools` | `Array<Tool>` | Tools exposed as `external_*` functions. Must have `.server()` implementations | +| `timeout` | `number` | Execution timeout in milliseconds (default: 30000) | +| `memoryLimit` | `number` | Memory limit in MB (default: 128). Supported by Node and QuickJS drivers | +| `getSkillBindings` | `() => Promise<Record<string, Function>>` | Optional function returning additional bindings at execution time | + +The tool returns a `CodeModeToolResult`: + +```typescript +interface CodeModeToolResult { + success: boolean; + result?: unknown; // Return value from the executed code + logs?: Array<string>; // Captured console output + error?: { + message: string; + name?: string; + line?: number; + }; +} +``` + +### `createCodeModeTool(config)` / `createCodeModeSystemPrompt(config)` + +Lower-level functions if you need only the tool or only the prompt. `createCodeMode` calls both internally. + +```typescript +import { createCodeModeTool, createCodeModeSystemPrompt } from "@tanstack/ai-code-mode"; + +const tool = createCodeModeTool(config); +const prompt = createCodeModeSystemPrompt(config); +``` + +### `IsolateDriver` + +The interface that sandbox runtimes implement. 
You do not implement this yourself — pick one of the provided drivers: + +```typescript +interface IsolateDriver { + createContext(config: IsolateConfig): Promise<IsolateContext>; +} +``` + +**Available drivers:** + +| Package | Factory function | Environment | +|---------|-----------------|-------------| +| `@tanstack/ai-isolate-node` | `createNodeIsolateDriver()` | Node.js | +| `@tanstack/ai-isolate-quickjs` | `createQuickJSIsolateDriver()` | Node.js, browser, edge | +| `@tanstack/ai-isolate-cloudflare` | `createCloudflareIsolateDriver()` | Cloudflare Workers | + +For full configuration options for each driver, see [Isolate Drivers](./code-mode-isolates.md). + +### Advanced + +These utilities are used internally and are exported for custom pipelines: + +- **`stripTypeScript(code)`** — Strips TypeScript syntax using esbuild, converting to plain JavaScript. +- **`toolsToBindings(tools, prefix?)`** — Converts TanStack AI tools into `Record<string, Function>` for sandbox injection. +- **`generateTypeStubs(bindings, options?)`** — Generates TypeScript type declarations from tool bindings for system prompts. + +## Choosing a Driver + +For a full comparison of drivers with all configuration options, see [Isolate Drivers](./code-mode-isolates.md). + +In brief: use the **Node driver** for server-side Node.js (fastest, V8 JIT), **QuickJS** for browsers or portable edge deployments (no native deps), and the **Cloudflare driver** when you deploy to Cloudflare Workers. + +## Custom Events + +Code Mode emits custom events during execution that you can observe through the TanStack AI event system. These are useful for building UIs that show execution progress, debugging, or logging. 
+ +| Event | When | Payload | +|-------|------|---------| +| `code_mode:execution_started` | Code execution begins | `{ timestamp, codeLength }` | +| `code_mode:console` | Each `console.log/error/warn/info` call | `{ level, message, timestamp }` | +| `code_mode:external_call` | Before an `external_*` function runs | `{ function, args, timestamp }` | +| `code_mode:external_result` | After a successful `external_*` call | `{ function, result, duration }` | +| `code_mode:external_error` | When an `external_*` call fails | `{ function, error, duration }` | + +## Model Evaluation + +Code Mode includes a development benchmark package at `packages/typescript/ai-code-mode/models-eval`. + +Recommended workflow: + +1. Capture raw model outputs and telemetry (no judge call): + +```bash +pnpm --filter @tanstack/ai-code-mode-models-eval eval:capture +``` + +2. Judge the latest captured session from logs (no model rerun): + +```bash +pnpm --filter @tanstack/ai-code-mode-models-eval eval:judge +``` + +3. 
Canonical benchmark output is written to: + +`packages/typescript/ai-code-mode/models-eval/results.json` + +### Evaluation methodology + +Metrics: + +- `accuracy` (1-10): factual correctness vs gold reference +- `comprehensiveness` (1-10): how fully the response answers the user request +- `typescriptQuality` (1-10): quality/readability/type-safety of generated TypeScript +- `codeModeEfficiency` (1-10): how efficiently the model reaches the solution in code mode +- `speedTier` (1-5): relative wall-clock speed within `local` and `cloud` groups +- `tokenEfficiencyTier` (1-5): relative tokens-per-successful-execution within `local` and `cloud` groups +- `stabilityTier` (1-5): consistency over latest 5 logged runs per model +- `stars` (1-3): weighted rollup score + +Stability definition: + +- A run is considered stable when it has: + - no top-level run error + - non-empty final candidate report + - at least one successful `execute_typescript` call + +Star rollup weights: + +- accuracy: 25% +- comprehensiveness: 15% +- typescriptQuality: 15% +- codeModeEfficiency (with compile/runtime failure penalty): 10% +- speedTier: 10% +- tokenEfficiencyTier: 10% +- stabilityTier: 15% + +### Canonical model results + +The canonical source of truth is: + +- `packages/typescript/ai-code-mode/models-eval/results.json` + +Current human-readable snapshot (session `2026-03-26T15:38:44.006Z`): + +- **Top overall (★★★):** GPT-OSS 20B, Claude Haiku 4.5, GPT-4o Mini, Gemini 2.5 Flash, Grok 4.1 Fast, Llama 3.3 70B (Groq) +- **Strong but below top tier (★★☆):** Nemotron Cascade 2, Qwen3 32B (Groq) +- **Notable caveat:** Llama 3.3 70B shows high quality when it works, but lower stability (`stabilityTier: 4`) versus most models at `5` + +| Model | Stars | Accuracy | Code-Mode | Speed | Token Eff. 
| Stability | +|---|---:|---:|---:|---:|---:|---:| +| GPT-OSS 20B | ★★★ | 10 | 5 | 5 | 5 | 5 | +| Nemotron Cascade 2 | ★★☆ | 3 | 5 | 1 | 5 | 5 | +| Claude Haiku 4.5 | ★★★ | 10 | 7 | 3 | 2 | 5 | +| GPT-4o Mini | ★★★ | 10 | 9 | 3 | 1 | 5 | +| Gemini 2.5 Flash | ★★★ | 10 | 10 | 4 | 2 | 5 | +| Grok 4.1 Fast | ★★★ | 10 | 10 | 4 | 5 | 5 | +| Llama 3.3 70B (Groq) | ★★★ | 10 | 9 | 5 | 3 | 4 | +| Qwen3 32B (Groq) | ★★☆ | 10 | 4 | 1 | 2 | 5 | + +For full details (including comprehensiveness, TypeScript quality, token counts, and judge summaries), use: + +- `packages/typescript/ai-code-mode/README.md` + +## Tips + +- **Start simple.** Give the model 2-3 tools and a clear task. Code Mode works best when the model has a focused set of capabilities. +- **Prefer `Promise.all` tasks.** Code Mode shines when the model can parallelize work that would otherwise be sequential tool calls. +- **Use `console.log` for debugging.** Logs are captured and returned in the result, making it easy to see what happened inside the sandbox. +- **Keep tools focused.** Each tool should do one thing well. The model will compose them in code. +- **Check the system prompt.** Call `createCodeModeSystemPrompt(config)` and inspect the output to see exactly what the model will see, including generated type stubs. 
diff --git a/examples/ts-code-mode-web/.env.example b/examples/ts-code-mode-web/.env.example new file mode 100644 index 000000000..33c2ca8ea --- /dev/null +++ b/examples/ts-code-mode-web/.env.example @@ -0,0 +1,9 @@ +# Required: GitHub Personal Access Token for API calls +# Create at: https://github.com/settings/tokens +# Scopes needed: public_repo, read:user +GITHUB_TOKEN=ghp_your_token_here + +# At least one AI provider is required +ANTHROPIC_API_KEY=sk-ant-your_key_here +OPENAI_API_KEY=sk-your_key_here +GEMINI_API_KEY=your_key_here diff --git a/examples/ts-code-mode-web/.gitignore b/examples/ts-code-mode-web/.gitignore new file mode 100644 index 000000000..4c344dc63 --- /dev/null +++ b/examples/ts-code-mode-web/.gitignore @@ -0,0 +1,7 @@ +node_modules +.env.local +.output +dist +*.log +.db-skills +.structured-output-skills diff --git a/examples/ts-code-mode-web/README.md b/examples/ts-code-mode-web/README.md new file mode 100644 index 000000000..3873392a4 --- /dev/null +++ b/examples/ts-code-mode-web/README.md @@ -0,0 +1,81 @@ +# ts-code-mode-web + +TanStack AI Code Mode web example using React and TanStack Start. + +## Setup + +### Prerequisites + +- Node.js >=18 +- pnpm@10.17.0 +- Xcode Command Line Tools (macOS) + +### Installation + +```bash +pnpm install +``` + +### Rebuilding `isolated-vm` (native module) + +`@tanstack/ai-isolate-node` depends on [`isolated-vm`](https://github.com/laverdet/isolated-vm), a native Node.js addon that must be compiled from source when a prebuilt binary is not available for your Node.js version. + +**When is this needed?** + +The `isolated-vm` package ships prebuilt binaries for select Node.js ABI versions. If you are running a newer Node.js version whose ABI is not yet included (e.g. Node.js 25.x / ABI 141), you will see an error like: + +``` +Error: No native build was found for platform=darwin arch=arm64 runtime=node abi=141 ... + loaded from: .../isolated-vm +``` + +**How to fix it** + +1. 
Ensure Xcode Command Line Tools are installed (macOS):
+
+   ```bash
+   xcode-select --install
+   ```
+
+2. Run `node-gyp` via `npx` inside the `isolated-vm` package directory (from the monorepo root):
+
+   ```bash
+   ISOLATED_VM_DIR="node_modules/.pnpm/isolated-vm@6.1.0/node_modules/isolated-vm"
+   cd "$ISOLATED_VM_DIR" && npx node-gyp rebuild
+   ```
+
+   Or as a one-liner from the monorepo root:
+
+   ```bash
+   cd node_modules/.pnpm/isolated-vm@6.1.0/node_modules/isolated-vm && npx node-gyp rebuild
+   ```
+
+3. A successful build ends with `gyp info ok` and produces `build/Release/isolated_vm.node`.
+
+**Notes**
+
+- Linker warnings about macOS version mismatches (`building for macOS-11.0, but linking with dylib ... built for newer version`) are harmless and can be ignored.
+- The compiled `.node` file lives in the pnpm content-addressable store and will need to be rebuilt after `pnpm install --force` or after upgrading Node.js to a different ABI version.
+- Python 3 is required by `node-gyp`. It is detected automatically from `$PATH`.
+
+### Node.js 25 + `isolated-vm` runtime crash (SIGSEGV)
+
+Even after successfully compiling from source, `isolated-vm@6.1.0` crashes the server process (exit code 139, SIGSEGV) when run under **Node.js 25.x**. This is a V8 API incompatibility — Node 25 ships V8 14.1 whose internal C++ API has changed in ways that break `isolated-vm`'s isolate creation code at runtime.
+
+**Symptom:** The server dies silently with no JavaScript error or log output when the first code mode request is made.
+
+**Fix applied:** All server routes in this example have been switched from the `node` isolate driver to the `quickjs` driver (`@tanstack/ai-isolate-quickjs`), which is a pure-JS sandbox with no native addon dependency and works on any Node.js version.
+ +**When `isolated-vm` works again:** Once `isolated-vm` publishes a release compatible with Node 25 / V8 14.1, switch the driver back to `'node'` in `src/lib/create-isolate-driver.ts` (default) and in each API route file. The `node` driver provides stronger isolation (true V8 process boundary) and is preferred in production. + +## Development + +```bash +pnpm dev # starts the Vite dev server on port 3001 +``` + +## Build + +```bash +pnpm build +``` diff --git a/examples/ts-code-mode-web/package.json b/examples/ts-code-mode-web/package.json new file mode 100644 index 000000000..a302b0fbc --- /dev/null +++ b/examples/ts-code-mode-web/package.json @@ -0,0 +1,57 @@ +{ + "name": "ts-code-mode-web", + "private": true, + "type": "module", + "scripts": { + "dev": "vite dev --port 3001", + "build": "vite build", + "serve": "vite preview", + "test": "exit 0" + }, + "dependencies": { + "@radix-ui/react-collapsible": "^1.1.2", + "@tailwindcss/vite": "^4.1.18", + "@tanstack/ai": "workspace:*", + "@tanstack/ai-anthropic": "workspace:*", + "@tanstack/ai-client": "workspace:*", + "@tanstack/ai-code-mode": "workspace:*", + "@tanstack/ai-code-mode-skills": "workspace:*", + "@tanstack/ai-gemini": "workspace:*", + "@tanstack/ai-isolate-cloudflare": "workspace:*", + "@tanstack/ai-isolate-node": "workspace:*", + "@tanstack/ai-isolate-quickjs": "workspace:*", + "@tanstack/ai-ollama": "workspace:*", + "@tanstack/ai-openai": "workspace:*", + "@tanstack/ai-react": "workspace:*", + "@tanstack/nitro-v2-vite-plugin": "^1.154.7", + "@tanstack/react-router": "^1.158.4", + "@tanstack/react-start": "^1.159.0", + "@tanstack/router-plugin": "^1.158.4", + "@tanstack/store": "^0.8.0", + "framer-motion": "^11.15.0", + "highlight.js": "^11.11.1", + "lucide-react": "^0.561.0", + "marked": "^15.0.6", + "puppeteer": "^24.34.0", + "react": "^19.2.3", + "react-dom": "^19.2.3", + "react-markdown": "^10.1.0", + "recharts": "^2.15.0", + "rehype-highlight": "^7.0.2", + "rehype-raw": "^7.0.0", + 
"rehype-sanitize": "^6.0.0", + "remark-gfm": "^4.0.1", + "tailwindcss": "^4.1.18", + "vite-tsconfig-paths": "^5.1.4", + "zod": "^4.2.0" + }, + "devDependencies": { + "@tanstack/devtools-vite": "^0.5.3", + "@types/node": "^24.10.1", + "@types/react": "^19.2.7", + "@types/react-dom": "^19.2.3", + "@vitejs/plugin-react": "^5.1.2", + "typescript": "5.9.3", + "vite": "^7.2.7" + } +} diff --git a/examples/ts-code-mode-web/public/coco-code-mode.png b/examples/ts-code-mode-web/public/coco-code-mode.png new file mode 100644 index 000000000..cee62d087 Binary files /dev/null and b/examples/ts-code-mode-web/public/coco-code-mode.png differ diff --git a/examples/ts-code-mode-web/public/coco-regular-tools.png b/examples/ts-code-mode-web/public/coco-regular-tools.png new file mode 100644 index 000000000..046be94b6 Binary files /dev/null and b/examples/ts-code-mode-web/public/coco-regular-tools.png differ diff --git a/examples/ts-code-mode-web/public/gold-results/db-gold-messages-1.json b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-1.json new file mode 100644 index 000000000..dfbf582e9 --- /dev/null +++ b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-1.json @@ -0,0 +1,124 @@ +{ + "prompt": "Which customers have purchased from all three product categories? Show their names, cities, and total spend per category", + "messages": [ + { + "id": "msg-1773358136917-npsml", + "role": "user", + "parts": [ + { + "type": "text", + "content": "Which customers have purchased from all three product categories? Show their names, cities, and total spend per category" + } + ], + "createdAt": "2026-03-12T23:28:56.917Z" + }, + { + "id": "msg-1773358137348-ia4r24", + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "I'll help you find customers who have purchased from all three product categories. Let me start by exploring the data structure." 
+ }, + { + "type": "tool-call", + "id": "toolu_019kDdYVMQ7iwGKdmNnB9UWr", + "name": "getSchemaInfo", + "arguments": "", + "state": "input-complete", + "output": { + "schemas": { + "customers": { + "id": "number", + "name": "string", + "email": "string", + "city": "string", + "joined": "string (date)" + }, + "products": { + "id": "number", + "name": "string", + "category": "string", + "price": "number", + "stock": "number" + }, + "purchases": { + "id": "number", + "customer_id": "number", + "product_id": "number", + "quantity": "number", + "total": "number", + "purchased_at": "string (date)" + } + }, + "rowCounts": { + "customers": 10, + "products": 10, + "purchases": 25 + } + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_019kDdYVMQ7iwGKdmNnB9UWr", + "content": "{\"schemas\":{\"customers\":{\"id\":\"number\",\"name\":\"string\",\"email\":\"string\",\"city\":\"string\",\"joined\":\"string (date)\"},\"products\":{\"id\":\"number\",\"name\":\"string\",\"category\":\"string\",\"price\":\"number\",\"stock\":\"number\"},\"purchases\":{\"id\":\"number\",\"customer_id\":\"number\",\"product_id\":\"number\",\"quantity\":\"number\",\"total\":\"number\",\"purchased_at\":\"string (date)\"}},\"rowCounts\":{\"customers\":10,\"products\":10,\"purchases\":25}}", + "state": "complete" + }, + { + "type": "text", + "content": "Now let me query the data to find customers who purchased from all three categories:" + }, + { + "type": "tool-call", + "id": "toolu_016GFCoEhZnuB4phCxMHoYVK", + "name": "execute_typescript", + "arguments": "{\"typescriptCode\": \"\\n// First, get all products to understand categories\\nconst productsResult = await external_queryTable({ table: 'products' });\\nconst products = productsResult.rows as Array<{id: number; name: string; category: string; price: number}>;\\n\\n// Get unique categories\\nconst categories = Array.from(new Set(products.map(p => p.category)));\\nconsole.log('Categories found:', categories);\\n\\n// Get all purchases with 
customer and product info\\nconst purchasesResult = await external_queryTable({ table: 'purchases' });\\nconst purchases = purchasesResult.rows as Array<{id: number; customer_id: number; product_id: number; quantity: number; total: number}>;\\n\\n// Create a map of product_id to category\\nconst productMap = new Map(products.map(p => [p.id, p.category]));\\n\\n// Get all customers\\nconst customersResult = await external_queryTable({ table: 'customers' });\\nconst customers = customersResult.rows as Array<{id: number; name: string; email: string; city: string; joined: string}>;\\n\\n// Track purchases by customer and category\\nconst customerPurchases = new Map;\\n spendByCategory: Map;\\n}>();\\n\\nfor (const customer of customers) {\\n customerPurchases.set(customer.id, {\\n name: customer.name,\\n city: customer.city,\\n categories: new Set(),\\n spendByCategory: new Map(categories.map(c => [c, 0]))\\n });\\n}\\n\\n// Process purchases\\nfor (const purchase of purchases) {\\n const category = productMap.get(purchase.product_id);\\n if (category && customerPurchases.has(purchase.customer_id)) {\\n const data = customerPurchases.get(purchase.customer_id)!;\\n data.categories.add(category);\\n data.spendByCategory.set(category, (data.spendByCategory.get(category) || 0) + purchase.total);\\n }\\n}\\n\\n// Filter customers who purchased from ALL categories\\nconst qualifyingCustomers = Array.from(customerPurchases.entries())\\n .filter(([_, data]) => data.categories.size === categories.length)\\n .map(([id, data]) => ({\\n id,\\n name: data.name,\\n city: data.city,\\n spendByCategory: Object.fromEntries(data.spendByCategory)\\n }));\\n\\nconsole.log(`\\\\nFound ${qualifyingCustomers.length} customer(s) who purchased from all ${categories.length} categories`);\\n\\nreturn {\\n categories,\\n qualifyingCustomers\\n};\\n\"}", + "state": "input-complete", + "output": { + "success": true, + "result": { + "categories": ["Electronics", "Furniture", "Office"], + 
"qualifyingCustomers": [ + { + "id": 2, + "name": "Bob Smith", + "city": "San Francisco", + "spendByCategory": { + "Electronics": 89.97, + "Furniture": 299.99, + "Office": 149.9 + } + }, + { + "id": 3, + "name": "Carol Williams", + "city": "Chicago", + "spendByCategory": { + "Electronics": 139.98, + "Furniture": 399.99, + "Office": 99.96 + } + } + ] + }, + "logs": [ + "Categories found: [\"Electronics\",\"Furniture\",\"Office\"]", + "\nFound 2 customer(s) who purchased from all 3 categories" + ] + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_016GFCoEhZnuB4phCxMHoYVK", + "content": "{\"success\":true,\"result\":{\"categories\":[\"Electronics\",\"Furniture\",\"Office\"],\"qualifyingCustomers\":[{\"id\":2,\"name\":\"Bob Smith\",\"city\":\"San Francisco\",\"spendByCategory\":{\"Electronics\":89.97,\"Furniture\":299.99,\"Office\":149.9}},{\"id\":3,\"name\":\"Carol Williams\",\"city\":\"Chicago\",\"spendByCategory\":{\"Electronics\":139.98,\"Furniture\":399.99,\"Office\":99.96}}]},\"logs\":[\"Categories found: [\\\"Electronics\\\",\\\"Furniture\\\",\\\"Office\\\"]\",\"\\nFound 2 customer(s) who purchased from all 3 categories\"]}", + "state": "complete" + }, + { + "type": "text", + "content": "Perfect! 
Here are the customers who have purchased from all three product categories:\n\n## Customers with Purchases Across All Categories\n\n| Name | City | Electronics | Furniture | Office | Total Spend |\n|------|------|-------------|-----------|--------|-------------|\n| Bob Smith | San Francisco | $89.97 | $299.99 | $149.90 | $539.86 |\n| Carol Williams | Chicago | $139.98 | $399.99 | $99.96 | $639.93 |\n\n**Summary:**\n- **2 customers** have made purchases from all three categories (Electronics, Furniture, and Office)\n- **Bob Smith** (San Francisco) has spent the most on Furniture ($299.99)\n- **Carol Williams** (Chicago) has spent the most on Furniture ($399.99) and has the highest total spend overall" + } + ], + "createdAt": "2026-03-12T23:28:57.348Z" + } + ] +} diff --git a/examples/ts-code-mode-web/public/gold-results/db-gold-messages-2.json b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-2.json new file mode 100644 index 000000000..4670d10ce --- /dev/null +++ b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-2.json @@ -0,0 +1,152 @@ +{ + "prompt": "Show a monthly breakdown of purchases with total revenue, average order value, and the number of distinct products sold each month", + "messages": [ + { + "id": "msg-1773358111166-fgkrra", + "role": "user", + "parts": [ + { + "type": "text", + "content": "Show a monthly breakdown of purchases with total revenue, average order value, and the number of distinct products sold each month" + } + ], + "createdAt": "2026-03-12T23:28:31.166Z" + }, + { + "id": "msg-1773358111583-aurhoko", + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "I'll help you get a monthly breakdown of purchases. Let me start by checking the schema and then querying the purchase data." 
+ }, + { + "type": "tool-call", + "id": "toolu_012er8u979E7P3BjoS6rJzer", + "name": "getSchemaInfo", + "arguments": "{\"table\": \"purchases\"}", + "state": "input-complete", + "output": { + "schemas": { + "purchases": { + "id": "number", + "customer_id": "number", + "product_id": "number", + "quantity": "number", + "total": "number", + "purchased_at": "string (date)" + } + }, + "rowCounts": { + "purchases": 25 + } + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_012er8u979E7P3BjoS6rJzer", + "content": "{\"schemas\":{\"purchases\":{\"id\":\"number\",\"customer_id\":\"number\",\"product_id\":\"number\",\"quantity\":\"number\",\"total\":\"number\",\"purchased_at\":\"string (date)\"}},\"rowCounts\":{\"purchases\":25}}", + "state": "complete" + }, + { + "type": "text", + "content": "Now let me query all purchases and process them to create a monthly breakdown:" + }, + { + "type": "tool-call", + "id": "toolu_01V9L6TrzsAW91Mm9pezXR5S", + "name": "execute_typescript", + "arguments": "{\"typescriptCode\": \"\\nconst result = await external_queryTable({\\n table: \\\"purchases\\\",\\n columns: [\\\"purchased_at\\\", \\\"total\\\", \\\"product_id\\\", \\\"quantity\\\"]\\n});\\n\\nconst purchases = result.rows as Array<{\\n purchased_at: string;\\n total: number;\\n product_id: number;\\n quantity: number;\\n}>;\\n\\n// Group by month\\nconst monthlyStats = new Map;\\n}>();\\n\\npurchases.forEach(purchase => {\\n // Extract year-month from the date\\n const monthKey = purchase.purchased_at.substring(0, 7); // YYYY-MM format\\n \\n if (!monthlyStats.has(monthKey)) {\\n monthlyStats.set(monthKey, {\\n totalRevenue: 0,\\n orderCount: 0,\\n productIds: new Set()\\n });\\n }\\n \\n const monthData = monthlyStats.get(monthKey)!;\\n monthData.totalRevenue += purchase.total;\\n monthData.orderCount += 1;\\n monthData.productIds.add(purchase.product_id);\\n});\\n\\n// Convert to sorted array and calculate metrics\\nconst monthlyBreakdown = 
Array.from(monthlyStats.entries())\\n .sort((a, b) => a[0].localeCompare(b[0]))\\n .map(([month, stats]) => ({\\n month,\\n totalRevenue: parseFloat(stats.totalRevenue.toFixed(2)),\\n orderCount: stats.orderCount,\\n averageOrderValue: parseFloat((stats.totalRevenue / stats.orderCount).toFixed(2)),\\n distinctProductsSold: stats.productIds.size\\n }));\\n\\nreturn monthlyBreakdown;\\n\"}", + "state": "input-complete", + "output": { + "success": true, + "result": [ + { + "month": "2024-03", + "totalRevenue": 189.97, + "orderCount": 2, + "averageOrderValue": 94.98, + "distinctProductsSold": 2 + }, + { + "month": "2024-04", + "totalRevenue": 389.96, + "orderCount": 2, + "averageOrderValue": 194.98, + "distinctProductsSold": 2 + }, + { + "month": "2024-05", + "totalRevenue": 499.95, + "orderCount": 2, + "averageOrderValue": 249.97, + "distinctProductsSold": 2 + }, + { + "month": "2024-06", + "totalRevenue": 194.93, + "orderCount": 2, + "averageOrderValue": 97.47, + "distinctProductsSold": 2 + }, + { + "month": "2024-07", + "totalRevenue": 114.98, + "orderCount": 2, + "averageOrderValue": 57.49, + "distinctProductsSold": 2 + }, + { + "month": "2024-08", + "totalRevenue": 209.97, + "orderCount": 2, + "averageOrderValue": 104.98, + "distinctProductsSold": 2 + }, + { + "month": "2024-09", + "totalRevenue": 449.98, + "orderCount": 2, + "averageOrderValue": 224.99, + "distinctProductsSold": 2 + }, + { + "month": "2024-10", + "totalRevenue": 529.94, + "orderCount": 3, + "averageOrderValue": 176.65, + "distinctProductsSold": 3 + }, + { + "month": "2024-11", + "totalRevenue": 249.96, + "orderCount": 3, + "averageOrderValue": 83.32, + "distinctProductsSold": 3 + }, + { + "month": "2024-12", + "totalRevenue": 899.83, + "orderCount": 5, + "averageOrderValue": 179.97, + "distinctProductsSold": 5 + } + ], + "logs": [] + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_01V9L6TrzsAW91Mm9pezXR5S", + "content": 
"{\"success\":true,\"result\":[{\"month\":\"2024-03\",\"totalRevenue\":189.97,\"orderCount\":2,\"averageOrderValue\":94.98,\"distinctProductsSold\":2},{\"month\":\"2024-04\",\"totalRevenue\":389.96,\"orderCount\":2,\"averageOrderValue\":194.98,\"distinctProductsSold\":2},{\"month\":\"2024-05\",\"totalRevenue\":499.95,\"orderCount\":2,\"averageOrderValue\":249.97,\"distinctProductsSold\":2},{\"month\":\"2024-06\",\"totalRevenue\":194.93,\"orderCount\":2,\"averageOrderValue\":97.47,\"distinctProductsSold\":2},{\"month\":\"2024-07\",\"totalRevenue\":114.98,\"orderCount\":2,\"averageOrderValue\":57.49,\"distinctProductsSold\":2},{\"month\":\"2024-08\",\"totalRevenue\":209.97,\"orderCount\":2,\"averageOrderValue\":104.98,\"distinctProductsSold\":2},{\"month\":\"2024-09\",\"totalRevenue\":449.98,\"orderCount\":2,\"averageOrderValue\":224.99,\"distinctProductsSold\":2},{\"month\":\"2024-10\",\"totalRevenue\":529.94,\"orderCount\":3,\"averageOrderValue\":176.65,\"distinctProductsSold\":3},{\"month\":\"2024-11\",\"totalRevenue\":249.96,\"orderCount\":3,\"averageOrderValue\":83.32,\"distinctProductsSold\":3},{\"month\":\"2024-12\",\"totalRevenue\":899.83,\"orderCount\":5,\"averageOrderValue\":179.97,\"distinctProductsSold\":5}],\"logs\":[]}", + "state": "complete" + }, + { + "type": "text", + "content": "Perfect! 
Here's your monthly breakdown of purchases:\n\n| Month | Total Revenue | Order Count | Average Order Value | Distinct Products Sold |\n|-------|---------------|-------------|---------------------|------------------------|\n| 2024-03 | $189.97 | 2 | $94.98 | 2 |\n| 2024-04 | $389.96 | 2 | $194.98 | 2 |\n| 2024-05 | $499.95 | 2 | $249.97 | 2 |\n| 2024-06 | $194.93 | 2 | $97.47 | 2 |\n| 2024-07 | $114.98 | 2 | $57.49 | 2 |\n| 2024-08 | $209.97 | 2 | $104.98 | 2 |\n| 2024-09 | $449.98 | 2 | $224.99 | 2 |\n| 2024-10 | $529.94 | 3 | $176.65 | 3 |\n| 2024-11 | $249.96 | 3 | $83.32 | 3 |\n| 2024-12 | $899.83 | 5 | $179.97 | 5 |\n\n**Key Insights:**\n- **Best Month:** December 2024 had the highest revenue ($899.83) with 5 orders\n- **Highest Average Order Value:** May 2024 ($249.97 per order)\n- **Lowest Revenue Month:** July 2024 ($114.98)\n- **Trend:** Strong momentum in Q4 2024, with December showing significant growth in both order volume and product variety\n- **Overall:** Total of 25 purchases across 10 months with a wide range of average order values from $57.49 to $249.97" + } + ], + "createdAt": "2026-03-12T23:28:31.583Z" + } + ] +} diff --git a/examples/ts-code-mode-web/public/gold-results/db-gold-messages-3.json b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-3.json new file mode 100644 index 000000000..b189ec806 --- /dev/null +++ b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-3.json @@ -0,0 +1,178 @@ +{ + "prompt": "Rank every customer by total spending and show what percentage of their purchases were electronics vs furniture vs office products", + "messages": [ + { + "id": "msg-1773358095332-676p26", + "role": "user", + "parts": [ + { + "type": "text", + "content": "Rank every customer by total spending and show what percentage of their purchases were electronics vs furniture vs office products" + } + ], + "createdAt": "2026-03-12T23:28:15.332Z" + }, + { + "id": "msg-1773358095889-fq08h", + "role": "assistant", + "parts": [ + 
{ + "type": "text", + "content": "I'll help you rank customers by total spending and break down their purchase percentages by product category. Let me start by getting the schema information." + }, + { + "type": "tool-call", + "id": "toolu_016UzKdGiDpASbxjWcAHCLzP", + "name": "getSchemaInfo", + "arguments": "", + "state": "input-complete", + "output": { + "schemas": { + "customers": { + "id": "number", + "name": "string", + "email": "string", + "city": "string", + "joined": "string (date)" + }, + "products": { + "id": "number", + "name": "string", + "category": "string", + "price": "number", + "stock": "number" + }, + "purchases": { + "id": "number", + "customer_id": "number", + "product_id": "number", + "quantity": "number", + "total": "number", + "purchased_at": "string (date)" + } + }, + "rowCounts": { + "customers": 10, + "products": 10, + "purchases": 25 + } + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_016UzKdGiDpASbxjWcAHCLzP", + "content": "{\"schemas\":{\"customers\":{\"id\":\"number\",\"name\":\"string\",\"email\":\"string\",\"city\":\"string\",\"joined\":\"string (date)\"},\"products\":{\"id\":\"number\",\"name\":\"string\",\"category\":\"string\",\"price\":\"number\",\"stock\":\"number\"},\"purchases\":{\"id\":\"number\",\"customer_id\":\"number\",\"product_id\":\"number\",\"quantity\":\"number\",\"total\":\"number\",\"purchased_at\":\"string (date)\"}},\"rowCounts\":{\"customers\":10,\"products\":10,\"purchases\":25}}", + "state": "complete" + }, + { + "type": "text", + "content": "Now let me fetch all the necessary data to analyze customer spending by category:" + }, + { + "type": "tool-call", + "id": "toolu_01NrCyYmQdiJAF4M1dL2QnR2", + "name": "execute_typescript", + "arguments": "{\"typescriptCode\": \"\\n// Fetch all purchases, products, and customers\\nconst [purchasesResult, productsResult, customersResult] = await Promise.all([\\n external_queryTable({ table: 'purchases' }),\\n external_queryTable({ table: 'products' }),\\n 
external_queryTable({ table: 'customers' })\\n]);\\n\\nconst purchases = purchasesResult.rows as any[];\\nconst products = productsResult.rows as any[];\\nconst customers = customersResult.rows as any[];\\n\\n// Create a map of product ID to category\\nconst productMap = Object.fromEntries(\\n products.map(p => [p.id, { name: p.name, category: p.category, price: p.price }])\\n);\\n\\n// Group purchases by customer\\nconst customerSpending = new Map;\\n}>();\\n\\ncustomers.forEach(customer => {\\n customerSpending.set(customer.id, {\\n name: customer.name,\\n totalSpent: 0,\\n categorySpending: {}\\n });\\n});\\n\\n// Process purchases\\npurchases.forEach(purchase => {\\n const product = productMap[purchase.product_id];\\n const customer = customerSpending.get(purchase.customer_id);\\n \\n if (customer && product) {\\n customer.totalSpent += purchase.total;\\n if (!customer.categorySpending[product.category]) {\\n customer.categorySpending[product.category] = 0;\\n }\\n customer.categorySpending[product.category] += purchase.total;\\n }\\n});\\n\\n// Convert to array and sort by total spending (descending)\\nconst rankedCustomers = Array.from(customerSpending.values())\\n .filter(c => c.totalSpent > 0) // Only customers with purchases\\n .sort((a, b) => b.totalSpent - a.totalSpent)\\n .map((customer, rank) => {\\n const percentages: Record = {};\\n const categories = ['Electronics', 'Furniture', 'Office Products'];\\n \\n categories.forEach(cat => {\\n const categoryTotal = customer.categorySpending[cat] || 0;\\n percentages[cat] = customer.totalSpent > 0 \\n ? 
Math.round((categoryTotal / customer.totalSpent) * 100 * 100) / 100\\n : 0;\\n });\\n\\n return {\\n rank: rank + 1,\\n name: customer.name,\\n totalSpent: Math.round(customer.totalSpent * 100) / 100,\\n electronics: percentages['Electronics'],\\n furniture: percentages['Furniture'],\\n officeProducts: percentages['Office Products']\\n };\\n });\\n\\nreturn rankedCustomers;\\n\"}", + "state": "input-complete", + "output": { + "success": true, + "result": [ + { + "rank": 1, + "name": "Carol Williams", + "totalSpent": 639.93, + "electronics": 21.87, + "furniture": 62.51, + "officeProducts": 0 + }, + { + "rank": 2, + "name": "Bob Smith", + "totalSpent": 539.86, + "electronics": 16.67, + "furniture": 55.57, + "officeProducts": 0 + }, + { + "rank": 3, + "name": "Eva Martinez", + "totalSpent": 514.97, + "electronics": 13.59, + "furniture": 86.41, + "officeProducts": 0 + }, + { + "rank": 4, + "name": "Henry Davis", + "totalSpent": 479.96, + "electronics": 37.5, + "furniture": 62.5, + "officeProducts": 0 + }, + { + "rank": 5, + "name": "Grace Kim", + "totalSpent": 449.98, + "electronics": 11.11, + "furniture": 88.89, + "officeProducts": 0 + }, + { + "rank": 6, + "name": "David Brown", + "totalSpent": 344.9, + "electronics": 78.27, + "furniture": 0, + "officeProducts": 0 + }, + { + "rank": 7, + "name": "Alice Johnson", + "totalSpent": 249.96, + "electronics": 100, + "furniture": 0, + "officeProducts": 0 + }, + { + "rank": 8, + "name": "Frank Lee", + "totalSpent": 209.97, + "electronics": 100, + "furniture": 0, + "officeProducts": 0 + }, + { + "rank": 9, + "name": "Jack Wilson", + "totalSpent": 179.97, + "electronics": 50, + "furniture": 50, + "officeProducts": 0 + }, + { + "rank": 10, + "name": "Ivy Chen", + "totalSpent": 119.97, + "electronics": 58.34, + "furniture": 0, + "officeProducts": 0 + } + ], + "logs": [] + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_01NrCyYmQdiJAF4M1dL2QnR2", + "content": "{\"success\":true,\"result\":[{\"rank\":1,\"name\":\"Carol 
Williams\",\"totalSpent\":639.93,\"electronics\":21.87,\"furniture\":62.51,\"officeProducts\":0},{\"rank\":2,\"name\":\"Bob Smith\",\"totalSpent\":539.86,\"electronics\":16.67,\"furniture\":55.57,\"officeProducts\":0},{\"rank\":3,\"name\":\"Eva Martinez\",\"totalSpent\":514.97,\"electronics\":13.59,\"furniture\":86.41,\"officeProducts\":0},{\"rank\":4,\"name\":\"Henry Davis\",\"totalSpent\":479.96,\"electronics\":37.5,\"furniture\":62.5,\"officeProducts\":0},{\"rank\":5,\"name\":\"Grace Kim\",\"totalSpent\":449.98,\"electronics\":11.11,\"furniture\":88.89,\"officeProducts\":0},{\"rank\":6,\"name\":\"David Brown\",\"totalSpent\":344.9,\"electronics\":78.27,\"furniture\":0,\"officeProducts\":0},{\"rank\":7,\"name\":\"Alice Johnson\",\"totalSpent\":249.96,\"electronics\":100,\"furniture\":0,\"officeProducts\":0},{\"rank\":8,\"name\":\"Frank Lee\",\"totalSpent\":209.97,\"electronics\":100,\"furniture\":0,\"officeProducts\":0},{\"rank\":9,\"name\":\"Jack Wilson\",\"totalSpent\":179.97,\"electronics\":50,\"furniture\":50,\"officeProducts\":0},{\"rank\":10,\"name\":\"Ivy Chen\",\"totalSpent\":119.97,\"electronics\":58.34,\"furniture\":0,\"officeProducts\":0}],\"logs\":[]}", + "state": "complete" + }, + { + "type": "text", + "content": "Perfect! 
Here's the ranked list of customers by total spending with their purchase breakdown by category:\n\n| Rank | Customer Name | Total Spent | Electronics | Furniture | Office Products |\n|------|---------------|-------------|-------------|-----------|-----------------|\n| 1 | Carol Williams | $639.93 | 21.87% | 62.51% | 0% |\n| 2 | Bob Smith | $539.86 | 16.67% | 55.57% | 0% |\n| 3 | Eva Martinez | $514.97 | 13.59% | 86.41% | 0% |\n| 4 | Henry Davis | $479.96 | 37.50% | 62.50% | 0% |\n| 5 | Grace Kim | $449.98 | 11.11% | 88.89% | 0% |\n| 6 | David Brown | $344.90 | 78.27% | 0% | 0% |\n| 7 | Alice Johnson | $249.96 | 100.00% | 0% | 0% |\n| 8 | Frank Lee | $209.97 | 100.00% | 0% | 0% |\n| 9 | Jack Wilson | $179.97 | 50.00% | 50.00% | 0% |\n| 10 | Ivy Chen | $119.97 | 58.34% | 0% | 0% |\n\n**Key Insights:**\n- **Top spender:** Carol Williams with $639.93, primarily focused on Furniture (62.51%)\n- **Strongest Electronics buyers:** Alice Johnson and Frank Lee (100% of purchases), followed by David Brown (78.27%)\n- **Furniture enthusiasts:** Grace Kim (88.89%), Eva Martinez (86.41%)\n- **Office Products:** No customers purchased office products in this dataset\n- **Top 3 customers** account for 1,694.76 out of 4,704.40 total spending (36% of all revenue)" + } + ], + "createdAt": "2026-03-12T23:28:15.889Z" + } + ] +} diff --git a/examples/ts-code-mode-web/public/gold-results/db-gold-messages-4.json b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-4.json new file mode 100644 index 000000000..b0a8ef8b3 --- /dev/null +++ b/examples/ts-code-mode-web/public/gold-results/db-gold-messages-4.json @@ -0,0 +1,128 @@ +{ + "prompt": "For each city, show the total revenue, number of unique customers, and the most purchased product category", + "messages": [ + { + "id": "msg-1773358079154-2uw768", + "role": "user", + "parts": [ + { + "type": "text", + "content": "For each city, show the total revenue, number of unique customers, and the most purchased product category" + 
} + ], + "createdAt": "2026-03-12T23:27:59.154Z" + }, + { + "id": "msg-1773358079591-ub25fs", + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "I'll help you get that information. Let me first check the schema to understand the data structure, then query the data." + }, + { + "type": "tool-call", + "id": "toolu_011KifhviESJKC2DsHYsCPuZ", + "name": "getSchemaInfo", + "arguments": "", + "state": "input-complete", + "output": { + "schemas": { + "customers": { + "id": "number", + "name": "string", + "email": "string", + "city": "string", + "joined": "string (date)" + }, + "products": { + "id": "number", + "name": "string", + "category": "string", + "price": "number", + "stock": "number" + }, + "purchases": { + "id": "number", + "customer_id": "number", + "product_id": "number", + "quantity": "number", + "total": "number", + "purchased_at": "string (date)" + } + }, + "rowCounts": { + "customers": 10, + "products": 10, + "purchases": 25 + } + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_011KifhviESJKC2DsHYsCPuZ", + "content": "{\"schemas\":{\"customers\":{\"id\":\"number\",\"name\":\"string\",\"email\":\"string\",\"city\":\"string\",\"joined\":\"string (date)\"},\"products\":{\"id\":\"number\",\"name\":\"string\",\"category\":\"string\",\"price\":\"number\",\"stock\":\"number\"},\"purchases\":{\"id\":\"number\",\"customer_id\":\"number\",\"product_id\":\"number\",\"quantity\":\"number\",\"total\":\"number\",\"purchased_at\":\"string (date)\"}},\"rowCounts\":{\"customers\":10,\"products\":10,\"purchases\":25}}", + "state": "complete" + }, + { + "type": "text", + "content": "Now let me query all the necessary data:" + }, + { + "type": "tool-call", + "id": "toolu_01PAg2GUs8P7Zfjm7YJKBoLr", + "name": "execute_typescript", + "arguments": "{\"typescriptCode\": \"\\n// Query all customers, products, and purchases\\nconst customersResult = await external_queryTable({ table: 'customers' });\\nconst productsResult = await external_queryTable({ 
table: 'products' });\\nconst purchasesResult = await external_queryTable({ table: 'purchases' });\\n\\nconst customers = customersResult.rows as any[];\\nconst products = productsResult.rows as any[];\\nconst purchases = purchasesResult.rows as any[];\\n\\n// Create lookup maps for faster access\\nconst productMap = new Map(products.map(p => [p.id, p]));\\nconst customerMap = new Map(customers.map(c => [c.id, c]));\\n\\n// Group data by city\\nconst cityData = new Map;\\n categoryPurchaseCounts: Map;\\n}>();\\n\\n// Process each purchase\\nfor (const purchase of purchases) {\\n const customer = customerMap.get(purchase.customer_id);\\n const product = productMap.get(purchase.product_id);\\n \\n if (customer && product) {\\n const city = customer.city;\\n \\n if (!cityData.has(city)) {\\n cityData.set(city, {\\n totalRevenue: 0,\\n uniqueCustomers: new Set(),\\n categoryPurchaseCounts: new Map()\\n });\\n }\\n \\n const data = cityData.get(city)!;\\n data.totalRevenue += purchase.total;\\n data.uniqueCustomers.add(purchase.customer_id);\\n \\n const currentCount = data.categoryPurchaseCounts.get(product.category) || 0;\\n data.categoryPurchaseCounts.set(product.category, currentCount + purchase.quantity);\\n }\\n}\\n\\n// Format results\\nconst results = Array.from(cityData.entries()).map(([city, data]) => {\\n // Find most purchased category\\n let mostPurchasedCategory = 'N/A';\\n let maxCount = 0;\\n \\n for (const [category, count] of data.categoryPurchaseCounts.entries()) {\\n if (count > maxCount) {\\n maxCount = count;\\n mostPurchasedCategory = category;\\n }\\n }\\n \\n return {\\n city,\\n totalRevenue: data.totalRevenue.toFixed(2),\\n uniqueCustomers: data.uniqueCustomers.size,\\n mostPurchasedCategory\\n };\\n}).sort((a, b) => a.city.localeCompare(b.city));\\n\\nreturn results;\\n\"}", + "state": "input-complete", + "output": { + "success": true, + "result": [ + { + "city": "Austin", + "totalRevenue": "464.87", + "uniqueCustomers": 2, + 
"mostPurchasedCategory": "Office" + }, + { + "city": "Chicago", + "totalRevenue": "1119.89", + "uniqueCustomers": 2, + "mostPurchasedCategory": "Electronics" + }, + { + "city": "New York", + "totalRevenue": "459.93", + "uniqueCustomers": 2, + "mostPurchasedCategory": "Electronics" + }, + { + "city": "San Francisco", + "totalRevenue": "989.84", + "uniqueCustomers": 2, + "mostPurchasedCategory": "Office" + }, + { + "city": "Seattle", + "totalRevenue": "694.94", + "uniqueCustomers": 2, + "mostPurchasedCategory": "Furniture" + } + ], + "logs": [] + } + }, + { + "type": "tool-result", + "toolCallId": "toolu_01PAg2GUs8P7Zfjm7YJKBoLr", + "content": "{\"success\":true,\"result\":[{\"city\":\"Austin\",\"totalRevenue\":\"464.87\",\"uniqueCustomers\":2,\"mostPurchasedCategory\":\"Office\"},{\"city\":\"Chicago\",\"totalRevenue\":\"1119.89\",\"uniqueCustomers\":2,\"mostPurchasedCategory\":\"Electronics\"},{\"city\":\"New York\",\"totalRevenue\":\"459.93\",\"uniqueCustomers\":2,\"mostPurchasedCategory\":\"Electronics\"},{\"city\":\"San Francisco\",\"totalRevenue\":\"989.84\",\"uniqueCustomers\":2,\"mostPurchasedCategory\":\"Office\"},{\"city\":\"Seattle\",\"totalRevenue\":\"694.94\",\"uniqueCustomers\":2,\"mostPurchasedCategory\":\"Furniture\"}],\"logs\":[]}", + "state": "complete" + }, + { + "type": "text", + "content": "Perfect! 
Here's a summary of revenue, customer engagement, and product preferences by city:\n\n| City | Total Revenue | Unique Customers | Most Purchased Category |\n|------|---------------|------------------|------------------------|\n| Austin | $464.87 | 2 | Office |\n| Chicago | $1,119.89 | 2 | Electronics |\n| New York | $459.93 | 2 | Electronics |\n| San Francisco | $989.84 | 2 | Office |\n| Seattle | $694.94 | 2 | Furniture |\n\n**Key Insights:**\n- **Chicago** is the top revenue-generating city with $1,119.89 in total sales, with Electronics being the most popular category\n- **San Francisco** is the second highest with $989.84 in revenue, focusing on Office products\n- **Seattle** generates $694.94 in revenue with a preference for Furniture\n- **Austin** and **New York** have similar revenue (around $460), with Austin preferring Office supplies and New York preferring Electronics\n- Each city has exactly 2 unique customers in the dataset" + } + ], + "createdAt": "2026-03-12T23:27:59.591Z" + } + ] +} diff --git a/examples/ts-code-mode-web/src/components/ChatInput.tsx b/examples/ts-code-mode-web/src/components/ChatInput.tsx new file mode 100644 index 000000000..e970deba6 --- /dev/null +++ b/examples/ts-code-mode-web/src/components/ChatInput.tsx @@ -0,0 +1,74 @@ +import { Send } from 'lucide-react' +import { useState, useRef, useEffect } from 'react' + +interface ChatInputProps { + onSend: (message: string) => void + disabled?: boolean + placeholder?: string + exampleQueries?: string +} + +export default function ChatInput({ + onSend, + disabled = false, + placeholder = 'Ask a question about GitHub or NPM analytics...', + exampleQueries = '"Show download trends for @tanstack/query" | "Find repos with unusual growth" | "Compare React state libraries"', +}: ChatInputProps) { + const [input, setInput] = useState('') + const textareaRef = useRef(null) + + // Auto-resize textarea + useEffect(() => { + if (textareaRef.current) { + textareaRef.current.style.height = 'auto' + 
textareaRef.current.style.height = `${Math.min(textareaRef.current.scrollHeight, 200)}px` + } + }, [input]) + + const handleSubmit = () => { + if (input.trim() && !disabled) { + onSend(input.trim()) + setInput('') + } + } + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault() + handleSubmit() + } + } + + return ( +
+
+
+
+