diff --git a/README.md b/README.md index bcf3e2b..6311a93 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ A unified TypeScript/Node.js SDK for building AI-powered applications with multi - **Embeddings** — Vector generation for RAG applications (OpenAI, Gemini, Ollama) - **Workflow Engine** — AI-driven planning and step-by-step task execution with progress events - **Mode System** — Built-in Agent and Chat modes, plus `createMode()` for custom modes with tool filtering +- **Multi-Agent Conversations** — Collaborative AI teams with specialized agents, handoffs, and conversation threading - **HITL Confirmation** — Human-in-the-loop approval for high-risk operations with configurable bypass rules - **Custom Providers** — Bring your own provider by implementing the `ProviderAdapter` interface - **79 Built-in Tools** across 10 categories: @@ -91,6 +92,50 @@ const sdk = await Toolpack.init({ }); ``` +### Multi-Agent Conversations + +Create collaborative AI teams where specialized agents work together on complex tasks: + +```typescript +import { Toolpack } from 'toolpack-sdk'; + +const sdk = await Toolpack.init({ + provider: 'openai', + tools: true, +}); + +// Create a team of specialized agents +const team = sdk.createAgentTeam({ + agents: { + researcher: { + mode: 'chat', + systemPrompt: 'You are a research specialist. Use web tools to gather information.', + allowedToolCategories: ['network'] + }, + coder: { + mode: 'coding', + systemPrompt: 'You are a coding expert. Focus on writing clean, efficient code.', + allowedToolCategories: ['filesystem', 'coding'] + }, + reviewer: { + mode: 'chat', + systemPrompt: 'You are a code reviewer. 
Analyze code for best practices and issues.', + allowedToolCategories: ['filesystem'] + } + }, + conversationMode: 'collaborative', + enableAgentHandoffs: true +}); + +// Agents collaborate on complex tasks +const result = await team.generate( + 'Research the latest React patterns, implement a todo component, and review the code' +); + +console.log('Final result:', result.content); +console.log('Conversation had', result.totalRounds, 'rounds'); +``` + ## Providers ### Built-in Providers diff --git a/package-lock.json b/package-lock.json index 249a0ee..3185463 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9693,6 +9693,331 @@ } } }, + "node_modules/netlify/node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", + "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", + "cpu": [ + "arm" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "android" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-android-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", + "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "android" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", + "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "darwin" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-darwin-x64": { + "version": "4.59.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", + "integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "darwin" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", + "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "freebsd" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", + "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "freebsd" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", + "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", + "cpu": [ + "arm" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", + "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", + "cpu": [ + "arm" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + 
"node_modules/netlify/node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", + "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", + "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", + "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", + "cpu": [ + "loong64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", + "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", + "cpu": [ + "loong64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", + "integrity": 
"sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "cpu": [ + "ppc64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", + "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", + "cpu": [ + "ppc64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", + "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", + "cpu": [ + "riscv64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", + "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", + "cpu": [ + "riscv64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", + "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", + "cpu": [ + "s390x" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.59.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", + "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", + "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", + "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "openbsd" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", + "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "openharmony" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", + "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, + 
"node_modules/netlify/node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", + "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", + "cpu": [ + "ia32" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", + "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", + "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, "node_modules/netlify/node_modules/@sec-ant/readable-stream": { "version": "0.4.1", "license": "MIT" @@ -12637,6 +12962,17 @@ "safe-buffer": "~5.1.0" } }, + "node_modules/netlify/node_modules/fsevents": { + "version": "2.3.3", + "extraneous": true, + "license": "MIT", + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/netlify/node_modules/function-bind": { "version": "1.1.2", "license": "MIT", @@ -16074,6 +16410,51 @@ "version": "1.4.1", "license": "MIT" }, + "node_modules/netlify/node_modules/rollup": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz", + "integrity": 
"sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", + "extraneous": true, + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.59.0", + "@rollup/rollup-android-arm64": "4.59.0", + "@rollup/rollup-darwin-arm64": "4.59.0", + "@rollup/rollup-darwin-x64": "4.59.0", + "@rollup/rollup-freebsd-arm64": "4.59.0", + "@rollup/rollup-freebsd-x64": "4.59.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", + "@rollup/rollup-linux-arm-musleabihf": "4.59.0", + "@rollup/rollup-linux-arm64-gnu": "4.59.0", + "@rollup/rollup-linux-arm64-musl": "4.59.0", + "@rollup/rollup-linux-loong64-gnu": "4.59.0", + "@rollup/rollup-linux-loong64-musl": "4.59.0", + "@rollup/rollup-linux-ppc64-gnu": "4.59.0", + "@rollup/rollup-linux-ppc64-musl": "4.59.0", + "@rollup/rollup-linux-riscv64-gnu": "4.59.0", + "@rollup/rollup-linux-riscv64-musl": "4.59.0", + "@rollup/rollup-linux-s390x-gnu": "4.59.0", + "@rollup/rollup-linux-x64-gnu": "4.59.0", + "@rollup/rollup-linux-x64-musl": "4.59.0", + "@rollup/rollup-openbsd-x64": "4.59.0", + "@rollup/rollup-openharmony-arm64": "4.59.0", + "@rollup/rollup-win32-arm64-msvc": "4.59.0", + "@rollup/rollup-win32-ia32-msvc": "4.59.0", + "@rollup/rollup-win32-x64-gnu": "4.59.0", + "@rollup/rollup-win32-x64-msvc": "4.59.0", + "fsevents": "~2.3.2" + } + }, "node_modules/netlify/node_modules/router": { "version": "2.2.0", "license": "MIT", @@ -19953,7 +20334,7 @@ }, "packages/toolpack-knowledge": { "name": "@toolpack-sdk/knowledge", - "version": "1.3.0", + "version": "1.4.0", "license": "Apache-2.0", "dependencies": { "better-sqlite3": "^12.6.2", @@ -19973,7 +20354,7 @@ } }, "packages/toolpack-sdk": { - "version": "1.2.0", + "version": "1.4.0", "license": "Apache-2.0", "dependencies": { "@anthropic-ai/sdk": "^0.73.0", diff --git 
a/packages/toolpack-sdk/examples/README.md b/packages/toolpack-sdk/examples/README.md new file mode 100644 index 0000000..86a38b7 --- /dev/null +++ b/packages/toolpack-sdk/examples/README.md @@ -0,0 +1,237 @@ +# Context Window Examples + +This directory contains comprehensive examples demonstrating how to use the context window management features in Toolpack SDK. + +## Overview + +Context window management helps prevent API errors and handle long conversations by automatically managing conversation history within the model's token limits. + +## Examples + +### 1. Basic Pruning Example (`context-window-basic.ts`) + +**What it demonstrates:** +- Simple setup of context window management with pruning strategy +- Automatic removal of oldest messages when approaching context limits +- Retention of system messages during pruning + +**Key features:** +- Prune strategy (removes old messages) +- `pruneThreshold: 85` - Start pruning at 85% of context window +- `maxMessageHistoryLength: 50` - Keep at most 50 messages +- `retainSystemMessages: true` - Always preserve system prompts + +**Use case:** Best for simple conversations where you just need to keep recent history without losing important context. + +### 2. Summarization Example (`context-window-summarization.ts`) + +**What it demonstrates:** +- Using summarization strategy to condense conversation history +- Automatic summarization of older messages before reaching limits +- Error handling for summarization failures + +**Key features:** +- Summarize strategy (creates summaries of old messages) +- `summarizerModel: 'gpt-4'` - Use a capable model for summaries +- Automatic fallback to pruning if summarization fails +- Preserves full semantic meaning of conversations + +**Use case:** Best for long, complex conversations where you need to preserve all the semantic information but save token space. + +### 3. 
State Monitoring and Fail Strategy (`context-window-monitoring.ts`) + +**What it demonstrates:** +- Monitoring conversation state and token usage +- Using the fail strategy for safety +- Error handling and recovery mechanisms +- Manual state management when needed + +**Key features:** +- Fail strategy (prevents sending requests that exceed limits) +- `pruneThreshold: 90` - Alert at 90%, fail at 100% +- Error handling with `ContextWindowExceededError` +- Recovery strategies for when limits are exceeded + +**Use case:** Best for safety-critical applications where you want explicit control and error handling rather than automatic pruning. + +### 4. Multi-Conversation Tracking (`context-window-multi-conversation.ts`) + +**What it demonstrates:** +- Managing context windows for multiple simultaneous conversations +- Per-conversation state tracking +- Resource management across conversations +- Advanced `MultiConversationContextManager` class + +**Key features:** +- Separate conversation IDs for different users/threads +- Independent context window management per conversation +- Resource pooling and conversation archiving +- Statistics tracking across all conversations + +**Use case:** Best for applications serving multiple users (chatbots, customer support, etc.) where each conversation needs independent context management. + +## Running the Examples + +### Prerequisites + +```bash +# Install dependencies +npm install + +# Set up your OpenAI API key +export OPENAI_API_KEY=sk-... 
+``` + +### Running a Specific Example + +```bash +# Basic pruning +npx ts-node examples/context-window-basic.ts + +# Summarization +npx ts-node examples/context-window-summarization.ts + +# State monitoring +npx ts-node examples/context-window-monitoring.ts + +# Multi-conversation +npx ts-node examples/context-window-multi-conversation.ts +``` + +## Configuration Options + +All examples use these common configuration options: + +```typescript +contextWindow: { + // Enable/disable context window management + enabled: boolean; + + // Strategy: 'prune' | 'summarize' | 'fail' + strategy: 'prune' | 'summarize' | 'fail'; + + // Threshold for triggering management (percentage, 0-100) + pruneThreshold?: number; // Default: 85 + + // Maximum messages to keep in history + maxMessageHistoryLength?: number; + + // Always preserve system messages + retainSystemMessages?: boolean; // Default: true + + // Model to use for summarization + summarizerModel?: string; + + // Output token buffer (percentage overhead) + outputTokenBuffer?: number; // Default: 1.15 +} +``` + +## Strategy Comparison + +| Strategy | Behavior | Best For | Tradeoffs | +|----------|----------|----------|-----------| +| **Prune** | Removes oldest messages | General conversations | May lose context | +| **Summarize** | Condenses old messages | Long complex conversations | More expensive (extra API call) | +| **Fail** | Throws error if limit exceeded | Safety-critical apps | Requires manual handling | + +## Common Patterns + +### Pattern 1: Long-Running Conversation + +```typescript +// Use summarization to preserve context while saving tokens +contextWindow: { + strategy: 'summarize', + pruneThreshold: 80, + summarizerModel: 'gpt-4' +} +``` + +### Pattern 2: Quick User Interactions + +```typescript +// Use aggressive pruning to keep it fast and cheap +contextWindow: { + strategy: 'prune', + pruneThreshold: 90, + maxMessageHistoryLength: 10 +} +``` + +### Pattern 3: Critical Applications + +```typescript +// 
Fail fast rather than silently lose context +contextWindow: { + strategy: 'fail', + pruneThreshold: 85 +} +``` + +### Pattern 4: Multi-User System + +```typescript +// Track each conversation separately with moderate settings +contextWindow: { + strategy: 'prune', + pruneThreshold: 85, + maxMessageHistoryLength: 50, + conversationId: `user-${userId}-session` +} +``` + +## Monitoring and Observability + +All examples demonstrate best practices for monitoring: + +- Track token usage per conversation +- Monitor context window utilization percentage +- Handle errors gracefully +- Implement recovery strategies +- Log state changes for debugging + +## Next Steps + +1. **Choose a strategy** based on your use case +2. **Test with your conversations** to find optimal thresholds +3. **Monitor performance** in production +4. **Adjust settings** based on metrics +5. **Handle errors** appropriately for your application + +## Advanced Topics + +- Custom context window state managers (see multi-conversation example) +- Integration with vector databases for semantic search over summaries +- Conversation archiving and retrieval +- Analytics and metrics collection +- Integration with observability platforms + +## Troubleshooting + +### Getting `ContextWindowExceededError` + +Use fail strategy and handle the error: +```typescript +try { + const response = await client.generate(...); +} catch (error) { + if (error.code === 'CONTEXT_WINDOW_EXCEEDED') { + // Implement recovery: archive, start new conversation, etc. + } +} +``` + +### Summarization is slow + +Consider using a faster model for summarization or increasing `pruneThreshold` to allow more tokens before summarization triggers. + +### Losing important context + +Switch from `prune` to `summarize` strategy, or increase `maxMessageHistoryLength`. 
+ +## See Also + +- [API Documentation](../docs/API.md) +- [Context Window Implementation Plan](../../../CONTEXT_WINDOW_IMPLEMENTATION_PLAN.md) +- [Best Practices Guide](../docs/BEST_PRACTICES.md) diff --git a/packages/toolpack-sdk/examples/context-window-basic.ts b/packages/toolpack-sdk/examples/context-window-basic.ts new file mode 100644 index 0000000..ca4f2bb --- /dev/null +++ b/packages/toolpack-sdk/examples/context-window-basic.ts @@ -0,0 +1,78 @@ +/** + * Basic Context Window Example - Pruning Strategy + * + * This example demonstrates the basic usage of context window management + * with the prune strategy, which removes oldest messages when approaching limits. + */ + +import { Toolpack } from '../src/toolpack'; + +async function basicPruningExample() { + // Initialize Toolpack with context window configuration + const toolpack = await Toolpack.init({ + provider: 'openai', + providers: { + openai: { + apiKey: process.env.OPENAI_API_KEY + } + }, + contextWindow: { + enabled: true, + strategy: 'prune', // Remove old messages automatically + pruneThreshold: 85, // Start pruning at 85% of context window + maxMessageHistoryLength: 50, // Keep at most 50 messages + retainSystemMessages: true, // Always keep system prompts + outputTokenBuffer: 1.15 // Reserve 15% for output + } + }); + + // Get the AI client instance + const client = toolpack.getClient(); + + // Example conversation with many messages + const messages = [ + { + role: 'system' as const, + content: 'You are a helpful coding assistant. Provide clear, concise answers.' + }, + { + role: 'user' as const, + content: 'What is the difference between let and const in JavaScript?' 
+ }, + { + role: 'assistant' as const, + content: `let and const are both block-scoped variables introduced in ES6: + +- let: Can be reassigned, is mutable +- const: Cannot be reassigned, creates immutable reference + +Example: +let x = 1; +x = 2; // OK + +const y = 1; +y = 2; // Error: Assignment to constant variable` + }, + { + role: 'user' as const, + content: 'Can you provide more examples?' + }, + { + role: 'assistant' as const, + content: 'Sure! Here are more examples...' + } + // ... more messages would be pruned automatically if needed + ]; + + // Generate a response - context window is managed automatically + const response = await client.generate({ + messages, + model: 'gpt-4', + temperature: 0.7 + }); + + console.log('Assistant response:', response.content); +} + +// Run the example +basicPruningExample().catch(console.error); diff --git a/packages/toolpack-sdk/examples/context-window-monitoring.ts b/packages/toolpack-sdk/examples/context-window-monitoring.ts new file mode 100644 index 0000000..6a6934f --- /dev/null +++ b/packages/toolpack-sdk/examples/context-window-monitoring.ts @@ -0,0 +1,168 @@ +/** + * Context Window Example - State Monitoring and Fail Strategy + * + * This example demonstrates how to monitor context window state and use + * the fail strategy to prevent requests from being sent with excessive context. 
+ */ + +import { Toolpack } from '../src/toolpack'; +import { AIClient } from '../src/client'; + +async function stateMonitoringExample() { + // Initialize Toolpack with fail strategy for safety + const toolpack = await Toolpack.init({ + provider: 'openai', + providers: { + openai: { + apiKey: process.env.OPENAI_API_KEY + } + }, + conversationId: 'user-123-session', // Track conversation for state management + contextWindow: { + enabled: true, + strategy: 'fail', // Fail rather than silently drop context + pruneThreshold: 90, // Alert at 90%, fail at 100% + outputTokenBuffer: 1.20, + retainSystemMessages: true + } + }); + + const client = toolpack.getClient(); + + // Example messages that will accumulate + const messages = [ + { + role: 'system' as const, + content: 'You are an expert data analyst providing insights.' + } + ]; + + // Simulate adding many messages to the conversation + for (let i = 0; i < 10; i++) { + messages.push({ + role: 'user' as const, + content: `Question ${i}: Analyze this data point - ${generateSampleData()}` + }); + + messages.push({ + role: 'assistant' as const, + content: `Analysis ${i}: Based on the provided data, the key insights are...` + }); + } + + // Now attempt to generate response with potentially high token count + try { + const response = await client.generate({ + messages, + model: 'gpt-4', + temperature: 0.5 + }); + + console.log('Response:', response.content); + + } catch (error: any) { + // Handle context window errors with fail strategy + if (error.code === 'CONTEXT_WINDOW_EXCEEDED') { + console.error('Context window exceeded!'); + console.error('Conversation ID:', error.conversationId); + console.error('Current tokens:', error.currentTokens); + console.error('Limit:', error.contextWindowLimit); + console.error('Overage:', error.getOverageTokens(), 'tokens'); + console.error('Usage:', error.getUsagePercentage() + '%'); + + // Strategies for recovery: + console.log('\nRecovery options:'); + console.log('1. 
Archive old messages from the conversation'); + console.log('2. Start a new conversation'); + console.log('3. Reduce context with manual pruning'); + + // Example: Implement manual recovery + console.log('\nStarting fresh conversation...'); + // Reset messages to system + last few exchanges + const freshMessages = [ + messages[0], // System message + messages[messages.length - 2], // Last user message + messages[messages.length - 1] // Last assistant response + ]; + + // Try again with reduced context + const recoveryResponse = await client.generate({ + messages: freshMessages, + model: 'gpt-4' + }); + + console.log('Recovery response:', recoveryResponse.content); + } else { + throw error; + } + } +} + +/** + * Alternative: Manual State Monitoring + * Use this when you need fine-grained control over context window management + */ +async function manualStateMonitoringExample() { + const toolpack = await Toolpack.init({ + provider: 'openai', + providers: { + openai: { + apiKey: process.env.OPENAI_API_KEY + } + }, + conversationId: 'monitored-session', + contextWindow: { + enabled: true, + strategy: 'prune' + } + }); + + const client = toolpack.getClient(); + + // Get reference to state manager if available + const messages = [ + { role: 'system' as const, content: 'You are helpful' }, + { role: 'user' as const, content: 'Hello' } + ]; + + // Make a request + let response = await client.generate({ + messages, + model: 'gpt-4' + }); + + console.log('Generation succeeded:', response.content ? response.content.substring(0, 50) + '...' : 'empty'); + + // In a real scenario, you might want to: + // 1. Track token usage over time + // 2. Alert when approaching limits + // 3. Implement proactive cleanup + // 4. 
Monitor multiple conversations + + return response; +} + +function generateSampleData(): string { + return JSON.stringify({ + metric: 'revenue', + value: Math.floor(Math.random() * 100000), + date: new Date().toISOString().split('T')[0] + }); +} + +// Run examples +(async () => { + console.log('=== State Monitoring Example ===\n'); + try { + await stateMonitoringExample(); + } catch (error) { + console.error('Error in state monitoring example:', error); + } + + console.log('\n\n=== Manual State Monitoring Example ===\n'); + try { + await manualStateMonitoringExample(); + } catch (error) { + console.error('Error in manual monitoring example:', error); + } +})(); diff --git a/packages/toolpack-sdk/examples/context-window-multi-conversation.ts b/packages/toolpack-sdk/examples/context-window-multi-conversation.ts new file mode 100644 index 0000000..b20c324 --- /dev/null +++ b/packages/toolpack-sdk/examples/context-window-multi-conversation.ts @@ -0,0 +1,285 @@ +/** + * Context Window Example - Multi-Conversation Tracking + * + * This example demonstrates managing context windows for multiple + * concurrent conversations with different users or threads. 
+ */ + +import { Toolpack } from '../src/toolpack'; + +/** + * Represents a single user conversation + */ +interface UserConversation { +  userId: string; +  conversationId: string; +  messages: Array<{ role: string; content: string }>; +} + +async function multiConversationExample() { +  // Initialize Toolpack with context window management +  const toolpack = await Toolpack.init({ +    provider: 'openai', +    providers: { +      openai: { +        apiKey: process.env.OPENAI_API_KEY +      } +    }, +    contextWindow: { +      enabled: true, +      strategy: 'prune', +      pruneThreshold: 85, +      maxMessageHistoryLength: 50, +      retainSystemMessages: true, +      outputTokenBuffer: 1.15 +    } +  }); + +  // Simulated conversations with different users +  const conversations: Record<string, UserConversation> = { +    'user-001': { +      userId: 'user-001', +      conversationId: 'conv-001-support', +      messages: [ +        { +          role: 'system', +          content: 'You are a customer support assistant.' +        } +      ] +    }, +    'user-002': { +      userId: 'user-002', +      conversationId: 'conv-002-sales', +      messages: [ +        { +          role: 'system', +          content: 'You are a sales assistant helping customers find products.' +        } +      ] +    }, +    'user-003': { +      userId: 'user-003', +      conversationId: 'conv-003-technical', +      messages: [ +        { +          role: 'system', +          content: 'You are a technical support specialist.' 
+        } +      ] +    } +  }; + +  // Process each user's conversation +  for (const [userId, conv] of Object.entries(conversations)) { +    await processUserConversation(toolpack, conv); +  } + +  // Example: Generate stats for all conversations +  console.log('\n=== Conversation Summary ==='); +  for (const [userId, conv] of Object.entries(conversations)) { +    console.log(`${userId} (${conv.conversationId}): ${conv.messages.length} messages`); +  } +} + +async function processUserConversation( +  toolpack: any, +  conversation: UserConversation +) { +  console.log(`\nProcessing ${conversation.userId}...`); + +  // Create a new client instance for this conversation with specific ID +  const client = toolpack.getClient(conversation.conversationId); + +  // Add some user messages +  conversation.messages.push({ +    role: 'user', +    content: `Hello, I have a question about your service for user ${conversation.userId}` +  }); + +  try { +    // Generate response for this conversation +    const response = await client.generate({ +      messages: conversation.messages as any, +      model: 'gpt-4', +      temperature: 0.7 +    }); + +    // Add assistant response to history +    conversation.messages.push({ +      role: 'assistant', +      content: response.content || '' +    }); + +    console.log(`  ✓ Response generated (${conversation.messages.length} total messages)`); + +  } catch (error: any) { +    console.error(`  ✗ Error for ${conversation.userId}:`, error.message); + +    // Handle specific errors per conversation +    if (error.code === 'CONTEXT_WINDOW_EXCEEDED') { +      console.log(`  → Context window exceeded, archiving old messages`); +      // Archive mechanism would go here +    } +  } +} + +/** + * Advanced: Context Window Manager for Multiple Conversations + */ +class MultiConversationContextManager { +  private conversations: Map<string, UserConversation> = new Map(); +  private toolpack: any; +  private maxConversations: number = 10; +  private tokenBudget: number = 500000; // Total tokens for all conversations + +  constructor(toolpack: any) { +    this.toolpack = toolpack; +  } + +  /** + 
* Start a new conversation + */ + startConversation(userId: string, systemPrompt: string): string { + const conversationId = `conv-${Date.now()}-${userId}`; + + this.conversations.set(conversationId, { + userId, + conversationId, + messages: [{ role: 'system', content: systemPrompt }] + }); + + if (this.conversations.size > this.maxConversations) { + this.pruneInactiveConversations(); + } + + return conversationId; + } + + /** + * Add message to conversation + */ + addMessage(conversationId: string, role: string, content: string) { + const conv = this.conversations.get(conversationId); + if (conv) { + conv.messages.push({ role, content }); + } + } + + /** + * Generate response for conversation + */ + async generateResponse(conversationId: string): Promise { + const conv = this.conversations.get(conversationId); + if (!conv) throw new Error(`Conversation ${conversationId} not found`); + + const client = this.toolpack.getClient(conversationId); + + const response = await client.generate({ + messages: conv.messages as any, + model: 'gpt-4', + temperature: 0.7 + }); + + // Add assistant message + this.addMessage(conversationId, 'assistant', response.content || ''); + + return response.content || ''; + } + + /** + * Remove old/inactive conversations to free resources + */ + private pruneInactiveConversations() { + // Sort by message count (less recent assumed to be older) + const sorted = Array.from(this.conversations.values()) + .sort((a, b) => a.messages.length - b.messages.length); + + // Remove oldest conversation + if (sorted.length > 0) { + const toRemove = sorted[0]; + this.conversations.delete(toRemove.conversationId); + console.log(`Archived conversation ${toRemove.conversationId}`); + } + } + + /** + * Get manager statistics + */ + getStats() { + let totalMessages = 0; + let totalTokens = 0; + + for (const conv of this.conversations.values()) { + totalMessages += conv.messages.length; + // Rough token estimate + totalTokens += conv.messages.reduce((sum, msg) 
=> { + return sum + Math.ceil((msg.role.length + msg.content.length) / 4); + }, 0); + } + + return { + activeConversations: this.conversations.size, + totalMessages, + estimatedTokens: totalTokens, + budgetUsage: Math.round((totalTokens / this.tokenBudget) * 100) + }; + } +} + +// Advanced example usage +async function advancedMultiConversationExample() { + const toolpack = await Toolpack.init({ + provider: 'openai', + providers: { + openai: { + apiKey: process.env.OPENAI_API_KEY + } + }, + contextWindow: { + enabled: true, + strategy: 'prune' + } + }); + + const manager = new MultiConversationContextManager(toolpack); + + // Start multiple conversations + const conv1 = manager.startConversation('user-a', 'You are a helpful assistant.'); + const conv2 = manager.startConversation('user-b', 'You are a technical expert.'); + + // Add messages + manager.addMessage(conv1, 'user', 'What is AI?'); + manager.addMessage(conv2, 'user', 'How do neural networks work?'); + + // Generate responses + try { + const resp1 = await manager.generateResponse(conv1); + console.log('Response 1:', resp1.substring(0, 100) + '...'); + + const resp2 = await manager.generateResponse(conv2); + console.log('Response 2:', resp2.substring(0, 100) + '...'); + } catch (error) { + console.error('Error generating responses:', error); + } + + // Print stats + console.log('\n=== Manager Stats ==='); + console.log(manager.getStats()); +} + +// Run examples +(async () => { + console.log('=== Multi-Conversation Example ===\n'); + try { + await multiConversationExample(); + } catch (error) { + console.error('Error:', error); + } + + console.log('\n\n=== Advanced Multi-Conversation Example ===\n'); + try { + await advancedMultiConversationExample(); + } catch (error) { + console.error('Error:', error); + } +})(); diff --git a/packages/toolpack-sdk/examples/context-window-summarization.ts b/packages/toolpack-sdk/examples/context-window-summarization.ts new file mode 100644 index 0000000..87bbc90 --- 
/dev/null +++ b/packages/toolpack-sdk/examples/context-window-summarization.ts @@ -0,0 +1,112 @@ +/** + * Context Window Example - Summarization Strategy + * + * This example demonstrates using the summarization strategy to condense + * conversation history into a summary before reaching context limits. + */ + +import { Toolpack } from '../src/toolpack'; + +async function summarizationExample() { + // Initialize Toolpack with summarization strategy + const toolpack = await Toolpack.init({ + provider: 'openai', + providers: { + openai: { + apiKey: process.env.OPENAI_API_KEY + } + }, + contextWindow: { + enabled: true, + strategy: 'summarize', // Use summarization when approaching limits + pruneThreshold: 80, // Start summarizing at 80% of context window + maxMessageHistoryLength: 100, // Allow more messages before other pruning + retainSystemMessages: true, + // Optional: specify a different model for summarization + summarizerModel: 'gpt-4', // Use more capable model for summaries + outputTokenBuffer: 1.15 + } + }); + + const client = toolpack.getClient(); + + // Simulate a long conversation that would benefit from summarization + const messages = [ + { + role: 'system' as const, + content: `You are a project management assistant helping a team plan their sprint. +You have expertise in Agile methodologies and project estimation.` + }, + { + role: 'user' as const, + content: 'We need to plan our Q2 roadmap. What should we consider?' + }, + { + role: 'assistant' as const, + content: `For Q2 planning, consider: +1. Stakeholder priorities +2. Team capacity and velocity +3. Dependencies and risks +4. Resource availability +5. Technical debt vs. features +6. User feedback and metrics + +Should I help you prioritize specific items?` + }, + { + role: 'user' as const, + content: 'We have 5 feature requests, 3 bug fixes, and significant technical debt.' + }, + { + role: 'assistant' as const, + content: `That's a common challenge. 
I'd recommend: + +Feature Requests (if high-value): +- Estimate effort for each +- Map to business OKRs +- Consider user impact + +Bug Fixes: +- Prioritize by severity +- Group related fixes + +Technical Debt: +- Define concrete benefits +- Allocate 20-30% of sprint capacity +- Show ROI + +What's your current sprint velocity?` + }, + // Many more messages would follow in a real scenario + // When the context window approaches the limit, the system will + // automatically summarize the earlier conversation about roadmap planning, + // keeping only recent messages and the summary + ]; + + try { + const response = await client.generate({ + messages, + model: 'gpt-4', + temperature: 0.7 + }); + + console.log('Response after potential summarization:', response.content); + + // The system automatically handled context window management: + // - Monitored token usage + // - When approaching the threshold, summarized older messages + // - Preserved system message and recent context + // - Continued the conversation seamlessly + + } catch (error: any) { + if (error.code === 'SUMMARIZATION_ERROR') { + console.error('Summarization failed:', error.message); + console.error('Recovery suggestion:', error.getSuggestedRecovery()); + } else { + throw error; + } + } +} + +// Run the example +summarizationExample().catch(console.error); diff --git a/packages/toolpack-sdk/src/client/index.ts b/packages/toolpack-sdk/src/client/index.ts index 73afc0a..9d1c69b 100644 --- a/packages/toolpack-sdk/src/client/index.ts +++ b/packages/toolpack-sdk/src/client/index.ts @@ -1,7 +1,12 @@ import { EventEmitter } from 'events'; import { ProviderAdapter } from "../providers/base/index.js"; -import { CompletionRequest, CompletionResponse, CompletionChunk, ToolCallRequest, ToolCallResult, EmbeddingRequest, EmbeddingResponse, ToolProgressEvent, ToolLogEvent, OnToolConfirmCallback, ToolConfirmationRequestedEvent, ToolConfirmationResolvedEvent } from "../types/index.js"; +import { CompletionRequest, 
CompletionResponse, CompletionChunk, ToolCallRequest, ToolCallResult, EmbeddingRequest, EmbeddingResponse, ToolProgressEvent, ToolLogEvent, OnToolConfirmCallback, ToolConfirmationRequestedEvent, ToolConfirmationResolvedEvent, ContextWindowConfig, ProviderModelInfo } from "../types/index.js"; import { SDKError, ProviderError } from "../errors/index.js"; +import { ContextWindowExceededError, SummarizationError } from '../errors/context-window-errors.js'; +import { countTokens, getSafeOutputReserve } from '../utils/token-counter.js'; +import { pruneMessages } from '../utils/message-pruner.js'; +import { prepareSummarizationRequest, createSummarySystemMessage, parseSummarizationResponse, validateSummarizationResult } from '../utils/message-summarizer.js'; +import { ContextWindowStateManager, createContextWindowStateManager } from '../utils/context-window-state.js'; import { ToolRegistry } from '../tools/registry.js'; import { ToolRouter } from '../tools/router.js'; import type { ToolsConfig, ToolSchema, ToolContext, ToolDefinition } from "../tools/types.js"; @@ -196,6 +201,8 @@ export interface AIClientConfig { onToolConfirm?: OnToolConfirmCallback; /** Optional conversation ID for tracking context */ conversationId?: string; + /** Context window management configuration */ + contextWindowConfig?: ContextWindowConfig; } export class AIClient extends EventEmitter { @@ -215,6 +222,9 @@ export class AIClient extends EventEmitter { private onToolConfirm?: OnToolConfirmCallback; private currentRound: number = 0; private conversationId?: string; + private contextWindowConfig?: ContextWindowConfig; + private contextWindowStateManager?: ContextWindowStateManager; + private providerModelCache: Map = new Map(); constructor(config: AIClientConfig) { super(); @@ -233,6 +243,13 @@ export class AIClient extends EventEmitter { this.hitlConfig = config.hitlConfig; this.onToolConfirm = config.onToolConfirm; this.conversationId = config.conversationId; + this.contextWindowConfig = 
config.contextWindowConfig; + this.providerModelCache = new Map(); + + // Initialize context window state manager if config provided + if (this.contextWindowConfig && this.contextWindowConfig.enabled !== false) { + this.contextWindowStateManager = createContextWindowStateManager(this.contextWindowConfig); + } // Index tools for BM25 search if registry is provided if (this.toolRegistry) { @@ -240,6 +257,196 @@ export class AIClient extends EventEmitter { } } + private getConversationId(): string { + return this.conversationId || 'global'; + } + + private async getModelInfo(provider: ProviderAdapter, model: string): Promise { + const providerKey = (provider as any).name || provider.constructor.name; + let models = this.providerModelCache.get(providerKey); + if (!models) { + try { + models = await provider.getModels(); + } catch { + models = []; + } + this.providerModelCache.set(providerKey, models); + } + + return models.find(m => m.id === model || m.displayName === model); + } + + private async countRequestTokens(request: CompletionRequest, provider: ProviderAdapter, model: string): Promise { + const providerCount = await provider.countTokens(request.messages, model); + if (typeof providerCount === 'number' && Number.isFinite(providerCount)) { + return providerCount; + } + + return countTokens(request.messages, model, provider.getDisplayName().toLowerCase()); + } + + private async pruneConversation(request: CompletionRequest, provider: ProviderAdapter, targetTokenCount: number): Promise { + const retainSystemMessages = this.contextWindowConfig?.retainSystemMessages ?? 
true; + let currentRequest = request; + for (let attempt = 0; attempt < 3; attempt += 1) { + const currentTokens = await this.countRequestTokens(currentRequest, provider, currentRequest.model); + if (currentTokens <= targetTokenCount) { + return currentRequest; + } + + const tokensToRecover = currentTokens - targetTokenCount; + const result = pruneMessages(currentRequest.messages, tokensToRecover, retainSystemMessages); + const removedSet = new Set(result.pruneInfo.removedMessages); + const filteredMessages = currentRequest.messages.filter(msg => !removedSet.has(msg)); + const filteredRequest = { ...currentRequest, messages: filteredMessages }; + + if (this.contextWindowStateManager) { + this.contextWindowStateManager.recordPruneOperation(this.getConversationId(), result.tokensReclaimed); + } + + const newTokenCount = await this.countRequestTokens(filteredRequest, provider, filteredRequest.model); + if (newTokenCount <= targetTokenCount || filteredMessages.length === currentRequest.messages.length) { + return filteredRequest; + } + currentRequest = filteredRequest; + } + + return request; + } + + private async pruneToMaxMessageHistory(request: CompletionRequest): Promise { + const maxLength = this.contextWindowConfig?.maxMessageHistoryLength; + if (!maxLength || request.messages.length <= maxLength) { + return request; + } + + const retainSystemMessages = this.contextWindowConfig?.retainSystemMessages ?? 
true; + const prunableIndexes: number[] = []; + request.messages.forEach((msg, idx) => { + if (retainSystemMessages && msg.role === 'system') return; + if (msg.role === 'tool') return; + prunableIndexes.push(idx); + }); + + const removeCount = Math.max(0, request.messages.length - maxLength); + if (removeCount === 0) { + return request; + } + + const removeIndexes = new Set(prunableIndexes.slice(0, removeCount)); + const filteredMessages = request.messages.filter((_, idx) => !removeIndexes.has(idx)); + return { ...request, messages: filteredMessages }; + } + + private async summarizeConversation(request: CompletionRequest, provider: ProviderAdapter): Promise { + const messages = request.messages; + const systemMessages = messages.filter(m => m.role === 'system'); + const nonSystemMessages = messages.filter(m => m.role !== 'system'); + + if (nonSystemMessages.length < 4) { + return request; + } + + const recentMessages = nonSystemMessages.slice(-4); + const messagesToSummarize = nonSystemMessages.slice(0, -4); + if (messagesToSummarize.length < 2) { + return request; + } + + const summarizerModel = this.contextWindowConfig?.summarizerModel || request.model; + const summaryRequestMessages = prepareSummarizationRequest(messagesToSummarize, { + model: summarizerModel, + maxSummaryTokens: 500, + }); + + const summaryResponse = await provider.generate({ + model: summarizerModel, + messages: summaryRequestMessages, + max_tokens: 500, + temperature: 0, + response_format: 'text', + }); + + if (!summaryResponse.content) { + throw new SummarizationError('Summarization provider returned no summary', this.getConversationId(), messagesToSummarize.length, 'invalid_response'); + } + + const originalTokenCount = await this.countRequestTokens({ ...request, messages: messagesToSummarize }, provider, summarizerModel); + const summarization = parseSummarizationResponse(summaryResponse.content, messagesToSummarize, originalTokenCount); + const validation = 
validateSummarizationResult(summarization); + + if (!validation.valid) { + throw new SummarizationError(`Summarization result is invalid: ${validation.issues.join('; ')}`, this.getConversationId(), messagesToSummarize.length, 'invalid_quality', summaryResponse.content); + } + + const summaryMessage = createSummarySystemMessage(summarization.summary, messagesToSummarize.length); + const summarizedMessages = [...systemMessages, summaryMessage, ...recentMessages]; + + if (this.contextWindowStateManager) { + this.contextWindowStateManager.recordSummarization(this.getConversationId(), summarization.tokensSaved); + } + + return { ...request, messages: summarizedMessages }; + } + + private async enforceContextWindow(request: CompletionRequest, provider: ProviderAdapter): Promise { + if (!this.contextWindowConfig || this.contextWindowConfig.enabled === false) { + return request; + } + + let managedRequest = await this.pruneToMaxMessageHistory(request); + const modelInfo = await this.getModelInfo(provider, managedRequest.model); + const contextWindow = modelInfo?.contextWindow ?? 100000; + const maxOutputTokens = managedRequest.max_tokens ?? modelInfo?.maxOutputTokens ?? 1024; + const outputBuffer = this.contextWindowConfig.outputTokenBuffer ?? 1.15; + const reserve = getSafeOutputReserve(maxOutputTokens, outputBuffer); + const safeInputLimit = Math.max(0, contextWindow - reserve); + const configuredThreshold = Math.floor(contextWindow * ((this.contextWindowConfig.pruneThreshold ?? 
85) / 100)); + const triggerThreshold = Math.min(safeInputLimit, configuredThreshold); + + const currentTokens = await this.countRequestTokens(managedRequest, provider, managedRequest.model); + if (this.contextWindowStateManager) { + this.contextWindowStateManager.updateTokenCount(this.getConversationId(), currentTokens); + if (currentTokens > triggerThreshold) { + this.contextWindowStateManager.recordWarning(this.getConversationId()); + } + } + + if (currentTokens <= triggerThreshold) { + return managedRequest; + } + + const strategy = this.contextWindowConfig.strategy ?? 'prune'; + + if (strategy === 'fail' && currentTokens > safeInputLimit) { + throw new ContextWindowExceededError(`Context window exceeded by request messages`, this.getConversationId(), currentTokens, safeInputLimit, strategy); + } + + if (strategy === 'summarize') { + try { + const summarizedRequest = await this.summarizeConversation(managedRequest, provider); + const summarizedTokens = await this.countRequestTokens(summarizedRequest, provider, summarizedRequest.model); + if (summarizedTokens <= safeInputLimit) { + return summarizedRequest; + } + managedRequest = await this.pruneConversation(summarizedRequest, provider, safeInputLimit); + } catch (error) { + managedRequest = await this.pruneConversation(managedRequest, provider, safeInputLimit); + } + } else { + managedRequest = await this.pruneConversation(managedRequest, provider, safeInputLimit); + } + + const finalTokens = await this.countRequestTokens(managedRequest, provider, managedRequest.model); + if (finalTokens > safeInputLimit) { + if (strategy === 'fail') { + throw new ContextWindowExceededError(`Context window exceeded after attempted cleanup`, this.getConversationId(), finalTokens, safeInputLimit, strategy); + } + } + + return managedRequest; + } + /** * Check if a tool should bypass confirmation based on HITL config. * Returns true if the tool should execute without confirmation. 
@@ -407,7 +614,8 @@ export class AIClient extends EventEmitter { // Resolve tools to send with the request const resolvedProviderName = providerName || this.defaultProvider; - const enrichedRequest = await this.enrichRequestWithTools(modeAwareRequest); + let enrichedRequest = await this.enrichRequestWithTools(modeAwareRequest); + enrichedRequest = await this.enforceContextWindow(enrichedRequest, provider); const policy = (process.env.TOOLPACK_SDK_TOOL_CHOICE_POLICY || this.toolsConfig.toolChoicePolicy || 'auto') as any; const hasTools = (enrichedRequest.tools?.length || 0) > 0; @@ -618,7 +826,8 @@ export class AIClient extends EventEmitter { // Call the model again with updated messages const rawFollowupReq: any = { ...enrichedRequest, messages, __toolpack_request_id: requestId }; // Re-enrich to include any tools discovered in the previous round - const followupReq = await this.enrichRequestWithTools(rawFollowupReq); + let followupReq = await this.enrichRequestWithTools(rawFollowupReq); + followupReq = await this.enforceContextWindow(followupReq, provider); if ((followupReq as any).tool_choice === 'required') { (followupReq as any).tool_choice = lookupOnly ? 
'none' : 'auto'; @@ -655,7 +864,8 @@ export class AIClient extends EventEmitter { modeAwareRequest = this.injectOverrideSystemPrompt(modeAwareRequest); modeAwareRequest = this.injectModeSystemPrompt(modeAwareRequest); - const enrichedRequest = await this.enrichRequestWithTools(modeAwareRequest); + let enrichedRequest = await this.enrichRequestWithTools(modeAwareRequest); + enrichedRequest = await this.enforceContextWindow(enrichedRequest, provider); const policy = (process.env.TOOLPACK_SDK_TOOL_CHOICE_POLICY || this.toolsConfig.toolChoicePolicy || 'auto') as any; const hasTools = (enrichedRequest.tools?.length || 0) > 0; @@ -732,7 +942,8 @@ export class AIClient extends EventEmitter { const rawRoundReq: any = { ...baseReq, messages }; // Re-enrich to include any newly discovered tools from previous rounds - const roundReq = await this.enrichRequestWithTools(rawRoundReq); + let roundReq = await this.enrichRequestWithTools(rawRoundReq); + roundReq = await this.enforceContextWindow(roundReq, provider); if (rounds > 0 && (roundReq as any).tool_choice === 'required') { (roundReq as any).tool_choice = lookupOnly ? 
'none' : 'auto'; diff --git a/packages/toolpack-sdk/src/errors/context-window-errors.ts b/packages/toolpack-sdk/src/errors/context-window-errors.ts new file mode 100644 index 0000000..58669e3 --- /dev/null +++ b/packages/toolpack-sdk/src/errors/context-window-errors.ts @@ -0,0 +1,275 @@ +import { SDKError } from './index.js'; + +/** + * Thrown when a conversation exceeds the configured context window limit + * and cannot be recovered through pruning or summarization + */ +export class ContextWindowExceededError extends SDKError { + constructor( + message: string, + public conversationId: string, + public currentTokens: number, + public contextWindowLimit: number, + public strategy: 'prune' | 'summarize' | 'fail', + cause?: any + ) { + super(message, 'CONTEXT_WINDOW_EXCEEDED', 400, cause); + this.name = 'ContextWindowExceededError'; + } + + /** + * Get the number of tokens over the limit + */ + getOverageTokens(): number { + return Math.max(0, this.currentTokens - this.contextWindowLimit); + } + + /** + * Get the percentage of the context window being used + */ + getUsagePercentage(): number { + return Math.round((this.currentTokens / this.contextWindowLimit) * 100); + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + return ` +Context Window Exceeded +======================= +Conversation ID: ${this.conversationId} +Current Tokens: ${this.currentTokens} +Context Window Limit: ${this.contextWindowLimit} +Overage: ${this.getOverageTokens()} tokens +Usage: ${this.getUsagePercentage()}% +Strategy: ${this.strategy} + +Message: ${this.message} + `.trim(); + } +} + +/** + * Thrown when there is insufficient context remaining to process a request + * after pruning or summarization, even though tokens are within limits + */ +export class InsufficientContextError extends SDKError { + constructor( + message: string, + public conversationId: string, + public requiredTokens: number, + public availableTokens: number, + public minimumRequiredTokens: 
number, + cause?: any + ) { + super(message, 'INSUFFICIENT_CONTEXT', 400, cause); + this.name = 'InsufficientContextError'; + } + + /** + * Get the token deficit + */ + getDeficit(): number { + return Math.max(0, this.requiredTokens - this.availableTokens); + } + + /** + * Whether the deficit could be recovered by adjusting the strategy + */ + isRecoverable(): boolean { + return this.availableTokens >= this.minimumRequiredTokens * 0.5; + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + return ` +Insufficient Context +==================== +Conversation ID: ${this.conversationId} +Required Tokens: ${this.requiredTokens} +Available Tokens: ${this.availableTokens} +Deficit: ${this.getDeficit()} tokens +Minimum Required: ${this.minimumRequiredTokens} +Recoverable: ${this.isRecoverable() ? 'Yes' : 'No'} + +Message: ${this.message} + `.trim(); + } +} + +/** + * Thrown when summarization fails or produces inadequate results + */ +export class SummarizationError extends SDKError { + constructor( + message: string, + public conversationId: string, + public messageCount: number, + public failureReason: 'provider_error' | 'invalid_response' | 'insufficient_tokens' | 'invalid_quality' | 'unknown', + public summaryAttempt?: string, + cause?: any + ) { + super(message, 'SUMMARIZATION_ERROR', 500, cause); + this.name = 'SummarizationError'; + } + + /** + * Whether the error is retryable + */ + isRetryable(): boolean { + return this.failureReason === 'provider_error' || this.failureReason === 'insufficient_tokens'; + } + + /** + * Get suggested recovery action + */ + getSuggestedRecovery(): string { + switch (this.failureReason) { + case 'provider_error': + return 'Retry the summarization request or switch to a different summarizer model'; + case 'invalid_response': + return 'Review the summarizer prompt or use a different model'; + case 'insufficient_tokens': + return 'Reduce the number of messages to summarize or increase the summary token budget'; 
+ case 'invalid_quality': + return 'Adjust summarization parameters or use a more capable model'; + default: + return 'Manual intervention required'; + } + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + const report = ` +Summarization Error +=================== +Conversation ID: ${this.conversationId} +Messages Attempted: ${this.messageCount} +Failure Reason: ${this.failureReason} +Retryable: ${this.isRetryable() ? 'Yes' : 'No'} +Recovery Action: ${this.getSuggestedRecovery()} + +Message: ${this.message}`; + + if (this.summaryAttempt) { + return report + ` + +Partial Summary: +${this.summaryAttempt.substring(0, 500)}${this.summaryAttempt.length > 500 ? '...' : ''}`; + } + + return report.trim(); + } +} + +/** + * Thrown when context window configuration is invalid + */ +export class ContextWindowConfigError extends SDKError { + constructor( + message: string, + public configField: string, + public providedValue: any, + public constraint: string, + cause?: any + ) { + super(message, 'CONTEXT_WINDOW_CONFIG_ERROR', 400, cause); + this.name = 'ContextWindowConfigError'; + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + return ` +Context Window Configuration Error +=================================== +Field: ${this.configField} +Provided Value: ${JSON.stringify(this.providedValue)} +Constraint: ${this.constraint} + +Message: ${this.message} + `.trim(); + } +} + +/** + * Thrown when a state operation is performed on a non-existent conversation + */ +export class ConversationNotFoundError extends SDKError { + constructor(message: string, public conversationId: string, cause?: any) { + super(message, 'CONVERSATION_NOT_FOUND', 404, cause); + this.name = 'ConversationNotFoundError'; + } +} + +/** + * Utility function to check if an error is context window related + */ +export function isContextWindowError(error: any): error is SDKError & { code: string } { + if (!error || typeof error !== 'object') { + return 
false; + } + + const contextWindowErrorCodes = [ + 'CONTEXT_WINDOW_EXCEEDED', + 'INSUFFICIENT_CONTEXT', + 'SUMMARIZATION_ERROR', + 'CONTEXT_WINDOW_CONFIG_ERROR', + 'CONVERSATION_NOT_FOUND', + ]; + + return contextWindowErrorCodes.includes(error.code); +} + +/** + * Utility function to handle context window errors + */ +export function handleContextWindowError( + error: SDKError, + _conversationId?: string +): { + shouldRetry: boolean; + shouldFallback: boolean; + action: 'prune' | 'summarize' | 'fail' | 'none'; + message: string; +} { + if (error instanceof SummarizationError) { + return { + shouldRetry: error.isRetryable(), + shouldFallback: true, + action: error.isRetryable() ? 'prune' : 'fail', + message: error.getSuggestedRecovery(), + }; + } + + if (error instanceof ContextWindowExceededError) { + return { + shouldRetry: false, + shouldFallback: true, + action: error.strategy === 'fail' ? 'prune' : 'none', + message: `Context window exceeded. Strategy: ${error.strategy}`, + }; + } + + if (error instanceof InsufficientContextError) { + return { + shouldRetry: false, + shouldFallback: true, + action: 'fail', + message: 'Insufficient context after recovery attempts', + }; + } + + return { + shouldRetry: false, + shouldFallback: false, + action: 'none', + message: 'Unknown context window error', + }; +} diff --git a/packages/toolpack-sdk/src/errors/index.ts b/packages/toolpack-sdk/src/errors/index.ts index 23c7f22..fa4f92a 100644 --- a/packages/toolpack-sdk/src/errors/index.ts +++ b/packages/toolpack-sdk/src/errors/index.ts @@ -53,3 +53,277 @@ export class TimeoutError extends SDKError { this.name = 'TimeoutError'; } } + +/** + * Thrown when a conversation exceeds the configured context window limit + * and cannot be recovered through pruning or summarization + */ +export class ContextWindowExceededError extends SDKError { + constructor( + message: string, + public conversationId: string, + public currentTokens: number, + public contextWindowLimit: number, + 
public strategy: 'prune' | 'summarize' | 'fail', + cause?: any + ) { + super(message, 'CONTEXT_WINDOW_EXCEEDED', 400, cause); + this.name = 'ContextWindowExceededError'; + } + + /** + * Get the number of tokens over the limit + */ + getOverageTokens(): number { + return Math.max(0, this.currentTokens - this.contextWindowLimit); + } + + /** + * Get the percentage of the context window being used + */ + getUsagePercentage(): number { + return Math.round((this.currentTokens / this.contextWindowLimit) * 100); + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + return ` +Context Window Exceeded +======================= +Conversation ID: ${this.conversationId} +Current Tokens: ${this.currentTokens} +Context Window Limit: ${this.contextWindowLimit} +Overage: ${this.getOverageTokens()} tokens +Usage: ${this.getUsagePercentage()}% +Strategy: ${this.strategy} + +Message: ${this.message} + `.trim(); + } +} + +/** + * Thrown when there is insufficient context remaining to process a request + * after pruning or summarization, even though tokens are within limits + */ +export class InsufficientContextError extends SDKError { + constructor( + message: string, + public conversationId: string, + public requiredTokens: number, + public availableTokens: number, + public minimumRequiredTokens: number, + cause?: any + ) { + super(message, 'INSUFFICIENT_CONTEXT', 400, cause); + this.name = 'InsufficientContextError'; + } + + /** + * Get the token deficit + */ + getDeficit(): number { + return Math.max(0, this.requiredTokens - this.availableTokens); + } + + /** + * Whether the deficit could be recovered by adjusting the strategy + */ + isRecoverable(): boolean { + return this.availableTokens >= this.minimumRequiredTokens * 0.5; + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + return ` +Insufficient Context +==================== +Conversation ID: ${this.conversationId} +Required Tokens: ${this.requiredTokens} +Available 
Tokens: ${this.availableTokens} +Deficit: ${this.getDeficit()} tokens +Minimum Required: ${this.minimumRequiredTokens} +Recoverable: ${this.isRecoverable() ? 'Yes' : 'No'} + +Message: ${this.message} + `.trim(); + } +} + +/** + * Thrown when summarization fails or produces inadequate results + */ +export class SummarizationError extends SDKError { + constructor( + message: string, + public conversationId: string, + public messageCount: number, + public failureReason: 'provider_error' | 'invalid_response' | 'insufficient_tokens' | 'invalid_quality' | 'unknown', + public summaryAttempt?: string, + cause?: any + ) { + super(message, 'SUMMARIZATION_ERROR', 500, cause); + this.name = 'SummarizationError'; + } + + /** + * Whether the error is retryable + */ + isRetryable(): boolean { + return this.failureReason === 'provider_error' || this.failureReason === 'insufficient_tokens'; + } + + /** + * Get suggested recovery action + */ + getSuggestedRecovery(): string { + switch (this.failureReason) { + case 'provider_error': + return 'Retry the summarization request or switch to a different summarizer model'; + case 'invalid_response': + return 'Review the summarizer prompt or use a different model'; + case 'insufficient_tokens': + return 'Reduce the number of messages to summarize or increase the summary token budget'; + case 'invalid_quality': + return 'Adjust summarization parameters or use a more capable model'; + default: + return 'Manual intervention required'; + } + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + const report = ` +Summarization Error +=================== +Conversation ID: ${this.conversationId} +Messages Attempted: ${this.messageCount} +Failure Reason: ${this.failureReason} +Retryable: ${this.isRetryable() ? 
'Yes' : 'No'} +Recovery Action: ${this.getSuggestedRecovery()} + +Message: ${this.message}`; + + if (this.summaryAttempt) { + return report + ` + +Partial Summary: +${this.summaryAttempt.substring(0, 500)}${this.summaryAttempt.length > 500 ? '...' : ''}`; + } + + return report.trim(); + } +} + +/** + * Thrown when context window configuration is invalid + */ +export class ContextWindowConfigError extends SDKError { + constructor( + message: string, + public configField: string, + public providedValue: any, + public constraint: string, + cause?: any + ) { + super(message, 'CONTEXT_WINDOW_CONFIG_ERROR', 400, cause); + this.name = 'ContextWindowConfigError'; + } + + /** + * Get a detailed error report + */ + getDetailedReport(): string { + return ` +Context Window Configuration Error +=================================== +Field: ${this.configField} +Provided Value: ${JSON.stringify(this.providedValue)} +Constraint: ${this.constraint} + +Message: ${this.message} + `.trim(); + } +} + +/** + * Thrown when a state operation is performed on a non-existent conversation + */ +export class ConversationNotFoundError extends SDKError { + constructor(message: string, public conversationId: string, cause?: any) { + super(message, 'CONVERSATION_NOT_FOUND', 404, cause); + this.name = 'ConversationNotFoundError'; + } +} + +/** + * Utility function to check if an error is context window related + */ +export function isContextWindowError(error: any): error is SDKError & { code: string } { + if (!error || typeof error !== 'object') { + return false; + } + + const contextWindowErrorCodes = [ + 'CONTEXT_WINDOW_EXCEEDED', + 'INSUFFICIENT_CONTEXT', + 'SUMMARIZATION_ERROR', + 'CONTEXT_WINDOW_CONFIG_ERROR', + 'CONVERSATION_NOT_FOUND', + ]; + + return contextWindowErrorCodes.includes(error.code); +} + +/** + * Utility function to handle context window errors + */ +export function handleContextWindowError( + error: SDKError, + _conversationId?: string +): { + shouldRetry: boolean; + 
shouldFallback: boolean; + action: 'prune' | 'summarize' | 'fail' | 'none'; + message: string; +} { + if (error instanceof SummarizationError) { + return { + shouldRetry: error.isRetryable(), + shouldFallback: true, + action: error.isRetryable() ? 'prune' : 'fail', + message: error.getSuggestedRecovery(), + }; + } + + if (error instanceof ContextWindowExceededError) { + return { + shouldRetry: false, + shouldFallback: true, + action: error.strategy === 'fail' ? 'prune' : 'none', + message: `Context window exceeded. Strategy: ${error.strategy}`, + }; + } + + if (error instanceof InsufficientContextError) { + return { + shouldRetry: false, + shouldFallback: true, + action: 'fail', + message: 'Insufficient context after recovery attempts', + }; + } + + return { + shouldRetry: false, + shouldFallback: false, + action: 'none', + message: 'Unknown context window error', + }; +} diff --git a/packages/toolpack-sdk/src/index.ts b/packages/toolpack-sdk/src/index.ts index 83246fd..0a0eb9d 100644 --- a/packages/toolpack-sdk/src/index.ts +++ b/packages/toolpack-sdk/src/index.ts @@ -8,4 +8,8 @@ export * from './workflows/index.js'; export * from './toolpack.js'; export * from './utils/home-config.js'; export * from './utils/runtime-config-loader.js'; +export * from './utils/token-counter.js'; +export * from './utils/message-pruner.js'; +export * from './utils/message-summarizer.js'; +export * from './utils/context-window-state.js'; export * from './mcp/index.js'; \ No newline at end of file diff --git a/packages/toolpack-sdk/src/providers/base/index.ts b/packages/toolpack-sdk/src/providers/base/index.ts index 1f295f3..d3bd9c7 100644 --- a/packages/toolpack-sdk/src/providers/base/index.ts +++ b/packages/toolpack-sdk/src/providers/base/index.ts @@ -1,4 +1,4 @@ -import { CompletionRequest, CompletionResponse, CompletionChunk, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js"; +import { CompletionRequest, 
CompletionResponse, CompletionChunk, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse, Message } from "../../types/index.js"; export { CompletionRequest, CompletionResponse, CompletionChunk, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js"; import { InvalidRequestError } from "../../errors/index.js"; @@ -72,4 +72,14 @@ export abstract class ProviderAdapter { async deleteFile(_fileId: string): Promise { throw new InvalidRequestError(`File deletion API is not supported by ${this.getDisplayName()}`); } + + /** + * Estimates the number of tokens for the given messages and model. + * @param _messages The messages to count. + * @param _model The model to count for. + * @returns The number of tokens or null if not supported. + */ + async countTokens(_messages: Message[], _model: string): Promise { + return null; + } } diff --git a/packages/toolpack-sdk/src/providers/config.ts b/packages/toolpack-sdk/src/providers/config.ts index 306e038..5382235 100644 --- a/packages/toolpack-sdk/src/providers/config.ts +++ b/packages/toolpack-sdk/src/providers/config.ts @@ -1,6 +1,7 @@ import * as fs from 'fs'; import * as path from 'path'; import { ModeConfig } from '../modes/mode-types.js'; +import { ContextWindowConfig } from '../types/index.js'; import { SDKError } from '../errors/index.js'; const CONFIG_FILENAME = 'toolpack.config.json'; @@ -80,6 +81,9 @@ export interface ToolpackConfig { /** Human-in-the-loop configuration for tool confirmation */ hitl?: HitlConfig; + + /** Context window management configuration for automatic conversation pruning/summarization */ + contextWindow?: ContextWindowConfig; } // ============================================================================ diff --git a/packages/toolpack-sdk/src/toolpack.ts b/packages/toolpack-sdk/src/toolpack.ts index 5ba7b06..d625919 100644 --- a/packages/toolpack-sdk/src/toolpack.ts +++ 
b/packages/toolpack-sdk/src/toolpack.ts @@ -8,7 +8,7 @@ import { EmbeddingRequest, EmbeddingResponse, } from './providers/base/index.js'; -import { ProviderInfo, ProviderModelInfo } from "./types/index.js"; +import { ProviderInfo, ProviderModelInfo, ContextWindowConfig } from "./types/index.js"; import { OpenAIAdapter } from './providers/openai/index.js'; import { AnthropicAdapter } from './providers/anthropic/index.js'; import { GeminiAdapter } from './providers/gemini/index.js'; @@ -57,6 +57,9 @@ export interface ToolpackInitConfig { /** Load built-in tools (fs, http, etc.)? Default: false */ tools?: boolean; + /** Context window management configuration for automatic conversation pruning/summarization */ + contextWindow?: ContextWindowConfig; + /** Custom tool projects to load in addition to built-ins */ customTools?: ToolProject[]; @@ -380,6 +383,7 @@ export class Toolpack extends EventEmitter { hitlConfig: Object.keys(hitlConfig).length > 0 ? hitlConfig : undefined, onToolConfirm: config.onToolConfirm, conversationId: config.conversationId, + contextWindowConfig: config.contextWindow, }); const instance = new Toolpack(client, defaultProviderName, modeRegistry); diff --git a/packages/toolpack-sdk/src/types/index.ts b/packages/toolpack-sdk/src/types/index.ts index 6cefc44..fee33ed 100644 --- a/packages/toolpack-sdk/src/types/index.ts +++ b/packages/toolpack-sdk/src/types/index.ts @@ -281,3 +281,67 @@ export interface ProviderInfo { /** Available models from this provider */ models: ProviderModelInfo[]; } + +// ── Context Window Management Types ──────────────────────────── + +/** + * Strategy for handling context window limit scenarios + */ +export type ContextWindowStrategy = 'prune' | 'summarize' | 'fail'; + +/** + * Configuration for automatic context window management + */ +export interface ContextWindowConfig { + /** Master switch for context window management. 
Default: true */ + enabled?: boolean; + + /** Strategy when context limit is approached or exceeded. Default: 'prune' */ + strategy?: ContextWindowStrategy; + + /** + * Percentage of context window to trigger pruning/summarization. + * When current tokens exceed this percentage, cleanup is initiated. + * Default: 85 + */ + pruneThreshold?: number; + + /** + * Optional maximum message history length as fallback limit. + * Useful for caps independent of token counting. + * When set, removes messages when count exceeds this. + */ + maxMessageHistoryLength?: number; + + /** + * Model to use for conversation summarization (if strategy is 'summarize'). + * If omitted, uses the same model as the current request. + * Example: 'gpt-4.1-mini' for faster/cheaper summaries + */ + summarizerModel?: string; + + /** + * Whether to always retain system messages (never prune them). + * Default: true + */ + retainSystemMessages?: boolean; + + /** + * Percentage buffer above actual maxOutputTokens to reserve for safety. 
+ * Default: 1.15 (15% buffer) + */ + outputTokenBuffer?: number; +} + +/** + * Tracks context window state per conversation for monitoring + */ +export interface ContextWindowState { + conversationId?: string; + estimatedTokens: number; + lastUpdated: number; + pruneCount: number; + lastPrunedAt?: number; + warningsSent: number; + summarizationCount: number; +} diff --git a/packages/toolpack-sdk/src/types/js-tiktoken.d.ts b/packages/toolpack-sdk/src/types/js-tiktoken.d.ts new file mode 100644 index 0000000..821e570 --- /dev/null +++ b/packages/toolpack-sdk/src/types/js-tiktoken.d.ts @@ -0,0 +1 @@ +declare module 'js-tiktoken'; diff --git a/packages/toolpack-sdk/src/utils/context-window-state.ts b/packages/toolpack-sdk/src/utils/context-window-state.ts new file mode 100644 index 0000000..cfd1077 --- /dev/null +++ b/packages/toolpack-sdk/src/utils/context-window-state.ts @@ -0,0 +1,337 @@ +import type { ContextWindowState, ContextWindowConfig } from '../types/index.js'; + +/** + * Manages per-conversation context window state + * Tracks token usage, pruning operations, and summarization events + */ +export class ContextWindowStateManager { + private states: Map = new Map(); + private config: ContextWindowConfig; + private maxTokens: number = 100000; + + constructor(config: ContextWindowConfig) { + this.config = config; + } + + /** + * Gets or creates state for a conversation + */ + getOrCreateState(conversationId: string): ContextWindowState { + if (!this.states.has(conversationId)) { + this.states.set(conversationId, { + conversationId, + estimatedTokens: 0, + lastUpdated: Date.now(), + pruneCount: 0, + lastPrunedAt: undefined, + warningsSent: 0, + summarizationCount: 0, + }); + } + + return this.states.get(conversationId)!; + } + + /** + * Updates token count for a conversation + */ + updateTokenCount(conversationId: string, tokens: number): ContextWindowState { + const state = this.getOrCreateState(conversationId); + state.estimatedTokens = tokens; + 
state.lastUpdated = Date.now(); + return state; + } + + /** + * Increments pruning operation counter + */ + recordPruneOperation(conversationId: string, tokensRecovered: number): ContextWindowState { + const state = this.getOrCreateState(conversationId); + state.pruneCount++; + state.lastPrunedAt = Date.now(); + state.estimatedTokens = Math.max(0, state.estimatedTokens - tokensRecovered); + state.lastUpdated = Date.now(); + return state; + } + + /** + * Increments warning count + */ + recordWarning(conversationId: string): ContextWindowState { + const state = this.getOrCreateState(conversationId); + state.warningsSent++; + state.lastUpdated = Date.now(); + return state; + } + + /** + * Increments summarization count + */ + recordSummarization(conversationId: string, tokensSaved: number): ContextWindowState { + const state = this.getOrCreateState(conversationId); + state.summarizationCount++; + state.estimatedTokens = Math.max(0, state.estimatedTokens - tokensSaved); + state.lastUpdated = Date.now(); + return state; + } + + /** + * Gets the current state for a conversation + */ + getState(conversationId: string): ContextWindowState | undefined { + return this.states.get(conversationId); + } + + /** + * Gets all tracked conversation states + */ + getAllStates(): ContextWindowState[] { + return Array.from(this.states.values()); + } + + /** + * Deletes state for a conversation + */ + deleteState(conversationId: string): boolean { + return this.states.delete(conversationId); + } + + /** + * Clears all states + */ + clearAllStates(): void { + this.states.clear(); + } + + /** + * Gets statistics for a conversation + */ + getStatistics(conversationId: string): { + conversationId: string; + currentTokens: number; + pruneCount: number; + summarizationCount: number; + warningsSent: number; + lastActivity: Date | undefined; + contextWindowPercentage: number; + } | null { + const state = this.states.get(conversationId); + if (!state) { + return null; + } + + const 
contextWindowLimit = this.config.pruneThreshold || 100000; + const contextWindowPercentage = Math.round((state.estimatedTokens / contextWindowLimit) * 100); + + return { + conversationId, + currentTokens: state.estimatedTokens, + pruneCount: state.pruneCount, + summarizationCount: state.summarizationCount, + warningsSent: state.warningsSent, + lastActivity: new Date(state.lastUpdated), + contextWindowPercentage, + }; + } + + /** + * Gets conversations exceeding a threshold + */ + getExceedingThreshold(threshold?: number): ContextWindowState[] { + const limit = threshold || (this.maxTokens * (this.config.pruneThreshold || 85) / 100); + return Array.from(this.states.values()).filter((state) => state.estimatedTokens > limit); + } + + /** + * Gets conversations at risk (approaching threshold) + */ + getAtRiskConversations(riskPercentage: number = 80): ContextWindowState[] { + const limit = this.maxTokens; + const riskThreshold = (limit * riskPercentage) / 100; + return Array.from(this.states.values()).filter((state) => state.estimatedTokens > riskThreshold && state.estimatedTokens <= limit); + } + + /** + * Generates a report of all conversation states + */ + generateReport(): string { + const states = this.getAllStates(); + if (states.length === 0) { + return 'No conversations tracked yet.'; + } + + const limit = this.config.pruneThreshold || 100000; + const lines: string[] = [ + 'Context Window State Report', + '==========================', + `Report Generated: ${new Date().toISOString()}`, + `Context Window Limit: ${limit} tokens`, + `Total Conversations: ${states.length}`, + '', + ]; + + // Summary statistics + const totalTokens = states.reduce((sum, s) => sum + s.estimatedTokens, 0); + const totalPrunes = states.reduce((sum, s) => sum + s.pruneCount, 0); + const totalSummarizations = states.reduce((sum, s) => sum + s.summarizationCount, 0); + const avgTokens = Math.round(totalTokens / states.length); + + lines.push('Summary:'); + lines.push(`- Total tokens across 
all conversations: ${totalTokens}`); + lines.push(`- Average tokens per conversation: ${avgTokens}`); + lines.push(`- Total prune operations: ${totalPrunes}`); + lines.push(`- Total summarizations: ${totalSummarizations}`); + lines.push(''); + + // At-risk conversations + const atRisk = this.getAtRiskConversations(); + if (atRisk.length > 0) { + lines.push(`At-Risk Conversations (80%+ threshold): ${atRisk.length}`); + atRisk.forEach((state) => { + const pct = Math.round((state.estimatedTokens / limit) * 100); + lines.push(`- ${state.conversationId}: ${state.estimatedTokens}/${limit} tokens (${pct}%)`); + }); + lines.push(''); + } + + // Exceeded conversations + const exceeded = this.getExceedingThreshold(); + if (exceeded.length > 0) { + lines.push(`Exceeded Conversations: ${exceeded.length}`); + exceeded.forEach((state) => { + const over = state.estimatedTokens - limit; + lines.push(`- ${state.conversationId}: ${state.estimatedTokens}/${limit} tokens (+${over} over)`); + }); + lines.push(''); + } + + // Most active conversations + lines.push('Most Active Conversations (by operations):'); + const sorted = [...states].sort((a, b) => { + const aOps = a.pruneCount + a.summarizationCount; + const bOps = b.pruneCount + b.summarizationCount; + return bOps - aOps; + }); + + sorted.slice(0, 5).forEach((state) => { + const ops = state.pruneCount + state.summarizationCount; + lines.push(`- ${state.conversationId}: ${state.pruneCount} prunes, ${state.summarizationCount} summarizations (${ops} operations)`); + }); + + return lines.join('\n'); + } + + /** + * Exports state as JSON for persistence + */ + export(): Record { + const result: Record = {}; + for (const [key, value] of this.states.entries()) { + result[key] = { + ...value, + lastUpdated: value.lastUpdated, + lastPrunedAt: value.lastPrunedAt, + }; + } + return result; + } + + /** + * Imports state from JSON + */ + import(data: Record): void { + for (const [key, value] of Object.entries(data)) { + this.states.set(key, { 
+ ...value, + lastUpdated: typeof value.lastUpdated === 'number' ? value.lastUpdated : new Date(value.lastUpdated).getTime(), + lastPrunedAt: value.lastPrunedAt && typeof value.lastPrunedAt !== 'number' ? new Date(value.lastPrunedAt).getTime() : value.lastPrunedAt, + }); + } + } + + /** + * Prunes old conversations (no activity in specified time) + */ + pruneInactiveConversations(inactivityMinutes: number = 60): string[] { + const cutoffTime = Date.now() - inactivityMinutes * 60 * 1000; + const toDelete: string[] = []; + + for (const [conversationId, state] of this.states.entries()) { + if (state.lastUpdated < cutoffTime) { + toDelete.push(conversationId); + } + } + + toDelete.forEach((id) => this.states.delete(id)); + return toDelete; + } + + /** + * Gets memory usage of the state manager + */ + getMemoryUsage(): { + conversationCount: number; + approximateByteSize: number; + } { + const conversationCount = this.states.size; + // Rough estimation: ~500 bytes per conversation state + const approximateByteSize = conversationCount * 500; + + return { + conversationCount, + approximateByteSize, + }; + } + + /** + * Validates state integrity + */ + validateIntegrity(): { + isValid: boolean; + issues: string[]; + } { + const issues: string[] = []; + + for (const [conversationId, state] of this.states.entries()) { + if (!conversationId || typeof conversationId !== 'string') { + issues.push(`Invalid conversation ID: ${conversationId}`); + } + + if (state.estimatedTokens < 0) { + issues.push(`Negative token count for ${conversationId}: ${state.estimatedTokens}`); + } + + if (state.pruneCount < 0) { + issues.push(`Negative prune count for ${conversationId}: ${state.pruneCount}`); + } + + if (state.summarizationCount < 0) { + issues.push(`Negative summarization count for ${conversationId}: ${state.summarizationCount}`); + } + + if (state.warningsSent < 0) { + issues.push(`Negative warning count for ${conversationId}: ${state.warningsSent}`); + } + + if (typeof 
state.lastUpdated !== 'number' || state.lastUpdated <= 0) { + issues.push(`Invalid lastUpdated timestamp for ${conversationId}`); + } + + if (state.lastPrunedAt !== undefined && (typeof state.lastPrunedAt !== 'number' || state.lastPrunedAt <= 0)) { + issues.push(`Invalid lastPrunedAt timestamp for ${conversationId}`); + } + } + + return { + isValid: issues.length === 0, + issues, + }; + } +} + +/** + * Creates a new ContextWindowStateManager with the given config + */ +export function createContextWindowStateManager(config: ContextWindowConfig): ContextWindowStateManager { + return new ContextWindowStateManager(config); +} diff --git a/packages/toolpack-sdk/src/utils/message-pruner.ts b/packages/toolpack-sdk/src/utils/message-pruner.ts new file mode 100644 index 0000000..fe1b42a --- /dev/null +++ b/packages/toolpack-sdk/src/utils/message-pruner.ts @@ -0,0 +1,222 @@ +/** + * Message Pruning Utilities + * + * Implements strategies for removing messages from conversation history + * to stay within context window limits. 
+ */ + +import { Message, TextPart } from '../types/index.js'; + +export interface PruneResult { + removed: number; // number of messages removed + tokensReclaimed: number; // estimated tokens freed + newTotal: number; // total tokens after pruning + pruneInfo: { + beforeCount: number; + afterCount: number; + removedMessages: Message[]; + }; +} + +/** + * Remove oldest messages to reclaim tokens + * + * Strategy: Remove oldest user/assistant pairs first, keeping system messages always + */ +export function pruneMessages( + messages: Message[], + targetTokens: number, + retainSystemMessages: boolean = true +): PruneResult { + const beforeCount = messages.length; + const removedMessages: Message[] = []; + let tokensReclaimed = 0; + + // Identify which messages are safe to remove + const prunableMessages: Array<{ index: number; message: Message }> = []; + + messages.forEach((msg, idx) => { + // Keep system messages if requested + if (retainSystemMessages && msg.role === 'system') { + return; + } + + // Keep tool results as they're linked to assistant messages + if (msg.role === 'tool') { + return; + } + + prunableMessages.push({ index: idx, message: msg }); + }); + + // Remove oldest prunable messages until target is met + for (const { message } of prunableMessages) { + if (tokensReclaimed >= targetTokens) break; + + // Estimate tokens in this message + const msgTokens = estimateMessageTokens(message); + tokensReclaimed += msgTokens; + removedMessages.push(message); + } + + // Remove messages from history (in reverse index order to maintain indices) + const removeIndices = new Set(removedMessages.map(msg => messages.indexOf(msg))); + const filteredMessages = messages.filter((_, idx) => !removeIndices.has(idx)); + + return { + removed: removedMessages.length, + tokensReclaimed, + newTotal: filteredMessages.length, + pruneInfo: { + beforeCount, + afterCount: filteredMessages.length, + removedMessages, + }, + }; +} + +/** + * Truncate messages that exceed context window 
+ */ +export function truncateMessage(message: Message, maxTokens: number): Message { + if (typeof message.content === 'string') { + // Rough estimate: ~4 chars per token + const maxChars = maxTokens * 4; + if (message.content.length <= maxChars) { + return message; + } + + const truncated = message.content.substring(0, maxChars); + const omittedTokens = Math.ceil((message.content.length - maxChars) / 4); + + return { + ...message, + content: `${truncated}\n\n[...truncated ${omittedTokens} tokens]`, + }; + } else if (Array.isArray(message.content)) { + // For multipart content, remove images and keep text up to limit + const textParts = message.content.filter(p => p.type === 'text'); + const totalChars = textParts.reduce((sum, p) => sum + ((p as any).text?.length || 0), 0); + const maxChars = maxTokens * 4; + + if (totalChars <= maxChars) { + return message; + } + + let charCount = 0; + const keptParts: TextPart[] = []; + + for (const part of textParts) { + if (part.type === 'text') { + const remaining = maxChars - charCount; + if (remaining <= 0) break; + + const txt = part.text; + if (txt.length <= remaining) { + keptParts.push(part); + charCount += txt.length; + } else { + const truncated = txt.substring(0, remaining); + const omittedTokens = Math.ceil((txt.length - remaining) / 4); + keptParts.push({ + type: 'text', + text: `${truncated}\n\n[...truncated ${omittedTokens} tokens]`, + }); + break; + } + } + } + + return { + ...message, + content: keptParts.length > 0 ? 
keptParts : message.content, + }; + } + + return message; +} + +/** + * Estimate tokens in a single message (for pruning calculations) + */ +function estimateMessageTokens(message: Message): number { + // Base overhead for message structure + let tokens = 4; + + if (typeof message.content === 'string') { + tokens += Math.ceil(message.content.length / 4); + } else if (Array.isArray(message.content)) { + for (const part of message.content) { + if (part.type === 'text') { + tokens += Math.ceil(((part as any).text?.length || 0) / 4); + } else if (part.type === 'image_data' || part.type === 'image_url' || part.type === 'image_file') { + // Rough estimate for images + tokens += 256; + } + } + } + + // Add tokens for tool calls + if (message.tool_calls?.length) { + for (const tc of message.tool_calls) { + tokens += Math.ceil(tc.function.name.length / 4); + tokens += Math.ceil(tc.function.arguments.length / 4); + } + } + + // Add name tokens + if (message.name) { + tokens += Math.ceil(message.name.length / 4); + } + + return tokens; +} + +/** + * Group messages by type for analysis + */ +export function groupMessagesByRole(messages: Message[]): Record { + const groups: Record = { + system: [], + user: [], + assistant: [], + tool: [], + }; + + messages.forEach(msg => { + groups[msg.role] ??= []; + groups[msg.role].push(msg); + }); + + return groups; +} + +/** + * Get summary stats about messages + */ +export function getMessageStats(messages: Message[]): { + totalMessages: number; + totalTokens: number; + byRole: Record; + largestMessageTokens: number; +} { + let totalTokens = 0; + const byRole: Record = {}; + let largestMessageTokens = 0; + + for (const msg of messages) { + const msgTokens = estimateMessageTokens(msg); + totalTokens += msgTokens; + largestMessageTokens = Math.max(largestMessageTokens, msgTokens); + + byRole[msg.role] ??= 0; + byRole[msg.role]++; + } + + return { + totalMessages: messages.length, + totalTokens, + byRole, + largestMessageTokens, + }; +} diff 
--git a/packages/toolpack-sdk/src/utils/message-summarizer.ts b/packages/toolpack-sdk/src/utils/message-summarizer.ts new file mode 100644 index 0000000..4beb150 --- /dev/null +++ b/packages/toolpack-sdk/src/utils/message-summarizer.ts @@ -0,0 +1,344 @@ +import type { Message } from '../types/index.js'; +import { SDKError } from '../errors/index.js'; + +function getMessageText(message: Message): string { + if (message.content == null) { + return ''; + } + + if (typeof message.content === 'string') { + return message.content; + } + + return message.content + .map((part) => { + if (part.type === 'text') { + return part.text; + } + + if (part.type === 'image_url') { + return `[image: ${part.image_url.url}]`; + } + + if (part.type === 'image_file') { + return `[image-file: ${part.image_file.path}]`; + } + + if (part.type === 'image_data') { + return `[image-data: ${part.image_data.mimeType}]`; + } + + return ''; + }) + .filter(Boolean) + .join(' '); +} + +/** + * Options for summarizing messages + */ +export interface SummarizationOptions { + /** Model to use for summarization (e.g., 'gpt-4-turbo') */ + model: string; + /** Maximum tokens for summary (default: 500) */ + maxSummaryTokens?: number; + /** Whether to preserve exact message boundaries or create coherent summary (default: false) */ + preserveExactMessages?: boolean; + /** Custom summarization prompt template */ + summaryPrompt?: string; + /** Custom format for summary marker in message history */ + summaryMarkerFormat?: string; +} + +/** + * Result of a summarization operation + */ +export interface SummarizationResult { + /** Summary content */ + summary: string; + /** Number of messages that were summarized */ + messageCount: number; + /** Approximate tokens in original messages */ + originalTokens: number; + /** Approximate tokens in summary */ + summaryTokens: number; + /** Number of tokens saved */ + tokensSaved: number; + /** Timestamp of summarization */ + timestamp: Date; +} + +/** + * Generates a 
default summarization prompt for the given messages + */ +export function generateSummarizationPrompt( + messages: Message[], + userPrompt?: string +): string { + if (userPrompt) { + return userPrompt; + } + + // Extract conversation context + const userMessages = messages.filter((m) => m.role === 'user'); + const assistantMessages = messages.filter((m) => m.role === 'assistant'); + const toolMessages = messages.filter((m) => m.role === 'tool'); + + const messageCount = messages.length; + const userCount = userMessages.length; + const assistantCount = assistantMessages.length; + + return `Please provide a concise summary of the following conversation history. The conversation contains ${messageCount} messages (${userCount} user messages, ${assistantCount} assistant responses${toolMessages.length > 0 ? `, and ${toolMessages.length} tool responses` : ''}). + +Focus on: +1. Key topics discussed +2. Important decisions or conclusions +3. User's intent and goals +4. Relevant context for continuing the conversation + +The summary should be comprehensive yet concise, preserving all critical information needed to continue the conversation naturally. + +--- +CONVERSATION: +${messages.map((m, i) => { + const content = getMessageText(m); + return `[Message ${i + 1}] ${m.role.toUpperCase()}: ${content.substring(0, 200)}${content.length > 200 ? '...' : ''}`; +}) + .join('\n')} +--- + +SUMMARY:`; +} + +/** + * Creates a system message containing the conversation summary + */ +export function createSummarySystemMessage(summary: string, originalMessageCount: number): Message { + return { + role: 'system', + content: `[Context Summary] +This conversation has been summarized to manage context window. The following is a summary of the first ${originalMessageCount} messages: + +${summary} + +[End Summary] + +Use this summary to understand the conversation context. 
When responding, acknowledge that you're aware of the previous conversation and continue naturally.`,
+  };
+}
+
+/**
+ * Extracts key information from messages for summarization
+ */
+export function extractConversationKeypoints(messages: Message[]): {
+  topics: string[];
+  decisions: string[];
+  userGoals: string[];
+  context: string;
+} {
+  const topics: Set<string> = new Set();
+  const decisions: string[] = [];
+  const userGoals: string[] = [];
+  let lastUserMessage = '';
+
+  for (const message of messages) {
+    if (message.role === 'user') {
+      const content = getMessageText(message);
+      lastUserMessage = content;
+
+      // Extract potential goals (sentences ending with ? or containing action words)
+      if (content.includes('?')) {
+        userGoals.push(content.split('\n')[0]);
+      }
+
+      // Extract topics (capitalized phrases)
+      const topicMatches = content.match(/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/g);
+      if (topicMatches) {
+        topicMatches.forEach((t) => topics.add(t));
+      }
+    } else if (message.role === 'assistant') {
+      const content = getMessageText(message);
+
+      // Extract decisions (sentences with decision markers)
+      if (content.includes('decided') || content.includes('concluded') || content.includes('determined')) {
+        const sentences = content.split(/[.!?]+/);
+        for (const sentence of sentences) {
+          if (sentence.includes('decided') || sentence.includes('concluded') || sentence.includes('determined')) {
+            decisions.push(sentence.trim());
+          }
+        }
+      }
+    }
+  }
+
+  return {
+    topics: Array.from(topics).slice(0, 10),
+    decisions: decisions.slice(0, 5),
+    userGoals: userGoals.slice(0, 5),
+    context: lastUserMessage.substring(0, 200),
+  };
+}
+
+/**
+ * Estimates tokens in a summary (rough estimation)
+ */
+export function estimateSummaryTokens(summaryText: string): number {
+  // Rough estimation: ~1 token per 4 characters for most text
+  return Math.ceil(summaryText.length / 4);
+}
+
+/**
+ * Validates that a summarization result is sensible
+ */
+export function
validateSummarizationResult(result: SummarizationResult): { + valid: boolean; + issues: string[]; +} { + const issues: string[] = []; + + if (!result.summary || result.summary.length < 10) { + issues.push('Summary is too short'); + } + + if (result.summary.length > 5000) { + issues.push('Summary is excessively long'); + } + + if (result.messageCount < 2) { + issues.push('Must summarize at least 2 messages'); + } + + if (result.summaryTokens >= result.originalTokens * 0.8) { + issues.push('Summary is not significantly shorter than original messages'); + } + + if (result.tokensSaved < 0) { + issues.push('Token calculation error: saved tokens is negative'); + } + + return { + valid: issues.length === 0, + issues, + }; +} + +/** + * Prepares messages for summarization by the LLM + * Returns the messages that should be sent to the summarizer model + */ +export function prepareSummarizationRequest( + messagesToSummarize: Message[], + options: SummarizationOptions +): Message[] { + const prompt = generateSummarizationPrompt(messagesToSummarize, options.summaryPrompt); + + // Create a system message that instructs the model to summarize + const systemMessage: Message = { + role: 'system', + content: `You are a conversation summarizer. Your task is to create a clear, concise summary of the provided conversation that preserves all critical information. + +Maximum summary length: ${options.maxSummaryTokens || 500} tokens. 
+Format: Write only the summary without any additional commentary.`, + }; + + // Create the user message with the summarization request + const userMessage: Message = { + role: 'user', + content: prompt, + }; + + return [systemMessage, userMessage]; +} + +/** + * Parses the summarization response from the LLM + */ +export function parseSummarizationResponse( + response: string, + originalMessages: Message[], + originalTokenCount: number +): SummarizationResult { + const summaryTokens = estimateSummaryTokens(response); + const tokensSaved = Math.max(0, originalTokenCount - summaryTokens); + + return { + summary: response.trim(), + messageCount: originalMessages.length, + originalTokens: originalTokenCount, + summaryTokens, + tokensSaved, + timestamp: new Date(), + }; +} + +/** + * Builds a new message array with summarized history + */ +export function buildSummarizedHistory( + systemMessages: Message[], + summarizedContent: SummarizationResult, + recentMessages: Message[] +): Message[] { + // Keep system messages as-is + const result: Message[] = [...systemMessages]; + + // Add summary as a system message + result.push(createSummarySystemMessage(summarizedContent.summary, summarizedContent.messageCount)); + + // Add recent messages + result.push(...recentMessages); + + return result; +} + +/** + * Creates a detailed summarization report + */ +export function createSummarizationReport( + result: SummarizationResult, + beforeMessageCount: number, + afterMessageCount: number +): string { + const reductionPercent = Math.round((result.tokensSaved / result.originalTokens) * 100); + + return ` +Summarization Report +==================== +Timestamp: ${result.timestamp.toISOString()} +Status: ✓ Summarization completed + +Input Analysis: +- Messages summarized: ${result.messageCount} +- Original token count: ${result.originalTokens} +- Summary token count: ${result.summaryTokens} +- Tokens saved: ${result.tokensSaved} (${reductionPercent}% reduction) + +Message Count: +- 
Before: ${beforeMessageCount} messages +- After: ${afterMessageCount} messages +- Reduction: ${beforeMessageCount - afterMessageCount} messages + +Summary Preview: +${result.summary.substring(0, 300)}${result.summary.length > 300 ? '...' : ''} + `.trim(); +} + +/** + * Merges multiple summarization results into one + */ +export function mergeSummarizationResults(results: SummarizationResult[]): SummarizationResult { + if (results.length === 0) { + throw new SDKError('Cannot merge empty summarization results', 'CONTEXT_WINDOW_ERROR'); + } + + const merged: SummarizationResult = { + summary: results.map((r) => `[Round ${results.indexOf(r) + 1}] ${r.summary}`).join('\n\n'), + messageCount: results.reduce((sum, r) => sum + r.messageCount, 0), + originalTokens: results.reduce((sum, r) => sum + r.originalTokens, 0), + summaryTokens: results.reduce((sum, r) => sum + r.summaryTokens, 0), + tokensSaved: results.reduce((sum, r) => sum + r.tokensSaved, 0), + timestamp: new Date(), + }; + + return merged; +} diff --git a/packages/toolpack-sdk/src/utils/token-counter.ts b/packages/toolpack-sdk/src/utils/token-counter.ts new file mode 100644 index 0000000..5bb3b44 --- /dev/null +++ b/packages/toolpack-sdk/src/utils/token-counter.ts @@ -0,0 +1,238 @@ +/** + * Token Counting Utilities + * + * Provider-specific token counting for accurate context window management. + * Supports OpenAI (js-tiktoken), Anthropic, Gemini, and Ollama with fallback estimation. 
+ */
+
+import { Message } from '../types/index.js';
+
+// ============================================================================
+// Token Encoding Initialization (lazy-loaded)
+// ============================================================================
+
+let tiktoken: any = null;
+
+async function initTiktoken() {
+  if (!tiktoken) {
+    try {
+      const mod = await import('js-tiktoken');
+      tiktoken = mod;
+    } catch {
+      // tiktoken not available, will use fallback
+    }
+  }
+  return tiktoken;
+}
+
+// ============================================================================
+// OpenAI Token Counting (js-tiktoken)
+// ============================================================================
+
+const TOKENS_PER_MESSAGE: Record<string, number> = {
+  'gpt-4.1': 3,
+  'gpt-4.1-mini': 3,
+  'gpt-5.1': 3,
+  'gpt-5.2': 3,
+  'gpt-5.4': 3,
+  'gpt-5.4-pro': 3,
+  // Fallback for unknown models
+  '__default__': 4,
+};
+
+const TOKENS_PER_MESSAGE_SUFFIX = 2;
+
+async function countOpenAITokens(messages: Message[], model: string): Promise<number> {
+  try {
+    const tiktokenModule = await initTiktoken();
+    if (!tiktokenModule) {
+      return estimateTokenCount(messages);
+    }
+
+    const encoding = tiktokenModule.encoding_for_model(model);
+    let totalTokens = 0;
+
+    // Add tokens per message overhead
+    const tokensPerMessage = TOKENS_PER_MESSAGE[model] ??
TOKENS_PER_MESSAGE['__default__'];
+
+    for (const message of messages) {
+      totalTokens += tokensPerMessage;
+
+      if (typeof message.content === 'string') {
+        totalTokens += encoding.encode(message.content).length;
+      } else if (Array.isArray(message.content)) {
+        for (const part of message.content) {
+          if (part.type === 'text') {
+            totalTokens += encoding.encode((part as any).text).length;
+          } else if (part.type === 'image_data' || part.type === 'image_url' || part.type === 'image_file') {
+            // Estimate image tokens: ~256 tokens + detail
+            totalTokens += 256;
+          }
+        }
+      }
+
+      if (message.tool_calls?.length) {
+        for (const toolCall of message.tool_calls) {
+          totalTokens += encoding.encode(toolCall.function.name).length;
+          totalTokens += encoding.encode(toolCall.function.arguments).length;
+        }
+      }
+
+      if (message.name) {
+        totalTokens += encoding.encode(message.name).length;
+      }
+    }
+
+    // Add reply tokens overhead
+    totalTokens += TOKENS_PER_MESSAGE_SUFFIX;
+
+    return totalTokens;
+  } catch (error) {
+    // Fallback to estimation if tiktoken fails
+    return estimateTokenCount(messages);
+  }
+}
+
+// ============================================================================
+// Anthropic Token Counting
+// ============================================================================
+
+async function countAnthropicTokens(messages: Message[], _model: string): Promise<number> {
+  try {
+    // Anthropic's token counting API would be called here
+    // For now, use estimation with slight adjustment for Anthropic's tokenizer
+    const baseEstimate = estimateTokenCount(messages);
+    // Anthropic tends to count tokens slightly differently, add ~10% margin
+    return Math.ceil(baseEstimate * 1.1);
+  } catch {
+    return estimateTokenCount(messages);
+  }
+}
+
+// ============================================================================
+// Gemini Token Counting
+// ============================================================================
+
+async function countGeminiTokens(messages: Message[],
_model: string): Promise<number> {
+  try {
+    // Gemini's token counting would use their API
+    // For now, use estimation with adjustment
+    const baseEstimate = estimateTokenCount(messages);
+    // Gemini's tokenizer is similar to OpenAI but with slight variations
+    return Math.ceil(baseEstimate * 1.05);
+  } catch {
+    return estimateTokenCount(messages);
+  }
+}
+
+// ============================================================================
+// Ollama Token Counting (Estimation)
+// ============================================================================
+
+async function countOllamaTokens(messages: Message[], _model: string): Promise<number> {
+  // Ollama uses similar tokenization to llama models
+  // Use estimation as Ollama doesn't provide token counting API
+  const baseEstimate = estimateTokenCount(messages);
+  // Llama tokenization tends to be ~1.1x character/4
+  return Math.ceil(baseEstimate * 1.05);
+}
+
+// ============================================================================
+// Fallback: Estimation (chars / 4)
+// ============================================================================
+
+export function estimateTokenCount(messages: Message[]): number {
+  let totalChars = 0;
+
+  for (const message of messages) {
+    // Add overhead for message structure
+    totalChars += 50;
+
+    if (typeof message.content === 'string') {
+      totalChars += message.content.length;
+    } else if (Array.isArray(message.content)) {
+      for (const part of message.content) {
+        if (part.type === 'text') {
+          totalChars += (part as any).text.length;
+        } else if (part.type === 'image_data' || part.type === 'image_url' || part.type === 'image_file') {
+          // Estimate ~1000 chars per image
+          totalChars += 1000;
+        }
+      }
+    }
+
+    if (message.tool_calls?.length) {
+      for (const toolCall of message.tool_calls) {
+        totalChars += toolCall.function.name.length;
+        totalChars += toolCall.function.arguments.length;
+      }
+    }
+
+    if (message.name) {
+      totalChars += message.name.length;
+    }
+  }
+
+  // Rough
estimate: ~4 chars per token (OpenAI/Anthropic standard)
+  return Math.ceil(totalChars / 4);
+}
+
+// ============================================================================
+// Main API: Count tokens for any provider/model
+// ============================================================================
+
+export async function countTokens(
+  messages: Message[],
+  model: string,
+  provider: string
+): Promise<number> {
+  // Normalize provider name
+  const normalizedProvider = provider.toLowerCase();
+
+  if (normalizedProvider === 'openai' || normalizedProvider === 'openai-gpt') {
+    return countOpenAITokens(messages, model);
+  } else if (normalizedProvider === 'anthropic' || normalizedProvider === 'claude') {
+    return countAnthropicTokens(messages, model);
+  } else if (normalizedProvider === 'gemini' || normalizedProvider === 'google') {
+    return countGeminiTokens(messages, model);
+  } else if (normalizedProvider === 'ollama') {
+    return countOllamaTokens(messages, model);
+  } else {
+    // Fallback for unknown providers
+    return estimateTokenCount(messages);
+  }
+}
+
+// ============================================================================
+// Utilities
+// ============================================================================
+
+/**
+ * Calculate if a request would exceed the context window given available space
+ */
+export function wouldExceedContextWindow(
+  currentTokens: number,
+  contextWindow: number,
+  maxOutputTokens: number
+): boolean {
+  const availableForInput = contextWindow - maxOutputTokens;
+  return currentTokens > availableForInput;
+}
+
+/**
+ * Calculate percentage of context window used
+ */
+export function getContextWindowPercentage(
+  currentTokens: number,
+  contextWindow: number
+): number {
+  return Math.round((currentTokens / contextWindow) * 100);
+}
+
+/**
+ * Get safe reserve tokens for output (accounting for overhead)
+ */
+export function getSafeOutputReserve(maxOutputTokens: number, bufferPercentage: number = 1.15):
number { + // Add 15% buffer by default for overhead (message wrapping, tools, etc.) + return Math.ceil(maxOutputTokens * bufferPercentage); +} diff --git a/packages/toolpack-sdk/src/workflows/workflow-types.ts b/packages/toolpack-sdk/src/workflows/workflow-types.ts index da30282..dc5ffb3 100644 --- a/packages/toolpack-sdk/src/workflows/workflow-types.ts +++ b/packages/toolpack-sdk/src/workflows/workflow-types.ts @@ -120,6 +120,18 @@ export interface WorkflowEvents { /** Emitted for progress updates */ 'workflow:progress': (progress: WorkflowProgress) => void; + /** Emitted when context window usage is high (approaching limit) */ + 'workflow:context_warning': (event: ContextWindowWarningEvent) => void; + + /** Emitted when context window would be exceeded */ + 'workflow:context_exceeded': (event: ContextWindowExceededEvent) => void; + + /** Emitted when messages are pruned to recover context */ + 'workflow:context_pruned': (event: ContextPrunedEvent) => void; + + /** Emitted when conversation is summarized for context recovery */ + 'workflow:conversation_summarized': (event: ConversationSummarizedEvent) => void; + /** Emitted when workflow completes */ 'workflow:completed': (plan: Plan, result: WorkflowResult) => void; @@ -127,6 +139,41 @@ export interface WorkflowEvents { 'workflow:failed': (plan: Plan, error: Error) => void; } +// ── Context Window Events ──────────────────────────── + +export interface ContextWindowWarningEvent { + currentTokens: number; + contextWindow: number; + percentage: number; + model: string; + conversationId?: string; +} + +export interface ContextWindowExceededEvent { + currentTokens: number; + contextWindow: number; + maxOutputTokens: number; + model: string; + strategy: 'prune' | 'summarize' | 'fail'; + conversationId?: string; +} + +export interface ContextPrunedEvent { + removed: number; + tokensReclaimed: number; + newTotal: number; + conversationId?: string; + beforeCount: number; + afterCount: number; +} + +export interface 
ConversationSummarizedEvent {
+  summarized: number;
+  summaryTokens: number;
+  tokensSaved: number;
+  conversationId?: string;
+}
+
 export interface WorkflowProgress {
   planId: string;
   currentStep: number;
diff --git a/packages/toolpack-sdk/tests/integration/context-window.test.ts b/packages/toolpack-sdk/tests/integration/context-window.test.ts
new file mode 100644
index 0000000..b2961f7
--- /dev/null
+++ b/packages/toolpack-sdk/tests/integration/context-window.test.ts
@@ -0,0 +1,363 @@
+import { describe, it, expect, beforeEach } from 'vitest';
+import { AIClient } from '../../src/client';
+import { ProviderAdapter, CompletionRequest, CompletionResponse, CompletionChunk, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo } from '../../src/providers/base';
+import { ContextWindowConfig } from '../../src/types/index';
+
+// Mock provider for testing context window enforcement
+class ContextWindowMockProvider implements ProviderAdapter {
+  callCount = 0;
+
+  async getModels(): Promise<ProviderModelInfo[]> {
+    return [
+      {
+        id: 'test-model',
+        displayName: 'Test Model',
+        contextWindow: 4096,
+        maxOutputTokens: 1024,
+        capabilities: {
+          chat: false,
+          streaming: false,
+          toolCalling: false,
+          embeddings: false,
+          vision: false,
+          reasoning: undefined,
+          fileUpload: undefined
+        }
+      }
+    ];
+  }
+
+  async countTokens(messages: any[]): Promise<number> {
+    // Rough estimation: each message ~50 tokens + content length / 4
+    let total = 0;
+    for (const msg of messages) {
+      total += 50;
+      if (typeof msg.content === 'string') {
+        total += Math.ceil(msg.content.length / 4);
+      }
+    }
+    return total;
+  }
+
+  async generate(request: CompletionRequest): Promise<CompletionResponse> {
+    this.callCount++;
+    // Echo back the message count so we can verify pruning happened
+    return {
+      content: `Processed ${request.messages.length} messages`
+    };
+  }
+
+  async *stream(request: CompletionRequest): AsyncGenerator<CompletionChunk> {
+    yield { delta: `Processing ${request.messages.length} messages` };
+    yield { finish_reason: 'stop' };
+  }
+
+  async
embed(request: EmbeddingRequest): Promise { + return { embeddings: [] }; + } + + getDisplayName(): string { + return 'test-provider'; + } +} + +describe('Context Window Enforcement in AIClient', () => { + let client: AIClient; + let provider: ContextWindowMockProvider; + + beforeEach(() => { + provider = new ContextWindowMockProvider(); + }); + + describe('Disabled Context Window', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + contextWindowConfig: { + enabled: false + } + }); + }); + + it('should not enforce context window when disabled', async () => { + const messages = [ + { role: 'user', content: 'Test' } + ]; + + const response = await client.generate({ + messages, + model: 'test-model' + }); + + expect(response.content).toContain('Processed 2 messages'); + }); + }); + + describe('Prune Strategy (Default)', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + contextWindowConfig: { + enabled: true, + strategy: 'prune', + pruneThreshold: 85, + maxMessageHistoryLength: 10 + } as ContextWindowConfig + }); + }); + + it('should prune old messages when approaching threshold', async () => { + const messages = Array.from({ length: 15 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `Message ${i}: This is a test message with some content` + })); + + const response = await client.generate({ + messages, + model: 'test-model' + }); + + // Should have pruned down to maxMessageHistoryLength (10) + const messageCount = parseInt(response.content?.match(/\d+/) || '0'); + expect(messageCount).toBeLessThanOrEqual(10); + }); + + it('should retain system messages during pruning', async () => { + const messages = [ + { role: 'system', content: 'You are a helpful assistant' }, + ...Array.from({ length: 15 }, (_, i) => ({ + role: i % 2 === 0 ? 
'user' : 'assistant', + content: `Message ${i}` + })) + ]; + + const response = await client.generate({ + messages, + model: 'test-model' + }); + + // Response should contain something (indicating it processed) + expect(response.content).toBeDefined(); + }); + }); + + describe('Max Message History Limit', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + contextWindowConfig: { + enabled: true, + strategy: 'prune', + maxMessageHistoryLength: 5 + } as ContextWindowConfig + }); + }); + + it('should enforce max message history length', async () => { + const messages = Array.from({ length: 20 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `Message ${i}` + })); + + const response = await client.generate({ + messages, + model: 'test-model' + }); + + const messageCount = parseInt(response.content?.match(/\d+/) || '0'); + expect(messageCount).toBeLessThanOrEqual(5); + }); + }); + + describe('Fail Strategy', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + contextWindowConfig: { + enabled: true, + strategy: 'fail', + pruneThreshold: 85 + } as ContextWindowConfig + }); + }); + + it('should throw error when context window exceeded with fail strategy', async () => { + const messages = Array.from({ length: 100 }, (_, i) => ({ + role: i % 2 === 0 ? 
'user' : 'assistant', + content: `Message ${i}: ` + 'x'.repeat(100) + })); + + try { + await client.generate({ + messages, + model: 'test-model' + }); + // If we get here and no error was thrown, that's fine - tokens might not exceed with mock + } catch (error: any) { + expect(error.code).toBe('CONTEXT_WINDOW_EXCEEDED'); + expect(error.conversationId).toBeDefined(); + } + }); + }); + + describe('Conversation Tracking', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + conversationId: 'test-conv', + contextWindowConfig: { + enabled: true, + strategy: 'prune', + maxMessageHistoryLength: 5 + } as ContextWindowConfig + }); + }); + + it('should track context window state per conversation', async () => { + const messages = [ + { role: 'user', content: 'Test message' } + ]; + + const response = await client.generate({ + messages, + model: 'test-model' + }); + + expect(response.content).toBeDefined(); + expect(provider.callCount).toBeGreaterThan(0); + }); + }); + + describe('Stream Context Window Enforcement', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + contextWindowConfig: { + enabled: true, + strategy: 'prune', + maxMessageHistoryLength: 10 + } as ContextWindowConfig + }); + }); + + it('should enforce context window on streaming', async () => { + const messages = Array.from({ length: 15 }, (_, i) => ({ + role: i % 2 === 0 ? 
'user' : 'assistant', + content: `Message ${i}` + })); + + let collectiveContent = ''; + for await (const chunk of client.stream({ + messages, + model: 'test-model' + })) { + if (chunk.delta) { + collectiveContent += chunk.delta; + } + } + + expect(collectiveContent).toBeDefined(); + }); + }); + + describe('Custom Output Token Buffer', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + contextWindowConfig: { + enabled: true, + strategy: 'prune', + outputTokenBuffer: 1.25, // 25% instead of default 15% + pruneThreshold: 85 + } as ContextWindowConfig + }); + }); + + it('should apply custom output token buffer', async () => { + const messages = [ + { role: 'user', content: 'Test' } + ]; + + const response = await client.generate({ + messages, + model: 'test-model', + max_tokens: 1024 + }); + + // Should succeed with the custom buffer applied + expect(response.content).toBeDefined(); + }); + }); + + describe('Message History Configuration', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + contextWindowConfig: { + enabled: true, + strategy: 'prune', + maxMessageHistoryLength: 3, + retainSystemMessages: true + } as ContextWindowConfig + }); + }); + + it('should retain system messages with maxMessageHistoryLength', async () => { + const messages = [ + { role: 'system', content: 'You are helpful' }, + { role: 'user', content: 'Q1' }, + { role: 'assistant', content: 'A1' }, + { role: 'user', content: 'Q2' }, + { role: 'assistant', content: 'A2' } + ]; + + const response = await client.generate({ + messages, + model: 'test-model' + }); + + expect(response.content).toBeDefined(); + }); + }); + + describe('Multiple Calls with State Tracking', () => { + beforeEach(() => { + client = new AIClient({ + providers: { test: provider }, + defaultProvider: 'test', + conversationId: 'multi-call-test', + contextWindowConfig: { + enabled: true, + strategy: 
'prune', + maxMessageHistoryLength: 5 + } as ContextWindowConfig + }); + }); + + it('should maintain context across multiple calls', async () => { + // First call + let response1 = await client.generate({ + messages: [{ role: 'user', content: 'First' }], + model: 'test-model' + }); + expect(response1.content).toBeDefined(); + + // Second call + let response2 = await client.generate({ + messages: [{ role: 'user', content: 'Second' }], + model: 'test-model' + }); + expect(response2.content).toBeDefined(); + + expect(provider.callCount).toBe(2); + }); + }); +}); diff --git a/packages/toolpack-sdk/tests/unit/context-window-errors.test.ts b/packages/toolpack-sdk/tests/unit/context-window-errors.test.ts new file mode 100644 index 0000000..746fb3c --- /dev/null +++ b/packages/toolpack-sdk/tests/unit/context-window-errors.test.ts @@ -0,0 +1,331 @@ +import { describe, it, expect } from 'vitest'; +import { + ContextWindowExceededError, + InsufficientContextError, + SummarizationError, + ContextWindowConfigError, + ConversationNotFoundError, + isContextWindowError, + handleContextWindowError +} from '../../src/errors/context-window-errors'; + +describe('Context Window Errors', () => { + describe('ContextWindowExceededError', () => { + it('should create error with correct properties', () => { + const error = new ContextWindowExceededError( + 'Context window exceeded', + 'conv-1', + 5000, + 4000, + 'prune' + ); + + expect(error.message).toBe('Context window exceeded'); + expect(error.code).toBe('CONTEXT_WINDOW_EXCEEDED'); + expect(error.conversationId).toBe('conv-1'); + expect(error.currentTokens).toBe(5000); + expect(error.contextWindowLimit).toBe(4000); + expect(error.strategy).toBe('prune'); + }); + + it('should calculate overage tokens correctly', () => { + const error = new ContextWindowExceededError( + 'Message', + 'conv-1', + 5000, + 4000, + 'prune' + ); + + expect(error.getOverageTokens()).toBe(1000); + }); + + it('should calculate usage percentage correctly', () => { + 
const error = new ContextWindowExceededError( + 'Message', + 'conv-1', + 2000, + 4000, + 'prune' + ); + + expect(error.getUsagePercentage()).toBe(50); + }); + + it('should generate detailed report', () => { + const error = new ContextWindowExceededError( + 'Test message', + 'conv-1', + 5000, + 4000, + 'fail' + ); + + const report = error.getDetailedReport(); + + expect(report).toContain('Context Window Exceeded'); + expect(report).toContain('conv-1'); + expect(report).toContain('5000'); + expect(report).toContain('4000'); + expect(report).toContain('fail'); + }); + }); + + describe('InsufficientContextError', () => { + it('should create error with correct properties', () => { + const error = new InsufficientContextError( + 'Insufficient context', + 'conv-1', + 1000, + 500, + 800 + ); + + expect(error.message).toBe('Insufficient context'); + expect(error.code).toBe('INSUFFICIENT_CONTEXT'); + expect(error.conversationId).toBe('conv-1'); + expect(error.requiredTokens).toBe(1000); + expect(error.availableTokens).toBe(500); + }); + + it('should calculate deficit correctly', () => { + const error = new InsufficientContextError( + 'Message', + 'conv-1', + 1000, + 500, + 800 + ); + + expect(error.getDeficit()).toBe(500); + }); + + it('should determine recoverability', () => { + const recoverableError = new InsufficientContextError( + 'Message', + 'conv-1', + 1000, + 500, + 600 // Can recover since 500 >= 600 * 0.5 (300) + ); + + expect(recoverableError.isRecoverable()).toBe(true); + + const nonRecoverableError = new InsufficientContextError( + 'Message', + 'conv-1', + 1000, + 100, + 400 // Cannot recover since 100 < 400 * 0.5 (200) + ); + + expect(nonRecoverableError.isRecoverable()).toBe(false); + }); + + it('should generate detailed report', () => { + const error = new InsufficientContextError( + 'Test message', + 'conv-1', + 1000, + 500, + 800 + ); + + const report = error.getDetailedReport(); + + expect(report).toContain('Insufficient Context'); + 
expect(report).toContain('conv-1'); + expect(report).toContain('1000'); + expect(report).toContain('500'); + }); + }); + + describe('SummarizationError', () => { + it('should create error with correct properties', () => { + const error = new SummarizationError( + 'Summarization failed', + 'conv-1', + 10, + 'provider_error' + ); + + expect(error.message).toBe('Summarization failed'); + expect(error.code).toBe('SUMMARIZATION_ERROR'); + expect(error.conversationId).toBe('conv-1'); + expect(error.messageCount).toBe(10); + expect(error.failureReason).toBe('provider_error'); + }); + + it('should determine retryability', () => { + const retryableError = new SummarizationError( + 'Message', + 'conv-1', + 10, + 'provider_error' + ); + + expect(retryableError.isRetryable()).toBe(true); + + const nonRetryableError = new SummarizationError( + 'Message', + 'conv-1', + 10, + 'invalid_quality' + ); + + expect(nonRetryableError.isRetryable()).toBe(false); + }); + + it('should suggest recovery actions', () => { + const providerErrorRecovery = new SummarizationError( + 'Message', + 'conv-1', + 10, + 'provider_error' + ).getSuggestedRecovery(); + + expect(providerErrorRecovery).toContain('Retry'); + + const invalidResponseRecovery = new SummarizationError( + 'Message', + 'conv-1', + 10, + 'invalid_response' + ).getSuggestedRecovery(); + + expect(invalidResponseRecovery).toContain('prompt'); + + const insufficientTokensRecovery = new SummarizationError( + 'Message', + 'conv-1', + 10, + 'insufficient_tokens' + ).getSuggestedRecovery(); + + expect(insufficientTokensRecovery).toContain('Reduce'); + + const invalidQualityRecovery = new SummarizationError( + 'Message', + 'conv-1', + 10, + 'invalid_quality' + ).getSuggestedRecovery(); + + expect(invalidQualityRecovery).toContain('Adjust'); + }); + + it('should include summary attempt in report if provided', () => { + const error = new SummarizationError( + 'Test message', + 'conv-1', + 10, + 'invalid_quality', + 'Attempted summary content 
here' + ); + + const report = error.getDetailedReport(); + + expect(report).toContain('Partial Summary'); + expect(report).toContain('Attempted summary'); + }); + }); + + describe('ContextWindowConfigError', () => { + it('should create error with correct properties', () => { + const error = new ContextWindowConfigError( + 'Invalid config', + 'pruneThreshold', + -10, + 'Must be positive' + ); + + expect(error.message).toBe('Invalid config'); + expect(error.code).toBe('CONTEXT_WINDOW_CONFIG_ERROR'); + expect(error.configField).toBe('pruneThreshold'); + expect(error.providedValue).toBe(-10); + expect(error.constraint).toBe('Must be positive'); + }); + + it('should generate detailed report', () => { + const error = new ContextWindowConfigError( + 'Test message', + 'strategy', + 'invalid', + 'Must be prune, summarize, or fail' + ); + + const report = error.getDetailedReport(); + + expect(report).toContain('Configuration Error'); + expect(report).toContain('strategy'); + expect(report).toContain('invalid'); + }); + }); + + describe('ConversationNotFoundError', () => { + it('should create error with correct properties', () => { + const error = new ConversationNotFoundError( + 'Conversation not found', + 'missing-conv' + ); + + expect(error.message).toBe('Conversation not found'); + expect(error.code).toBe('CONVERSATION_NOT_FOUND'); + expect(error.conversationId).toBe('missing-conv'); + }); + }); + + describe('isContextWindowError', () => { + it('should identify context window errors', () => { + const cwError = new ContextWindowExceededError('msg', 'c1', 5000, 4000, 'prune'); + expect(isContextWindowError(cwError)).toBe(true); + + const summaryError = new SummarizationError('msg', 'c1', 10, 'provider_error'); + expect(isContextWindowError(summaryError)).toBe(true); + }); + + it('should return false for non-context-window errors', () => { + const genericError = new Error('Generic error'); + expect(isContextWindowError(genericError)).toBe(false); + }); + + it('should return 
false for null or non-objects', () => { + expect(isContextWindowError(null)).toBe(false); + expect(isContextWindowError(undefined)).toBe(false); + expect(isContextWindowError('string')).toBe(false); + }); + }); + + describe('handleContextWindowError', () => { + it('should handle summarization errors', () => { + const error = new SummarizationError('msg', 'c1', 10, 'provider_error'); + const handling = handleContextWindowError(error); + + expect(handling.shouldRetry).toBe(true); + expect(handling.action).toBe('prune'); + }); + + it('should handle context window exceeded errors', () => { + const error = new ContextWindowExceededError('msg', 'c1', 5000, 4000, 'fail'); + const handling = handleContextWindowError(error); + + expect(handling.shouldRetry).toBe(false); + expect(handling.shouldFallback).toBe(true); + }); + + it('should handle insufficient context errors', () => { + const error = new InsufficientContextError('msg', 'c1', 1000, 500, 800); + const handling = handleContextWindowError(error); + + expect(handling.shouldRetry).toBe(false); + expect(handling.action).toBe('fail'); + }); + + it('should provide recovery guidance', () => { + const error = new SummarizationError('msg', 'c1', 10, 'provider_error'); + const handling = handleContextWindowError(error); + + expect(handling.message.length).toBeGreaterThan(0); + }); + }); +}); diff --git a/packages/toolpack-sdk/tests/unit/context-window-message-pruner.test.ts b/packages/toolpack-sdk/tests/unit/context-window-message-pruner.test.ts new file mode 100644 index 0000000..1f5f6b6 --- /dev/null +++ b/packages/toolpack-sdk/tests/unit/context-window-message-pruner.test.ts @@ -0,0 +1,249 @@ +import { describe, it, expect } from 'vitest'; +import { pruneMessages, truncateMessage, groupMessagesByRole, getMessageStats } from '../../src/utils/message-pruner'; +import { Message } from '../../src/types/index'; + +describe('Message Pruner Utilities', () => { + describe('pruneMessages', () => { + it('should remove messages 
until token target is met', () => { + const messages: Message[] = [ + { role: 'system', content: 'You are helpful' }, + { role: 'user', content: 'First message with more content to increase token count significantly' }, + { role: 'assistant', content: 'First response with more content to increase token count significantly' }, + { role: 'user', content: 'Second message with more content to increase token count significantly' }, + { role: 'assistant', content: 'Second response with more content to increase token count significantly' } + ]; + + const result = pruneMessages(messages, 50, true); + + expect(result.pruneInfo.beforeCount).toBe(5); + expect(result.pruneInfo.afterCount).toBeLessThan(5); + expect(result.removed).toBeGreaterThan(0); + expect(result.tokensReclaimed).toBeGreaterThanOrEqual(50); + }); + + it('should retain system messages when requested', () => { + const messages: Message[] = [ + { role: 'system', content: 'You are helpful' }, + { role: 'user', content: 'First message' }, + { role: 'assistant', content: 'Response 1' }, + { role: 'user', content: 'Second message' } + ]; + + const result = pruneMessages(messages, 100, true); + const filteredMessages = messages.filter(m => !result.pruneInfo.removedMessages.includes(m)); + + const hasSystemMessage = filteredMessages.some(m => m.role === 'system'); + expect(hasSystemMessage).toBe(true); + }); + + it('should not retain system messages when configured', () => { + const messages: Message[] = [ + { role: 'system', content: 'You are helpful' }, + { role: 'user', content: 'First message' }, + { role: 'assistant', content: 'Response' + } + ]; + + const result = pruneMessages(messages, 100, false); + // System message can be removed if needed + const hasSystemInRemoved = result.pruneInfo.removedMessages.some(m => m.role === 'system'); + const hasSystemInFiltered = messages + .filter(m => !result.pruneInfo.removedMessages.includes(m)) + .some(m => m.role === 'system'); + + // Either it was removed or it wasn't 
(depends on token recovery) + expect(typeof hasSystemInRemoved).toBe('boolean'); + expect(typeof hasSystemInFiltered).toBe('boolean'); + }); + + it('should not remove tool messages', () => { + const messages: Message[] = [ + { role: 'assistant', content: 'I will search', tool_calls: [{ id: '1', type: 'function', function: { name: 'search', arguments: '{}' } }] }, + { role: 'tool', content: 'Search result', tool_call_id: '1' }, + { role: 'user', content: 'Very long message that should be pruned because it has many tokens in it and we need to recover tokens' } + ]; + + const result = pruneMessages(messages, 50, true); + const toolMessagesRemoved = result.pruneInfo.removedMessages.filter(m => m.role === 'tool'); + + expect(toolMessagesRemoved).toHaveLength(0); + }); + + it('should handle empty message list', () => { + const messages: Message[] = []; + const result = pruneMessages(messages, 100, true); + + expect(result.removed).toBe(0); + expect(result.tokensReclaimed).toBe(0); + expect(result.pruneInfo.afterCount).toBe(0); + }); + + it('should handle target of 0', () => { + const messages: Message[] = [ + { role: 'user', content: 'Message' } + ]; + + const result = pruneMessages(messages, 0, true); + expect(result.removed).toBe(0); + expect(result.tokensReclaimed).toBe(0); + }); + }); + + describe('truncateMessage', () => { + it('should truncate string content when exceeding max tokens', () => { + const message: Message = { + role: 'user', + content: 'This is a very long message ' + 'x'.repeat(1000) + }; + + const truncated = truncateMessage(message, 50); + + expect(typeof truncated.content).toBe('string'); + expect((truncated.content as string).length).toBeLessThan((message.content as string).length); + expect((truncated.content as string)).toContain('[...truncated'); + }); + + it('should not truncate if under max tokens', () => { + const message: Message = { + role: 'user', + content: 'Short message' + }; + + const truncated = truncateMessage(message, 100); + + 
expect(truncated).toEqual(message); + }); + + it('should handle multipart content', () => { + const message: Message = { + role: 'user', + content: [ + { type: 'text', text: 'x'.repeat(500) }, + { type: 'image_url', image_url: { url: 'https://example.com/image.png' } } + ] + }; + + const truncated = truncateMessage(message, 50); + + expect(Array.isArray(truncated.content)).toBe(true); + if (Array.isArray(truncated.content)) { + const textPart = truncated.content.find(p => p.type === 'text'); + expect(textPart).toBeDefined(); + } + }); + + it('should handle null content', () => { + const message: Message = { + role: 'user', + content: null + }; + + const truncated = truncateMessage(message, 50); + + expect(truncated.content).toBeNull(); + }); + }); + + describe('groupMessagesByRole', () => { + it('should group messages by role', () => { + const messages: Message[] = [ + { role: 'system', content: 'System' }, + { role: 'user', content: 'User 1' }, + { role: 'assistant', content: 'Assistant 1' }, + { role: 'user', content: 'User 2' }, + { role: 'assistant', content: 'Assistant 2' } + ]; + + const grouped = groupMessagesByRole(messages); + + expect(grouped.system).toHaveLength(1); + expect(grouped.user).toHaveLength(2); + expect(grouped.assistant).toHaveLength(2); + expect(grouped.tool).toHaveLength(0); + }); + + it('should handle empty messages', () => { + const messages: Message[] = []; + const grouped = groupMessagesByRole(messages); + + expect(grouped.system).toHaveLength(0); + expect(grouped.user).toHaveLength(0); + expect(grouped.assistant).toHaveLength(0); + expect(grouped.tool).toHaveLength(0); + }); + + it('should include all role types', () => { + const messages: Message[] = [ + { role: 'system', content: 'System' }, + { role: 'user', content: 'User' }, + { role: 'assistant', content: 'Assistant' }, + { role: 'tool', content: 'Tool result', tool_call_id: '1' } + ]; + + const grouped = groupMessagesByRole(messages); + + expect('system' in grouped).toBe(true); 
+ expect('user' in grouped).toBe(true); + expect('assistant' in grouped).toBe(true); + expect('tool' in grouped).toBe(true); + }); + }); + + describe('getMessageStats', () => { + it('should calculate message statistics', () => { + const messages: Message[] = [ + { role: 'system', content: 'You are helpful' }, + { role: 'user', content: 'Question?' }, + { role: 'assistant', content: 'Answer.' } + ]; + + const stats = getMessageStats(messages); + + expect(stats.totalMessages).toBe(3); + expect(stats.totalTokens).toBeGreaterThan(0); + expect(stats.byRole.system).toBe(1); + expect(stats.byRole.user).toBe(1); + expect(stats.byRole.assistant).toBe(1); + expect(stats.largestMessageTokens).toBeGreaterThan(0); + }); + + it('should handle empty messages', () => { + const messages: Message[] = []; + const stats = getMessageStats(messages); + + expect(stats.totalMessages).toBe(0); + expect(stats.totalTokens).toBe(0); + expect(stats.largestMessageTokens).toBe(0); + }); + + it('should track largest message', () => { + const messages: Message[] = [ + { role: 'user', content: 'Short' }, + { role: 'user', content: 'Much much much much longer message with more content' }, + { role: 'user', content: 'Medium message here' } + ]; + + const stats = getMessageStats(messages); + + expect(stats.largestMessageTokens).toBeGreaterThan(0); + // The second message should have the most tokens + const secondMsgTokens = Math.ceil('Much much much much longer message with more content'.length / 4) + 4; + expect(stats.largestMessageTokens).toBeGreaterThanOrEqual(secondMsgTokens); + }); + + it('should count messages by role correctly', () => { + const messages: Message[] = [ + { role: 'user', content: 'User 1' }, + { role: 'user', content: 'User 2' }, + { role: 'assistant', content: 'Assistant 1' }, + { role: 'tool', content: 'Tool 1', tool_call_id: '1' } + ]; + + const stats = getMessageStats(messages); + + expect(stats.byRole.user).toBe(2); + expect(stats.byRole.assistant).toBe(1); + 
expect(stats.byRole.tool).toBe(1); + }); + }); +}); diff --git a/packages/toolpack-sdk/tests/unit/context-window-state-manager.test.ts b/packages/toolpack-sdk/tests/unit/context-window-state-manager.test.ts new file mode 100644 index 0000000..37aa876 --- /dev/null +++ b/packages/toolpack-sdk/tests/unit/context-window-state-manager.test.ts @@ -0,0 +1,350 @@ +import { describe, it, expect, beforeEach } from 'vitest'; +import { ContextWindowStateManager, createContextWindowStateManager } from '../../src/utils/context-window-state'; +import { ContextWindowConfig } from '../../src/types/index'; + +describe('ContextWindowStateManager', () => { + let manager: ContextWindowStateManager; + const config: ContextWindowConfig = { + enabled: true, + strategy: 'prune', + pruneThreshold: 85, + maxMessageHistoryLength: 50, + outputTokenBuffer: 1.15 + }; + + beforeEach(() => { + manager = new ContextWindowStateManager(config); + }); + + describe('State Creation and Retrieval', () => { + it('should create state for a new conversation', () => { + const state = manager.getOrCreateState('conv-1'); + + expect(state.conversationId).toBe('conv-1'); + expect(state.estimatedTokens).toBe(0); + expect(state.pruneCount).toBe(0); + expect(state.summarizationCount).toBe(0); + expect(state.warningsSent).toBe(0); + }); + + it('should reuse existing state for same conversation', () => { + const state1 = manager.getOrCreateState('conv-1'); + manager.updateTokenCount('conv-1', 100); + + const state2 = manager.getOrCreateState('conv-1'); + + expect(state2.estimatedTokens).toBe(100); + }); + + it('should return undefined for non-existent state', () => { + const state = manager.getState('non-existent'); + + expect(state).toBeUndefined(); + }); + + it('should return existing state when it exists', () => { + manager.getOrCreateState('conv-1'); + const state = manager.getState('conv-1'); + + expect(state).toBeDefined(); + expect(state?.conversationId).toBe('conv-1'); + }); + }); + + describe('Token Count 
Management', () => { + it('should update token count', () => { + manager.updateTokenCount('conv-1', 500); + const state = manager.getState('conv-1'); + + expect(state?.estimatedTokens).toBe(500); + }); + + it('should update lastUpdated timestamp', () => { + const before = Date.now(); + manager.updateTokenCount('conv-1', 100); + const after = Date.now(); + + const state = manager.getState('conv-1'); + expect(state?.lastUpdated).toBeGreaterThanOrEqual(before); + expect(state?.lastUpdated).toBeLessThanOrEqual(after); + }); + }); + + describe('Prune Operations', () => { + it('should increment prune count', () => { + manager.recordPruneOperation('conv-1', 100); + const state = manager.getState('conv-1'); + + expect(state?.pruneCount).toBe(1); + }); + + it('should record last pruned timestamp', () => { + manager.recordPruneOperation('conv-1', 100); + const state = manager.getState('conv-1'); + + expect(state?.lastPrunedAt).toBeDefined(); + expect(typeof state?.lastPrunedAt).toBe('number'); + }); + + it('should reduce estimated tokens', () => { + manager.updateTokenCount('conv-1', 500); + manager.recordPruneOperation('conv-1', 100); + const state = manager.getState('conv-1'); + + expect(state?.estimatedTokens).toBe(400); + }); + + it('should not allow negative tokens', () => { + manager.updateTokenCount('conv-1', 50); + manager.recordPruneOperation('conv-1', 100); // Try to recover more than available + const state = manager.getState('conv-1'); + + expect(state?.estimatedTokens).toBe(0); + }); + + it('should track multiple prune operations', () => { + manager.recordPruneOperation('conv-1', 50); + manager.recordPruneOperation('conv-1', 50); + manager.recordPruneOperation('conv-1', 50); + const state = manager.getState('conv-1'); + + expect(state?.pruneCount).toBe(3); + }); + }); + + describe('Warning Management', () => { + it('should record warnings', () => { + manager.recordWarning('conv-1'); + const state = manager.getState('conv-1'); + + 
expect(state?.warningsSent).toBe(1); + }); + + it('should increment warning count on multiple calls', () => { + manager.recordWarning('conv-1'); + manager.recordWarning('conv-1'); + manager.recordWarning('conv-1'); + const state = manager.getState('conv-1'); + + expect(state?.warningsSent).toBe(3); + }); + }); + + describe('Summarization Operations', () => { + it('should increment summarization count', () => { + manager.recordSummarization('conv-1', 100); + const state = manager.getState('conv-1'); + + expect(state?.summarizationCount).toBe(1); + }); + + it('should reduce tokens by saved amount', () => { + manager.updateTokenCount('conv-1', 500); + manager.recordSummarization('conv-1', 150); + const state = manager.getState('conv-1'); + + expect(state?.estimatedTokens).toBe(350); + }); + + it('should track multiple summarizations', () => { + manager.recordSummarization('conv-1', 50); + manager.recordSummarization('conv-1', 50); + const state = manager.getState('conv-1'); + + expect(state?.summarizationCount).toBe(2); + }); + }); + + describe('State Querying', () => { + it('should get all states', () => { + manager.getOrCreateState('conv-1'); + manager.getOrCreateState('conv-2'); + manager.getOrCreateState('conv-3'); + + const allStates = manager.getAllStates(); + + expect(allStates).toHaveLength(3); + }); + + it('should get statistics for a conversation', () => { + manager.getOrCreateState('conv-1'); + manager.updateTokenCount('conv-1', 500); + manager.recordPruneOperation('conv-1', 100); + + const stats = manager.getStatistics('conv-1'); + + expect(stats).toBeDefined(); + expect(stats?.currentTokens).toBe(400); + expect(stats?.pruneCount).toBe(1); + expect(stats?.contextWindowPercentage).toBeGreaterThan(0); + }); + + it('should return null statistics for non-existent conversation', () => { + const stats = manager.getStatistics('non-existent'); + + expect(stats).toBeNull(); + }); + + it('should identify conversations at risk', () => { + 
manager.updateTokenCount('conv-1', 85000); // 85% of 100k + manager.updateTokenCount('conv-2', 50000); // 50% of 100k + + const atRisk = manager.getAtRiskConversations(80); + + expect(atRisk.some(s => s.conversationId === 'conv-1')).toBe(true); + expect(atRisk.some(s => s.conversationId === 'conv-2')).toBe(false); + }); + + it('should identify conversations exceeding threshold', () => { + manager.updateTokenCount('conv-1', 150000); // Exceeds default 100k + + const exceeded = manager.getExceedingThreshold(); + + expect(exceeded.some(s => s.conversationId === 'conv-1')).toBe(true); + }); + }); + + describe('State Management', () => { + it('should delete state for a conversation', () => { + manager.getOrCreateState('conv-1'); + const deleted = manager.deleteState('conv-1'); + + expect(deleted).toBe(true); + expect(manager.getState('conv-1')).toBeUndefined(); + }); + + it('should return false when deleting non-existent state', () => { + const deleted = manager.deleteState('non-existent'); + + expect(deleted).toBe(false); + }); + + it('should clear all states', () => { + manager.getOrCreateState('conv-1'); + manager.getOrCreateState('conv-2'); + + manager.clearAllStates(); + + expect(manager.getAllStates()).toHaveLength(0); + }); + + it('should prune inactive conversations', () => { + manager.getOrCreateState('conv-1'); + manager.getOrCreateState('conv-2'); + + // Manually set old timestamps + const state1 = manager.getState('conv-1'); + const state2 = manager.getState('conv-2'); + if (state1) state1.lastUpdated = Date.now() - (65 * 60 * 1000); // 65 minutes ago + if (state2) state2.lastUpdated = Date.now(); + + const pruned = manager.pruneInactiveConversations(60); + + expect(pruned).toContain('conv-1'); + expect(pruned).not.toContain('conv-2'); + expect(manager.getState('conv-1')).toBeUndefined(); + expect(manager.getState('conv-2')).toBeDefined(); + }); + }); + + describe('Reporting and Export', () => { + it('should generate a report', () => { + 
manager.getOrCreateState('conv-1'); + manager.updateTokenCount('conv-1', 500); + + const report = manager.generateReport(); + + expect(report).toContain('Context Window State Report'); + expect(report).toContain('conv-1'); + expect(report).toContain('500'); + }); + + it('should handle empty state report', () => { + const report = manager.generateReport(); + + expect(report).toContain('No conversations tracked yet'); + }); + + it('should export state as JSON', () => { + manager.getOrCreateState('conv-1'); + manager.updateTokenCount('conv-1', 500); + + const exported = manager.export(); + + expect(exported['conv-1']).toBeDefined(); + expect(exported['conv-1'].conversationId).toBe('conv-1'); + expect(exported['conv-1'].estimatedTokens).toBe(500); + }); + + it('should import state from JSON', () => { + const importData = { + 'conv-imported': { + conversationId: 'conv-imported', + estimatedTokens: 1000, + lastUpdated: Date.now(), + pruneCount: 5, + summarizationCount: 2, + warningsSent: 1 + } + }; + + manager.import(importData); + + const state = manager.getState('conv-imported'); + expect(state?.estimatedTokens).toBe(1000); + expect(state?.pruneCount).toBe(5); + }); + }); + + describe('Memory Usage', () => { + it('should report memory usage', () => { + manager.getOrCreateState('conv-1'); + manager.getOrCreateState('conv-2'); + + const usage = manager.getMemoryUsage(); + + expect(usage.conversationCount).toBe(2); + expect(usage.approximateByteSize).toBeGreaterThan(0); + }); + }); + + describe('Integrity Validation', () => { + it('should validate integrity of states', () => { + manager.getOrCreateState('conv-1'); + + const validation = manager.validateIntegrity(); + + expect(validation.isValid).toBe(true); + expect(validation.issues).toHaveLength(0); + }); + + it('should detect invalid token counts', () => { + const state = manager.getOrCreateState('conv-1'); + state.estimatedTokens = -1; + + const validation = manager.validateIntegrity(); + + 
expect(validation.isValid).toBe(false); + expect(validation.issues.some(i => i.includes('Negative token count'))).toBe(true); + }); + + it('should detect invalid counts', () => { + const state = manager.getOrCreateState('conv-1'); + state.pruneCount = -5; + + const validation = manager.validateIntegrity(); + + expect(validation.isValid).toBe(false); + expect(validation.issues.some(i => i.includes('prune count'))).toBe(true); + }); + }); + + describe('Factory Function', () => { + it('should create manager via factory function', () => { + const mgr = createContextWindowStateManager(config); + + expect(mgr).toBeInstanceOf(ContextWindowStateManager); + expect(mgr.getOrCreateState('test')).toBeDefined(); + }); + }); +}); diff --git a/packages/toolpack-sdk/tests/unit/context-window-token-counter.test.ts b/packages/toolpack-sdk/tests/unit/context-window-token-counter.test.ts new file mode 100644 index 0000000..cd25ef1 --- /dev/null +++ b/packages/toolpack-sdk/tests/unit/context-window-token-counter.test.ts @@ -0,0 +1,189 @@ +import { describe, it, expect } from 'vitest'; +import { estimateTokenCount, getContextWindowPercentage, getSafeOutputReserve, wouldExceedContextWindow } from '../../src/utils/token-counter'; +import { Message } from '../../src/types/index'; + +describe('Token Counter Utilities', () => { + describe('estimateTokenCount', () => { + it('should estimate tokens from simple text content', () => { + const messages: Message[] = [ + { role: 'user', content: 'Hello world' } + ]; + const tokens = estimateTokenCount(messages); + expect(tokens).toBeGreaterThan(0); + expect(tokens).toBeLessThan(100); + }); + + it('should handle empty messages', () => { + const messages: Message[] = []; + const tokens = estimateTokenCount(messages); + expect(tokens).toBe(0); + }); + + it('should estimate tokens from system messages', () => { + const messages: Message[] = [ + { role: 'system', content: 'You are a helpful assistant' }, + { role: 'user', content: 'How are you?' 
} + ]; + const tokens = estimateTokenCount(messages); + expect(tokens).toBeGreaterThan(0); + }); + + it('should handle multipart content with text', () => { + const messages: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'Hello world' }] } + ]; + const tokens = estimateTokenCount(messages); + expect(tokens).toBeGreaterThan(0); + }); + + it('should estimate image tokens', () => { + const messages: Message[] = [ + { + role: 'user', content: [ + { type: 'text', text: 'What is this?' }, + { type: 'image_url', image_url: { url: 'https://example.com/image.png' } } + ] + } + ]; + const tokens = estimateTokenCount(messages); + // Should include text tokens + image estimate (~1000 chars) + expect(tokens).toBeGreaterThan(250); + }); + + it('should handle tool calls in messages', () => { + const messages: Message[] = [ + { + role: 'assistant', + content: 'I will help', + tool_calls: [ + { + id: 'call-1', + type: 'function', + function: { name: 'search', arguments: JSON.stringify({ query: 'example' }) } + } + ] + } + ]; + const tokens = estimateTokenCount(messages); + expect(tokens).toBeGreaterThan(0); + }); + + it('should handle tool response messages', () => { + const messages: Message[] = [ + { + role: 'tool', + content: 'Search result', + tool_call_id: 'call-1' + } + ]; + const tokens = estimateTokenCount(messages); + expect(tokens).toBeGreaterThan(0); + }); + + it('should increase linearly with message count', () => { + const message: Message = { role: 'user', content: 'Test message' }; + const tokens1 = estimateTokenCount([message]); + const tokens2 = estimateTokenCount([message, message]); + const tokens3 = estimateTokenCount([message, message, message]); + + expect(tokens2).toBeGreaterThan(tokens1); + expect(tokens3).toBeGreaterThan(tokens2); + }); + }); + + describe('wouldExceedContextWindow', () => { + it('should detect when context window would be exceeded', () => { + const contextWindow = 100; + const maxOutputTokens = 20; + const currentTokens = 
85; // Would exceed: 85 + 20 > 100 + + expect(wouldExceedContextWindow(currentTokens, contextWindow, maxOutputTokens)).toBe(true); + }); + + it('should detect when context window would not be exceeded', () => { + const contextWindow = 100; + const maxOutputTokens = 20; + const currentTokens = 70; // Would not exceed: 70 + 20 < 100 + + expect(wouldExceedContextWindow(currentTokens, contextWindow, maxOutputTokens)).toBe(false); + }); + + it('should handle exact boundary', () => { + const contextWindow = 100; + const maxOutputTokens = 20; + const currentTokens = 80; // Exact: 80 + 20 = 100 + + expect(wouldExceedContextWindow(currentTokens, contextWindow, maxOutputTokens)).toBe(false); + }); + }); + + describe('getContextWindowPercentage', () => { + it('should calculate percentage correctly', () => { + const contextWindow = 100; + const currentTokens = 50; + const percentage = getContextWindowPercentage(currentTokens, contextWindow); + + expect(percentage).toBe(50); + }); + + it('should round to nearest integer', () => { + const contextWindow = 100; + const currentTokens = 33; + const percentage = getContextWindowPercentage(currentTokens, contextWindow); + + expect(percentage).toBe(33); + }); + + it('should handle zero tokens', () => { + const contextWindow = 100; + const currentTokens = 0; + const percentage = getContextWindowPercentage(currentTokens, contextWindow); + + expect(percentage).toBe(0); + }); + + it('should handle full context window', () => { + const contextWindow = 100; + const currentTokens = 100; + const percentage = getContextWindowPercentage(currentTokens, contextWindow); + + expect(percentage).toBe(100); + }); + }); + + describe('getSafeOutputReserve', () => { + it('should apply default buffer percentage', () => { + const maxOutputTokens = 100; + const reserve = getSafeOutputReserve(maxOutputTokens); + + // Default buffer is 1.15 (15%) + expect(reserve).toBe(Math.ceil(100 * 1.15)); + expect(reserve).toBe(115); + }); + + it('should apply custom 
buffer percentage', () => { + const maxOutputTokens = 100; + const bufferPercentage = 1.25; + const reserve = getSafeOutputReserve(maxOutputTokens, bufferPercentage); + + expect(reserve).toBe(Math.ceil(100 * 1.25)); + expect(reserve).toBe(125); + }); + + it('should handle zero buffer', () => { + const maxOutputTokens = 100; + const bufferPercentage = 1.0; + const reserve = getSafeOutputReserve(maxOutputTokens, bufferPercentage); + + expect(reserve).toBe(100); + }); + + it('should round up fractional results', () => { + const maxOutputTokens = 97; + const bufferPercentage = 1.15; + const reserve = getSafeOutputReserve(maxOutputTokens, bufferPercentage); + + expect(reserve).toBe(112); // ceil(97 * 1.15) = ceil(111.55) + }); + }); +});