diff --git a/.env.example b/.env.example index 8f9c917..644358f 100644 --- a/.env.example +++ b/.env.example @@ -294,6 +294,12 @@ TOKEN_BUDGET_WARNING=100000 TOKEN_BUDGET_MAX=180000 TOKEN_BUDGET_ENFORCEMENT=true +# TOON JSON->TOON prompt compression (opt-in; for large structured JSON context) +TOON_ENABLED=false +TOON_MIN_BYTES=4096 +TOON_FAIL_OPEN=true +TOON_LOG_STATS=true + # ============================================================================== # Smart Tool Selection (Advanced Token Optimization) # ============================================================================== diff --git a/docs/toon-integration-spec.md b/docs/toon-integration-spec.md new file mode 100644 index 0000000..87da40f --- /dev/null +++ b/docs/toon-integration-spec.md @@ -0,0 +1,130 @@ +# TOON Integration Spec (Lynkr Spike) + +Date: 2026-02-17 +Branch: `codex/toon-integration-spike` +Status: Implemented behind flags (`TOON_ENABLED=false` by default). + +## 1) Goal + +Reduce prompt token usage for large structured JSON context while preserving current Lynkr routing, tool execution semantics, and reliability. + +## 2) Non-Goals + +1. Do not replace Lynkr routing/fallback logic. +2. Do not change MCP/tool protocol behavior. +3. Do not change provider request envelope formats. +4. Do not require TOON for normal operation. + +## 3) Integration Strategy (Minimal, Reversible) + +1. Add a TOON adapter module (encode-only for prompt context). +2. Apply TOON only to eligible large JSON blobs before they are inserted into model-visible context. +3. Keep original JSON in memory/session for execution and audit; only the prompt copy is compressed. +4. Fail open: if TOON conversion fails, send original JSON unchanged. + +## 4) What We Will Compress + +Eligible inputs (all required): + +1. Payload is valid JSON object/array. +2. Payload size exceeds the threshold configured via `TOON_MIN_BYTES`. +3. Payload is read-only context for model comprehension (not protocol-critical). + +Primary targets: + +1. 
Large tool output summaries inserted into prompt context. +2. Large search/result payloads injected for reasoning. +3. Structured data snapshots used for analysis tasks. + +## 5) What We Will Never Compress + +Hard exclusions: + +1. Tool schemas/definitions (`tools`, `input_schema`, function signatures). +2. Tool call argument payloads that are executed by systems. +3. Provider request envelopes (`/v1/messages`, `/chat/completions` body schema fields). +4. Protocol control fields (roles, stop reasons, tool IDs, request IDs). +5. Stored canonical session payloads used for replay/debug/audit. + +Rule: if a payload is machine-validated/executed downstream, keep JSON. + +## 6) Config Flags (Default Safe) + +Proposed env flags: + +1. `TOON_ENABLED=false` (default off) +2. `TOON_MIN_BYTES=4096` (only convert larger payloads) +3. `TOON_FAIL_OPEN=true` (fall back to JSON on any TOON error) +4. `TOON_LOG_STATS=true` (log before/after conversion stats, e.g. original vs. compressed bytes, for observability) + +## 7) Verification Gates + +Before enabling: + +1. Existing unit tests pass unchanged. +2. Existing MCP smoke test passes (`find_tool`/`call_tool` path). + +With `TOON_ENABLED=true`: + +1. Prompt A/B benchmark still passes functionally. +2. No regression in Task/subagent behavior. +3. Data-heavy prompt shows token reduction vs baseline. +4. No increase in protocol/tool-call errors. + +## 8) Rollback Rules + +Immediate rollback: + +1. Set `TOON_ENABLED=false`. +2. Restart Lynkr service. + +Code rollback: + +1. Revert TOON integration commit(s) on this branch. +2. Re-run unit + MCP smoke gates. + +## 9) Risks and Mitigations + +1. Risk: semantic drift from transformed payloads. + - Mitigation: apply only to read-only context, fail-open on error, keep canonical JSON. +2. Risk: negligible gains on non-tabular/deeply nested payloads. + - Mitigation: threshold + eligibility checks; skip low-value payloads. +3. Risk: harder debugging. + - Mitigation: log conversion stats and keep original payload for diagnostics. 
+ +## 10) Stock Provider Validation (Ollama Cloud) + +Date: 2026-02-17 + +Runtime under test: + +1. `MODEL_PROVIDER=ollama` +2. `OLLAMA_ENDPOINT=http://127.0.0.1:11434` +3. `OLLAMA_MODEL=glm-5:cloud` +4. `TOON_MIN_BYTES=256` +5. `TOON_FAIL_OPEN=true` +6. `TOON_LOG_STATS=true` + +Probe used: + +1. Send a two-message request where the second message is a large JSON blob. +2. The first message asks the model to classify the next message as `JSON` or `OTHER` based on its first character. +3. Run once with `TOON_ENABLED=false`, once with `TOON_ENABLED=true`. + +Observed results: + +1. `TOON_ENABLED=false` + - Reply: `JSON` + - Provider header: `x-lynkr-provider: ollama` + - TOON log entries: `0` +2. `TOON_ENABLED=true` + - Reply: `OTHER` + - Provider header: `x-lynkr-provider: ollama` + - TOON log entries: `1` + - Logged conversion stats: `originalBytes=6416`, `compressedBytes=5854` (saved `562` bytes, `8.76%`) + +Conclusion: + +1. TOON gating works on the stock Ollama cloud path (not moonshot-specific). +2. Compression is applied only when the flag is enabled. +3. Provider routing remains unchanged (`ollama`) during TOON transformation. 
diff --git a/package-lock.json b/package-lock.json index 3406514..2befc91 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,21 +1,20 @@ { "name": "lynkr", - "version": "7.0.1", + "version": "7.2.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lynkr", - "version": "7.0.1", + "version": "7.2.5", "license": "Apache-2.0", "dependencies": { "@azure/openai": "^2.0.0", "@babel/parser": "^7.29.0", "@babel/traverse": "^7.29.0", - "better-sqlite3": "^12.6.2", + "@toon-format/toon": "^2.1.0", "compression": "^1.7.4", "diff": "^5.2.0", - "dockerode": "^4.0.2", "dotenv": "^16.4.5", "express": "^5.1.0", "express-rate-limit": "^8.2.1", @@ -39,6 +38,8 @@ "node": ">=20.0.0" }, "optionalDependencies": { + "better-sqlite3": "^12.6.2", + "dockerode": "^4.0.2", "tree-sitter": "^0.21.1", "tree-sitter-javascript": "^0.21.0", "tree-sitter-python": "^0.21.0", @@ -302,7 +303,8 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/@balena/dockerignore/-/dockerignore-1.0.2.tgz", "integrity": "sha512-wMue2Sy4GAVTk6Ic4tJVcnfdau+gx2EnG7S+uAEe+TWJFqE4YoWN4/H8MSLj4eYJKxGg26lZwboEniNiNwZQ6Q==", - "license": "Apache-2.0" + "license": "Apache-2.0", + "optional": true }, "node_modules/@eslint-community/eslint-utils": { "version": "4.9.1", @@ -397,6 +399,7 @@ "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz", "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==", "license": "Apache-2.0", + "optional": true, "dependencies": { "@grpc/proto-loader": "^0.8.0", "@js-sdsl/ordered-map": "^4.4.2" @@ -410,6 +413,7 @@ "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.8.0.tgz", "integrity": "sha512-rc1hOQtjIWGxcxpb9aHAfLpIctjEnsDehj0DAiVfBlmT84uvR0uUtN2hEi/ecvWVjXUGf5qPF4qEgiLOx1YIMQ==", "license": "Apache-2.0", + "optional": true, "dependencies": { "lodash.camelcase": "^4.3.0", "long": "^5.0.0", @@ -428,6 +432,7 @@ "resolved": 
"https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.15.tgz", "integrity": "sha512-tMXdRCfYVixjuFK+Hk0Q1s38gV9zDiDJfWL3h1rv4Qc39oILCu1TRTDt7+fGUI8K4G1Fj125Hx/ru3azECWTyQ==", "license": "Apache-2.0", + "optional": true, "dependencies": { "lodash.camelcase": "^4.3.0", "long": "^5.0.0", @@ -544,6 +549,7 @@ "resolved": "https://registry.npmjs.org/@js-sdsl/ordered-map/-/ordered-map-4.4.2.tgz", "integrity": "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==", "license": "MIT", + "optional": true, "funding": { "type": "opencollective", "url": "https://opencollective.com/js-sdsl" @@ -588,31 +594,36 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/base64": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/codegen": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/eventemitter": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/fetch": { 
"version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", "license": "BSD-3-Clause", + "optional": true, "dependencies": { "@protobufjs/aspromise": "^1.1.1", "@protobufjs/inquire": "^1.1.0" @@ -622,37 +633,49 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/inquire": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/path": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/pool": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true }, "node_modules/@protobufjs/utf8": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "optional": true + }, + "node_modules/@toon-format/toon": { + 
"version": "2.1.0", + "resolved": "https://registry.npmjs.org/@toon-format/toon/-/toon-2.1.0.tgz", + "integrity": "sha512-JwWptdF5eOA0HaQxbKAzkpQtR4wSWTEfDlEy/y3/4okmOAX1qwnpLZMmtEWr+ncAhTTY1raCKH0kteHhSXnQqg==", + "license": "MIT" }, "node_modules/@types/node": { "version": "25.2.2", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.2.tgz", "integrity": "sha512-BkmoP5/FhRYek5izySdkOneRyXYN35I860MFAGupTdebyE66uZaR+bXLHq8k4DirE5DwQi3NuhvRU1jqTVwUrQ==", "license": "MIT", + "optional": true, "dependencies": { "undici-types": "~7.16.0" } @@ -765,6 +788,7 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "devOptional": true, "license": "MIT", "engines": { "node": ">=8" @@ -774,6 +798,7 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "devOptional": true, "license": "MIT", "dependencies": { "color-convert": "^2.0.1" @@ -810,6 +835,7 @@ "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz", "integrity": "sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==", "license": "MIT", + "optional": true, "dependencies": { "safer-buffer": "~2.1.0" } @@ -855,6 +881,7 @@ "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", "integrity": "sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==", "license": "BSD-3-Clause", + "optional": true, "dependencies": { "tweetnacl": "^0.14.3" } @@ -865,6 +892,7 @@ "integrity": "sha512-8VYKM3MjCa9WcaSAI3hzwhmyHVlH8tiGFwf0RlTsZPWJ1I5MkzjiudCo4KC4DxOaL/53A5B1sI/IbldNFDbsKA==", "hasInstallScript": true, "license": "MIT", + "optional": true, "dependencies": { "bindings": "^1.5.0", 
"prebuild-install": "^7.1.1" @@ -891,6 +919,7 @@ "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", "license": "MIT", + "optional": true, "dependencies": { "file-uri-to-path": "1.0.0" } @@ -900,6 +929,7 @@ "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", "license": "MIT", + "optional": true, "dependencies": { "buffer": "^5.5.0", "inherits": "^2.0.4", @@ -995,6 +1025,7 @@ } ], "license": "MIT", + "optional": true, "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" @@ -1116,13 +1147,15 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", - "license": "ISC" + "license": "ISC", + "optional": true }, "node_modules/cliui": { "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", "license": "ISC", + "optional": true, "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", @@ -1136,6 +1169,7 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "devOptional": true, "license": "MIT", "dependencies": { "color-name": "~1.1.4" @@ -1148,6 +1182,7 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "devOptional": true, "license": "MIT" }, "node_modules/colorette": { @@ -1287,6 +1322,7 @@ 
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", "license": "MIT", + "optional": true, "dependencies": { "mimic-response": "^3.1.0" }, @@ -1302,6 +1338,7 @@ "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", "license": "MIT", + "optional": true, "engines": { "node": ">=4.0.0" } @@ -1327,6 +1364,7 @@ "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", "license": "Apache-2.0", + "optional": true, "engines": { "node": ">=8" } @@ -1345,6 +1383,7 @@ "resolved": "https://registry.npmjs.org/docker-modem/-/docker-modem-5.0.6.tgz", "integrity": "sha512-ens7BiayssQz/uAxGzH8zGXCtiV24rRWXdjNha5V4zSOcxmAZsfGVm/PPFbwQdqEkDnhG+SyR9E3zSHUbOKXBQ==", "license": "Apache-2.0", + "optional": true, "dependencies": { "debug": "^4.1.1", "readable-stream": "^3.5.0", @@ -1360,6 +1399,7 @@ "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", "license": "MIT", + "optional": true, "dependencies": { "ms": "^2.1.3" }, @@ -1376,13 +1416,15 @@ "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/dockerode": { "version": "4.0.9", "resolved": "https://registry.npmjs.org/dockerode/-/dockerode-4.0.9.tgz", "integrity": "sha512-iND4mcOWhPaCNh54WmK/KoSb35AFqPAUWFMffTQcp52uQt36b5uNwEJTSXntJZBbeGad72Crbi/hvDIv6us/6Q==", "license": "Apache-2.0", + 
"optional": true, "dependencies": { "@balena/dockerignore": "^1.0.2", "@grpc/grpc-js": "^1.11.1", @@ -1445,7 +1487,8 @@ "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/encodeurl": { "version": "2.0.0", @@ -1460,6 +1503,7 @@ "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "devOptional": true, "license": "MIT", "dependencies": { "once": "^1.4.0" @@ -1500,6 +1544,7 @@ "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", "license": "MIT", + "optional": true, "engines": { "node": ">=6" } @@ -1731,6 +1776,7 @@ "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", "license": "(MIT OR WTFPL)", + "optional": true, "engines": { "node": ">=6" } @@ -1917,7 +1963,8 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/fill-range": { "version": "7.1.1", @@ -2036,7 +2083,8 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/fs.realpath": { 
"version": "1.0.0", @@ -2119,7 +2167,8 @@ "version": "0.0.0", "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/glob": { "version": "7.2.3", @@ -2426,7 +2475,8 @@ "version": "1.3.8", "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", - "license": "ISC" + "license": "ISC", + "optional": true }, "node_modules/ip-address": { "version": "10.0.1", @@ -2473,6 +2523,7 @@ "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", "license": "MIT", + "optional": true, "engines": { "node": ">=8" } @@ -2626,7 +2677,8 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/lodash.merge": { "version": "4.6.2", @@ -2639,7 +2691,8 @@ "version": "5.3.2", "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==", - "license": "Apache-2.0" + "license": "Apache-2.0", + "optional": true }, "node_modules/math-intrinsics": { "version": "1.1.0", @@ -2723,6 +2776,7 @@ "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", "license": "MIT", + "optional": true, "engines": { "node": ">=10" }, @@ 
-2747,6 +2801,7 @@ "version": "1.2.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "devOptional": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" @@ -2756,7 +2811,8 @@ "version": "0.5.3", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/ms": { "version": "2.0.0", @@ -2775,7 +2831,8 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/natural-compare": { "version": "1.4.0", @@ -2798,6 +2855,7 @@ "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.87.0.tgz", "integrity": "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ==", "license": "MIT", + "optional": true, "dependencies": { "semver": "^7.3.5" }, @@ -3279,6 +3337,7 @@ "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", "license": "MIT", + "optional": true, "dependencies": { "detect-libc": "^2.0.0", "expand-template": "^2.0.3", @@ -3331,6 +3390,7 @@ "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==", "hasInstallScript": true, "license": "BSD-3-Clause", + "optional": true, "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", @@ -3373,6 +3433,7 @@ "version": "3.0.3", "resolved": 
"https://registry.npmjs.org/pump/-/pump-3.0.3.tgz", "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==", + "devOptional": true, "license": "MIT", "dependencies": { "end-of-stream": "^1.1.0", @@ -3459,6 +3520,7 @@ "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "optional": true, "dependencies": { "deep-extend": "^0.6.0", "ini": "~1.3.0", @@ -3474,6 +3536,7 @@ "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", "license": "MIT", + "optional": true, "engines": { "node": ">=0.10.0" } @@ -3483,6 +3546,7 @@ "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", "license": "MIT", + "optional": true, "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", @@ -3519,6 +3583,7 @@ "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", "license": "MIT", + "optional": true, "engines": { "node": ">=0.10.0" } @@ -3668,6 +3733,7 @@ "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "devOptional": true, "license": "ISC", "bin": { "semver": "bin/semver.js" @@ -3863,7 +3929,8 @@ "url": "https://feross.org/support" } ], - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/simple-get": { "version": "4.0.1", @@ -3884,6 +3951,7 @@ } ], 
"license": "MIT", + "optional": true, "dependencies": { "decompress-response": "^6.0.0", "once": "^1.3.1", @@ -3916,7 +3984,8 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/split-ca/-/split-ca-1.0.1.tgz", "integrity": "sha512-Q5thBSxp5t8WPTTJQS59LrGqOZqOsrhDGDVm8azCqIBjSBd7nd9o2PM+mDulQQkh8h//4U6hFZnc/mul8t5pWQ==", - "license": "ISC" + "license": "ISC", + "optional": true }, "node_modules/split2": { "version": "4.2.0", @@ -3932,6 +4001,7 @@ "resolved": "https://registry.npmjs.org/ssh2/-/ssh2-1.17.0.tgz", "integrity": "sha512-wPldCk3asibAjQ/kziWQQt1Wh3PgDFpC0XpwclzKcdT1vql6KeYxf5LIt4nlFkUeR8WuphYMKqUA56X4rjbfgQ==", "hasInstallScript": true, + "optional": true, "dependencies": { "asn1": "^0.2.6", "bcrypt-pbkdf": "^1.0.2" @@ -3967,6 +4037,7 @@ "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", "license": "MIT", + "optional": true, "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", @@ -3980,6 +4051,7 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "devOptional": true, "license": "MIT", "dependencies": { "ansi-regex": "^5.0.1" @@ -4019,6 +4091,7 @@ "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", "license": "MIT", + "optional": true, "dependencies": { "chownr": "^1.1.1", "mkdirp-classic": "^0.5.2", @@ -4031,6 +4104,7 @@ "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", "license": "MIT", + "optional": true, "dependencies": { "bl": "^4.0.3", "end-of-stream": 
"^1.4.1", @@ -4179,6 +4253,7 @@ "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", "license": "Apache-2.0", + "optional": true, "dependencies": { "safe-buffer": "^5.0.1" }, @@ -4190,7 +4265,8 @@ "version": "0.14.5", "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", "integrity": "sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==", - "license": "Unlicense" + "license": "Unlicense", + "optional": true }, "node_modules/type-check": { "version": "0.4.0", @@ -4252,7 +4328,8 @@ "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/unpipe": { "version": "1.0.0", @@ -4277,7 +4354,8 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "license": "MIT" + "license": "MIT", + "optional": true }, "node_modules/uuid": { "version": "10.0.0", @@ -4288,6 +4366,7 @@ "https://github.com/sponsors/ctavan" ], "license": "MIT", + "optional": true, "bin": { "uuid": "dist/bin/uuid" } @@ -4332,6 +4411,7 @@ "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", "license": "MIT", + "optional": true, "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", @@ -4355,6 +4435,7 @@ "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", 
"license": "ISC", + "optional": true, "engines": { "node": ">=10" } @@ -4364,6 +4445,7 @@ "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", "license": "MIT", + "optional": true, "dependencies": { "cliui": "^8.0.1", "escalade": "^3.1.1", @@ -4382,6 +4464,7 @@ "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", "license": "ISC", + "optional": true, "engines": { "node": ">=12" } diff --git a/package.json b/package.json index 73e6797..0590bab 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ "dev": "nodemon index.js", "lint": "eslint src index.js", "test": "npm run test:unit && npm run test:performance", - "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", + "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js 
test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js", "test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js", @@ -47,6 +47,7 @@ "@azure/openai": "^2.0.0", "@babel/parser": "^7.29.0", "@babel/traverse": "^7.29.0", + "@toon-format/toon": "^2.1.0", "compression": "^1.7.4", "diff": "^5.2.0", "dotenv": "^16.4.5", diff --git a/src/budget/index.js b/src/budget/index.js index 2d05b79..5bcaadd 100644 --- a/src/budget/index.js +++ b/src/budget/index.js @@ -11,13 +11,14 @@ const logger = require('../logger'); class BudgetManager { constructor(options = {}) { this.enabled = options.enabled !== false; + let dbPath = null; if (!this.enabled || !Database) { this.enabled = false; return; } try { - const dbPath = path.join(process.cwd(), 'data', 'budgets.db'); + dbPath = path.join(process.cwd(), 'data', 'budgets.db'); const dbDir = path.dirname(dbPath); if (!fs.existsSync(dbDir)) { diff --git a/src/config/index.js b/src/config/index.js index 466585d..d75d045 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -204,6 +204,12 @@ const tokenBudgetWarning = 
Number.parseInt(process.env.TOKEN_BUDGET_WARNING ?? " const tokenBudgetMax = Number.parseInt(process.env.TOKEN_BUDGET_MAX ?? "180000", 10); const tokenBudgetEnforcement = process.env.TOKEN_BUDGET_ENFORCEMENT !== "false"; // default true +// TOON payload compression (opt-in) +const toonEnabled = process.env.TOON_ENABLED === "true"; // default false +const toonMinBytes = Number.parseInt(process.env.TOON_MIN_BYTES ?? "4096", 10); +const toonFailOpen = process.env.TOON_FAIL_OPEN !== "false"; // default true +const toonLogStats = process.env.TOON_LOG_STATS !== "false"; // default true + // Smart tool selection configuration (always enabled) const smartToolSelectionMode = (process.env.SMART_TOOL_SELECTION_MODE ?? "heuristic").toLowerCase(); const smartToolSelectionTokenBudget = Number.parseInt( @@ -765,6 +771,12 @@ var config = { max: tokenBudgetMax, enforcement: tokenBudgetEnforcement, }, + toon: { + enabled: toonEnabled, + minBytes: Number.isNaN(toonMinBytes) ? 4096 : toonMinBytes, + failOpen: toonFailOpen, + logStats: toonLogStats, + }, smartToolSelection: { enabled: true, // HARDCODED - always enabled mode: smartToolSelectionMode, @@ -892,6 +904,12 @@ function reloadConfig() { config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase(); config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); + config.toon.enabled = process.env.TOON_ENABLED === "true"; + const newToonMinBytes = Number.parseInt(process.env.TOON_MIN_BYTES ?? "4096", 10); + config.toon.minBytes = Number.isNaN(newToonMinBytes) ? 4096 : newToonMinBytes; + config.toon.failOpen = process.env.TOON_FAIL_OPEN !== "false"; + config.toon.logStats = process.env.TOON_LOG_STATS !== "false"; + // Log level config.logger.level = process.env.LOG_LEVEL ?? 
"info"; diff --git a/src/context/toon.js b/src/context/toon.js new file mode 100644 index 0000000..4e34b06 --- /dev/null +++ b/src/context/toon.js @@ -0,0 +1,173 @@ +const logger = require("../logger"); + +let cachedEncode; +let cachedLoadError; +let warnedMissingDependency = false; + +function normaliseSettings(settings = {}) { + const minBytesRaw = + typeof settings.minBytes === "number" ? settings.minBytes : Number.parseInt(settings.minBytes ?? "4096", 10); + return { + enabled: settings.enabled === true, + minBytes: Number.isFinite(minBytesRaw) && minBytesRaw > 0 ? minBytesRaw : 4096, + failOpen: settings.failOpen !== false, + logStats: settings.logStats !== false, + }; +} + +function resolveEncodeFn(overrideEncode) { + if (typeof overrideEncode === "function") return overrideEncode; + if (cachedEncode !== undefined) return cachedEncode; + try { + const toon = require("@toon-format/toon"); + cachedEncode = typeof toon?.encode === "function" ? toon.encode : null; + cachedLoadError = cachedEncode ? 
null : new Error("Missing encode() export from @toon-format/toon"); + } catch (err) { + cachedEncode = null; + cachedLoadError = err; + } + return cachedEncode; +} + +function looksLikeJsonObjectOrArray(text) { + if (typeof text !== "string") return false; + const trimmed = text.trim(); + if (trimmed.length < 2) return false; + return ( + (trimmed.startsWith("{") && trimmed.endsWith("}")) || + (trimmed.startsWith("[") && trimmed.endsWith("]")) + ); +} + +function safeJsonParse(text) { + try { + return JSON.parse(text); + } catch { + return null; + } +} + +function toToonString(encodeFn, value) { + const encoded = encodeFn(value); + if (typeof encoded === "string") return encoded; + if (encoded && typeof encoded[Symbol.iterator] === "function") { + return Array.from(encoded).join("\n"); + } + return ""; +} + +function compressStringContent(content, cfg, encodeFn, stats) { + if (typeof content !== "string") return content; + + const originalBytes = Buffer.byteLength(content, "utf8"); + if (originalBytes < cfg.minBytes) { + stats.skippedBySize += 1; + return content; + } + + stats.candidateCount += 1; + if (!looksLikeJsonObjectOrArray(content)) { + stats.skippedByShape += 1; + return content; + } + + const parsed = safeJsonParse(content); + if (!parsed || typeof parsed !== "object") { + stats.skippedByParse += 1; + return content; + } + + const toonText = toToonString(encodeFn, parsed); + if (typeof toonText !== "string" || toonText.trim().length === 0) { + return content; + } + + const compressedBytes = Buffer.byteLength(toonText, "utf8"); + stats.convertedCount += 1; + stats.originalBytes += originalBytes; + stats.compressedBytes += compressedBytes; + return toonText; +} + +function applyToonCompression(payload, settings = {}, options = {}) { + const cfg = normaliseSettings(settings); + const stats = { + enabled: cfg.enabled, + available: true, + convertedCount: 0, + candidateCount: 0, + skippedBySize: 0, + skippedByShape: 0, + skippedByParse: 0, + failureCount: 0, 
+ originalBytes: 0, + compressedBytes: 0, + }; + + if (!cfg.enabled) return { payload, stats }; + if (!payload || !Array.isArray(payload.messages) || payload.messages.length === 0) { + return { payload, stats }; + } + + const encodeFn = resolveEncodeFn(options.encode); + if (typeof encodeFn !== "function") { + stats.available = false; + const err = cachedLoadError ?? new Error("TOON encoder unavailable"); + if (!cfg.failOpen) throw err; + if (!warnedMissingDependency) { + logger.warn( + { error: err.message }, + "TOON enabled but encoder dependency is unavailable; falling back to JSON", + ); + warnedMissingDependency = true; + } + return { payload, stats }; + } + + for (const message of payload.messages) { + if (!message || typeof message !== "object") continue; + if (message.role === "tool") continue; // Never mutate machine-executed protocol payloads + try { + if (typeof message.content === "string") { + message.content = compressStringContent(message.content, cfg, encodeFn, stats); + continue; + } + + if (!Array.isArray(message.content)) continue; + for (const block of message.content) { + if (!block || typeof block !== "object") continue; + + // Keep protocol blocks untouched. Only compress user-language text fields. 
+ if (block.type === "text" && typeof block.text === "string") { + block.text = compressStringContent(block.text, cfg, encodeFn, stats); + continue; + } + + if (block.type === "input_text" && typeof block.input_text === "string") { + block.input_text = compressStringContent(block.input_text, cfg, encodeFn, stats); + } + } + } catch (err) { + stats.failureCount += 1; + if (!cfg.failOpen) throw err; + } + } + + if (cfg.logStats && stats.convertedCount > 0) { + logger.info( + { + convertedCount: stats.convertedCount, + candidateCount: stats.candidateCount, + originalBytes: stats.originalBytes, + compressedBytes: stats.compressedBytes, + }, + "TOON compression applied to message context", + ); + } + + return { payload, stats }; +} + +module.exports = { + applyToonCompression, +}; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 55a47a5..9825c92 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -10,6 +10,7 @@ const tokens = require("../utils/tokens"); const systemPrompt = require("../prompts/system"); const historyCompression = require("../context/compression"); const tokenBudget = require("../context/budget"); +const { applyToonCompression } = require("../context/toon"); const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection"); const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom"); const { createAuditLogger } = require("../logger/audit-logger"); @@ -1172,6 +1173,10 @@ function sanitizePayload(payload) { } } + // Optional TOON conversion for large JSON message payloads (prompt context only). + // Run this BEFORE message coalescing to preserve parseable JSON boundaries. 
const assert = require("assert");
const { describe, it } = require("node:test");

const { applyToonCompression } = require("../src/context/toon");

// Canned encoder output shared by the stub encoder and the assertions.
const STUB_TOON = "rows[1]{id,label,value}:\n 1,item-1,value-1";
const stubEncode = () => STUB_TOON;

// Deep copy via a JSON round-trip, used to snapshot fields before the call.
const snapshot = (value) => JSON.parse(JSON.stringify(value));

/**
 * Build a JSON document with eight rows whose `value` field is padded by
 * repetition, so the string is comfortably larger than the thresholds used
 * in these tests.
 */
function createLargeJsonString() {
  const rows = [];
  for (let n = 1; n <= 8; n += 1) {
    rows.push({
      id: n,
      label: `item-${n}`,
      value: `value-${n}`.repeat(20),
    });
  }
  return JSON.stringify({ rows });
}

describe("TOON compression", () => {
  it("is a no-op when TOON is disabled", () => {
    const payload = {
      stream: false,
      tool_choice: { type: "auto" },
      tools: [{ name: "Read", input_schema: { type: "object", properties: {} } }],
      messages: [{ role: "user", content: createLargeJsonString() }],
    };
    const expected = snapshot(payload);

    const result = applyToonCompression(
      payload,
      { enabled: false, minBytes: 1, failOpen: true },
      { encode: () => "should-not-run" },
    );

    assert.deepStrictEqual(result.payload, expected);
    assert.strictEqual(result.stats.enabled, false);
    assert.strictEqual(result.stats.convertedCount, 0);
  });

  it("falls back safely when encoder throws (fail-open)", () => {
    const jsonText = createLargeJsonString();
    const payload = { messages: [{ role: "user", content: jsonText }] };
    const throwingEncode = () => {
      throw new Error("simulated toon encode failure");
    };

    const result = applyToonCompression(
      payload,
      { enabled: true, minBytes: 1, failOpen: true, logStats: false },
      { encode: throwingEncode },
    );

    assert.strictEqual(result.payload.messages[0].content, jsonText);
    assert.strictEqual(result.stats.failureCount, 1);
    assert.strictEqual(result.stats.convertedCount, 0);
  });

  it("does not mutate protocol fields while compressing eligible message content", () => {
    const readTool = {
      name: "Read",
      description: "Read files",
      input_schema: {
        type: "object",
        properties: { file_path: { type: "string" } },
        required: ["file_path"],
      },
    };
    const payload = {
      model: "kimi-k2.5",
      stream: true,
      tool_choice: { type: "tool", name: "Read" },
      tools: [readTool],
      messages: [
        { role: "user", content: createLargeJsonString() },
        // Messages with role "tool" must come back untouched.
        { role: "tool", content: createLargeJsonString() },
      ],
    };
    const toolsBefore = snapshot(payload.tools);
    const toolChoiceBefore = snapshot(payload.tool_choice);
    const toolRoleContentBefore = payload.messages[1].content;

    const result = applyToonCompression(
      payload,
      { enabled: true, minBytes: 1, failOpen: false, logStats: false },
      { encode: stubEncode },
    );
    const after = result.payload;

    assert.strictEqual(after.messages[0].content, STUB_TOON);
    assert.strictEqual(after.messages[1].content, toolRoleContentBefore);
    assert.deepStrictEqual(after.tools, toolsBefore);
    assert.deepStrictEqual(after.tool_choice, toolChoiceBefore);
    assert.strictEqual(after.stream, true);
    assert.strictEqual(after.model, "kimi-k2.5");
    assert.strictEqual(result.stats.convertedCount, 1);
  });

  it("compresses Anthropic text blocks while preserving tool protocol blocks", () => {
    const largeJson = createLargeJsonString();
    const toolResultBlock = {
      type: "tool_result",
      tool_use_id: "toolu_123",
      content: largeJson,
      is_error: false,
    };
    const payload = {
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: largeJson },
            { type: "input_text", input_text: largeJson },
            toolResultBlock,
          ],
        },
      ],
    };
    const toolResultContentBefore = toolResultBlock.content;

    const result = applyToonCompression(
      payload,
      { enabled: true, minBytes: 1, failOpen: false, logStats: false },
      { encode: stubEncode },
    );
    const blocks = result.payload.messages[0].content;

    assert.strictEqual(blocks[0].text, STUB_TOON);
    assert.strictEqual(blocks[1].input_text, STUB_TOON);
    assert.strictEqual(blocks[2].content, toolResultContentBefore);
    assert.strictEqual(result.stats.convertedCount, 2);
  });
});