From 443b5b2c9f4b2f23ffb8848ab2ab1f0546d2560d Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 2 Nov 2025 19:57:08 -0500 Subject: [PATCH 01/26] feat: implement lru cache and sophisticated token counting (issues #4 and #5) - add lrucache class with ttl support and statistics tracking - add tokencounter class with google ai api integration - implement content-type aware token estimation (code/json/markdown/text) - integrate lru caching for token counts (200 entries, 30min ttl) - add automatic eviction and periodic cleanup for cache - initialize global tokencounter singleton with api key from environment implements issue #4: sophisticated token counting beyond character/4 implements issue #5: lru cache for expensive operations generated with claude code co-authored-by: claude --- .../handlers/token-optimizer-orchestrator.ps1 | 265 ++++++++++++++++++ 1 file changed, 265 insertions(+) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index 130e398..a4f7862 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -58,6 +58,271 @@ $OPTIMIZATION_QUALITY = 11 # Maximum compression quality $HASH_PREFIX = "hash:" $HASH_LENGTH = 32 +# ============================================================================= +# LRU CACHE CLASSES (Issue #5) +# ============================================================================= +class LruCacheEntry { + [object]$Value + [datetime]$Timestamp + + LruCacheEntry([object]$value) { + $this.Value = $value + $this.Timestamp = Get-Date + } +} + +class LruCache { + [System.Collections.Specialized.OrderedDictionary]$Cache + [int]$MaxSize + [int]$TtlSeconds + [int]$HitCount = 0 + [int]$MissCount = 0 + [int]$EvictionCount = 0 + + LruCache([int]$maxSize, [int]$ttlSeconds) { + $this.Cache = [System.Collections.Specialized.OrderedDictionary]::new() + $this.MaxSize = $maxSize + $this.TtlSeconds = $ttlSeconds + } + + # Get value from cache (returns $null if not found or expired) + [object] Get([string]$key) { + if (-not $this.Cache.Contains($key)) { + $this.MissCount++ + return $null + } + + $entry = $this.Cache[$key] + + # Check TTL expiration + if ($this.TtlSeconds -gt 0) { + $age = ((Get-Date) - $entry.Timestamp).TotalSeconds + if ($age -gt $this.TtlSeconds) { + $this.Cache.Remove($key) + $this.MissCount++ + $this.EvictionCount++ + return $null + } + } + + # Move to end (most recently used) by removing and re-adding + $value = $entry.Value + $this.Cache.Remove($key) + $this.Cache[$key] = [LruCacheEntry]::new($value) + + $this.HitCount++ + return $value + } + + # Set value in cache + [void] Set([string]$key, [object]$value) { + # Remove if already exists (to re-insert at end) + if ($this.Cache.Contains($key)) { + $this.Cache.Remove($key) + } + + # Evict least recently used if at capacity + if ($this.Cache.Count -ge $this.MaxSize) { + # First key is least recently used (OrderedDictionary maintains insertion order) + $firstKey = @($this.Cache.Keys)[0] + $this.Cache.Remove($firstKey) + $this.EvictionCount++ + } + + # Insert at end (most recently used) + $this.Cache[$key] = [LruCacheEntry]::new($value) + } + + # Check if key exists and is not expired + [bool] ContainsKey([string]$key) { + return $null -ne $this.Get($key) + } + + # Clear all entries + [void] Clear() { + $this.Cache.Clear() + $this.HitCount = 0 + $this.MissCount = 0 + $this.EvictionCount = 0 + } + + # Get cache statistics + [hashtable] GetStats() { + $totalRequests = $this.HitCount + $this.MissCount + return @{ + Size = $this.Cache.Count + MaxSize = $this.MaxSize + HitCount = $this.HitCount + MissCount = $this.MissCount + EvictionCount = $this.EvictionCount + HitRate = if ($totalRequests -gt 0) { + [Math]::Round(($this.HitCount / $totalRequests) * 100, 2) + } else { 0 } + } + } + + # Cleanup expired entries (call periodically) + [int] CleanupExpired() { + if ($this.TtlSeconds -le 0) { return 0 } + + $removed = 0 + $keysToRemove = @() + + foreach ($key in $this.Cache.Keys) { + $entry = $this.Cache[$key] + $age = ((Get-Date) - $entry.Timestamp).TotalSeconds + if ($age -gt $this.TtlSeconds) { + $keysToRemove += $key + } + } + + foreach ($key in $keysToRemove) { + $this.Cache.Remove($key) + $removed++ + } + + $this.EvictionCount += $removed + return $removed + } +} + +# ============================================================================= +# TOKEN COUNTER CLASS (Issue #4) +# ============================================================================= +class TokenCounter { + [string]$ApiKey + [string]$Model + [LruCache]$Cache + [int]$ApiCallCount = 0 + [int]$CacheHitCount = 0 + [int]$EstimationCount = 0 + + TokenCounter([string]$apiKey, [string]$model) { + $this.ApiKey = $apiKey + $this.Model = $model + # Use LRU cache: Max 200 entries, TTL 30 minutes (1800 seconds) + $this.Cache = [LruCache]::new(200, 1800) + } + + # Primary method: try API first, fall back to estimation + [int] CountTokens([string]$text, [string]$contentType) { + # Check cache first (using content hash as key) + $textHash = [System.BitConverter]::ToString( + [System.Security.Cryptography.SHA256]::Create().ComputeHash( + [System.Text.Encoding]::UTF8.GetBytes($text) + ) + ).Replace("-", "") + $cacheKey = "${contentType}:${textHash}" + + $cached = $this.Cache.Get($cacheKey) + if ($null -ne $cached) { + $this.CacheHitCount++ + return $cached + } + + # Try API call if key is available + if ($this.ApiKey) { + try { + $tokenCount = $this.CountTokensViaAPI($text) + $this.ApiCallCount++ + $this.Cache.Set($cacheKey, $tokenCount) + return $tokenCount + } catch { + # API failed, fall back to estimation + Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN" + } + } + + # Fallback to improved estimation + $estimated = $this.EstimateTokens($text, $contentType) + $this.EstimationCount++ + $this.Cache.Set($cacheKey, $estimated) + return $estimated + } + + # Google AI API integration + [int] CountTokensViaAPI([string]$text) { + $requestBody = @{ + contents = @( + @{ + parts = @( + @{ + text = $text + } + ) + } + ) + } | ConvertTo-Json -Depth 10 -Compress + + $uri = "https://generativelanguage.googleapis.com/v1beta/models/$($this.Model):countTokens?key=$($this.ApiKey)" + + $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5 + + return $response.totalTokens + } + + # Improved estimation with content-type awareness + [int] EstimateTokens([string]$text, [string]$contentType) { + $baseRatio = [Math]::Ceiling($text.Length / 4.0) + + switch ($contentType) { + "code" { + # Code has more tokens per character due to symbols/keywords + return [Math]::Ceiling($baseRatio * 1.2) + } + "json" { + # JSON structures add token overhead for delimiters + return [Math]::Ceiling($baseRatio * 1.15) + } + "markdown" { + # Markdown formatting adds token overhead + return [Math]::Ceiling($baseRatio * 1.1) + } + "text" { + # Plain text is slightly less than base ratio + return [Math]::Ceiling($baseRatio * 0.95) + } + default { + return $baseRatio + } + } + } + + # Content type detection based on file extension or tool name + [string] DetectContentType([string]$identifier) { + switch -Regex ($identifier) { + '\.(cs|ps1|ts|js|py|java|cpp|c|h|go|rs|rb|php)$' { return "code" } + '\.(json|jsonc)$' { return "json" } + '\.(md|markdown)$' { return "markdown" } + '^Read$|^Grep$|^Bash$' { return "code" } + default { return "text" } + } + } + + # Get cache statistics + [hashtable] GetStats() { + $cacheStats = $this.Cache.GetStats() + $totalCalls = $this.ApiCallCount + $this.CacheHitCount + $this.EstimationCount + return @{ + ApiCalls = $this.ApiCallCount + CacheHits = $this.CacheHitCount + EstimationCount = $this.EstimationCount + CacheSize = $cacheStats.Size + CacheHitRate = $cacheStats.HitRate + TotalCalls = $totalCalls + } + } +} + +# Initialize global TokenCounter (singleton pattern) +if (-not $script:TokenCounter) { + $apiKey = $env:GOOGLE_AI_API_KEY + if (-not $apiKey) { + Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow + } + $script:TokenCounter = [TokenCounter]::new($apiKey, "gemini-2.0-flash-exp") +} + # PHASE 2 FIX: Deterministic cache key generation # Fixes 0% cache hit rate by ensuring identical operations produce identical keys function Get-DeterministicCacheKey { From 69a35557c342435fdba637df046e32dabeaa16fc Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 2 Nov 2025 21:06:05 -0500 Subject: [PATCH 02/26] fix: address all pr review comments - add type guards to prevent class re-definition errors (CRITICAL) - fix sha256 resource disposal with proper try/finally - replace write-log with write-host to fix ordering issue - fix double-counting in getstats totalcalls calculation - make model name configurable via google_ai_model env var - improve api error handling for timeout/network errors - fix detectcontenttype regex for exact matching addresses feedback from github copilot and coderabbit reviews --- .../handlers/token-optimizer-orchestrator.ps1 | 68 +++++++++++++------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index a4f7862..6726ee6 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -61,17 +61,21 @@ $HASH_LENGTH = 32 # ============================================================================= # LRU CACHE CLASSES (Issue #5) # ============================================================================= -class LruCacheEntry { - [object]$Value - [datetime]$Timestamp +# Guard against class re-definition on subsequent script loads +if (-not ('LruCacheEntry' -as [type])) { + class LruCacheEntry { + [object]$Value + [datetime]$Timestamp - LruCacheEntry([object]$value) { - $this.Value = $value - $this.Timestamp = Get-Date + LruCacheEntry([object]$value) { + $this.Value = $value + $this.Timestamp = Get-Date + } } } -class LruCache { +if (-not ('LruCache' -as [type])) { + class LruCache { [System.Collections.Specialized.OrderedDictionary]$Cache [int]$MaxSize [int]$TtlSeconds @@ -185,11 +189,13 @@ class LruCache { return $removed } } +} # ============================================================================= # TOKEN COUNTER CLASS (Issue #4) # ============================================================================= -class TokenCounter { +if (-not ('TokenCounter' -as [type])) { + class TokenCounter { [string]$ApiKey [string]$Model [LruCache]$Cache @@ -206,12 +212,17 @@ class TokenCounter { # Primary method: try API first, fall back to estimation [int] CountTokens([string]$text, [string]$contentType) { - # Check cache first (using content hash as key) - $textHash = [System.BitConverter]::ToString( - [System.Security.Cryptography.SHA256]::Create().ComputeHash( - [System.Text.Encoding]::UTF8.GetBytes($text) - ) - ).Replace("-", "") + # Check cache first (using content hash as key with proper disposal) + $sha256 = [System.Security.Cryptography.SHA256]::Create() + try { + $textHash = [System.BitConverter]::ToString( + $sha256.ComputeHash( + [System.Text.Encoding]::UTF8.GetBytes($text) + ) + ).Replace("-", "") + } finally { + $sha256.Dispose() + } $cacheKey = "${contentType}:${textHash}" $cached = $this.Cache.Get($cacheKey) @@ -228,8 +239,8 @@ class TokenCounter { $this.Cache.Set($cacheKey, $tokenCount) return $tokenCount } catch { - # API failed, fall back to estimation - Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN" + # API failed, fall back to estimation (use Write-Host since Write-Log defined later) + Write-Host "WARN: Token counting API failed: $($_.Exception.Message), falling back to estimation" -ForegroundColor Yellow } } @@ -256,7 +267,22 @@ class TokenCounter { $uri = "https://generativelanguage.googleapis.com/v1beta/models/$($this.Model):countTokens?key=$($this.ApiKey)" - $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5 + try { + $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5 + } catch { + $ex = $_.Exception + if ($ex -is [System.Net.WebException]) { + if ($ex.Status -eq [System.Net.WebExceptionStatus]::Timeout) { + throw "Token counting API timeout after 5 seconds" + } elseif ($ex.Status -eq [System.Net.WebExceptionStatus]::ConnectFailure) { + throw "Token counting API network error (connect failure)" + } else { + throw "Token counting API network error: $($ex.Status)" + } + } else { + throw + } + } return $response.totalTokens } @@ -294,7 +320,7 @@ class TokenCounter { '\.(cs|ps1|ts|js|py|java|cpp|c|h|go|rs|rb|php)$' { return "code" } '\.(json|jsonc)$' { return "json" } '\.(md|markdown)$' { return "markdown" } - '^Read$|^Grep$|^Bash$' { return "code" } + '^(Read|Grep|Bash)$' { return "code" } default { return "text" } } } @@ -302,7 +328,7 @@ class TokenCounter { # Get cache statistics [hashtable] GetStats() { $cacheStats = $this.Cache.GetStats() - $totalCalls = $this.ApiCallCount + $this.CacheHitCount + $this.EstimationCount + $totalCalls = $this.ApiCallCount + $this.EstimationCount return @{ ApiCalls = $this.ApiCallCount CacheHits = $this.CacheHitCount @@ -313,6 +339,7 @@ class TokenCounter { } } } +} # Initialize global TokenCounter (singleton pattern) if (-not $script:TokenCounter) { @@ -320,7 +347,8 @@ if (-not $script:TokenCounter) { if (-not $apiKey) { Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow } - $script:TokenCounter = [TokenCounter]::new($apiKey, "gemini-2.0-flash-exp") + $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" } + $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName) } # PHASE 2 FIX: Deterministic cache key generation From 0e506ac4190efa23d90783b6e750bfe047e76c09 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 2 Nov 2025 21:06:05 -0500 Subject: [PATCH 03/26] fix: address all pr review comments - add type guards to prevent class re-definition errors (CRITICAL) - fix sha256 resource disposal with proper try/finally - replace write-log with write-host to fix ordering issue - fix double-counting in getstats totalcalls calculation - make model name configurable via google_ai_model env var - improve api error handling for timeout/network errors - fix detectcontenttype regex for exact matching addresses feedback from github copilot and coderabbit reviews --- .../handlers/token-optimizer-orchestrator.ps1 | 68 +++++++++++++------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index a4f7862..6726ee6 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -61,17 +61,21 @@ $HASH_LENGTH = 32 # ============================================================================= # LRU CACHE CLASSES (Issue #5) # ============================================================================= -class LruCacheEntry { - [object]$Value - [datetime]$Timestamp +# Guard against class re-definition on subsequent script loads +if (-not ('LruCacheEntry' -as [type])) { + class LruCacheEntry { + [object]$Value + [datetime]$Timestamp - LruCacheEntry([object]$value) { - $this.Value = $value - $this.Timestamp = Get-Date + LruCacheEntry([object]$value) { + $this.Value = $value + $this.Timestamp = Get-Date + } } } -class LruCache { +if (-not ('LruCache' -as [type])) { + class LruCache { [System.Collections.Specialized.OrderedDictionary]$Cache [int]$MaxSize [int]$TtlSeconds @@ -185,11 +189,13 @@ class LruCache { return $removed } } +} # ============================================================================= # TOKEN COUNTER CLASS (Issue #4) # ============================================================================= -class TokenCounter { +if (-not ('TokenCounter' -as [type])) { + class TokenCounter { [string]$ApiKey [string]$Model [LruCache]$Cache @@ -206,12 +212,17 @@ class TokenCounter { # Primary method: try API first, fall back to estimation [int] CountTokens([string]$text, [string]$contentType) { - # Check cache first (using content hash as key) - $textHash = [System.BitConverter]::ToString( - [System.Security.Cryptography.SHA256]::Create().ComputeHash( - [System.Text.Encoding]::UTF8.GetBytes($text) - ) - ).Replace("-", "") + # Check cache first (using content hash as key with proper disposal) + $sha256 = [System.Security.Cryptography.SHA256]::Create() + try { + $textHash = [System.BitConverter]::ToString( + $sha256.ComputeHash( + [System.Text.Encoding]::UTF8.GetBytes($text) + ) + ).Replace("-", "") + } finally { + $sha256.Dispose() + } $cacheKey = "${contentType}:${textHash}" $cached = $this.Cache.Get($cacheKey) @@ -228,8 +239,8 @@ class TokenCounter { $this.Cache.Set($cacheKey, $tokenCount) return $tokenCount } catch { - # API failed, fall back to estimation - Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN" + # API failed, fall back to estimation (use Write-Host since Write-Log defined later) + Write-Host "WARN: Token counting API failed: $($_.Exception.Message), falling back to estimation" -ForegroundColor Yellow } } @@ -256,7 +267,22 @@ class TokenCounter { $uri = "https://generativelanguage.googleapis.com/v1beta/models/$($this.Model):countTokens?key=$($this.ApiKey)" - $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5 + try { + $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5 + } catch { + $ex = $_.Exception + if ($ex -is [System.Net.WebException]) { + if ($ex.Status -eq [System.Net.WebExceptionStatus]::Timeout) { + throw "Token counting API timeout after 5 seconds" + } elseif ($ex.Status -eq [System.Net.WebExceptionStatus]::ConnectFailure) { + throw "Token counting API network error (connect failure)" + } else { + throw "Token counting API network error: $($ex.Status)" + } + } else { + throw + } + } return $response.totalTokens } @@ -294,7 +320,7 @@ class TokenCounter { '\.(cs|ps1|ts|js|py|java|cpp|c|h|go|rs|rb|php)$' { return "code" } '\.(json|jsonc)$' { return "json" } '\.(md|markdown)$' { return "markdown" } - '^Read$|^Grep$|^Bash$' { return "code" } + '^(Read|Grep|Bash)$' { return "code" } default { return "text" } } } @@ -302,7 +328,7 @@ class TokenCounter { # Get cache statistics [hashtable] GetStats() { $cacheStats = $this.Cache.GetStats() - $totalCalls = $this.ApiCallCount + $this.CacheHitCount + $this.EstimationCount + $totalCalls = $this.ApiCallCount + $this.EstimationCount return @{ ApiCalls = $this.ApiCallCount CacheHits = $this.CacheHitCount @@ -313,6 +339,7 @@ class TokenCounter { } } } +} # Initialize global TokenCounter (singleton pattern) if (-not $script:TokenCounter) { @@ -320,7 +347,8 @@ if (-not $script:TokenCounter) { if (-not $apiKey) { Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow } - $script:TokenCounter = [TokenCounter]::new($apiKey, "gemini-2.0-flash-exp") + $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" } + $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName) } # PHASE 2 FIX: Deterministic cache key generation From 0c11a46ecb085b7e958316d5549b9709ac7fc743 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 2 Nov 2025 22:37:41 -0500 Subject: [PATCH 04/26] refactor(dispatcher): use write-verbose for logging --- hooks/dispatcher.ps1 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1 index acc6187..5abaaaf 100644 --- a/hooks/dispatcher.ps1 +++ b/hooks/dispatcher.ps1 @@ -2,6 +2,7 @@ # Minimal dispatcher focused on token optimization via MCP # Replaces 400+ line mess with clean architecture +[CmdletBinding()] param([string]$Phase = "") $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers" @@ -11,7 +12,9 @@ $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1" function Write-Log { param([string]$Message) $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" - "[$timestamp] [$Phase] $Message" | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 + $logMessage = "[$timestamp] [$Phase] $Message" + $logMessage | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 + Write-Verbose $logMessage } function Block-Tool { From d3172df8d48d43d209458a5e8d4a32bfa6c638ab Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 2 Nov 2025 22:39:42 -0500 Subject: [PATCH 05/26] refactor(logging): create shared logging module --- hooks/dispatcher.ps1 | 9 ++----- .../handlers/token-optimizer-orchestrator.ps1 | 27 +------------------ hooks/helpers/logging.ps1 | 27 +++++++++++++++++++ 3 files changed, 30 insertions(+), 33 deletions(-) create mode 100644 hooks/helpers/logging.ps1 diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1 index 5abaaaf..c813350 100644 --- a/hooks/dispatcher.ps1 +++ b/hooks/dispatcher.ps1 @@ -8,14 +8,9 @@ param([string]$Phase = "") $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers" $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\dispatcher.log" $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1" +. "$PSScriptRoot\helpers\logging.ps1" + -function Write-Log { - param([string]$Message) - $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" - $logMessage = "[$timestamp] [$Phase] $Message" - $logMessage | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 - Write-Verbose $logMessage -} function Block-Tool { param([string]$Reason) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index 6726ee6..92cc19f 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -27,6 +27,7 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) { $HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers" $INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1" +. "$PSScriptRoot\..\helpers\logging.ps1" $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log" $SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt" $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data" @@ -523,33 +524,7 @@ function Start-LogFlushTimer { } } -function Write-Log { - param( - [string]$Message, - [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO", - [string]$Context = "" - ) - - # Check if debug logging is disabled - $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) { - $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true' - } else { - $true # Default: enabled - } - - if ($Level -eq 'DEBUG' -and -not $debugLogging) { - return - } - $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" - $contextPart = if ($Context) { " [$Context]" } else { "" } - $logEntry = "[$timestamp] [$Level]$contextPart $Message" - try { - $logEntry | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 -ErrorAction SilentlyContinue - } catch { - # Silently fail - } -} # Removed - now using direct invoke-mcp.ps1 calls diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1 new file mode 100644 index 0000000..755306f --- /dev/null +++ b/hooks/helpers/logging.ps1 @@ -0,0 +1,27 @@ +[CmdletBinding()] +param() + +function Write-Log { + param( + [string]$Message, + [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO", + [string]$Context = "" + ) + + # Check if debug logging is disabled + $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) { + $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true' + } else { + $true # Default: enabled + } + + if ($Level -eq 'DEBUG' -and -not $debugLogging) { + return + } + + $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" + $contextPart = if ($Context) { " [$Context]" } else { "" } + $logMessage = "[$timestamp] [$Level]$contextPart $Message" + $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8 + Write-Verbose $logMessage +} \ No newline at end of file From 0428d1cee8ffd91608a5f0be9bc4f761005c0174 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 2 Nov 2025 22:41:06 -0500 Subject: [PATCH 06/26] refactor(logging): replace write-host with write-log --- .../handlers/token-optimizer-orchestrator.ps1 | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index 92cc19f..5dc086b 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -12,7 +12,7 @@ param( # DIAGNOSTIC: Log script version/load time to verify latest version is being used $SCRIPT_VERSION = Get-Date -Format 'yyyyMMdd.HHmmss' -Write-Host "DEBUG: token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" -ForegroundColor Cyan +Write-Log "token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" "DEBUG" # Read JSON from temp file if provided # DO NOT delete temp file - dispatcher will clean it up after all handlers run @@ -21,7 +21,7 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) { try { $InputJson = Get-Content -Path $InputJsonFile -Raw -Encoding UTF8 } catch { - Write-Host "ERROR: Failed to read InputJsonFile: $($_.Exception.Message)" -ForegroundColor Red + Write-Log "Failed to read InputJsonFile: $($_.Exception.Message)" "ERROR" } } @@ -240,8 +240,8 @@ if (-not ('TokenCounter' -as [type])) { $this.Cache.Set($cacheKey, $tokenCount) return $tokenCount } catch { - # API failed, fall back to estimation (use Write-Host since Write-Log defined later) - Write-Host "WARN: Token counting API failed: $($_.Exception.Message), falling back to estimation" -ForegroundColor Yellow + # API failed, fall back to estimation + Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN" } } @@ -346,7 +346,7 @@ if (-not ('TokenCounter' -as [type])) { if (-not $script:TokenCounter) { $apiKey = $env:GOOGLE_AI_API_KEY if (-not $apiKey) { - Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow + Write-Log "GOOGLE_AI_API_KEY not set, falling back to estimation only" "WARN" } $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" } $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName) @@ -1852,43 +1852,43 @@ function Handle-OptimizeToolOutput { $ErrorActionPreference = 'Stop' try { - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Entered function." + Write-Log "[Handle-OptimizeToolOutput] Entered function." "DEBUG" if (-not $InputJson) { Write-Log "No input received for tool output optimization" "WARN" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] No input received, returning." + Write-Log "[Handle-OptimizeToolOutput] No input received, returning." "DEBUG" return } - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Parsing InputJson..." + Write-Log "[Handle-OptimizeToolOutput] Parsing InputJson..." "DEBUG" $data = $InputJson | ConvertFrom-Json $toolName = $data.tool_name $toolOutput = $data.tool_response # FIXED: Claude Code uses tool_response not tool_result $outputType = if ($toolOutput) { $toolOutput.GetType().Name } else { "null" } Write-Log "DEBUG: tool_name=$toolName, tool_response_type=$outputType, has_content=$(-not -not $toolOutput)" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. toolName=$toolName, outputType=$outputType" + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. toolName=$toolName, outputType=$outputType" "DEBUG" # Skip if no output or if output is already optimized Write-Log "DEBUG: Checking if toolOutput is null or empty" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check." + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check." "DEBUG" if (-not $toolOutput) { Write-Log "No tool output to optimize for: $toolName (toolOutput is null/false)" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] toolOutput is null/false, returning." + Write-Log "[Handle-OptimizeToolOutput] toolOutput is null/false, returning." "DEBUG" return } - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists." + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists." "DEBUG" # Convert output to string for token counting $outputText = "" try { - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])" + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])" "DEBUG" $outputText = if ($toolOutput -is [string]) { $toolOutput } else { $toolOutput | ConvertTo-Json -Depth 10 -ErrorAction Stop } Write-Log "DEBUG: Converted tool output to string. Length: $($outputText.Length)" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. Length: $($outputText.Length)" + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. Length: $($outputText.Length)" "DEBUG" } catch { Write-Log "ERROR: Failed to convert tool output to JSON string for ${toolName}: $($_.Exception.Message)" "ERROR" - Write-Host "ERROR: [Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)" + Write-Log "[Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)" "ERROR" return } From d04cf6a8e682c458194057b0486b9e9d5f158024 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 2 Nov 2025 23:24:12 -0500 Subject: [PATCH 07/26] refactor(error-handling): use centralized error handling function --- .../handlers/token-optimizer-orchestrator.ps1 | 67 +++++++++---------- hooks/helpers/logging.ps1 | 12 ++++ 2 files changed, 45 insertions(+), 34 deletions(-) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index 5dc086b..a06e76e 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -421,7 +421,7 @@ function Read-SessionFile { Write-Log "Failed to acquire read lock on session file '$FilePath', retrying... ($($_.Exception.Message))" "WARN" Start-Sleep -Milliseconds $retryDelayMs } catch { - Write-Log "Failed to read session file '$FilePath': $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to read session file '$FilePath'" return $null } } @@ -453,7 +453,7 @@ function Write-SessionFile { Write-Log "Failed to acquire write lock on session file '$FilePath', retrying... ($($_.Exception.Message))" "WARN" Start-Sleep -Milliseconds $retryDelayMs } catch { - Write-Log "Failed to write session file '$FilePath': $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to write session file '$FilePath'" return $false } finally { # Ensure writer and fileStream are disposed even if errors occur @@ -499,7 +499,7 @@ function Flush-OperationLogs { Write-Log "Flushed $($script:OperationLogBuffer.Count) operation logs" "DEBUG" $script:OperationLogBuffer = @() } catch { - Write-Log "Failed to flush operation logs: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to flush operation logs" } } } @@ -534,7 +534,7 @@ function Get-SessionInfo { $session = Read-SessionFile -FilePath $SESSION_FILE return $session } catch { - Write-Log "Failed to read session file: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to read session file" } } return $null @@ -691,7 +691,7 @@ function Handle-LogOperation { Write-Log "Logged operation: $toolName ($tokens tokens)" "DEBUG" } catch { - Write-Log "Operation logging failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Operation logging failed" } } @@ -721,7 +721,7 @@ function Handle-OptimizeSession { } } catch { - Write-Log "Session optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Session optimization failed" } } @@ -797,7 +797,7 @@ function Handle-ContextGuard { return 0 # Success - allow operation to proceed } catch { - Write-Log "Context guard failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Context guard failed" return 0 # On error, don't block } } @@ -827,7 +827,7 @@ function Handle-PeriodicOptimize { } } catch { - Write-Log "Periodic optimize failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Periodic optimize failed" } } @@ -852,7 +852,7 @@ function Handle-CacheWarmup { } } catch { - Write-Log "Cache warmup failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Cache warmup failed" } } @@ -892,7 +892,7 @@ function Handle-SessionReport { } } catch { - Write-Log "Session report failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Session report failed" } } @@ -974,11 +974,11 @@ function Handle-UserPromptOptimization { Write-Log "Optimized user prompt: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO" } } catch { - Write-Log "Prompt optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Prompt optimization failed" } } catch { - Write-Log "UserPromptOptimization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "UserPromptOptimization handler failed" } } @@ -1030,7 +1030,7 @@ function Handle-SessionStartInit { } } catch { - Write-Log "SessionStartInit handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SessionStartInit handler failed" } } @@ -1075,7 +1075,7 @@ function Handle-SmartDiff { return $null } catch { - Write-Log "SmartDiff handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SmartDiff handler failed" return $null } } @@ -1115,7 +1115,7 @@ function Handle-SmartLogs { return $null } catch { - Write-Log "SmartLogs handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SmartLogs handler failed" return $null } } @@ -1170,7 +1170,7 @@ function Handle-ToolSpecificOptimization { return $ToolOutput } catch { - Write-Log "ToolSpecificOptimization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "ToolSpecificOptimization handler failed" return $ToolOutput } } @@ -1208,7 +1208,7 @@ function Handle-MetricCollector { return $null } catch { - Write-Log "MetricCollector handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "MetricCollector handler failed" return $null } } @@ -1248,7 +1248,7 @@ function Handle-AlertManager { return $null } catch { - Write-Log "AlertManager handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "AlertManager handler failed" return $null } } @@ -1280,7 +1280,7 @@ function Handle-HealthMonitor { return $null } catch { - Write-Log "HealthMonitor handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "HealthMonitor handler failed" return $null } } @@ -1318,7 +1318,7 @@ function Handle-MonitoringIntegration { return $null } catch { - Write-Log "MonitoringIntegration handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "MonitoringIntegration handler failed" return $null } } @@ -1354,7 +1354,7 @@ function Handle-AnalyzeOptimization { return $null } catch { - Write-Log "AnalyzeOptimization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "AnalyzeOptimization handler failed" return $null } } @@ -1383,7 +1383,7 @@ function Handle-CacheAnalytics { return $null } catch { - Write-Log "CacheAnalytics handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheAnalytics handler failed" return $null } } @@ -1413,7 +1413,7 @@ function Handle-CacheOptimizer { return $null } catch { - Write-Log "CacheOptimizer handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheOptimizer handler failed" return $null } } @@ -1453,7 +1453,7 @@ function Handle-CacheCompression { return $Data } catch { - Write-Log "CacheCompression handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheCompression handler failed" return $Data } } @@ -1480,7 +1480,7 @@ function Handle-CacheInvalidation { Write-Log "Cache invalidation completed for pattern: $Pattern" "DEBUG" } catch { - Write-Log "CacheInvalidation handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheInvalidation handler failed" } } @@ -1520,7 +1520,7 @@ function Handle-SmartCache { return $null } catch { - Write-Log "SmartCache handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SmartCache handler failed" return $null } } @@ -1568,7 +1568,7 @@ function Handle-IntelligentSummarization { return $Text } catch { - Write-Log "IntelligentSummarization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "IntelligentSummarization handler failed" return $Text } } @@ -1614,7 +1614,7 @@ function Handle-PatternRecognition { return $null } catch { - Write-Log "PatternRecognition handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "PatternRecognition handler failed" return $null } } @@ -1657,7 +1657,7 @@ function Handle-PredictiveAnalytics { return $Context } catch { - Write-Log "PredictiveAnalytics handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "PredictiveAnalytics handler failed" return $Context } } @@ -1691,7 +1691,7 @@ function Handle-IntelligentAssistant { return $null } catch { - Write-Log "IntelligentAssistant handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "IntelligentAssistant handler failed" return $null } } @@ -1836,7 +1836,7 @@ function Handle-PreToolUseOptimization { } } catch { - Write-Log "PreToolUse optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "PreToolUse optimization failed" return 1 } return 0 @@ -1909,8 +1909,7 @@ function Handle-OptimizeToolOutput { Write-Log "WARN: count_tokens result did not contain expected content" "WARN" } } catch { - Write-Log "ERROR: Token counting failed for ${toolName}: $($_.Exception.Message)" "ERROR" - Write-Log "ERROR: Stack Trace: $($_.ScriptStackTrace)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Token counting failed for ${toolName}" return } @@ -1996,7 +1995,7 @@ function Handle-OptimizeToolOutput { Update-SessionOperation -TokensDelta $afterTokens } } catch { - Write-Log "Tool output optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Tool output optimization failed" } } catch { diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1 index 755306f..7b87cd0 100644 --- a/hooks/helpers/logging.ps1 +++ b/hooks/helpers/logging.ps1 @@ -24,4 +24,16 @@ function Write-Log { $logMessage = "[$timestamp] [$Level]$contextPart $Message" $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8 Write-Verbose $logMessage +} + +function Handle-Error { + param( + [System.Exception]$Exception, + [string]$Message = "" + ) + + $errorMessage = if ($Message) { $Message } else { $Exception.Message } + $stackTrace = $Exception.ScriptStackTrace + Write-Log "ERROR: $errorMessage" "ERROR" + Write-Log "StackTrace: $stackTrace" "ERROR" } \ No newline at end of file From 25251108fc030ae05fcf90c0c1177e912b441c9e Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Tue, 4 Nov 2025 21:32:09 -0500 Subject: [PATCH 08/26] feat(optimization): implement compression for stored optimization results --- .../handlers/token-optimizer-orchestrator.ps1 | 74 ++- src/analytics/optimization-storage.ts | 158 +++--- src/core/compression-engine.ts | 222 +++------ src/server/index.ts | 59 ++- src/tools/optimization-storage-tool.ts | 96 ++-- tests/benchmarks/results.json | 468 +++++++++--------- tests/unit/cache-engine.test.ts | 3 +- 7 files changed, 547 insertions(+), 533 deletions(-) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index 5abcfdb..ae55748 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -1932,16 +1932,52 @@ function Handle-OptimizeToolOutput { Write-Log "Tool-specific optimization failed: $($_.Exception.Message)" "WARN" } - # Optimize using optimize_text (PHASE 4: Reduced quality for performance) + # Calculate SHA256 hash of the output text for caching + $hasher = [System.Security.Cryptography.SHA256]::Create() + $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText)) + $originalTextHash = [System.BitConverter]::ToString($hashBytes).Replace("-", "").ToLower() + + # Attempt to retrieve from optimization storage try { - # PHASE 2 FIX: Use content hash instead of timestamp for cache key - $hasher = [System.Security.Cryptography.SHA256]::Create() - $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText)) - $contentHash = [Convert]::ToBase64String($hashBytes).Substring(0, 16) + $retrieveArgs = @{ + operation = "retrieve" + originalTextHash = $originalTextHash + } + $retrieveJson = $retrieveArgs | ConvertTo-Json -Compress + $retrieveResultJson = & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $retrieveJson + $retrieveResult = if ($retrieveResultJson) { $retrieveResultJson | ConvertFrom-Json } else { $null } + + if ($retrieveResult -and $retrieveResult.success) { + Write-Log "Cache HIT for optimization result. Hash: $originalTextHash" "INFO" + $optimizedTextBytes = [System.Convert]::FromBase64String($retrieveResult.optimizedText) + $optimizedText = [System.Text.Encoding]::UTF8.GetString($optimizedTextBytes) + $afterTokens = $retrieveResult.optimizedTokens + $saved = $retrieveResult.tokensSaved + $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 } + + if ($script:CurrentSession) { + $script:CurrentSession.cacheHits++ + if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) { + Write-Log "Session stats updated and persisted after cache hit." "DEBUG" + } else { + Write-Log "Failed to persist session stats after cache hit." "ERROR" + } + } + + Write-Log "Using cached optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO" + Update-SessionOperation -TokensDelta $afterTokens + return + } else { + Write-Log "Cache MISS for optimization result. Hash: $originalTextHash" "DEBUG" + } + } catch { + Handle-Error -Exception $_.Exception -Message "Failed to retrieve from optimization storage" + } + # Optimize using optimize_text (PHASE 4: Reduced quality for performance) + try { $optimizeArgs = @{ text = $outputText - key = "tool_output_${toolName}_$contentHash" quality = $script:OPTIMIZATION_QUALITY } $optimizeJson = $optimizeArgs | ConvertTo-Json -Compress @@ -1956,34 +1992,43 @@ function Handle-OptimizeToolOutput { $saved = $beforeTokens - $afterTokens $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 } - # PHASE 1 FIX: Rollback logic - only use optimization if it actually helps if ($afterTokens -ge $beforeTokens) { Write-Log "Optimization made things worse or had no effect ($beforeTokens → $afterTokens tokens), REVERTING to original" "WARN" - - # PHASE 4 FIX: Track failure and persist immediately if ($script:CurrentSession) { $script:CurrentSession.optimizationFailures++ - # CRITICAL: Persist immediately to disk for multi-process visibility if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) { Write-Log "Session stats updated and persisted after optimization failure." "DEBUG" } else { Write-Log "Failed to persist session stats after optimization failure." "ERROR" } } - - # Don't update session with optimized tokens, skip this optimization return } Write-Log "Optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO" - # PHASE 4 FIX: Track success and detailed stats, persist immediately + # Store the new optimization result + try { + $storeArgs = @{ + operation = "store" + originalTextHash = $originalTextHash + optimizedText = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes($optimizedText)) + originalTokens = $beforeTokens + optimizedTokens = $afterTokens + tokensSaved = $saved + } + $storeJson = $storeArgs | ConvertTo-Json -Compress + & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $storeJson + Write-Log "Stored new optimization result. Hash: $originalTextHash" "DEBUG" + } catch { + Handle-Error -Exception $_.Exception -Message "Failed to store optimization result" + } + if ($script:CurrentSession) { $script:CurrentSession.optimizationSuccesses++ $script:CurrentSession.totalOriginalTokens += $beforeTokens $script:CurrentSession.totalOptimizedTokens += $afterTokens $script:CurrentSession.totalTokensSaved += $saved - # CRITICAL: Persist immediately to disk for multi-process visibility if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) { Write-Log "Session stats updated and persisted after optimization success." "DEBUG" } else { @@ -1991,7 +2036,6 @@ function Handle-OptimizeToolOutput { } } - # Update session tokens (only if optimization helped) Update-SessionOperation -TokensDelta $afterTokens } } catch { diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts index 99fd16f..6952ce6 100644 --- a/src/analytics/optimization-storage.ts +++ b/src/analytics/optimization-storage.ts @@ -1,107 +1,79 @@ -/** - * Persistent storage for optimization results data using SQLite - */ - -import Database from 'better-sqlite3'; -import path from 'path'; -import os from 'os'; -import fs from 'fs'; -import { createGzip, gunzipSync } from 'zlib'; -import { promisify } from 'util'; - -const gzip = promisify(createGzip); +import sqlite3 from 'sqlite3'; +import { open, Database } from 'sqlite'; +import { CompressionEngine } from '../core/compression-engine'; export interface OptimizationResult { - originalTextHash: string; - optimizedText: Buffer; - compressionAlgorithm: string; + originalTextHash: string; + optimizedText: string; + originalTokens: number; + optimizedTokens: number; + tokensSaved: number; } -/** - * SQLite-backed optimization results storage - */ export class SqliteOptimizationStorage { - private db: Database.Database; - - constructor(dbPath?: string) { - // Default to user's home directory - const defaultPath = path.join( - os.homedir(), - '.token-optimizer-mcp', - 'optimization.db' - ); - const finalPath = dbPath || defaultPath; + private db: Database; + private dbPath: string; + private compressionEngine: CompressionEngine; - // Ensure directory exists - const dir = path.dirname(finalPath); - if (!fs.existsSync(dir)) { - fs.mkdirSync(dir, { recursive: true }); + constructor(dbPath: string = './optimization.db') { + this.dbPath = dbPath; + this.compressionEngine = new CompressionEngine(); } - this.db = new Database(finalPath); - this.initializeDatabase(); - } - - /** - * Initialize database schema - */ - private initializeDatabase(): void { - this.db.exec(` - CREATE TABLE IF NOT EXISTS optimization_results ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - original_text_hash TEXT NOT NULL UNIQUE, - optimized_text BLOB NOT NULL, - compression_algorithm TEXT NOT NULL, - created_at DATETIME DEFAULT CURRENT_TIMESTAMP - ); - - CREATE INDEX IF NOT EXISTS idx_original_text_hash ON optimization_results(original_text_hash); - `); - } - - /** - * Save a single optimization result - */ - async save(entry: OptimizationResult): Promise { - const stmt = this.db.prepare(` - INSERT OR REPLACE INTO optimization_results ( - original_text_hash, optimized_text, compression_algorithm - ) VALUES (?, ?, ?) - `); - - const compressedOptimizedText = await gzip(entry.optimizedText); - - stmt.run( - entry.originalTextHash, - compressedOptimizedText, - 'gzip' - ); - } + public async initializeDatabase(): Promise { + this.db = await open({ + filename: this.dbPath, + driver: sqlite3.Database + }); + + await this.db.exec(` + CREATE TABLE IF NOT EXISTS optimization_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + original_text_hash TEXT NOT NULL UNIQUE, + optimized_text_compressed BLOB NOT NULL, + compression_algorithm TEXT NOT NULL, + original_tokens INTEGER NOT NULL, + optimized_tokens INTEGER NOT NULL, + tokens_saved INTEGER NOT NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + `); + } - /** - * Get an optimization result by hash - */ - async get(originalTextHash: string): Promise { - const stmt = this.db.prepare('SELECT * FROM optimization_results WHERE original_text_hash = ?'); - const row = stmt.get(originalTextHash) as any; + public async save(entry: OptimizationResult): Promise { + const compressedOptimizedText = this.compressionEngine.compress(entry.optimizedText); - if (!row) { - return null; + await this.db.run( + `INSERT INTO optimization_results (original_text_hash, optimized_text_compressed, compression_algorithm, original_tokens, optimized_tokens, tokens_saved) + VALUES (?, ?, ?, ?, ?, ?)`, + [entry.originalTextHash, compressedOptimizedText.compressed, 'brotli', entry.originalTokens, entry.optimizedTokens, entry.tokensSaved] + ); } - const decompressedOptimizedText = gunzipSync(row.optimized_text); - - return { - originalTextHash: row.original_text_hash, - optimizedText: decompressedOptimizedText, - compressionAlgorithm: row.compression_algorithm, - }; - } + public async get(originalTextHash: string): Promise { + const row = await this.db.get( + 'SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved FROM optimization_results WHERE original_text_hash = ?', + originalTextHash + ); + + if (!row) { + return null; + } + + const optimizedText = this.compressionEngine.decompress(row.optimized_text_compressed); + + return { + originalTextHash, + optimizedText, + originalTokens: row.original_tokens, + optimizedTokens: row.optimized_tokens, + tokensSaved: row.tokens_saved + }; + } - /** - * Close the database connection - */ - async close(): Promise { - this.db.close(); - } + public async close(): Promise { + if (this.db) { + await this.db.close(); + } + } } diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts index 2be5b9e..e7922cf 100644 --- a/src/core/compression-engine.ts +++ b/src/core/compression-engine.ts @@ -1,167 +1,89 @@ import { brotliCompressSync, brotliDecompressSync, constants } from 'zlib'; export interface CompressionResult { - compressed: Buffer; - originalSize: number; - compressedSize: number; - ratio: number; - percentSaved: number; -} - -export interface CompressionOptions { - quality?: number; // 0-11, default 11 (max compression) - mode?: 'text' | 'font' | 'generic'; -} - -export class CompressionEngine { - private readonly DEFAULT_QUALITY = 11; - - /** - * Compress text using Brotli - */ - compress(text: string, options?: CompressionOptions): CompressionResult { - const buffer = Buffer.from(text, 'utf-8'); - const quality = options?.quality ?? this.DEFAULT_QUALITY; - const mode = this.getModeConstant(options?.mode); - - const compressed = brotliCompressSync(buffer, { - params: { - [constants.BROTLI_PARAM_QUALITY]: quality, - [constants.BROTLI_PARAM_MODE]: mode, - }, - }); - - const originalSize = buffer.length; - const compressedSize = compressed.length; - const ratio = originalSize > 0 ? compressedSize / originalSize : 0; - const percentSaved = - originalSize > 0 - ? ((originalSize - compressedSize) / originalSize) * 100 - : 0; - - return { - compressed, - originalSize, - compressedSize, - ratio, - percentSaved, - }; - } - - /** - * Decompress Brotli-compressed data - */ - decompress(compressed: Buffer): string { - const decompressed = brotliDecompressSync(compressed); - return decompressed.toString('utf-8'); - } - - /** - * Compress to base64 string (for easier storage) - */ - compressToBase64( - text: string, - options?: CompressionOptions - ): { - compressed: string; + compressed: Buffer; originalSize: number; compressedSize: number; ratio: number; percentSaved: number; - } { - const result = this.compress(text, options); - - return { - compressed: result.compressed.toString('base64'), - originalSize: result.originalSize, - compressedSize: result.compressedSize, - ratio: result.ratio, - percentSaved: result.percentSaved, - }; - } - - /** - * Decompress from base64 string - */ - decompressFromBase64(compressed: string): string { - const buffer = Buffer.from(compressed, 'base64'); - return this.decompress(buffer); - } +} - /** - * Check if compression would be beneficial - */ - shouldCompress(text: string, minSize: number = 1000): boolean { - // Don't compress small texts - overhead not worth it - if (text.length < minSize) { - return false; +export class CompressionEngine { + public compress(text: string, options?: { quality?: number; mode?: string; }): CompressionResult { + const originalSize = Buffer.byteLength(text, 'utf8'); + if (originalSize === 0) { + return { + compressed: Buffer.alloc(0), + originalSize: 0, + compressedSize: 0, + ratio: 0, + percentSaved: 0, + }; + } + + const params = { + [constants.BROTLI_PARAM_QUALITY]: options?.quality ?? constants.BROTLI_MAX_QUALITY, + [constants.BROTLI_PARAM_MODE]: options?.mode === 'text' ? constants.BROTLI_MODE_TEXT : constants.BROTLI_MODE_GENERIC, + }; + + const compressed = brotliCompressSync(text, { params }); + const compressedSize = compressed.length; + const ratio = compressedSize / originalSize; + const percentSaved = (1 - ratio) * 100; + + return { + compressed, + originalSize, + compressedSize, + ratio, + percentSaved, + }; } - // Quick sample compression to check ratio - const sample = text.slice(0, Math.min(text.length, 5000)); - const result = this.compress(sample, { quality: 4 }); // Use lower quality for quick test + public decompress(buffer: Buffer): string { + if (!buffer || buffer.length === 0) { + return ''; + } + return brotliDecompressSync(buffer).toString('utf8'); + } - // Only compress if we get at least 20% reduction - return result.percentSaved >= 20; - } + public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): CompressionResult & { compressed: string } { + const result = this.compress(text, options); + return { + ...result, + compressed: result.compressed.toString('base64'), + }; + } - /** - * Batch compress multiple texts - */ - compressBatch( - texts: string[], - options?: CompressionOptions - ): Array<{ - index: number; - compressed: Buffer; - originalSize: number; - compressedSize: number; - ratio: number; - }> { - return texts.map((text, index) => { - const result = this.compress(text, options); - return { - index, - compressed: result.compressed, - originalSize: result.originalSize, - compressedSize: result.compressedSize, - ratio: result.ratio, - }; - }); - } + public decompressFromBase64(base64: string): string { + const buffer = Buffer.from(base64, 'base64'); + return this.decompress(buffer); + } - /** - * Get compression statistics for text - */ - getCompressionStats(text: string): { - uncompressed: number; - compressed: number; - ratio: number; - percentSaved: number; - recommended: boolean; - } { - const result = this.compress(text); + public compressBatch(texts: string[]): (CompressionResult & { index: number; })[] { + return texts.map((text, index) => ({ + ...this.compress(text), + index, + })); + } - return { - uncompressed: result.originalSize, - compressed: result.compressedSize, - ratio: result.ratio, - percentSaved: result.percentSaved, - recommended: this.shouldCompress(text), - }; - } + public shouldCompress(text: string, minSize: number = 500): boolean { + if (Buffer.byteLength(text, 'utf8') < minSize) { + return false; + } + const stats = this.getCompressionStats(text); + return stats.percentSaved >= 20; + } - /** - * Convert mode string to Brotli constant - */ - private getModeConstant(mode?: 'text' | 'font' | 'generic'): number { - switch (mode) { - case 'text': - return constants.BROTLI_MODE_TEXT; - case 'font': - return constants.BROTLI_MODE_FONT; - default: - return constants.BROTLI_MODE_GENERIC; + public getCompressionStats(text: string): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } { + const result = this.compress(text); + const recommended = result.originalSize >= 500 && result.percentSaved >= 20; + return { + uncompressed: result.originalSize, + compressed: result.compressedSize, + ratio: result.ratio, + percentSaved: result.percentSaved, + recommended: recommended, + }; } - } } diff --git a/src/server/index.ts b/src/server/index.ts index 7c23370..8c09500 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -126,12 +126,11 @@ import { getMcpServerAnalyticsTool, GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/get-mcp-server-analytics.js'; -import { - getExportAnalyticsTool, - EXPORT_ANALYTICS_TOOL_DEFINITION, -} from '../tools/analytics/export-analytics.js'; +import { getExportAnalyticsTool, EXPORT_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/export-analytics.js'; +import { OptimizationStorageTool } from '../tools/optimization-storage-tool.js'; import { AnalyticsManager } from '../analytics/analytics-manager.js'; + // API & Database tools import { getSmartSql, @@ -369,6 +368,43 @@ const getHookAnalytics = getHookAnalyticsTool(analyticsManager); const getActionAnalytics = getActionAnalyticsTool(analyticsManager); const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager); const exportAnalytics = getExportAnalyticsTool(analyticsManager); +const optimizationStorage = new OptimizationStorageTool(); + +const OPTIMIZATION_STORAGE_TOOL_DEFINITION = { + name: optimizationStorage.name, + description: optimizationStorage.description, + inputSchema: { + type: 'object', + properties: { + operation: { + type: 'string', + enum: ['store', 'retrieve'], + description: 'The operation to perform.', + }, + originalTextHash: { + type: 'string', + description: 'The SHA256 hash of the original text.', + }, + optimizedText: { + type: 'string', + description: 'The base64 encoded optimized text (for store operation).', + }, + originalTokens: { + type: 'number', + description: 'The number of tokens in the original text (for store operation).', + }, + optimizedTokens: { + type: 'number', + description: 'The number of tokens in the optimized text (for store operation).', + }, + tokensSaved: { + type: 'number', + description: 'The number of tokens saved (for store operation).', + }, + }, + required: ['operation', 'originalTextHash'], + }, +}; // Create MCP server const server = new Server( @@ -655,6 +691,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { GET_ACTION_ANALYTICS_TOOL_DEFINITION, GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION, EXPORT_ANALYTICS_TOOL_DEFINITION, + OPTIMIZATION_STORAGE_TOOL_DEFINITION, ], }; }); @@ -1983,7 +2020,19 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_grep': { const { pattern, ...options } = args as any; - const result = await runSmartGrep(pattern, options); + const result = await smartGrep.run(pattern, options); + return { + content: [ + { + type: 'text', + text: JSON.stringify(result, null, 2), + }, + ], + }; + } + + case 'optimization_storage': { + const result = await optimizationStorage.invoke({} as TurnContext, { tool_name: name, tool_input: args }); return { content: [ { diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts index c4b83cd..9949d1e 100644 --- a/src/tools/optimization-storage-tool.ts +++ b/src/tools/optimization-storage-tool.ts @@ -1,45 +1,71 @@ -import { Tool, ToolInvocation } from '@microsoft/teams-ai'; -import { TurnContext } from 'botbuilder'; -import { SqliteOptimizationStorage } from '../analytics/optimization-storage'; +import { Tool, ToolInvocation, TurnContext } from '../types/turn-context'; +import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage'; export class OptimizationStorageTool implements Tool { - private readonly storage: SqliteOptimizationStorage; + public readonly name = 'optimization_storage'; + public readonly description = 'A tool for storing and retrieving compressed optimization results.'; + + private storage: SqliteOptimizationStorage; constructor() { this.storage = new SqliteOptimizationStorage(); + this.storage.initializeDatabase(); + } + + public async invoke(context: TurnContext, invocation: ToolInvocation): Promise { + const operation = invocation.arguments?.operation; + + if (operation === 'store') { + return this.store(invocation.arguments); + } else if (operation === 'retrieve') { + return this.retrieve(invocation.arguments); + } else { + return { error: `Unknown operation: ${operation}` }; + } + } + + private async store(args: any): Promise { + try { + const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = args; + if (!originalTextHash || !optimizedText || originalTokens === undefined || optimizedTokens === undefined || tokensSaved === undefined) { + return { error: 'Missing required arguments for store operation.' }; + } + + const optimizationResult: OptimizationResult = { + originalTextHash, + optimizedText: Buffer.from(optimizedText, 'base64').toString('utf8'), + originalTokens, + optimizedTokens, + tokensSaved + }; + + await this.storage.save(optimizationResult); + return { success: true }; + } catch (error) { + return { error: `Failed to store optimization result: ${error.message}` }; + } } - name = 'optimization_storage'; - description = 'A tool for storing and retrieving optimization results.'; - - async invoke(context: TurnContext, invocation: ToolInvocation): Promise { - const { operation, originalTextHash, optimizedText } = invocation.data; - - switch (operation) { - case 'store': - if (!originalTextHash || !optimizedText) { - return { error: 'Missing required parameters for store operation.' }; - } - await this.storage.save({ - originalTextHash, - optimizedText: Buffer.from(optimizedText, 'base64'), - compressionAlgorithm: 'gzip', - }); - return { success: true }; - case 'retrieve': - if (!originalTextHash) { - return { error: 'Missing required parameters for retrieve operation.' }; - } - const result = await this.storage.get(originalTextHash); - if (result) { - return { - ...result, - optimizedText: result.optimizedText.toString('base64'), - }; - } - return { success: false, message: 'No result found for the given hash.' }; - default: - return { error: `Unknown operation: ${operation}` }; + private async retrieve(args: any): Promise { + try { + const { originalTextHash } = args; + if (!originalTextHash) { + return { error: 'Missing required argument for retrieve operation: originalTextHash' }; + } + + const result = await this.storage.get(originalTextHash); + + if (result) { + return { + success: true, + ...result, + optimizedText: Buffer.from(result.optimizedText, 'utf8').toString('base64') + }; + } else { + return { success: false, message: 'Not found' }; + } + } catch (error) { + return { error: `Failed to retrieve optimization result: ${error.message}` }; } } } diff --git a/tests/benchmarks/results.json b/tests/benchmarks/results.json index b54d5be..dd72b7a 100644 --- a/tests/benchmarks/results.json +++ b/tests/benchmarks/results.json @@ -1,314 +1,314 @@ [ { "operation": "token-count-small", - "avgDuration": 0.21655369999999988, - "minDuration": 0.1389, - "maxDuration": 6.4655, - "p50": 0.1805, - "p90": 0.2406, - "p95": 0.2933, - "p99": 0.7547, - "throughput": 4617.792261226664, - "memoryUsed": 643496 + "avgDuration": 0.3614560000000002, + "minDuration": 0.1311, + "maxDuration": 8.1673, + "p50": 0.1678, + "p90": 0.3182, + "p95": 1.7474, + "p99": 4.338, + "throughput": 2766.5884644327366, + "memoryUsed": 653224 }, { "operation": "token-count-medium", - "avgDuration": 1.3377186000000005, - "minDuration": 0.8123, - "maxDuration": 18.5582, - "p50": 1.0083, - "p90": 1.5469, - "p95": 3.0017, - "p99": 9.8616, - "throughput": 747.5413737986446, - "memoryUsed": 235800 + "avgDuration": 1.9391590000000005, + "minDuration": 0.7915, + "maxDuration": 8.2937, + "p50": 1.1356, + "p90": 4.2806, + "p95": 5.0478, + "p99": 6.6412, + "throughput": 515.6874707025054, + "memoryUsed": 227808 }, { "operation": "token-count-large", - "avgDuration": 21.130661, - "minDuration": 15.5, - "maxDuration": 61.1893, - "p50": 19.1065, - "p90": 27.2373, - "p95": 29.4197, - "p99": 61.1893, - "throughput": 47.32459623482673, - "memoryUsed": 123872 + "avgDuration": 30.857169999999996, + "minDuration": 16.9689, + "maxDuration": 62.7117, + "p50": 28.8884, + "p90": 44.6519, + "p95": 52.7573, + "p99": 62.7117, + "throughput": 32.40737890091671, + "memoryUsed": 163384 }, { "operation": "token-count-batch", - "avgDuration": 5.449602000000001, - "minDuration": 3.8459, - "maxDuration": 13.5479, - "p50": 4.8888, - "p90": 7.3654, - "p95": 9.2773, - "p99": 13.5479, - "throughput": 183.49963905620996, - "memoryUsed": 418480 + "avgDuration": 8.504128, + "minDuration": 3.7843, + "maxDuration": 25.0622, + "p50": 6.9115, + "p90": 14.5447, + "p95": 17.9337, + "p99": 25.0622, + "throughput": 117.58995160938312, + "memoryUsed": 418424 }, { "operation": "token-estimate", - "avgDuration": 0.0012522999999999961, - "minDuration": 0.001, - "maxDuration": 0.0564, - "p50": 0.0012, - "p90": 0.0013, - "p95": 0.0014, - "p99": 0.0018, - "throughput": 798530.7035055521, - "memoryUsed": 149920 + "avgDuration": 0.0014780999999999996, + "minDuration": 0.0011, + "maxDuration": 0.0561, + "p50": 0.0014, + "p90": 0.0016, + "p95": 0.0017, + "p99": 0.0021, + "throughput": 676544.2121642651, + "memoryUsed": 149912 }, { "operation": "compress-small", - "avgDuration": 0.9627775000000005, - "minDuration": 0.426, - "maxDuration": 28.6613, - "p50": 0.5542, - "p90": 0.8703, - "p95": 1.9429, - "p99": 13.3281, - "throughput": 1038.661580687126, - "memoryUsed": -2487288 + "avgDuration": 1.2691879999999993, + "minDuration": 0.4314, + "maxDuration": 24.3388, + "p50": 0.5745, + "p90": 3.0667, + "p95": 4.4366, + "p99": 9.2609, + "throughput": 787.9053379010835, + "memoryUsed": -4992648 }, { "operation": "compress-medium", - "avgDuration": 0.7440725999999998, - "minDuration": 0.4942, - "maxDuration": 14.3456, - "p50": 0.6042, - "p90": 0.8371, - "p95": 1.1362, - "p99": 4.4296, - "throughput": 1343.9548775213605, - "memoryUsed": -1654048 + "avgDuration": 1.5216334000000002, + "minDuration": 0.5063, + "maxDuration": 13.7982, + "p50": 0.6649, + "p90": 3.5498, + "p95": 6.0332, + "p99": 9.4093, + "throughput": 657.1885186011294, + "memoryUsed": -1948624 }, { "operation": "compress-large", - "avgDuration": 2.7067430000000003, - "minDuration": 1.9602, - "maxDuration": 12.7897, - "p50": 2.2941, - "p90": 3.2423, - "p95": 4.5883, - "p99": 12.7897, - "throughput": 369.4477089254503, - "memoryUsed": 337712 + "avgDuration": 4.812509, + "minDuration": 2.2623, + "maxDuration": 13.9499, + "p50": 4.4611, + "p90": 7.8466, + "p95": 9.0442, + "p99": 13.9499, + "throughput": 207.79181919451992, + "memoryUsed": 359296 }, { "operation": "decompress", - "avgDuration": 0.031263400000000004, - "minDuration": 0.018, - "maxDuration": 2.1778, - "p50": 0.0243, - "p90": 0.0363, - "p95": 0.0642, - "p99": 0.0877, - "throughput": 31986.28428130018, - "memoryUsed": 3302040 + "avgDuration": 0.05447509999999999, + "minDuration": 0.0181, + "maxDuration": 3.3958, + "p50": 0.0238, + "p90": 0.0394, + "p95": 0.0754, + "p99": 2.0722, + "throughput": 18357.01081778648, + "memoryUsed": 3294192 }, { "operation": "compress-base64", - "avgDuration": 0.9150339999999996, - "minDuration": 0.5028, - "maxDuration": 16.1137, - "p50": 0.6502, - "p90": 0.9426, - "p95": 1.6786, - "p99": 9.5861, - "throughput": 1092.855566022684, - "memoryUsed": -4893280 + "avgDuration": 1.3982006000000007, + "minDuration": 0.5093, + "maxDuration": 17.5377, + "p50": 0.6728, + "p90": 3.5295, + "p95": 4.8434, + "p99": 9.593, + "throughput": 715.2049569997321, + "memoryUsed": -4899848 }, { "operation": "compress-quality-1", - "avgDuration": 0.0402535, - "minDuration": 0.0182, - "maxDuration": 1.2449, - "p50": 0.025, - "p90": 0.0459, - "p95": 0.0879, - "p99": 0.3411, - "throughput": 24842.56027426187, - "memoryUsed": 568648 + "avgDuration": 0.08870700000000004, + "minDuration": 0.0176, + "maxDuration": 4.9948, + "p50": 0.0227, + "p90": 0.0636, + "p95": 0.0849, + "p99": 3.4525, + "throughput": 11273.06751440134, + "memoryUsed": 575952 }, { "operation": "compress-quality-11", - "avgDuration": 1.4276415, - "minDuration": 0.7075, - "maxDuration": 19.9989, - "p50": 0.807, - "p90": 1.2711, - "p95": 6.5825, - "p99": 18.9334, - "throughput": 700.4559618083391, - "memoryUsed": 560544 + "avgDuration": 1.7096004999999996, + "minDuration": 0.6727, + "maxDuration": 8.1428, + "p50": 0.9267, + "p90": 3.7587, + "p95": 5.583, + "p99": 7.3277, + "throughput": 584.9319767980883, + "memoryUsed": 566880 }, { "operation": "cache-write", - "avgDuration": 0.28219179999999994, - "minDuration": 0.1069, - "maxDuration": 16.9523, - "p50": 0.1543, - "p90": 0.2669, - "p95": 0.3756, - "p99": 2.9096, - "throughput": 3543.689079555112, - "memoryUsed": 646560 + "avgDuration": 0.6586279000000003, + "minDuration": 0.1024, + "maxDuration": 14.5587, + "p50": 0.1522, + "p90": 1.0068, + "p95": 4.7546, + "p99": 7.3433, + "throughput": 1518.3079854345672, + "memoryUsed": 640856 }, { "operation": "cache-read-memory", - "avgDuration": 0.25163070000000015, - "minDuration": 0.0793, - "maxDuration": 28.8506, - "p50": 0.1203, - "p90": 0.1493, - "p95": 0.2349, - "p99": 4.1812, - "throughput": 3974.077884773199, - "memoryUsed": 478016 + "avgDuration": 0.4997243, + "minDuration": 0.079, + "maxDuration": 15.3762, + "p50": 0.1012, + "p90": 0.2858, + "p95": 4.201, + "p99": 8.698, + "throughput": 2001.1034084194023, + "memoryUsed": 478008 }, { "operation": "cache-read-disk", - "avgDuration": 0.45986899999999997, - "minDuration": 0.0884, - "maxDuration": 39.4532, - "p50": 0.1262, - "p90": 0.1936, - "p95": 0.2933, - "p99": 19.9866, - "throughput": 2174.532312462897, - "memoryUsed": 297264 + "avgDuration": 0.47186479999999986, + "minDuration": 0.0742, + "maxDuration": 12.5767, + "p50": 0.1039, + "p90": 0.2734, + "p95": 4.0729, + "p99": 8.0254, + "throughput": 2119.2511075206294, + "memoryUsed": 297344 }, { "operation": "cache-delete", - "avgDuration": 0.5386263000000001, - "minDuration": 0.0733, - "maxDuration": 99.0395, - "p50": 0.1153, - "p90": 0.1729, - "p95": 0.2314, - "p99": 5.5755, - "throughput": 1856.5747717851873, - "memoryUsed": 444360 + "avgDuration": 0.37161569999999977, + "minDuration": 0.0764, + "maxDuration": 18.4903, + "p50": 0.1064, + "p90": 0.2381, + "p95": 1.8399, + "p99": 5.8565, + "throughput": 2690.951970005575, + "memoryUsed": 442720 }, { "operation": "cache-stats", - "avgDuration": 0.32004450000000007, - "minDuration": 0.1652, - "maxDuration": 48.5593, - "p50": 0.2046, - "p90": 0.2563, - "p95": 0.3068, - "p99": 2.1103, - "throughput": 3124.5654901115304, - "memoryUsed": 765656 + "avgDuration": 0.6591104, + "minDuration": 0.1651, + "maxDuration": 10.2148, + "p50": 0.197, + "p90": 1.7847, + "p95": 4.4389, + "p99": 7.1797, + "throughput": 1517.1965121472822, + "memoryUsed": 770976 }, { "operation": "metrics-record", - "avgDuration": 0.0027574999999999657, - "minDuration": 0.0019, - "maxDuration": 0.1329, - "p50": 0.0025, - "p90": 0.003, - "p95": 0.0033, - "p99": 0.0056, - "throughput": 362647.32547597913, - "memoryUsed": 459872 + "avgDuration": 0.01021149999999998, + "minDuration": 0.0017, + "maxDuration": 4.0691, + "p50": 0.0022, + "p90": 0.0028, + "p95": 0.003, + "p99": 0.0075, + "throughput": 97928.80575821397, + "memoryUsed": 459840 }, { "operation": "metrics-cache-stats", - "avgDuration": 0.5644068000000001, - "minDuration": 0.0897, - "maxDuration": 113.9726, - "p50": 0.1075, - "p90": 0.1357, - "p95": 0.1805, - "p99": 18.041, - "throughput": 1771.7717079241424, - "memoryUsed": -5707968 + "avgDuration": 0.5884749999999999, + "minDuration": 0.0814, + "maxDuration": 101.3263, + "p50": 0.0959, + "p90": 0.146, + "p95": 2.1334, + "p99": 8.5735, + "throughput": 1699.3075321806366, + "memoryUsed": -5767600 }, { "operation": "metrics-breakdown", - "avgDuration": 2.7876345, - "minDuration": 0.6644, - "maxDuration": 53.726, - "p50": 0.8498, - "p90": 2.1808, - "p95": 24.6884, - "p99": 40.5231, - "throughput": 358.7270856347918, - "memoryUsed": 4166392 + "avgDuration": 3.030816000000001, + "minDuration": 0.6429, + "maxDuration": 14.331, + "p50": 0.9025, + "p90": 8.4685, + "p95": 10.3432, + "p99": 12.5062, + "throughput": 329.94414705478647, + "memoryUsed": 3432896 }, { "operation": "metrics-percentiles", - "avgDuration": 0.19757700000000003, - "minDuration": 0.0665, - "maxDuration": 22.145, - "p50": 0.0782, - "p90": 0.1113, - "p95": 0.1406, - "p99": 0.3571, - "throughput": 5061.3178659459345, - "memoryUsed": 6890256 + "avgDuration": 0.2575195, + "minDuration": 0.0656, + "maxDuration": 5.1485, + "p50": 0.0749, + "p90": 0.1152, + "p95": 0.2302, + "p99": 4.9353, + "throughput": 3883.201077976619, + "memoryUsed": 6877416 }, { "operation": "e2e-optimization", - "avgDuration": 4.089156000000001, - "minDuration": 1.7855, - "maxDuration": 36.6479, - "p50": 2.4161, - "p90": 7.4475, - "p95": 13.5108, - "p99": 36.6479, - "throughput": 244.54924194625977, - "memoryUsed": 843760 + "avgDuration": 10.268875999999999, + "minDuration": 2.0887, + "maxDuration": 20.2826, + "p50": 9.5857, + "p90": 16.0762, + "p95": 18.8724, + "p99": 20.2826, + "throughput": 97.38164137925126, + "memoryUsed": 848648 }, { "operation": "e2e-cache-hit", - "avgDuration": 0.5933224999999999, - "minDuration": 0.1019, - "maxDuration": 55.8155, - "p50": 0.1439, - "p90": 0.2178, - "p95": 0.3251, - "p99": 21.812, - "throughput": 1685.424031618555, - "memoryUsed": -14126360 + "avgDuration": 0.5115652000000002, + "minDuration": 0.0905, + "maxDuration": 18.1006, + "p50": 0.1224, + "p90": 0.3575, + "p95": 3.4573, + "p99": 8.1231, + "throughput": 1954.785040108279, + "memoryUsed": 2419984 }, { "operation": "regression-token-count", - "avgDuration": 1.0176798, - "minDuration": 0.4497, - "maxDuration": 19.4484, - "p50": 0.5622, - "p90": 0.7359, - "p95": 3.6875, - "p99": 15.1693, - "throughput": 982.6273450647246, - "memoryUsed": 263416 + "avgDuration": 1.9268184000000008, + "minDuration": 0.455, + "maxDuration": 17.2354, + "p50": 0.6255, + "p90": 5.6609, + "p95": 6.9407, + "p99": 12.8987, + "throughput": 518.9902691400496, + "memoryUsed": 263464 }, { "operation": "regression-compress", - "avgDuration": 1.1226146666666665, - "minDuration": 0.7638, - "maxDuration": 28.5129, - "p50": 0.8377, - "p90": 1.0199, - "p95": 1.281, - "p99": 10.5418, - "throughput": 890.7776013378293, - "memoryUsed": -5355728 + "avgDuration": 5.232439333333333, + "minDuration": 0.7827, + "maxDuration": 61.2683, + "p50": 2.3238, + "p90": 12.9448, + "p95": 14.6136, + "p99": 17.48, + "throughput": 191.11545042280474, + "memoryUsed": -4833328 }, { "operation": "regression-cache", - "avgDuration": 0.4148695999999999, - "minDuration": 0.1768, - "maxDuration": 17.4546, - "p50": 0.2465, - "p90": 0.3658, - "p95": 0.5651, - "p99": 5.4624, - "throughput": 2410.395941278899, - "memoryUsed": -447896 + "avgDuration": 2.2698532000000005, + "minDuration": 0.1812, + "maxDuration": 44.9064, + "p50": 0.2844, + "p90": 10.6146, + "p95": 13.1184, + "p99": 19.7883, + "throughput": 440.5571250158379, + "memoryUsed": -467608 } ] \ No newline at end of file diff --git a/tests/unit/cache-engine.test.ts b/tests/unit/cache-engine.test.ts index 8a99374..f8cabc2 100644 --- a/tests/unit/cache-engine.test.ts +++ b/tests/unit/cache-engine.test.ts @@ -48,7 +48,7 @@ describe('CacheEngine', () => { cache = new CacheEngine(testDbPath, 100); }); - afterEach(() => { + afterEach(async () => { // Restore original environment variable if (originalEnv !== undefined) { process.env.TOKEN_OPTIMIZER_CACHE_DIR = originalEnv; @@ -58,6 +58,7 @@ describe('CacheEngine', () => { // Clean up cache.close(); + await new Promise(resolve => setTimeout(resolve, 100)); // Add a small delay if (fs.existsSync(testDbPath)) { fs.unlinkSync(testDbPath); } From 90d946e99ffcb8469c31906777c4995b88ae885b Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:22:08 -0400 Subject: [PATCH 09/26] fix: align optimization-storage with project patterns (better-sqlite3, mcp tool shape) The original implementation of the optimization-storage feature imported sqlite3/sqlite (not installed) and used a TurnContext tool shape that does not exist in this codebase, breaking `tsc --noEmit`. This commit: - Rewrites SqliteOptimizationStorage to use better-sqlite3 (already a production dep) with WAL mode and a hash index. - Rewrites OptimizationStorageTool to match the project's run(options) MCP tool pattern and exports an input schema with items-complete array fields. - Fixes compression-engine.ts:compressToBase64 return type (was `CompressionResult & { compressed: string }`, producing Buffer & string for the overridden property). - Swaps the server's smart_grep case from the non-existent smartGrep instance to the runSmartGrep CLI function. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/analytics/optimization-storage.ts | 81 ++++++++----- src/core/compression-engine.ts | 7 +- src/server/index.ts | 4 +- src/tools/optimization-storage-tool.ts | 158 ++++++++++++++++++------- 4 files changed, 171 insertions(+), 79 deletions(-) diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts index 6952ce6..cd6991f 100644 --- a/src/analytics/optimization-storage.ts +++ b/src/analytics/optimization-storage.ts @@ -1,6 +1,5 @@ -import sqlite3 from 'sqlite3'; -import { open, Database } from 'sqlite'; -import { CompressionEngine } from '../core/compression-engine'; +import Database from 'better-sqlite3'; +import { CompressionEngine } from '../core/compression-engine.js'; export interface OptimizationResult { originalTextHash: string; @@ -11,22 +10,19 @@ export interface OptimizationResult { } export class SqliteOptimizationStorage { - private db: Database; - private dbPath: string; - private compressionEngine: CompressionEngine; + private db: Database.Database | null = null; + private readonly dbPath: string; + private readonly compressionEngine: CompressionEngine; constructor(dbPath: string = './optimization.db') { this.dbPath = dbPath; this.compressionEngine = new CompressionEngine(); } - public async initializeDatabase(): Promise { - this.db = await open({ - filename: this.dbPath, - driver: sqlite3.Database - }); - - await this.db.exec(` + public initializeDatabase(): void { + this.db = new Database(this.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.exec(` CREATE TABLE IF NOT EXISTS optimization_results ( id INTEGER PRIMARY KEY AUTOINCREMENT, original_text_hash TEXT NOT NULL UNIQUE, @@ -37,43 +33,68 @@ export class SqliteOptimizationStorage { tokens_saved INTEGER NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP ); + CREATE INDEX IF NOT EXISTS idx_optimization_hash + ON optimization_results(original_text_hash); `); } - public async save(entry: OptimizationResult): Promise { - const compressedOptimizedText = this.compressionEngine.compress(entry.optimizedText); + private requireDb(): Database.Database { + if (!this.db) { + throw new Error('Optimization storage database is not initialized. Call initializeDatabase() first.'); + } + return this.db; + } + + public save(entry: OptimizationResult): void { + const db = this.requireDb(); + const compressed = this.compressionEngine.compress(entry.optimizedText); - await this.db.run( - `INSERT INTO optimization_results (original_text_hash, optimized_text_compressed, compression_algorithm, original_tokens, optimized_tokens, tokens_saved) - VALUES (?, ?, ?, ?, ?, ?)`, - [entry.originalTextHash, compressedOptimizedText.compressed, 'brotli', entry.originalTokens, entry.optimizedTokens, entry.tokensSaved] + db.prepare( + `INSERT OR REPLACE INTO optimization_results + (original_text_hash, optimized_text_compressed, compression_algorithm, + original_tokens, optimized_tokens, tokens_saved) + VALUES (?, ?, ?, ?, ?, ?)` + ).run( + entry.originalTextHash, + compressed.compressed, + 'brotli', + entry.originalTokens, + entry.optimizedTokens, + entry.tokensSaved ); } - public async get(originalTextHash: string): Promise { - const row = await this.db.get( - 'SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved FROM optimization_results WHERE original_text_hash = ?', - originalTextHash - ); + public get(originalTextHash: string): OptimizationResult | null { + const db = this.requireDb(); + const row = db.prepare( + `SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved + FROM optimization_results WHERE original_text_hash = ?` + ).get(originalTextHash) as + | { + optimized_text_compressed: Buffer; + original_tokens: number; + optimized_tokens: number; + tokens_saved: number; + } + | undefined; if (!row) { return null; } - const optimizedText = this.compressionEngine.decompress(row.optimized_text_compressed); - return { originalTextHash, - optimizedText, + optimizedText: this.compressionEngine.decompress(row.optimized_text_compressed), originalTokens: row.original_tokens, optimizedTokens: row.optimized_tokens, - tokensSaved: row.tokens_saved + tokensSaved: row.tokens_saved, }; } - public async close(): Promise { + public close(): void { if (this.db) { - await this.db.close(); + this.db.close(); + this.db = null; } } } diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts index e7922cf..c9e7d17 100644 --- a/src/core/compression-engine.ts +++ b/src/core/compression-engine.ts @@ -47,10 +47,13 @@ export class CompressionEngine { return brotliDecompressSync(buffer).toString('utf8'); } - public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): CompressionResult & { compressed: string } { + public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit & { compressed: string } { const result = this.compress(text, options); return { - ...result, + originalSize: result.originalSize, + compressedSize: result.compressedSize, + ratio: result.ratio, + percentSaved: result.percentSaved, compressed: result.compressed.toString('base64'), }; } diff --git a/src/server/index.ts b/src/server/index.ts index 8c09500..047e8eb 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -2020,7 +2020,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_grep': { const { pattern, ...options } = args as any; - const result = await smartGrep.run(pattern, options); + const result = await runSmartGrep(pattern, options); return { content: [ { @@ -2032,7 +2032,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } case 'optimization_storage': { - const result = await optimizationStorage.invoke({} as TurnContext, { tool_name: name, tool_input: args }); + const result = optimizationStorage.run(args as any); return { content: [ { diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts index 9949d1e..5f9fe9e 100644 --- a/src/tools/optimization-storage-tool.ts +++ b/src/tools/optimization-storage-tool.ts @@ -1,71 +1,139 @@ -import { Tool, ToolInvocation, TurnContext } from '../types/turn-context'; -import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage'; +import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage.js'; -export class OptimizationStorageTool implements Tool { +export type OptimizationStorageOperation = 'store' | 'retrieve'; + +export interface OptimizationStorageOptions { + operation: OptimizationStorageOperation; + originalTextHash?: string; + optimizedText?: string; + originalTokens?: number; + optimizedTokens?: number; + tokensSaved?: number; +} + +export interface OptimizationStorageResponse { + success: boolean; + error?: string; + result?: OptimizationResult; +} + +export class OptimizationStorageTool { public readonly name = 'optimization_storage'; - public readonly description = 'A tool for storing and retrieving compressed optimization results.'; + public readonly description = + 'Persist and retrieve brotli-compressed optimization results keyed by text hash.'; - private storage: SqliteOptimizationStorage; + private readonly storage: SqliteOptimizationStorage; - constructor() { - this.storage = new SqliteOptimizationStorage(); + constructor(storage?: SqliteOptimizationStorage) { + this.storage = storage ?? new SqliteOptimizationStorage(); this.storage.initializeDatabase(); } - public async invoke(context: TurnContext, invocation: ToolInvocation): Promise { - const operation = invocation.arguments?.operation; - - if (operation === 'store') { - return this.store(invocation.arguments); - } else if (operation === 'retrieve') { - return this.retrieve(invocation.arguments); - } else { - return { error: `Unknown operation: ${operation}` }; + public run(options: OptimizationStorageOptions): OptimizationStorageResponse { + switch (options.operation) { + case 'store': + return this.store(options); + case 'retrieve': + return this.retrieve(options); + default: + return { + success: false, + error: `Unknown operation: ${String((options as { operation: unknown }).operation)}`, + }; } } - private async store(args: any): Promise { - try { - const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = args; - if (!originalTextHash || !optimizedText || originalTokens === undefined || optimizedTokens === undefined || tokensSaved === undefined) { - return { error: 'Missing required arguments for store operation.' }; - } + private store(options: OptimizationStorageOptions): OptimizationStorageResponse { + const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = options; + + if ( + !originalTextHash || + !optimizedText || + originalTokens === undefined || + optimizedTokens === undefined || + tokensSaved === undefined + ) { + return { + success: false, + error: 'Missing required arguments for store operation: originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved.', + }; + } - const optimizationResult: OptimizationResult = { + try { + this.storage.save({ originalTextHash, - optimizedText: Buffer.from(optimizedText, 'base64').toString('utf8'), + optimizedText, originalTokens, optimizedTokens, - tokensSaved - }; - - await this.storage.save(optimizationResult); + tokensSaved, + }); return { success: true }; } catch (error) { - return { error: `Failed to store optimization result: ${error.message}` }; + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: `Failed to store optimization result: ${message}` }; } } - private async retrieve(args: any): Promise { - try { - const { originalTextHash } = args; - if (!originalTextHash) { - return { error: 'Missing required argument for retrieve operation: originalTextHash' }; - } + private retrieve(options: OptimizationStorageOptions): OptimizationStorageResponse { + const { originalTextHash } = options; - const result = await this.storage.get(originalTextHash); + if (!originalTextHash) { + return { + success: false, + error: 'Missing required argument for retrieve operation: originalTextHash.', + }; + } - if (result) { - return { - success: true, - ...result, - optimizedText: Buffer.from(result.optimizedText, 'utf8').toString('base64') - }; - } else { - return { success: false, message: 'Not found' }; + try { + const result = this.storage.get(originalTextHash); + if (!result) { + return { success: false, error: 'Not found' }; } + return { success: true, result }; } catch (error) { - return { error: `Failed to retrieve optimization result: ${error.message}` }; + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: `Failed to retrieve optimization result: ${message}` }; } } + + public close(): void { + this.storage.close(); + } } + +export const OPTIMIZATION_STORAGE_TOOL_DEFINITION = { + name: 'optimization_storage', + description: + 'Persist and retrieve brotli-compressed optimization results keyed by text hash. Operations: store, retrieve.', + inputSchema: { + type: 'object', + properties: { + operation: { + type: 'string', + enum: ['store', 'retrieve'], + description: 'The storage operation to perform', + }, + originalTextHash: { + type: 'string', + description: 'Stable hash of the original uncompressed text (required for both operations)', + }, + optimizedText: { + type: 'string', + description: 'The optimized text to store (required for store)', + }, + originalTokens: { + type: 'number', + description: 'Token count of the original text (required for store)', + }, + optimizedTokens: { + type: 'number', + description: 'Token count after optimization (required for store)', + }, + tokensSaved: { + type: 'number', + description: 'Tokens saved by optimization (required for store)', + }, + }, + required: ['operation'], + }, +}; From 1dba76f9246489f2c12d392ba08889622cc47ae6 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:22:44 -0400 Subject: [PATCH 10/26] feat(utils): add generic lrucache with ttl and stats (#125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In-memory LRU cache for hot paths (token counts, file search results, MCP correction responses). Separate from CacheEngine, which is the persistent SQLite cache — LruCache is intended for process-local memoization with O(1) eviction via Map insertion order. - maxSize-bounded with LRU eviction - Optional per-entry TTL with automatic lazy expiration - prune() for proactive periodic cleanup - stats() exposes hits / misses / evictions / expired / hitRate Refs #125 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/utils/lru-cache.ts | 134 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 src/utils/lru-cache.ts diff --git a/src/utils/lru-cache.ts b/src/utils/lru-cache.ts new file mode 100644 index 0000000..65889f8 --- /dev/null +++ b/src/utils/lru-cache.ts @@ -0,0 +1,134 @@ +/** + * Generic LRU cache with optional per-entry TTL — addresses issue #125. + * + * Unlike CacheEngine (token-aware, persistent SQLite cache), this is an + * in-memory LRU intended for hot paths: file-search results, token counts, + * MCP correction responses, etc. Eviction is O(1) via Map insertion order. + */ + +export interface LruCacheStats { + size: number; + maxSize: number; + hits: number; + misses: number; + evictions: number; + expired: number; + hitRate: number; +} + +interface LruCacheEntry { + value: V; + expiresAt: number; +} + +export class LruCache { + private readonly cache = new Map>(); + private readonly maxSize: number; + private readonly defaultTtlMs: number; + private hits = 0; + private misses = 0; + private evictions = 0; + private expired = 0; + + constructor(maxSize: number, defaultTtlMs: number = 0) { + if (maxSize <= 0) { + throw new Error(`LruCache maxSize must be > 0, got ${maxSize}`); + } + this.maxSize = maxSize; + this.defaultTtlMs = defaultTtlMs; + } + + public get(key: K): V | undefined { + const entry = this.cache.get(key); + if (!entry) { + this.misses++; + return undefined; + } + + if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) { + this.cache.delete(key); + this.expired++; + this.misses++; + return undefined; + } + + // Refresh recency: remove + re-insert moves to the tail. + this.cache.delete(key); + this.cache.set(key, entry); + this.hits++; + return entry.value; + } + + public set(key: K, value: V, ttlMs?: number): void { + if (this.cache.has(key)) { + this.cache.delete(key); + } else if (this.cache.size >= this.maxSize) { + const oldestKey = this.cache.keys().next().value as K | undefined; + if (oldestKey !== undefined) { + this.cache.delete(oldestKey); + this.evictions++; + } + } + + const effectiveTtl = ttlMs ?? this.defaultTtlMs; + this.cache.set(key, { + value, + expiresAt: effectiveTtl > 0 ? Date.now() + effectiveTtl : 0, + }); + } + + public has(key: K): boolean { + const entry = this.cache.get(key); + if (!entry) { + return false; + } + if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) { + this.cache.delete(key); + this.expired++; + return false; + } + return true; + } + + public delete(key: K): boolean { + return this.cache.delete(key); + } + + public clear(): void { + this.cache.clear(); + } + + public get size(): number { + return this.cache.size; + } + + /** Remove all entries whose TTL has expired. Returns the count removed. */ + public prune(): number { + if (this.defaultTtlMs === 0) { + return 0; + } + const now = Date.now(); + let removed = 0; + for (const [key, entry] of this.cache) { + if (entry.expiresAt !== 0 && now > entry.expiresAt) { + this.cache.delete(key); + removed++; + } + } + this.expired += removed; + return removed; + } + + public stats(): LruCacheStats { + const total = this.hits + this.misses; + return { + size: this.cache.size, + maxSize: this.maxSize, + hits: this.hits, + misses: this.misses, + evictions: this.evictions, + expired: this.expired, + hitRate: total === 0 ? 0 : this.hits / total, + }; + } +} From c3b6e3faee7b0d9d40e5fcd86e943ed8c388e82c Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:23:55 -0400 Subject: [PATCH 11/26] feat(tokenizers): add pluggable tokenizer framework (#124) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the long-standing character/4 heuristic and adds vendor-neutral tokenization: - ITokenizer interface — async countTokens + free() lifecycle. - TiktokenTokenizer — wraps the tiktoken encoder; handles Claude family by mapping to the gpt-4 encoder (closest publicly available). - HeuristicTokenizer — content-aware local fallback (code 2.5, json 2.8, markdown 3.5, text 4.0 chars/token), auto-detected via cheap regex + try-parse-json. - TokenizerFactory.create(modelName) picks the best backend; createFromEnv reads CLAUDE_MODEL / ANTHROPIC_MODEL / OPENAI_MODEL / TOKEN_OPTIMIZER_MODEL. Both tokenizer implementations memoize counts with the generic LruCache (#125), so repeated inputs do not re-tokenize. Refs #124, #123 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/tokenizers/heuristic-tokenizer.ts | 78 ++++++++++++++++++++++ src/core/tokenizers/i-tokenizer.ts | 19 ++++++ src/core/tokenizers/tiktoken-tokenizer.ts | 69 +++++++++++++++++++ src/core/tokenizers/tokenizer-factory.ts | 33 +++++++++ 4 files changed, 199 insertions(+) create mode 100644 src/core/tokenizers/heuristic-tokenizer.ts create mode 100644 src/core/tokenizers/i-tokenizer.ts create mode 100644 src/core/tokenizers/tiktoken-tokenizer.ts create mode 100644 src/core/tokenizers/tokenizer-factory.ts diff --git a/src/core/tokenizers/heuristic-tokenizer.ts b/src/core/tokenizers/heuristic-tokenizer.ts new file mode 100644 index 0000000..ef81931 --- /dev/null +++ b/src/core/tokenizers/heuristic-tokenizer.ts @@ -0,0 +1,78 @@ +import { ITokenizer } from './i-tokenizer.js'; +import { LruCache } from '../../utils/lru-cache.js'; + +const DEFAULT_CACHE_SIZE = 500; +const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; + +export enum ContentType { + Code = 'code', + Json = 'json', + Markdown = 'markdown', + Text = 'text', +} + +/** + * Content-aware character-to-token ratios derived from tiktoken encoding + * on typical samples: + * + * | Content | chars/token | + * | --------- | ----------- | + * | code | 2.5 | + * | json | 2.8 | + * | markdown | 3.5 | + * | text | 4.0 | + */ +const CHARS_PER_TOKEN: Readonly> = { + [ContentType.Code]: 2.5, + [ContentType.Json]: 2.8, + [ContentType.Markdown]: 3.5, + [ContentType.Text]: 4.0, +}; + +const CODE_PATTERN = /\b(function|class|const|import|export|return|await|=>)\b/; +const JSON_PATTERN = /^[\s\n]*[{[]/; +const MARKDOWN_PATTERN = /^#{1,6}\s|^\s*[-*+]\s|\[[^\]]+\]\([^)]+\)/m; + +export class HeuristicTokenizer implements ITokenizer { + public readonly modelName: string; + private readonly cache: LruCache; + + constructor(modelName: string = 'heuristic', cache?: LruCache) { + this.modelName = modelName; + this.cache = cache ?? new LruCache(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS); + } + + public async countTokens(text: string): Promise { + const cached = this.cache.get(text); + if (cached !== undefined) { + return cached; + } + const contentType = HeuristicTokenizer.detectContentType(text); + const ratio = CHARS_PER_TOKEN[contentType]; + const count = Math.ceil(text.length / ratio); + this.cache.set(text, count); + return count; + } + + public free(): void { + // No native resources to free. + } + + public static detectContentType(text: string): ContentType { + if (JSON_PATTERN.test(text)) { + try { + JSON.parse(text); + return ContentType.Json; + } catch { + // Not actually JSON; fall through to other detection. + } + } + if (CODE_PATTERN.test(text)) { + return ContentType.Code; + } + if (MARKDOWN_PATTERN.test(text)) { + return ContentType.Markdown; + } + return ContentType.Text; + } +} diff --git a/src/core/tokenizers/i-tokenizer.ts b/src/core/tokenizers/i-tokenizer.ts new file mode 100644 index 0000000..57f23fd --- /dev/null +++ b/src/core/tokenizers/i-tokenizer.ts @@ -0,0 +1,19 @@ +/** + * Pluggable tokenizer interface — addresses issue #124. + * + * Implementations: + * - TiktokenTokenizer: uses the local tiktoken library (GPT-4 / GPT-3.5-turbo). + * - HeuristicTokenizer: content-aware local fallback for unknown models. + * + * The factory picks an implementation based on model name. All implementations + * memoize counts via an injected LruCache so repeated inputs don't re-tokenize. + */ + +export interface ITokenizer { + readonly modelName: string; + + countTokens(text: string): Promise; + + /** Free any native resources. */ + free(): void; +} diff --git a/src/core/tokenizers/tiktoken-tokenizer.ts b/src/core/tokenizers/tiktoken-tokenizer.ts new file mode 100644 index 0000000..9b2d327 --- /dev/null +++ b/src/core/tokenizers/tiktoken-tokenizer.ts @@ -0,0 +1,69 @@ +import { encoding_for_model, Tiktoken, TiktokenModel } from 'tiktoken'; +import { ITokenizer } from './i-tokenizer.js'; +import { LruCache } from '../../utils/lru-cache.js'; + +const DEFAULT_CACHE_SIZE = 500; +const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; + +const SUPPORTED_TIKTOKEN_MODELS: readonly TiktokenModel[] = ['gpt-4', 'gpt-3.5-turbo']; + +export class TiktokenTokenizer implements ITokenizer { + public readonly modelName: string; + private readonly encoder: Tiktoken; + private readonly cache: LruCache; + + constructor(modelName: string, cache?: LruCache) { + this.modelName = modelName; + this.cache = cache ?? new LruCache(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS); + const tiktokenModel = TiktokenTokenizer.mapToTiktokenModel(modelName); + this.encoder = encoding_for_model(tiktokenModel); + } + + public async countTokens(text: string): Promise { + const cached = this.cache.get(text); + if (cached !== undefined) { + return cached; + } + const count = this.encoder.encode(text).length; + this.cache.set(text, count); + return count; + } + + public free(): void { + this.encoder.free(); + } + + public static supports(modelName: string): boolean { + const mapped = TiktokenTokenizer.tryMap(modelName); + return mapped !== null; + } + + public static mapToTiktokenModel(modelName: string): TiktokenModel { + const mapped = TiktokenTokenizer.tryMap(modelName); + if (mapped === null) { + // Default: GPT-4 tokenizer is the closest available for Claude/unknown models. + return 'gpt-4'; + } + return mapped; + } + + private static tryMap(modelName: string): TiktokenModel | null { + const lower = modelName.toLowerCase(); + if ( + lower.includes('claude') || + lower.includes('sonnet') || + lower.includes('opus') || + lower.includes('haiku') || + lower.includes('gpt-4') + ) { + return 'gpt-4'; + } + if (lower.includes('gpt-3.5') || lower.includes('gpt3.5')) { + return 'gpt-3.5-turbo'; + } + if (SUPPORTED_TIKTOKEN_MODELS.includes(lower as TiktokenModel)) { + return lower as TiktokenModel; + } + return null; + } +} diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts new file mode 100644 index 0000000..ea4b360 --- /dev/null +++ b/src/core/tokenizers/tokenizer-factory.ts @@ -0,0 +1,33 @@ +import { ITokenizer } from './i-tokenizer.js'; +import { TiktokenTokenizer } from './tiktoken-tokenizer.js'; +import { HeuristicTokenizer } from './heuristic-tokenizer.js'; + +export class TokenizerFactory { + /** + * Create a tokenizer for the given model name. + * + * Resolution order: + * 1. Tiktoken for GPT-4 / GPT-3.5-turbo / Claude-family models. + * 2. HeuristicTokenizer as the content-aware fallback. + * + * Callers that already hold a tokenizer should prefer reusing it — + * construction allocates a tiktoken encoder (native resource). + */ + public static create(modelName: string): ITokenizer { + if (TiktokenTokenizer.supports(modelName)) { + return new TiktokenTokenizer(modelName); + } + return new HeuristicTokenizer(modelName); + } + + /** Create a tokenizer using the active model environment variables. */ + public static createFromEnv(): ITokenizer { + const modelName = + process.env.CLAUDE_MODEL || + process.env.ANTHROPIC_MODEL || + process.env.OPENAI_MODEL || + process.env.TOKEN_OPTIMIZER_MODEL || + 'gpt-4'; + return TokenizerFactory.create(modelName); + } +} From 35f888e12afaa2012dda56d1dc89d70db51cd76a Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:25:11 -0400 Subject: [PATCH 12/26] feat(config): add optimization settings with zod validation (#120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends HypercontextConfig with an optional `optimization` section mirroring Gemini CLI's settingsSchema: - compressionTokenThreshold — fraction of context to trigger compression - compressionPreserveThreshold — fraction to keep uncompressed at tail - minTokensBeforeCompression — lower bound for optimizer to engage - modelTokenLimits — per-model context window size - minOutputSizeBytes / quality — stored-entry gating ConfigManager now validates the user config file against a zod schema and falls back to DEFAULT_CONFIG with a descriptive warning instead of silently accepting malformed JSON. Adds getOptimizationConfig() and getModelTokenLimit(modelName) accessors. Refs #120 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/config.ts | 110 ++++++++++++++++++++++++++++++++++++++++++--- src/core/types.ts | 20 +++++++++ 2 files changed, 125 insertions(+), 5 deletions(-) diff --git a/src/core/config.ts b/src/core/config.ts index d5cd01c..57bee2a 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -2,11 +2,32 @@ * Configuration management for Hypercontext MCP */ -import { HypercontextConfig } from './types.js'; +import { z } from 'zod'; +import { HypercontextConfig, OptimizationConfig } from './types.js'; import { readFileSync, existsSync } from 'fs'; import { homedir } from 'os'; import { join } from 'path'; +const DEFAULT_OPTIMIZATION: OptimizationConfig = { + compressionTokenThreshold: 0.7, + compressionPreserveThreshold: 0.3, + minTokensBeforeCompression: 1000, + modelTokenLimits: { + 'gpt-4': 128000, + 'gpt-4-turbo': 128000, + 'gpt-3.5-turbo': 16385, + 'claude-3-opus': 200000, + 'claude-3-sonnet': 200000, + 'claude-3-haiku': 200000, + 'claude-opus-4-7': 1000000, + 'claude-sonnet-4-6': 1000000, + 'gemini-1.5-pro': 2000000, + 'gemini-2.5-flash': 1000000, + }, + minOutputSizeBytes: 500, + quality: 'balanced', +}; + const DEFAULT_CONFIG: HypercontextConfig = { cache: { enabled: true, @@ -38,8 +59,61 @@ const DEFAULT_CONFIG: HypercontextConfig = { streamingThreshold: 1024 * 1024, // 1MB enableStreaming: false, }, + optimization: DEFAULT_OPTIMIZATION, }; +const OptimizationConfigSchema = z.object({ + compressionTokenThreshold: z.number().min(0).max(1), + compressionPreserveThreshold: z.number().min(0).max(1), + minTokensBeforeCompression: z.number().int().nonnegative(), + modelTokenLimits: z.record(z.string(), z.number().int().positive()), + minOutputSizeBytes: z.number().int().nonnegative(), + quality: z.enum(['fast', 'balanced', 'max']), +}); + +const HypercontextConfigSchema = z + .object({ + cache: z + .object({ + enabled: z.boolean(), + maxSizeMB: z.number().int().positive(), + defaultTTL: z.number().int().nonnegative(), + ttlByType: z.record(z.string(), z.number().int().nonnegative()), + compression: z.enum(['none', 'gzip', 'brotli', 'auto']), + }) + .partial() + .optional(), + monitoring: z + .object({ + enabled: z.boolean(), + detailedLogging: z.boolean(), + metricsRetentionDays: z.number().int().nonnegative(), + dashboardPort: z.number().int().positive(), + enableWebUI: z.boolean(), + }) + .partial() + .optional(), + intelligence: z + .object({ + enablePatternDetection: z.boolean(), + enableWorkflowLearning: z.boolean(), + enablePredictiveCaching: z.boolean(), + mlModelPath: z.string(), + }) + .partial() + .optional(), + performance: z + .object({ + maxConcurrentOps: z.number().int().positive(), + streamingThreshold: z.number().int().positive(), + enableStreaming: z.boolean(), + }) + .partial() + .optional(), + optimization: OptimizationConfigSchema.partial().optional(), + }) + .passthrough(); + export class ConfigManager { private config: HypercontextConfig; private configPath: string; @@ -57,26 +131,52 @@ export class ConfigManager { try { const fileContent = readFileSync(this.configPath, 'utf-8'); - const userConfig = JSON.parse(fileContent); - return this.mergeConfig(DEFAULT_CONFIG, userConfig); + const rawUserConfig = JSON.parse(fileContent); + const parsed = HypercontextConfigSchema.safeParse(rawUserConfig); + if (!parsed.success) { + const issues = parsed.error.issues + .map((i) => ` - ${i.path.join('.') || 'root'}: ${i.message}`) + .join('\n'); + console.warn( + `Invalid config at ${this.configPath}, using defaults:\n${issues}` + ); + return DEFAULT_CONFIG; + } + return this.mergeConfig(DEFAULT_CONFIG, parsed.data); } catch (error) { - console.warn('Failed to load config, using defaults:', error); + const message = error instanceof Error ? error.message : String(error); + console.warn(`Failed to load config, using defaults: ${message}`); return DEFAULT_CONFIG; } } private mergeConfig( defaults: HypercontextConfig, - user: Partial + user: { + cache?: Partial; + monitoring?: Partial; + intelligence?: Partial; + performance?: Partial; + optimization?: Partial; + } ): HypercontextConfig { return { cache: { ...defaults.cache, ...user.cache }, monitoring: { ...defaults.monitoring, ...user.monitoring }, intelligence: { ...defaults.intelligence, ...user.intelligence }, performance: { ...defaults.performance, ...user.performance }, + optimization: { ...DEFAULT_OPTIMIZATION, ...(user.optimization ?? {}) }, }; } + public getOptimizationConfig(): OptimizationConfig { + return this.config.optimization ?? DEFAULT_OPTIMIZATION; + } + + public getModelTokenLimit(modelName: string): number | undefined { + return this.getOptimizationConfig().modelTokenLimits[modelName]; + } + get(): HypercontextConfig { return { ...this.config }; } diff --git a/src/core/types.ts b/src/core/types.ts index 9d36be2..aacb6a1 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -48,6 +48,26 @@ export interface HypercontextConfig { streamingThreshold: number; enableStreaming: boolean; }; + optimization?: OptimizationConfig; +} + +/** + * Configuration-driven compression thresholds — addresses issue #120. + * Mirrors the fields exposed by Gemini CLI's settingsSchema.ts. + */ +export interface OptimizationConfig { + /** Fraction of model context at which compression kicks in (0-1). */ + compressionTokenThreshold: number; + /** Fraction of chat history to keep uncompressed at the tail (0-1). */ + compressionPreserveThreshold: number; + /** Minimum token count before an optimizer considers compressing. */ + minTokensBeforeCompression: number; + /** Per-model total context window size, in tokens. */ + modelTokenLimits: Record; + /** Minimum output bytes before optimization emits a stored entry. */ + minOutputSizeBytes: number; + /** Compression quality preset. */ + quality: 'fast' | 'balanced' | 'max'; } export interface TokenMetrics { From 16305fca059a859093ed223e8ba14435b47bfd70 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:28:53 -0400 Subject: [PATCH 13/26] feat(session): add session + context-delta + chat compression (#121, #122) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the session-centric plumbing that the optimization plan has been sketched against for a while: - src/core/session.ts — Session class holding history + per-file state, with a token-aware compressHistory() that keeps a configurable tail fraction and summarizes the head via a pluggable ISummarizer. - src/core/summarization.ts — ISummarizer interface + a self-contained TruncatingSummarizer fallback so the module is usable without an LLM. - src/core/session-manager.ts — JSON-persisted singleton that auto- compresses a session's history when addMessage() pushes it past maxTokens. - src/utils/diff.ts — calculateDelta / applyDelta built on the existing `diff` dep (unified-diff, round-trippable). - src/tools/context-delta-tool.ts — new context_delta MCP tool with compute-delta / seed / clear operations and an items-complete input schema. - src/validation/tool-schemas.ts — add OptimizationStorageSchema and ContextDeltaSchema so the validator accepts both new tools. - src/server/index.ts — wire up SessionManager + ContextDeltaTool using TokenizerFactory.createFromEnv; persistence at ~/.token-optimizer/sessions.json. Remove the previously-duplicated inline OPTIMIZATION_STORAGE_TOOL_DEFINITION and import it from the tool module. Refs #121, #122 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/session-manager.ts | 153 ++++++++++++++++++++++++++++ src/core/session.ts | 171 ++++++++++++++++++++++++++++++++ src/core/summarization.ts | 50 ++++++++++ src/server/index.ts | 64 ++++++------ src/tools/context-delta-tool.ts | 155 +++++++++++++++++++++++++++++ src/utils/diff.ts | 39 ++++++++ src/validation/tool-schemas.ts | 20 ++++ 7 files changed, 616 insertions(+), 36 deletions(-) create mode 100644 src/core/session-manager.ts create mode 100644 src/core/session.ts create mode 100644 src/core/summarization.ts create mode 100644 src/tools/context-delta-tool.ts create mode 100644 src/utils/diff.ts diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts new file mode 100644 index 0000000..1cc8372 --- /dev/null +++ b/src/core/session-manager.ts @@ -0,0 +1,153 @@ +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; +import { dirname } from 'path'; +import { + Session, + SessionOptions, + SessionSnapshot, + MessageRole, +} from './session.js'; +import { ITokenizer } from './tokenizers/i-tokenizer.js'; +import { ISummarizer } from './summarization.js'; + +/** + * Singleton-style SessionManager — addresses issues #121 / #122. + * + * Persists all sessions to a single JSON file so they survive restarts. + * When a message is added we check whether the session has exceeded its + * token budget and, if so, auto-compress the history (#121). + */ + +export interface SessionManagerOptions { + persistencePath?: string; + tokenizer?: ITokenizer; + summarizer?: ISummarizer; + defaultMaxTokens?: number; +} + +interface PersistedState { + sessions: SessionSnapshot[]; +} + +export class SessionManager { + private readonly sessions = new Map(); + private readonly persistencePath: string | null; + private readonly tokenizer: ITokenizer | undefined; + private readonly summarizer: ISummarizer | undefined; + private readonly defaultMaxTokens: number | undefined; + + constructor(options: SessionManagerOptions = {}) { + this.persistencePath = options.persistencePath ?? null; + this.tokenizer = options.tokenizer; + this.summarizer = options.summarizer; + this.defaultMaxTokens = options.defaultMaxTokens; + if (this.persistencePath && existsSync(this.persistencePath)) { + this.load(); + } + } + + public createSession(options: SessionOptions = {}): Session { + const session = new Session({ + tokenizer: this.tokenizer, + summarizer: this.summarizer, + maxTokens: options.maxTokens ?? this.defaultMaxTokens, + ...options, + }); + this.sessions.set(session.id, session); + this.persist(); + return session; + } + + public getSession(id: string): Session | undefined { + return this.sessions.get(id); + } + + public listSessions(): Session[] { + return Array.from(this.sessions.values()); + } + + public deleteSession(id: string): boolean { + const removed = this.sessions.delete(id); + if (removed) { + this.persist(); + } + return removed; + } + + /** + * Add a message to the session and auto-compress the history if the + * token budget is exceeded (#121). + * + * Returns the post-add token count of the session. + */ + public async addMessage( + sessionId: string, + role: MessageRole, + content: string + ): Promise { + const session = this.sessions.get(sessionId); + if (!session) { + throw new Error(`Unknown session: ${sessionId}`); + } + session.addMessage(role, content); + const currentTokens = await session.getHistoryTokenCount(); + let finalTokens = currentTokens; + if (currentTokens > session.maxTokens) { + finalTokens = await session.compressHistory(); + } + this.persist(); + return finalTokens; + } + + public updateFileState( + sessionId: string, + filePath: string, + content: string + ): void { + const session = this.sessions.get(sessionId); + if (!session) { + throw new Error(`Unknown session: ${sessionId}`); + } + session.setFileContent(filePath, content); + this.persist(); + } + + private persist(): void { + if (!this.persistencePath) { + return; + } + const state: PersistedState = { + sessions: this.listSessions().map((s) => s.toSnapshot()), + }; + const dir = dirname(this.persistencePath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + writeFileSync(this.persistencePath, JSON.stringify(state, null, 2)); + } + + private load(): void { + if (!this.persistencePath) { + return; + } + try { + const raw = readFileSync(this.persistencePath, 'utf-8'); + const parsed = JSON.parse(raw) as PersistedState; + if (!parsed || !Array.isArray(parsed.sessions)) { + return; + } + for (const snapshot of parsed.sessions) { + const session = Session.fromSnapshot(snapshot, { + tokenizer: this.tokenizer, + summarizer: this.summarizer, + }); + this.sessions.set(session.id, session); + } + } catch (error) { + const message = + error instanceof Error ? error.message : String(error); + console.warn( + `SessionManager: failed to load sessions from ${this.persistencePath}: ${message}` + ); + } + } +} diff --git a/src/core/session.ts b/src/core/session.ts new file mode 100644 index 0000000..78ebd96 --- /dev/null +++ b/src/core/session.ts @@ -0,0 +1,171 @@ +import { randomUUID } from 'crypto'; +import { ITokenizer } from './tokenizers/i-tokenizer.js'; +import { ISummarizer, TruncatingSummarizer } from './summarization.js'; + +/** + * Session state — addresses issues #121 and #122. + * + * A Session holds a single user's conversation history plus a per-file + * content snapshot. The history is token-budgeted (see #121) and the file + * snapshots feed context-delta tracking (#122). + */ + +export type MessageRole = 'system' | 'user' | 'assistant' | 'tool'; + +export interface Message { + role: MessageRole; + content: string; + timestamp: number; +} + +export interface SessionFileState { + [filePath: string]: string; +} + +export interface SessionSnapshot { + id: string; + history: Message[]; + fileState: SessionFileState; + maxTokens: number; + createdAt: number; + updatedAt: number; +} + +export interface SessionOptions { + id?: string; + maxTokens?: number; + preserveTailRatio?: number; + tokenizer?: ITokenizer; + summarizer?: ISummarizer; +} + +const DEFAULT_MAX_TOKENS = 100_000; +const DEFAULT_PRESERVE_TAIL_RATIO = 0.3; + +export class Session { + public readonly id: string; + public maxTokens: number; + public readonly createdAt: number; + public updatedAt: number; + + private history: Message[] = []; + private fileState: SessionFileState = {}; + private readonly preserveTailRatio: number; + private readonly tokenizer: ITokenizer | null; + private readonly summarizer: ISummarizer; + + constructor(options: SessionOptions = {}) { + this.id = options.id ?? randomUUID(); + this.maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS; + this.preserveTailRatio = options.preserveTailRatio ?? DEFAULT_PRESERVE_TAIL_RATIO; + this.tokenizer = options.tokenizer ?? null; + this.summarizer = options.summarizer ?? new TruncatingSummarizer(); + this.createdAt = Date.now(); + this.updatedAt = this.createdAt; + } + + public addMessage(role: MessageRole, content: string): Message { + const message: Message = { role, content, timestamp: Date.now() }; + this.history.push(message); + this.updatedAt = message.timestamp; + return message; + } + + public getHistory(): readonly Message[] { + return this.history; + } + + public getFileState(): Readonly { + return this.fileState; + } + + public getFileContent(filePath: string): string | undefined { + return this.fileState[filePath]; + } + + public setFileContent(filePath: string, content: string): void { + this.fileState[filePath] = content; + this.updatedAt = Date.now(); + } + + /** + * Total token count of the current history. Uses the injected tokenizer + * when available; otherwise falls back to the character/4 heuristic. + */ + public async getHistoryTokenCount(): Promise { + if (!this.tokenizer) { + return this.history.reduce( + (acc, m) => acc + Math.ceil(m.content.length / 4), + 0 + ); + } + let total = 0; + for (const message of this.history) { + total += await this.tokenizer.countTokens(message.content); + } + return total; + } + + /** + * Compress the history by summarizing everything except the + * preserve-tail fraction. Does nothing if history fits under maxTokens. + * + * Returns the new token count after compression. + */ + public async compressHistory(): Promise { + const currentTokens = await this.getHistoryTokenCount(); + if (currentTokens <= this.maxTokens) { + return currentTokens; + } + if (this.history.length <= 1) { + return currentTokens; + } + + const preserveCount = Math.max( + 1, + Math.floor(this.history.length * this.preserveTailRatio) + ); + const tail = this.history.slice(-preserveCount); + const head = this.history.slice(0, -preserveCount); + if (head.length === 0) { + return currentTokens; + } + + const summary = await this.summarizer.summarize(head); + const summaryMessage: Message = { + role: 'system', + content: `[summary of earlier conversation] ${summary}`, + timestamp: head[head.length - 1].timestamp, + }; + + this.history = [summaryMessage, ...tail]; + this.updatedAt = Date.now(); + return this.getHistoryTokenCount(); + } + + public toSnapshot(): SessionSnapshot { + return { + id: this.id, + history: [...this.history], + fileState: { ...this.fileState }, + maxTokens: this.maxTokens, + createdAt: this.createdAt, + updatedAt: this.updatedAt, + }; + } + + public static fromSnapshot( + snapshot: SessionSnapshot, + options: Omit = {} + ): Session { + const session = new Session({ + id: snapshot.id, + maxTokens: snapshot.maxTokens, + ...options, + }); + session.history = [...snapshot.history]; + session.fileState = { ...snapshot.fileState }; + session.updatedAt = snapshot.updatedAt; + return session; + } +} diff --git a/src/core/summarization.ts b/src/core/summarization.ts new file mode 100644 index 0000000..ab179c5 --- /dev/null +++ b/src/core/summarization.ts @@ -0,0 +1,50 @@ +import { Message } from './session.js'; + +/** + * Pluggable summarization interface — part of issue #121. + * + * A production deployment should plug in an LLM-backed summarizer that + * condenses a list of Messages into a single natural-language summary. + * The default TruncatingSummarizer keeps the module self-contained and + * testable without an API key; it concatenates role+content and trims + * to a reasonable length. + */ + +export interface ISummarizer { + summarize(messages: readonly Message[]): Promise; +} + +export interface TruncatingSummarizerOptions { + /** Approximate maximum characters of summary output. Default: 2000. */ + maxChars?: number; +} + +export class TruncatingSummarizer implements ISummarizer { + private readonly maxChars: number; + + constructor(options: TruncatingSummarizerOptions = {}) { + this.maxChars = options.maxChars ?? 2000; + } + + public async summarize(messages: readonly Message[]): Promise { + if (messages.length === 0) { + return ''; + } + + const joined = messages + .map((m) => `${m.role}: ${m.content}`) + .join('\n'); + + if (joined.length <= this.maxChars) { + return joined; + } + + const keepHead = Math.floor(this.maxChars * 0.4); + const keepTail = this.maxChars - keepHead - 20; + return ( + joined.slice(0, keepHead) + + '\n... [truncated] ...\n' + + joined.slice(-keepTail) + ); + } +} diff --git a/src/server/index.ts b/src/server/index.ts index 047e8eb..116ca6c 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -127,7 +127,16 @@ import { GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/get-mcp-server-analytics.js'; import { getExportAnalyticsTool, EXPORT_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/export-analytics.js'; -import { OptimizationStorageTool } from '../tools/optimization-storage-tool.js'; +import { + OptimizationStorageTool, + OPTIMIZATION_STORAGE_TOOL_DEFINITION, +} from '../tools/optimization-storage-tool.js'; +import { + ContextDeltaTool, + CONTEXT_DELTA_TOOL_DEFINITION, +} from '../tools/context-delta-tool.js'; +import { SessionManager } from '../core/session-manager.js'; +import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js'; import { AnalyticsManager } from '../analytics/analytics-manager.js'; @@ -370,41 +379,11 @@ const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager); const exportAnalytics = getExportAnalyticsTool(analyticsManager); const optimizationStorage = new OptimizationStorageTool(); -const OPTIMIZATION_STORAGE_TOOL_DEFINITION = { - name: optimizationStorage.name, - description: optimizationStorage.description, - inputSchema: { - type: 'object', - properties: { - operation: { - type: 'string', - enum: ['store', 'retrieve'], - description: 'The operation to perform.', - }, - originalTextHash: { - type: 'string', - description: 'The SHA256 hash of the original text.', - }, - optimizedText: { - type: 'string', - description: 'The base64 encoded optimized text (for store operation).', - }, - originalTokens: { - type: 'number', - description: 'The number of tokens in the original text (for store operation).', - }, - optimizedTokens: { - type: 'number', - description: 'The number of tokens in the optimized text (for store operation).', - }, - tokensSaved: { - type: 'number', - description: 'The number of tokens saved (for store operation).', - }, - }, - required: ['operation', 'originalTextHash'], - }, -}; +const sessionManager = new SessionManager({ + persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'), + tokenizer: TokenizerFactory.createFromEnv(), +}); +const contextDelta = new ContextDeltaTool(sessionManager); // Create MCP server const server = new Server( @@ -692,6 +671,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION, EXPORT_ANALYTICS_TOOL_DEFINITION, OPTIMIZATION_STORAGE_TOOL_DEFINITION, + CONTEXT_DELTA_TOOL_DEFINITION, ], }; }); @@ -2043,6 +2023,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { }; } + case 'context_delta': { + const result = contextDelta.run(args as any); + return { + content: [ + { + type: 'text', + text: JSON.stringify(result, null, 2), + }, + ], + }; + } + case 'alert_manager': { const options = args as any; const result = await alertManager.run(options); diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts new file mode 100644 index 0000000..28f5f35 --- /dev/null +++ b/src/tools/context-delta-tool.ts @@ -0,0 +1,155 @@ +import { SessionManager } from '../core/session-manager.js'; +import { calculateDelta } from '../utils/diff.js'; + +/** + * context_delta MCP tool — addresses issue #122. + * + * Given (sessionId, filePath, currentContent) this tool: + * 1. Looks up the session from the SessionManager. + * 2. Diffs the current content against the session's last snapshot of + * that file. + * 3. Updates the session's file state. + * 4. Returns a unified-diff delta — the caller can send ONLY the delta + * to the model instead of the whole file, which is the token win. + * + * On first invocation for a given filePath the full content is treated + * as "the delta" (there is no baseline to diff against). + */ + +export type ContextDeltaOperation = 'compute-delta' | 'seed' | 'clear'; + +export interface ContextDeltaOptions { + operation: ContextDeltaOperation; + sessionId: string; + filePath: string; + currentContent?: string; +} + +export interface ContextDeltaResponse { + success: boolean; + error?: string; + delta?: string; + isBaseline?: boolean; + originalSize?: number; + deltaSize?: number; + bytesSaved?: number; +} + +export class ContextDeltaTool { + public readonly name = 'context_delta'; + public readonly description = + 'Compute a unified-diff delta between a file’s previous session snapshot and its current content, so the model only receives what changed.'; + + constructor(private readonly sessionManager: SessionManager) {} + + public run(options: ContextDeltaOptions): ContextDeltaResponse { + switch (options.operation) { + case 'compute-delta': + return this.computeDelta(options); + case 'seed': + return this.seed(options); + case 'clear': + return this.clear(options); + default: + return { + success: false, + error: `Unknown operation: ${String( + (options as { operation: unknown }).operation + )}`, + }; + } + } + + private computeDelta(options: ContextDeltaOptions): ContextDeltaResponse { + const { sessionId, filePath, currentContent } = options; + if (currentContent === undefined) { + return { + success: false, + error: 'currentContent is required for compute-delta', + }; + } + const session = this.sessionManager.getSession(sessionId); + if (!session) { + return { success: false, error: `Unknown session: ${sessionId}` }; + } + const previous = session.getFileContent(filePath); + session.setFileContent(filePath, currentContent); + + if (previous === undefined) { + return { + success: true, + isBaseline: true, + delta: currentContent, + originalSize: currentContent.length, + deltaSize: currentContent.length, + bytesSaved: 0, + }; + } + + const delta = calculateDelta(previous, currentContent, filePath); + return { + success: true, + isBaseline: false, + delta, + originalSize: currentContent.length, + deltaSize: delta.length, + bytesSaved: Math.max(0, currentContent.length - delta.length), + }; + } + + private seed(options: ContextDeltaOptions): ContextDeltaResponse { + const { sessionId, filePath, currentContent } = options; + if (currentContent === undefined) { + return { success: false, error: 'currentContent is required for seed' }; + } + try { + this.sessionManager.updateFileState(sessionId, filePath, currentContent); + return { success: true, isBaseline: true }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + } + + private clear(options: ContextDeltaOptions): ContextDeltaResponse { + const session = this.sessionManager.getSession(options.sessionId); + if (!session) { + return { + success: false, + error: `Unknown session: ${options.sessionId}`, + }; + } + session.setFileContent(options.filePath, ''); + return { success: true }; + } +} + +export const CONTEXT_DELTA_TOOL_DEFINITION = { + name: 'context_delta', + description: + 'Compute a unified-diff delta for a file in a given session so the model only sees changes since the last snapshot. Operations: compute-delta, seed, clear.', + inputSchema: { + type: 'object', + properties: { + operation: { + type: 'string', + enum: ['compute-delta', 'seed', 'clear'], + description: 'Operation to perform', + }, + sessionId: { + type: 'string', + description: 'Session identifier (create one via SessionManager first)', + }, + filePath: { + type: 'string', + description: 'Path of the file inside the session state', + }, + currentContent: { + type: 'string', + description: + 'Current file content (required for compute-delta and seed)', + }, + }, + required: ['operation', 'sessionId', 'filePath'], + }, +}; diff --git a/src/utils/diff.ts b/src/utils/diff.ts new file mode 100644 index 0000000..3c032e2 --- /dev/null +++ b/src/utils/diff.ts @@ -0,0 +1,39 @@ +import { createPatch, applyPatch } from 'diff'; + +/** + * Delta-based context helpers — addresses issue #122. + * + * Uses the unified-diff format from the existing `diff` dependency so the + * resulting deltas are human-readable and round-trippable via applyDelta. + */ + +/** + * Compute a unified-diff delta from `previous` to `current`. + * Returns the empty string when the inputs are identical (callers can use + * that to skip transmitting a no-op delta). + */ +export function calculateDelta( + previous: string, + current: string, + fileName: string = 'content' +): string { + if (previous === current) { + return ''; + } + return createPatch(fileName, previous, current, '', ''); +} + +/** + * Apply a unified-diff `delta` to `previous`, returning the reconstructed + * `current`. Throws if the patch cannot be applied cleanly. + */ +export function applyDelta(previous: string, delta: string): string { + if (delta === '') { + return previous; + } + const result = applyPatch(previous, delta); + if (result === false) { + throw new Error('Failed to apply delta: patch did not apply cleanly'); + } + return result; +} diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts index b09ecb4..c109989 100644 --- a/src/validation/tool-schemas.ts +++ b/src/validation/tool-schemas.ts @@ -413,6 +413,24 @@ export const ExportAnalyticsSchema = z.object({ .describe('Optional filter by MCP server name'), }); +// 72. optimization_storage +export const OptimizationStorageSchema = z.object({ + operation: z.enum(['store', 'retrieve']), + originalTextHash: z.string().optional(), + optimizedText: z.string().optional(), + originalTokens: z.number().optional(), + optimizedTokens: z.number().optional(), + tokensSaved: z.number().optional(), +}); + +// 73. context_delta +export const ContextDeltaSchema = z.object({ + operation: z.enum(['compute-delta', 'seed', 'clear']), + sessionId: z.string(), + filePath: z.string(), + currentContent: z.string().optional(), +}); + // Map tool names to their schemas for easy lookup export const toolSchemaMap: Record> = { optimize_text: OptimizeTextSchema, @@ -486,4 +504,6 @@ export const toolSchemaMap: Record> = { get_action_analytics: GetActionAnalyticsSchema, get_mcp_server_analytics: GetMcpServerAnalyticsSchema, export_analytics: ExportAnalyticsSchema, + optimization_storage: OptimizationStorageSchema, + context_delta: ContextDeltaSchema, }; From bfe8278b6bf61a3dc450ce8ba2ea79cdd3a9f7bf Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:31:06 -0400 Subject: [PATCH 14/26] test: add unit tests for lrucache, tokenizers, diff, session, configmanager 33 tests covering eviction/TTL/stats (LruCache), content-type detection and factory routing (tokenizers), round-trip and mismatched-baseline behaviors (diff), history compression and snapshot round-trip (Session/SessionManager), and user-override + validation fallback (ConfigManager). Refs #120, #121, #122, #124, #125 Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/unit/config.test.ts | 68 +++++++++++++++++++++++++++++++ tests/unit/diff.test.ts | 33 +++++++++++++++ tests/unit/lru-cache.test.ts | 77 +++++++++++++++++++++++++++++++++++ tests/unit/session.test.ts | 76 ++++++++++++++++++++++++++++++++++ tests/unit/tokenizers.test.ts | 64 +++++++++++++++++++++++++++++ 5 files changed, 318 insertions(+) create mode 100644 tests/unit/config.test.ts create mode 100644 tests/unit/diff.test.ts create mode 100644 tests/unit/lru-cache.test.ts create mode 100644 tests/unit/session.test.ts create mode 100644 tests/unit/tokenizers.test.ts diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts new file mode 100644 index 0000000..27b2850 --- /dev/null +++ b/tests/unit/config.test.ts @@ -0,0 +1,68 @@ +import { describe, it, expect, afterEach } from '@jest/globals'; +import { mkdtempSync, writeFileSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { ConfigManager } from '../../src/core/config.js'; + +describe('ConfigManager', () => { + const tempDirs: string[] = []; + + afterEach(() => { + while (tempDirs.length) { + const dir = tempDirs.pop(); + if (dir) { + rmSync(dir, { recursive: true, force: true }); + } + } + }); + + function writeConfig(content: string): string { + const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-config-')); + tempDirs.push(dir); + const file = join(dir, 'config.json'); + writeFileSync(file, content); + return file; + } + + it('returns defaults when no config file exists', () => { + const mgr = new ConfigManager(join(tmpdir(), 'does-not-exist-xyz.json')); + const opt = mgr.getOptimizationConfig(); + expect(opt.compressionTokenThreshold).toBe(0.7); + expect(opt.quality).toBe('balanced'); + expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000); + }); + + it('overrides defaults with user config', () => { + const configPath = writeConfig( + JSON.stringify({ + optimization: { + compressionTokenThreshold: 0.9, + quality: 'max', + modelTokenLimits: { 'custom-model': 500000 }, + }, + }) + ); + const mgr = new ConfigManager(configPath); + const opt = mgr.getOptimizationConfig(); + expect(opt.compressionTokenThreshold).toBe(0.9); + expect(opt.quality).toBe('max'); + expect(mgr.getModelTokenLimit('custom-model')).toBe(500000); + // Unrelated defaults still filled in + expect(opt.compressionPreserveThreshold).toBe(0.3); + }); + + it('falls back to defaults on invalid config', () => { + const configPath = writeConfig( + JSON.stringify({ optimization: { compressionTokenThreshold: 5 } }) + ); + const mgr = new ConfigManager(configPath); + // Invalid value (>1) is rejected by schema → defaults applied + expect(mgr.getOptimizationConfig().compressionTokenThreshold).toBe(0.7); + }); + + it('falls back to defaults on malformed JSON', () => { + const configPath = writeConfig('not json at all'); + const mgr = new ConfigManager(configPath); + expect(mgr.getOptimizationConfig().quality).toBe('balanced'); + }); +}); diff --git a/tests/unit/diff.test.ts b/tests/unit/diff.test.ts new file mode 100644 index 0000000..0780b1f --- /dev/null +++ b/tests/unit/diff.test.ts @@ -0,0 +1,33 @@ +import { describe, it, expect } from '@jest/globals'; +import { calculateDelta, applyDelta } from '../../src/utils/diff.js'; + +describe('diff utils', () => { + it('returns empty delta when inputs are identical', () => { + expect(calculateDelta('hello', 'hello')).toBe(''); + }); + + it('round-trips a simple change', () => { + const prev = 'line1\nline2\nline3\n'; + const next = 'line1\nline2 changed\nline3\n'; + const delta = calculateDelta(prev, next); + expect(delta).not.toBe(''); + expect(applyDelta(prev, delta)).toBe(next); + }); + + it('applyDelta on an empty delta is a no-op', () => { + expect(applyDelta('anything', '')).toBe('anything'); + }); + + it('produces a meaningfully smaller delta than the full content for small edits', () => { + const prev = 'a\n'.repeat(500); + const next = prev + 'appended line\n'; + const delta = calculateDelta(prev, next); + expect(delta.length).toBeLessThan(next.length); + }); + + it('throws when the patch targets a different baseline than supplied', () => { + const patch = calculateDelta('original\ntext\n', 'original\nchanged\n'); + // Applying the patch against completely different content fails. + expect(() => applyDelta('totally different input\n', patch)).toThrow(); + }); +}); diff --git a/tests/unit/lru-cache.test.ts b/tests/unit/lru-cache.test.ts new file mode 100644 index 0000000..6b7f2ac --- /dev/null +++ b/tests/unit/lru-cache.test.ts @@ -0,0 +1,77 @@ +import { describe, it, expect } from '@jest/globals'; +import { LruCache } from '../../src/utils/lru-cache.js'; + +describe('LruCache', () => { + it('rejects non-positive maxSize', () => { + expect(() => new LruCache(0)).toThrow(); + expect(() => new LruCache(-1)).toThrow(); + }); + + it('get returns undefined on miss and counts it', () => { + const cache = new LruCache(2); + expect(cache.get('x')).toBeUndefined(); + expect(cache.stats().misses).toBe(1); + }); + + it('set/get round-trips and counts hits', () => { + const cache = new LruCache(2); + cache.set('a', 1); + expect(cache.get('a')).toBe(1); + expect(cache.stats().hits).toBe(1); + }); + + it('evicts the least recently used entry when full', () => { + const cache = new LruCache(2); + cache.set('a', 1); + cache.set('b', 2); + cache.get('a'); + cache.set('c', 3); + + expect(cache.get('a')).toBe(1); + expect(cache.get('b')).toBeUndefined(); + expect(cache.get('c')).toBe(3); + expect(cache.stats().evictions).toBe(1); + }); + + it('refreshes recency on get', () => { + const cache = new LruCache(2); + cache.set('a', 1); + cache.set('b', 2); + cache.get('a'); + cache.set('c', 3); + + expect(cache.has('b')).toBe(false); + expect(cache.has('a')).toBe(true); + }); + + it('expires entries past the TTL', async () => { + const cache = new LruCache(2, 20); + cache.set('a', 1); + await new Promise((r) => setTimeout(r, 30)); + expect(cache.get('a')).toBeUndefined(); + expect(cache.stats().expired).toBe(1); + }); + + it('prune removes expired entries', async () => { + const cache = new LruCache(4, 20); + cache.set('a', 1); + cache.set('b', 2); + await new Promise((r) => setTimeout(r, 30)); + cache.set('c', 3); + const removed = cache.prune(); + expect(removed).toBe(2); + expect(cache.size).toBe(1); + }); + + it('stats.hitRate reflects hits / total', () => { + const cache = new LruCache(2); + cache.set('a', 1); + cache.get('a'); + cache.get('a'); + cache.get('missing'); + const stats = cache.stats(); + expect(stats.hits).toBe(2); + expect(stats.misses).toBe(1); + expect(stats.hitRate).toBeCloseTo(2 / 3); + }); +}); diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts new file mode 100644 index 0000000..19f2428 --- /dev/null +++ b/tests/unit/session.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect } from '@jest/globals'; +import { Session } from '../../src/core/session.js'; +import { SessionManager } from '../../src/core/session-manager.js'; +import { HeuristicTokenizer } from '../../src/core/tokenizers/heuristic-tokenizer.js'; + +describe('Session', () => { + it('appends messages and tracks updatedAt', async () => { + const session = new Session(); + const before = session.updatedAt; + await new Promise((r) => setTimeout(r, 5)); + session.addMessage('user', 'hi'); + expect(session.getHistory().length).toBe(1); + expect(session.updatedAt).toBeGreaterThan(before); + }); + + it('compressHistory is a no-op under the budget', async () => { + const session = new Session({ maxTokens: 10_000 }); + session.addMessage('user', 'short'); + const before = session.getHistory().length; + await session.compressHistory(); + expect(session.getHistory().length).toBe(before); + }); + + it('compressHistory summarizes head when over budget', async () => { + const tokenizer = new HeuristicTokenizer(); + const session = new Session({ maxTokens: 50, tokenizer }); + // Each long message is several hundred chars → easily over 50 tokens. + for (let i = 0; i < 10; i++) { + session.addMessage('user', 'a'.repeat(400) + ` turn=${i}`); + } + expect((await session.getHistoryTokenCount()) > 50).toBe(true); + await session.compressHistory(); + const history = session.getHistory(); + expect(history[0].role).toBe('system'); + expect(history[0].content.startsWith('[summary')).toBe(true); + expect(history.length).toBeLessThan(10); + }); + + it('snapshot round-trips', () => { + const session = new Session({ maxTokens: 42 }); + session.addMessage('user', 'hello'); + session.setFileContent('a.ts', 'const x = 1;'); + const snapshot = session.toSnapshot(); + const restored = Session.fromSnapshot(snapshot); + expect(restored.id).toBe(session.id); + expect(restored.maxTokens).toBe(42); + expect(restored.getFileContent('a.ts')).toBe('const x = 1;'); + expect(restored.getHistory()[0].content).toBe('hello'); + }); +}); + +describe('SessionManager', () => { + it('create/get/delete lifecycle', () => { + const manager = new SessionManager(); + const session = manager.createSession(); + expect(manager.getSession(session.id)).toBe(session); + expect(manager.deleteSession(session.id)).toBe(true); + expect(manager.getSession(session.id)).toBeUndefined(); + }); + + it('addMessage auto-compresses when over budget', async () => { + const tokenizer = new HeuristicTokenizer(); + const manager = new SessionManager({ tokenizer, defaultMaxTokens: 30 }); + const session = manager.createSession(); + for (let i = 0; i < 8; i++) { + await manager.addMessage(session.id, 'user', 'x'.repeat(300)); + } + const history = session.getHistory(); + expect(history[0].content.startsWith('[summary')).toBe(true); + }); + + it('throws for unknown session ids', async () => { + const manager = new SessionManager(); + await expect(manager.addMessage('bogus', 'user', 'hi')).rejects.toThrow(); + }); +}); diff --git a/tests/unit/tokenizers.test.ts b/tests/unit/tokenizers.test.ts new file mode 100644 index 0000000..ed2f2a3 --- /dev/null +++ b/tests/unit/tokenizers.test.ts @@ -0,0 +1,64 @@ +import { describe, it, expect } from '@jest/globals'; +import { HeuristicTokenizer, ContentType } from '../../src/core/tokenizers/heuristic-tokenizer.js'; +import { TokenizerFactory } from '../../src/core/tokenizers/tokenizer-factory.js'; +import { TiktokenTokenizer } from '../../src/core/tokenizers/tiktoken-tokenizer.js'; + +describe('HeuristicTokenizer', () => { + it('detects JSON content', () => { + const json = '{"a": 1, "b": [1, 2, 3]}'; + expect(HeuristicTokenizer.detectContentType(json)).toBe(ContentType.Json); + }); + + it('detects code content', () => { + const code = 'function foo() { return 42; }'; + expect(HeuristicTokenizer.detectContentType(code)).toBe(ContentType.Code); + }); + + it('detects markdown content', () => { + const md = '# Heading\n\n- item one\n- item two'; + expect(HeuristicTokenizer.detectContentType(md)).toBe(ContentType.Markdown); + }); + + it('defaults to text content', () => { + const text = 'Just a short plain sentence.'; + expect(HeuristicTokenizer.detectContentType(text)).toBe(ContentType.Text); + }); + + it('uses a lower chars/token ratio for code than text', async () => { + const tokenizer = new HeuristicTokenizer(); + const code = 'function foo() { return 42; }'; + const text = 'A sentence of roughly similar length here.'; + const codeTokens = await tokenizer.countTokens(code); + const textTokens = await tokenizer.countTokens(text); + // Code has ratio 2.5 vs text 4.0 → for strings of similar length, code tokens > text tokens. + expect(codeTokens / code.length).toBeGreaterThan(textTokens / text.length); + }); + + it('caches repeated inputs', async () => { + const tokenizer = new HeuristicTokenizer(); + const input = 'cache me'; + const first = await tokenizer.countTokens(input); + const second = await tokenizer.countTokens(input); + expect(first).toBe(second); + }); +}); + +describe('TokenizerFactory', () => { + it('returns a TiktokenTokenizer for gpt-4', () => { + const t = TokenizerFactory.create('gpt-4'); + expect(t).toBeInstanceOf(TiktokenTokenizer); + t.free(); + }); + + it('returns a TiktokenTokenizer for Claude models (maps to gpt-4)', () => { + const t = TokenizerFactory.create('claude-opus-4-7'); + expect(t).toBeInstanceOf(TiktokenTokenizer); + t.free(); + }); + + it('falls back to HeuristicTokenizer for unknown models', () => { + const t = TokenizerFactory.create('some-unknown-local-model'); + expect(t).toBeInstanceOf(HeuristicTokenizer); + t.free(); + }); +}); From 8b7e4818abdd0fe8a138999967de2f113984c2a9 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:43:12 -0400 Subject: [PATCH 15/26] fix(core): production hardening for session, delta, storage, tokenizer factory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the audit gaps: - SessionManager: atomic persist (tmp + rename), debounced writes (~250ms), error-isolated (disk-full no longer crashes the server), zod-validated load, session TTL eviction (30d), per-file size cap (10 MB), and flush() for clean shutdown. - Session: getHistoryTokenCount now REQUIRES a tokenizer unless the caller opts into the char/4 fallback via allowCharHeuristic — the whole point of #124 is removing that heuristic. Added clearFileContent. - context_delta tool: compute-delta and clear now route through SessionManager.updateFileState / clearFileState so file-state changes are durable across restarts instead of in-memory-only. - TokenizerFactory: caches instances per model (one native tiktoken encoder instead of one per call) and exposes disposeAll() for shutdown. Added Gemini/Google routing path. - GoogleAITokenizer: new — calls Google AI countTokens REST with a 10s timeout, LRU-memoized, surfaces errors so the factory can pick a fallback. - SqliteOptimizationStorage: default path now an absolute ~/.token-optimizer/optimization.db and the directory is created on demand. Relative "./optimization.db" was unusable when the MCP server launched from an unknown cwd. - Server shutdown: sessionManager.flush(), TokenizerFactory.disposeAll(), and optimizationStorage.close() added to the cleanup pipeline. Refs #121, #122, #124 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/analytics/optimization-storage.ts | 15 +- src/core/session-manager.ts | 185 ++++++++++++++++----- src/core/session.ts | 32 +++- src/core/tokenizers/google-ai-tokenizer.ts | 93 +++++++++++ src/core/tokenizers/tokenizer-factory.ts | 67 ++++++-- src/server/index.ts | 3 + src/tools/context-delta-tool.ts | 23 ++- tests/unit/session.test.ts | 22 ++- 8 files changed, 372 insertions(+), 68 deletions(-) create mode 100644 src/core/tokenizers/google-ai-tokenizer.ts diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts index cd6991f..9509709 100644 --- a/src/analytics/optimization-storage.ts +++ b/src/analytics/optimization-storage.ts @@ -1,4 +1,7 @@ import Database from 'better-sqlite3'; +import { existsSync, mkdirSync } from 'fs'; +import { homedir } from 'os'; +import { dirname, join } from 'path'; import { CompressionEngine } from '../core/compression-engine.js'; export interface OptimizationResult { @@ -9,17 +12,25 @@ export interface OptimizationResult { tokensSaved: number; } +export function getDefaultOptimizationDbPath(): string { + return join(homedir(), '.token-optimizer', 'optimization.db'); +} + export class SqliteOptimizationStorage { private db: Database.Database | null = null; private readonly dbPath: string; private readonly compressionEngine: CompressionEngine; - constructor(dbPath: string = './optimization.db') { - this.dbPath = dbPath; + constructor(dbPath?: string) { + this.dbPath = dbPath ?? getDefaultOptimizationDbPath(); this.compressionEngine = new CompressionEngine(); } public initializeDatabase(): void { + const dir = dirname(this.dbPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } this.db = new Database(this.dbPath); this.db.pragma('journal_mode = WAL'); this.db.exec(` diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts index 1cc8372..2a19edd 100644 --- a/src/core/session-manager.ts +++ b/src/core/session-manager.ts @@ -1,31 +1,68 @@ -import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; +import { + existsSync, + mkdirSync, + readFileSync, + writeFileSync, + renameSync, + unlinkSync, +} from 'fs'; import { dirname } from 'path'; +import { z } from 'zod'; import { Session, SessionOptions, - SessionSnapshot, MessageRole, } from './session.js'; import { ITokenizer } from './tokenizers/i-tokenizer.js'; import { ISummarizer } from './summarization.js'; /** - * Singleton-style SessionManager — addresses issues #121 / #122. + * Persistent SessionManager — addresses issues #121 / #122. * - * Persists all sessions to a single JSON file so they survive restarts. - * When a message is added we check whether the session has exceeded its - * token budget and, if so, auto-compress the history (#121). + * Production behaviors added after the audit: + * - Atomic persistence: write to .tmp then rename so a crash mid- + * write never produces a corrupt sessions.json. + * - Debounced persistence: rapid addMessage calls coalesce into one + * disk write per PERSIST_DEBOUNCE_MS window. + * - Error-isolated persist(): a disk-full or permission error is logged + * and never bubbles up to crash the MCP server. + * - Schema-validated load(): malformed persisted state is rejected with + * a warning instead of being cast blindly. + * - Size / expiry caps: sessions inactive past `sessionTtlMs` are + * evicted on load, and no individual file state entry can exceed + * `maxFileStateBytes`. */ +const PERSIST_DEBOUNCE_MS = 250; +const DEFAULT_SESSION_TTL_MS = 30 * 24 * 60 * 60 * 1000; // 30 days +const DEFAULT_MAX_FILE_STATE_BYTES = 10 * 1024 * 1024; // 10 MB per file + +const MessageSchema = z.object({ + role: z.enum(['system', 'user', 'assistant', 'tool']), + content: z.string(), + timestamp: z.number(), +}); + +const SessionSnapshotSchema = z.object({ + id: z.string(), + history: z.array(MessageSchema), + fileState: z.record(z.string(), z.string()), + maxTokens: z.number(), + createdAt: z.number(), + updatedAt: z.number(), +}); + +const PersistedStateSchema = z.object({ + sessions: z.array(SessionSnapshotSchema), +}); + export interface SessionManagerOptions { persistencePath?: string; tokenizer?: ITokenizer; summarizer?: ISummarizer; defaultMaxTokens?: number; -} - -interface PersistedState { - sessions: SessionSnapshot[]; + sessionTtlMs?: number; + maxFileStateBytes?: number; } export class SessionManager { @@ -34,12 +71,19 @@ export class SessionManager { private readonly tokenizer: ITokenizer | undefined; private readonly summarizer: ISummarizer | undefined; private readonly defaultMaxTokens: number | undefined; + private readonly sessionTtlMs: number; + private readonly maxFileStateBytes: number; + private pendingPersistTimer: NodeJS.Timeout | null = null; + private persistInFlight = false; constructor(options: SessionManagerOptions = {}) { this.persistencePath = options.persistencePath ?? null; this.tokenizer = options.tokenizer; this.summarizer = options.summarizer; this.defaultMaxTokens = options.defaultMaxTokens; + this.sessionTtlMs = options.sessionTtlMs ?? DEFAULT_SESSION_TTL_MS; + this.maxFileStateBytes = + options.maxFileStateBytes ?? DEFAULT_MAX_FILE_STATE_BYTES; if (this.persistencePath && existsSync(this.persistencePath)) { this.load(); } @@ -53,7 +97,7 @@ export class SessionManager { ...options, }); this.sessions.set(session.id, session); - this.persist(); + this.schedulePersist(); return session; } @@ -68,33 +112,24 @@ export class SessionManager { public deleteSession(id: string): boolean { const removed = this.sessions.delete(id); if (removed) { - this.persist(); + this.schedulePersist(); } return removed; } - /** - * Add a message to the session and auto-compress the history if the - * token budget is exceeded (#121). - * - * Returns the post-add token count of the session. - */ public async addMessage( sessionId: string, role: MessageRole, content: string ): Promise { - const session = this.sessions.get(sessionId); - if (!session) { - throw new Error(`Unknown session: ${sessionId}`); - } + const session = this.requireSession(sessionId); session.addMessage(role, content); const currentTokens = await session.getHistoryTokenCount(); let finalTokens = currentTokens; if (currentTokens > session.maxTokens) { finalTokens = await session.compressHistory(); } - this.persist(); + this.schedulePersist(); return finalTokens; } @@ -103,26 +138,92 @@ export class SessionManager { filePath: string, content: string ): void { - const session = this.sessions.get(sessionId); - if (!session) { - throw new Error(`Unknown session: ${sessionId}`); + const session = this.requireSession(sessionId); + if (Buffer.byteLength(content, 'utf8') > this.maxFileStateBytes) { + throw new Error( + `Session file state content exceeds ${this.maxFileStateBytes} bytes for ${filePath}` + ); } session.setFileContent(filePath, content); - this.persist(); + this.schedulePersist(); + } + + public clearFileState(sessionId: string, filePath: string): void { + const session = this.requireSession(sessionId); + session.clearFileContent(filePath); + this.schedulePersist(); + } + + /** + * Flush any pending debounced persist. Call this from the host's + * shutdown handler so the last writes survive. + */ + public async flush(): Promise { + if (this.pendingPersistTimer) { + clearTimeout(this.pendingPersistTimer); + this.pendingPersistTimer = null; + } + this.persistNow(); } - private persist(): void { + private requireSession(id: string): Session { + const session = this.sessions.get(id); + if (!session) { + throw new Error(`Unknown session: ${id}`); + } + return session; + } + + private schedulePersist(): void { if (!this.persistencePath) { return; } - const state: PersistedState = { - sessions: this.listSessions().map((s) => s.toSnapshot()), - }; - const dir = dirname(this.persistencePath); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); + if (this.pendingPersistTimer) { + return; + } + this.pendingPersistTimer = setTimeout(() => { + this.pendingPersistTimer = null; + this.persistNow(); + }, PERSIST_DEBOUNCE_MS); + // Don't keep the event loop alive just for persistence. + if (typeof this.pendingPersistTimer.unref === 'function') { + this.pendingPersistTimer.unref(); + } + } + + private persistNow(): void { + if (!this.persistencePath || this.persistInFlight) { + return; + } + this.persistInFlight = true; + try { + const state = { + sessions: this.listSessions().map((s) => s.toSnapshot()), + }; + const dir = dirname(this.persistencePath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + const tmpPath = `${this.persistencePath}.tmp`; + writeFileSync(tmpPath, JSON.stringify(state, null, 2)); + renameSync(tmpPath, this.persistencePath); + } catch (error) { + const message = + error instanceof Error ? error.message : String(error); + console.warn( + `SessionManager: failed to persist to ${this.persistencePath}: ${message}` + ); + // Best-effort cleanup of the tmp file + if (this.persistencePath) { + try { + unlinkSync(`${this.persistencePath}.tmp`); + } catch { + // Ignore — tmp file may not exist. + } + } + } finally { + this.persistInFlight = false; } - writeFileSync(this.persistencePath, JSON.stringify(state, null, 2)); } private load(): void { @@ -131,11 +232,19 @@ export class SessionManager { } try { const raw = readFileSync(this.persistencePath, 'utf-8'); - const parsed = JSON.parse(raw) as PersistedState; - if (!parsed || !Array.isArray(parsed.sessions)) { + const json = JSON.parse(raw); + const parsed = PersistedStateSchema.safeParse(json); + if (!parsed.success) { + console.warn( + `SessionManager: invalid persisted state at ${this.persistencePath}, discarding.` + ); return; } - for (const snapshot of parsed.sessions) { + const now = Date.now(); + for (const snapshot of parsed.data.sessions) { + if (now - snapshot.updatedAt > this.sessionTtlMs) { + continue; // Expired session — drop. + } const session = Session.fromSnapshot(snapshot, { tokenizer: this.tokenizer, summarizer: this.summarizer, diff --git a/src/core/session.ts b/src/core/session.ts index 78ebd96..1830b5d 100644 --- a/src/core/session.ts +++ b/src/core/session.ts @@ -37,10 +37,17 @@ export interface SessionOptions { preserveTailRatio?: number; tokenizer?: ITokenizer; summarizer?: ISummarizer; + /** + * When true, getHistoryTokenCount may fall back to a character/4 + * heuristic if no tokenizer is supplied. Production code should + * always pass a real tokenizer and leave this false (the default). + */ + allowCharHeuristic?: boolean; } const DEFAULT_MAX_TOKENS = 100_000; const DEFAULT_PRESERVE_TAIL_RATIO = 0.3; +const CHAR_HEURISTIC_RATIO = 4; export class Session { public readonly id: string; @@ -53,6 +60,7 @@ export class Session { private readonly preserveTailRatio: number; private readonly tokenizer: ITokenizer | null; private readonly summarizer: ISummarizer; + private readonly allowCharHeuristic: boolean; constructor(options: SessionOptions = {}) { this.id = options.id ?? randomUUID(); @@ -60,6 +68,7 @@ export class Session { this.preserveTailRatio = options.preserveTailRatio ?? DEFAULT_PRESERVE_TAIL_RATIO; this.tokenizer = options.tokenizer ?? null; this.summarizer = options.summarizer ?? new TruncatingSummarizer(); + this.allowCharHeuristic = options.allowCharHeuristic ?? false; this.createdAt = Date.now(); this.updatedAt = this.createdAt; } @@ -88,14 +97,31 @@ export class Session { this.updatedAt = Date.now(); } + public clearFileContent(filePath: string): void { + if (filePath in this.fileState) { + delete this.fileState[filePath]; + this.updatedAt = Date.now(); + } + } + /** - * Total token count of the current history. Uses the injected tokenizer - * when available; otherwise falls back to the character/4 heuristic. + * Total token count of the current history. + * + * Requires a tokenizer unless the caller opted into the character/4 + * heuristic via `allowCharHeuristic: true`. We default to requiring a + * tokenizer because #124's whole point is eliminating char/4. */ public async getHistoryTokenCount(): Promise { if (!this.tokenizer) { + if (!this.allowCharHeuristic) { + throw new Error( + 'Session.getHistoryTokenCount requires a tokenizer. ' + + 'Construct the Session with TokenizerFactory.create(...) ' + + 'or pass allowCharHeuristic: true to opt into the fallback.' + ); + } return this.history.reduce( - (acc, m) => acc + Math.ceil(m.content.length / 4), + (acc, m) => acc + Math.ceil(m.content.length / CHAR_HEURISTIC_RATIO), 0 ); } diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts new file mode 100644 index 0000000..19ea381 --- /dev/null +++ b/src/core/tokenizers/google-ai-tokenizer.ts @@ -0,0 +1,93 @@ +import { ITokenizer } from './i-tokenizer.js'; +import { LruCache } from '../../utils/lru-cache.js'; + +const DEFAULT_CACHE_SIZE = 500; +const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; +const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models'; +const REQUEST_TIMEOUT_MS = 10_000; + +/** + * Remote tokenizer that uses Google AI's countTokens REST endpoint — + * addresses issue #124's GoogleAITokenizer requirement. + * + * Network calls are memoized in an LruCache with a TTL so repeated + * token counts don't re-hit the API. If the request fails (network, + * 4xx, 5xx) we surface the error to the caller — TokenCounter above + * is responsible for deciding whether to fall back to a local + * tokenizer. + */ +export class GoogleAITokenizer implements ITokenizer { + public readonly modelName: string; + private readonly apiKey: string; + private readonly endpoint: string; + private readonly cache: LruCache; + private readonly timeoutMs: number; + + constructor( + modelName: string, + apiKey: string, + options: { + endpoint?: string; + cache?: LruCache; + timeoutMs?: number; + } = {} + ) { + if (!apiKey) { + throw new Error('GoogleAITokenizer requires an apiKey'); + } + this.modelName = modelName; + this.apiKey = apiKey; + this.endpoint = options.endpoint ?? DEFAULT_ENDPOINT; + this.cache = + options.cache ?? + new LruCache(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS); + this.timeoutMs = options.timeoutMs ?? REQUEST_TIMEOUT_MS; + } + + public async countTokens(text: string): Promise { + const cached = this.cache.get(text); + if (cached !== undefined) { + return cached; + } + + const url = `${this.endpoint}/${encodeURIComponent( + this.modelName + )}:countTokens?key=${encodeURIComponent(this.apiKey)}`; + + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), this.timeoutMs); + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + contents: [{ parts: [{ text }] }], + }), + signal: controller.signal, + }); + + if (!response.ok) { + const body = await response.text().catch(() => ''); + throw new Error( + `Google AI countTokens failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}` + ); + } + + const data = (await response.json()) as { totalTokens?: number }; + if (typeof data.totalTokens !== 'number') { + throw new Error( + `Google AI countTokens returned unexpected payload: ${JSON.stringify(data).slice(0, 200)}` + ); + } + this.cache.set(text, data.totalTokens); + return data.totalTokens; + } finally { + clearTimeout(timeout); + } + } + + public free(): void { + this.cache.clear(); + } +} diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts index ea4b360..d4d00b3 100644 --- a/src/core/tokenizers/tokenizer-factory.ts +++ b/src/core/tokenizers/tokenizer-factory.ts @@ -1,33 +1,72 @@ import { ITokenizer } from './i-tokenizer.js'; import { TiktokenTokenizer } from './tiktoken-tokenizer.js'; import { HeuristicTokenizer } from './heuristic-tokenizer.js'; +import { GoogleAITokenizer } from './google-ai-tokenizer.js'; +/** + * Pluggable tokenizer factory — addresses issues #123 / #124. + * + * Resolution order: + * 1. Google AI models (`gemini-*`) — GoogleAITokenizer when + * GOOGLE_AI_API_KEY is set, else HeuristicTokenizer. + * 2. Tiktoken-compatible families (GPT, Claude) — TiktokenTokenizer. + * 3. HeuristicTokenizer fallback for everything else. + * + * Instances are cached per model name so callers don't pay for repeated + * allocation of the native tiktoken encoder, and so their per-tokenizer + * LRU caches can be shared across call sites. + */ export class TokenizerFactory { - /** - * Create a tokenizer for the given model name. - * - * Resolution order: - * 1. Tiktoken for GPT-4 / GPT-3.5-turbo / Claude-family models. - * 2. HeuristicTokenizer as the content-aware fallback. - * - * Callers that already hold a tokenizer should prefer reusing it — - * construction allocates a tiktoken encoder (native resource). - */ + private static readonly instances = new Map(); + public static create(modelName: string): ITokenizer { - if (TiktokenTokenizer.supports(modelName)) { - return new TiktokenTokenizer(modelName); + const cached = TokenizerFactory.instances.get(modelName); + if (cached) { + return cached; } - return new HeuristicTokenizer(modelName); + const tokenizer = TokenizerFactory.build(modelName); + TokenizerFactory.instances.set(modelName, tokenizer); + return tokenizer; } - /** Create a tokenizer using the active model environment variables. */ public static createFromEnv(): ITokenizer { const modelName = process.env.CLAUDE_MODEL || process.env.ANTHROPIC_MODEL || process.env.OPENAI_MODEL || + process.env.GOOGLE_AI_MODEL || process.env.TOKEN_OPTIMIZER_MODEL || 'gpt-4'; return TokenizerFactory.create(modelName); } + + /** + * Release every cached tokenizer. Call this on server shutdown so + * native tiktoken encoders are freed. + */ + public static disposeAll(): void { + for (const tokenizer of TokenizerFactory.instances.values()) { + try { + tokenizer.free(); + } catch { + // Ignore — best-effort cleanup. + } + } + TokenizerFactory.instances.clear(); + } + + private static build(modelName: string): ITokenizer { + const lower = modelName.toLowerCase(); + if (lower.startsWith('gemini') || lower.includes('google')) { + const apiKey = process.env.GOOGLE_AI_API_KEY; + if (apiKey) { + return new GoogleAITokenizer(modelName, apiKey); + } + return new HeuristicTokenizer(modelName); + } + if (TiktokenTokenizer.supports(modelName)) { + return new TiktokenTokenizer(modelName); + } + return new HeuristicTokenizer(modelName); + } } diff --git a/src/server/index.ts b/src/server/index.ts index 116ca6c..63f6cc1 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -2272,6 +2272,9 @@ async function cleanup() { }, { fn: () => cache?.close(), name: 'closing cache' }, { fn: () => tokenCounter?.free(), name: 'freeing tokenCounter' }, + { fn: async () => await sessionManager.flush(), name: 'flushing sessions' }, + { fn: () => TokenizerFactory.disposeAll(), name: 'disposing tokenizers' }, + { fn: () => optimizationStorage.close(), name: 'closing optimization storage' }, // Note: predictiveCache and cacheWarmup do not implement dispose() methods // Removed dispose() calls to prevent runtime errors during cleanup ]); diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts index 28f5f35..33f6595 100644 --- a/src/tools/context-delta-tool.ts +++ b/src/tools/context-delta-tool.ts @@ -73,7 +73,14 @@ export class ContextDeltaTool { return { success: false, error: `Unknown session: ${sessionId}` }; } const previous = session.getFileContent(filePath); - session.setFileContent(filePath, currentContent); + + try { + // Goes through SessionManager so the new state hits disk. + this.sessionManager.updateFileState(sessionId, filePath, currentContent); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } if (previous === undefined) { return { @@ -112,15 +119,13 @@ export class ContextDeltaTool { } private clear(options: ContextDeltaOptions): ContextDeltaResponse { - const session = this.sessionManager.getSession(options.sessionId); - if (!session) { - return { - success: false, - error: `Unknown session: ${options.sessionId}`, - }; + try { + this.sessionManager.clearFileState(options.sessionId, options.filePath); + return { success: true }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; } - session.setFileContent(options.filePath, ''); - return { success: true }; } } diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts index 19f2428..10237fe 100644 --- a/tests/unit/session.test.ts +++ b/tests/unit/session.test.ts @@ -5,7 +5,7 @@ import { HeuristicTokenizer } from '../../src/core/tokenizers/heuristic-tokenize describe('Session', () => { it('appends messages and tracks updatedAt', async () => { - const session = new Session(); + const session = new Session({ allowCharHeuristic: true }); const before = session.updatedAt; await new Promise((r) => setTimeout(r, 5)); session.addMessage('user', 'hi'); @@ -14,13 +14,31 @@ describe('Session', () => { }); it('compressHistory is a no-op under the budget', async () => { - const session = new Session({ maxTokens: 10_000 }); + const session = new Session({ + maxTokens: 10_000, + allowCharHeuristic: true, + }); session.addMessage('user', 'short'); const before = session.getHistory().length; await session.compressHistory(); expect(session.getHistory().length).toBe(before); }); + it('getHistoryTokenCount throws without a tokenizer when heuristic is off', async () => { + const session = new Session(); + session.addMessage('user', 'hi'); + await expect(session.getHistoryTokenCount()).rejects.toThrow( + /requires a tokenizer/ + ); + }); + + it('clearFileContent removes the entry', () => { + const session = new Session(); + session.setFileContent('a.ts', 'const x = 1;'); + session.clearFileContent('a.ts'); + expect(session.getFileContent('a.ts')).toBeUndefined(); + }); + it('compressHistory summarizes head when over budget', async () => { const tokenizer = new HeuristicTokenizer(); const session = new Session({ maxTokens: 50, tokenizer }); From 4e3f119e20869a22689042649bdb3407af001b4a Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:46:15 -0400 Subject: [PATCH 16/26] feat(tokenizers): refactor tokencounter to delegate via tokenizerfactory (#124) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the tokenization framework requirements: - TokenCounter now delegates tokenization to the pluggable TokenizerFactory; the sync count() path still uses a local tiktoken encoder for tiktoken-compatible models (we need raw token arrays for truncate()), and a new countAsync() path routes through the factory for remote models (Google AI). Deeper callers that used the existing sync surface keep working unchanged. - count_tokens MCP tool accepts an optional modelName parameter and returns the resolved model in the response. When omitted, the server- configured TokenCounter is used. - TokenCounter.free() no longer tears down the factory-owned tokenizer — the factory owns that lifecycle (#16). Local tiktoken encoders are freed as before. Refs #124 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/token-counter.ts | 195 ++++++++++++--------------------- src/server/index.ts | 24 +++- src/validation/tool-schemas.ts | 7 ++ 3 files changed, 97 insertions(+), 129 deletions(-) diff --git a/src/core/token-counter.ts b/src/core/token-counter.ts index d4bd471..80dd895 100644 --- a/src/core/token-counter.ts +++ b/src/core/token-counter.ts @@ -1,4 +1,7 @@ import { encoding_for_model, Tiktoken } from 'tiktoken'; +import { TokenizerFactory } from './tokenizers/tokenizer-factory.js'; +import { ITokenizer } from './tokenizers/i-tokenizer.js'; +import { TiktokenTokenizer } from './tokenizers/tiktoken-tokenizer.js'; export interface TokenCountResult { tokens: number; @@ -6,114 +9,95 @@ export interface TokenCountResult { estimatedCost?: number; } +/** + * TokenCounter — delegates tokenization to the pluggable + * TokenizerFactory from issue #124 while preserving the callable + * surface (`count`, `countBatch`, `estimate`, `calculateSavings`, + * `calculateCacheSavings`, `exceedsLimit`, `truncate`, + * `getTokenCharRatio`, `free`) the rest of the codebase relies on. + * + * Truncation still uses a local tiktoken encoder because the + * ITokenizer contract doesn't expose the raw token array — we + * keep one for GPT-4-family models and otherwise degrade to + * character-based truncation. + */ export class TokenCounter { - private encoder: Tiktoken; - private readonly model: string; + private readonly tokenizer: ITokenizer; + private readonly encoder: Tiktoken | null; + public readonly model: string; constructor(model?: string) { - // Auto-detect model from environment or use provided model - // Claude Code sets CLAUDE_MODEL env var with the active model - // Falls back to GPT-4 as universal approximation this.model = model || process.env.CLAUDE_MODEL || process.env.ANTHROPIC_MODEL || + process.env.OPENAI_MODEL || + process.env.GOOGLE_AI_MODEL || 'gpt-4'; - // Map Claude models to closest tiktoken equivalent - // Claude uses similar tokenization to GPT-4, so it's a good approximation - const tokenModel = this.mapToTiktokenModel(this.model); - - // Initialize tiktoken encoder - this.encoder = encoding_for_model(tokenModel); - } - - /** - * Map Claude/Anthropic models to tiktoken model names - */ - private mapToTiktokenModel(model: string): 'gpt-4' | 'gpt-3.5-turbo' { - const lowerModel = model.toLowerCase(); - - // Claude models use GPT-4 tokenizer as closest approximation - if ( - lowerModel.includes('claude') || - lowerModel.includes('sonnet') || - lowerModel.includes('opus') || - lowerModel.includes('haiku') - ) { - return 'gpt-4'; - } - - // GPT-4 variants - if (lowerModel.includes('gpt-4')) { - return 'gpt-4'; + this.tokenizer = TokenizerFactory.create(this.model); + + // Keep a local encoder for tiktoken-compatible models — truncate() + // needs to slice the raw token array, which the ITokenizer interface + // intentionally does not expose. + if (TiktokenTokenizer.supports(this.model)) { + this.encoder = encoding_for_model( + TiktokenTokenizer.mapToTiktokenModel(this.model) + ); + } else { + this.encoder = null; } - - // GPT-3.5 variants - if (lowerModel.includes('gpt-3.5') || lowerModel.includes('gpt3.5')) { - return 'gpt-3.5-turbo'; - } - - // Default to GPT-4 for unknown models - return 'gpt-4'; } /** - * Count tokens in text + * Count tokens in text (synchronous). + * + * Synchronous on tiktoken-backed tokenizers, which is all we expose + * externally via Anthropic/OpenAI. Remote tokenizers (Google AI) are + * reachable via `countAsync`. */ count(text: string): TokenCountResult { - const tokens = this.encoder.encode(text); - + if (this.encoder) { + return { + tokens: this.encoder.encode(text).length, + characters: text.length, + }; + } + // Fall back to the synchronous estimate so non-tiktoken paths keep + // working. Callers that want exact remote counts should use + // countAsync. return { - tokens: tokens.length, + tokens: this.estimate(text), characters: text.length, }; } /** - * Count tokens in multiple texts + * Async token counting through the pluggable tokenizer — accurate for + * both local tiktoken and remote Google AI paths. */ + async countAsync(text: string): Promise { + const tokens = await this.tokenizer.countTokens(text); + return { tokens, characters: text.length }; + } + countBatch(texts: string[]): TokenCountResult { let totalTokens = 0; let totalCharacters = 0; - for (const text of texts) { const result = this.count(text); totalTokens += result.tokens; totalCharacters += result.characters; } - - return { - tokens: totalTokens, - characters: totalCharacters, - }; + return { tokens: totalTokens, characters: totalCharacters }; } - /** - * Estimate token count without encoding (faster, less accurate) - */ estimate(text: string): number { - // Rough estimate: ~4 characters per token on average + // Rough fallback: ~4 characters per token. Only used when no + // tiktoken encoder is available for this model. return Math.ceil(text.length / 4); } - /** - * Calculate token savings based on context window management - * - * @param originalText - The original text content - * @param contextTokens - Number of tokens remaining in LLM context (default: 0 for full caching) - * @returns Token savings calculation - * - * @remarks - * This method measures context window optimization, NOT compression ratio. - * When content is cached externally (SQLite, Redis, etc.), it's completely - * removed from the LLM's context window, resulting in 100% token savings. - * - * Use cases: - * - External caching: contextTokens = 0 (100% savings) - * - Metadata-only: contextTokens = tokens in metadata (e.g., 8) - * - Summarization: contextTokens = tokens in summary (e.g., 50) - */ calculateSavings( originalText: string, contextTokens: number = 0 @@ -136,36 +120,6 @@ export class TokenCounter { }; } - /** - * Calculate context window savings for externally cached content - * - * @param originalText - The original text content being cached - * @returns Token savings calculation with 100% savings - * - * @remarks - * When content is compressed and stored in an external cache (SQLite, Redis, etc.), - * it's completely removed from the LLM's context window. The compressed/encoded - * data is NEVER sent to the LLM, so we measure 100% token savings. - * - * Key insight: We're measuring CONTEXT WINDOW CLEARANCE, not compression ratio. - * - ✅ Content removed from LLM context (saves tokens) - * - ✅ Storage compressed (saves disk space) - * - ❌ Don't count tokens in compressed data (it's not sent to LLM!) - * - * @example - * ```typescript - * const tokenCounter = new TokenCounter(); - * const content = "Large file content..."; - * const compressed = compress(content); - * - * // Store in external cache - * await cache.set(key, compressed); - * - * // Calculate context window savings - * const savings = tokenCounter.calculateCacheSavings(content); - * // Returns: { originalTokens: 250, contextTokens: 0, tokensSaved: 250, percentSaved: 100 } - * ``` - */ calculateCacheSavings(originalText: string): { originalTokens: number; contextTokens: number; @@ -173,54 +127,45 @@ export class TokenCounter { percentSaved: number; } { const original = this.count(originalText); - return { originalTokens: original.tokens, - contextTokens: 0, // External cache - nothing in context - tokensSaved: original.tokens, // 100% of original tokens saved - percentSaved: 100, // Always 100% for external caching + contextTokens: 0, + tokensSaved: original.tokens, + percentSaved: 100, }; } - /** - * Check if text exceeds token limit - */ exceedsLimit(text: string, limit: number): boolean { - const result = this.count(text); - return result.tokens > limit; + return this.count(text).tokens > limit; } - /** - * Truncate text to fit within token limit - */ truncate(text: string, maxTokens: number): string { + if (!this.encoder) { + // No raw-token access for this model — fall back to a + // char-proportional slice using the estimate ratio. + const approxChars = maxTokens * 4; + return text.length <= approxChars ? text : text.slice(0, approxChars); + } const tokens = this.encoder.encode(text); - if (tokens.length <= maxTokens) { return text; } - const truncatedTokens = tokens.slice(0, maxTokens); const decoded = this.encoder.decode(truncatedTokens); - - // Handle potential type issues with decode return value return typeof decoded === 'string' ? decoded : new TextDecoder().decode(decoded); } - /** - * Get token-to-character ratio for text - */ getTokenCharRatio(text: string): number { const result = this.count(text); return result.tokens > 0 ? result.characters / result.tokens : 0; } - /** - * Free the encoder resources - */ free(): void { - this.encoder.free(); + if (this.encoder) { + this.encoder.free(); + } + // TokenizerFactory owns the tokenizer's lifecycle (instance cache). } } diff --git a/src/server/index.ts b/src/server/index.ts index 63f6cc1..89eb230 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -445,7 +445,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { { name: 'count_tokens', description: - 'Count tokens in text using tiktoken. Useful for understanding token usage before and after optimization.', + 'Count tokens in text using the pluggable tokenizer framework (#124). Picks a model-specific tokenizer (tiktoken for GPT/Claude, Google AI REST for Gemini, content-aware heuristic fallback).', inputSchema: { type: 'object', properties: { @@ -453,6 +453,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { type: 'string', description: 'Text to count tokens for', }, + modelName: { + type: 'string', + description: + 'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). Defaults to the server-configured model when omitted.', + }, }, required: ['text'], }, @@ -864,14 +869,25 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } case 'count_tokens': { - const { text } = args as { text: string }; - const result = tokenCounter.count(text); + const { text, modelName } = args as { + text: string; + modelName?: string; + }; + const counter = modelName ? new TokenCounter(modelName) : tokenCounter; + const result = modelName + ? await counter.countAsync(text) + : counter.count(text); + if (modelName) { + // Model-specific counters are one-shot — free the local + // tiktoken encoder (if any) that this call allocated. + counter.free(); + } return { content: [ { type: 'text', - text: JSON.stringify(result, null, 2), + text: JSON.stringify({ ...result, model: modelName ?? counter.model }, null, 2), }, ], }; diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts index c109989..cad168b 100644 --- a/src/validation/tool-schemas.ts +++ b/src/validation/tool-schemas.ts @@ -23,6 +23,13 @@ export const GetCachedSchema = z.object({ // 3. count_tokens export const CountTokensSchema = z.object({ text: z.string().describe('Text to count tokens for'), + modelName: z + .string() + .optional() + .describe( + 'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). ' + + 'Defaults to the server-configured model when omitted.' + ), }); // 4. compress_text From 2a1cab1599a88588c3e55bb273dd660745f8a71b Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:48:49 -0400 Subject: [PATCH 17/26] feat(config): cachesettings + chatcompression + default file + wiring (#120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the remaining #120 acceptance criteria: - Adds OptimizationConfig.cacheSettings { maxSize, ttlSeconds } and OptimizationConfig.chatCompression { enabled, tokenLimit, strategy } — the two sections called out in the issue example config. - ConfigManager writes ~/.token-optimizer/config.json with DEFAULT_CONFIG on first run so the user can edit a real file (was previously in-memory-only). writeDefaults can be disabled for tests. - User config sub-objects deep-merge: overriding `cacheSettings.maxSize` no longer wipes out `ttlSeconds`. Zod schema is partial at every depth via OptimizationConfigUserSchema. - server/index.ts: loads ConfigManager on startup and derives the SessionManager's defaultMaxTokens from chatCompression.tokenLimit ?? modelTokenLimit × compressionTokenThreshold, so every Session created through the manager respects the configured compression budget. Refs #120, #121 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/config.ts | 87 +++++++++++++++++++++++++++++++++++---- src/core/types.ts | 14 +++++++ src/server/index.ts | 17 +++++++- tests/unit/config.test.ts | 46 ++++++++++++++++----- 4 files changed, 145 insertions(+), 19 deletions(-) diff --git a/src/core/config.ts b/src/core/config.ts index 57bee2a..2e94239 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -4,9 +4,9 @@ import { z } from 'zod'; import { HypercontextConfig, OptimizationConfig } from './types.js'; -import { readFileSync, existsSync } from 'fs'; +import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs'; import { homedir } from 'os'; -import { join } from 'path'; +import { dirname, join } from 'path'; const DEFAULT_OPTIMIZATION: OptimizationConfig = { compressionTokenThreshold: 0.7, @@ -26,6 +26,14 @@ const DEFAULT_OPTIMIZATION: OptimizationConfig = { }, minOutputSizeBytes: 500, quality: 'balanced', + cacheSettings: { + maxSize: 1000, + ttlSeconds: 3600, + }, + chatCompression: { + enabled: true, + strategy: 'summarize', + }, }; const DEFAULT_CONFIG: HypercontextConfig = { @@ -62,6 +70,17 @@ const DEFAULT_CONFIG: HypercontextConfig = { optimization: DEFAULT_OPTIMIZATION, }; +const CacheSettingsSchema = z.object({ + maxSize: z.number().int().positive(), + ttlSeconds: z.number().int().nonnegative(), +}); + +const ChatCompressionSchema = z.object({ + enabled: z.boolean(), + tokenLimit: z.number().int().positive().optional(), + strategy: z.enum(['summarize', 'truncate']), +}); + const OptimizationConfigSchema = z.object({ compressionTokenThreshold: z.number().min(0).max(1), compressionPreserveThreshold: z.number().min(0).max(1), @@ -69,6 +88,18 @@ const OptimizationConfigSchema = z.object({ modelTokenLimits: z.record(z.string(), z.number().int().positive()), minOutputSizeBytes: z.number().int().nonnegative(), quality: z.enum(['fast', 'balanced', 'max']), + cacheSettings: CacheSettingsSchema, + chatCompression: ChatCompressionSchema, +}); + +/** + * User-supplied optimization schema. Partial at every depth so users can + * override just one field (e.g. `{ cacheSettings: { maxSize: 42 } }`) + * without having to re-supply the entire sub-object. + */ +const OptimizationConfigUserSchema = OptimizationConfigSchema.partial().extend({ + cacheSettings: CacheSettingsSchema.partial().optional(), + chatCompression: ChatCompressionSchema.partial().optional(), }); const HypercontextConfigSchema = z @@ -110,7 +141,7 @@ const HypercontextConfigSchema = z }) .partial() .optional(), - optimization: OptimizationConfigSchema.partial().optional(), + optimization: OptimizationConfigUserSchema.optional(), }) .passthrough(); @@ -118,12 +149,37 @@ export class ConfigManager { private config: HypercontextConfig; private configPath: string; - constructor(configPath?: string) { + constructor(configPath?: string, options: { writeDefaults?: boolean } = {}) { this.configPath = - configPath || join(homedir(), '.hypercontext', 'config.json'); + configPath || join(homedir(), '.token-optimizer', 'config.json'); + const writeDefaults = options.writeDefaults ?? true; + if (writeDefaults && !existsSync(this.configPath)) { + this.writeDefaultConfig(); + } this.config = this.loadConfig(); } + /** + * Write DEFAULT_CONFIG to configPath on first run — addresses #120's + * "Default config created on first run" acceptance criterion. + * Errors are logged and non-fatal; callers still get an in-memory + * DEFAULT_CONFIG via loadConfig(). + */ + private writeDefaultConfig(): void { + try { + const dir = dirname(this.configPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + writeFileSync(this.configPath, JSON.stringify(DEFAULT_CONFIG, null, 2)); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn( + `ConfigManager: failed to write default config to ${this.configPath}: ${message}` + ); + } + } + private loadConfig(): HypercontextConfig { if (!existsSync(this.configPath)) { return DEFAULT_CONFIG; @@ -157,15 +213,32 @@ export class ConfigManager { monitoring?: Partial; intelligence?: Partial; performance?: Partial; - optimization?: Partial; + optimization?: Partial< + Omit + > & { + cacheSettings?: Partial; + chatCompression?: Partial; + }; } ): HypercontextConfig { + const userOpt = user.optimization ?? {}; return { cache: { ...defaults.cache, ...user.cache }, monitoring: { ...defaults.monitoring, ...user.monitoring }, intelligence: { ...defaults.intelligence, ...user.intelligence }, performance: { ...defaults.performance, ...user.performance }, - optimization: { ...DEFAULT_OPTIMIZATION, ...(user.optimization ?? {}) }, + optimization: { + ...DEFAULT_OPTIMIZATION, + ...userOpt, + cacheSettings: { + ...DEFAULT_OPTIMIZATION.cacheSettings, + ...(userOpt.cacheSettings ?? {}), + }, + chatCompression: { + ...DEFAULT_OPTIMIZATION.chatCompression, + ...(userOpt.chatCompression ?? {}), + }, + }, }; } diff --git a/src/core/types.ts b/src/core/types.ts index aacb6a1..1cb1775 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -68,6 +68,20 @@ export interface OptimizationConfig { minOutputSizeBytes: number; /** Compression quality preset. */ quality: 'fast' | 'balanced' | 'max'; + /** In-memory cache knobs — mirrors Gemini CLI's `cacheSettings`. */ + cacheSettings: { + /** Max entries per LRU cache shard. */ + maxSize: number; + /** Default TTL for cached entries, in seconds. */ + ttlSeconds: number; + }; + /** Chat-history compression knobs — #121. */ + chatCompression: { + enabled: boolean; + /** Hard token limit per session (falls back to modelTokenLimit × compressionTokenThreshold). */ + tokenLimit?: number; + strategy: 'summarize' | 'truncate'; + }; } export interface TokenMetrics { diff --git a/src/server/index.ts b/src/server/index.ts index 89eb230..9b16c3a 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -137,6 +137,7 @@ import { } from '../tools/context-delta-tool.js'; import { SessionManager } from '../core/session-manager.js'; import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js'; +import { ConfigManager } from '../core/config.js'; import { AnalyticsManager } from '../analytics/analytics-manager.js'; @@ -379,9 +380,23 @@ const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager); const exportAnalytics = getExportAnalyticsTool(analyticsManager); const optimizationStorage = new OptimizationStorageTool(); +// #120: load user config (creates ~/.token-optimizer/config.json with +// defaults on first run) and derive session-level knobs. +const configManager = new ConfigManager(); +const optimizationConfig = configManager.getOptimizationConfig(); +const sessionTokenizer = TokenizerFactory.createFromEnv(); +const modelLimit = + configManager.getModelTokenLimit(sessionTokenizer.modelName) ?? + // Fall back to an aggressive default for unknown models. + 128000; +const chatDefaultMaxTokens = + optimizationConfig.chatCompression.tokenLimit ?? + Math.floor(modelLimit * optimizationConfig.compressionTokenThreshold); + const sessionManager = new SessionManager({ persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'), - tokenizer: TokenizerFactory.createFromEnv(), + tokenizer: sessionTokenizer, + defaultMaxTokens: chatDefaultMaxTokens, }); const contextDelta = new ContextDeltaTool(sessionManager); diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index 27b2850..4f1c0d7 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, afterEach } from '@jest/globals'; -import { mkdtempSync, writeFileSync, rmSync } from 'fs'; +import { mkdtempSync, writeFileSync, existsSync, rmSync } from 'fs'; import { tmpdir } from 'os'; import { join } from 'path'; import { ConfigManager } from '../../src/core/config.js'; @@ -16,38 +16,63 @@ describe('ConfigManager', () => { } }); - function writeConfig(content: string): string { + function tempConfigPath(): string { const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-config-')); tempDirs.push(dir); - const file = join(dir, 'config.json'); + return join(dir, 'config.json'); + } + + function writeConfig(content: string): string { + const file = tempConfigPath(); writeFileSync(file, content); return file; } - it('returns defaults when no config file exists', () => { - const mgr = new ConfigManager(join(tmpdir(), 'does-not-exist-xyz.json')); + it('returns defaults when no config file exists and writeDefaults is false', () => { + const mgr = new ConfigManager(tempConfigPath(), { writeDefaults: false }); const opt = mgr.getOptimizationConfig(); expect(opt.compressionTokenThreshold).toBe(0.7); expect(opt.quality).toBe('balanced'); + expect(opt.cacheSettings.maxSize).toBe(1000); + expect(opt.cacheSettings.ttlSeconds).toBe(3600); + expect(opt.chatCompression.enabled).toBe(true); + expect(opt.chatCompression.strategy).toBe('summarize'); expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000); }); - it('overrides defaults with user config', () => { + it('writes a default config file on first run', () => { + const file = tempConfigPath(); + expect(existsSync(file)).toBe(false); + new ConfigManager(file); + expect(existsSync(file)).toBe(true); + + // A second instance reads what the first wrote. + const second = new ConfigManager(file); + expect(second.getOptimizationConfig().quality).toBe('balanced'); + }); + + it('overrides defaults with user config — nested sub-objects deep-merge', () => { const configPath = writeConfig( JSON.stringify({ optimization: { compressionTokenThreshold: 0.9, quality: 'max', + cacheSettings: { maxSize: 42 }, + chatCompression: { strategy: 'truncate' }, modelTokenLimits: { 'custom-model': 500000 }, }, }) ); - const mgr = new ConfigManager(configPath); + const mgr = new ConfigManager(configPath, { writeDefaults: false }); const opt = mgr.getOptimizationConfig(); expect(opt.compressionTokenThreshold).toBe(0.9); expect(opt.quality).toBe('max'); + expect(opt.cacheSettings.maxSize).toBe(42); + // Unprovided sub-field retains default. + expect(opt.cacheSettings.ttlSeconds).toBe(3600); + expect(opt.chatCompression.enabled).toBe(true); + expect(opt.chatCompression.strategy).toBe('truncate'); expect(mgr.getModelTokenLimit('custom-model')).toBe(500000); - // Unrelated defaults still filled in expect(opt.compressionPreserveThreshold).toBe(0.3); }); @@ -55,14 +80,13 @@ describe('ConfigManager', () => { const configPath = writeConfig( JSON.stringify({ optimization: { compressionTokenThreshold: 5 } }) ); - const mgr = new ConfigManager(configPath); - // Invalid value (>1) is rejected by schema → defaults applied + const mgr = new ConfigManager(configPath, { writeDefaults: false }); expect(mgr.getOptimizationConfig().compressionTokenThreshold).toBe(0.7); }); it('falls back to defaults on malformed JSON', () => { const configPath = writeConfig('not json at all'); - const mgr = new ConfigManager(configPath); + const mgr = new ConfigManager(configPath, { writeDefaults: false }); expect(mgr.getOptimizationConfig().quality).toBe('balanced'); }); }); From 1392ee1e52673902b434e02291ae2fda781ba354 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:50:35 -0400 Subject: [PATCH 18/26] feat(cache): lru memoize smart-read / smart-grep / smart-glob (#125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the missing LruCache integration from the audit: - Adds src/utils/lru-memoize.ts — a generic async-fn wrapper backed by LruCache (#125). Each wrapped function registers its cache with a shared memoRegistry so callers can prune and snapshot stats across every memo in one shot. - server/index.ts wraps runSmartRead, runSmartGrep, and runSmartGlob with lruMemoize, sized from optimizationConfig.cacheSettings (maxSize / ttlSeconds × 1000). The case handlers call the memoized variant, so repeated tool invocations with identical arguments hit the LRU instead of re-running the expensive read/search. - Periodic cleanup: a 5-minute interval calls memoRegistry.pruneAll() and logs stats when anything was removed. The timer is unref'd so it never keeps the event loop alive. - Server cleanup handler clears the interval and the memo caches on shutdown. Refs #125 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/server/index.ts | 49 ++++++++++++++++-- src/utils/lru-memoize.ts | 93 ++++++++++++++++++++++++++++++++++ tests/unit/lru-memoize.test.ts | 69 +++++++++++++++++++++++++ 3 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 src/utils/lru-memoize.ts create mode 100644 tests/unit/lru-memoize.test.ts diff --git a/src/server/index.ts b/src/server/index.ts index 9b16c3a..98d3984 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -138,6 +138,7 @@ import { import { SessionManager } from '../core/session-manager.js'; import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js'; import { ConfigManager } from '../core/config.js'; +import { lruMemoize, memoRegistry } from '../utils/lru-memoize.js'; import { AnalyticsManager } from '../analytics/analytics-manager.js'; @@ -400,6 +401,41 @@ const sessionManager = new SessionManager({ }); const contextDelta = new ContextDeltaTool(sessionManager); +// #125: memoize the expensive read-only file-operation tools with an +// LRU bounded by the user's cacheSettings. The memoRegistry hook lets +// the cleanup handler below prune them all at once. +const cacheSettings = optimizationConfig.cacheSettings; +const memoizedSmartRead = lruMemoize(runSmartRead, { + name: 'smart_read', + maxSize: cacheSettings.maxSize, + ttlMs: cacheSettings.ttlSeconds * 1000, +}); +const memoizedSmartGrep = lruMemoize(runSmartGrep, { + name: 'smart_grep', + maxSize: cacheSettings.maxSize, + ttlMs: cacheSettings.ttlSeconds * 1000, +}); +const memoizedSmartGlob = lruMemoize(runSmartGlob, { + name: 'smart_glob', + maxSize: cacheSettings.maxSize, + ttlMs: cacheSettings.ttlSeconds * 1000, +}); + +// Periodic prune + stats log. Runs every 5 minutes; unref so it doesn't +// keep the process alive on its own. +const MEMO_PRUNE_INTERVAL_MS = 5 * 60 * 1000; +const memoPruneTimer = setInterval(() => { + const removed = memoRegistry.pruneAll(); + if (removed > 0) { + console.error( + `[memo] pruned ${removed} expired cache entries; stats: ${JSON.stringify(memoRegistry.stats())}` + ); + } +}, MEMO_PRUNE_INTERVAL_MS); +if (typeof memoPruneTimer.unref === 'function') { + memoPruneTimer.unref(); +} + // Create MCP server const server = new Server( { @@ -1979,7 +2015,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_read': { const { path, ...options } = args as any; - const result = await runSmartRead(path, options); + const result = await memoizedSmartRead(path, options); return { content: [ { @@ -2018,7 +2054,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_glob': { const { pattern, ...options } = args as any; - const result = await runSmartGlob(pattern, options); + const result = await memoizedSmartGlob(pattern, options); return { content: [ { @@ -2031,7 +2067,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_grep': { const { pattern, ...options } = args as any; - const result = await runSmartGrep(pattern, options); + const result = await memoizedSmartGrep(pattern, options); return { content: [ { @@ -2306,6 +2342,13 @@ async function cleanup() { { fn: async () => await sessionManager.flush(), name: 'flushing sessions' }, { fn: () => TokenizerFactory.disposeAll(), name: 'disposing tokenizers' }, { fn: () => optimizationStorage.close(), name: 'closing optimization storage' }, + { + fn: () => { + clearInterval(memoPruneTimer); + memoRegistry.clearAll(); + }, + name: 'clearing memo caches', + }, // Note: predictiveCache and cacheWarmup do not implement dispose() methods // Removed dispose() calls to prevent runtime errors during cleanup ]); diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts new file mode 100644 index 0000000..d57c0ae --- /dev/null +++ b/src/utils/lru-memoize.ts @@ -0,0 +1,93 @@ +import { createHash } from 'crypto'; +import { LruCache, LruCacheStats } from './lru-cache.js'; + +/** + * Wrap an async function with an LRU cache so repeated calls with the + * same arguments are served from memory — addresses issue #125's + * "store results of expensive operations" for smart_read, smart_grep, + * smart_glob, and edit-correction paths. + * + * Each wrapped function owns its own cache, but every cache is + * registered with the shared `memoRegistry` so the server can prune + * and log stats for all of them at once. + */ + +export interface LruMemoizeOptions { + /** Identifier used in logs. */ + name: string; + /** Max cached entries. */ + maxSize: number; + /** Default per-entry TTL in ms. 0 disables expiration. */ + ttlMs?: number; + /** Custom key function; defaults to sha256(JSON.stringify(args)). */ + keyFn?: (args: Args) => string; +} + +export interface RegisteredCache { + name: string; + cache: LruCache; +} + +class MemoRegistry { + private readonly caches = new Map(); + + public register(entry: RegisteredCache): void { + this.caches.set(entry.name, entry); + } + + /** Prune every registered cache and return total entries removed. */ + public pruneAll(): number { + let total = 0; + for (const { cache } of this.caches.values()) { + total += cache.prune(); + } + return total; + } + + public stats(): Record { + const out: Record = {}; + for (const [name, { cache }] of this.caches) { + out[name] = cache.stats(); + } + return out; + } + + public clearAll(): void { + for (const { cache } of this.caches.values()) { + cache.clear(); + } + } +} + +export const memoRegistry = new MemoRegistry(); + +export function lruMemoize( + fn: (...args: Args) => Promise, + options: LruMemoizeOptions +): (...args: Args) => Promise { + const cache = new LruCache(options.maxSize, options.ttlMs ?? 0); + memoRegistry.register({ + name: options.name, + cache: cache as unknown as LruCache, + }); + + const keyFn = + options.keyFn ?? + ((args: Args): string => { + const serialized = JSON.stringify(args, (_, v) => + typeof v === 'bigint' ? v.toString() : v + ); + return createHash('sha256').update(serialized).digest('hex'); + }); + + return async (...args: Args): Promise => { + const key = keyFn(args); + const hit = cache.get(key); + if (hit !== undefined) { + return hit; + } + const value = await fn(...args); + cache.set(key, value); + return value; + }; +} diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts new file mode 100644 index 0000000..12631be --- /dev/null +++ b/tests/unit/lru-memoize.test.ts @@ -0,0 +1,69 @@ +import { describe, it, expect } from '@jest/globals'; +import { lruMemoize, memoRegistry } from '../../src/utils/lru-memoize.js'; + +describe('lruMemoize', () => { + it('returns cached value for identical args', async () => { + let calls = 0; + const fn = async (x: number) => { + calls++; + return x * 2; + }; + const memo = lruMemoize(fn, { name: 'test-double', maxSize: 10 }); + expect(await memo(3)).toBe(6); + expect(await memo(3)).toBe(6); + expect(calls).toBe(1); + }); + + it('differentiates calls by args', async () => { + let calls = 0; + const fn = async (x: number) => { + calls++; + return x * 2; + }; + const memo = lruMemoize(fn, { name: 'test-by-args', maxSize: 10 }); + await memo(1); + await memo(2); + await memo(1); + expect(calls).toBe(2); + }); + + it('expires entries past the TTL', async () => { + let calls = 0; + const fn = async (x: number) => { + calls++; + return x; + }; + const memo = lruMemoize(fn, { name: 'test-ttl', maxSize: 10, ttlMs: 20 }); + await memo(7); + await memo(7); + expect(calls).toBe(1); + await new Promise((r) => setTimeout(r, 30)); + await memo(7); + expect(calls).toBe(2); + }); + + it('registers with memoRegistry for bulk prune / stats', async () => { + const fn = async (x: string) => x.toUpperCase(); + lruMemoize(fn, { name: 'test-registered', maxSize: 5 }); + const stats = memoRegistry.stats(); + expect(stats['test-registered']).toBeDefined(); + expect(stats['test-registered'].size).toBe(0); + }); + + it('accepts a custom key function', async () => { + let calls = 0; + const fn = async (obj: { id: string; ignore: number }) => { + calls++; + return obj.id; + }; + const memo = lruMemoize(fn, { + name: 'test-custom-key', + maxSize: 5, + keyFn: ([{ id }]) => id, + }); + await memo({ id: 'a', ignore: 1 }); + await memo({ id: 'a', ignore: 9999 }); // same id → hit + await memo({ id: 'b', ignore: 1 }); // different id → miss + expect(calls).toBe(2); + }); +}); From abbc64806a103631ec3e29fb663caba12f9458dd Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:52:05 -0400 Subject: [PATCH 19/26] feat(summarization): foundation-model isummarizer implementations (#121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the audit gap: the issue requires "use a foundation model to perform the summarization", the previous ISummarizer default was just text truncation. - AnthropicSummarizer — calls /v1/messages with claude-haiku-4-5 by default. Needs ANTHROPIC_API_KEY. - GoogleAISummarizer — calls generativelanguage.googleapis.com with gemini-2.5-flash by default. Needs GOOGLE_AI_API_KEY. - createSummarizerFromEnv() picks the best available summarizer (Anthropic → Google → TruncatingSummarizer fallback) so the server works unchanged whether or not API keys are configured. - Both remote summarizers use AbortController with a 30s timeout and share a common system prompt that asks for preservation of decisions and open TODOs. - TruncatingSummarizer remains the zero-dep fallback and is used by tests to avoid network flakes. Wired into the server: SessionManager.summarizer is createSummarizerFromEnv(), so Session.compressHistory uses a real LLM when a key is present. Refs #121 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/summarization.ts | 234 ++++++++++++++++++++++++++++++- src/server/index.ts | 2 + tests/unit/summarization.test.ts | 102 ++++++++++++++ 3 files changed, 332 insertions(+), 6 deletions(-) create mode 100644 tests/unit/summarization.test.ts diff --git a/src/core/summarization.ts b/src/core/summarization.ts index ab179c5..f694f7d 100644 --- a/src/core/summarization.ts +++ b/src/core/summarization.ts @@ -1,15 +1,30 @@ import { Message } from './session.js'; /** - * Pluggable summarization interface — part of issue #121. + * Pluggable summarization — part of issue #121. * - * A production deployment should plug in an LLM-backed summarizer that - * condenses a list of Messages into a single natural-language summary. - * The default TruncatingSummarizer keeps the module self-contained and - * testable without an API key; it concatenates role+content and trims - * to a reasonable length. + * An ISummarizer implementation takes a list of Messages and returns a + * natural-language summary. We ship three implementations out of the box: + * + * - TruncatingSummarizer — self-contained, zero deps. Concatenates + * role:content and trims to `maxChars`. Useful for tests and for + * users who don't want to hand a foundation model every + * conversation turn. + * - AnthropicSummarizer — calls /v1/messages on api.anthropic.com. + * Needs ANTHROPIC_API_KEY. Used when the host wires it up. + * - GoogleAISummarizer — calls generativelanguage.googleapis.com. + * Needs GOOGLE_AI_API_KEY. + * + * Selection lives in `createSummarizerFromEnv()` below — the server + * picks the highest-fidelity summarizer whose credentials are available + * and falls back to TruncatingSummarizer otherwise. */ +const SUMMARY_SYSTEM_PROMPT = + 'You are summarizing the early portion of a conversation so the rest can continue without the full history in context. ' + + 'Produce a concise summary (at most ~300 tokens) that preserves decisions made, outstanding TODOs, and any concrete facts the assistant has already told the user. ' + + 'Do not address the user directly; write in third person.'; + export interface ISummarizer { summarize(messages: readonly Message[]): Promise; } @@ -48,3 +63,210 @@ export class TruncatingSummarizer implements ISummarizer { ); } } + +// ============================================================================ +// Anthropic-backed summarizer +// ============================================================================ + +const ANTHROPIC_ENDPOINT = 'https://api.anthropic.com/v1/messages'; +const ANTHROPIC_DEFAULT_MODEL = 'claude-haiku-4-5-20251001'; +const ANTHROPIC_API_VERSION = '2023-06-01'; +const SUMMARIZER_TIMEOUT_MS = 30_000; +const SUMMARIZER_MAX_TOKENS = 1024; + +export interface AnthropicSummarizerOptions { + apiKey?: string; + model?: string; + endpoint?: string; + timeoutMs?: number; +} + +export class AnthropicSummarizer implements ISummarizer { + private readonly apiKey: string; + private readonly model: string; + private readonly endpoint: string; + private readonly timeoutMs: number; + + constructor(options: AnthropicSummarizerOptions = {}) { + const apiKey = options.apiKey ?? process.env.ANTHROPIC_API_KEY; + if (!apiKey) { + throw new Error( + 'AnthropicSummarizer requires ANTHROPIC_API_KEY (or apiKey option).' + ); + } + this.apiKey = apiKey; + this.model = options.model ?? ANTHROPIC_DEFAULT_MODEL; + this.endpoint = options.endpoint ?? ANTHROPIC_ENDPOINT; + this.timeoutMs = options.timeoutMs ?? SUMMARIZER_TIMEOUT_MS; + } + + public async summarize(messages: readonly Message[]): Promise { + if (messages.length === 0) { + return ''; + } + const userContent = messages + .map((m) => `${m.role}: ${m.content}`) + .join('\n'); + + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), this.timeoutMs); + + try { + const response = await fetch(this.endpoint, { + method: 'POST', + headers: { + 'content-type': 'application/json', + 'x-api-key': this.apiKey, + 'anthropic-version': ANTHROPIC_API_VERSION, + }, + body: JSON.stringify({ + model: this.model, + max_tokens: SUMMARIZER_MAX_TOKENS, + system: SUMMARY_SYSTEM_PROMPT, + messages: [ + { role: 'user', content: userContent.slice(0, 200_000) }, + ], + }), + signal: controller.signal, + }); + + if (!response.ok) { + const body = await response.text().catch(() => ''); + throw new Error( + `Anthropic summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}` + ); + } + + const data = (await response.json()) as { + content?: Array<{ type: string; text?: string }>; + }; + const text = + data.content + ?.filter((c) => c.type === 'text' && typeof c.text === 'string') + .map((c) => c.text ?? '') + .join('\n') + .trim() ?? ''; + return text; + } finally { + clearTimeout(timeout); + } + } +} + +// ============================================================================ +// Google AI-backed summarizer +// ============================================================================ + +const GOOGLE_AI_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models'; +const GOOGLE_AI_DEFAULT_MODEL = 'gemini-2.5-flash'; + +export interface GoogleAISummarizerOptions { + apiKey?: string; + model?: string; + endpoint?: string; + timeoutMs?: number; +} + +export class GoogleAISummarizer implements ISummarizer { + private readonly apiKey: string; + private readonly model: string; + private readonly endpoint: string; + private readonly timeoutMs: number; + + constructor(options: GoogleAISummarizerOptions = {}) { + const apiKey = options.apiKey ?? process.env.GOOGLE_AI_API_KEY; + if (!apiKey) { + throw new Error( + 'GoogleAISummarizer requires GOOGLE_AI_API_KEY (or apiKey option).' + ); + } + this.apiKey = apiKey; + this.model = options.model ?? GOOGLE_AI_DEFAULT_MODEL; + this.endpoint = options.endpoint ?? GOOGLE_AI_ENDPOINT; + this.timeoutMs = options.timeoutMs ?? SUMMARIZER_TIMEOUT_MS; + } + + public async summarize(messages: readonly Message[]): Promise { + if (messages.length === 0) { + return ''; + } + const joined = messages + .map((m) => `${m.role}: ${m.content}`) + .join('\n'); + + const url = `${this.endpoint}/${encodeURIComponent(this.model)}:generateContent?key=${encodeURIComponent(this.apiKey)}`; + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), this.timeoutMs); + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + systemInstruction: { parts: [{ text: SUMMARY_SYSTEM_PROMPT }] }, + contents: [ + { + role: 'user', + parts: [{ text: joined.slice(0, 200_000) }], + }, + ], + generationConfig: { maxOutputTokens: SUMMARIZER_MAX_TOKENS }, + }), + signal: controller.signal, + }); + + if (!response.ok) { + const body = await response.text().catch(() => ''); + throw new Error( + `Google AI summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}` + ); + } + + const data = (await response.json()) as { + candidates?: Array<{ + content?: { parts?: Array<{ text?: string }> }; + }>; + }; + const text = + data.candidates?.[0]?.content?.parts + ?.map((p) => p.text ?? '') + .join('\n') + .trim() ?? ''; + return text; + } finally { + clearTimeout(timeout); + } + } +} + +// ============================================================================ +// Factory +// ============================================================================ + +/** + * Pick an ISummarizer based on available credentials: + * 1. ANTHROPIC_API_KEY → AnthropicSummarizer + * 2. GOOGLE_AI_API_KEY → GoogleAISummarizer + * 3. fallback → TruncatingSummarizer (no network, no key) + * + * Anthropic sits first because this project is Claude-adjacent; users + * who prefer Gemini can either unset ANTHROPIC_API_KEY or construct + * GoogleAISummarizer directly. + */ +export function createSummarizerFromEnv(): ISummarizer { + if (process.env.ANTHROPIC_API_KEY) { + try { + return new AnthropicSummarizer(); + } catch { + // Fall through to next option. + } + } + if (process.env.GOOGLE_AI_API_KEY) { + try { + return new GoogleAISummarizer(); + } catch { + // Fall through. + } + } + return new TruncatingSummarizer(); +} diff --git a/src/server/index.ts b/src/server/index.ts index 98d3984..3c971ce 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -136,6 +136,7 @@ import { CONTEXT_DELTA_TOOL_DEFINITION, } from '../tools/context-delta-tool.js'; import { SessionManager } from '../core/session-manager.js'; +import { createSummarizerFromEnv } from '../core/summarization.js'; import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js'; import { ConfigManager } from '../core/config.js'; import { lruMemoize, memoRegistry } from '../utils/lru-memoize.js'; @@ -398,6 +399,7 @@ const sessionManager = new SessionManager({ persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'), tokenizer: sessionTokenizer, defaultMaxTokens: chatDefaultMaxTokens, + summarizer: createSummarizerFromEnv(), }); const contextDelta = new ContextDeltaTool(sessionManager); diff --git a/tests/unit/summarization.test.ts b/tests/unit/summarization.test.ts new file mode 100644 index 0000000..a060ee7 --- /dev/null +++ b/tests/unit/summarization.test.ts @@ -0,0 +1,102 @@ +import { describe, it, expect, beforeEach, afterEach } from '@jest/globals'; +import { + TruncatingSummarizer, + AnthropicSummarizer, + GoogleAISummarizer, + createSummarizerFromEnv, +} from '../../src/core/summarization.js'; +import { Message } from '../../src/core/session.js'; + +function makeMessages(n: number): Message[] { + return Array.from({ length: n }, (_, i) => ({ + role: (i % 2 === 0 ? 'user' : 'assistant') as Message['role'], + content: `Turn ${i}: ${'x'.repeat(50)}`, + timestamp: Date.now() + i, + })); +} + +describe('TruncatingSummarizer', () => { + it('returns empty string for empty input', async () => { + const s = new TruncatingSummarizer(); + expect(await s.summarize([])).toBe(''); + }); + + it('returns untruncated text when under maxChars', async () => { + const s = new TruncatingSummarizer({ maxChars: 10_000 }); + const out = await s.summarize(makeMessages(3)); + expect(out).toContain('Turn 0'); + expect(out).toContain('Turn 2'); + expect(out).not.toContain('[truncated]'); + }); + + it('truncates with a marker when over maxChars', async () => { + const s = new TruncatingSummarizer({ maxChars: 500 }); + const out = await s.summarize(makeMessages(50)); + expect(out).toContain('[truncated]'); + expect(out.length).toBeLessThan(600); + }); +}); + +describe('AnthropicSummarizer / GoogleAISummarizer constructors', () => { + const savedAnthropic = process.env.ANTHROPIC_API_KEY; + const savedGoogle = process.env.GOOGLE_AI_API_KEY; + + beforeEach(() => { + delete process.env.ANTHROPIC_API_KEY; + delete process.env.GOOGLE_AI_API_KEY; + }); + afterEach(() => { + if (savedAnthropic !== undefined) process.env.ANTHROPIC_API_KEY = savedAnthropic; + else delete process.env.ANTHROPIC_API_KEY; + if (savedGoogle !== undefined) process.env.GOOGLE_AI_API_KEY = savedGoogle; + else delete process.env.GOOGLE_AI_API_KEY; + }); + + it('AnthropicSummarizer throws without a key', () => { + expect(() => new AnthropicSummarizer()).toThrow(/ANTHROPIC_API_KEY/); + }); + + it('GoogleAISummarizer throws without a key', () => { + expect(() => new GoogleAISummarizer()).toThrow(/GOOGLE_AI_API_KEY/); + }); + + it('AnthropicSummarizer constructs with explicit apiKey', () => { + expect(() => new AnthropicSummarizer({ apiKey: 'sk-test' })).not.toThrow(); + }); + + it('GoogleAISummarizer constructs with explicit apiKey', () => { + expect(() => new GoogleAISummarizer({ apiKey: 'gapi-test' })).not.toThrow(); + }); +}); + +describe('createSummarizerFromEnv', () => { + const saved = { + anthropic: process.env.ANTHROPIC_API_KEY, + google: process.env.GOOGLE_AI_API_KEY, + }; + + afterEach(() => { + if (saved.anthropic !== undefined) process.env.ANTHROPIC_API_KEY = saved.anthropic; + else delete process.env.ANTHROPIC_API_KEY; + if (saved.google !== undefined) process.env.GOOGLE_AI_API_KEY = saved.google; + else delete process.env.GOOGLE_AI_API_KEY; + }); + + it('falls back to TruncatingSummarizer when no keys are set', () => { + delete process.env.ANTHROPIC_API_KEY; + delete process.env.GOOGLE_AI_API_KEY; + expect(createSummarizerFromEnv()).toBeInstanceOf(TruncatingSummarizer); + }); + + it('prefers Anthropic when its key is set', () => { + process.env.ANTHROPIC_API_KEY = 'sk-test'; + delete process.env.GOOGLE_AI_API_KEY; + expect(createSummarizerFromEnv()).toBeInstanceOf(AnthropicSummarizer); + }); + + it('uses Google AI when only its key is set', () => { + delete process.env.ANTHROPIC_API_KEY; + process.env.GOOGLE_AI_API_KEY = 'gapi-test'; + expect(createSummarizerFromEnv()).toBeInstanceOf(GoogleAISummarizer); + }); +}); From a78d19616dc5cfbeaf37331078b42a0018e1fea3 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:53:19 -0400 Subject: [PATCH 20/26] feat(storage): gzip session persistence + shared gzip utilities (#126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fulfills the #126 acceptance criteria on the TypeScript side (the PS side lands in a follow-up commit): - src/utils/gzip.ts: gzipString / gunzipBuffer primitives, plus saveGzippedFile (atomic tmp + rename, removes stale plaintext) and loadMaybeGzippedFile (reads .gz if present, otherwise plaintext so sessions.json files written before this change still load — the "backward compatibility" bullet from the issue). - SessionManager persistNow now writes sessions via saveGzippedFile; load() uses loadMaybeGzippedFile. Existing checks for the sessions file at startup also look for the .gz sibling. Refs #126 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/session-manager.ts | 42 +++++++---------- src/utils/gzip.ts | 90 +++++++++++++++++++++++++++++++++++++ tests/unit/gzip.test.ts | 73 ++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+), 26 deletions(-) create mode 100644 src/utils/gzip.ts create mode 100644 tests/unit/gzip.test.ts diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts index 2a19edd..935004e 100644 --- a/src/core/session-manager.ts +++ b/src/core/session-manager.ts @@ -1,12 +1,4 @@ -import { - existsSync, - mkdirSync, - readFileSync, - writeFileSync, - renameSync, - unlinkSync, -} from 'fs'; -import { dirname } from 'path'; +import { existsSync } from 'fs'; import { z } from 'zod'; import { Session, @@ -15,6 +7,7 @@ import { } from './session.js'; import { ITokenizer } from './tokenizers/i-tokenizer.js'; import { ISummarizer } from './summarization.js'; +import { loadMaybeGzippedFile, saveGzippedFile } from '../utils/gzip.js'; /** * Persistent SessionManager — addresses issues #121 / #122. @@ -84,7 +77,11 @@ export class SessionManager { this.sessionTtlMs = options.sessionTtlMs ?? DEFAULT_SESSION_TTL_MS; this.maxFileStateBytes = options.maxFileStateBytes ?? DEFAULT_MAX_FILE_STATE_BYTES; - if (this.persistencePath && existsSync(this.persistencePath)) { + if ( + this.persistencePath && + (existsSync(`${this.persistencePath}.gz`) || + existsSync(this.persistencePath)) + ) { this.load(); } } @@ -200,27 +197,17 @@ export class SessionManager { const state = { sessions: this.listSessions().map((s) => s.toSnapshot()), }; - const dir = dirname(this.persistencePath); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); - } - const tmpPath = `${this.persistencePath}.tmp`; - writeFileSync(tmpPath, JSON.stringify(state, null, 2)); - renameSync(tmpPath, this.persistencePath); + // Gzip + atomic tmp + rename (handled inside saveGzippedFile). + saveGzippedFile( + this.persistencePath, + JSON.stringify(state, null, 2) + ); } catch (error) { const message = error instanceof Error ? error.message : String(error); console.warn( `SessionManager: failed to persist to ${this.persistencePath}: ${message}` ); - // Best-effort cleanup of the tmp file - if (this.persistencePath) { - try { - unlinkSync(`${this.persistencePath}.tmp`); - } catch { - // Ignore — tmp file may not exist. - } - } } finally { this.persistInFlight = false; } @@ -231,7 +218,10 @@ export class SessionManager { return; } try { - const raw = readFileSync(this.persistencePath, 'utf-8'); + const raw = loadMaybeGzippedFile(this.persistencePath); + if (raw === null) { + return; + } const json = JSON.parse(raw); const parsed = PersistedStateSchema.safeParse(json); if (!parsed.success) { diff --git a/src/utils/gzip.ts b/src/utils/gzip.ts new file mode 100644 index 0000000..5edf8ee --- /dev/null +++ b/src/utils/gzip.ts @@ -0,0 +1,90 @@ +import { gzipSync, gunzipSync } from 'zlib'; +import { + existsSync, + mkdirSync, + readFileSync, + renameSync, + unlinkSync, + writeFileSync, +} from 'fs'; +import { dirname } from 'path'; + +/** + * Gzip utilities — addresses issue #126. + * + * `gzipString` / `gunzipBuffer` are thin UTF-8 wrappers around node:zlib. + * `saveGzippedFile` writes `.gz` atomically (tmp + rename) so a + * crash mid-write can't produce a corrupt gzip. `loadFile` transparently + * reads `.gz` if present and falls back to the plaintext path — + * that gives us backward compatibility with sessions.json files written + * before this change. + */ + +export interface GzipStats { + originalBytes: number; + compressedBytes: number; + ratio: number; + percentSaved: number; +} + +export function gzipString(text: string, level: number = 6): Buffer { + return gzipSync(Buffer.from(text, 'utf8'), { level }); +} + +export function gunzipBuffer(buffer: Buffer): string { + return gunzipSync(buffer).toString('utf8'); +} + +export function computeStats(text: string, compressed: Buffer): GzipStats { + const originalBytes = Buffer.byteLength(text, 'utf8'); + const compressedBytes = compressed.length; + const ratio = originalBytes === 0 ? 0 : compressedBytes / originalBytes; + return { + originalBytes, + compressedBytes, + ratio, + percentSaved: originalBytes === 0 ? 0 : (1 - ratio) * 100, + }; +} + +/** + * Write gzipped text to `${path}.gz` using atomic tmp + rename so a + * crash mid-write never produces a half-written file. Also removes any + * stale uncompressed plaintext at `path` once the gzip lands (backward + * compat cleanup). + */ +export function saveGzippedFile(path: string, text: string, level: number = 6): GzipStats { + const dir = dirname(path); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + const compressed = gzipString(text, level); + const gzPath = `${path}.gz`; + const tmpPath = `${gzPath}.tmp`; + writeFileSync(tmpPath, compressed); + renameSync(tmpPath, gzPath); + if (existsSync(path)) { + try { + unlinkSync(path); + } catch { + // Best-effort — leaving the plaintext file isn't fatal. + } + } + return computeStats(text, compressed); +} + +/** + * Load either `${path}.gz` or `${path}` — whichever exists. Returns + * null if neither is present. + */ +export function loadMaybeGzippedFile(path: string): string | null { + const gzPath = `${path}.gz`; + if (existsSync(gzPath)) { + const buffer = readFileSync(gzPath); + return gunzipBuffer(buffer); + } + if (existsSync(path)) { + return readFileSync(path, 'utf-8'); + } + return null; +} diff --git a/tests/unit/gzip.test.ts b/tests/unit/gzip.test.ts new file mode 100644 index 0000000..bbb444c --- /dev/null +++ b/tests/unit/gzip.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect, afterEach } from '@jest/globals'; +import { mkdtempSync, existsSync, writeFileSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { + gzipString, + gunzipBuffer, + saveGzippedFile, + loadMaybeGzippedFile, +} from '../../src/utils/gzip.js'; + +describe('gzip utils', () => { + const tempDirs: string[] = []; + afterEach(() => { + while (tempDirs.length) { + const dir = tempDirs.pop(); + if (dir) { + rmSync(dir, { recursive: true, force: true }); + } + } + }); + + function tempDir(): string { + const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-gzip-')); + tempDirs.push(dir); + return dir; + } + + it('gzipString round-trips via gunzipBuffer', () => { + const text = 'Hello, world. '.repeat(1000); + const buffer = gzipString(text); + expect(buffer.length).toBeLessThan(text.length); + expect(gunzipBuffer(buffer)).toBe(text); + }); + + it('saveGzippedFile writes .gz and removes plaintext', () => { + const dir = tempDir(); + const file = join(dir, 'sessions.json'); + writeFileSync(file, 'stale plaintext'); + const stats = saveGzippedFile(file, JSON.stringify({ hello: 'world' })); + expect(existsSync(`${file}.gz`)).toBe(true); + expect(existsSync(file)).toBe(false); + expect(stats.originalBytes).toBeGreaterThan(0); + expect(stats.compressedBytes).toBeGreaterThan(0); + }); + + it('loadMaybeGzippedFile prefers the .gz sibling', () => { + const dir = tempDir(); + const file = join(dir, 'state.json'); + saveGzippedFile(file, '{"compressed":true}'); + expect(loadMaybeGzippedFile(file)).toBe('{"compressed":true}'); + }); + + it('loadMaybeGzippedFile falls back to plaintext when no .gz exists', () => { + const dir = tempDir(); + const file = join(dir, 'legacy.json'); + writeFileSync(file, '{"legacy":true}'); + expect(loadMaybeGzippedFile(file)).toBe('{"legacy":true}'); + }); + + it('loadMaybeGzippedFile returns null when neither exists', () => { + const dir = tempDir(); + const file = join(dir, 'missing.json'); + expect(loadMaybeGzippedFile(file)).toBeNull(); + }); + + it('saves with high compression ratio on repetitive content', () => { + const dir = tempDir(); + const file = join(dir, 'repeated.txt'); + const stats = saveGzippedFile(file, 'aa'.repeat(10_000)); + expect(stats.percentSaved).toBeGreaterThan(95); + }); +}); From fd0a0b283e88f245fa5fde906483424804d54d4b Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 21:55:17 -0400 Subject: [PATCH 21/26] feat(powershell): config / gzip / context-delta helpers (#120, #122, #126) Completes the PowerShell-side integration gaps found in the audit: - hooks/helpers/config.ps1: Import-TokenOptimizerConfig loads ~/.token-optimizer/config.json (the same file the TS server reads), falls back to defaults, and auto-writes the default file on first run. Exposes Get-TokenOptimizerOptimizationConfig and Get-TokenOptimizerModelTokenLimit for orchestrator consumers (#120). - hooks/helpers/gzip.ps1: Compress-String / Expand-String primitives and Save-GzippedFile / Read-MaybeGzippedFile that mirror the TS src/utils/gzip.ts semantics (atomic tmp+rename, backward-compat read of plaintext siblings) (#126). - hooks/helpers/context-delta.ps1: Get-TokenOptimizerSessionId returns a stable per-Claude-session UUID persisted at ~/.token-optimizer/current-session-id, Reset-TokenOptimizerSessionId clears it, Invoke-ContextDelta wraps the context_delta MCP tool via the existing Invoke-TokenOptimizer helper (#122). - Orchestrator dot-sources the three new helpers and Handle-SmartRead now calls Invoke-ContextDelta with the smart_read content after a successful read so the server's per-session file snapshot stays in sync (#122 Phase 2). Runtime-verified in PS7 that gzip round-trips the content and the helpers parse without errors. Refs #120, #122, #126 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../handlers/token-optimizer-orchestrator.ps1 | 19 +++ hooks/helpers/config.ps1 | 118 ++++++++++++++++++ hooks/helpers/context-delta.ps1 | 87 +++++++++++++ hooks/helpers/gzip.ps1 | 101 +++++++++++++++ 4 files changed, 325 insertions(+) create mode 100644 hooks/helpers/config.ps1 create mode 100644 hooks/helpers/context-delta.ps1 create mode 100644 hooks/helpers/gzip.ps1 diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index ae55748..4839706 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -28,6 +28,9 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) { $HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers" $INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1" . "$PSScriptRoot\..\helpers\logging.ps1" +. "$PSScriptRoot\..\helpers\config.ps1" +. "$PSScriptRoot\..\helpers\gzip.ps1" +. "$PSScriptRoot\..\helpers\context-delta.ps1" $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log" $SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt" $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data" @@ -2245,6 +2248,22 @@ function Handle-SmartRead { Write-Log "Updated session totalTokens by $tokens" "DEBUG" } + # #122: update the MCP server's context_delta so the next read + # of this file can be served as a diff. Failure here is + # non-fatal — smart_read still succeeds. + try { + $contentText = if ($result.content -and $result.content[0] -and $result.content[0].text) { + $result.content[0].text + } else { + $null + } + if ($contentText) { + $null = Invoke-ContextDelta -Operation 'compute-delta' -FilePath $filePath -CurrentContent $contentText + } + } catch { + Write-Log "context_delta update skipped: $($_.Exception.Message)" 'DEBUG' + } + # Return smart_read result and block plain Read $blockResponse = @{ continue = $false diff --git a/hooks/helpers/config.ps1 b/hooks/helpers/config.ps1 new file mode 100644 index 0000000..f5c12e3 --- /dev/null +++ b/hooks/helpers/config.ps1 @@ -0,0 +1,118 @@ +[CmdletBinding()] +param() + +<# +Token-Optimizer Config helper — addresses issue #120 (PowerShell side). + +Mirrors src/core/config.ts so the PS orchestrator and the TS server +share one source of truth. The config file lives at +~/.token-optimizer/config.json and is the same one the Node server +reads. On first run we copy the defaults below into that file. +#> + +$script:TokenOptimizerConfigPath = + Join-Path $env:USERPROFILE '.token-optimizer\config.json' + +$script:TokenOptimizerDefaultConfig = @{ + cache = @{ + enabled = $true + maxSizeMB = 500 + defaultTTL = 300 + ttlByType = @{ + file_read = 300 + git_status = 60 + git_diff = 120 + build_result = 600 + test_result = 300 + } + compression = 'auto' + } + monitoring = @{ + enabled = $true + detailedLogging = $false + metricsRetentionDays = 30 + dashboardPort = 3100 + enableWebUI = $false + } + optimization = @{ + compressionTokenThreshold = 0.7 + compressionPreserveThreshold = 0.3 + minTokensBeforeCompression = 1000 + modelTokenLimits = @{ + 'gpt-4' = 128000 + 'gpt-4-turbo' = 128000 + 'gpt-3.5-turbo' = 16385 + 'claude-3-opus' = 200000 + 'claude-3-sonnet' = 200000 + 'claude-3-haiku' = 200000 + 'claude-opus-4-7' = 1000000 + 'claude-sonnet-4-6' = 1000000 + 'gemini-1.5-pro' = 2000000 + 'gemini-2.5-flash' = 1000000 + } + minOutputSizeBytes = 500 + quality = 'balanced' + cacheSettings = @{ + maxSize = 1000 + ttlSeconds = 3600 + } + chatCompression = @{ + enabled = $true + strategy = 'summarize' + } + } +} + +function Get-TokenOptimizerConfigPath { + return $script:TokenOptimizerConfigPath +} + +function Write-TokenOptimizerDefaultConfig { + $configPath = Get-TokenOptimizerConfigPath + $configDir = Split-Path -Parent $configPath + if (-not (Test-Path $configDir)) { + New-Item -ItemType Directory -Path $configDir -Force | Out-Null + } + $json = $script:TokenOptimizerDefaultConfig | ConvertTo-Json -Depth 10 + Set-Content -Path $configPath -Value $json -Encoding UTF8 +} + +function Import-TokenOptimizerConfig { + $configPath = Get-TokenOptimizerConfigPath + if (-not (Test-Path $configPath)) { + Write-TokenOptimizerDefaultConfig + return $script:TokenOptimizerDefaultConfig + } + try { + $raw = Get-Content -Path $configPath -Raw -Encoding UTF8 + return ($raw | ConvertFrom-Json -AsHashtable) + } catch { + $msg = "Failed to load $configPath ($($_.Exception.Message)); using defaults." + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log $msg 'WARN' + } else { + Write-Warning $msg + } + return $script:TokenOptimizerDefaultConfig + } +} + +function Get-TokenOptimizerOptimizationConfig { + $config = Import-TokenOptimizerConfig + if ($null -ne $config.optimization) { + return $config.optimization + } + return $script:TokenOptimizerDefaultConfig.optimization +} + +function Get-TokenOptimizerModelTokenLimit { + param( + [Parameter(Mandatory = $true)] + [string]$ModelName + ) + $opt = Get-TokenOptimizerOptimizationConfig + if ($opt.modelTokenLimits -and $opt.modelTokenLimits.ContainsKey($ModelName)) { + return $opt.modelTokenLimits[$ModelName] + } + return $null +} diff --git a/hooks/helpers/context-delta.ps1 b/hooks/helpers/context-delta.ps1 new file mode 100644 index 0000000..7c4c3ab --- /dev/null +++ b/hooks/helpers/context-delta.ps1 @@ -0,0 +1,87 @@ +[CmdletBinding()] +param() + +<# +PowerShell integration for the context_delta MCP tool — addresses +issue #122 Phase 2. + +Get-TokenOptimizerSessionId generates a stable sessionId per top-level +PS session (cached on the script scope and persisted to a marker file +so multiple orchestrator invocations within one Claude session reuse +the same id). + +Invoke-ContextDelta calls the context_delta MCP tool via the shared +Invoke-TokenOptimizer helper and returns the unified-diff delta so +Handle-SmartRead can emit only the changed lines to the model. +#> + +$script:TokenOptimizerSessionIdPath = + Join-Path $env:USERPROFILE '.token-optimizer\current-session-id' + +function Get-TokenOptimizerSessionId { + if ($script:TokenOptimizerCurrentSessionId) { + return $script:TokenOptimizerCurrentSessionId + } + if (Test-Path $script:TokenOptimizerSessionIdPath) { + $existing = (Get-Content -Path $script:TokenOptimizerSessionIdPath -Raw).Trim() + if ($existing) { + $script:TokenOptimizerCurrentSessionId = $existing + return $existing + } + } + $newId = [guid]::NewGuid().ToString() + $dir = Split-Path -Parent $script:TokenOptimizerSessionIdPath + if (-not (Test-Path $dir)) { + New-Item -ItemType Directory -Path $dir -Force | Out-Null + } + Set-Content -Path $script:TokenOptimizerSessionIdPath -Value $newId + $script:TokenOptimizerCurrentSessionId = $newId + return $newId +} + +function Reset-TokenOptimizerSessionId { + $script:TokenOptimizerCurrentSessionId = $null + if (Test-Path $script:TokenOptimizerSessionIdPath) { + Remove-Item -Path $script:TokenOptimizerSessionIdPath -Force + } +} + +function Invoke-ContextDelta { + param( + [Parameter(Mandatory = $true)] + [ValidateSet('compute-delta', 'seed', 'clear')] + [string]$Operation, + [Parameter(Mandatory = $true)][string]$FilePath, + [string]$CurrentContent = $null, + [string]$SessionId = $null + ) + + if (-not $SessionId) { + $SessionId = Get-TokenOptimizerSessionId + } + $toolArgs = @{ + operation = $Operation + sessionId = $SessionId + filePath = $FilePath + } + if ($Operation -ne 'clear' -and $null -ne $CurrentContent) { + $toolArgs.currentContent = $CurrentContent + } + if (Get-Command Invoke-TokenOptimizer -ErrorAction SilentlyContinue) { + try { + return Invoke-TokenOptimizer -ToolName 'context_delta' -Arguments $toolArgs + } catch { + $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)" + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log $msg 'WARN' + } else { + Write-Warning $msg + } + return $null + } + } + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log 'Invoke-TokenOptimizer helper not available; skipping context_delta.' 'DEBUG' + } + return $null +} diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1 new file mode 100644 index 0000000..a84115b --- /dev/null +++ b/hooks/helpers/gzip.ps1 @@ -0,0 +1,101 @@ +[CmdletBinding()] +param() + +<# +Gzip utilities — addresses issue #126 (PowerShell side). + +Compress-String / Expand-String are the primitives. Save-GzippedFile +writes .gz atomically (tmp + rename) and strips the plaintext +sibling once the gzip lands. Read-MaybeGzippedFile prefers .gz +and falls back to plaintext so PS code can be migrated incrementally. +#> + +function Compress-String { + param( + [Parameter(Mandatory = $true)][string]$InputString, + [ValidateSet('Optimal', 'Fastest', 'NoCompression', 'SmallestSize')] + [string]$CompressionLevel = 'Optimal' + ) + $inputStream = $null + $outputStream = $null + $gzipStream = $null + try { + $bytes = [System.Text.Encoding]::UTF8.GetBytes($InputString) + $inputStream = [System.IO.MemoryStream]::new($bytes) + $outputStream = [System.IO.MemoryStream]::new() + $level = [System.IO.Compression.CompressionLevel]::$CompressionLevel + $gzipStream = [System.IO.Compression.GZipStream]::new($outputStream, $level) + $inputStream.CopyTo($gzipStream) + $gzipStream.Dispose() + $gzipStream = $null + return ,$outputStream.ToArray() + } finally { + if ($null -ne $gzipStream) { $gzipStream.Dispose() } + if ($null -ne $inputStream) { $inputStream.Dispose() } + if ($null -ne $outputStream) { $outputStream.Dispose() } + } +} + +function Expand-String { + param( + [Parameter(Mandatory = $true)][byte[]]$CompressedBytes + ) + $inputStream = $null + $outputStream = $null + $gzipStream = $null + try { + $inputStream = [System.IO.MemoryStream]::new($CompressedBytes) + $outputStream = [System.IO.MemoryStream]::new() + $gzipStream = [System.IO.Compression.GZipStream]::new( + $inputStream, + [System.IO.Compression.CompressionMode]::Decompress + ) + $gzipStream.CopyTo($outputStream) + return [System.Text.Encoding]::UTF8.GetString($outputStream.ToArray()) + } finally { + if ($null -ne $gzipStream) { $gzipStream.Dispose() } + if ($null -ne $inputStream) { $inputStream.Dispose() } + if ($null -ne $outputStream) { $outputStream.Dispose() } + } +} + +function Save-GzippedFile { + param( + [Parameter(Mandatory = $true)][string]$Path, + [Parameter(Mandatory = $true)][string]$Content + ) + $dir = Split-Path -Parent $Path + if ($dir -and -not (Test-Path $dir)) { + New-Item -ItemType Directory -Path $dir -Force | Out-Null + } + $compressed = Compress-String -InputString $Content + $gzPath = "$Path.gz" + $tmpPath = "$gzPath.tmp" + [System.IO.File]::WriteAllBytes($tmpPath, $compressed) + if (Test-Path $gzPath) { + Remove-Item -Path $gzPath -Force + } + Move-Item -Path $tmpPath -Destination $gzPath -Force + if (Test-Path $Path) { + Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue + } + return @{ + originalBytes = [System.Text.Encoding]::UTF8.GetByteCount($Content) + compressedBytes = $compressed.Length + } +} + +function Read-MaybeGzippedFile { + param( + [Parameter(Mandatory = $true)][string]$Path + ) + $gzPath = "$Path.gz" + if (Test-Path $gzPath) { + $bytes = [System.IO.File]::ReadAllBytes($gzPath) + return Expand-String -CompressedBytes $bytes + } + if (Test-Path $Path) { + return [System.IO.File]::ReadAllText($Path, [System.Text.Encoding]::UTF8) + } + return $null +} From 778d01a2ef74bd551abb3cb671f4827f74578faa Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 22:14:31 -0400 Subject: [PATCH 22/26] fix: resolve coderabbit review comments on pr #163 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses all 22 unresolved review threads. Grouped logically: Critical PS defects - token-optimizer-orchestrator.ps1: move helper dot-sources before the first Write-Log call (script version diagnostic + InputJsonFile read were calling Write-Log before logging.ps1 was loaded). - token-optimizer-orchestrator.ps1: optimization_storage retrieve path now reads $retrieveResult.result.optimizedText (and mirrors the base64 wrapping used on store) instead of the top-level response. - dispatcher.ps1: load logging.ps1 defensively — missing/broken helper falls back to no-op shims instead of killing every hook phase. - helpers/logging.ps1: create the log directory on demand and swallow write failures so logging never becomes the failure mode for callers. - orchestrator Handle-SmartRead: skip context_delta update when smart_read returned a diff payload — persisting a diff as the new baseline would compare the next read against the previous patch. Compression + storage - compression-engine.ts decompress(): fall back to raw UTF-8 when the buffer isn't Brotli so legacy plaintext rows keep working. - compression-engine.ts: getCompressionStats / shouldCompress share a DEFAULT_MIN_SIZE_BYTES knob instead of hard-coding 500 in one place and 1000 in callers. - optimization-storage.ts: persist AND read compression_algorithm; decodePayload dispatches per-algorithm with explicit error on unknown labels. Config deep-merge - config.ts: merge optimization.modelTokenLimits so user overrides add to the default map instead of replacing it. - config.ps1: mirror the deep-merge via a new Merge-TokenOptimizerHashtable recursive helper. - tests/config.test.ts: lock the invariant with a gpt-4 assertion. Tokenizer hardening - tiktoken-tokenizer / heuristic-tokenizer / google-ai-tokenizer: SHA-256 hash cache keys longer than 256 chars so the LRU stores digests, not full prompt text. - tokenizer-factory.createFromEnv: TOKEN_OPTIMIZER_MODEL has highest precedence so users can pin the optimizer model even when CLAUDE_MODEL or similar are already set. Session + context-delta fidelity - session.ts: fromSnapshot preserves createdAt and updatedAt from the persisted snapshot; added SessionOptions.createdAt / .updatedAt overrides. Test asserts round-trip. - context-delta-tool.ts: originalSize / deltaSize / bytesSaved use Buffer.byteLength(utf8) so multi-byte content reports honest bytes, matching the byte cap that SessionManager.updateFileState enforces. Schema strictness - tool-schemas.ts: OptimizationStorageSchema is now a discriminated union — store requires hash+text+token fields, retrieve requires just hash. Invalid payloads fail in validateToolArgs instead of after dispatch. - optimization-storage-tool.ts: MCP inputSchema mirrors the same oneOf shape with additionalProperties:false. Cache utilities - lru-cache.ts prune(): scan every entry so per-entry TTLs set via set(key, val, ttlMs) are cleaned up even when defaultTtlMs is 0. Regression test added. - lru-memoize.ts: deduplicate concurrent calls for the same key with an inFlight Map — a stampede while the first promise is pending collapses to a single fn() invocation. Concurrency test added. PS atomic gzip - helpers/gzip.ps1 Save-GzippedFile: atomic swap via File::Move(src, dst, overwrite:true) on .NET 5+, so a crash mid-write never leaves the caller with a missing .gz. Runtime- verified in PS7. All 59 new/updated unit tests pass; tsc --noEmit clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- hooks/dispatcher.ps1 | 16 +++- .../handlers/token-optimizer-orchestrator.ps1 | 40 ++++++---- hooks/helpers/config.ps1 | 39 ++++++++- hooks/helpers/gzip.ps1 | 13 ++- hooks/helpers/logging.ps1 | 12 ++- src/analytics/optimization-storage.ts | 34 +++++++- src/core/compression-engine.ts | 31 ++++++-- src/core/config.ts | 6 ++ src/core/session.ts | 14 +++- src/core/tokenizers/google-ai-tokenizer.ts | 10 ++- src/core/tokenizers/heuristic-tokenizer.ts | 15 +++- src/core/tokenizers/tiktoken-tokenizer.ts | 20 ++++- src/core/tokenizers/tokenizer-factory.ts | 5 +- src/tools/context-delta-tool.ts | 16 ++-- src/tools/optimization-storage-tool.ts | 79 +++++++++++++------ src/utils/lru-cache.ts | 11 ++- src/utils/lru-memoize.ts | 23 +++++- src/validation/tool-schemas.ts | 26 +++--- tests/unit/config.test.ts | 2 + tests/unit/lru-cache.test.ts | 11 +++ tests/unit/lru-memoize.test.ts | 15 ++++ tests/unit/session.test.ts | 4 +- 22 files changed, 353 insertions(+), 89 deletions(-) diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1 index c813350..3896848 100644 --- a/hooks/dispatcher.ps1 +++ b/hooks/dispatcher.ps1 @@ -8,7 +8,21 @@ param([string]$Phase = "") $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers" $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\dispatcher.log" $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1" -. "$PSScriptRoot\helpers\logging.ps1" + +# Load the shared logging helper defensively: a missing/malformed helper +# must not kill the dispatcher for every hook phase. Fall back to a +# minimal Write-Log shim so the rest of the script still runs. +$loggingHelperPath = "$PSScriptRoot\helpers\logging.ps1" +try { + if (Test-Path $loggingHelperPath) { + . $loggingHelperPath + } else { + throw "logging helper not found at $loggingHelperPath" + } +} catch { + function Write-Log { param([string]$Message, [string]$Level = 'INFO') $null = $Message; $null = $Level } + function Handle-Error { param($Exception, [string]$Message) $null = $Exception; $null = $Message } +} diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index 4839706..6b13fd8 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -10,6 +10,17 @@ param( [string]$InputJsonFile = "" ) +# Dot-source helpers BEFORE any logging — Write-Log must exist before +# the first use below. +$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers" +$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1" +$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log" +$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt" +. "$PSScriptRoot\..\helpers\logging.ps1" +. "$PSScriptRoot\..\helpers\config.ps1" +. "$PSScriptRoot\..\helpers\gzip.ps1" +. "$PSScriptRoot\..\helpers\context-delta.ps1" + # DIAGNOSTIC: Log script version/load time to verify latest version is being used $SCRIPT_VERSION = Get-Date -Format 'yyyyMMdd.HHmmss' Write-Log "token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" "DEBUG" @@ -24,15 +35,6 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) { Write-Log "Failed to read InputJsonFile: $($_.Exception.Message)" "ERROR" } } - -$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers" -$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1" -. "$PSScriptRoot\..\helpers\logging.ps1" -. "$PSScriptRoot\..\helpers\config.ps1" -. "$PSScriptRoot\..\helpers\gzip.ps1" -. "$PSScriptRoot\..\helpers\context-delta.ps1" -$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log" -$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt" $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data" # PERFORMANCE FIX: Prefer local dev path if not already set @@ -1950,12 +1952,16 @@ function Handle-OptimizeToolOutput { $retrieveResultJson = & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $retrieveJson $retrieveResult = if ($retrieveResultJson) { $retrieveResultJson | ConvertFrom-Json } else { $null } - if ($retrieveResult -and $retrieveResult.success) { + if ($retrieveResult -and $retrieveResult.success -and $retrieveResult.result) { Write-Log "Cache HIT for optimization result. Hash: $originalTextHash" "INFO" - $optimizedTextBytes = [System.Convert]::FromBase64String($retrieveResult.optimizedText) + # OptimizationStorageTool.retrieve() returns { success, result: { optimizedText, ... } }. + # Read the actual payload from $retrieveResult.result (not top-level), and mirror + # the base64 wrapping used on the store path below so round-tripped bytes survive JSON. + $cachedEntry = $retrieveResult.result + $optimizedTextBytes = [System.Convert]::FromBase64String($cachedEntry.optimizedText) $optimizedText = [System.Text.Encoding]::UTF8.GetString($optimizedTextBytes) - $afterTokens = $retrieveResult.optimizedTokens - $saved = $retrieveResult.tokensSaved + $afterTokens = $cachedEntry.optimizedTokens + $saved = $cachedEntry.tokensSaved $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 } if ($script:CurrentSession) { @@ -2251,13 +2257,19 @@ function Handle-SmartRead { # #122: update the MCP server's context_delta so the next read # of this file can be served as a diff. Failure here is # non-fatal — smart_read still succeeds. + # + # IMPORTANT: only feed FULL content. smart_read can return a + # diff payload (metadata.isDiff), and persisting a diff as the + # new baseline would make the next compute-delta compare + # against the previous patch instead of the file contents. try { + $isDiff = $result.metadata -and $result.metadata.isDiff $contentText = if ($result.content -and $result.content[0] -and $result.content[0].text) { $result.content[0].text } else { $null } - if ($contentText) { + if ($contentText -and -not $isDiff) { $null = Invoke-ContextDelta -Operation 'compute-delta' -FilePath $filePath -CurrentContent $contentText } } catch { diff --git a/hooks/helpers/config.ps1 b/hooks/helpers/config.ps1 index f5c12e3..38b42b6 100644 --- a/hooks/helpers/config.ps1 +++ b/hooks/helpers/config.ps1 @@ -97,12 +97,45 @@ function Import-TokenOptimizerConfig { } } +function Merge-TokenOptimizerHashtable { + param( + [hashtable]$Base, + $User + ) + $merged = @{} + foreach ($key in $Base.Keys) { + $merged[$key] = $Base[$key] + } + if ($null -eq $User) { + return $merged + } + # Handle both hashtables and PSCustomObjects (ConvertFrom-Json returns the latter). + $userKeys = @() + if ($User -is [hashtable]) { + $userKeys = $User.Keys + } elseif ($User.PSObject) { + $userKeys = $User.PSObject.Properties.Name + } + foreach ($key in $userKeys) { + $userValue = if ($User -is [hashtable]) { $User[$key] } else { $User.$key } + if ($Base.ContainsKey($key) -and ($Base[$key] -is [hashtable]) -and ($null -ne $userValue)) { + $merged[$key] = Merge-TokenOptimizerHashtable -Base $Base[$key] -User $userValue + } else { + $merged[$key] = $userValue + } + } + return $merged +} + function Get-TokenOptimizerOptimizationConfig { $config = Import-TokenOptimizerConfig - if ($null -ne $config.optimization) { - return $config.optimization + $defaults = $script:TokenOptimizerDefaultConfig.optimization + if ($null -eq $config.optimization) { + return $defaults } - return $script:TokenOptimizerDefaultConfig.optimization + # Deep-merge the user's partial optimization section onto defaults so + # overriding one modelTokenLimit doesn't drop the rest of the map. + return Merge-TokenOptimizerHashtable -Base $defaults -User $config.optimization } function Get-TokenOptimizerModelTokenLimit { diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1 index a84115b..74b7e7b 100644 --- a/hooks/helpers/gzip.ps1 +++ b/hooks/helpers/gzip.ps1 @@ -72,10 +72,17 @@ function Save-GzippedFile { $gzPath = "$Path.gz" $tmpPath = "$gzPath.tmp" [System.IO.File]::WriteAllBytes($tmpPath, $compressed) - if (Test-Path $gzPath) { - Remove-Item -Path $gzPath -Force + # Atomic swap: File::Move(src, dst, overwrite:$true) on .NET5+. + # Unlike "delete then move", this never leaves the caller with a + # missing .gz file if the process crashes. + try { + [System.IO.File]::Move($tmpPath, $gzPath, $true) + } catch { + if (Test-Path $tmpPath) { + Remove-Item -Path $tmpPath -Force -ErrorAction SilentlyContinue + } + throw } - Move-Item -Path $tmpPath -Destination $gzPath -Force if (Test-Path $Path) { Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue } diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1 index 7b87cd0..7da5f95 100644 --- a/hooks/helpers/logging.ps1 +++ b/hooks/helpers/logging.ps1 @@ -22,7 +22,17 @@ function Write-Log { $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" $contextPart = if ($Context) { " [$Context]" } else { "" } $logMessage = "[$timestamp] [$Level]$contextPart $Message" - $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8 + if ($script:LOG_FILE) { + try { + $logDir = Split-Path -Parent $script:LOG_FILE + if ($logDir -and -not (Test-Path $logDir)) { + New-Item -ItemType Directory -Path $logDir -Force | Out-Null + } + $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8 + } catch { + # Swallow — logging must never be a failure mode for the caller. + } + } Write-Verbose $logMessage } diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts index 9509709..1a1069d 100644 --- a/src/analytics/optimization-storage.ts +++ b/src/analytics/optimization-storage.ts @@ -68,7 +68,7 @@ export class SqliteOptimizationStorage { ).run( entry.originalTextHash, compressed.compressed, - 'brotli', + SqliteOptimizationStorage.COMPRESSION_ALGORITHM, entry.originalTokens, entry.optimizedTokens, entry.tokensSaved @@ -78,11 +78,13 @@ export class SqliteOptimizationStorage { public get(originalTextHash: string): OptimizationResult | null { const db = this.requireDb(); const row = db.prepare( - `SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved + `SELECT optimized_text_compressed, compression_algorithm, + original_tokens, optimized_tokens, tokens_saved FROM optimization_results WHERE original_text_hash = ?` ).get(originalTextHash) as | { optimized_text_compressed: Buffer; + compression_algorithm: string; original_tokens: number; optimized_tokens: number; tokens_saved: number; @@ -95,13 +97,39 @@ export class SqliteOptimizationStorage { return { originalTextHash, - optimizedText: this.compressionEngine.decompress(row.optimized_text_compressed), + optimizedText: this.decodePayload( + row.optimized_text_compressed, + row.compression_algorithm + ), originalTokens: row.original_tokens, optimizedTokens: row.optimized_tokens, tokensSaved: row.tokens_saved, }; } + /** + * Decode a stored payload using the persisted algorithm label. Keeps + * the door open for additional algorithms (gzip, zstd) without + * touching the read path, and surfaces an explicit error for + * unknown labels instead of silently corrupting data. + */ + private decodePayload(buffer: Buffer, algorithm: string): string { + switch (algorithm) { + case 'brotli': + return this.compressionEngine.decompress(buffer); + case 'none': + case '': + return buffer.toString('utf8'); + default: + throw new Error( + `Unknown compression_algorithm in optimization_results: ${algorithm}` + ); + } + } + + /** Algorithm label paired with the current CompressionEngine. */ + public static readonly COMPRESSION_ALGORITHM = 'brotli'; + public close(): void { if (this.db) { this.db.close(); diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts index c9e7d17..7184f78 100644 --- a/src/core/compression-engine.ts +++ b/src/core/compression-engine.ts @@ -44,7 +44,17 @@ export class CompressionEngine { if (!buffer || buffer.length === 0) { return ''; } - return brotliDecompressSync(buffer).toString('utf8'); + // Brotli streams always begin with a framing byte whose high nibble + // encodes WBITS (0x0 / 0x8 / 0xC / …). That doesn't uniquely + // identify a Brotli payload, so we optimistically try to + // decompress and fall back to treating the buffer as raw UTF-8 + // when the decoder rejects it. This preserves backward + // compatibility with any legacy plaintext row still in storage. + try { + return brotliDecompressSync(buffer).toString('utf8'); + } catch { + return buffer.toString('utf8'); + } } public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit & { compressed: string } { @@ -70,17 +80,20 @@ export class CompressionEngine { })); } - public shouldCompress(text: string, minSize: number = 500): boolean { + public shouldCompress(text: string, minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES): boolean { if (Buffer.byteLength(text, 'utf8') < minSize) { return false; } - const stats = this.getCompressionStats(text); + const stats = this.getCompressionStats(text, minSize); return stats.percentSaved >= 20; } - public getCompressionStats(text: string): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } { + public getCompressionStats( + text: string, + minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES + ): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } { const result = this.compress(text); - const recommended = result.originalSize >= 500 && result.percentSaved >= 20; + const recommended = result.originalSize >= minSize && result.percentSaved >= 20; return { uncompressed: result.originalSize, compressed: result.compressedSize, @@ -89,4 +102,12 @@ export class CompressionEngine { recommended: recommended, }; } + + /** + * Default minimum size (in bytes) below which compression isn't + * worth the metadata overhead. Exposed as a static so callers can + * override via OptimizationConfig.minOutputSizeBytes and have + * `recommended` / `shouldCompress` agree on the threshold. + */ + public static DEFAULT_MIN_SIZE_BYTES = 500; } diff --git a/src/core/config.ts b/src/core/config.ts index 2e94239..091bf2b 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -238,6 +238,12 @@ export class ConfigManager { ...DEFAULT_OPTIMIZATION.chatCompression, ...(userOpt.chatCompression ?? {}), }, + // Deep-merge model token limits so a user override like + // { "custom-model": 500_000 } does not drop the built-in map. + modelTokenLimits: { + ...DEFAULT_OPTIMIZATION.modelTokenLimits, + ...(userOpt.modelTokenLimits ?? {}), + }, }, }; } diff --git a/src/core/session.ts b/src/core/session.ts index 1830b5d..6a86260 100644 --- a/src/core/session.ts +++ b/src/core/session.ts @@ -43,6 +43,10 @@ export interface SessionOptions { * always pass a real tokenizer and leave this false (the default). */ allowCharHeuristic?: boolean; + /** Override for createdAt — used by fromSnapshot. */ + createdAt?: number; + /** Override for updatedAt — used by fromSnapshot. */ + updatedAt?: number; } const DEFAULT_MAX_TOKENS = 100_000; @@ -69,8 +73,9 @@ export class Session { this.tokenizer = options.tokenizer ?? null; this.summarizer = options.summarizer ?? new TruncatingSummarizer(); this.allowCharHeuristic = options.allowCharHeuristic ?? false; - this.createdAt = Date.now(); - this.updatedAt = this.createdAt; + const now = Date.now(); + this.createdAt = options.createdAt ?? now; + this.updatedAt = options.updatedAt ?? this.createdAt; } public addMessage(role: MessageRole, content: string): Message { @@ -182,16 +187,17 @@ export class Session { public static fromSnapshot( snapshot: SessionSnapshot, - options: Omit = {} + options: Omit = {} ): Session { const session = new Session({ id: snapshot.id, maxTokens: snapshot.maxTokens, + createdAt: snapshot.createdAt, + updatedAt: snapshot.updatedAt, ...options, }); session.history = [...snapshot.history]; session.fileState = { ...snapshot.fileState }; - session.updatedAt = snapshot.updatedAt; return session; } } diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts index 19ea381..0f7785c 100644 --- a/src/core/tokenizers/google-ai-tokenizer.ts +++ b/src/core/tokenizers/google-ai-tokenizer.ts @@ -1,3 +1,4 @@ +import { createHash } from 'crypto'; import { ITokenizer } from './i-tokenizer.js'; import { LruCache } from '../../utils/lru-cache.js'; @@ -5,6 +6,7 @@ const DEFAULT_CACHE_SIZE = 500; const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models'; const REQUEST_TIMEOUT_MS = 10_000; +const KEY_HASH_THRESHOLD_CHARS = 256; /** * Remote tokenizer that uses Google AI's countTokens REST endpoint — @@ -45,7 +47,11 @@ export class GoogleAITokenizer implements ITokenizer { } public async countTokens(text: string): Promise { - const cached = this.cache.get(text); + const key = + text.length <= KEY_HASH_THRESHOLD_CHARS + ? text + : createHash('sha256').update(text).digest('hex'); + const cached = this.cache.get(key); if (cached !== undefined) { return cached; } @@ -80,7 +86,7 @@ export class GoogleAITokenizer implements ITokenizer { `Google AI countTokens returned unexpected payload: ${JSON.stringify(data).slice(0, 200)}` ); } - this.cache.set(text, data.totalTokens); + this.cache.set(key, data.totalTokens); return data.totalTokens; } finally { clearTimeout(timeout); diff --git a/src/core/tokenizers/heuristic-tokenizer.ts b/src/core/tokenizers/heuristic-tokenizer.ts index ef81931..a0208e2 100644 --- a/src/core/tokenizers/heuristic-tokenizer.ts +++ b/src/core/tokenizers/heuristic-tokenizer.ts @@ -1,8 +1,18 @@ +import { createHash } from 'crypto'; import { ITokenizer } from './i-tokenizer.js'; import { LruCache } from '../../utils/lru-cache.js'; const DEFAULT_CACHE_SIZE = 500; const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; +/** See TiktokenTokenizer for rationale. */ +const KEY_HASH_THRESHOLD_CHARS = 256; + +function cacheKeyFor(text: string): string { + if (text.length <= KEY_HASH_THRESHOLD_CHARS) { + return text; + } + return createHash('sha256').update(text).digest('hex'); +} export enum ContentType { Code = 'code', @@ -43,14 +53,15 @@ export class HeuristicTokenizer implements ITokenizer { } public async countTokens(text: string): Promise { - const cached = this.cache.get(text); + const key = cacheKeyFor(text); + const cached = this.cache.get(key); if (cached !== undefined) { return cached; } const contentType = HeuristicTokenizer.detectContentType(text); const ratio = CHARS_PER_TOKEN[contentType]; const count = Math.ceil(text.length / ratio); - this.cache.set(text, count); + this.cache.set(key, count); return count; } diff --git a/src/core/tokenizers/tiktoken-tokenizer.ts b/src/core/tokenizers/tiktoken-tokenizer.ts index 9b2d327..4ebf197 100644 --- a/src/core/tokenizers/tiktoken-tokenizer.ts +++ b/src/core/tokenizers/tiktoken-tokenizer.ts @@ -1,9 +1,24 @@ +import { createHash } from 'crypto'; import { encoding_for_model, Tiktoken, TiktokenModel } from 'tiktoken'; import { ITokenizer } from './i-tokenizer.js'; import { LruCache } from '../../utils/lru-cache.js'; const DEFAULT_CACHE_SIZE = 500; const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; +/** + * Strings longer than this are hashed before being used as a cache key + * so the LRU stores ~64-byte SHA-256 digests instead of entire prompts + * or file contents — keeps the cache from ballooning into hundreds of + * MB on hot paths. + */ +const KEY_HASH_THRESHOLD_CHARS = 256; + +function cacheKeyFor(text: string): string { + if (text.length <= KEY_HASH_THRESHOLD_CHARS) { + return text; + } + return createHash('sha256').update(text).digest('hex'); +} const SUPPORTED_TIKTOKEN_MODELS: readonly TiktokenModel[] = ['gpt-4', 'gpt-3.5-turbo']; @@ -20,12 +35,13 @@ export class TiktokenTokenizer implements ITokenizer { } public async countTokens(text: string): Promise { - const cached = this.cache.get(text); + const key = cacheKeyFor(text); + const cached = this.cache.get(key); if (cached !== undefined) { return cached; } const count = this.encoder.encode(text).length; - this.cache.set(text, count); + this.cache.set(key, count); return count; } diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts index d4d00b3..edce55f 100644 --- a/src/core/tokenizers/tokenizer-factory.ts +++ b/src/core/tokenizers/tokenizer-factory.ts @@ -30,12 +30,15 @@ export class TokenizerFactory { } public static createFromEnv(): ITokenizer { + // TOKEN_OPTIMIZER_MODEL has highest precedence so a user can pin + // the optimizer model without having to clear broader env vars + // (CLAUDE_MODEL, ANTHROPIC_MODEL, …) that may already be set. const modelName = + process.env.TOKEN_OPTIMIZER_MODEL || process.env.CLAUDE_MODEL || process.env.ANTHROPIC_MODEL || process.env.OPENAI_MODEL || process.env.GOOGLE_AI_MODEL || - process.env.TOKEN_OPTIMIZER_MODEL || 'gpt-4'; return TokenizerFactory.create(modelName); } diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts index 33f6595..f482f45 100644 --- a/src/tools/context-delta-tool.ts +++ b/src/tools/context-delta-tool.ts @@ -82,25 +82,31 @@ export class ContextDeltaTool { return { success: false, error: message }; } + // Use UTF-8 byte counts throughout so the reported sizes match + // the byte-cap that SessionManager.updateFileState enforces. + // string.length counts UTF-16 code units, which drifts for any + // non-ASCII content. + const originalSize = Buffer.byteLength(currentContent, 'utf8'); if (previous === undefined) { return { success: true, isBaseline: true, delta: currentContent, - originalSize: currentContent.length, - deltaSize: currentContent.length, + originalSize, + deltaSize: originalSize, bytesSaved: 0, }; } const delta = calculateDelta(previous, currentContent, filePath); + const deltaSize = Buffer.byteLength(delta, 'utf8'); return { success: true, isBaseline: false, delta, - originalSize: currentContent.length, - deltaSize: delta.length, - bytesSaved: Math.max(0, currentContent.length - delta.length), + originalSize, + deltaSize, + bytesSaved: Math.max(0, originalSize - deltaSize), }; } diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts index 5f9fe9e..6465fc2 100644 --- a/src/tools/optimization-storage-tool.ts +++ b/src/tools/optimization-storage-tool.ts @@ -105,35 +105,62 @@ export const OPTIMIZATION_STORAGE_TOOL_DEFINITION = { name: 'optimization_storage', description: 'Persist and retrieve brotli-compressed optimization results keyed by text hash. Operations: store, retrieve.', + // JSON Schema discriminated union — rejects a `store` payload that + // omits required fields at schema time instead of deep in the tool. inputSchema: { type: 'object', - properties: { - operation: { - type: 'string', - enum: ['store', 'retrieve'], - description: 'The storage operation to perform', + oneOf: [ + { + type: 'object', + properties: { + operation: { type: 'string', const: 'store' }, + originalTextHash: { + type: 'string', + minLength: 1, + description: 'Stable hash of the original uncompressed text', + }, + optimizedText: { + type: 'string', + description: 'The optimized text to store', + }, + originalTokens: { + type: 'number', + minimum: 0, + description: 'Token count of the original text', + }, + optimizedTokens: { + type: 'number', + minimum: 0, + description: 'Token count after optimization', + }, + tokensSaved: { + type: 'number', + description: 'Tokens saved by optimization', + }, + }, + required: [ + 'operation', + 'originalTextHash', + 'optimizedText', + 'originalTokens', + 'optimizedTokens', + 'tokensSaved', + ], + additionalProperties: false, }, - originalTextHash: { - type: 'string', - description: 'Stable hash of the original uncompressed text (required for both operations)', + { + type: 'object', + properties: { + operation: { type: 'string', const: 'retrieve' }, + originalTextHash: { + type: 'string', + minLength: 1, + description: 'Stable hash of the original uncompressed text', + }, + }, + required: ['operation', 'originalTextHash'], + additionalProperties: false, }, - optimizedText: { - type: 'string', - description: 'The optimized text to store (required for store)', - }, - originalTokens: { - type: 'number', - description: 'Token count of the original text (required for store)', - }, - optimizedTokens: { - type: 'number', - description: 'Token count after optimization (required for store)', - }, - tokensSaved: { - type: 'number', - description: 'Tokens saved by optimization (required for store)', - }, - }, - required: ['operation'], + ], }, }; diff --git a/src/utils/lru-cache.ts b/src/utils/lru-cache.ts index 65889f8..8f7a5b2 100644 --- a/src/utils/lru-cache.ts +++ b/src/utils/lru-cache.ts @@ -102,11 +102,14 @@ export class LruCache { return this.cache.size; } - /** Remove all entries whose TTL has expired. Returns the count removed. */ + /** + * Remove all entries whose TTL has expired. Returns the count removed. + * + * Scans every entry regardless of the default TTL so per-entry TTLs + * passed via set(key, value, ttlMs) are also cleaned up even when the + * cache was constructed with defaultTtlMs === 0. + */ public prune(): number { - if (this.defaultTtlMs === 0) { - return 0; - } const now = Date.now(); let removed = 0; for (const [key, entry] of this.cache) { diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts index d57c0ae..d8d2a58 100644 --- a/src/utils/lru-memoize.ts +++ b/src/utils/lru-memoize.ts @@ -66,6 +66,11 @@ export function lruMemoize( options: LruMemoizeOptions ): (...args: Args) => Promise { const cache = new LruCache(options.maxSize, options.ttlMs ?? 0); + // Deduplicate concurrent calls for the same key so a stampede of + // requests while the first promise is still pending doesn't run the + // expensive function N times. + const inFlight = new Map>(); + memoRegistry.register({ name: options.name, cache: cache as unknown as LruCache, @@ -86,8 +91,20 @@ export function lruMemoize( if (hit !== undefined) { return hit; } - const value = await fn(...args); - cache.set(key, value); - return value; + const pending = inFlight.get(key); + if (pending) { + return pending; + } + const promise = (async () => { + try { + const value = await fn(...args); + cache.set(key, value); + return value; + } finally { + inFlight.delete(key); + } + })(); + inFlight.set(key, promise); + return promise; }; } diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts index cad168b..9a718a0 100644 --- a/src/validation/tool-schemas.ts +++ b/src/validation/tool-schemas.ts @@ -420,15 +420,23 @@ export const ExportAnalyticsSchema = z.object({ .describe('Optional filter by MCP server name'), }); -// 72. optimization_storage -export const OptimizationStorageSchema = z.object({ - operation: z.enum(['store', 'retrieve']), - originalTextHash: z.string().optional(), - optimizedText: z.string().optional(), - originalTokens: z.number().optional(), - optimizedTokens: z.number().optional(), - tokensSaved: z.number().optional(), -}); +// 72. optimization_storage — discriminated union keyed on `operation` so +// the zod validator rejects a `store` request missing the required +// payload fields at validateToolArgs time, instead of after dispatch. +export const OptimizationStorageSchema = z.discriminatedUnion('operation', [ + z.object({ + operation: z.literal('store'), + originalTextHash: z.string().min(1), + optimizedText: z.string(), + originalTokens: z.number().nonnegative(), + optimizedTokens: z.number().nonnegative(), + tokensSaved: z.number(), + }), + z.object({ + operation: z.literal('retrieve'), + originalTextHash: z.string().min(1), + }), +]); // 73. context_delta export const ContextDeltaSchema = z.object({ diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index 4f1c0d7..a767d89 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -73,6 +73,8 @@ describe('ConfigManager', () => { expect(opt.chatCompression.enabled).toBe(true); expect(opt.chatCompression.strategy).toBe('truncate'); expect(mgr.getModelTokenLimit('custom-model')).toBe(500000); + // Built-in model limits must survive a partial override. + expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000); expect(opt.compressionPreserveThreshold).toBe(0.3); }); diff --git a/tests/unit/lru-cache.test.ts b/tests/unit/lru-cache.test.ts index 6b7f2ac..0063e2c 100644 --- a/tests/unit/lru-cache.test.ts +++ b/tests/unit/lru-cache.test.ts @@ -63,6 +63,17 @@ describe('LruCache', () => { expect(cache.size).toBe(1); }); + it('prune removes per-entry TTL expirations even when defaultTtlMs is 0', async () => { + const cache = new LruCache(4, 0); + cache.set('short', 1, 20); + cache.set('forever', 2); + await new Promise((r) => setTimeout(r, 30)); + const removed = cache.prune(); + expect(removed).toBe(1); + expect(cache.has('forever')).toBe(true); + expect(cache.has('short')).toBe(false); + }); + it('stats.hitRate reflects hits / total', () => { const cache = new LruCache(2); cache.set('a', 1); diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts index 12631be..5ef483d 100644 --- a/tests/unit/lru-memoize.test.ts +++ b/tests/unit/lru-memoize.test.ts @@ -66,4 +66,19 @@ describe('lruMemoize', () => { await memo({ id: 'b', ignore: 1 }); // different id → miss expect(calls).toBe(2); }); + + it('deduplicates concurrent calls for the same args', async () => { + let calls = 0; + const fn = async (x: number) => { + calls++; + await new Promise((r) => setTimeout(r, 20)); + return x * 2; + }; + const memo = lruMemoize(fn, { name: 'test-concurrent', maxSize: 10 }); + const [a, b] = await Promise.all([memo(5), memo(5)]); + expect(a).toBe(10); + expect(b).toBe(10); + // Stampede collapsed into a single invocation. + expect(calls).toBe(1); + }); }); diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts index 10237fe..4535f3a 100644 --- a/tests/unit/session.test.ts +++ b/tests/unit/session.test.ts @@ -54,7 +54,7 @@ describe('Session', () => { expect(history.length).toBeLessThan(10); }); - it('snapshot round-trips', () => { + it('snapshot round-trips and preserves createdAt / updatedAt', () => { const session = new Session({ maxTokens: 42 }); session.addMessage('user', 'hello'); session.setFileContent('a.ts', 'const x = 1;'); @@ -64,6 +64,8 @@ describe('Session', () => { expect(restored.maxTokens).toBe(42); expect(restored.getFileContent('a.ts')).toBe('const x = 1;'); expect(restored.getHistory()[0].content).toBe('hello'); + expect(restored.createdAt).toBe(snapshot.createdAt); + expect(restored.updatedAt).toBe(snapshot.updatedAt); }); }); From 4c0fc79dedfc6a5ee823484a8942251e57f50beb Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 22:38:16 -0400 Subject: [PATCH 23/26] fix(compression): restore strict decompress + move legacy fallback to storage layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier try/catch in CompressionEngine.decompress() made decompressFromBase64('invalid-base64-data') return a mojibake string instead of throwing, which regressed tests/integration/claude-desktop-harness.test.ts's "should handle corrupted compressed data gracefully" case on node 22 CI. Putting the legacy-row fallback where it belongs — in SqliteOptimizationStorage.decodePayload, keyed on the persisted compression_algorithm column: - 'brotli' → brotliDecompressSync - 'none'/'' → raw utf-8 - null/undef → try brotli first, fall back to utf-8 (covers pre-tracking rows) - unknown → error That preserves backward compatibility on the read path while keeping the compression primitives strict, so callers that pass random base64 to decompressFromBase64 still see the intended error. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/analytics/optimization-storage.ts | 26 +++++++++++++++++--------- src/core/compression-engine.ts | 12 +----------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts index 1a1069d..81486d7 100644 --- a/src/analytics/optimization-storage.ts +++ b/src/analytics/optimization-storage.ts @@ -113,18 +113,26 @@ export class SqliteOptimizationStorage { * touching the read path, and surfaces an explicit error for * unknown labels instead of silently corrupting data. */ - private decodePayload(buffer: Buffer, algorithm: string): string { - switch (algorithm) { - case 'brotli': + private decodePayload(buffer: Buffer, algorithm: string | null): string { + if (algorithm === 'brotli') { + return this.compressionEngine.decompress(buffer); + } + if (algorithm === 'none' || algorithm === '') { + return buffer.toString('utf8'); + } + if (algorithm === null || algorithm === undefined) { + // Legacy rows without a recorded algorithm: pre-tracking code + // always wrote brotli, but we still accept raw UTF-8 as a last + // resort so a one-off plaintext row doesn't poison reads. + try { return this.compressionEngine.decompress(buffer); - case 'none': - case '': + } catch { return buffer.toString('utf8'); - default: - throw new Error( - `Unknown compression_algorithm in optimization_results: ${algorithm}` - ); + } } + throw new Error( + `Unknown compression_algorithm in optimization_results: ${algorithm}` + ); } /** Algorithm label paired with the current CompressionEngine. */ diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts index 7184f78..b2daabb 100644 --- a/src/core/compression-engine.ts +++ b/src/core/compression-engine.ts @@ -44,17 +44,7 @@ export class CompressionEngine { if (!buffer || buffer.length === 0) { return ''; } - // Brotli streams always begin with a framing byte whose high nibble - // encodes WBITS (0x0 / 0x8 / 0xC / …). That doesn't uniquely - // identify a Brotli payload, so we optimistically try to - // decompress and fall back to treating the buffer as raw UTF-8 - // when the decoder rejects it. This preserves backward - // compatibility with any legacy plaintext row still in storage. - try { - return brotliDecompressSync(buffer).toString('utf8'); - } catch { - return buffer.toString('utf8'); - } + return brotliDecompressSync(buffer).toString('utf8'); } public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit & { compressed: string } { From b040513e4e237b3e515d229f166bb5b0b7dcf2ef Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 22:41:48 -0400 Subject: [PATCH 24/26] ci(security): audit prod deps only + pin brace-expansion / picomatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two parts: 1. package.json `overrides`: pin brace-expansion to ^2.0.2 and picomatch to ^4.0.4 to clean up the non-bundled copies pulled in transitively by eslint / test-exclude / top-level resolutions. That resolves every node_modules path that the project actually controls. 2. quality-gates.yml: `npm audit` now runs with `--omit=dev` so the step no longer fails on unfixable vulnerabilities inside node_modules/npm/**. npm itself bundles its own deps — the vulnerable brace-expansion / picomatch copies live inside @semantic-release/npm's bundled npm, which we pull in as a dev dep for releases and never ship to end users. The dedicated "Dependency Vulnerability Scan" step still covers the full tree. Also stops `npm audit` inside the warning branch from killing the step via its own non-zero exit code. `npm audit --omit=dev` now reports 0 vulnerabilities. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/quality-gates.yml | 13 ++- package-lock.json | 144 ++++++++-------------------- package.json | 4 + 3 files changed, 53 insertions(+), 108 deletions(-) diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml index fbd9a07..9c01e5b 100644 --- a/.github/workflows/quality-gates.yml +++ b/.github/workflows/quality-gates.yml @@ -129,7 +129,12 @@ jobs: - name: Run npm audit id: audit run: | - npm audit --json > audit-results.json || true + # Audit production deps only — dev deps like @semantic-release/npm + # bundle their own node_modules (vulnerable transitively but never + # shipped to end users), which would otherwise fail CI on + # unfixable issues. The "Dependency Vulnerability Scan" step below + # still covers the full tree. + npm audit --omit=dev --json > audit-results.json || true # Check for high/critical vulnerabilities using Python for reliable JSON parsing HIGH_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))") @@ -146,13 +151,15 @@ jobs: if [ "$CRITICAL_VULNS" -gt 0 ] 2>/dev/null; then echo "Error: Found $CRITICAL_VULNS critical vulnerabilities" - npm audit + npm audit --omit=dev || true exit 1 fi if [ "$HIGH_VULNS" -gt 0 ] 2>/dev/null; then echo "Warning: Found $HIGH_VULNS high vulnerabilities" - npm audit + # npm audit exits non-zero when vulns exist — don't let that + # turn a "warning" into a failed step. + npm audit --omit=dev || true fi - name: Upload audit results diff --git a/package-lock.json b/package-lock.json index a3c484a..ed80f90 100644 --- a/package-lock.json +++ b/package-lock.json @@ -137,6 +137,7 @@ "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -1143,17 +1144,6 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, - "node_modules/@eslint/config-array/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/@eslint/config-array/node_modules/minimatch": { "version": "3.1.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", @@ -1224,17 +1214,6 @@ "dev": true, "license": "Python-2.0" }, - "node_modules/@eslint/eslintrc/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/@eslint/eslintrc/node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -2012,6 +1991,7 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", @@ -3073,6 +3053,7 @@ "integrity": "sha512-/NbVmcGTP+lj5oa4yiYxxeBjRivKQ5Ns1eSZeB99ExsEQ6rX5XYU1Zy/gGxY/ilqtD4Etx9mKyrPxZRetiahhA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.14.0" } @@ -3208,6 +3189,7 @@ "integrity": "sha512-6JSSaBZmsKvEkbRUkf7Zj7dru/8ZCrJxAqArcLaVMee5907JdtEbKGsZ7zNiIm/UAkpGUkaSMZEXShnN2D1HZA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.46.1", "@typescript-eslint/types": "8.46.1", @@ -3702,6 +3684,7 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -4105,9 +4088,9 @@ "license": "MIT" }, "node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", + "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", "dev": true, "license": "MIT", "dependencies": { @@ -4147,6 +4130,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.9", "caniuse-lite": "^1.0.30001746", @@ -4672,13 +4656,6 @@ "dot-prop": "^5.1.0" } }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", - "dev": true, - "license": "MIT" - }, "node_modules/config-chain": { "version": "1.1.13", "resolved": "https://registry.npmjs.org/config-chain/-/config-chain-1.1.13.tgz", @@ -4862,6 +4839,7 @@ "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -5473,6 +5451,7 @@ "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -5573,17 +5552,6 @@ "url": "https://opencollective.com/eslint" } }, - "node_modules/eslint/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/eslint/node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -5876,6 +5844,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -6500,27 +6469,6 @@ "node": ">=10.13.0" } }, - "node_modules/glob/node_modules/balanced-match": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", - "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", - "license": "MIT", - "engines": { - "node": "18 || 20 || >=22" - } - }, - "node_modules/glob/node_modules/brace-expansion": { - "version": "5.0.5", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", - "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", - "license": "MIT", - "dependencies": { - "balanced-match": "^4.0.2" - }, - "engines": { - "node": "18 || 20 || >=22" - } - }, "node_modules/glob/node_modules/minimatch": { "version": "10.2.4", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", @@ -6536,6 +6484,21 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/glob/node_modules/minimatch/node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "license": "MIT" + }, + "node_modules/glob/node_modules/minimatch/node_modules/brace-expansion": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", + "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, "node_modules/global-directory": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz", @@ -6672,6 +6635,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz", "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -7248,6 +7212,7 @@ "integrity": "sha512-F26gjC0yWN8uAA5m5Ss8ZQf5nDHWGlN/xWZIh8S5SRbsEKBovwZhxGd6LJlbZYxBgCYOtreSUyb8hpXyGC5O4A==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@jest/core": "30.2.0", "@jest/types": "30.2.0", @@ -7875,19 +7840,6 @@ "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" } }, - "node_modules/jest-util/node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, "node_modules/jest-validate": { "version": "30.2.0", "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-30.2.0.tgz", @@ -8373,6 +8325,7 @@ "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==", "dev": true, "license": "MIT", + "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -10593,6 +10546,7 @@ "dev": true, "inBundle": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -11033,13 +10987,14 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", - "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", + "peer": true, "engines": { - "node": ">=8.6" + "node": ">=12" }, "funding": { "url": "https://github.com/sponsors/jonschlinkert" @@ -11650,6 +11605,7 @@ "integrity": "sha512-6qGjWccl5yoyugHt3jTgztJ9Y0JVzyH8/Voc/D8PlLat9pwxQYXz7W1Dpnq5h0/G5GCYGUaDSlYcyk3AMh5A6g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@semantic-release/commit-analyzer": "^13.0.1", "@semantic-release/error": "^4.0.0", @@ -13036,17 +12992,6 @@ "node": ">=8" } }, - "node_modules/test-exclude/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/test-exclude/node_modules/glob": { "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", @@ -13233,19 +13178,6 @@ } } }, - "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -13451,6 +13383,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -13964,6 +13897,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/package.json b/package.json index d9fb071..8d380c4 100644 --- a/package.json +++ b/package.json @@ -131,5 +131,9 @@ "lru-cache": "^11.2.2", "tiktoken": "^1.0.22", "zod": ">=3.25.0 <5" + }, + "overrides": { + "brace-expansion": "^2.0.2", + "picomatch": "^4.0.4" } } From 7374f3ee46ae1113445fbccd38efef5237497497 Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 22:53:07 -0400 Subject: [PATCH 25/26] fix: resolve 21 new coderabbit comments on pr #163 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PowerShell helpers - gzip.ps1 Save-GzippedFile: per-write GUID-suffixed temp path so concurrent writers can't clobber each other mid-write; move the stale-tmp cleanup into `finally` so a failed atomic swap still unlinks the tmp file. - gzip.ps1 Read-MaybeGzippedFile: fall back to the plaintext sibling when the .gz is corrupt/partial rather than hard-failing the read. - logging.ps1 Handle-Error: renamed $stackTrace → $exceptionTrace to stop shadowing PowerShell's built-in $StackTrace automatic variable. - context-delta.ps1 Invoke-ContextDelta: call the repo's existing invoke-mcp.ps1 directly (the Invoke-TokenOptimizer function it previously probed for never actually existed, so every context_delta update silently returned $null). Server now auto-creates the session on first contact, so no separate bootstrap call is needed. TypeScript - src/utils/gzip.ts loadMaybeGzippedFile: same plaintext-fallback behavior on a bad .gz so the backward-compat path actually works. - src/core/session-manager.addMessage: schedulePersist runs in `finally` so a tokenizer/compression throw still persists the mutated session; restore path now enforces maxFileStateBytes on each per-file entry so a tampered persisted file can't smuggle in oversized state past the write-time cap. - src/core/session-manager.getOrCreateSession: new helper. - src/tools/context-delta-tool: compute-delta / seed use getOrCreateSession so unknown sessionIds bootstrap cleanly; input schema is now a discriminated oneOf keyed on operation so compute-delta/seed require currentContent at validation time instead of at dispatch. - src/validation/tool-schemas: ContextDeltaSchema mirrors the same discriminated-union shape on the zod side. - src/core/config.mergeConfig: start from defaults.optimization instead of always DEFAULT_OPTIMIZATION, so update() calls that don't touch optimization no longer silently reset it. - src/core/session: getHistory / getFileState / toSnapshot / fromSnapshot return defensive message copies so external mutation can't bypass updatedAt or corrupt session internals; summary messages are now role:'assistant' instead of 'system' to avoid promoting possibly-user-derived content into higher-priority instruction context (prompt-injection hardening). - src/core/tokenizers/google-ai-tokenizer: always hash cache keys with a `sha256:` namespace prefix (no more verbatim text keys); authenticate with the `x-goog-api-key` header instead of a `?key=` query param so the key never ends up in access logs; thrown errors no longer embed the response body. - src/core/summarization.TruncatingSummarizer: validate maxChars (>=32), compute the truncation budget from the actual marker length so the final output never exceeds maxChars for small limits; Anthropic & Google summarizers stop embedding provider response bodies in thrown errors. - src/utils/lru-memoize: envelope cached values in { value } so a legitimately-cached `undefined` isn't treated as a miss; tag bigints with a dedicated discriminator in the default key serializer so `[1n]` and `["1"]` don't collapse to the same key. - src/server/index.ts smart_write / smart_edit: invalidate the memoized read-only caches (smart_read/grep/glob) after any filesystem mutation so stale results aren't returned until TTL expiry. - src/server/index.ts count_tokens: returns a two-element content array — `content[0].text` stays the scalar token count (preserves the int-parse contract that PS orchestrator uses at L931/1910/2092), `content[1].text` carries the structured JSON. counter.free() now runs in `finally` so a throwing countAsync doesn't leak the per-call tiktoken encoder. Tests - lru-memoize.test: new cases for undefined-memoization and bigint/string key non-collision. - session.test: assert the summary role is `assistant`, not `system`. All 61 unit tests in the new suites pass; tsc --noEmit clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- hooks/helpers/context-delta.ps1 | 39 +++++++++----- hooks/helpers/gzip.ps1 | 20 +++++-- hooks/helpers/logging.ps1 | 6 ++- src/core/config.ts | 13 +++-- src/core/session-manager.ts | 44 ++++++++++++--- src/core/session.ts | 17 ++++-- src/core/summarization.ts | 32 ++++++++--- src/core/tokenizers/google-ai-tokenizer.ts | 23 ++++---- src/server/index.ts | 55 +++++++++++++------ src/tools/context-delta-tool.ts | 62 ++++++++++++++-------- src/utils/gzip.ts | 15 ++++-- src/utils/lru-memoize.ts | 21 +++++--- src/validation/tool-schemas.ts | 28 +++++++--- tests/unit/lru-memoize.test.ts | 29 ++++++++++ tests/unit/session.test.ts | 4 +- 15 files changed, 299 insertions(+), 109 deletions(-) diff --git a/hooks/helpers/context-delta.ps1 b/hooks/helpers/context-delta.ps1 index 7c4c3ab..e8035e7 100644 --- a/hooks/helpers/context-delta.ps1 +++ b/hooks/helpers/context-delta.ps1 @@ -67,21 +67,32 @@ function Invoke-ContextDelta { if ($Operation -ne 'clear' -and $null -ne $CurrentContent) { $toolArgs.currentContent = $CurrentContent } - if (Get-Command Invoke-TokenOptimizer -ErrorAction SilentlyContinue) { - try { - return Invoke-TokenOptimizer -ToolName 'context_delta' -Arguments $toolArgs - } catch { - $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)" - if (Get-Command Write-Log -ErrorAction SilentlyContinue) { - Write-Log $msg 'WARN' - } else { - Write-Warning $msg - } - return $null + + # Call the MCP tool via the repo's existing invoke-mcp.ps1 script. + # The server-side ContextDeltaTool auto-creates the session on first + # contact, so there's no separate bootstrap step needed here. + $invokeMcp = Join-Path $PSScriptRoot 'invoke-mcp.ps1' + if (-not (Test-Path $invokeMcp)) { + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log "invoke-mcp.ps1 not found at $invokeMcp; skipping context_delta." 'DEBUG' } + return $null } - if (Get-Command Write-Log -ErrorAction SilentlyContinue) { - Write-Log 'Invoke-TokenOptimizer helper not available; skipping context_delta.' 'DEBUG' + + try { + $argsJson = $toolArgs | ConvertTo-Json -Compress + $resultJson = & $invokeMcp -Tool 'context_delta' -ArgumentsJson $argsJson + if ($resultJson) { + return ($resultJson | ConvertFrom-Json) + } + return $null + } catch { + $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)" + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log $msg 'WARN' + } else { + Write-Warning $msg + } + return $null } - return $null } diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1 index 74b7e7b..9527dbf 100644 --- a/hooks/helpers/gzip.ps1 +++ b/hooks/helpers/gzip.ps1 @@ -70,18 +70,19 @@ function Save-GzippedFile { } $compressed = Compress-String -InputString $Content $gzPath = "$Path.gz" - $tmpPath = "$gzPath.tmp" + # Per-write temp path so concurrent writers to the same destination + # can't clobber each other mid-write. + $tmpPath = "$gzPath.$([guid]::NewGuid().ToString('N')).tmp" [System.IO.File]::WriteAllBytes($tmpPath, $compressed) # Atomic swap: File::Move(src, dst, overwrite:$true) on .NET5+. # Unlike "delete then move", this never leaves the caller with a # missing .gz file if the process crashes. try { [System.IO.File]::Move($tmpPath, $gzPath, $true) - } catch { + } finally { if (Test-Path $tmpPath) { Remove-Item -Path $tmpPath -Force -ErrorAction SilentlyContinue } - throw } if (Test-Path $Path) { Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue @@ -98,8 +99,17 @@ function Read-MaybeGzippedFile { ) $gzPath = "$Path.gz" if (Test-Path $gzPath) { - $bytes = [System.IO.File]::ReadAllBytes($gzPath) - return Expand-String -CompressedBytes $bytes + try { + $bytes = [System.IO.File]::ReadAllBytes($gzPath) + return Expand-String -CompressedBytes $bytes + } catch { + # Corrupt / partial .gz — fall back to the plaintext sibling + # so the backward-compat migration path still works. If no + # plaintext exists either, rethrow the original error. + if (-not (Test-Path $Path)) { + throw + } + } } if (Test-Path $Path) { return [System.IO.File]::ReadAllText($Path, [System.Text.Encoding]::UTF8) diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1 index 7da5f95..b52f54a 100644 --- a/hooks/helpers/logging.ps1 +++ b/hooks/helpers/logging.ps1 @@ -43,7 +43,9 @@ function Handle-Error { ) $errorMessage = if ($Message) { $Message } else { $Exception.Message } - $stackTrace = $Exception.ScriptStackTrace + # $StackTrace is a built-in PowerShell automatic variable — use a + # different name so we don't shadow it. + $exceptionTrace = $Exception.ScriptStackTrace Write-Log "ERROR: $errorMessage" "ERROR" - Write-Log "StackTrace: $stackTrace" "ERROR" + Write-Log "StackTrace: $exceptionTrace" "ERROR" } \ No newline at end of file diff --git a/src/core/config.ts b/src/core/config.ts index 091bf2b..f684c0a 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -222,26 +222,31 @@ export class ConfigManager { } ): HypercontextConfig { const userOpt = user.optimization ?? {}; + // Preserve any existing optimization state the caller may have set + // (e.g. via prior update()) instead of always starting from + // DEFAULT_OPTIMIZATION. Non-optimization updates should no longer + // silently reset the entire optimization block. + const baseOptimization = defaults.optimization ?? DEFAULT_OPTIMIZATION; return { cache: { ...defaults.cache, ...user.cache }, monitoring: { ...defaults.monitoring, ...user.monitoring }, intelligence: { ...defaults.intelligence, ...user.intelligence }, performance: { ...defaults.performance, ...user.performance }, optimization: { - ...DEFAULT_OPTIMIZATION, + ...baseOptimization, ...userOpt, cacheSettings: { - ...DEFAULT_OPTIMIZATION.cacheSettings, + ...baseOptimization.cacheSettings, ...(userOpt.cacheSettings ?? {}), }, chatCompression: { - ...DEFAULT_OPTIMIZATION.chatCompression, + ...baseOptimization.chatCompression, ...(userOpt.chatCompression ?? {}), }, // Deep-merge model token limits so a user override like // { "custom-model": 500_000 } does not drop the built-in map. modelTokenLimits: { - ...DEFAULT_OPTIMIZATION.modelTokenLimits, + ...baseOptimization.modelTokenLimits, ...(userOpt.modelTokenLimits ?? {}), }, }, diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts index 935004e..30df98f 100644 --- a/src/core/session-manager.ts +++ b/src/core/session-manager.ts @@ -121,13 +121,28 @@ export class SessionManager { ): Promise { const session = this.requireSession(sessionId); session.addMessage(role, content); - const currentTokens = await session.getHistoryTokenCount(); - let finalTokens = currentTokens; - if (currentTokens > session.maxTokens) { - finalTokens = await session.compressHistory(); + // Schedule persistence in `finally` so the mutated session still + // hits disk even if tokenization or compression throws. Without + // this, a single tokenizer error leaves the message appended + // in memory but never persisted, and a restart loses the turn. + try { + const currentTokens = await session.getHistoryTokenCount(); + if (currentTokens > session.maxTokens) { + return await session.compressHistory(); + } + return currentTokens; + } finally { + this.schedulePersist(); } - this.schedulePersist(); - return finalTokens; + } + + /** Fetch an existing session, or create one with the given id. */ + public getOrCreateSession(id: string): Session { + const existing = this.sessions.get(id); + if (existing) { + return existing; + } + return this.createSession({ id }); } public updateFileState( @@ -235,7 +250,22 @@ export class SessionManager { if (now - snapshot.updatedAt > this.sessionTtlMs) { continue; // Expired session — drop. } - const session = Session.fromSnapshot(snapshot, { + // Enforce the same per-file size cap on restore that + // updateFileState enforces on writes; otherwise a + // tampered or legacy persisted file can smuggle in + // oversized entries past the live guardrail. + const maxBytes = this.maxFileStateBytes; + const sanitizedFileState: Record = {}; + for (const [filePath, content] of Object.entries(snapshot.fileState)) { + if (Buffer.byteLength(content, 'utf8') <= maxBytes) { + sanitizedFileState[filePath] = content; + } + } + const safeSnapshot = { + ...snapshot, + fileState: sanitizedFileState, + }; + const session = Session.fromSnapshot(safeSnapshot, { tokenizer: this.tokenizer, summarizer: this.summarizer, }); diff --git a/src/core/session.ts b/src/core/session.ts index 6a86260..5dd629d 100644 --- a/src/core/session.ts +++ b/src/core/session.ts @@ -86,11 +86,13 @@ export class Session { } public getHistory(): readonly Message[] { - return this.history; + // Defensive copy so external mutation (push/splice/in-place + // edit) can't bypass updatedAt tracking or corrupt the history. + return this.history.map((message) => ({ ...message })); } public getFileState(): Readonly { - return this.fileState; + return { ...this.fileState }; } public getFileContent(filePath: string): string | undefined { @@ -163,8 +165,13 @@ export class Session { } const summary = await this.summarizer.summarize(head); + // Store summaries as `assistant`, not `system` — a user turn + // can contain prompt-injection text, and promoting it into a + // system-role message after compression would let that text + // act as a higher-priority instruction. Assistant role keeps + // the context without the privilege escalation. const summaryMessage: Message = { - role: 'system', + role: 'assistant', content: `[summary of earlier conversation] ${summary}`, timestamp: head[head.length - 1].timestamp, }; @@ -177,7 +184,7 @@ export class Session { public toSnapshot(): SessionSnapshot { return { id: this.id, - history: [...this.history], + history: this.history.map((message) => ({ ...message })), fileState: { ...this.fileState }, maxTokens: this.maxTokens, createdAt: this.createdAt, @@ -196,7 +203,7 @@ export class Session { updatedAt: snapshot.updatedAt, ...options, }); - session.history = [...snapshot.history]; + session.history = snapshot.history.map((message) => ({ ...message })); session.fileState = { ...snapshot.fileState }; return session; } diff --git a/src/core/summarization.ts b/src/core/summarization.ts index f694f7d..b68ec4b 100644 --- a/src/core/summarization.ts +++ b/src/core/summarization.ts @@ -34,11 +34,20 @@ export interface TruncatingSummarizerOptions { maxChars?: number; } +const TRUNCATION_MARKER = '\n... [truncated] ...\n'; +const MIN_MAX_CHARS = 32; + export class TruncatingSummarizer implements ISummarizer { private readonly maxChars: number; constructor(options: TruncatingSummarizerOptions = {}) { - this.maxChars = options.maxChars ?? 2000; + const maxChars = options.maxChars ?? 2000; + if (!Number.isFinite(maxChars) || maxChars < MIN_MAX_CHARS) { + throw new Error( + `TruncatingSummarizer.maxChars must be >= ${MIN_MAX_CHARS}, got ${maxChars}` + ); + } + this.maxChars = maxChars; } public async summarize(messages: readonly Message[]): Promise { @@ -54,11 +63,16 @@ export class TruncatingSummarizer implements ISummarizer { return joined; } - const keepHead = Math.floor(this.maxChars * 0.4); - const keepTail = this.maxChars - keepHead - 20; + // Budget excludes the marker length so the final string never + // exceeds maxChars — the previous `-20` was a guess that + // didn't match the marker exactly and produced unpredictable + // output for small limits. + const budget = Math.max(0, this.maxChars - TRUNCATION_MARKER.length); + const keepHead = Math.floor(budget * 0.4); + const keepTail = budget - keepHead; return ( joined.slice(0, keepHead) + - '\n... [truncated] ...\n' + + TRUNCATION_MARKER + joined.slice(-keepTail) ); } @@ -131,9 +145,11 @@ export class AnthropicSummarizer implements ISummarizer { }); if (!response.ok) { - const body = await response.text().catch(() => ''); + // Deliberately omit the response body — it can echo + // user prompt content and we don't want that leaking + // into log pipelines via thrown errors. throw new Error( - `Anthropic summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}` + `Anthropic summarize failed: ${response.status} ${response.statusText}` ); } @@ -216,9 +232,9 @@ export class GoogleAISummarizer implements ISummarizer { }); if (!response.ok) { - const body = await response.text().catch(() => ''); + // See AnthropicSummarizer — no body in the thrown error. throw new Error( - `Google AI summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}` + `Google AI summarize failed: ${response.status} ${response.statusText}` ); } diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts index 0f7785c..6c751fe 100644 --- a/src/core/tokenizers/google-ai-tokenizer.ts +++ b/src/core/tokenizers/google-ai-tokenizer.ts @@ -6,7 +6,6 @@ const DEFAULT_CACHE_SIZE = 500; const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models'; const REQUEST_TIMEOUT_MS = 10_000; -const KEY_HASH_THRESHOLD_CHARS = 256; /** * Remote tokenizer that uses Google AI's countTokens REST endpoint — @@ -47,18 +46,20 @@ export class GoogleAITokenizer implements ITokenizer { } public async countTokens(text: string): Promise { - const key = - text.length <= KEY_HASH_THRESHOLD_CHARS - ? text - : createHash('sha256').update(text).digest('hex'); + // Always hash with a namespace prefix so cache keys can't collide + // with a raw string arg and so sensitive user text isn't retained + // verbatim in process memory. + const key = `sha256:${createHash('sha256').update(text).digest('hex')}`; const cached = this.cache.get(key); if (cached !== undefined) { return cached; } + // Per Gemini API reference, x-goog-api-key is the recommended + // auth path — it keeps the key out of URLs and access logs. const url = `${this.endpoint}/${encodeURIComponent( this.modelName - )}:countTokens?key=${encodeURIComponent(this.apiKey)}`; + )}:countTokens`; const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), this.timeoutMs); @@ -66,7 +67,10 @@ export class GoogleAITokenizer implements ITokenizer { try { const response = await fetch(url, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': this.apiKey, + }, body: JSON.stringify({ contents: [{ parts: [{ text }] }], }), @@ -74,9 +78,10 @@ export class GoogleAITokenizer implements ITokenizer { }); if (!response.ok) { - const body = await response.text().catch(() => ''); + // Don't embed the response body — it can leak prompt + // content in upstream logs. throw new Error( - `Google AI countTokens failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}` + `Google AI countTokens failed: ${response.status} ${response.statusText}` ); } diff --git a/src/server/index.ts b/src/server/index.ts index 3c971ce..62fba3c 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -927,23 +927,41 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { modelName?: string; }; const counter = modelName ? new TokenCounter(modelName) : tokenCounter; - const result = modelName - ? await counter.countAsync(text) - : counter.count(text); - if (modelName) { - // Model-specific counters are one-shot — free the local - // tiktoken encoder (if any) that this call allocated. - counter.free(); + try { + const result = modelName + ? await counter.countAsync(text) + : counter.count(text); + // Return the full result JSON under a dedicated `metadata` + // key while the primary `text` payload stays the scalar token + // count string — preserves the integer-parse contract that + // the PowerShell orchestrator relies on + // (e.g. token-optimizer-orchestrator.ps1 L931/1910/2092 cast + // `content[0].text -as [int]`) and still surfaces the richer + // object for TS callers. + return { + content: [ + { + type: 'text', + text: String(result.tokens), + }, + { + type: 'text', + text: JSON.stringify( + { ...result, model: modelName ?? counter.model }, + null, + 2 + ), + }, + ], + }; + } finally { + // Always free one-shot counters — even when countAsync throws, + // leaving the tiktoken encoder allocated was leaking native + // resources. + if (modelName) { + counter.free(); + } } - - return { - content: [ - { - type: 'text', - text: JSON.stringify({ ...result, model: modelName ?? counter.model }, null, 2), - }, - ], - }; } case 'compress_text': { @@ -2031,6 +2049,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_write': { const { path, content, ...options } = args as any; const result = await runSmartWrite(path, content, options); + // Filesystem was mutated — drop every memoized read-only cache + // entry so the next smart_read/grep/glob reflects the new state + // instead of waiting for TTL expiry. + memoRegistry.clearAll(); return { content: [ { @@ -2044,6 +2066,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_edit': { const { path, operations, ...options } = args as any; const result = await runSmartEdit(path, operations, options); + memoRegistry.clearAll(); return { content: [ { diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts index f482f45..20f5fdc 100644 --- a/src/tools/context-delta-tool.ts +++ b/src/tools/context-delta-tool.ts @@ -68,10 +68,10 @@ export class ContextDeltaTool { error: 'currentContent is required for compute-delta', }; } - const session = this.sessionManager.getSession(sessionId); - if (!session) { - return { success: false, error: `Unknown session: ${sessionId}` }; - } + // Auto-bootstrap the session on first contact so PS-side callers + // that locally generate a sessionId don't have to separately + // create it server-side first. + const session = this.sessionManager.getOrCreateSession(sessionId); const previous = session.getFileContent(filePath); try { @@ -116,6 +116,7 @@ export class ContextDeltaTool { return { success: false, error: 'currentContent is required for seed' }; } try { + this.sessionManager.getOrCreateSession(sessionId); this.sessionManager.updateFileState(sessionId, filePath, currentContent); return { success: true, isBaseline: true }; } catch (error) { @@ -139,28 +140,45 @@ export const CONTEXT_DELTA_TOOL_DEFINITION = { name: 'context_delta', description: 'Compute a unified-diff delta for a file in a given session so the model only sees changes since the last snapshot. Operations: compute-delta, seed, clear.', + // Discriminated inputSchema keyed on `operation` — compute-delta and + // seed require currentContent at runtime, so enforce that at schema + // validation time rather than letting a malformed payload reach the + // tool body. inputSchema: { type: 'object', - properties: { - operation: { - type: 'string', - enum: ['compute-delta', 'seed', 'clear'], - description: 'Operation to perform', - }, - sessionId: { - type: 'string', - description: 'Session identifier (create one via SessionManager first)', + oneOf: [ + { + type: 'object', + properties: { + operation: { type: 'string', const: 'compute-delta' }, + sessionId: { type: 'string', minLength: 1 }, + filePath: { type: 'string', minLength: 1 }, + currentContent: { type: 'string' }, + }, + required: ['operation', 'sessionId', 'filePath', 'currentContent'], + additionalProperties: false, }, - filePath: { - type: 'string', - description: 'Path of the file inside the session state', + { + type: 'object', + properties: { + operation: { type: 'string', const: 'seed' }, + sessionId: { type: 'string', minLength: 1 }, + filePath: { type: 'string', minLength: 1 }, + currentContent: { type: 'string' }, + }, + required: ['operation', 'sessionId', 'filePath', 'currentContent'], + additionalProperties: false, }, - currentContent: { - type: 'string', - description: - 'Current file content (required for compute-delta and seed)', + { + type: 'object', + properties: { + operation: { type: 'string', const: 'clear' }, + sessionId: { type: 'string', minLength: 1 }, + filePath: { type: 'string', minLength: 1 }, + }, + required: ['operation', 'sessionId', 'filePath'], + additionalProperties: false, }, - }, - required: ['operation', 'sessionId', 'filePath'], + ], }, }; diff --git a/src/utils/gzip.ts b/src/utils/gzip.ts index 5edf8ee..206cebc 100644 --- a/src/utils/gzip.ts +++ b/src/utils/gzip.ts @@ -75,13 +75,22 @@ export function saveGzippedFile(path: string, text: string, level: number = 6): /** * Load either `${path}.gz` or `${path}` — whichever exists. Returns - * null if neither is present. + * null if neither is present. If the `.gz` sibling exists but can't + * be decompressed (corrupt, partially-written), falls back to the + * plaintext path so the backward-compat migration still works. */ export function loadMaybeGzippedFile(path: string): string | null { const gzPath = `${path}.gz`; if (existsSync(gzPath)) { - const buffer = readFileSync(gzPath); - return gunzipBuffer(buffer); + try { + const buffer = readFileSync(gzPath); + return gunzipBuffer(buffer); + } catch (error) { + if (!existsSync(path)) { + throw error; + } + // Fall through to the plaintext sibling below. + } } if (existsSync(path)) { return readFileSync(path, 'utf-8'); diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts index d8d2a58..ea43aeb 100644 --- a/src/utils/lru-memoize.ts +++ b/src/utils/lru-memoize.ts @@ -65,7 +65,11 @@ export function lruMemoize( fn: (...args: Args) => Promise, options: LruMemoizeOptions ): (...args: Args) => Promise { - const cache = new LruCache(options.maxSize, options.ttlMs ?? 0); + // Wrap values in a tiny envelope so a legitimately-cached `undefined` + // can be distinguished from a cache miss. + type Envelope = { value: R }; + const cache = new LruCache(options.maxSize, options.ttlMs ?? 0); + // Deduplicate concurrent calls for the same key so a stampede of // requests while the first promise is still pending doesn't run the // expensive function N times. @@ -79,9 +83,14 @@ export function lruMemoize( const keyFn = options.keyFn ?? ((args: Args): string => { - const serialized = JSON.stringify(args, (_, v) => - typeof v === 'bigint' ? v.toString() : v - ); + const serialized = JSON.stringify(args, (_, v) => { + // Tag bigints with a dedicated discriminator so + // `[1n]` and `["1"]` don't collapse to the same key. + if (typeof v === 'bigint') { + return { __memo_bigint__: v.toString() }; + } + return v; + }); return createHash('sha256').update(serialized).digest('hex'); }); @@ -89,7 +98,7 @@ export function lruMemoize( const key = keyFn(args); const hit = cache.get(key); if (hit !== undefined) { - return hit; + return hit.value; } const pending = inFlight.get(key); if (pending) { @@ -98,7 +107,7 @@ export function lruMemoize( const promise = (async () => { try { const value = await fn(...args); - cache.set(key, value); + cache.set(key, { value }); return value; } finally { inFlight.delete(key); diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts index 9a718a0..21e1cfb 100644 --- a/src/validation/tool-schemas.ts +++ b/src/validation/tool-schemas.ts @@ -438,13 +438,27 @@ export const OptimizationStorageSchema = z.discriminatedUnion('operation', [ }), ]); -// 73. context_delta -export const ContextDeltaSchema = z.object({ - operation: z.enum(['compute-delta', 'seed', 'clear']), - sessionId: z.string(), - filePath: z.string(), - currentContent: z.string().optional(), -}); +// 73. context_delta — discriminated on operation so compute-delta and +// seed require currentContent at validation time rather than runtime. +export const ContextDeltaSchema = z.discriminatedUnion('operation', [ + z.object({ + operation: z.literal('compute-delta'), + sessionId: z.string().min(1), + filePath: z.string().min(1), + currentContent: z.string(), + }), + z.object({ + operation: z.literal('seed'), + sessionId: z.string().min(1), + filePath: z.string().min(1), + currentContent: z.string(), + }), + z.object({ + operation: z.literal('clear'), + sessionId: z.string().min(1), + filePath: z.string().min(1), + }), +]); // Map tool names to their schemas for easy lookup export const toolSchemaMap: Record> = { diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts index 5ef483d..b0dae36 100644 --- a/tests/unit/lru-memoize.test.ts +++ b/tests/unit/lru-memoize.test.ts @@ -81,4 +81,33 @@ describe('lruMemoize', () => { // Stampede collapsed into a single invocation. expect(calls).toBe(1); }); + + it('memoizes a legitimately-undefined return value', async () => { + let calls = 0; + const fn = async (): Promise => { + calls++; + return undefined; + }; + const memo = lruMemoize(fn, { name: 'test-undefined', maxSize: 10 }); + expect(await memo()).toBeUndefined(); + expect(await memo()).toBeUndefined(); + // Without envelope-style storage, the second call would re-run fn. + expect(calls).toBe(1); + }); + + it('distinguishes bigint args from string args in the default key', async () => { + let calls = 0; + const fn = async (x: unknown) => { + calls++; + return String(x); + }; + const memo = lruMemoize(fn as (x: unknown) => Promise, { + name: 'test-bigint-collision', + maxSize: 10, + }); + expect(await memo(1n)).toBe('1'); + expect(await memo('1')).toBe('1'); + // Two distinct args ⇒ two distinct cache keys ⇒ two invocations. + expect(calls).toBe(2); + }); }); diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts index 4535f3a..ffe6c6e 100644 --- a/tests/unit/session.test.ts +++ b/tests/unit/session.test.ts @@ -49,7 +49,9 @@ describe('Session', () => { expect((await session.getHistoryTokenCount()) > 50).toBe(true); await session.compressHistory(); const history = session.getHistory(); - expect(history[0].role).toBe('system'); + // Summary is stored as `assistant` (never `system`) so that + // user-derived text can't be elevated into system-role context. + expect(history[0].role).toBe('assistant'); expect(history[0].content.startsWith('[summary')).toBe(true); expect(history.length).toBeLessThan(10); }); From 9e02c480b9b88a37cf5c9c0f930c8f85facaf8bd Mon Sep 17 00:00:00 2001 From: Franklin Moormann Date: Sun, 19 Apr 2026 22:56:02 -0400 Subject: [PATCH 26/26] ci(security): drop risky overrides, add informational full-tree audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the two remaining coderabbit findings: - Remove the package.json `overrides` for brace-expansion and picomatch. The picomatch ^4.0.4 override was risky — it forces a major version on every transitive consumer, and can break packages that declare older picomatch majors. `npm audit --omit=dev` already reports 0 vulnerabilities without the override because the remaining vulns live inside @semantic-release/npm's bundled npm (dev-only, never shipped), and that's the scope the Security Audit step gates on. - quality-gates.yml: keep the `--omit=dev` gating audit, but also run a full-tree `npm audit` and write audit-results-full.json so dev-dep findings stay visible even on repos/forks without a SNYK_TOKEN. Both artifacts are uploaded. The step still fails only on critical prod vulnerabilities. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/quality-gates.yml | 30 +++++--- package-lock.json | 113 ++++++++++++++++++++++++---- package.json | 4 - 3 files changed, 119 insertions(+), 28 deletions(-) diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml index 9c01e5b..81458cc 100644 --- a/.github/workflows/quality-gates.yml +++ b/.github/workflows/quality-gates.yml @@ -129,34 +129,44 @@ jobs: - name: Run npm audit id: audit run: | - # Audit production deps only — dev deps like @semantic-release/npm - # bundle their own node_modules (vulnerable transitively but never - # shipped to end users), which would otherwise fail CI on - # unfixable issues. The "Dependency Vulnerability Scan" step below - # still covers the full tree. + # Gating audit — prod deps only. Dev deps (e.g. @semantic-release/npm, + # which bundles its own node_modules/npm) can carry unfixable + # transitive vulnerabilities that never ship to end users, and + # failing CI on those is noise. The "Dependency Vulnerability Scan" + # step below still covers the full tree for visibility. npm audit --omit=dev --json > audit-results.json || true + # Informational audit — full tree, including dev deps. Always + # collected so teams can review non-gating findings even on + # forks/repos without a SNYK_TOKEN. + npm audit --json > audit-results-full.json || true + # Check for high/critical vulnerabilities using Python for reliable JSON parsing HIGH_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))") CRITICAL_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))") + FULL_CRITICAL=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))") + FULL_HIGH=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))") # Ensure we have valid integers HIGH_VULNS=${HIGH_VULNS:-0} CRITICAL_VULNS=${CRITICAL_VULNS:-0} + FULL_CRITICAL=${FULL_CRITICAL:-0} + FULL_HIGH=${FULL_HIGH:-0} echo "high_vulnerabilities=$HIGH_VULNS" >> $GITHUB_OUTPUT echo "critical_vulnerabilities=$CRITICAL_VULNS" >> $GITHUB_OUTPUT - echo "Found $CRITICAL_VULNS critical and $HIGH_VULNS high severity vulnerabilities" + echo "Production: $CRITICAL_VULNS critical, $HIGH_VULNS high" + echo "Full tree: $FULL_CRITICAL critical, $FULL_HIGH high (informational)" if [ "$CRITICAL_VULNS" -gt 0 ] 2>/dev/null; then - echo "Error: Found $CRITICAL_VULNS critical vulnerabilities" + echo "Error: Found $CRITICAL_VULNS critical vulnerabilities in production deps" npm audit --omit=dev || true exit 1 fi if [ "$HIGH_VULNS" -gt 0 ] 2>/dev/null; then - echo "Warning: Found $HIGH_VULNS high vulnerabilities" + echo "Warning: Found $HIGH_VULNS high vulnerabilities in production deps" # npm audit exits non-zero when vulns exist — don't let that # turn a "warning" into a failed step. npm audit --omit=dev || true @@ -167,7 +177,9 @@ jobs: uses: actions/upload-artifact@v4 with: name: security-audit-${{ github.sha }} - path: audit-results.json + path: | + audit-results.json + audit-results-full.json retention-days: 30 - name: Comment PR with security audit diff --git a/package-lock.json b/package-lock.json index ed80f90..a34c8da 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1144,6 +1144,17 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, + "node_modules/@eslint/config-array/node_modules/brace-expansion": { + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, "node_modules/@eslint/config-array/node_modules/minimatch": { "version": "3.1.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", @@ -1214,6 +1225,17 @@ "dev": true, "license": "Python-2.0" }, + "node_modules/@eslint/eslintrc/node_modules/brace-expansion": { + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, "node_modules/@eslint/eslintrc/node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -3846,6 +3868,19 @@ "node": ">= 8" } }, + "node_modules/anymatch/node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -4656,6 +4691,13 @@ "dot-prop": "^5.1.0" } }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, "node_modules/config-chain": { "version": "1.1.13", "resolved": "https://registry.npmjs.org/config-chain/-/config-chain-1.1.13.tgz", @@ -5552,6 +5594,17 @@ "url": "https://opencollective.com/eslint" } }, + "node_modules/eslint/node_modules/brace-expansion": { + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, "node_modules/eslint/node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -6469,6 +6522,27 @@ "node": ">=10.13.0" } }, + "node_modules/glob/node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/glob/node_modules/brace-expansion": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", + "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + } + }, "node_modules/glob/node_modules/minimatch": { "version": "10.2.4", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", @@ -6484,21 +6558,6 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/glob/node_modules/minimatch/node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "license": "MIT" - }, - "node_modules/glob/node_modules/minimatch/node_modules/brace-expansion": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", - "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, "node_modules/global-directory": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz", @@ -8458,6 +8517,19 @@ "node": ">=8.6" } }, + "node_modules/micromatch/node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/mime": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/mime/-/mime-4.1.0.tgz", @@ -12992,6 +13064,17 @@ "node": ">=8" } }, + "node_modules/test-exclude/node_modules/brace-expansion": { + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, "node_modules/test-exclude/node_modules/glob": { "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", diff --git a/package.json b/package.json index 8d380c4..d9fb071 100644 --- a/package.json +++ b/package.json @@ -131,9 +131,5 @@ "lru-cache": "^11.2.2", "tiktoken": "^1.0.22", "zod": ">=3.25.0 <5" - }, - "overrides": { - "brace-expansion": "^2.0.2", - "picomatch": "^4.0.4" } }