From 443b5b2c9f4b2f23ffb8848ab2ab1f0546d2560d Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 2 Nov 2025 19:57:08 -0500
Subject: [PATCH 01/26] feat: implement lru cache and sophisticated token
 counting (issues #4 and #5)

- add lrucache class with ttl support and statistics tracking
- add tokencounter class with google ai api integration
- implement content-type aware token estimation (code/json/markdown/text)
- integrate lru caching for token counts (200 entries, 30min ttl)
- add automatic eviction and periodic cleanup for cache
- initialize global tokencounter singleton with api key from environment

implements issue #4: sophisticated token counting beyond character/4
implements issue #5: lru cache for expensive operations

generated with claude code

co-authored-by: claude <noreply@anthropic.com>
---
 .../handlers/token-optimizer-orchestrator.ps1 | 265 ++++++++++++++++++
 1 file changed, 265 insertions(+)

diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index 130e398..a4f7862 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -58,6 +58,271 @@ $OPTIMIZATION_QUALITY = 11  # Maximum compression quality
 $HASH_PREFIX = "hash:"
 $HASH_LENGTH = 32
 
+# =============================================================================
+# LRU CACHE CLASSES (Issue #5)
+# =============================================================================
+class LruCacheEntry {
+    [object]$Value
+    [datetime]$Timestamp
+
+    LruCacheEntry([object]$value) {
+        $this.Value = $value
+        $this.Timestamp = Get-Date
+    }
+}
+
+class LruCache {
+    [System.Collections.Specialized.OrderedDictionary]$Cache
+    [int]$MaxSize
+    [int]$TtlSeconds
+    [int]$HitCount = 0
+    [int]$MissCount = 0
+    [int]$EvictionCount = 0
+
+    LruCache([int]$maxSize, [int]$ttlSeconds) {
+        $this.Cache = [System.Collections.Specialized.OrderedDictionary]::new()
+        $this.MaxSize = $maxSize
+        $this.TtlSeconds = $ttlSeconds
+    }
+
+    # Get value from cache (returns $null if not found or expired)
+    [object] Get([string]$key) {
+        if (-not $this.Cache.Contains($key)) {
+            $this.MissCount++
+            return $null
+        }
+
+        $entry = $this.Cache[$key]
+
+        # Check TTL expiration
+        if ($this.TtlSeconds -gt 0) {
+            $age = ((Get-Date) - $entry.Timestamp).TotalSeconds
+            if ($age -gt $this.TtlSeconds) {
+                $this.Cache.Remove($key)
+                $this.MissCount++
+                $this.EvictionCount++
+                return $null
+            }
+        }
+
+        # Move to end (most recently used) by removing and re-adding
+        $value = $entry.Value
+        $this.Cache.Remove($key)
+        $this.Cache[$key] = [LruCacheEntry]::new($value)
+
+        $this.HitCount++
+        return $value
+    }
+
+    # Set value in cache
+    [void] Set([string]$key, [object]$value) {
+        # Remove if already exists (to re-insert at end)
+        if ($this.Cache.Contains($key)) {
+            $this.Cache.Remove($key)
+        }
+
+        # Evict least recently used if at capacity
+        if ($this.Cache.Count -ge $this.MaxSize) {
+            # First key is least recently used (OrderedDictionary maintains insertion order)
+            $firstKey = @($this.Cache.Keys)[0]
+            $this.Cache.Remove($firstKey)
+            $this.EvictionCount++
+        }
+
+        # Insert at end (most recently used)
+        $this.Cache[$key] = [LruCacheEntry]::new($value)
+    }
+
+    # Check if key exists and is not expired
+    [bool] ContainsKey([string]$key) {
+        return $null -ne $this.Get($key)
+    }
+
+    # Clear all entries
+    [void] Clear() {
+        $this.Cache.Clear()
+        $this.HitCount = 0
+        $this.MissCount = 0
+        $this.EvictionCount = 0
+    }
+
+    # Get cache statistics
+    [hashtable] GetStats() {
+        $totalRequests = $this.HitCount + $this.MissCount
+        return @{
+            Size = $this.Cache.Count
+            MaxSize = $this.MaxSize
+            HitCount = $this.HitCount
+            MissCount = $this.MissCount
+            EvictionCount = $this.EvictionCount
+            HitRate = if ($totalRequests -gt 0) {
+                [Math]::Round(($this.HitCount / $totalRequests) * 100, 2)
+            } else { 0 }
+        }
+    }
+
+    # Cleanup expired entries (call periodically)
+    [int] CleanupExpired() {
+        if ($this.TtlSeconds -le 0) { return 0 }
+
+        $removed = 0
+        $keysToRemove = @()
+
+        foreach ($key in $this.Cache.Keys) {
+            $entry = $this.Cache[$key]
+            $age = ((Get-Date) - $entry.Timestamp).TotalSeconds
+            if ($age -gt $this.TtlSeconds) {
+                $keysToRemove += $key
+            }
+        }
+
+        foreach ($key in $keysToRemove) {
+            $this.Cache.Remove($key)
+            $removed++
+        }
+
+        $this.EvictionCount += $removed
+        return $removed
+    }
+}
+
+# =============================================================================
+# TOKEN COUNTER CLASS (Issue #4)
+# =============================================================================
+class TokenCounter {
+    [string]$ApiKey
+    [string]$Model
+    [LruCache]$Cache
+    [int]$ApiCallCount = 0
+    [int]$CacheHitCount = 0
+    [int]$EstimationCount = 0
+
+    TokenCounter([string]$apiKey, [string]$model) {
+        $this.ApiKey = $apiKey
+        $this.Model = $model
+        # Use LRU cache: Max 200 entries, TTL 30 minutes (1800 seconds)
+        $this.Cache = [LruCache]::new(200, 1800)
+    }
+
+    # Primary method: try API first, fall back to estimation
+    [int] CountTokens([string]$text, [string]$contentType) {
+        # Check cache first (using content hash as key)
+        $textHash = [System.BitConverter]::ToString(
+            [System.Security.Cryptography.SHA256]::Create().ComputeHash(
+                [System.Text.Encoding]::UTF8.GetBytes($text)
+            )
+        ).Replace("-", "")
+        $cacheKey = "${contentType}:${textHash}"
+
+        $cached = $this.Cache.Get($cacheKey)
+        if ($null -ne $cached) {
+            $this.CacheHitCount++
+            return $cached
+        }
+
+        # Try API call if key is available
+        if ($this.ApiKey) {
+            try {
+                $tokenCount = $this.CountTokensViaAPI($text)
+                $this.ApiCallCount++
+                $this.Cache.Set($cacheKey, $tokenCount)
+                return $tokenCount
+            } catch {
+                # API failed, fall back to estimation
+                Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN"
+            }
+        }
+
+        # Fallback to improved estimation
+        $estimated = $this.EstimateTokens($text, $contentType)
+        $this.EstimationCount++
+        $this.Cache.Set($cacheKey, $estimated)
+        return $estimated
+    }
+
+    # Google AI API integration
+    [int] CountTokensViaAPI([string]$text) {
+        $requestBody = @{
+            contents = @(
+                @{
+                    parts = @(
+                        @{
+                            text = $text
+                        }
+                    )
+                }
+            )
+        } | ConvertTo-Json -Depth 10 -Compress
+
+        $uri = "https://generativelanguage.googleapis.com/v1beta/models/$($this.Model):countTokens?key=$($this.ApiKey)"
+
+        $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5
+
+        return $response.totalTokens
+    }
+
+    # Improved estimation with content-type awareness
+    [int] EstimateTokens([string]$text, [string]$contentType) {
+        $baseRatio = [Math]::Ceiling($text.Length / 4.0)
+
+        switch ($contentType) {
+            "code" {
+                # Code has more tokens per character due to symbols/keywords
+                return [Math]::Ceiling($baseRatio * 1.2)
+            }
+            "json" {
+                # JSON structures add token overhead for delimiters
+                return [Math]::Ceiling($baseRatio * 1.15)
+            }
+            "markdown" {
+                # Markdown formatting adds token overhead
+                return [Math]::Ceiling($baseRatio * 1.1)
+            }
+            "text" {
+                # Plain text is slightly less than base ratio
+                return [Math]::Ceiling($baseRatio * 0.95)
+            }
+            default {
+                return $baseRatio
+            }
+        }
+    }
+
+    # Content type detection based on file extension or tool name
+    [string] DetectContentType([string]$identifier) {
+        switch -Regex ($identifier) {
+            '\.(cs|ps1|ts|js|py|java|cpp|c|h|go|rs|rb|php)$' { return "code" }
+            '\.(json|jsonc)$' { return "json" }
+            '\.(md|markdown)$' { return "markdown" }
+            '^Read$|^Grep$|^Bash$' { return "code" }
+            default { return "text" }
+        }
+    }
+
+    # Get cache statistics
+    [hashtable] GetStats() {
+        $cacheStats = $this.Cache.GetStats()
+        $totalCalls = $this.ApiCallCount + $this.CacheHitCount + $this.EstimationCount
+        return @{
+            ApiCalls = $this.ApiCallCount
+            CacheHits = $this.CacheHitCount
+            EstimationCount = $this.EstimationCount
+            CacheSize = $cacheStats.Size
+            CacheHitRate = $cacheStats.HitRate
+            TotalCalls = $totalCalls
+        }
+    }
+}
+
+# Initialize global TokenCounter (singleton pattern)
+if (-not $script:TokenCounter) {
+    $apiKey = $env:GOOGLE_AI_API_KEY
+    if (-not $apiKey) {
+        Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow
+    }
+    $script:TokenCounter = [TokenCounter]::new($apiKey, "gemini-2.0-flash-exp")
+}
+
 # PHASE 2 FIX: Deterministic cache key generation
 # Fixes 0% cache hit rate by ensuring identical operations produce identical keys
 function Get-DeterministicCacheKey {

From 69a35557c342435fdba637df046e32dabeaa16fc Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 2 Nov 2025 21:06:05 -0500
Subject: [PATCH 02/26] fix: address all pr review comments

- add type guards to prevent class re-definition errors (CRITICAL)
- fix sha256 resource disposal with proper try/finally
- replace write-log with write-host to fix ordering issue
- fix double-counting in getstats totalcalls calculation
- make model name configurable via google_ai_model env var
- improve api error handling for timeout/network errors
- fix detectcontenttype regex for exact matching

addresses feedback from github copilot and coderabbit reviews
---
 .../handlers/token-optimizer-orchestrator.ps1 | 68 +++++++++++++------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index a4f7862..6726ee6 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -61,17 +61,21 @@ $HASH_LENGTH = 32
 # =============================================================================
 # LRU CACHE CLASSES (Issue #5)
 # =============================================================================
-class LruCacheEntry {
-    [object]$Value
-    [datetime]$Timestamp
+# Guard against class re-definition on subsequent script loads
+if (-not ('LruCacheEntry' -as [type])) {
+    class LruCacheEntry {
+        [object]$Value
+        [datetime]$Timestamp
 
-    LruCacheEntry([object]$value) {
-        $this.Value = $value
-        $this.Timestamp = Get-Date
+        LruCacheEntry([object]$value) {
+            $this.Value = $value
+            $this.Timestamp = Get-Date
+        }
     }
 }
 
-class LruCache {
+if (-not ('LruCache' -as [type])) {
+    class LruCache {
     [System.Collections.Specialized.OrderedDictionary]$Cache
     [int]$MaxSize
     [int]$TtlSeconds
@@ -185,11 +189,13 @@ class LruCache {
         return $removed
     }
 }
+}
 
 # =============================================================================
 # TOKEN COUNTER CLASS (Issue #4)
 # =============================================================================
-class TokenCounter {
+if (-not ('TokenCounter' -as [type])) {
+    class TokenCounter {
     [string]$ApiKey
     [string]$Model
     [LruCache]$Cache
@@ -206,12 +212,17 @@ class TokenCounter {
 
     # Primary method: try API first, fall back to estimation
     [int] CountTokens([string]$text, [string]$contentType) {
-        # Check cache first (using content hash as key)
-        $textHash = [System.BitConverter]::ToString(
-            [System.Security.Cryptography.SHA256]::Create().ComputeHash(
-                [System.Text.Encoding]::UTF8.GetBytes($text)
-            )
-        ).Replace("-", "")
+        # Check cache first (using content hash as key with proper disposal)
+        $sha256 = [System.Security.Cryptography.SHA256]::Create()
+        try {
+            $textHash = [System.BitConverter]::ToString(
+                $sha256.ComputeHash(
+                    [System.Text.Encoding]::UTF8.GetBytes($text)
+                )
+            ).Replace("-", "")
+        } finally {
+            $sha256.Dispose()
+        }
         $cacheKey = "${contentType}:${textHash}"
 
         $cached = $this.Cache.Get($cacheKey)
@@ -228,8 +239,8 @@ class TokenCounter {
                 $this.Cache.Set($cacheKey, $tokenCount)
                 return $tokenCount
             } catch {
-                # API failed, fall back to estimation
-                Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN"
+                # API failed, fall back to estimation (use Write-Host since Write-Log defined later)
+                Write-Host "WARN: Token counting API failed: $($_.Exception.Message), falling back to estimation" -ForegroundColor Yellow
             }
         }
 
@@ -256,7 +267,22 @@ class TokenCounter {
 
         $uri = "https://generativelanguage.googleapis.com/v1beta/models/$($this.Model):countTokens?key=$($this.ApiKey)"
 
-        $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5
+        try {
+            $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5
+        } catch {
+            $ex = $_.Exception
+            if ($ex -is [System.Net.WebException]) {
+                if ($ex.Status -eq [System.Net.WebExceptionStatus]::Timeout) {
+                    throw "Token counting API timeout after 5 seconds"
+                } elseif ($ex.Status -eq [System.Net.WebExceptionStatus]::ConnectFailure) {
+                    throw "Token counting API network error (connect failure)"
+                } else {
+                    throw "Token counting API network error: $($ex.Status)"
+                }
+            } else {
+                throw
+            }
+        }
 
         return $response.totalTokens
     }
@@ -294,7 +320,7 @@ class TokenCounter {
             '\.(cs|ps1|ts|js|py|java|cpp|c|h|go|rs|rb|php)$' { return "code" }
             '\.(json|jsonc)$' { return "json" }
             '\.(md|markdown)$' { return "markdown" }
-            '^Read$|^Grep$|^Bash$' { return "code" }
+            '^(Read|Grep|Bash)$' { return "code" }
             default { return "text" }
         }
     }
@@ -302,7 +328,7 @@ class TokenCounter {
     # Get cache statistics
     [hashtable] GetStats() {
         $cacheStats = $this.Cache.GetStats()
-        $totalCalls = $this.ApiCallCount + $this.CacheHitCount + $this.EstimationCount
+        $totalCalls = $this.ApiCallCount + $this.EstimationCount
         return @{
             ApiCalls = $this.ApiCallCount
             CacheHits = $this.CacheHitCount
@@ -313,6 +339,7 @@ class TokenCounter {
         }
     }
 }
+}
 
 # Initialize global TokenCounter (singleton pattern)
 if (-not $script:TokenCounter) {
@@ -320,7 +347,8 @@ if (-not $script:TokenCounter) {
     if (-not $apiKey) {
         Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow
     }
-    $script:TokenCounter = [TokenCounter]::new($apiKey, "gemini-2.0-flash-exp")
+    $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" }
+    $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName)
 }
 
 # PHASE 2 FIX: Deterministic cache key generation

From 0e506ac4190efa23d90783b6e750bfe047e76c09 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 2 Nov 2025 21:06:05 -0500
Subject: [PATCH 03/26] fix: address all pr review comments

- add type guards to prevent class re-definition errors (CRITICAL)
- fix sha256 resource disposal with proper try/finally
- replace write-log with write-host to fix ordering issue
- fix double-counting in getstats totalcalls calculation
- make model name configurable via google_ai_model env var
- improve api error handling for timeout/network errors
- fix detectcontenttype regex for exact matching

addresses feedback from github copilot and coderabbit reviews
---
 .../handlers/token-optimizer-orchestrator.ps1 | 68 +++++++++++++------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index a4f7862..6726ee6 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -61,17 +61,21 @@ $HASH_LENGTH = 32
 # =============================================================================
 # LRU CACHE CLASSES (Issue #5)
 # =============================================================================
-class LruCacheEntry {
-    [object]$Value
-    [datetime]$Timestamp
+# Guard against class re-definition on subsequent script loads
+if (-not ('LruCacheEntry' -as [type])) {
+    class LruCacheEntry {
+        [object]$Value
+        [datetime]$Timestamp
 
-    LruCacheEntry([object]$value) {
-        $this.Value = $value
-        $this.Timestamp = Get-Date
+        LruCacheEntry([object]$value) {
+            $this.Value = $value
+            $this.Timestamp = Get-Date
+        }
     }
 }
 
-class LruCache {
+if (-not ('LruCache' -as [type])) {
+    class LruCache {
     [System.Collections.Specialized.OrderedDictionary]$Cache
     [int]$MaxSize
     [int]$TtlSeconds
@@ -185,11 +189,13 @@ class LruCache {
         return $removed
     }
 }
+}
 
 # =============================================================================
 # TOKEN COUNTER CLASS (Issue #4)
 # =============================================================================
-class TokenCounter {
+if (-not ('TokenCounter' -as [type])) {
+    class TokenCounter {
     [string]$ApiKey
     [string]$Model
     [LruCache]$Cache
@@ -206,12 +212,17 @@ class TokenCounter {
 
     # Primary method: try API first, fall back to estimation
     [int] CountTokens([string]$text, [string]$contentType) {
-        # Check cache first (using content hash as key)
-        $textHash = [System.BitConverter]::ToString(
-            [System.Security.Cryptography.SHA256]::Create().ComputeHash(
-                [System.Text.Encoding]::UTF8.GetBytes($text)
-            )
-        ).Replace("-", "")
+        # Check cache first (using content hash as key with proper disposal)
+        $sha256 = [System.Security.Cryptography.SHA256]::Create()
+        try {
+            $textHash = [System.BitConverter]::ToString(
+                $sha256.ComputeHash(
+                    [System.Text.Encoding]::UTF8.GetBytes($text)
+                )
+            ).Replace("-", "")
+        } finally {
+            $sha256.Dispose()
+        }
         $cacheKey = "${contentType}:${textHash}"
 
         $cached = $this.Cache.Get($cacheKey)
@@ -228,8 +239,8 @@ class TokenCounter {
                 $this.Cache.Set($cacheKey, $tokenCount)
                 return $tokenCount
             } catch {
-                # API failed, fall back to estimation
-                Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN"
+                # API failed, fall back to estimation (use Write-Host since Write-Log defined later)
+                Write-Host "WARN: Token counting API failed: $($_.Exception.Message), falling back to estimation" -ForegroundColor Yellow
             }
         }
 
@@ -256,7 +267,22 @@ class TokenCounter {
 
         $uri = "https://generativelanguage.googleapis.com/v1beta/models/$($this.Model):countTokens?key=$($this.ApiKey)"
 
-        $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5
+        try {
+            $response = Invoke-RestMethod -Uri $uri -Method POST -ContentType "application/json" -Body $requestBody -TimeoutSec 5
+        } catch {
+            $ex = $_.Exception
+            if ($ex -is [System.Net.WebException]) {
+                if ($ex.Status -eq [System.Net.WebExceptionStatus]::Timeout) {
+                    throw "Token counting API timeout after 5 seconds"
+                } elseif ($ex.Status -eq [System.Net.WebExceptionStatus]::ConnectFailure) {
+                    throw "Token counting API network error (connect failure)"
+                } else {
+                    throw "Token counting API network error: $($ex.Status)"
+                }
+            } else {
+                throw
+            }
+        }
 
         return $response.totalTokens
     }
@@ -294,7 +320,7 @@ class TokenCounter {
             '\.(cs|ps1|ts|js|py|java|cpp|c|h|go|rs|rb|php)$' { return "code" }
             '\.(json|jsonc)$' { return "json" }
             '\.(md|markdown)$' { return "markdown" }
-            '^Read$|^Grep$|^Bash$' { return "code" }
+            '^(Read|Grep|Bash)$' { return "code" }
             default { return "text" }
         }
     }
@@ -302,7 +328,7 @@ class TokenCounter {
     # Get cache statistics
     [hashtable] GetStats() {
         $cacheStats = $this.Cache.GetStats()
-        $totalCalls = $this.ApiCallCount + $this.CacheHitCount + $this.EstimationCount
+        $totalCalls = $this.ApiCallCount + $this.EstimationCount
         return @{
             ApiCalls = $this.ApiCallCount
             CacheHits = $this.CacheHitCount
@@ -313,6 +339,7 @@ class TokenCounter {
         }
     }
 }
+}
 
 # Initialize global TokenCounter (singleton pattern)
 if (-not $script:TokenCounter) {
@@ -320,7 +347,8 @@ if (-not $script:TokenCounter) {
     if (-not $apiKey) {
         Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow
     }
-    $script:TokenCounter = [TokenCounter]::new($apiKey, "gemini-2.0-flash-exp")
+    $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" }
+    $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName)
 }
 
 # PHASE 2 FIX: Deterministic cache key generation

From 0c11a46ecb085b7e958316d5549b9709ac7fc743 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 2 Nov 2025 22:37:41 -0500
Subject: [PATCH 04/26] refactor(dispatcher): use write-verbose for logging

---
 hooks/dispatcher.ps1 | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1
index acc6187..5abaaaf 100644
--- a/hooks/dispatcher.ps1
+++ b/hooks/dispatcher.ps1
@@ -2,6 +2,7 @@
 # Minimal dispatcher focused on token optimization via MCP
 # Replaces 400+ line mess with clean architecture
 
+[CmdletBinding()]
 param([string]$Phase = "")
 
 $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers"
@@ -11,7 +12,9 @@ $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1"
 function Write-Log {
     param([string]$Message)
     $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
-    "[$timestamp] [$Phase] $Message" | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8
+    $logMessage = "[$timestamp] [$Phase] $Message"
+    $logMessage | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8
+    Write-Verbose $logMessage
 }
 
 function Block-Tool {

From d3172df8d48d43d209458a5e8d4a32bfa6c638ab Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 2 Nov 2025 22:39:42 -0500
Subject: [PATCH 05/26] refactor(logging): create shared logging module

---
 hooks/dispatcher.ps1                          |  9 ++-----
 .../handlers/token-optimizer-orchestrator.ps1 | 27 +------------------
 hooks/helpers/logging.ps1                     | 27 +++++++++++++++++++
 3 files changed, 30 insertions(+), 33 deletions(-)
 create mode 100644 hooks/helpers/logging.ps1

diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1
index 5abaaaf..c813350 100644
--- a/hooks/dispatcher.ps1
+++ b/hooks/dispatcher.ps1
@@ -8,14 +8,9 @@ param([string]$Phase = "")
 $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers"
 $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\dispatcher.log"
 $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1"
+. "$PSScriptRoot\helpers\logging.ps1"
+
 
-function Write-Log {
-    param([string]$Message)
-    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
-    $logMessage = "[$timestamp] [$Phase] $Message"
-    $logMessage | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8
-    Write-Verbose $logMessage
-}
 
 function Block-Tool {
     param([string]$Reason)
diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index 6726ee6..92cc19f 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -27,6 +27,7 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) {
 
 $HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers"
 $INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1"
+. "$PSScriptRoot\..\helpers\logging.ps1"
 $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log"
 $SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt"
 $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data"
@@ -523,33 +524,7 @@ function Start-LogFlushTimer {
     }
 }
 
-function Write-Log {
-    param(
-        [string]$Message,
-        [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO",
-        [string]$Context = ""
-    )
-
-    # Check if debug logging is disabled
-    $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) {
-        $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true'
-    } else {
-        $true  # Default: enabled
-    }
-
-    if ($Level -eq 'DEBUG' -and -not $debugLogging) {
-        return
-    }
 
-    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
-    $contextPart = if ($Context) { " [$Context]" } else { "" }
-    $logEntry = "[$timestamp] [$Level]$contextPart $Message"
-    try {
-        $logEntry | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 -ErrorAction SilentlyContinue
-    } catch {
-        # Silently fail
-    }
-}
 
 # Removed - now using direct invoke-mcp.ps1 calls
 
diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1
new file mode 100644
index 0000000..755306f
--- /dev/null
+++ b/hooks/helpers/logging.ps1
@@ -0,0 +1,27 @@
+[CmdletBinding()]
+param()
+
+function Write-Log {
+    param(
+        [string]$Message,
+        [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO",
+        [string]$Context = ""
+    )
+
+    # Check if debug logging is disabled
+    $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) {
+        $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true'
+    } else {
+        $true  # Default: enabled
+    }
+
+    if ($Level -eq 'DEBUG' -and -not $debugLogging) {
+        return
+    }
+
+    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
+    $contextPart = if ($Context) { " [$Context]" } else { "" }
+    $logMessage = "[$timestamp] [$Level]$contextPart $Message"
+    $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8
+    Write-Verbose $logMessage
+}
\ No newline at end of file

From 0428d1cee8ffd91608a5f0be9bc4f761005c0174 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 2 Nov 2025 22:41:06 -0500
Subject: [PATCH 06/26] refactor(logging): replace write-host with write-log

---
 .../handlers/token-optimizer-orchestrator.ps1 | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index 92cc19f..5dc086b 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -12,7 +12,7 @@ param(
 
 # DIAGNOSTIC: Log script version/load time to verify latest version is being used
 $SCRIPT_VERSION = Get-Date -Format 'yyyyMMdd.HHmmss'
-Write-Host "DEBUG: token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" -ForegroundColor Cyan
+Write-Log "token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" "DEBUG"
 
 # Read JSON from temp file if provided
 # DO NOT delete temp file - dispatcher will clean it up after all handlers run
@@ -21,7 +21,7 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) {
     try {
         $InputJson = Get-Content -Path $InputJsonFile -Raw -Encoding UTF8
     } catch {
-        Write-Host "ERROR: Failed to read InputJsonFile: $($_.Exception.Message)" -ForegroundColor Red
+        Write-Log "Failed to read InputJsonFile: $($_.Exception.Message)" "ERROR"
     }
 }
 
@@ -240,8 +240,8 @@ if (-not ('TokenCounter' -as [type])) {
                 $this.Cache.Set($cacheKey, $tokenCount)
                 return $tokenCount
             } catch {
-                # API failed, fall back to estimation (use Write-Host since Write-Log defined later)
-                Write-Host "WARN: Token counting API failed: $($_.Exception.Message), falling back to estimation" -ForegroundColor Yellow
+                # API failed, fall back to estimation
+                Write-Log "Token counting API failed: $($_.Exception.Message), falling back to estimation" "WARN"
             }
         }
 
@@ -346,7 +346,7 @@ if (-not ('TokenCounter' -as [type])) {
 if (-not $script:TokenCounter) {
     $apiKey = $env:GOOGLE_AI_API_KEY
     if (-not $apiKey) {
-        Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow
+        Write-Log "GOOGLE_AI_API_KEY not set, falling back to estimation only" "WARN"
     }
     $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" }
     $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName)
@@ -1852,43 +1852,43 @@ function Handle-OptimizeToolOutput {
     $ErrorActionPreference = 'Stop'
 
     try {
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Entered function."
+        Write-Log "[Handle-OptimizeToolOutput] Entered function." "DEBUG"
 
         if (-not $InputJson) {
             Write-Log "No input received for tool output optimization" "WARN"
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] No input received, returning."
+            Write-Log "[Handle-OptimizeToolOutput] No input received, returning." "DEBUG"
             return
         }
 
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Parsing InputJson..."
+        Write-Log "[Handle-OptimizeToolOutput] Parsing InputJson..." "DEBUG"
         $data = $InputJson | ConvertFrom-Json
         $toolName = $data.tool_name
         $toolOutput = $data.tool_response  # FIXED: Claude Code uses tool_response not tool_result
 
         $outputType = if ($toolOutput) { $toolOutput.GetType().Name } else { "null" }
         Write-Log "DEBUG: tool_name=$toolName, tool_response_type=$outputType, has_content=$(-not -not $toolOutput)" "DEBUG"
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. toolName=$toolName, outputType=$outputType"
+        Write-Log "[Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. toolName=$toolName, outputType=$outputType" "DEBUG"
 
         # Skip if no output or if output is already optimized
         Write-Log "DEBUG: Checking if toolOutput is null or empty" "DEBUG"
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check."
+        Write-Log "[Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check." "DEBUG"
         if (-not $toolOutput) {
             Write-Log "No tool output to optimize for: $toolName (toolOutput is null/false)" "DEBUG"
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] toolOutput is null/false, returning."
+            Write-Log "[Handle-OptimizeToolOutput] toolOutput is null/false, returning." "DEBUG"
             return
         }
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists."
+        Write-Log "[Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists." "DEBUG"
 
         # Convert output to string for token counting
         $outputText = ""
         try {
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])"
+            Write-Log "[Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])" "DEBUG"
             $outputText = if ($toolOutput -is [string]) { $toolOutput } else { $toolOutput | ConvertTo-Json -Depth 10 -ErrorAction Stop }
             Write-Log "DEBUG: Converted tool output to string. Length: $($outputText.Length)" "DEBUG"
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. Length: $($outputText.Length)"
+            Write-Log "[Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. Length: $($outputText.Length)" "DEBUG"
         } catch {
             Write-Log "ERROR: Failed to convert tool output to JSON string for ${toolName}: $($_.Exception.Message)" "ERROR"
-            Write-Host "ERROR: [Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)"
+            Write-Log "[Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)" "ERROR"
             return
         }
 

From d04cf6a8e682c458194057b0486b9e9d5f158024 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 2 Nov 2025 23:24:12 -0500
Subject: [PATCH 07/26] refactor(error-handling): use centralized error
 handling function

---
 .../handlers/token-optimizer-orchestrator.ps1 | 67 +++++++++----------
 hooks/helpers/logging.ps1                     | 12 ++++
 2 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index 5dc086b..a06e76e 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -421,7 +421,7 @@ function Read-SessionFile {
             Write-Log "Failed to acquire read lock on session file '$FilePath', retrying... ($($_.Exception.Message))" "WARN"
             Start-Sleep -Milliseconds $retryDelayMs
         } catch {
-            Write-Log "Failed to read session file '$FilePath': $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to read session file '$FilePath'"
             return $null
         }
     }
@@ -453,7 +453,7 @@ function Write-SessionFile {
             Write-Log "Failed to acquire write lock on session file '$FilePath', retrying... ($($_.Exception.Message))" "WARN"
             Start-Sleep -Milliseconds $retryDelayMs
         } catch {
-            Write-Log "Failed to write session file '$FilePath': $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to write session file '$FilePath'"
             return $false
         } finally {
             # Ensure writer and fileStream are disposed even if errors occur
@@ -499,7 +499,7 @@ function Flush-OperationLogs {
             Write-Log "Flushed $($script:OperationLogBuffer.Count) operation logs" "DEBUG"
             $script:OperationLogBuffer = @()
         } catch {
-            Write-Log "Failed to flush operation logs: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to flush operation logs"
         }
     }
 }
@@ -534,7 +534,7 @@ function Get-SessionInfo {
             $session = Read-SessionFile -FilePath $SESSION_FILE
             return $session
         } catch {
-            Write-Log "Failed to read session file: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to read session file"
         }
     }
     return $null
@@ -691,7 +691,7 @@ function Handle-LogOperation {
         Write-Log "Logged operation: $toolName ($tokens tokens)" "DEBUG"
 
     } catch {
-        Write-Log "Operation logging failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Operation logging failed"
     }
 }
 
@@ -721,7 +721,7 @@ function Handle-OptimizeSession {
         }
 
     } catch {
-        Write-Log "Session optimization failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Session optimization failed"
     }
 }
 
@@ -797,7 +797,7 @@ function Handle-ContextGuard {
         return 0  # Success - allow operation to proceed
 
     } catch {
-        Write-Log "Context guard failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Context guard failed"
         return 0  # On error, don't block
     }
 }
@@ -827,7 +827,7 @@ function Handle-PeriodicOptimize {
         }
 
     } catch {
-        Write-Log "Periodic optimize failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Periodic optimize failed"
     }
 }
 
@@ -852,7 +852,7 @@ function Handle-CacheWarmup {
         }
 
     } catch {
-        Write-Log "Cache warmup failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Cache warmup failed"
     }
 }
 
@@ -892,7 +892,7 @@ function Handle-SessionReport {
         }
 
     } catch {
-        Write-Log "Session report failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Session report failed"
     }
 }
 
@@ -974,11 +974,11 @@ function Handle-UserPromptOptimization {
                 Write-Log "Optimized user prompt: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO"
             }
         } catch {
-            Write-Log "Prompt optimization failed: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Prompt optimization failed"
         }
 
     } catch {
-        Write-Log "UserPromptOptimization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "UserPromptOptimization handler failed"
     }
 }
 
@@ -1030,7 +1030,7 @@ function Handle-SessionStartInit {
         }
 
     } catch {
-        Write-Log "SessionStartInit handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SessionStartInit handler failed"
     }
 }
 
@@ -1075,7 +1075,7 @@ function Handle-SmartDiff {
         return $null
 
     } catch {
-        Write-Log "SmartDiff handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SmartDiff handler failed"
         return $null
     }
 }
@@ -1115,7 +1115,7 @@ function Handle-SmartLogs {
         return $null
 
     } catch {
-        Write-Log "SmartLogs handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SmartLogs handler failed"
         return $null
     }
 }
@@ -1170,7 +1170,7 @@ function Handle-ToolSpecificOptimization {
         return $ToolOutput
 
     } catch {
-        Write-Log "ToolSpecificOptimization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "ToolSpecificOptimization handler failed"
         return $ToolOutput
     }
 }
@@ -1208,7 +1208,7 @@ function Handle-MetricCollector {
         return $null
 
     } catch {
-        Write-Log "MetricCollector handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "MetricCollector handler failed"
         return $null
     }
 }
@@ -1248,7 +1248,7 @@ function Handle-AlertManager {
         return $null
 
     } catch {
-        Write-Log "AlertManager handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "AlertManager handler failed"
         return $null
     }
 }
@@ -1280,7 +1280,7 @@ function Handle-HealthMonitor {
         return $null
 
     } catch {
-        Write-Log "HealthMonitor handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "HealthMonitor handler failed"
         return $null
     }
 }
@@ -1318,7 +1318,7 @@ function Handle-MonitoringIntegration {
         return $null
 
     } catch {
-        Write-Log "MonitoringIntegration handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "MonitoringIntegration handler failed"
         return $null
     }
 }
@@ -1354,7 +1354,7 @@ function Handle-AnalyzeOptimization {
         return $null
 
     } catch {
-        Write-Log "AnalyzeOptimization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "AnalyzeOptimization handler failed"
         return $null
     }
 }
@@ -1383,7 +1383,7 @@ function Handle-CacheAnalytics {
         return $null
 
     } catch {
-        Write-Log "CacheAnalytics handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheAnalytics handler failed"
         return $null
     }
 }
@@ -1413,7 +1413,7 @@ function Handle-CacheOptimizer {
         return $null
 
     } catch {
-        Write-Log "CacheOptimizer handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheOptimizer handler failed"
         return $null
     }
 }
@@ -1453,7 +1453,7 @@ function Handle-CacheCompression {
         return $Data
 
     } catch {
-        Write-Log "CacheCompression handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheCompression handler failed"
         return $Data
     }
 }
@@ -1480,7 +1480,7 @@ function Handle-CacheInvalidation {
         Write-Log "Cache invalidation completed for pattern: $Pattern" "DEBUG"
 
     } catch {
-        Write-Log "CacheInvalidation handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheInvalidation handler failed"
     }
 }
 
@@ -1520,7 +1520,7 @@ function Handle-SmartCache {
         return $null
 
     } catch {
-        Write-Log "SmartCache handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SmartCache handler failed"
         return $null
     }
 }
@@ -1568,7 +1568,7 @@ function Handle-IntelligentSummarization {
         return $Text
 
     } catch {
-        Write-Log "IntelligentSummarization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "IntelligentSummarization handler failed"
         return $Text
     }
 }
@@ -1614,7 +1614,7 @@ function Handle-PatternRecognition {
         return $null
 
     } catch {
-        Write-Log "PatternRecognition handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "PatternRecognition handler failed"
         return $null
     }
 }
@@ -1657,7 +1657,7 @@ function Handle-PredictiveAnalytics {
         return $Context
 
     } catch {
-        Write-Log "PredictiveAnalytics handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "PredictiveAnalytics handler failed"
         return $Context
     }
 }
@@ -1691,7 +1691,7 @@ function Handle-IntelligentAssistant {
         return $null
 
     } catch {
-        Write-Log "IntelligentAssistant handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "IntelligentAssistant handler failed"
         return $null
     }
 }
@@ -1836,7 +1836,7 @@ function Handle-PreToolUseOptimization {
         }
 
     } catch {
-        Write-Log "PreToolUse optimization failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "PreToolUse optimization failed"
         return 1
     }
     return 0
@@ -1909,8 +1909,7 @@ function Handle-OptimizeToolOutput {
                 Write-Log "WARN: count_tokens result did not contain expected content" "WARN"
             }
         } catch {
-            Write-Log "ERROR: Token counting failed for ${toolName}: $($_.Exception.Message)" "ERROR"
-            Write-Log "ERROR: Stack Trace: $($_.ScriptStackTrace)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Token counting failed for ${toolName}"
             return
         }
 
@@ -1996,7 +1995,7 @@ function Handle-OptimizeToolOutput {
                 Update-SessionOperation -TokensDelta $afterTokens
             }
         } catch {
-            Write-Log "Tool output optimization failed: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Tool output optimization failed"
         }
 
     } catch {
diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1
index 755306f..7b87cd0 100644
--- a/hooks/helpers/logging.ps1
+++ b/hooks/helpers/logging.ps1
@@ -24,4 +24,16 @@ function Write-Log {
     $logMessage = "[$timestamp] [$Level]$contextPart $Message"
     $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8
     Write-Verbose $logMessage
+}
+
+function Handle-Error {
+    param(
+        [System.Exception]$Exception,
+        [string]$Message = ""
+    )
+
+    $errorMessage = if ($Message) { $Message } else { $Exception.Message }
+    $stackTrace = $Exception.ScriptStackTrace
+    Write-Log "ERROR: $errorMessage" "ERROR"
+    Write-Log "StackTrace: $stackTrace" "ERROR"
 }
\ No newline at end of file

From 25251108fc030ae05fcf90c0c1177e912b441c9e Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Tue, 4 Nov 2025 21:32:09 -0500
Subject: [PATCH 08/26] feat(optimization): implement compression for stored
 optimization results

---
 .../handlers/token-optimizer-orchestrator.ps1 |  74 ++-
 src/analytics/optimization-storage.ts         | 158 +++---
 src/core/compression-engine.ts                | 222 +++------
 src/server/index.ts                           |  59 ++-
 src/tools/optimization-storage-tool.ts        |  96 ++--
 tests/benchmarks/results.json                 | 468 +++++++++---------
 tests/unit/cache-engine.test.ts               |   3 +-
 7 files changed, 547 insertions(+), 533 deletions(-)

diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index 5abcfdb..ae55748 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -1932,16 +1932,52 @@ function Handle-OptimizeToolOutput {
             Write-Log "Tool-specific optimization failed: $($_.Exception.Message)" "WARN"
         }
 
-        # Optimize using optimize_text (PHASE 4: Reduced quality for performance)
+        # Calculate SHA256 hash of the output text for caching
+        $hasher = [System.Security.Cryptography.SHA256]::Create()
+        $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText))
+        $originalTextHash = [System.BitConverter]::ToString($hashBytes).Replace("-", "").ToLower()
+
+        # Attempt to retrieve from optimization storage
         try {
-            # PHASE 2 FIX: Use content hash instead of timestamp for cache key
-            $hasher = [System.Security.Cryptography.SHA256]::Create()
-            $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText))
-            $contentHash = [Convert]::ToBase64String($hashBytes).Substring(0, 16)
+            $retrieveArgs = @{
+                operation = "retrieve"
+                originalTextHash = $originalTextHash
+            }
+            $retrieveJson = $retrieveArgs | ConvertTo-Json -Compress
+            $retrieveResultJson = & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $retrieveJson
+            $retrieveResult = if ($retrieveResultJson) { $retrieveResultJson | ConvertFrom-Json } else { $null }
+
+            if ($retrieveResult -and $retrieveResult.success) {
+                Write-Log "Cache HIT for optimization result. Hash: $originalTextHash" "INFO"
+                $optimizedTextBytes = [System.Convert]::FromBase64String($retrieveResult.optimizedText)
+                $optimizedText = [System.Text.Encoding]::UTF8.GetString($optimizedTextBytes)
+                $afterTokens = $retrieveResult.optimizedTokens
+                $saved = $retrieveResult.tokensSaved
+                $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 }
+
+                if ($script:CurrentSession) {
+                    $script:CurrentSession.cacheHits++
+                    if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) {
+                        Write-Log "Session stats updated and persisted after cache hit." "DEBUG"
+                    } else {
+                        Write-Log "Failed to persist session stats after cache hit." "ERROR"
+                    }
+                }
+
+                Write-Log "Using cached optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO"
+                Update-SessionOperation -TokensDelta $afterTokens
+                return
+            } else {
+                Write-Log "Cache MISS for optimization result. Hash: $originalTextHash" "DEBUG"
+            }
+        } catch {
+            Handle-Error -Exception $_.Exception -Message "Failed to retrieve from optimization storage"
+        }
 
+        # Optimize using optimize_text (PHASE 4: Reduced quality for performance)
+        try {
             $optimizeArgs = @{
                 text = $outputText
-                key = "tool_output_${toolName}_$contentHash"
                 quality = $script:OPTIMIZATION_QUALITY
             }
             $optimizeJson = $optimizeArgs | ConvertTo-Json -Compress
@@ -1956,34 +1992,43 @@ function Handle-OptimizeToolOutput {
                 $saved = $beforeTokens - $afterTokens
                 $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 }
 
-                # PHASE 1 FIX: Rollback logic - only use optimization if it actually helps
                 if ($afterTokens -ge $beforeTokens) {
                     Write-Log "Optimization made things worse or had no effect ($beforeTokens → $afterTokens tokens), REVERTING to original" "WARN"
-
-                    # PHASE 4 FIX: Track failure and persist immediately
                     if ($script:CurrentSession) {
                         $script:CurrentSession.optimizationFailures++
-                        # CRITICAL: Persist immediately to disk for multi-process visibility
                         if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) {
                             Write-Log "Session stats updated and persisted after optimization failure." "DEBUG"
                         } else {
                             Write-Log "Failed to persist session stats after optimization failure." "ERROR"
                         }
                     }
-
-                    # Don't update session with optimized tokens, skip this optimization
                     return
                 }
 
                 Write-Log "Optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO"
 
-                # PHASE 4 FIX: Track success and detailed stats, persist immediately
+                # Store the new optimization result
+                try {
+                    $storeArgs = @{
+                        operation = "store"
+                        originalTextHash = $originalTextHash
+                        optimizedText = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes($optimizedText))
+                        originalTokens = $beforeTokens
+                        optimizedTokens = $afterTokens
+                        tokensSaved = $saved
+                    }
+                    $storeJson = $storeArgs | ConvertTo-Json -Compress
+                    & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $storeJson
+                    Write-Log "Stored new optimization result. Hash: $originalTextHash" "DEBUG"
+                } catch {
+                    Handle-Error -Exception $_.Exception -Message "Failed to store optimization result"
+                }
+
                 if ($script:CurrentSession) {
                     $script:CurrentSession.optimizationSuccesses++
                     $script:CurrentSession.totalOriginalTokens += $beforeTokens
                     $script:CurrentSession.totalOptimizedTokens += $afterTokens
                     $script:CurrentSession.totalTokensSaved += $saved
-                    # CRITICAL: Persist immediately to disk for multi-process visibility
                     if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) {
                         Write-Log "Session stats updated and persisted after optimization success." "DEBUG"
                     } else {
@@ -1991,7 +2036,6 @@ function Handle-OptimizeToolOutput {
                     }
                 }
 
-                # Update session tokens (only if optimization helped)
                 Update-SessionOperation -TokensDelta $afterTokens
             }
         } catch {
diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts
index 99fd16f..6952ce6 100644
--- a/src/analytics/optimization-storage.ts
+++ b/src/analytics/optimization-storage.ts
@@ -1,107 +1,79 @@
-/**
- * Persistent storage for optimization results data using SQLite
- */
-
-import Database from 'better-sqlite3';
-import path from 'path';
-import os from 'os';
-import fs from 'fs';
-import { createGzip, gunzipSync } from 'zlib';
-import { promisify } from 'util';
-
-const gzip = promisify(createGzip);
+import sqlite3 from 'sqlite3';
+import { open, Database } from 'sqlite';
+import { CompressionEngine } from '../core/compression-engine';
 
 export interface OptimizationResult {
-  originalTextHash: string;
-  optimizedText: Buffer;
-  compressionAlgorithm: string;
+    originalTextHash: string;
+    optimizedText: string;
+    originalTokens: number;
+    optimizedTokens: number;
+    tokensSaved: number;
 }
 
-/**
- * SQLite-backed optimization results storage
- */
 export class SqliteOptimizationStorage {
-  private db: Database.Database;
-
-  constructor(dbPath?: string) {
-    // Default to user's home directory
-    const defaultPath = path.join(
-      os.homedir(),
-      '.token-optimizer-mcp',
-      'optimization.db'
-    );
-    const finalPath = dbPath || defaultPath;
+    private db: Database<sqlite3.Database, sqlite3.Statement>;
+    private dbPath: string;
+    private compressionEngine: CompressionEngine;
 
-    // Ensure directory exists
-    const dir = path.dirname(finalPath);
-    if (!fs.existsSync(dir)) {
-      fs.mkdirSync(dir, { recursive: true });
+    constructor(dbPath: string = './optimization.db') {
+        this.dbPath = dbPath;
+        this.compressionEngine = new CompressionEngine();
     }
 
-    this.db = new Database(finalPath);
-    this.initializeDatabase();
-  }
-
-  /**
-   * Initialize database schema
-   */
-  private initializeDatabase(): void {
-    this.db.exec(`
-      CREATE TABLE IF NOT EXISTS optimization_results (
-        id INTEGER PRIMARY KEY AUTOINCREMENT,
-        original_text_hash TEXT NOT NULL UNIQUE,
-        optimized_text BLOB NOT NULL,
-        compression_algorithm TEXT NOT NULL,
-        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
-      );
-
-      CREATE INDEX IF NOT EXISTS idx_original_text_hash ON optimization_results(original_text_hash);
-    `);
-  }
-
-  /**
-   * Save a single optimization result
-   */
-  async save(entry: OptimizationResult): Promise<void> {
-    const stmt = this.db.prepare(`
-      INSERT OR REPLACE INTO optimization_results (
-        original_text_hash, optimized_text, compression_algorithm
-      ) VALUES (?, ?, ?)
-    `);
-
-    const compressedOptimizedText = await gzip(entry.optimizedText);
-
-    stmt.run(
-      entry.originalTextHash,
-      compressedOptimizedText,
-      'gzip'
-    );
-  }
+    public async initializeDatabase(): Promise<void> {
+        this.db = await open({
+            filename: this.dbPath,
+            driver: sqlite3.Database
+        });
+
+        await this.db.exec(`
+            CREATE TABLE IF NOT EXISTS optimization_results (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                original_text_hash TEXT NOT NULL UNIQUE,
+                optimized_text_compressed BLOB NOT NULL,
+                compression_algorithm TEXT NOT NULL,
+                original_tokens INTEGER NOT NULL,
+                optimized_tokens INTEGER NOT NULL,
+                tokens_saved INTEGER NOT NULL,
+                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+            );
+        `);
+    }
 
-  /**
-   * Get an optimization result by hash
-   */
-  async get(originalTextHash: string): Promise<OptimizationResult | null> {
-    const stmt = this.db.prepare('SELECT * FROM optimization_results WHERE original_text_hash = ?');
-    const row = stmt.get(originalTextHash) as any;
+    public async save(entry: OptimizationResult): Promise<void> {
+        const compressedOptimizedText = this.compressionEngine.compress(entry.optimizedText);
 
-    if (!row) {
-      return null;
+        await this.db.run(
+            `INSERT INTO optimization_results (original_text_hash, optimized_text_compressed, compression_algorithm, original_tokens, optimized_tokens, tokens_saved)
+             VALUES (?, ?, ?, ?, ?, ?)`, 
+            [entry.originalTextHash, compressedOptimizedText.compressed, 'brotli', entry.originalTokens, entry.optimizedTokens, entry.tokensSaved]
+        );
     }
 
-    const decompressedOptimizedText = gunzipSync(row.optimized_text);
-
-    return {
-      originalTextHash: row.original_text_hash,
-      optimizedText: decompressedOptimizedText,
-      compressionAlgorithm: row.compression_algorithm,
-    };
-  }
+    public async get(originalTextHash: string): Promise<OptimizationResult | null> {
+        const row = await this.db.get(
+            'SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved FROM optimization_results WHERE original_text_hash = ?',
+            originalTextHash
+        );
+
+        if (!row) {
+            return null;
+        }
+
+        const optimizedText = this.compressionEngine.decompress(row.optimized_text_compressed);
+
+        return {
+            originalTextHash,
+            optimizedText,
+            originalTokens: row.original_tokens,
+            optimizedTokens: row.optimized_tokens,
+            tokensSaved: row.tokens_saved
+        };
+    }
 
-  /**
-   * Close the database connection
-   */
-  async close(): Promise<void> {
-    this.db.close();
-  }
+    public async close(): Promise<void> {
+        if (this.db) {
+            await this.db.close();
+        }
+    }
 }
diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts
index 2be5b9e..e7922cf 100644
--- a/src/core/compression-engine.ts
+++ b/src/core/compression-engine.ts
@@ -1,167 +1,89 @@
 import { brotliCompressSync, brotliDecompressSync, constants } from 'zlib';
 
 export interface CompressionResult {
-  compressed: Buffer;
-  originalSize: number;
-  compressedSize: number;
-  ratio: number;
-  percentSaved: number;
-}
-
-export interface CompressionOptions {
-  quality?: number; // 0-11, default 11 (max compression)
-  mode?: 'text' | 'font' | 'generic';
-}
-
-export class CompressionEngine {
-  private readonly DEFAULT_QUALITY = 11;
-
-  /**
-   * Compress text using Brotli
-   */
-  compress(text: string, options?: CompressionOptions): CompressionResult {
-    const buffer = Buffer.from(text, 'utf-8');
-    const quality = options?.quality ?? this.DEFAULT_QUALITY;
-    const mode = this.getModeConstant(options?.mode);
-
-    const compressed = brotliCompressSync(buffer, {
-      params: {
-        [constants.BROTLI_PARAM_QUALITY]: quality,
-        [constants.BROTLI_PARAM_MODE]: mode,
-      },
-    });
-
-    const originalSize = buffer.length;
-    const compressedSize = compressed.length;
-    const ratio = originalSize > 0 ? compressedSize / originalSize : 0;
-    const percentSaved =
-      originalSize > 0
-        ? ((originalSize - compressedSize) / originalSize) * 100
-        : 0;
-
-    return {
-      compressed,
-      originalSize,
-      compressedSize,
-      ratio,
-      percentSaved,
-    };
-  }
-
-  /**
-   * Decompress Brotli-compressed data
-   */
-  decompress(compressed: Buffer): string {
-    const decompressed = brotliDecompressSync(compressed);
-    return decompressed.toString('utf-8');
-  }
-
-  /**
-   * Compress to base64 string (for easier storage)
-   */
-  compressToBase64(
-    text: string,
-    options?: CompressionOptions
-  ): {
-    compressed: string;
+    compressed: Buffer;
     originalSize: number;
     compressedSize: number;
     ratio: number;
     percentSaved: number;
-  } {
-    const result = this.compress(text, options);
-
-    return {
-      compressed: result.compressed.toString('base64'),
-      originalSize: result.originalSize,
-      compressedSize: result.compressedSize,
-      ratio: result.ratio,
-      percentSaved: result.percentSaved,
-    };
-  }
-
-  /**
-   * Decompress from base64 string
-   */
-  decompressFromBase64(compressed: string): string {
-    const buffer = Buffer.from(compressed, 'base64');
-    return this.decompress(buffer);
-  }
+}
 
-  /**
-   * Check if compression would be beneficial
-   */
-  shouldCompress(text: string, minSize: number = 1000): boolean {
-    // Don't compress small texts - overhead not worth it
-    if (text.length < minSize) {
-      return false;
+export class CompressionEngine {
+    public compress(text: string, options?: { quality?: number; mode?: string; }): CompressionResult {
+        const originalSize = Buffer.byteLength(text, 'utf8');
+        if (originalSize === 0) {
+            return {
+                compressed: Buffer.alloc(0),
+                originalSize: 0,
+                compressedSize: 0,
+                ratio: 0,
+                percentSaved: 0,
+            };
+        }
+
+        const params = {
+            [constants.BROTLI_PARAM_QUALITY]: options?.quality ?? constants.BROTLI_MAX_QUALITY,
+            [constants.BROTLI_PARAM_MODE]: options?.mode === 'text' ? constants.BROTLI_MODE_TEXT : constants.BROTLI_MODE_GENERIC,
+        };
+
+        const compressed = brotliCompressSync(text, { params });
+        const compressedSize = compressed.length;
+        const ratio = compressedSize / originalSize;
+        const percentSaved = (1 - ratio) * 100;
+
+        return {
+            compressed,
+            originalSize,
+            compressedSize,
+            ratio,
+            percentSaved,
+        };
     }
 
-    // Quick sample compression to check ratio
-    const sample = text.slice(0, Math.min(text.length, 5000));
-    const result = this.compress(sample, { quality: 4 }); // Use lower quality for quick test
+    public decompress(buffer: Buffer): string {
+        if (!buffer || buffer.length === 0) {
+            return '';
+        }
+        return brotliDecompressSync(buffer).toString('utf8');
+    }
 
-    // Only compress if we get at least 20% reduction
-    return result.percentSaved >= 20;
-  }
+    public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): CompressionResult & { compressed: string } {
+        const result = this.compress(text, options);
+        return {
+            ...result,
+            compressed: result.compressed.toString('base64'),
+        };
+    }
 
-  /**
-   * Batch compress multiple texts
-   */
-  compressBatch(
-    texts: string[],
-    options?: CompressionOptions
-  ): Array<{
-    index: number;
-    compressed: Buffer;
-    originalSize: number;
-    compressedSize: number;
-    ratio: number;
-  }> {
-    return texts.map((text, index) => {
-      const result = this.compress(text, options);
-      return {
-        index,
-        compressed: result.compressed,
-        originalSize: result.originalSize,
-        compressedSize: result.compressedSize,
-        ratio: result.ratio,
-      };
-    });
-  }
+    public decompressFromBase64(base64: string): string {
+        const buffer = Buffer.from(base64, 'base64');
+        return this.decompress(buffer);
+    }
 
-  /**
-   * Get compression statistics for text
-   */
-  getCompressionStats(text: string): {
-    uncompressed: number;
-    compressed: number;
-    ratio: number;
-    percentSaved: number;
-    recommended: boolean;
-  } {
-    const result = this.compress(text);
+    public compressBatch(texts: string[]): (CompressionResult & { index: number; })[] {
+        return texts.map((text, index) => ({
+            ...this.compress(text),
+            index,
+        }));
+    }
 
-    return {
-      uncompressed: result.originalSize,
-      compressed: result.compressedSize,
-      ratio: result.ratio,
-      percentSaved: result.percentSaved,
-      recommended: this.shouldCompress(text),
-    };
-  }
+    public shouldCompress(text: string, minSize: number = 500): boolean {
+        if (Buffer.byteLength(text, 'utf8') < minSize) {
+            return false;
+        }
+        const stats = this.getCompressionStats(text);
+        return stats.percentSaved >= 20;
+    }
 
-  /**
-   * Convert mode string to Brotli constant
-   */
-  private getModeConstant(mode?: 'text' | 'font' | 'generic'): number {
-    switch (mode) {
-      case 'text':
-        return constants.BROTLI_MODE_TEXT;
-      case 'font':
-        return constants.BROTLI_MODE_FONT;
-      default:
-        return constants.BROTLI_MODE_GENERIC;
+    public getCompressionStats(text: string): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } {
+        const result = this.compress(text);
+        const recommended = result.originalSize >= 500 && result.percentSaved >= 20;
+        return {
+            uncompressed: result.originalSize,
+            compressed: result.compressedSize,
+            ratio: result.ratio,
+            percentSaved: result.percentSaved,
+            recommended: recommended,
+        };
     }
-  }
 }
diff --git a/src/server/index.ts b/src/server/index.ts
index 7c23370..8c09500 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -126,12 +126,11 @@ import {
   getMcpServerAnalyticsTool,
   GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION,
 } from '../tools/analytics/get-mcp-server-analytics.js';
-import {
-  getExportAnalyticsTool,
-  EXPORT_ANALYTICS_TOOL_DEFINITION,
-} from '../tools/analytics/export-analytics.js';
+import { getExportAnalyticsTool, EXPORT_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/export-analytics.js';
+import { OptimizationStorageTool } from '../tools/optimization-storage-tool.js';
 import { AnalyticsManager } from '../analytics/analytics-manager.js';
 
+
 // API & Database tools
 import {
   getSmartSql,
@@ -369,6 +368,43 @@ const getHookAnalytics = getHookAnalyticsTool(analyticsManager);
 const getActionAnalytics = getActionAnalyticsTool(analyticsManager);
 const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager);
 const exportAnalytics = getExportAnalyticsTool(analyticsManager);
+const optimizationStorage = new OptimizationStorageTool();
+
+const OPTIMIZATION_STORAGE_TOOL_DEFINITION = {
+    name: optimizationStorage.name,
+    description: optimizationStorage.description,
+    inputSchema: {
+        type: 'object',
+        properties: {
+            operation: {
+                type: 'string',
+                enum: ['store', 'retrieve'],
+                description: 'The operation to perform.',
+            },
+            originalTextHash: {
+                type: 'string',
+                description: 'The SHA256 hash of the original text.',
+            },
+            optimizedText: {
+                type: 'string',
+                description: 'The base64 encoded optimized text (for store operation).',
+            },
+            originalTokens: {
+                type: 'number',
+                description: 'The number of tokens in the original text (for store operation).',
+            },
+            optimizedTokens: {
+                type: 'number',
+                description: 'The number of tokens in the optimized text (for store operation).',
+            },
+            tokensSaved: {
+                type: 'number',
+                description: 'The number of tokens saved (for store operation).',
+            },
+        },
+        required: ['operation', 'originalTextHash'],
+    },
+};
 
 // Create MCP server
 const server = new Server(
@@ -655,6 +691,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
       GET_ACTION_ANALYTICS_TOOL_DEFINITION,
       GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION,
       EXPORT_ANALYTICS_TOOL_DEFINITION,
+      OPTIMIZATION_STORAGE_TOOL_DEFINITION,
     ],
   };
 });
@@ -1983,7 +2020,19 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_grep': {
         const { pattern, ...options } = args as any;
-        const result = await runSmartGrep(pattern, options);
+        const result = await smartGrep.run(pattern, options);
+        return {
+          content: [
+            {
+              type: 'text',
+              text: JSON.stringify(result, null, 2),
+            },
+          ],
+        };
+      }
+
+      case 'optimization_storage': {
+        const result = await optimizationStorage.invoke({} as TurnContext, { tool_name: name, tool_input: args });
         return {
           content: [
             {
diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts
index c4b83cd..9949d1e 100644
--- a/src/tools/optimization-storage-tool.ts
+++ b/src/tools/optimization-storage-tool.ts
@@ -1,45 +1,71 @@
-import { Tool, ToolInvocation } from '@microsoft/teams-ai';
-import { TurnContext } from 'botbuilder';
-import { SqliteOptimizationStorage } from '../analytics/optimization-storage';
+import { Tool, ToolInvocation, TurnContext } from '../types/turn-context';
+import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage';
 
 export class OptimizationStorageTool implements Tool {
-    private readonly storage: SqliteOptimizationStorage;
+    public readonly name = 'optimization_storage';
+    public readonly description = 'A tool for storing and retrieving compressed optimization results.';
+
+    private storage: SqliteOptimizationStorage;
 
     constructor() {
         this.storage = new SqliteOptimizationStorage();
+        this.storage.initializeDatabase();
+    }
+
+    public async invoke(context: TurnContext, invocation: ToolInvocation): Promise<any> {
+        const operation = invocation.arguments?.operation;
+
+        if (operation === 'store') {
+            return this.store(invocation.arguments);
+        } else if (operation === 'retrieve') {
+            return this.retrieve(invocation.arguments);
+        } else {
+            return { error: `Unknown operation: ${operation}` };
+        }
+    }
+
+    private async store(args: any): Promise<any> {
+        try {
+            const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = args;
+            if (!originalTextHash || !optimizedText || originalTokens === undefined || optimizedTokens === undefined || tokensSaved === undefined) {
+                return { error: 'Missing required arguments for store operation.' };
+            }
+
+            const optimizationResult: OptimizationResult = {
+                originalTextHash,
+                optimizedText: Buffer.from(optimizedText, 'base64').toString('utf8'),
+                originalTokens,
+                optimizedTokens,
+                tokensSaved
+            };
+
+            await this.storage.save(optimizationResult);
+            return { success: true };
+        } catch (error) {
+            return { error: `Failed to store optimization result: ${error.message}` };
+        }
     }
 
-    name = 'optimization_storage';
-    description = 'A tool for storing and retrieving optimization results.';
-
-    async invoke(context: TurnContext, invocation: ToolInvocation): Promise<any> {
-        const { operation, originalTextHash, optimizedText } = invocation.data;
-
-        switch (operation) {
-            case 'store':
-                if (!originalTextHash || !optimizedText) {
-                    return { error: 'Missing required parameters for store operation.' };
-                }
-                await this.storage.save({
-                    originalTextHash,
-                    optimizedText: Buffer.from(optimizedText, 'base64'),
-                    compressionAlgorithm: 'gzip',
-                });
-                return { success: true };
-            case 'retrieve':
-                if (!originalTextHash) {
-                    return { error: 'Missing required parameters for retrieve operation.' };
-                }
-                const result = await this.storage.get(originalTextHash);
-                if (result) {
-                    return {
-                        ...result,
-                        optimizedText: result.optimizedText.toString('base64'),
-                    };
-                }
-                return { success: false, message: 'No result found for the given hash.' };
-            default:
-                return { error: `Unknown operation: ${operation}` };
+    private async retrieve(args: any): Promise<any> {
+        try {
+            const { originalTextHash } = args;
+            if (!originalTextHash) {
+                return { error: 'Missing required argument for retrieve operation: originalTextHash' };
+            }
+
+            const result = await this.storage.get(originalTextHash);
+
+            if (result) {
+                return {
+                    success: true,
+                    ...result,
+                    optimizedText: Buffer.from(result.optimizedText, 'utf8').toString('base64')
+                };
+            } else {
+                return { success: false, message: 'Not found' };
+            }
+        } catch (error) {
+            return { error: `Failed to retrieve optimization result: ${error.message}` };
         }
     }
 }
diff --git a/tests/benchmarks/results.json b/tests/benchmarks/results.json
index b54d5be..dd72b7a 100644
--- a/tests/benchmarks/results.json
+++ b/tests/benchmarks/results.json
@@ -1,314 +1,314 @@
 [
   {
     "operation": "token-count-small",
-    "avgDuration": 0.21655369999999988,
-    "minDuration": 0.1389,
-    "maxDuration": 6.4655,
-    "p50": 0.1805,
-    "p90": 0.2406,
-    "p95": 0.2933,
-    "p99": 0.7547,
-    "throughput": 4617.792261226664,
-    "memoryUsed": 643496
+    "avgDuration": 0.3614560000000002,
+    "minDuration": 0.1311,
+    "maxDuration": 8.1673,
+    "p50": 0.1678,
+    "p90": 0.3182,
+    "p95": 1.7474,
+    "p99": 4.338,
+    "throughput": 2766.5884644327366,
+    "memoryUsed": 653224
   },
   {
     "operation": "token-count-medium",
-    "avgDuration": 1.3377186000000005,
-    "minDuration": 0.8123,
-    "maxDuration": 18.5582,
-    "p50": 1.0083,
-    "p90": 1.5469,
-    "p95": 3.0017,
-    "p99": 9.8616,
-    "throughput": 747.5413737986446,
-    "memoryUsed": 235800
+    "avgDuration": 1.9391590000000005,
+    "minDuration": 0.7915,
+    "maxDuration": 8.2937,
+    "p50": 1.1356,
+    "p90": 4.2806,
+    "p95": 5.0478,
+    "p99": 6.6412,
+    "throughput": 515.6874707025054,
+    "memoryUsed": 227808
   },
   {
     "operation": "token-count-large",
-    "avgDuration": 21.130661,
-    "minDuration": 15.5,
-    "maxDuration": 61.1893,
-    "p50": 19.1065,
-    "p90": 27.2373,
-    "p95": 29.4197,
-    "p99": 61.1893,
-    "throughput": 47.32459623482673,
-    "memoryUsed": 123872
+    "avgDuration": 30.857169999999996,
+    "minDuration": 16.9689,
+    "maxDuration": 62.7117,
+    "p50": 28.8884,
+    "p90": 44.6519,
+    "p95": 52.7573,
+    "p99": 62.7117,
+    "throughput": 32.40737890091671,
+    "memoryUsed": 163384
   },
   {
     "operation": "token-count-batch",
-    "avgDuration": 5.449602000000001,
-    "minDuration": 3.8459,
-    "maxDuration": 13.5479,
-    "p50": 4.8888,
-    "p90": 7.3654,
-    "p95": 9.2773,
-    "p99": 13.5479,
-    "throughput": 183.49963905620996,
-    "memoryUsed": 418480
+    "avgDuration": 8.504128,
+    "minDuration": 3.7843,
+    "maxDuration": 25.0622,
+    "p50": 6.9115,
+    "p90": 14.5447,
+    "p95": 17.9337,
+    "p99": 25.0622,
+    "throughput": 117.58995160938312,
+    "memoryUsed": 418424
   },
   {
     "operation": "token-estimate",
-    "avgDuration": 0.0012522999999999961,
-    "minDuration": 0.001,
-    "maxDuration": 0.0564,
-    "p50": 0.0012,
-    "p90": 0.0013,
-    "p95": 0.0014,
-    "p99": 0.0018,
-    "throughput": 798530.7035055521,
-    "memoryUsed": 149920
+    "avgDuration": 0.0014780999999999996,
+    "minDuration": 0.0011,
+    "maxDuration": 0.0561,
+    "p50": 0.0014,
+    "p90": 0.0016,
+    "p95": 0.0017,
+    "p99": 0.0021,
+    "throughput": 676544.2121642651,
+    "memoryUsed": 149912
   },
   {
     "operation": "compress-small",
-    "avgDuration": 0.9627775000000005,
-    "minDuration": 0.426,
-    "maxDuration": 28.6613,
-    "p50": 0.5542,
-    "p90": 0.8703,
-    "p95": 1.9429,
-    "p99": 13.3281,
-    "throughput": 1038.661580687126,
-    "memoryUsed": -2487288
+    "avgDuration": 1.2691879999999993,
+    "minDuration": 0.4314,
+    "maxDuration": 24.3388,
+    "p50": 0.5745,
+    "p90": 3.0667,
+    "p95": 4.4366,
+    "p99": 9.2609,
+    "throughput": 787.9053379010835,
+    "memoryUsed": -4992648
   },
   {
     "operation": "compress-medium",
-    "avgDuration": 0.7440725999999998,
-    "minDuration": 0.4942,
-    "maxDuration": 14.3456,
-    "p50": 0.6042,
-    "p90": 0.8371,
-    "p95": 1.1362,
-    "p99": 4.4296,
-    "throughput": 1343.9548775213605,
-    "memoryUsed": -1654048
+    "avgDuration": 1.5216334000000002,
+    "minDuration": 0.5063,
+    "maxDuration": 13.7982,
+    "p50": 0.6649,
+    "p90": 3.5498,
+    "p95": 6.0332,
+    "p99": 9.4093,
+    "throughput": 657.1885186011294,
+    "memoryUsed": -1948624
   },
   {
     "operation": "compress-large",
-    "avgDuration": 2.7067430000000003,
-    "minDuration": 1.9602,
-    "maxDuration": 12.7897,
-    "p50": 2.2941,
-    "p90": 3.2423,
-    "p95": 4.5883,
-    "p99": 12.7897,
-    "throughput": 369.4477089254503,
-    "memoryUsed": 337712
+    "avgDuration": 4.812509,
+    "minDuration": 2.2623,
+    "maxDuration": 13.9499,
+    "p50": 4.4611,
+    "p90": 7.8466,
+    "p95": 9.0442,
+    "p99": 13.9499,
+    "throughput": 207.79181919451992,
+    "memoryUsed": 359296
   },
   {
     "operation": "decompress",
-    "avgDuration": 0.031263400000000004,
-    "minDuration": 0.018,
-    "maxDuration": 2.1778,
-    "p50": 0.0243,
-    "p90": 0.0363,
-    "p95": 0.0642,
-    "p99": 0.0877,
-    "throughput": 31986.28428130018,
-    "memoryUsed": 3302040
+    "avgDuration": 0.05447509999999999,
+    "minDuration": 0.0181,
+    "maxDuration": 3.3958,
+    "p50": 0.0238,
+    "p90": 0.0394,
+    "p95": 0.0754,
+    "p99": 2.0722,
+    "throughput": 18357.01081778648,
+    "memoryUsed": 3294192
   },
   {
     "operation": "compress-base64",
-    "avgDuration": 0.9150339999999996,
-    "minDuration": 0.5028,
-    "maxDuration": 16.1137,
-    "p50": 0.6502,
-    "p90": 0.9426,
-    "p95": 1.6786,
-    "p99": 9.5861,
-    "throughput": 1092.855566022684,
-    "memoryUsed": -4893280
+    "avgDuration": 1.3982006000000007,
+    "minDuration": 0.5093,
+    "maxDuration": 17.5377,
+    "p50": 0.6728,
+    "p90": 3.5295,
+    "p95": 4.8434,
+    "p99": 9.593,
+    "throughput": 715.2049569997321,
+    "memoryUsed": -4899848
   },
   {
     "operation": "compress-quality-1",
-    "avgDuration": 0.0402535,
-    "minDuration": 0.0182,
-    "maxDuration": 1.2449,
-    "p50": 0.025,
-    "p90": 0.0459,
-    "p95": 0.0879,
-    "p99": 0.3411,
-    "throughput": 24842.56027426187,
-    "memoryUsed": 568648
+    "avgDuration": 0.08870700000000004,
+    "minDuration": 0.0176,
+    "maxDuration": 4.9948,
+    "p50": 0.0227,
+    "p90": 0.0636,
+    "p95": 0.0849,
+    "p99": 3.4525,
+    "throughput": 11273.06751440134,
+    "memoryUsed": 575952
   },
   {
     "operation": "compress-quality-11",
-    "avgDuration": 1.4276415,
-    "minDuration": 0.7075,
-    "maxDuration": 19.9989,
-    "p50": 0.807,
-    "p90": 1.2711,
-    "p95": 6.5825,
-    "p99": 18.9334,
-    "throughput": 700.4559618083391,
-    "memoryUsed": 560544
+    "avgDuration": 1.7096004999999996,
+    "minDuration": 0.6727,
+    "maxDuration": 8.1428,
+    "p50": 0.9267,
+    "p90": 3.7587,
+    "p95": 5.583,
+    "p99": 7.3277,
+    "throughput": 584.9319767980883,
+    "memoryUsed": 566880
   },
   {
     "operation": "cache-write",
-    "avgDuration": 0.28219179999999994,
-    "minDuration": 0.1069,
-    "maxDuration": 16.9523,
-    "p50": 0.1543,
-    "p90": 0.2669,
-    "p95": 0.3756,
-    "p99": 2.9096,
-    "throughput": 3543.689079555112,
-    "memoryUsed": 646560
+    "avgDuration": 0.6586279000000003,
+    "minDuration": 0.1024,
+    "maxDuration": 14.5587,
+    "p50": 0.1522,
+    "p90": 1.0068,
+    "p95": 4.7546,
+    "p99": 7.3433,
+    "throughput": 1518.3079854345672,
+    "memoryUsed": 640856
   },
   {
     "operation": "cache-read-memory",
-    "avgDuration": 0.25163070000000015,
-    "minDuration": 0.0793,
-    "maxDuration": 28.8506,
-    "p50": 0.1203,
-    "p90": 0.1493,
-    "p95": 0.2349,
-    "p99": 4.1812,
-    "throughput": 3974.077884773199,
-    "memoryUsed": 478016
+    "avgDuration": 0.4997243,
+    "minDuration": 0.079,
+    "maxDuration": 15.3762,
+    "p50": 0.1012,
+    "p90": 0.2858,
+    "p95": 4.201,
+    "p99": 8.698,
+    "throughput": 2001.1034084194023,
+    "memoryUsed": 478008
   },
   {
     "operation": "cache-read-disk",
-    "avgDuration": 0.45986899999999997,
-    "minDuration": 0.0884,
-    "maxDuration": 39.4532,
-    "p50": 0.1262,
-    "p90": 0.1936,
-    "p95": 0.2933,
-    "p99": 19.9866,
-    "throughput": 2174.532312462897,
-    "memoryUsed": 297264
+    "avgDuration": 0.47186479999999986,
+    "minDuration": 0.0742,
+    "maxDuration": 12.5767,
+    "p50": 0.1039,
+    "p90": 0.2734,
+    "p95": 4.0729,
+    "p99": 8.0254,
+    "throughput": 2119.2511075206294,
+    "memoryUsed": 297344
   },
   {
     "operation": "cache-delete",
-    "avgDuration": 0.5386263000000001,
-    "minDuration": 0.0733,
-    "maxDuration": 99.0395,
-    "p50": 0.1153,
-    "p90": 0.1729,
-    "p95": 0.2314,
-    "p99": 5.5755,
-    "throughput": 1856.5747717851873,
-    "memoryUsed": 444360
+    "avgDuration": 0.37161569999999977,
+    "minDuration": 0.0764,
+    "maxDuration": 18.4903,
+    "p50": 0.1064,
+    "p90": 0.2381,
+    "p95": 1.8399,
+    "p99": 5.8565,
+    "throughput": 2690.951970005575,
+    "memoryUsed": 442720
   },
   {
     "operation": "cache-stats",
-    "avgDuration": 0.32004450000000007,
-    "minDuration": 0.1652,
-    "maxDuration": 48.5593,
-    "p50": 0.2046,
-    "p90": 0.2563,
-    "p95": 0.3068,
-    "p99": 2.1103,
-    "throughput": 3124.5654901115304,
-    "memoryUsed": 765656
+    "avgDuration": 0.6591104,
+    "minDuration": 0.1651,
+    "maxDuration": 10.2148,
+    "p50": 0.197,
+    "p90": 1.7847,
+    "p95": 4.4389,
+    "p99": 7.1797,
+    "throughput": 1517.1965121472822,
+    "memoryUsed": 770976
   },
   {
     "operation": "metrics-record",
-    "avgDuration": 0.0027574999999999657,
-    "minDuration": 0.0019,
-    "maxDuration": 0.1329,
-    "p50": 0.0025,
-    "p90": 0.003,
-    "p95": 0.0033,
-    "p99": 0.0056,
-    "throughput": 362647.32547597913,
-    "memoryUsed": 459872
+    "avgDuration": 0.01021149999999998,
+    "minDuration": 0.0017,
+    "maxDuration": 4.0691,
+    "p50": 0.0022,
+    "p90": 0.0028,
+    "p95": 0.003,
+    "p99": 0.0075,
+    "throughput": 97928.80575821397,
+    "memoryUsed": 459840
   },
   {
     "operation": "metrics-cache-stats",
-    "avgDuration": 0.5644068000000001,
-    "minDuration": 0.0897,
-    "maxDuration": 113.9726,
-    "p50": 0.1075,
-    "p90": 0.1357,
-    "p95": 0.1805,
-    "p99": 18.041,
-    "throughput": 1771.7717079241424,
-    "memoryUsed": -5707968
+    "avgDuration": 0.5884749999999999,
+    "minDuration": 0.0814,
+    "maxDuration": 101.3263,
+    "p50": 0.0959,
+    "p90": 0.146,
+    "p95": 2.1334,
+    "p99": 8.5735,
+    "throughput": 1699.3075321806366,
+    "memoryUsed": -5767600
   },
   {
     "operation": "metrics-breakdown",
-    "avgDuration": 2.7876345,
-    "minDuration": 0.6644,
-    "maxDuration": 53.726,
-    "p50": 0.8498,
-    "p90": 2.1808,
-    "p95": 24.6884,
-    "p99": 40.5231,
-    "throughput": 358.7270856347918,
-    "memoryUsed": 4166392
+    "avgDuration": 3.030816000000001,
+    "minDuration": 0.6429,
+    "maxDuration": 14.331,
+    "p50": 0.9025,
+    "p90": 8.4685,
+    "p95": 10.3432,
+    "p99": 12.5062,
+    "throughput": 329.94414705478647,
+    "memoryUsed": 3432896
   },
   {
     "operation": "metrics-percentiles",
-    "avgDuration": 0.19757700000000003,
-    "minDuration": 0.0665,
-    "maxDuration": 22.145,
-    "p50": 0.0782,
-    "p90": 0.1113,
-    "p95": 0.1406,
-    "p99": 0.3571,
-    "throughput": 5061.3178659459345,
-    "memoryUsed": 6890256
+    "avgDuration": 0.2575195,
+    "minDuration": 0.0656,
+    "maxDuration": 5.1485,
+    "p50": 0.0749,
+    "p90": 0.1152,
+    "p95": 0.2302,
+    "p99": 4.9353,
+    "throughput": 3883.201077976619,
+    "memoryUsed": 6877416
   },
   {
     "operation": "e2e-optimization",
-    "avgDuration": 4.089156000000001,
-    "minDuration": 1.7855,
-    "maxDuration": 36.6479,
-    "p50": 2.4161,
-    "p90": 7.4475,
-    "p95": 13.5108,
-    "p99": 36.6479,
-    "throughput": 244.54924194625977,
-    "memoryUsed": 843760
+    "avgDuration": 10.268875999999999,
+    "minDuration": 2.0887,
+    "maxDuration": 20.2826,
+    "p50": 9.5857,
+    "p90": 16.0762,
+    "p95": 18.8724,
+    "p99": 20.2826,
+    "throughput": 97.38164137925126,
+    "memoryUsed": 848648
   },
   {
     "operation": "e2e-cache-hit",
-    "avgDuration": 0.5933224999999999,
-    "minDuration": 0.1019,
-    "maxDuration": 55.8155,
-    "p50": 0.1439,
-    "p90": 0.2178,
-    "p95": 0.3251,
-    "p99": 21.812,
-    "throughput": 1685.424031618555,
-    "memoryUsed": -14126360
+    "avgDuration": 0.5115652000000002,
+    "minDuration": 0.0905,
+    "maxDuration": 18.1006,
+    "p50": 0.1224,
+    "p90": 0.3575,
+    "p95": 3.4573,
+    "p99": 8.1231,
+    "throughput": 1954.785040108279,
+    "memoryUsed": 2419984
   },
   {
     "operation": "regression-token-count",
-    "avgDuration": 1.0176798,
-    "minDuration": 0.4497,
-    "maxDuration": 19.4484,
-    "p50": 0.5622,
-    "p90": 0.7359,
-    "p95": 3.6875,
-    "p99": 15.1693,
-    "throughput": 982.6273450647246,
-    "memoryUsed": 263416
+    "avgDuration": 1.9268184000000008,
+    "minDuration": 0.455,
+    "maxDuration": 17.2354,
+    "p50": 0.6255,
+    "p90": 5.6609,
+    "p95": 6.9407,
+    "p99": 12.8987,
+    "throughput": 518.9902691400496,
+    "memoryUsed": 263464
   },
   {
     "operation": "regression-compress",
-    "avgDuration": 1.1226146666666665,
-    "minDuration": 0.7638,
-    "maxDuration": 28.5129,
-    "p50": 0.8377,
-    "p90": 1.0199,
-    "p95": 1.281,
-    "p99": 10.5418,
-    "throughput": 890.7776013378293,
-    "memoryUsed": -5355728
+    "avgDuration": 5.232439333333333,
+    "minDuration": 0.7827,
+    "maxDuration": 61.2683,
+    "p50": 2.3238,
+    "p90": 12.9448,
+    "p95": 14.6136,
+    "p99": 17.48,
+    "throughput": 191.11545042280474,
+    "memoryUsed": -4833328
   },
   {
     "operation": "regression-cache",
-    "avgDuration": 0.4148695999999999,
-    "minDuration": 0.1768,
-    "maxDuration": 17.4546,
-    "p50": 0.2465,
-    "p90": 0.3658,
-    "p95": 0.5651,
-    "p99": 5.4624,
-    "throughput": 2410.395941278899,
-    "memoryUsed": -447896
+    "avgDuration": 2.2698532000000005,
+    "minDuration": 0.1812,
+    "maxDuration": 44.9064,
+    "p50": 0.2844,
+    "p90": 10.6146,
+    "p95": 13.1184,
+    "p99": 19.7883,
+    "throughput": 440.5571250158379,
+    "memoryUsed": -467608
   }
 ]
\ No newline at end of file
diff --git a/tests/unit/cache-engine.test.ts b/tests/unit/cache-engine.test.ts
index 8a99374..f8cabc2 100644
--- a/tests/unit/cache-engine.test.ts
+++ b/tests/unit/cache-engine.test.ts
@@ -48,7 +48,7 @@ describe('CacheEngine', () => {
     cache = new CacheEngine(testDbPath, 100);
   });
 
-  afterEach(() => {
+  afterEach(async () => {
     // Restore original environment variable
     if (originalEnv !== undefined) {
       process.env.TOKEN_OPTIMIZER_CACHE_DIR = originalEnv;
@@ -58,6 +58,7 @@ describe('CacheEngine', () => {
 
     // Clean up
     cache.close();
+    await new Promise(resolve => setTimeout(resolve, 100)); // Add a small delay
     if (fs.existsSync(testDbPath)) {
       fs.unlinkSync(testDbPath);
     }

From 90d946e99ffcb8469c31906777c4995b88ae885b Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:22:08 -0400
Subject: [PATCH 09/26] fix: align optimization-storage with project patterns
 (better-sqlite3, mcp tool shape)

The original implementation of the optimization-storage feature imported
sqlite3/sqlite (not installed) and used a TurnContext tool shape that
does not exist in this codebase, breaking `tsc --noEmit`. This commit:

- Rewrites SqliteOptimizationStorage to use better-sqlite3 (already a
  production dep) with WAL mode and a hash index.
- Rewrites OptimizationStorageTool to match the project's run(options)
  MCP tool pattern and exports an input schema with items-complete
  array fields.
- Fixes compression-engine.ts:compressToBase64 return type (was
  `CompressionResult & { compressed: string }`, producing Buffer & string
  for the overridden property).
- Swaps the server's smart_grep case from the non-existent smartGrep
  instance to the runSmartGrep CLI function.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/analytics/optimization-storage.ts  |  81 ++++++++-----
 src/core/compression-engine.ts         |   7 +-
 src/server/index.ts                    |   4 +-
 src/tools/optimization-storage-tool.ts | 158 ++++++++++++++++++-------
 4 files changed, 171 insertions(+), 79 deletions(-)

diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts
index 6952ce6..cd6991f 100644
--- a/src/analytics/optimization-storage.ts
+++ b/src/analytics/optimization-storage.ts
@@ -1,6 +1,5 @@
-import sqlite3 from 'sqlite3';
-import { open, Database } from 'sqlite';
-import { CompressionEngine } from '../core/compression-engine';
+import Database from 'better-sqlite3';
+import { CompressionEngine } from '../core/compression-engine.js';
 
 export interface OptimizationResult {
     originalTextHash: string;
@@ -11,22 +10,19 @@ export interface OptimizationResult {
 }
 
 export class SqliteOptimizationStorage {
-    private db: Database<sqlite3.Database, sqlite3.Statement>;
-    private dbPath: string;
-    private compressionEngine: CompressionEngine;
+    private db: Database.Database | null = null;
+    private readonly dbPath: string;
+    private readonly compressionEngine: CompressionEngine;
 
     constructor(dbPath: string = './optimization.db') {
         this.dbPath = dbPath;
         this.compressionEngine = new CompressionEngine();
     }
 
-    public async initializeDatabase(): Promise<void> {
-        this.db = await open({
-            filename: this.dbPath,
-            driver: sqlite3.Database
-        });
-
-        await this.db.exec(`
+    public initializeDatabase(): void {
+        this.db = new Database(this.dbPath);
+        this.db.pragma('journal_mode = WAL');
+        this.db.exec(`
             CREATE TABLE IF NOT EXISTS optimization_results (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
                 original_text_hash TEXT NOT NULL UNIQUE,
@@ -37,43 +33,68 @@ export class SqliteOptimizationStorage {
                 tokens_saved INTEGER NOT NULL,
                 created_at DATETIME DEFAULT CURRENT_TIMESTAMP
             );
+            CREATE INDEX IF NOT EXISTS idx_optimization_hash
+                ON optimization_results(original_text_hash);
         `);
     }
 
-    public async save(entry: OptimizationResult): Promise<void> {
-        const compressedOptimizedText = this.compressionEngine.compress(entry.optimizedText);
+    private requireDb(): Database.Database {
+        if (!this.db) {
+            throw new Error('Optimization storage database is not initialized. Call initializeDatabase() first.');
+        }
+        return this.db;
+    }
+
+    public save(entry: OptimizationResult): void {
+        const db = this.requireDb();
+        const compressed = this.compressionEngine.compress(entry.optimizedText);
 
-        await this.db.run(
-            `INSERT INTO optimization_results (original_text_hash, optimized_text_compressed, compression_algorithm, original_tokens, optimized_tokens, tokens_saved)
-             VALUES (?, ?, ?, ?, ?, ?)`, 
-            [entry.originalTextHash, compressedOptimizedText.compressed, 'brotli', entry.originalTokens, entry.optimizedTokens, entry.tokensSaved]
+        db.prepare(
+            `INSERT OR REPLACE INTO optimization_results
+             (original_text_hash, optimized_text_compressed, compression_algorithm,
+              original_tokens, optimized_tokens, tokens_saved)
+             VALUES (?, ?, ?, ?, ?, ?)`
+        ).run(
+            entry.originalTextHash,
+            compressed.compressed,
+            'brotli',
+            entry.originalTokens,
+            entry.optimizedTokens,
+            entry.tokensSaved
         );
     }
 
-    public async get(originalTextHash: string): Promise<OptimizationResult | null> {
-        const row = await this.db.get(
-            'SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved FROM optimization_results WHERE original_text_hash = ?',
-            originalTextHash
-        );
+    public get(originalTextHash: string): OptimizationResult | null {
+        const db = this.requireDb();
+        const row = db.prepare(
+            `SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved
+             FROM optimization_results WHERE original_text_hash = ?`
+        ).get(originalTextHash) as
+            | {
+                  optimized_text_compressed: Buffer;
+                  original_tokens: number;
+                  optimized_tokens: number;
+                  tokens_saved: number;
+              }
+            | undefined;
 
         if (!row) {
             return null;
         }
 
-        const optimizedText = this.compressionEngine.decompress(row.optimized_text_compressed);
-
         return {
             originalTextHash,
-            optimizedText,
+            optimizedText: this.compressionEngine.decompress(row.optimized_text_compressed),
             originalTokens: row.original_tokens,
             optimizedTokens: row.optimized_tokens,
-            tokensSaved: row.tokens_saved
+            tokensSaved: row.tokens_saved,
         };
     }
 
-    public async close(): Promise<void> {
+    public close(): void {
         if (this.db) {
-            await this.db.close();
+            this.db.close();
+            this.db = null;
         }
     }
 }
diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts
index e7922cf..c9e7d17 100644
--- a/src/core/compression-engine.ts
+++ b/src/core/compression-engine.ts
@@ -47,10 +47,13 @@ export class CompressionEngine {
         return brotliDecompressSync(buffer).toString('utf8');
     }
 
-    public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): CompressionResult & { compressed: string } {
+    public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit<CompressionResult, 'compressed'> & { compressed: string } {
         const result = this.compress(text, options);
         return {
-            ...result,
+            originalSize: result.originalSize,
+            compressedSize: result.compressedSize,
+            ratio: result.ratio,
+            percentSaved: result.percentSaved,
             compressed: result.compressed.toString('base64'),
         };
     }
diff --git a/src/server/index.ts b/src/server/index.ts
index 8c09500..047e8eb 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -2020,7 +2020,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_grep': {
         const { pattern, ...options } = args as any;
-        const result = await smartGrep.run(pattern, options);
+        const result = await runSmartGrep(pattern, options);
         return {
           content: [
             {
@@ -2032,7 +2032,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       }
 
       case 'optimization_storage': {
-        const result = await optimizationStorage.invoke({} as TurnContext, { tool_name: name, tool_input: args });
+        const result = optimizationStorage.run(args as any);
         return {
           content: [
             {
diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts
index 9949d1e..5f9fe9e 100644
--- a/src/tools/optimization-storage-tool.ts
+++ b/src/tools/optimization-storage-tool.ts
@@ -1,71 +1,139 @@
-import { Tool, ToolInvocation, TurnContext } from '../types/turn-context';
-import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage';
+import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage.js';
 
-export class OptimizationStorageTool implements Tool {
+export type OptimizationStorageOperation = 'store' | 'retrieve';
+
+export interface OptimizationStorageOptions {
+    operation: OptimizationStorageOperation;
+    originalTextHash?: string;
+    optimizedText?: string;
+    originalTokens?: number;
+    optimizedTokens?: number;
+    tokensSaved?: number;
+}
+
+export interface OptimizationStorageResponse {
+    success: boolean;
+    error?: string;
+    result?: OptimizationResult;
+}
+
+export class OptimizationStorageTool {
     public readonly name = 'optimization_storage';
-    public readonly description = 'A tool for storing and retrieving compressed optimization results.';
+    public readonly description =
+        'Persist and retrieve brotli-compressed optimization results keyed by text hash.';
 
-    private storage: SqliteOptimizationStorage;
+    private readonly storage: SqliteOptimizationStorage;
 
-    constructor() {
-        this.storage = new SqliteOptimizationStorage();
+    constructor(storage?: SqliteOptimizationStorage) {
+        this.storage = storage ?? new SqliteOptimizationStorage();
         this.storage.initializeDatabase();
     }
 
-    public async invoke(context: TurnContext, invocation: ToolInvocation): Promise<any> {
-        const operation = invocation.arguments?.operation;
-
-        if (operation === 'store') {
-            return this.store(invocation.arguments);
-        } else if (operation === 'retrieve') {
-            return this.retrieve(invocation.arguments);
-        } else {
-            return { error: `Unknown operation: ${operation}` };
+    public run(options: OptimizationStorageOptions): OptimizationStorageResponse {
+        switch (options.operation) {
+            case 'store':
+                return this.store(options);
+            case 'retrieve':
+                return this.retrieve(options);
+            default:
+                return {
+                    success: false,
+                    error: `Unknown operation: ${String((options as { operation: unknown }).operation)}`,
+                };
         }
     }
 
-    private async store(args: any): Promise<any> {
-        try {
-            const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = args;
-            if (!originalTextHash || !optimizedText || originalTokens === undefined || optimizedTokens === undefined || tokensSaved === undefined) {
-                return { error: 'Missing required arguments for store operation.' };
-            }
+    private store(options: OptimizationStorageOptions): OptimizationStorageResponse {
+        const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = options;
+
+        if (
+            !originalTextHash ||
+            !optimizedText ||
+            originalTokens === undefined ||
+            optimizedTokens === undefined ||
+            tokensSaved === undefined
+        ) {
+            return {
+                success: false,
+                error: 'Missing required arguments for store operation: originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved.',
+            };
+        }
 
-            const optimizationResult: OptimizationResult = {
+        try {
+            this.storage.save({
                 originalTextHash,
-                optimizedText: Buffer.from(optimizedText, 'base64').toString('utf8'),
+                optimizedText,
                 originalTokens,
                 optimizedTokens,
-                tokensSaved
-            };
-
-            await this.storage.save(optimizationResult);
+                tokensSaved,
+            });
             return { success: true };
         } catch (error) {
-            return { error: `Failed to store optimization result: ${error.message}` };
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: `Failed to store optimization result: ${message}` };
         }
     }
 
-    private async retrieve(args: any): Promise<any> {
-        try {
-            const { originalTextHash } = args;
-            if (!originalTextHash) {
-                return { error: 'Missing required argument for retrieve operation: originalTextHash' };
-            }
+    private retrieve(options: OptimizationStorageOptions): OptimizationStorageResponse {
+        const { originalTextHash } = options;
 
-            const result = await this.storage.get(originalTextHash);
+        if (!originalTextHash) {
+            return {
+                success: false,
+                error: 'Missing required argument for retrieve operation: originalTextHash.',
+            };
+        }
 
-            if (result) {
-                return {
-                    success: true,
-                    ...result,
-                    optimizedText: Buffer.from(result.optimizedText, 'utf8').toString('base64')
-                };
-            } else {
-                return { success: false, message: 'Not found' };
+        try {
+            const result = this.storage.get(originalTextHash);
+            if (!result) {
+                return { success: false, error: 'Not found' };
             }
+            return { success: true, result };
         } catch (error) {
-            return { error: `Failed to retrieve optimization result: ${error.message}` };
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: `Failed to retrieve optimization result: ${message}` };
         }
     }
+
+    public close(): void {
+        this.storage.close();
+    }
 }
+
+export const OPTIMIZATION_STORAGE_TOOL_DEFINITION = {
+    name: 'optimization_storage',
+    description:
+        'Persist and retrieve brotli-compressed optimization results keyed by text hash. Operations: store, retrieve.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            operation: {
+                type: 'string',
+                enum: ['store', 'retrieve'],
+                description: 'The storage operation to perform',
+            },
+            originalTextHash: {
+                type: 'string',
+                description: 'Stable hash of the original uncompressed text (required for both operations)',
+            },
+            optimizedText: {
+                type: 'string',
+                description: 'The optimized text to store (required for store)',
+            },
+            originalTokens: {
+                type: 'number',
+                description: 'Token count of the original text (required for store)',
+            },
+            optimizedTokens: {
+                type: 'number',
+                description: 'Token count after optimization (required for store)',
+            },
+            tokensSaved: {
+                type: 'number',
+                description: 'Tokens saved by optimization (required for store)',
+            },
+        },
+        required: ['operation'],
+    },
+};

From 1dba76f9246489f2c12d392ba08889622cc47ae6 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:22:44 -0400
Subject: [PATCH 10/26] feat(utils): add generic lrucache with ttl and stats
 (#125)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In-memory LRU cache for hot paths (token counts, file search results,
MCP correction responses). Separate from CacheEngine, which is the
persistent SQLite cache — LruCache is intended for process-local
memoization with O(1) eviction via Map insertion order.

- maxSize-bounded with LRU eviction
- Optional per-entry TTL with automatic lazy expiration
- prune() for proactive periodic cleanup
- stats() exposes hits / misses / evictions / expired / hitRate

Refs #125

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/utils/lru-cache.ts | 134 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 src/utils/lru-cache.ts

diff --git a/src/utils/lru-cache.ts b/src/utils/lru-cache.ts
new file mode 100644
index 0000000..65889f8
--- /dev/null
+++ b/src/utils/lru-cache.ts
@@ -0,0 +1,134 @@
+/**
+ * Generic LRU cache with optional per-entry TTL — addresses issue #125.
+ *
+ * Unlike CacheEngine (token-aware, persistent SQLite cache), this is an
+ * in-memory LRU intended for hot paths: file-search results, token counts,
+ * MCP correction responses, etc. Eviction is O(1) via Map insertion order.
+ */
+
+export interface LruCacheStats {
+    size: number;
+    maxSize: number;
+    hits: number;
+    misses: number;
+    evictions: number;
+    expired: number;
+    hitRate: number;
+}
+
+interface LruCacheEntry<V> {
+    value: V;
+    expiresAt: number;
+}
+
+export class LruCache<K, V> {
+    private readonly cache = new Map<K, LruCacheEntry<V>>();
+    private readonly maxSize: number;
+    private readonly defaultTtlMs: number;
+    private hits = 0;
+    private misses = 0;
+    private evictions = 0;
+    private expired = 0;
+
+    constructor(maxSize: number, defaultTtlMs: number = 0) {
+        if (maxSize <= 0) {
+            throw new Error(`LruCache maxSize must be > 0, got ${maxSize}`);
+        }
+        this.maxSize = maxSize;
+        this.defaultTtlMs = defaultTtlMs;
+    }
+
+    public get(key: K): V | undefined {
+        const entry = this.cache.get(key);
+        if (!entry) {
+            this.misses++;
+            return undefined;
+        }
+
+        if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) {
+            this.cache.delete(key);
+            this.expired++;
+            this.misses++;
+            return undefined;
+        }
+
+        // Refresh recency: remove + re-insert moves to the tail.
+        this.cache.delete(key);
+        this.cache.set(key, entry);
+        this.hits++;
+        return entry.value;
+    }
+
+    public set(key: K, value: V, ttlMs?: number): void {
+        if (this.cache.has(key)) {
+            this.cache.delete(key);
+        } else if (this.cache.size >= this.maxSize) {
+            const oldestKey = this.cache.keys().next().value as K | undefined;
+            if (oldestKey !== undefined) {
+                this.cache.delete(oldestKey);
+                this.evictions++;
+            }
+        }
+
+        const effectiveTtl = ttlMs ?? this.defaultTtlMs;
+        this.cache.set(key, {
+            value,
+            expiresAt: effectiveTtl > 0 ? Date.now() + effectiveTtl : 0,
+        });
+    }
+
+    public has(key: K): boolean {
+        const entry = this.cache.get(key);
+        if (!entry) {
+            return false;
+        }
+        if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) {
+            this.cache.delete(key);
+            this.expired++;
+            return false;
+        }
+        return true;
+    }
+
+    public delete(key: K): boolean {
+        return this.cache.delete(key);
+    }
+
+    public clear(): void {
+        this.cache.clear();
+    }
+
+    public get size(): number {
+        return this.cache.size;
+    }
+
+    /** Remove all entries whose TTL has expired. Returns the count removed. */
+    public prune(): number {
+        if (this.defaultTtlMs === 0) {
+            return 0;
+        }
+        const now = Date.now();
+        let removed = 0;
+        for (const [key, entry] of this.cache) {
+            if (entry.expiresAt !== 0 && now > entry.expiresAt) {
+                this.cache.delete(key);
+                removed++;
+            }
+        }
+        this.expired += removed;
+        return removed;
+    }
+
+    public stats(): LruCacheStats {
+        const total = this.hits + this.misses;
+        return {
+            size: this.cache.size,
+            maxSize: this.maxSize,
+            hits: this.hits,
+            misses: this.misses,
+            evictions: this.evictions,
+            expired: this.expired,
+            hitRate: total === 0 ? 0 : this.hits / total,
+        };
+    }
+}

From c3b6e3faee7b0d9d40e5fcd86e943ed8c388e82c Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:23:55 -0400
Subject: [PATCH 11/26] feat(tokenizers): add pluggable tokenizer framework
 (#124)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the long-standing character/4 heuristic and adds vendor-neutral
tokenization:

- ITokenizer interface — async countTokens + free() lifecycle.
- TiktokenTokenizer — wraps the tiktoken encoder; handles Claude family
  by mapping to the gpt-4 encoder (closest publicly available).
- HeuristicTokenizer — content-aware local fallback (code 2.5,
  json 2.8, markdown 3.5, text 4.0 chars/token), auto-detected via
  cheap regex + try-parse-json.
- TokenizerFactory.create(modelName) picks the best backend;
  createFromEnv reads CLAUDE_MODEL / ANTHROPIC_MODEL / OPENAI_MODEL /
  TOKEN_OPTIMIZER_MODEL.

Both tokenizer implementations memoize counts with the generic LruCache
(#125), so repeated inputs do not re-tokenize.

Refs #124, #123

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/core/tokenizers/heuristic-tokenizer.ts | 78 ++++++++++++++++++++++
 src/core/tokenizers/i-tokenizer.ts         | 19 ++++++
 src/core/tokenizers/tiktoken-tokenizer.ts  | 69 +++++++++++++++++++
 src/core/tokenizers/tokenizer-factory.ts   | 33 +++++++++
 4 files changed, 199 insertions(+)
 create mode 100644 src/core/tokenizers/heuristic-tokenizer.ts
 create mode 100644 src/core/tokenizers/i-tokenizer.ts
 create mode 100644 src/core/tokenizers/tiktoken-tokenizer.ts
 create mode 100644 src/core/tokenizers/tokenizer-factory.ts

diff --git a/src/core/tokenizers/heuristic-tokenizer.ts b/src/core/tokenizers/heuristic-tokenizer.ts
new file mode 100644
index 0000000..ef81931
--- /dev/null
+++ b/src/core/tokenizers/heuristic-tokenizer.ts
@@ -0,0 +1,78 @@
+import { ITokenizer } from './i-tokenizer.js';
+import { LruCache } from '../../utils/lru-cache.js';
+
+const DEFAULT_CACHE_SIZE = 500;
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+
+export enum ContentType {
+    Code = 'code',
+    Json = 'json',
+    Markdown = 'markdown',
+    Text = 'text',
+}
+
+/**
+ * Content-aware character-to-token ratios derived from tiktoken encoding
+ * on typical samples:
+ *
+ * | Content   | chars/token |
+ * | --------- | ----------- |
+ * | code      | 2.5         |
+ * | json      | 2.8         |
+ * | markdown  | 3.5         |
+ * | text      | 4.0         |
+ */
+const CHARS_PER_TOKEN: Readonly<Record<ContentType, number>> = {
+    [ContentType.Code]: 2.5,
+    [ContentType.Json]: 2.8,
+    [ContentType.Markdown]: 3.5,
+    [ContentType.Text]: 4.0,
+};
+
+const CODE_PATTERN = /\b(function|class|const|import|export|return|await|=>)\b/;
+const JSON_PATTERN = /^[\s\n]*[{[]/;
+const MARKDOWN_PATTERN = /^#{1,6}\s|^\s*[-*+]\s|\[[^\]]+\]\([^)]+\)/m;
+
+export class HeuristicTokenizer implements ITokenizer {
+    public readonly modelName: string;
+    private readonly cache: LruCache<string, number>;
+
+    constructor(modelName: string = 'heuristic', cache?: LruCache<string, number>) {
+        this.modelName = modelName;
+        this.cache = cache ?? new LruCache<string, number>(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS);
+    }
+
+    public async countTokens(text: string): Promise<number> {
+        const cached = this.cache.get(text);
+        if (cached !== undefined) {
+            return cached;
+        }
+        const contentType = HeuristicTokenizer.detectContentType(text);
+        const ratio = CHARS_PER_TOKEN[contentType];
+        const count = Math.ceil(text.length / ratio);
+        this.cache.set(text, count);
+        return count;
+    }
+
+    public free(): void {
+        // No native resources to free.
+    }
+
+    public static detectContentType(text: string): ContentType {
+        if (JSON_PATTERN.test(text)) {
+            try {
+                JSON.parse(text);
+                return ContentType.Json;
+            } catch {
+                // Not actually JSON; fall through to other detection.
+            }
+        }
+        if (CODE_PATTERN.test(text)) {
+            return ContentType.Code;
+        }
+        if (MARKDOWN_PATTERN.test(text)) {
+            return ContentType.Markdown;
+        }
+        return ContentType.Text;
+    }
+}
diff --git a/src/core/tokenizers/i-tokenizer.ts b/src/core/tokenizers/i-tokenizer.ts
new file mode 100644
index 0000000..57f23fd
--- /dev/null
+++ b/src/core/tokenizers/i-tokenizer.ts
@@ -0,0 +1,19 @@
+/**
+ * Pluggable tokenizer interface — addresses issue #124.
+ *
+ * Implementations:
+ * - TiktokenTokenizer: uses the local tiktoken library (GPT-4 / GPT-3.5-turbo).
+ * - HeuristicTokenizer: content-aware local fallback for unknown models.
+ *
+ * The factory picks an implementation based on model name. All implementations
+ * memoize counts via an injected LruCache so repeated inputs don't re-tokenize.
+ */
+
+export interface ITokenizer {
+    readonly modelName: string;
+
+    countTokens(text: string): Promise<number>;
+
+    /** Free any native resources. */
+    free(): void;
+}
diff --git a/src/core/tokenizers/tiktoken-tokenizer.ts b/src/core/tokenizers/tiktoken-tokenizer.ts
new file mode 100644
index 0000000..9b2d327
--- /dev/null
+++ b/src/core/tokenizers/tiktoken-tokenizer.ts
@@ -0,0 +1,69 @@
+import { encoding_for_model, Tiktoken, TiktokenModel } from 'tiktoken';
+import { ITokenizer } from './i-tokenizer.js';
+import { LruCache } from '../../utils/lru-cache.js';
+
+const DEFAULT_CACHE_SIZE = 500;
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+
+const SUPPORTED_TIKTOKEN_MODELS: readonly TiktokenModel[] = ['gpt-4', 'gpt-3.5-turbo'];
+
+export class TiktokenTokenizer implements ITokenizer {
+    public readonly modelName: string;
+    private readonly encoder: Tiktoken;
+    private readonly cache: LruCache<string, number>;
+
+    constructor(modelName: string, cache?: LruCache<string, number>) {
+        this.modelName = modelName;
+        this.cache = cache ?? new LruCache<string, number>(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS);
+        const tiktokenModel = TiktokenTokenizer.mapToTiktokenModel(modelName);
+        this.encoder = encoding_for_model(tiktokenModel);
+    }
+
+    public async countTokens(text: string): Promise<number> {
+        const cached = this.cache.get(text);
+        if (cached !== undefined) {
+            return cached;
+        }
+        const count = this.encoder.encode(text).length;
+        this.cache.set(text, count);
+        return count;
+    }
+
+    public free(): void {
+        this.encoder.free();
+    }
+
+    public static supports(modelName: string): boolean {
+        const mapped = TiktokenTokenizer.tryMap(modelName);
+        return mapped !== null;
+    }
+
+    public static mapToTiktokenModel(modelName: string): TiktokenModel {
+        const mapped = TiktokenTokenizer.tryMap(modelName);
+        if (mapped === null) {
+            // Default: GPT-4 tokenizer is the closest available for Claude/unknown models.
+            return 'gpt-4';
+        }
+        return mapped;
+    }
+
+    private static tryMap(modelName: string): TiktokenModel | null {
+        const lower = modelName.toLowerCase();
+        if (
+            lower.includes('claude') ||
+            lower.includes('sonnet') ||
+            lower.includes('opus') ||
+            lower.includes('haiku') ||
+            lower.includes('gpt-4')
+        ) {
+            return 'gpt-4';
+        }
+        if (lower.includes('gpt-3.5') || lower.includes('gpt3.5')) {
+            return 'gpt-3.5-turbo';
+        }
+        if (SUPPORTED_TIKTOKEN_MODELS.includes(lower as TiktokenModel)) {
+            return lower as TiktokenModel;
+        }
+        return null;
+    }
+}
diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts
new file mode 100644
index 0000000..ea4b360
--- /dev/null
+++ b/src/core/tokenizers/tokenizer-factory.ts
@@ -0,0 +1,33 @@
+import { ITokenizer } from './i-tokenizer.js';
+import { TiktokenTokenizer } from './tiktoken-tokenizer.js';
+import { HeuristicTokenizer } from './heuristic-tokenizer.js';
+
+export class TokenizerFactory {
+    /**
+     * Create a tokenizer for the given model name.
+     *
+     * Resolution order:
+     * 1. Tiktoken for GPT-4 / GPT-3.5-turbo / Claude-family models.
+     * 2. HeuristicTokenizer as the content-aware fallback.
+     *
+     * Callers that already hold a tokenizer should prefer reusing it —
+     * construction allocates a tiktoken encoder (native resource).
+     */
+    public static create(modelName: string): ITokenizer {
+        if (TiktokenTokenizer.supports(modelName)) {
+            return new TiktokenTokenizer(modelName);
+        }
+        return new HeuristicTokenizer(modelName);
+    }
+
+    /** Create a tokenizer using the active model environment variables. */
+    public static createFromEnv(): ITokenizer {
+        const modelName =
+            process.env.CLAUDE_MODEL ||
+            process.env.ANTHROPIC_MODEL ||
+            process.env.OPENAI_MODEL ||
+            process.env.TOKEN_OPTIMIZER_MODEL ||
+            'gpt-4';
+        return TokenizerFactory.create(modelName);
+    }
+}

From 35f888e12afaa2012dda56d1dc89d70db51cd76a Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:25:11 -0400
Subject: [PATCH 12/26] feat(config): add optimization settings with zod
 validation (#120)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends HypercontextConfig with an optional `optimization` section
mirroring Gemini CLI's settingsSchema:

- compressionTokenThreshold — fraction of context to trigger compression
- compressionPreserveThreshold — fraction to keep uncompressed at tail
- minTokensBeforeCompression — lower bound for optimizer to engage
- modelTokenLimits — per-model context window size
- minOutputSizeBytes / quality — stored-entry gating

ConfigManager now validates the user config file against a zod schema
and falls back to DEFAULT_CONFIG with a descriptive warning instead of
silently accepting malformed JSON. Adds getOptimizationConfig() and
getModelTokenLimit(modelName) accessors.

Refs #120

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/core/config.ts | 110 ++++++++++++++++++++++++++++++++++++++++++---
 src/core/types.ts  |  20 +++++++++
 2 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/src/core/config.ts b/src/core/config.ts
index d5cd01c..57bee2a 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -2,11 +2,32 @@
  * Configuration management for Hypercontext MCP
  */
 
-import { HypercontextConfig } from './types.js';
+import { z } from 'zod';
+import { HypercontextConfig, OptimizationConfig } from './types.js';
 import { readFileSync, existsSync } from 'fs';
 import { homedir } from 'os';
 import { join } from 'path';
 
+const DEFAULT_OPTIMIZATION: OptimizationConfig = {
+  compressionTokenThreshold: 0.7,
+  compressionPreserveThreshold: 0.3,
+  minTokensBeforeCompression: 1000,
+  modelTokenLimits: {
+    'gpt-4': 128000,
+    'gpt-4-turbo': 128000,
+    'gpt-3.5-turbo': 16385,
+    'claude-3-opus': 200000,
+    'claude-3-sonnet': 200000,
+    'claude-3-haiku': 200000,
+    'claude-opus-4-7': 1000000,
+    'claude-sonnet-4-6': 1000000,
+    'gemini-1.5-pro': 2000000,
+    'gemini-2.5-flash': 1000000,
+  },
+  minOutputSizeBytes: 500,
+  quality: 'balanced',
+};
+
 const DEFAULT_CONFIG: HypercontextConfig = {
   cache: {
     enabled: true,
@@ -38,8 +59,61 @@ const DEFAULT_CONFIG: HypercontextConfig = {
     streamingThreshold: 1024 * 1024, // 1MB
     enableStreaming: false,
   },
+  optimization: DEFAULT_OPTIMIZATION,
 };
 
+const OptimizationConfigSchema = z.object({
+  compressionTokenThreshold: z.number().min(0).max(1),
+  compressionPreserveThreshold: z.number().min(0).max(1),
+  minTokensBeforeCompression: z.number().int().nonnegative(),
+  modelTokenLimits: z.record(z.string(), z.number().int().positive()),
+  minOutputSizeBytes: z.number().int().nonnegative(),
+  quality: z.enum(['fast', 'balanced', 'max']),
+});
+
+const HypercontextConfigSchema = z
+  .object({
+    cache: z
+      .object({
+        enabled: z.boolean(),
+        maxSizeMB: z.number().int().positive(),
+        defaultTTL: z.number().int().nonnegative(),
+        ttlByType: z.record(z.string(), z.number().int().nonnegative()),
+        compression: z.enum(['none', 'gzip', 'brotli', 'auto']),
+      })
+      .partial()
+      .optional(),
+    monitoring: z
+      .object({
+        enabled: z.boolean(),
+        detailedLogging: z.boolean(),
+        metricsRetentionDays: z.number().int().nonnegative(),
+        dashboardPort: z.number().int().positive(),
+        enableWebUI: z.boolean(),
+      })
+      .partial()
+      .optional(),
+    intelligence: z
+      .object({
+        enablePatternDetection: z.boolean(),
+        enableWorkflowLearning: z.boolean(),
+        enablePredictiveCaching: z.boolean(),
+        mlModelPath: z.string(),
+      })
+      .partial()
+      .optional(),
+    performance: z
+      .object({
+        maxConcurrentOps: z.number().int().positive(),
+        streamingThreshold: z.number().int().positive(),
+        enableStreaming: z.boolean(),
+      })
+      .partial()
+      .optional(),
+    optimization: OptimizationConfigSchema.partial().optional(),
+  })
+  .passthrough();
+
 export class ConfigManager {
   private config: HypercontextConfig;
   private configPath: string;
@@ -57,26 +131,52 @@ export class ConfigManager {
 
     try {
       const fileContent = readFileSync(this.configPath, 'utf-8');
-      const userConfig = JSON.parse(fileContent);
-      return this.mergeConfig(DEFAULT_CONFIG, userConfig);
+      const rawUserConfig = JSON.parse(fileContent);
+      const parsed = HypercontextConfigSchema.safeParse(rawUserConfig);
+      if (!parsed.success) {
+        const issues = parsed.error.issues
+          .map((i) => `  - ${i.path.join('.') || 'root'}: ${i.message}`)
+          .join('\n');
+        console.warn(
+          `Invalid config at ${this.configPath}, using defaults:\n${issues}`
+        );
+        return DEFAULT_CONFIG;
+      }
+      return this.mergeConfig(DEFAULT_CONFIG, parsed.data);
     } catch (error) {
-      console.warn('Failed to load config, using defaults:', error);
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Failed to load config, using defaults: ${message}`);
       return DEFAULT_CONFIG;
     }
   }
 
   private mergeConfig(
     defaults: HypercontextConfig,
-    user: Partial<HypercontextConfig>
+    user: {
+      cache?: Partial<HypercontextConfig['cache']>;
+      monitoring?: Partial<HypercontextConfig['monitoring']>;
+      intelligence?: Partial<HypercontextConfig['intelligence']>;
+      performance?: Partial<HypercontextConfig['performance']>;
+      optimization?: Partial<OptimizationConfig>;
+    }
   ): HypercontextConfig {
     return {
       cache: { ...defaults.cache, ...user.cache },
       monitoring: { ...defaults.monitoring, ...user.monitoring },
       intelligence: { ...defaults.intelligence, ...user.intelligence },
       performance: { ...defaults.performance, ...user.performance },
+      optimization: { ...DEFAULT_OPTIMIZATION, ...(user.optimization ?? {}) },
     };
   }
 
+  public getOptimizationConfig(): OptimizationConfig {
+    return this.config.optimization ?? DEFAULT_OPTIMIZATION;
+  }
+
+  public getModelTokenLimit(modelName: string): number | undefined {
+    return this.getOptimizationConfig().modelTokenLimits[modelName];
+  }
+
   get(): HypercontextConfig {
     return { ...this.config };
   }
diff --git a/src/core/types.ts b/src/core/types.ts
index 9d36be2..aacb6a1 100644
--- a/src/core/types.ts
+++ b/src/core/types.ts
@@ -48,6 +48,26 @@ export interface HypercontextConfig {
     streamingThreshold: number;
     enableStreaming: boolean;
   };
+  optimization?: OptimizationConfig;
+}
+
+/**
+ * Configuration-driven compression thresholds — addresses issue #120.
+ * Mirrors the fields exposed by Gemini CLI's settingsSchema.ts.
+ */
+export interface OptimizationConfig {
+  /** Fraction of model context at which compression kicks in (0-1). */
+  compressionTokenThreshold: number;
+  /** Fraction of chat history to keep uncompressed at the tail (0-1). */
+  compressionPreserveThreshold: number;
+  /** Minimum token count before an optimizer considers compressing. */
+  minTokensBeforeCompression: number;
+  /** Per-model total context window size, in tokens. */
+  modelTokenLimits: Record<string, number>;
+  /** Minimum output bytes before optimization emits a stored entry. */
+  minOutputSizeBytes: number;
+  /** Compression quality preset. */
+  quality: 'fast' | 'balanced' | 'max';
 }
 
 export interface TokenMetrics {

From 16305fca059a859093ed223e8ba14435b47bfd70 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:28:53 -0400
Subject: [PATCH 13/26] feat(session): add session + context-delta + chat
 compression (#121, #122)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces the session-centric plumbing that the optimization plan has
been sketched against for a while:

- src/core/session.ts — Session class holding history + per-file state,
  with a token-aware compressHistory() that keeps a configurable tail
  fraction and summarizes the head via a pluggable ISummarizer.
- src/core/summarization.ts — ISummarizer interface + a self-contained
  TruncatingSummarizer fallback so the module is usable without an LLM.
- src/core/session-manager.ts — JSON-persisted singleton that auto-
  compresses a session's history when addMessage() pushes it past
  maxTokens.
- src/utils/diff.ts — calculateDelta / applyDelta built on the existing
  `diff` dep (unified-diff, round-trippable).
- src/tools/context-delta-tool.ts — new context_delta MCP tool with
  compute-delta / seed / clear operations and an items-complete input
  schema.
- src/validation/tool-schemas.ts — add OptimizationStorageSchema and
  ContextDeltaSchema so the validator accepts both new tools.
- src/server/index.ts — wire up SessionManager + ContextDeltaTool using
  TokenizerFactory.createFromEnv; persistence at
  ~/.token-optimizer/sessions.json. Remove the previously-duplicated
  inline OPTIMIZATION_STORAGE_TOOL_DEFINITION and import it from the
  tool module.

Refs #121, #122

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/core/session-manager.ts     | 153 ++++++++++++++++++++++++++++
 src/core/session.ts             | 171 ++++++++++++++++++++++++++++++++
 src/core/summarization.ts       |  50 ++++++++++
 src/server/index.ts             |  64 ++++++------
 src/tools/context-delta-tool.ts | 155 +++++++++++++++++++++++++++++
 src/utils/diff.ts               |  39 ++++++++
 src/validation/tool-schemas.ts  |  20 ++++
 7 files changed, 616 insertions(+), 36 deletions(-)
 create mode 100644 src/core/session-manager.ts
 create mode 100644 src/core/session.ts
 create mode 100644 src/core/summarization.ts
 create mode 100644 src/tools/context-delta-tool.ts
 create mode 100644 src/utils/diff.ts

diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts
new file mode 100644
index 0000000..1cc8372
--- /dev/null
+++ b/src/core/session-manager.ts
@@ -0,0 +1,153 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
+import { dirname } from 'path';
+import {
+    Session,
+    SessionOptions,
+    SessionSnapshot,
+    MessageRole,
+} from './session.js';
+import { ITokenizer } from './tokenizers/i-tokenizer.js';
+import { ISummarizer } from './summarization.js';
+
+/**
+ * Singleton-style SessionManager — addresses issues #121 / #122.
+ *
+ * Persists all sessions to a single JSON file so they survive restarts.
+ * When a message is added we check whether the session has exceeded its
+ * token budget and, if so, auto-compress the history (#121).
+ */
+
+export interface SessionManagerOptions {
+    persistencePath?: string;
+    tokenizer?: ITokenizer;
+    summarizer?: ISummarizer;
+    defaultMaxTokens?: number;
+}
+
+interface PersistedState {
+    sessions: SessionSnapshot[];
+}
+
+export class SessionManager {
+    private readonly sessions = new Map<string, Session>();
+    private readonly persistencePath: string | null;
+    private readonly tokenizer: ITokenizer | undefined;
+    private readonly summarizer: ISummarizer | undefined;
+    private readonly defaultMaxTokens: number | undefined;
+
+    constructor(options: SessionManagerOptions = {}) {
+        this.persistencePath = options.persistencePath ?? null;
+        this.tokenizer = options.tokenizer;
+        this.summarizer = options.summarizer;
+        this.defaultMaxTokens = options.defaultMaxTokens;
+        if (this.persistencePath && existsSync(this.persistencePath)) {
+            this.load();
+        }
+    }
+
+    public createSession(options: SessionOptions = {}): Session {
+        const session = new Session({
+            tokenizer: this.tokenizer,
+            summarizer: this.summarizer,
+            maxTokens: options.maxTokens ?? this.defaultMaxTokens,
+            ...options,
+        });
+        this.sessions.set(session.id, session);
+        this.persist();
+        return session;
+    }
+
+    public getSession(id: string): Session | undefined {
+        return this.sessions.get(id);
+    }
+
+    public listSessions(): Session[] {
+        return Array.from(this.sessions.values());
+    }
+
+    public deleteSession(id: string): boolean {
+        const removed = this.sessions.delete(id);
+        if (removed) {
+            this.persist();
+        }
+        return removed;
+    }
+
+    /**
+     * Add a message to the session and auto-compress the history if the
+     * token budget is exceeded (#121).
+     *
+     * Returns the post-add token count of the session.
+     */
+    public async addMessage(
+        sessionId: string,
+        role: MessageRole,
+        content: string
+    ): Promise<number> {
+        const session = this.sessions.get(sessionId);
+        if (!session) {
+            throw new Error(`Unknown session: ${sessionId}`);
+        }
+        session.addMessage(role, content);
+        const currentTokens = await session.getHistoryTokenCount();
+        let finalTokens = currentTokens;
+        if (currentTokens > session.maxTokens) {
+            finalTokens = await session.compressHistory();
+        }
+        this.persist();
+        return finalTokens;
+    }
+
+    public updateFileState(
+        sessionId: string,
+        filePath: string,
+        content: string
+    ): void {
+        const session = this.sessions.get(sessionId);
+        if (!session) {
+            throw new Error(`Unknown session: ${sessionId}`);
+        }
+        session.setFileContent(filePath, content);
+        this.persist();
+    }
+
+    private persist(): void {
+        if (!this.persistencePath) {
+            return;
+        }
+        const state: PersistedState = {
+            sessions: this.listSessions().map((s) => s.toSnapshot()),
+        };
+        const dir = dirname(this.persistencePath);
+        if (!existsSync(dir)) {
+            mkdirSync(dir, { recursive: true });
+        }
+        writeFileSync(this.persistencePath, JSON.stringify(state, null, 2));
+    }
+
+    private load(): void {
+        if (!this.persistencePath) {
+            return;
+        }
+        try {
+            const raw = readFileSync(this.persistencePath, 'utf-8');
+            const parsed = JSON.parse(raw) as PersistedState;
+            if (!parsed || !Array.isArray(parsed.sessions)) {
+                return;
+            }
+            for (const snapshot of parsed.sessions) {
+                const session = Session.fromSnapshot(snapshot, {
+                    tokenizer: this.tokenizer,
+                    summarizer: this.summarizer,
+                });
+                this.sessions.set(session.id, session);
+            }
+        } catch (error) {
+            const message =
+                error instanceof Error ? error.message : String(error);
+            console.warn(
+                `SessionManager: failed to load sessions from ${this.persistencePath}: ${message}`
+            );
+        }
+    }
+}
diff --git a/src/core/session.ts b/src/core/session.ts
new file mode 100644
index 0000000..78ebd96
--- /dev/null
+++ b/src/core/session.ts
@@ -0,0 +1,171 @@
+import { randomUUID } from 'crypto';
+import { ITokenizer } from './tokenizers/i-tokenizer.js';
+import { ISummarizer, TruncatingSummarizer } from './summarization.js';
+
+/**
+ * Session state — addresses issues #121 and #122.
+ *
+ * A Session holds a single user's conversation history plus a per-file
+ * content snapshot. The history is token-budgeted (see #121) and the file
+ * snapshots feed context-delta tracking (#122).
+ */
+
+export type MessageRole = 'system' | 'user' | 'assistant' | 'tool';
+
+export interface Message {
+    role: MessageRole;
+    content: string;
+    timestamp: number;
+}
+
+export interface SessionFileState {
+    [filePath: string]: string;
+}
+
+export interface SessionSnapshot {
+    id: string;
+    history: Message[];
+    fileState: SessionFileState;
+    maxTokens: number;
+    createdAt: number;
+    updatedAt: number;
+}
+
+export interface SessionOptions {
+    id?: string;
+    maxTokens?: number;
+    preserveTailRatio?: number;
+    tokenizer?: ITokenizer;
+    summarizer?: ISummarizer;
+}
+
+const DEFAULT_MAX_TOKENS = 100_000;
+const DEFAULT_PRESERVE_TAIL_RATIO = 0.3;
+
+export class Session {
+    public readonly id: string;
+    public maxTokens: number;
+    public readonly createdAt: number;
+    public updatedAt: number;
+
+    private history: Message[] = [];
+    private fileState: SessionFileState = {};
+    private readonly preserveTailRatio: number;
+    private readonly tokenizer: ITokenizer | null;
+    private readonly summarizer: ISummarizer;
+
+    constructor(options: SessionOptions = {}) {
+        this.id = options.id ?? randomUUID();
+        this.maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
+        this.preserveTailRatio = options.preserveTailRatio ?? DEFAULT_PRESERVE_TAIL_RATIO;
+        this.tokenizer = options.tokenizer ?? null;
+        this.summarizer = options.summarizer ?? new TruncatingSummarizer();
+        this.createdAt = Date.now();
+        this.updatedAt = this.createdAt;
+    }
+
+    public addMessage(role: MessageRole, content: string): Message {
+        const message: Message = { role, content, timestamp: Date.now() };
+        this.history.push(message);
+        this.updatedAt = message.timestamp;
+        return message;
+    }
+
+    public getHistory(): readonly Message[] {
+        return this.history;
+    }
+
+    public getFileState(): Readonly<SessionFileState> {
+        return this.fileState;
+    }
+
+    public getFileContent(filePath: string): string | undefined {
+        return this.fileState[filePath];
+    }
+
+    public setFileContent(filePath: string, content: string): void {
+        this.fileState[filePath] = content;
+        this.updatedAt = Date.now();
+    }
+
+    /**
+     * Total token count of the current history. Uses the injected tokenizer
+     * when available; otherwise falls back to the character/4 heuristic.
+     */
+    public async getHistoryTokenCount(): Promise<number> {
+        if (!this.tokenizer) {
+            return this.history.reduce(
+                (acc, m) => acc + Math.ceil(m.content.length / 4),
+                0
+            );
+        }
+        let total = 0;
+        for (const message of this.history) {
+            total += await this.tokenizer.countTokens(message.content);
+        }
+        return total;
+    }
+
+    /**
+     * Compress the history by summarizing everything except the
+     * preserve-tail fraction. Does nothing if history fits under maxTokens.
+     *
+     * Returns the new token count after compression.
+     */
+    public async compressHistory(): Promise<number> {
+        const currentTokens = await this.getHistoryTokenCount();
+        if (currentTokens <= this.maxTokens) {
+            return currentTokens;
+        }
+        if (this.history.length <= 1) {
+            return currentTokens;
+        }
+
+        const preserveCount = Math.max(
+            1,
+            Math.floor(this.history.length * this.preserveTailRatio)
+        );
+        const tail = this.history.slice(-preserveCount);
+        const head = this.history.slice(0, -preserveCount);
+        if (head.length === 0) {
+            return currentTokens;
+        }
+
+        const summary = await this.summarizer.summarize(head);
+        const summaryMessage: Message = {
+            role: 'system',
+            content: `[summary of earlier conversation] ${summary}`,
+            timestamp: head[head.length - 1].timestamp,
+        };
+
+        this.history = [summaryMessage, ...tail];
+        this.updatedAt = Date.now();
+        return this.getHistoryTokenCount();
+    }
+
+    public toSnapshot(): SessionSnapshot {
+        return {
+            id: this.id,
+            history: [...this.history],
+            fileState: { ...this.fileState },
+            maxTokens: this.maxTokens,
+            createdAt: this.createdAt,
+            updatedAt: this.updatedAt,
+        };
+    }
+
+    public static fromSnapshot(
+        snapshot: SessionSnapshot,
+        options: Omit<SessionOptions, 'id' | 'maxTokens'> = {}
+    ): Session {
+        const session = new Session({
+            id: snapshot.id,
+            maxTokens: snapshot.maxTokens,
+            ...options,
+        });
+        session.history = [...snapshot.history];
+        session.fileState = { ...snapshot.fileState };
+        session.updatedAt = snapshot.updatedAt;
+        return session;
+    }
+}
diff --git a/src/core/summarization.ts b/src/core/summarization.ts
new file mode 100644
index 0000000..ab179c5
--- /dev/null
+++ b/src/core/summarization.ts
@@ -0,0 +1,50 @@
+import { Message } from './session.js';
+
+/**
+ * Pluggable summarization interface — part of issue #121.
+ *
+ * A production deployment should plug in an LLM-backed summarizer that
+ * condenses a list of Messages into a single natural-language summary.
+ * The default TruncatingSummarizer keeps the module self-contained and
+ * testable without an API key; it concatenates role+content and trims
+ * to a reasonable length.
+ */
+
+export interface ISummarizer {
+    summarize(messages: readonly Message[]): Promise<string>;
+}
+
+export interface TruncatingSummarizerOptions {
+    /** Approximate maximum characters of summary output. Default: 2000. */
+    maxChars?: number;
+}
+
+export class TruncatingSummarizer implements ISummarizer {
+    private readonly maxChars: number;
+
+    constructor(options: TruncatingSummarizerOptions = {}) {
+        this.maxChars = options.maxChars ?? 2000;
+    }
+
+    public async summarize(messages: readonly Message[]): Promise<string> {
+        if (messages.length === 0) {
+            return '';
+        }
+
+        const joined = messages
+            .map((m) => `${m.role}: ${m.content}`)
+            .join('\n');
+
+        if (joined.length <= this.maxChars) {
+            return joined;
+        }
+
+        const keepHead = Math.floor(this.maxChars * 0.4);
+        const keepTail = this.maxChars - keepHead - 20;
+        return (
+            joined.slice(0, keepHead) +
+            '\n... [truncated] ...\n' +
+            joined.slice(-keepTail)
+        );
+    }
+}
diff --git a/src/server/index.ts b/src/server/index.ts
index 047e8eb..116ca6c 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -127,7 +127,16 @@ import {
   GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION,
 } from '../tools/analytics/get-mcp-server-analytics.js';
 import { getExportAnalyticsTool, EXPORT_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/export-analytics.js';
-import { OptimizationStorageTool } from '../tools/optimization-storage-tool.js';
+import {
+  OptimizationStorageTool,
+  OPTIMIZATION_STORAGE_TOOL_DEFINITION,
+} from '../tools/optimization-storage-tool.js';
+import {
+  ContextDeltaTool,
+  CONTEXT_DELTA_TOOL_DEFINITION,
+} from '../tools/context-delta-tool.js';
+import { SessionManager } from '../core/session-manager.js';
+import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js';
 import { AnalyticsManager } from '../analytics/analytics-manager.js';
 
 
@@ -370,41 +379,11 @@ const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager);
 const exportAnalytics = getExportAnalyticsTool(analyticsManager);
 const optimizationStorage = new OptimizationStorageTool();
 
-const OPTIMIZATION_STORAGE_TOOL_DEFINITION = {
-    name: optimizationStorage.name,
-    description: optimizationStorage.description,
-    inputSchema: {
-        type: 'object',
-        properties: {
-            operation: {
-                type: 'string',
-                enum: ['store', 'retrieve'],
-                description: 'The operation to perform.',
-            },
-            originalTextHash: {
-                type: 'string',
-                description: 'The SHA256 hash of the original text.',
-            },
-            optimizedText: {
-                type: 'string',
-                description: 'The base64 encoded optimized text (for store operation).',
-            },
-            originalTokens: {
-                type: 'number',
-                description: 'The number of tokens in the original text (for store operation).',
-            },
-            optimizedTokens: {
-                type: 'number',
-                description: 'The number of tokens in the optimized text (for store operation).',
-            },
-            tokensSaved: {
-                type: 'number',
-                description: 'The number of tokens saved (for store operation).',
-            },
-        },
-        required: ['operation', 'originalTextHash'],
-    },
-};
+const sessionManager = new SessionManager({
+  persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'),
+  tokenizer: TokenizerFactory.createFromEnv(),
+});
+const contextDelta = new ContextDeltaTool(sessionManager);
 
 // Create MCP server
 const server = new Server(
@@ -692,6 +671,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
       GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION,
       EXPORT_ANALYTICS_TOOL_DEFINITION,
       OPTIMIZATION_STORAGE_TOOL_DEFINITION,
+      CONTEXT_DELTA_TOOL_DEFINITION,
     ],
   };
 });
@@ -2043,6 +2023,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         };
       }
 
+      case 'context_delta': {
+        const result = contextDelta.run(args as any);
+        return {
+          content: [
+            {
+              type: 'text',
+              text: JSON.stringify(result, null, 2),
+            },
+          ],
+        };
+      }
+
       case 'alert_manager': {
         const options = args as any;
         const result = await alertManager.run(options);
diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts
new file mode 100644
index 0000000..28f5f35
--- /dev/null
+++ b/src/tools/context-delta-tool.ts
@@ -0,0 +1,155 @@
+import { SessionManager } from '../core/session-manager.js';
+import { calculateDelta } from '../utils/diff.js';
+
+/**
+ * context_delta MCP tool — addresses issue #122.
+ *
+ * Given (sessionId, filePath, currentContent) this tool:
+ *   1. Looks up the session from the SessionManager.
+ *   2. Diffs the current content against the session's last snapshot of
+ *      that file.
+ *   3. Updates the session's file state.
+ *   4. Returns a unified-diff delta — the caller can send ONLY the delta
+ *      to the model instead of the whole file, which is the token win.
+ *
+ * On first invocation for a given filePath the full content is treated
+ * as "the delta" (there is no baseline to diff against).
+ */
+
+export type ContextDeltaOperation = 'compute-delta' | 'seed' | 'clear';
+
+export interface ContextDeltaOptions {
+    operation: ContextDeltaOperation;
+    sessionId: string;
+    filePath: string;
+    currentContent?: string;
+}
+
+export interface ContextDeltaResponse {
+    success: boolean;
+    error?: string;
+    delta?: string;
+    isBaseline?: boolean;
+    originalSize?: number;
+    deltaSize?: number;
+    bytesSaved?: number;
+}
+
+export class ContextDeltaTool {
+    public readonly name = 'context_delta';
+    public readonly description =
+        'Compute a unified-diff delta between a file’s previous session snapshot and its current content, so the model only receives what changed.';
+
+    constructor(private readonly sessionManager: SessionManager) {}
+
+    public run(options: ContextDeltaOptions): ContextDeltaResponse {
+        switch (options.operation) {
+            case 'compute-delta':
+                return this.computeDelta(options);
+            case 'seed':
+                return this.seed(options);
+            case 'clear':
+                return this.clear(options);
+            default:
+                return {
+                    success: false,
+                    error: `Unknown operation: ${String(
+                        (options as { operation: unknown }).operation
+                    )}`,
+                };
+        }
+    }
+
+    private computeDelta(options: ContextDeltaOptions): ContextDeltaResponse {
+        const { sessionId, filePath, currentContent } = options;
+        if (currentContent === undefined) {
+            return {
+                success: false,
+                error: 'currentContent is required for compute-delta',
+            };
+        }
+        const session = this.sessionManager.getSession(sessionId);
+        if (!session) {
+            return { success: false, error: `Unknown session: ${sessionId}` };
+        }
+        const previous = session.getFileContent(filePath);
+        session.setFileContent(filePath, currentContent);
+
+        if (previous === undefined) {
+            return {
+                success: true,
+                isBaseline: true,
+                delta: currentContent,
+                originalSize: currentContent.length,
+                deltaSize: currentContent.length,
+                bytesSaved: 0,
+            };
+        }
+
+        const delta = calculateDelta(previous, currentContent, filePath);
+        return {
+            success: true,
+            isBaseline: false,
+            delta,
+            originalSize: currentContent.length,
+            deltaSize: delta.length,
+            bytesSaved: Math.max(0, currentContent.length - delta.length),
+        };
+    }
+
+    private seed(options: ContextDeltaOptions): ContextDeltaResponse {
+        const { sessionId, filePath, currentContent } = options;
+        if (currentContent === undefined) {
+            return { success: false, error: 'currentContent is required for seed' };
+        }
+        try {
+            this.sessionManager.updateFileState(sessionId, filePath, currentContent);
+            return { success: true, isBaseline: true };
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: message };
+        }
+    }
+
+    private clear(options: ContextDeltaOptions): ContextDeltaResponse {
+        const session = this.sessionManager.getSession(options.sessionId);
+        if (!session) {
+            return {
+                success: false,
+                error: `Unknown session: ${options.sessionId}`,
+            };
+        }
+        session.setFileContent(options.filePath, '');
+        return { success: true };
+    }
+}
+
+export const CONTEXT_DELTA_TOOL_DEFINITION = {
+    name: 'context_delta',
+    description:
+        'Compute a unified-diff delta for a file in a given session so the model only sees changes since the last snapshot. Operations: compute-delta, seed, clear.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            operation: {
+                type: 'string',
+                enum: ['compute-delta', 'seed', 'clear'],
+                description: 'Operation to perform',
+            },
+            sessionId: {
+                type: 'string',
+                description: 'Session identifier (create one via SessionManager first)',
+            },
+            filePath: {
+                type: 'string',
+                description: 'Path of the file inside the session state',
+            },
+            currentContent: {
+                type: 'string',
+                description:
+                    'Current file content (required for compute-delta and seed)',
+            },
+        },
+        required: ['operation', 'sessionId', 'filePath'],
+    },
+};
diff --git a/src/utils/diff.ts b/src/utils/diff.ts
new file mode 100644
index 0000000..3c032e2
--- /dev/null
+++ b/src/utils/diff.ts
@@ -0,0 +1,39 @@
+import { createPatch, applyPatch } from 'diff';
+
+/**
+ * Delta-based context helpers — addresses issue #122.
+ *
+ * Uses the unified-diff format from the existing `diff` dependency so the
+ * resulting deltas are human-readable and round-trippable via applyDelta.
+ */
+
+/**
+ * Compute a unified-diff delta from `previous` to `current`.
+ * Returns the empty string when the inputs are identical (callers can use
+ * that to skip transmitting a no-op delta).
+ */
+export function calculateDelta(
+    previous: string,
+    current: string,
+    fileName: string = 'content'
+): string {
+    if (previous === current) {
+        return '';
+    }
+    return createPatch(fileName, previous, current, '', '');
+}
+
+/**
+ * Apply a unified-diff `delta` to `previous`, returning the reconstructed
+ * `current`. Throws if the patch cannot be applied cleanly.
+ */
+export function applyDelta(previous: string, delta: string): string {
+    if (delta === '') {
+        return previous;
+    }
+    const result = applyPatch(previous, delta);
+    if (result === false) {
+        throw new Error('Failed to apply delta: patch did not apply cleanly');
+    }
+    return result;
+}
diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts
index b09ecb4..c109989 100644
--- a/src/validation/tool-schemas.ts
+++ b/src/validation/tool-schemas.ts
@@ -413,6 +413,24 @@ export const ExportAnalyticsSchema = z.object({
     .describe('Optional filter by MCP server name'),
 });
 
+// 72. optimization_storage
+export const OptimizationStorageSchema = z.object({
+  operation: z.enum(['store', 'retrieve']),
+  originalTextHash: z.string().optional(),
+  optimizedText: z.string().optional(),
+  originalTokens: z.number().optional(),
+  optimizedTokens: z.number().optional(),
+  tokensSaved: z.number().optional(),
+});
+
+// 73. context_delta
+export const ContextDeltaSchema = z.object({
+  operation: z.enum(['compute-delta', 'seed', 'clear']),
+  sessionId: z.string(),
+  filePath: z.string(),
+  currentContent: z.string().optional(),
+});
+
 // Map tool names to their schemas for easy lookup
 export const toolSchemaMap: Record<string, z.ZodType<any>> = {
   optimize_text: OptimizeTextSchema,
@@ -486,4 +504,6 @@ export const toolSchemaMap: Record<string, z.ZodType<any>> = {
   get_action_analytics: GetActionAnalyticsSchema,
   get_mcp_server_analytics: GetMcpServerAnalyticsSchema,
   export_analytics: ExportAnalyticsSchema,
+  optimization_storage: OptimizationStorageSchema,
+  context_delta: ContextDeltaSchema,
 };

From bfe8278b6bf61a3dc450ce8ba2ea79cdd3a9f7bf Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:31:06 -0400
Subject: [PATCH 14/26] test: add unit tests for lrucache, tokenizers, diff,
 session, configmanager

33 tests covering eviction/TTL/stats (LruCache), content-type detection
and factory routing (tokenizers), round-trip and mismatched-baseline
behaviors (diff), history compression and snapshot round-trip
(Session/SessionManager), and user-override + validation fallback
(ConfigManager).

Refs #120, #121, #122, #124, #125

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/unit/config.test.ts     | 68 +++++++++++++++++++++++++++++++
 tests/unit/diff.test.ts       | 33 +++++++++++++++
 tests/unit/lru-cache.test.ts  | 77 +++++++++++++++++++++++++++++++++++
 tests/unit/session.test.ts    | 76 ++++++++++++++++++++++++++++++++++
 tests/unit/tokenizers.test.ts | 64 +++++++++++++++++++++++++++++
 5 files changed, 318 insertions(+)
 create mode 100644 tests/unit/config.test.ts
 create mode 100644 tests/unit/diff.test.ts
 create mode 100644 tests/unit/lru-cache.test.ts
 create mode 100644 tests/unit/session.test.ts
 create mode 100644 tests/unit/tokenizers.test.ts

diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts
new file mode 100644
index 0000000..27b2850
--- /dev/null
+++ b/tests/unit/config.test.ts
@@ -0,0 +1,68 @@
+import { describe, it, expect, afterEach } from '@jest/globals';
+import { mkdtempSync, writeFileSync, rmSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { ConfigManager } from '../../src/core/config.js';
+
+describe('ConfigManager', () => {
+  const tempDirs: string[] = [];
+
+  afterEach(() => {
+    while (tempDirs.length) {
+      const dir = tempDirs.pop();
+      if (dir) {
+        rmSync(dir, { recursive: true, force: true });
+      }
+    }
+  });
+
+  function writeConfig(content: string): string {
+    const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-config-'));
+    tempDirs.push(dir);
+    const file = join(dir, 'config.json');
+    writeFileSync(file, content);
+    return file;
+  }
+
+  it('returns defaults when no config file exists', () => {
+    const mgr = new ConfigManager(join(tmpdir(), 'does-not-exist-xyz.json'));
+    const opt = mgr.getOptimizationConfig();
+    expect(opt.compressionTokenThreshold).toBe(0.7);
+    expect(opt.quality).toBe('balanced');
+    expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000);
+  });
+
+  it('overrides defaults with user config', () => {
+    const configPath = writeConfig(
+      JSON.stringify({
+        optimization: {
+          compressionTokenThreshold: 0.9,
+          quality: 'max',
+          modelTokenLimits: { 'custom-model': 500000 },
+        },
+      })
+    );
+    const mgr = new ConfigManager(configPath);
+    const opt = mgr.getOptimizationConfig();
+    expect(opt.compressionTokenThreshold).toBe(0.9);
+    expect(opt.quality).toBe('max');
+    expect(mgr.getModelTokenLimit('custom-model')).toBe(500000);
+    // Unrelated defaults still filled in
+    expect(opt.compressionPreserveThreshold).toBe(0.3);
+  });
+
+  it('falls back to defaults on invalid config', () => {
+    const configPath = writeConfig(
+      JSON.stringify({ optimization: { compressionTokenThreshold: 5 } })
+    );
+    const mgr = new ConfigManager(configPath);
+    // Invalid value (>1) is rejected by schema → defaults applied
+    expect(mgr.getOptimizationConfig().compressionTokenThreshold).toBe(0.7);
+  });
+
+  it('falls back to defaults on malformed JSON', () => {
+    const configPath = writeConfig('not json at all');
+    const mgr = new ConfigManager(configPath);
+    expect(mgr.getOptimizationConfig().quality).toBe('balanced');
+  });
+});
diff --git a/tests/unit/diff.test.ts b/tests/unit/diff.test.ts
new file mode 100644
index 0000000..0780b1f
--- /dev/null
+++ b/tests/unit/diff.test.ts
@@ -0,0 +1,33 @@
+import { describe, it, expect } from '@jest/globals';
+import { calculateDelta, applyDelta } from '../../src/utils/diff.js';
+
+describe('diff utils', () => {
+  it('returns empty delta when inputs are identical', () => {
+    expect(calculateDelta('hello', 'hello')).toBe('');
+  });
+
+  it('round-trips a simple change', () => {
+    const prev = 'line1\nline2\nline3\n';
+    const next = 'line1\nline2 changed\nline3\n';
+    const delta = calculateDelta(prev, next);
+    expect(delta).not.toBe('');
+    expect(applyDelta(prev, delta)).toBe(next);
+  });
+
+  it('applyDelta on an empty delta is a no-op', () => {
+    expect(applyDelta('anything', '')).toBe('anything');
+  });
+
+  it('produces a meaningfully smaller delta than the full content for small edits', () => {
+    const prev = 'a\n'.repeat(500);
+    const next = prev + 'appended line\n';
+    const delta = calculateDelta(prev, next);
+    expect(delta.length).toBeLessThan(next.length);
+  });
+
+  it('throws when the patch targets a different baseline than supplied', () => {
+    const patch = calculateDelta('original\ntext\n', 'original\nchanged\n');
+    // Applying the patch against completely different content fails.
+    expect(() => applyDelta('totally different input\n', patch)).toThrow();
+  });
+});
diff --git a/tests/unit/lru-cache.test.ts b/tests/unit/lru-cache.test.ts
new file mode 100644
index 0000000..6b7f2ac
--- /dev/null
+++ b/tests/unit/lru-cache.test.ts
@@ -0,0 +1,77 @@
+import { describe, it, expect } from '@jest/globals';
+import { LruCache } from '../../src/utils/lru-cache.js';
+
+describe('LruCache', () => {
+  it('rejects non-positive maxSize', () => {
+    expect(() => new LruCache<string, number>(0)).toThrow();
+    expect(() => new LruCache<string, number>(-1)).toThrow();
+  });
+
+  it('get returns undefined on miss and counts it', () => {
+    const cache = new LruCache<string, number>(2);
+    expect(cache.get('x')).toBeUndefined();
+    expect(cache.stats().misses).toBe(1);
+  });
+
+  it('set/get round-trips and counts hits', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    expect(cache.get('a')).toBe(1);
+    expect(cache.stats().hits).toBe(1);
+  });
+
+  it('evicts the least recently used entry when full', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    cache.get('a');
+    cache.set('c', 3);
+
+    expect(cache.get('a')).toBe(1);
+    expect(cache.get('b')).toBeUndefined();
+    expect(cache.get('c')).toBe(3);
+    expect(cache.stats().evictions).toBe(1);
+  });
+
+  it('refreshes recency on get', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    cache.get('a');
+    cache.set('c', 3);
+
+    expect(cache.has('b')).toBe(false);
+    expect(cache.has('a')).toBe(true);
+  });
+
+  it('expires entries past the TTL', async () => {
+    const cache = new LruCache<string, number>(2, 20);
+    cache.set('a', 1);
+    await new Promise((r) => setTimeout(r, 30));
+    expect(cache.get('a')).toBeUndefined();
+    expect(cache.stats().expired).toBe(1);
+  });
+
+  it('prune removes expired entries', async () => {
+    const cache = new LruCache<string, number>(4, 20);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    await new Promise((r) => setTimeout(r, 30));
+    cache.set('c', 3);
+    const removed = cache.prune();
+    expect(removed).toBe(2);
+    expect(cache.size).toBe(1);
+  });
+
+  it('stats.hitRate reflects hits / total', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    cache.get('a');
+    cache.get('a');
+    cache.get('missing');
+    const stats = cache.stats();
+    expect(stats.hits).toBe(2);
+    expect(stats.misses).toBe(1);
+    expect(stats.hitRate).toBeCloseTo(2 / 3);
+  });
+});
diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts
new file mode 100644
index 0000000..19f2428
--- /dev/null
+++ b/tests/unit/session.test.ts
@@ -0,0 +1,76 @@
+import { describe, it, expect } from '@jest/globals';
+import { Session } from '../../src/core/session.js';
+import { SessionManager } from '../../src/core/session-manager.js';
+import { HeuristicTokenizer } from '../../src/core/tokenizers/heuristic-tokenizer.js';
+
+describe('Session', () => {
+  it('appends messages and tracks updatedAt', async () => {
+    const session = new Session();
+    const before = session.updatedAt;
+    await new Promise((r) => setTimeout(r, 5));
+    session.addMessage('user', 'hi');
+    expect(session.getHistory().length).toBe(1);
+    expect(session.updatedAt).toBeGreaterThan(before);
+  });
+
+  it('compressHistory is a no-op under the budget', async () => {
+    const session = new Session({ maxTokens: 10_000 });
+    session.addMessage('user', 'short');
+    const before = session.getHistory().length;
+    await session.compressHistory();
+    expect(session.getHistory().length).toBe(before);
+  });
+
+  it('compressHistory summarizes head when over budget', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const session = new Session({ maxTokens: 50, tokenizer });
+    // Each long message is several hundred chars → easily over 50 tokens.
+    for (let i = 0; i < 10; i++) {
+      session.addMessage('user', 'a'.repeat(400) + ` turn=${i}`);
+    }
+    expect((await session.getHistoryTokenCount()) > 50).toBe(true);
+    await session.compressHistory();
+    const history = session.getHistory();
+    expect(history[0].role).toBe('system');
+    expect(history[0].content.startsWith('[summary')).toBe(true);
+    expect(history.length).toBeLessThan(10);
+  });
+
+  it('snapshot round-trips', () => {
+    const session = new Session({ maxTokens: 42 });
+    session.addMessage('user', 'hello');
+    session.setFileContent('a.ts', 'const x = 1;');
+    const snapshot = session.toSnapshot();
+    const restored = Session.fromSnapshot(snapshot);
+    expect(restored.id).toBe(session.id);
+    expect(restored.maxTokens).toBe(42);
+    expect(restored.getFileContent('a.ts')).toBe('const x = 1;');
+    expect(restored.getHistory()[0].content).toBe('hello');
+  });
+});
+
+describe('SessionManager', () => {
+  it('create/get/delete lifecycle', () => {
+    const manager = new SessionManager();
+    const session = manager.createSession();
+    expect(manager.getSession(session.id)).toBe(session);
+    expect(manager.deleteSession(session.id)).toBe(true);
+    expect(manager.getSession(session.id)).toBeUndefined();
+  });
+
+  it('addMessage auto-compresses when over budget', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const manager = new SessionManager({ tokenizer, defaultMaxTokens: 30 });
+    const session = manager.createSession();
+    for (let i = 0; i < 8; i++) {
+      await manager.addMessage(session.id, 'user', 'x'.repeat(300));
+    }
+    const history = session.getHistory();
+    expect(history[0].content.startsWith('[summary')).toBe(true);
+  });
+
+  it('throws for unknown session ids', async () => {
+    const manager = new SessionManager();
+    await expect(manager.addMessage('bogus', 'user', 'hi')).rejects.toThrow();
+  });
+});
diff --git a/tests/unit/tokenizers.test.ts b/tests/unit/tokenizers.test.ts
new file mode 100644
index 0000000..ed2f2a3
--- /dev/null
+++ b/tests/unit/tokenizers.test.ts
@@ -0,0 +1,64 @@
+import { describe, it, expect } from '@jest/globals';
+import { HeuristicTokenizer, ContentType } from '../../src/core/tokenizers/heuristic-tokenizer.js';
+import { TokenizerFactory } from '../../src/core/tokenizers/tokenizer-factory.js';
+import { TiktokenTokenizer } from '../../src/core/tokenizers/tiktoken-tokenizer.js';
+
+describe('HeuristicTokenizer', () => {
+  it('detects JSON content', () => {
+    const json = '{"a": 1, "b": [1, 2, 3]}';
+    expect(HeuristicTokenizer.detectContentType(json)).toBe(ContentType.Json);
+  });
+
+  it('detects code content', () => {
+    const code = 'function foo() { return 42; }';
+    expect(HeuristicTokenizer.detectContentType(code)).toBe(ContentType.Code);
+  });
+
+  it('detects markdown content', () => {
+    const md = '# Heading\n\n- item one\n- item two';
+    expect(HeuristicTokenizer.detectContentType(md)).toBe(ContentType.Markdown);
+  });
+
+  it('defaults to text content', () => {
+    const text = 'Just a short plain sentence.';
+    expect(HeuristicTokenizer.detectContentType(text)).toBe(ContentType.Text);
+  });
+
+  it('uses a lower chars/token ratio for code than text', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const code = 'function foo() { return 42; }';
+    const text = 'A sentence of roughly similar length here.';
+    const codeTokens = await tokenizer.countTokens(code);
+    const textTokens = await tokenizer.countTokens(text);
+    // Code has ratio 2.5 vs text 4.0 → for strings of similar length, code tokens > text tokens.
+    expect(codeTokens / code.length).toBeGreaterThan(textTokens / text.length);
+  });
+
+  it('caches repeated inputs', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const input = 'cache me';
+    const first = await tokenizer.countTokens(input);
+    const second = await tokenizer.countTokens(input);
+    expect(first).toBe(second);
+  });
+});
+
+describe('TokenizerFactory', () => {
+  it('returns a TiktokenTokenizer for gpt-4', () => {
+    const t = TokenizerFactory.create('gpt-4');
+    expect(t).toBeInstanceOf(TiktokenTokenizer);
+    t.free();
+  });
+
+  it('returns a TiktokenTokenizer for Claude models (maps to gpt-4)', () => {
+    const t = TokenizerFactory.create('claude-opus-4-7');
+    expect(t).toBeInstanceOf(TiktokenTokenizer);
+    t.free();
+  });
+
+  it('falls back to HeuristicTokenizer for unknown models', () => {
+    const t = TokenizerFactory.create('some-unknown-local-model');
+    expect(t).toBeInstanceOf(HeuristicTokenizer);
+    t.free();
+  });
+});

From 8b7e4818abdd0fe8a138999967de2f113984c2a9 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:43:12 -0400
Subject: [PATCH 15/26] fix(core): production hardening for session, delta,
 storage, tokenizer factory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the audit gaps:

- SessionManager: atomic persist (tmp + rename), debounced writes
  (~250ms), error-isolated (disk-full no longer crashes the server),
  zod-validated load, session TTL eviction (30d), per-file size cap
  (10 MB), and flush() for clean shutdown.
- Session: getHistoryTokenCount now REQUIRES a tokenizer unless the
  caller opts into the char/4 fallback via allowCharHeuristic — the
  whole point of #124 is removing that heuristic. Added
  clearFileContent.
- context_delta tool: compute-delta and clear now route through
  SessionManager.updateFileState / clearFileState so file-state changes
  are durable across restarts instead of in-memory-only.
- TokenizerFactory: caches instances per model (one native tiktoken
  encoder instead of one per call) and exposes disposeAll() for
  shutdown. Added Gemini/Google routing path.
- GoogleAITokenizer: new — calls Google AI countTokens REST with a
  10s timeout, LRU-memoized, surfaces errors so the factory can pick
  a fallback.
- SqliteOptimizationStorage: default path now an absolute
  ~/.token-optimizer/optimization.db and the directory is created
  on demand. Relative "./optimization.db" was unusable when the MCP
  server launched from an unknown cwd.
- Server shutdown: sessionManager.flush(), TokenizerFactory.disposeAll(),
  and optimizationStorage.close() added to the cleanup pipeline.

Refs #121, #122, #124

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/analytics/optimization-storage.ts      |  15 +-
 src/core/session-manager.ts                | 185 ++++++++++++++++-----
 src/core/session.ts                        |  32 +++-
 src/core/tokenizers/google-ai-tokenizer.ts |  93 +++++++++++
 src/core/tokenizers/tokenizer-factory.ts   |  67 ++++++--
 src/server/index.ts                        |   3 +
 src/tools/context-delta-tool.ts            |  23 ++-
 tests/unit/session.test.ts                 |  22 ++-
 8 files changed, 372 insertions(+), 68 deletions(-)
 create mode 100644 src/core/tokenizers/google-ai-tokenizer.ts

diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts
index cd6991f..9509709 100644
--- a/src/analytics/optimization-storage.ts
+++ b/src/analytics/optimization-storage.ts
@@ -1,4 +1,7 @@
 import Database from 'better-sqlite3';
+import { existsSync, mkdirSync } from 'fs';
+import { homedir } from 'os';
+import { dirname, join } from 'path';
 import { CompressionEngine } from '../core/compression-engine.js';
 
 export interface OptimizationResult {
@@ -9,17 +12,25 @@ export interface OptimizationResult {
     tokensSaved: number;
 }
 
+export function getDefaultOptimizationDbPath(): string {
+    return join(homedir(), '.token-optimizer', 'optimization.db');
+}
+
 export class SqliteOptimizationStorage {
     private db: Database.Database | null = null;
     private readonly dbPath: string;
     private readonly compressionEngine: CompressionEngine;
 
-    constructor(dbPath: string = './optimization.db') {
-        this.dbPath = dbPath;
+    constructor(dbPath?: string) {
+        this.dbPath = dbPath ?? getDefaultOptimizationDbPath();
         this.compressionEngine = new CompressionEngine();
     }
 
     public initializeDatabase(): void {
+        const dir = dirname(this.dbPath);
+        if (!existsSync(dir)) {
+            mkdirSync(dir, { recursive: true });
+        }
         this.db = new Database(this.dbPath);
         this.db.pragma('journal_mode = WAL');
         this.db.exec(`
diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts
index 1cc8372..2a19edd 100644
--- a/src/core/session-manager.ts
+++ b/src/core/session-manager.ts
@@ -1,31 +1,68 @@
-import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
+import {
+    existsSync,
+    mkdirSync,
+    readFileSync,
+    writeFileSync,
+    renameSync,
+    unlinkSync,
+} from 'fs';
 import { dirname } from 'path';
+import { z } from 'zod';
 import {
     Session,
     SessionOptions,
-    SessionSnapshot,
     MessageRole,
 } from './session.js';
 import { ITokenizer } from './tokenizers/i-tokenizer.js';
 import { ISummarizer } from './summarization.js';
 
 /**
- * Singleton-style SessionManager — addresses issues #121 / #122.
+ * Persistent SessionManager — addresses issues #121 / #122.
  *
- * Persists all sessions to a single JSON file so they survive restarts.
- * When a message is added we check whether the session has exceeded its
- * token budget and, if so, auto-compress the history (#121).
+ * Production behaviors added after the audit:
+ *   - Atomic persistence: write to <path>.tmp then rename so a crash mid-
+ *     write never produces a corrupt sessions.json.
+ *   - Debounced persistence: rapid addMessage calls coalesce into one
+ *     disk write per PERSIST_DEBOUNCE_MS window.
+ *   - Error-isolated persist(): a disk-full or permission error is logged
+ *     and never bubbles up to crash the MCP server.
+ *   - Schema-validated load(): malformed persisted state is rejected with
+ *     a warning instead of being cast blindly.
+ *   - Size / expiry caps: sessions inactive past `sessionTtlMs` are
+ *     evicted on load, and no individual file state entry can exceed
+ *     `maxFileStateBytes`.
  */
 
+const PERSIST_DEBOUNCE_MS = 250;
+const DEFAULT_SESSION_TTL_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
+const DEFAULT_MAX_FILE_STATE_BYTES = 10 * 1024 * 1024; // 10 MB per file
+
+const MessageSchema = z.object({
+    role: z.enum(['system', 'user', 'assistant', 'tool']),
+    content: z.string(),
+    timestamp: z.number(),
+});
+
+const SessionSnapshotSchema = z.object({
+    id: z.string(),
+    history: z.array(MessageSchema),
+    fileState: z.record(z.string(), z.string()),
+    maxTokens: z.number(),
+    createdAt: z.number(),
+    updatedAt: z.number(),
+});
+
+const PersistedStateSchema = z.object({
+    sessions: z.array(SessionSnapshotSchema),
+});
+
 export interface SessionManagerOptions {
     persistencePath?: string;
     tokenizer?: ITokenizer;
     summarizer?: ISummarizer;
     defaultMaxTokens?: number;
-}
-
-interface PersistedState {
-    sessions: SessionSnapshot[];
+    sessionTtlMs?: number;
+    maxFileStateBytes?: number;
 }
 
 export class SessionManager {
@@ -34,12 +71,19 @@ export class SessionManager {
     private readonly tokenizer: ITokenizer | undefined;
     private readonly summarizer: ISummarizer | undefined;
     private readonly defaultMaxTokens: number | undefined;
+    private readonly sessionTtlMs: number;
+    private readonly maxFileStateBytes: number;
+    private pendingPersistTimer: NodeJS.Timeout | null = null;
+    private persistInFlight = false;
 
     constructor(options: SessionManagerOptions = {}) {
         this.persistencePath = options.persistencePath ?? null;
         this.tokenizer = options.tokenizer;
         this.summarizer = options.summarizer;
         this.defaultMaxTokens = options.defaultMaxTokens;
+        this.sessionTtlMs = options.sessionTtlMs ?? DEFAULT_SESSION_TTL_MS;
+        this.maxFileStateBytes =
+            options.maxFileStateBytes ?? DEFAULT_MAX_FILE_STATE_BYTES;
         if (this.persistencePath && existsSync(this.persistencePath)) {
             this.load();
         }
@@ -53,7 +97,7 @@ export class SessionManager {
             ...options,
         });
         this.sessions.set(session.id, session);
-        this.persist();
+        this.schedulePersist();
         return session;
     }
 
@@ -68,33 +112,24 @@ export class SessionManager {
     public deleteSession(id: string): boolean {
         const removed = this.sessions.delete(id);
         if (removed) {
-            this.persist();
+            this.schedulePersist();
         }
         return removed;
     }
 
-    /**
-     * Add a message to the session and auto-compress the history if the
-     * token budget is exceeded (#121).
-     *
-     * Returns the post-add token count of the session.
-     */
     public async addMessage(
         sessionId: string,
         role: MessageRole,
         content: string
     ): Promise<number> {
-        const session = this.sessions.get(sessionId);
-        if (!session) {
-            throw new Error(`Unknown session: ${sessionId}`);
-        }
+        const session = this.requireSession(sessionId);
         session.addMessage(role, content);
         const currentTokens = await session.getHistoryTokenCount();
         let finalTokens = currentTokens;
         if (currentTokens > session.maxTokens) {
             finalTokens = await session.compressHistory();
         }
-        this.persist();
+        this.schedulePersist();
         return finalTokens;
     }
 
@@ -103,26 +138,92 @@ export class SessionManager {
         filePath: string,
         content: string
     ): void {
-        const session = this.sessions.get(sessionId);
-        if (!session) {
-            throw new Error(`Unknown session: ${sessionId}`);
+        const session = this.requireSession(sessionId);
+        if (Buffer.byteLength(content, 'utf8') > this.maxFileStateBytes) {
+            throw new Error(
+                `Session file state content exceeds ${this.maxFileStateBytes} bytes for ${filePath}`
+            );
         }
         session.setFileContent(filePath, content);
-        this.persist();
+        this.schedulePersist();
+    }
+
+    public clearFileState(sessionId: string, filePath: string): void {
+        const session = this.requireSession(sessionId);
+        session.clearFileContent(filePath);
+        this.schedulePersist();
+    }
+
+    /**
+     * Flush any pending debounced persist. Call this from the host's
+     * shutdown handler so the last writes survive.
+     */
+    public async flush(): Promise<void> {
+        if (this.pendingPersistTimer) {
+            clearTimeout(this.pendingPersistTimer);
+            this.pendingPersistTimer = null;
+        }
+        this.persistNow();
     }
 
-    private persist(): void {
+    private requireSession(id: string): Session {
+        const session = this.sessions.get(id);
+        if (!session) {
+            throw new Error(`Unknown session: ${id}`);
+        }
+        return session;
+    }
+
+    private schedulePersist(): void {
         if (!this.persistencePath) {
             return;
         }
-        const state: PersistedState = {
-            sessions: this.listSessions().map((s) => s.toSnapshot()),
-        };
-        const dir = dirname(this.persistencePath);
-        if (!existsSync(dir)) {
-            mkdirSync(dir, { recursive: true });
+        if (this.pendingPersistTimer) {
+            return;
+        }
+        this.pendingPersistTimer = setTimeout(() => {
+            this.pendingPersistTimer = null;
+            this.persistNow();
+        }, PERSIST_DEBOUNCE_MS);
+        // Don't keep the event loop alive just for persistence.
+        if (typeof this.pendingPersistTimer.unref === 'function') {
+            this.pendingPersistTimer.unref();
+        }
+    }
+
+    private persistNow(): void {
+        if (!this.persistencePath || this.persistInFlight) {
+            return;
+        }
+        this.persistInFlight = true;
+        try {
+            const state = {
+                sessions: this.listSessions().map((s) => s.toSnapshot()),
+            };
+            const dir = dirname(this.persistencePath);
+            if (!existsSync(dir)) {
+                mkdirSync(dir, { recursive: true });
+            }
+            const tmpPath = `${this.persistencePath}.tmp`;
+            writeFileSync(tmpPath, JSON.stringify(state, null, 2));
+            renameSync(tmpPath, this.persistencePath);
+        } catch (error) {
+            const message =
+                error instanceof Error ? error.message : String(error);
+            console.warn(
+                `SessionManager: failed to persist to ${this.persistencePath}: ${message}`
+            );
+            // Best-effort cleanup of the tmp file
+            if (this.persistencePath) {
+                try {
+                    unlinkSync(`${this.persistencePath}.tmp`);
+                } catch {
+                    // Ignore — tmp file may not exist.
+                }
+            }
+        } finally {
+            this.persistInFlight = false;
         }
-        writeFileSync(this.persistencePath, JSON.stringify(state, null, 2));
     }
 
     private load(): void {
@@ -131,11 +232,19 @@ export class SessionManager {
         }
         try {
             const raw = readFileSync(this.persistencePath, 'utf-8');
-            const parsed = JSON.parse(raw) as PersistedState;
-            if (!parsed || !Array.isArray(parsed.sessions)) {
+            const json = JSON.parse(raw);
+            const parsed = PersistedStateSchema.safeParse(json);
+            if (!parsed.success) {
+                console.warn(
+                    `SessionManager: invalid persisted state at ${this.persistencePath}, discarding.`
+                );
                 return;
             }
-            for (const snapshot of parsed.sessions) {
+            const now = Date.now();
+            for (const snapshot of parsed.data.sessions) {
+                if (now - snapshot.updatedAt > this.sessionTtlMs) {
+                    continue; // Expired session — drop.
+                }
                 const session = Session.fromSnapshot(snapshot, {
                     tokenizer: this.tokenizer,
                     summarizer: this.summarizer,
diff --git a/src/core/session.ts b/src/core/session.ts
index 78ebd96..1830b5d 100644
--- a/src/core/session.ts
+++ b/src/core/session.ts
@@ -37,10 +37,17 @@ export interface SessionOptions {
     preserveTailRatio?: number;
     tokenizer?: ITokenizer;
     summarizer?: ISummarizer;
+    /**
+     * When true, getHistoryTokenCount may fall back to a character/4
+     * heuristic if no tokenizer is supplied. Production code should
+     * always pass a real tokenizer and leave this false (the default).
+     */
+    allowCharHeuristic?: boolean;
 }
 
 const DEFAULT_MAX_TOKENS = 100_000;
 const DEFAULT_PRESERVE_TAIL_RATIO = 0.3;
+const CHAR_HEURISTIC_RATIO = 4;
 
 export class Session {
     public readonly id: string;
@@ -53,6 +60,7 @@ export class Session {
     private readonly preserveTailRatio: number;
     private readonly tokenizer: ITokenizer | null;
     private readonly summarizer: ISummarizer;
+    private readonly allowCharHeuristic: boolean;
 
     constructor(options: SessionOptions = {}) {
         this.id = options.id ?? randomUUID();
@@ -60,6 +68,7 @@ export class Session {
         this.preserveTailRatio = options.preserveTailRatio ?? DEFAULT_PRESERVE_TAIL_RATIO;
         this.tokenizer = options.tokenizer ?? null;
         this.summarizer = options.summarizer ?? new TruncatingSummarizer();
+        this.allowCharHeuristic = options.allowCharHeuristic ?? false;
         this.createdAt = Date.now();
         this.updatedAt = this.createdAt;
     }
@@ -88,14 +97,31 @@ export class Session {
         this.updatedAt = Date.now();
     }
 
+    public clearFileContent(filePath: string): void {
+        if (filePath in this.fileState) {
+            delete this.fileState[filePath];
+            this.updatedAt = Date.now();
+        }
+    }
+
     /**
-     * Total token count of the current history. Uses the injected tokenizer
-     * when available; otherwise falls back to the character/4 heuristic.
+     * Total token count of the current history.
+     *
+     * Requires a tokenizer unless the caller opted into the character/4
+     * heuristic via `allowCharHeuristic: true`. We default to requiring a
+     * tokenizer because #124's whole point is eliminating char/4.
      */
     public async getHistoryTokenCount(): Promise<number> {
         if (!this.tokenizer) {
+            if (!this.allowCharHeuristic) {
+                throw new Error(
+                    'Session.getHistoryTokenCount requires a tokenizer. ' +
+                        'Construct the Session with TokenizerFactory.create(...) ' +
+                        'or pass allowCharHeuristic: true to opt into the fallback.'
+                );
+            }
             return this.history.reduce(
-                (acc, m) => acc + Math.ceil(m.content.length / 4),
+                (acc, m) => acc + Math.ceil(m.content.length / CHAR_HEURISTIC_RATIO),
                 0
             );
         }
diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts
new file mode 100644
index 0000000..19ea381
--- /dev/null
+++ b/src/core/tokenizers/google-ai-tokenizer.ts
@@ -0,0 +1,93 @@
+import { ITokenizer } from './i-tokenizer.js';
+import { LruCache } from '../../utils/lru-cache.js';
+
+const DEFAULT_CACHE_SIZE = 500;
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models';
+const REQUEST_TIMEOUT_MS = 10_000;
+
+/**
+ * Remote tokenizer that uses Google AI's countTokens REST endpoint —
+ * addresses issue #124's GoogleAITokenizer requirement.
+ *
+ * Network calls are memoized in an LruCache with a TTL so repeated
+ * token counts don't re-hit the API. If the request fails (network,
+ * 4xx, 5xx) we surface the error to the caller — TokenCounter above
+ * is responsible for deciding whether to fall back to a local
+ * tokenizer.
+ */
+export class GoogleAITokenizer implements ITokenizer {
+    public readonly modelName: string;
+    private readonly apiKey: string;
+    private readonly endpoint: string;
+    private readonly cache: LruCache<string, number>;
+    private readonly timeoutMs: number;
+
+    constructor(
+        modelName: string,
+        apiKey: string,
+        options: {
+            endpoint?: string;
+            cache?: LruCache<string, number>;
+            timeoutMs?: number;
+        } = {}
+    ) {
+        if (!apiKey) {
+            throw new Error('GoogleAITokenizer requires an apiKey');
+        }
+        this.modelName = modelName;
+        this.apiKey = apiKey;
+        this.endpoint = options.endpoint ?? DEFAULT_ENDPOINT;
+        this.cache =
+            options.cache ??
+            new LruCache<string, number>(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS);
+        this.timeoutMs = options.timeoutMs ?? REQUEST_TIMEOUT_MS;
+    }
+
+    public async countTokens(text: string): Promise<number> {
+        const cached = this.cache.get(text);
+        if (cached !== undefined) {
+            return cached;
+        }
+
+        const url = `${this.endpoint}/${encodeURIComponent(
+            this.modelName
+        )}:countTokens?key=${encodeURIComponent(this.apiKey)}`;
+
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+        try {
+            const response = await fetch(url, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({
+                    contents: [{ parts: [{ text }] }],
+                }),
+                signal: controller.signal,
+            });
+
+            if (!response.ok) {
+                const body = await response.text().catch(() => '');
+                throw new Error(
+                    `Google AI countTokens failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}`
+                );
+            }
+
+            const data = (await response.json()) as { totalTokens?: number };
+            if (typeof data.totalTokens !== 'number') {
+                throw new Error(
+                    `Google AI countTokens returned unexpected payload: ${JSON.stringify(data).slice(0, 200)}`
+                );
+            }
+            this.cache.set(text, data.totalTokens);
+            return data.totalTokens;
+        } finally {
+            clearTimeout(timeout);
+        }
+    }
+
+    public free(): void {
+        this.cache.clear();
+    }
+}
diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts
index ea4b360..d4d00b3 100644
--- a/src/core/tokenizers/tokenizer-factory.ts
+++ b/src/core/tokenizers/tokenizer-factory.ts
@@ -1,33 +1,72 @@
 import { ITokenizer } from './i-tokenizer.js';
 import { TiktokenTokenizer } from './tiktoken-tokenizer.js';
 import { HeuristicTokenizer } from './heuristic-tokenizer.js';
+import { GoogleAITokenizer } from './google-ai-tokenizer.js';
 
+/**
+ * Pluggable tokenizer factory — addresses issues #123 / #124.
+ *
+ * Resolution order:
+ *   1. Google AI models (`gemini-*`) — GoogleAITokenizer when
+ *      GOOGLE_AI_API_KEY is set, else HeuristicTokenizer.
+ *   2. Tiktoken-compatible families (GPT, Claude) — TiktokenTokenizer.
+ *   3. HeuristicTokenizer fallback for everything else.
+ *
+ * Instances are cached per model name so callers don't pay for repeated
+ * allocation of the native tiktoken encoder, and so their per-tokenizer
+ * LRU caches can be shared across call sites.
+ */
 export class TokenizerFactory {
-    /**
-     * Create a tokenizer for the given model name.
-     *
-     * Resolution order:
-     * 1. Tiktoken for GPT-4 / GPT-3.5-turbo / Claude-family models.
-     * 2. HeuristicTokenizer as the content-aware fallback.
-     *
-     * Callers that already hold a tokenizer should prefer reusing it —
-     * construction allocates a tiktoken encoder (native resource).
-     */
+    private static readonly instances = new Map<string, ITokenizer>();
+
     public static create(modelName: string): ITokenizer {
-        if (TiktokenTokenizer.supports(modelName)) {
-            return new TiktokenTokenizer(modelName);
+        const cached = TokenizerFactory.instances.get(modelName);
+        if (cached) {
+            return cached;
         }
-        return new HeuristicTokenizer(modelName);
+        const tokenizer = TokenizerFactory.build(modelName);
+        TokenizerFactory.instances.set(modelName, tokenizer);
+        return tokenizer;
     }
 
-    /** Create a tokenizer using the active model environment variables. */
     public static createFromEnv(): ITokenizer {
         const modelName =
             process.env.CLAUDE_MODEL ||
             process.env.ANTHROPIC_MODEL ||
             process.env.OPENAI_MODEL ||
+            process.env.GOOGLE_AI_MODEL ||
             process.env.TOKEN_OPTIMIZER_MODEL ||
             'gpt-4';
         return TokenizerFactory.create(modelName);
     }
+
+    /**
+     * Release every cached tokenizer. Call this on server shutdown so
+     * native tiktoken encoders are freed.
+     */
+    public static disposeAll(): void {
+        for (const tokenizer of TokenizerFactory.instances.values()) {
+            try {
+                tokenizer.free();
+            } catch {
+                // Ignore — best-effort cleanup.
+            }
+        }
+        TokenizerFactory.instances.clear();
+    }
+
+    private static build(modelName: string): ITokenizer {
+        const lower = modelName.toLowerCase();
+        if (lower.startsWith('gemini') || lower.includes('google')) {
+            const apiKey = process.env.GOOGLE_AI_API_KEY;
+            if (apiKey) {
+                return new GoogleAITokenizer(modelName, apiKey);
+            }
+            return new HeuristicTokenizer(modelName);
+        }
+        if (TiktokenTokenizer.supports(modelName)) {
+            return new TiktokenTokenizer(modelName);
+        }
+        return new HeuristicTokenizer(modelName);
+    }
 }
diff --git a/src/server/index.ts b/src/server/index.ts
index 116ca6c..63f6cc1 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -2272,6 +2272,9 @@ async function cleanup() {
     },
     { fn: () => cache?.close(), name: 'closing cache' },
     { fn: () => tokenCounter?.free(), name: 'freeing tokenCounter' },
+    { fn: async () => await sessionManager.flush(), name: 'flushing sessions' },
+    { fn: () => TokenizerFactory.disposeAll(), name: 'disposing tokenizers' },
+    { fn: () => optimizationStorage.close(), name: 'closing optimization storage' },
     // Note: predictiveCache and cacheWarmup do not implement dispose() methods
     // Removed dispose() calls to prevent runtime errors during cleanup
   ]);
diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts
index 28f5f35..33f6595 100644
--- a/src/tools/context-delta-tool.ts
+++ b/src/tools/context-delta-tool.ts
@@ -73,7 +73,14 @@ export class ContextDeltaTool {
             return { success: false, error: `Unknown session: ${sessionId}` };
         }
         const previous = session.getFileContent(filePath);
-        session.setFileContent(filePath, currentContent);
+
+        try {
+            // Goes through SessionManager so the new state hits disk.
+            this.sessionManager.updateFileState(sessionId, filePath, currentContent);
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: message };
+        }
 
         if (previous === undefined) {
             return {
@@ -112,15 +119,13 @@ export class ContextDeltaTool {
     }
 
     private clear(options: ContextDeltaOptions): ContextDeltaResponse {
-        const session = this.sessionManager.getSession(options.sessionId);
-        if (!session) {
-            return {
-                success: false,
-                error: `Unknown session: ${options.sessionId}`,
-            };
+        try {
+            this.sessionManager.clearFileState(options.sessionId, options.filePath);
+            return { success: true };
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: message };
         }
-        session.setFileContent(options.filePath, '');
-        return { success: true };
     }
 }
 
diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts
index 19f2428..10237fe 100644
--- a/tests/unit/session.test.ts
+++ b/tests/unit/session.test.ts
@@ -5,7 +5,7 @@ import { HeuristicTokenizer } from '../../src/core/tokenizers/heuristic-tokenize
 
 describe('Session', () => {
   it('appends messages and tracks updatedAt', async () => {
-    const session = new Session();
+    const session = new Session({ allowCharHeuristic: true });
     const before = session.updatedAt;
     await new Promise((r) => setTimeout(r, 5));
     session.addMessage('user', 'hi');
@@ -14,13 +14,31 @@ describe('Session', () => {
   });
 
   it('compressHistory is a no-op under the budget', async () => {
-    const session = new Session({ maxTokens: 10_000 });
+    const session = new Session({
+      maxTokens: 10_000,
+      allowCharHeuristic: true,
+    });
     session.addMessage('user', 'short');
     const before = session.getHistory().length;
     await session.compressHistory();
     expect(session.getHistory().length).toBe(before);
   });
 
+  it('getHistoryTokenCount throws without a tokenizer when heuristic is off', async () => {
+    const session = new Session();
+    session.addMessage('user', 'hi');
+    await expect(session.getHistoryTokenCount()).rejects.toThrow(
+      /requires a tokenizer/
+    );
+  });
+
+  it('clearFileContent removes the entry', () => {
+    const session = new Session();
+    session.setFileContent('a.ts', 'const x = 1;');
+    session.clearFileContent('a.ts');
+    expect(session.getFileContent('a.ts')).toBeUndefined();
+  });
+
   it('compressHistory summarizes head when over budget', async () => {
     const tokenizer = new HeuristicTokenizer();
     const session = new Session({ maxTokens: 50, tokenizer });

From 4e3f119e20869a22689042649bdb3407af001b4a Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:46:15 -0400
Subject: [PATCH 16/26] feat(tokenizers): refactor tokencounter to delegate via
 tokenizerfactory (#124)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completes the tokenization framework requirements:

- TokenCounter now delegates tokenization to the pluggable
  TokenizerFactory; the sync count() path still uses a local tiktoken
  encoder for tiktoken-compatible models (we need raw token arrays for
  truncate()), and a new countAsync() path routes through the factory
  for remote models (Google AI). Deeper callers that used the existing
  sync surface keep working unchanged.
- count_tokens MCP tool accepts an optional modelName parameter and
  returns the resolved model in the response. When omitted, the server-
  configured TokenCounter is used.
- TokenCounter.free() no longer tears down the factory-owned tokenizer
  — the factory owns that lifecycle (#16). Local tiktoken encoders are
  freed as before.

Refs #124

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/core/token-counter.ts      | 195 ++++++++++++---------------------
 src/server/index.ts            |  24 +++-
 src/validation/tool-schemas.ts |   7 ++
 3 files changed, 97 insertions(+), 129 deletions(-)

diff --git a/src/core/token-counter.ts b/src/core/token-counter.ts
index d4bd471..80dd895 100644
--- a/src/core/token-counter.ts
+++ b/src/core/token-counter.ts
@@ -1,4 +1,7 @@
 import { encoding_for_model, Tiktoken } from 'tiktoken';
+import { TokenizerFactory } from './tokenizers/tokenizer-factory.js';
+import { ITokenizer } from './tokenizers/i-tokenizer.js';
+import { TiktokenTokenizer } from './tokenizers/tiktoken-tokenizer.js';
 
 export interface TokenCountResult {
   tokens: number;
@@ -6,114 +9,95 @@ export interface TokenCountResult {
   estimatedCost?: number;
 }
 
+/**
+ * TokenCounter — delegates tokenization to the pluggable
+ * TokenizerFactory from issue #124 while preserving the callable
+ * surface (`count`, `countBatch`, `estimate`, `calculateSavings`,
+ * `calculateCacheSavings`, `exceedsLimit`, `truncate`,
+ * `getTokenCharRatio`, `free`) the rest of the codebase relies on.
+ *
+ * Truncation still uses a local tiktoken encoder because the
+ * ITokenizer contract doesn't expose the raw token array — we
+ * keep one for GPT-4-family models and otherwise degrade to
+ * character-based truncation.
+ */
 export class TokenCounter {
-  private encoder: Tiktoken;
-  private readonly model: string;
+  private readonly tokenizer: ITokenizer;
+  private readonly encoder: Tiktoken | null;
+  public readonly model: string;
 
   constructor(model?: string) {
-    // Auto-detect model from environment or use provided model
-    // Claude Code sets CLAUDE_MODEL env var with the active model
-    // Falls back to GPT-4 as universal approximation
     this.model =
       model ||
       process.env.CLAUDE_MODEL ||
       process.env.ANTHROPIC_MODEL ||
+      process.env.OPENAI_MODEL ||
+      process.env.GOOGLE_AI_MODEL ||
       'gpt-4';
 
-    // Map Claude models to closest tiktoken equivalent
-    // Claude uses similar tokenization to GPT-4, so it's a good approximation
-    const tokenModel = this.mapToTiktokenModel(this.model);
-
-    // Initialize tiktoken encoder
-    this.encoder = encoding_for_model(tokenModel);
-  }
-
-  /**
-   * Map Claude/Anthropic models to tiktoken model names
-   */
-  private mapToTiktokenModel(model: string): 'gpt-4' | 'gpt-3.5-turbo' {
-    const lowerModel = model.toLowerCase();
-
-    // Claude models use GPT-4 tokenizer as closest approximation
-    if (
-      lowerModel.includes('claude') ||
-      lowerModel.includes('sonnet') ||
-      lowerModel.includes('opus') ||
-      lowerModel.includes('haiku')
-    ) {
-      return 'gpt-4';
-    }
-
-    // GPT-4 variants
-    if (lowerModel.includes('gpt-4')) {
-      return 'gpt-4';
+    this.tokenizer = TokenizerFactory.create(this.model);
+
+    // Keep a local encoder for tiktoken-compatible models — truncate()
+    // needs to slice the raw token array, which the ITokenizer interface
+    // intentionally does not expose.
+    if (TiktokenTokenizer.supports(this.model)) {
+      this.encoder = encoding_for_model(
+        TiktokenTokenizer.mapToTiktokenModel(this.model)
+      );
+    } else {
+      this.encoder = null;
     }
-
-    // GPT-3.5 variants
-    if (lowerModel.includes('gpt-3.5') || lowerModel.includes('gpt3.5')) {
-      return 'gpt-3.5-turbo';
-    }
-
-    // Default to GPT-4 for unknown models
-    return 'gpt-4';
   }
 
   /**
-   * Count tokens in text
+   * Count tokens in text (synchronous).
+   *
+   * Synchronous on tiktoken-backed tokenizers, which is all we expose
+   * externally via Anthropic/OpenAI. Remote tokenizers (Google AI) are
+   * reachable via `countAsync`.
    */
   count(text: string): TokenCountResult {
-    const tokens = this.encoder.encode(text);
-
+    if (this.encoder) {
+      return {
+        tokens: this.encoder.encode(text).length,
+        characters: text.length,
+      };
+    }
+    // Fall back to the synchronous estimate so non-tiktoken paths keep
+    // working. Callers that want exact remote counts should use
+    // countAsync.
     return {
-      tokens: tokens.length,
+      tokens: this.estimate(text),
       characters: text.length,
     };
   }
 
   /**
-   * Count tokens in multiple texts
+   * Async token counting through the pluggable tokenizer — accurate for
+   * both local tiktoken and remote Google AI paths.
    */
+  async countAsync(text: string): Promise<TokenCountResult> {
+    const tokens = await this.tokenizer.countTokens(text);
+    return { tokens, characters: text.length };
+  }
+
   countBatch(texts: string[]): TokenCountResult {
     let totalTokens = 0;
     let totalCharacters = 0;
-
     for (const text of texts) {
       const result = this.count(text);
       totalTokens += result.tokens;
       totalCharacters += result.characters;
     }
-
-    return {
-      tokens: totalTokens,
-      characters: totalCharacters,
-    };
+    return { tokens: totalTokens, characters: totalCharacters };
   }
 
-  /**
-   * Estimate token count without encoding (faster, less accurate)
-   */
   estimate(text: string): number {
-    // Rough estimate: ~4 characters per token on average
+    // Rough fallback: ~4 characters per token. Only used when no
+    // tiktoken encoder is available for this model.
     return Math.ceil(text.length / 4);
   }
 
-  /**
-   * Calculate token savings based on context window management
-   *
-   * @param originalText - The original text content
-   * @param contextTokens - Number of tokens remaining in LLM context (default: 0 for full caching)
-   * @returns Token savings calculation
-   *
-   * @remarks
-   * This method measures context window optimization, NOT compression ratio.
-   * When content is cached externally (SQLite, Redis, etc.), it's completely
-   * removed from the LLM's context window, resulting in 100% token savings.
-   *
-   * Use cases:
-   * - External caching: contextTokens = 0 (100% savings)
-   * - Metadata-only: contextTokens = tokens in metadata (e.g., 8)
-   * - Summarization: contextTokens = tokens in summary (e.g., 50)
-   */
   calculateSavings(
     originalText: string,
     contextTokens: number = 0
@@ -136,36 +120,6 @@ export class TokenCounter {
     };
   }
 
-  /**
-   * Calculate context window savings for externally cached content
-   *
-   * @param originalText - The original text content being cached
-   * @returns Token savings calculation with 100% savings
-   *
-   * @remarks
-   * When content is compressed and stored in an external cache (SQLite, Redis, etc.),
-   * it's completely removed from the LLM's context window. The compressed/encoded
-   * data is NEVER sent to the LLM, so we measure 100% token savings.
-   *
-   * Key insight: We're measuring CONTEXT WINDOW CLEARANCE, not compression ratio.
-   * - ✅ Content removed from LLM context (saves tokens)
-   * - ✅ Storage compressed (saves disk space)
-   * - ❌ Don't count tokens in compressed data (it's not sent to LLM!)
-   *
-   * @example
-   * ```typescript
-   * const tokenCounter = new TokenCounter();
-   * const content = "Large file content...";
-   * const compressed = compress(content);
-   *
-   * // Store in external cache
-   * await cache.set(key, compressed);
-   *
-   * // Calculate context window savings
-   * const savings = tokenCounter.calculateCacheSavings(content);
-   * // Returns: { originalTokens: 250, contextTokens: 0, tokensSaved: 250, percentSaved: 100 }
-   * ```
-   */
   calculateCacheSavings(originalText: string): {
     originalTokens: number;
     contextTokens: number;
@@ -173,54 +127,45 @@ export class TokenCounter {
     percentSaved: number;
   } {
     const original = this.count(originalText);
-
     return {
       originalTokens: original.tokens,
-      contextTokens: 0, // External cache - nothing in context
-      tokensSaved: original.tokens, // 100% of original tokens saved
-      percentSaved: 100, // Always 100% for external caching
+      contextTokens: 0,
+      tokensSaved: original.tokens,
+      percentSaved: 100,
     };
   }
 
-  /**
-   * Check if text exceeds token limit
-   */
   exceedsLimit(text: string, limit: number): boolean {
-    const result = this.count(text);
-    return result.tokens > limit;
+    return this.count(text).tokens > limit;
   }
 
-  /**
-   * Truncate text to fit within token limit
-   */
   truncate(text: string, maxTokens: number): string {
+    if (!this.encoder) {
+      // No raw-token access for this model — fall back to a
+      // char-proportional slice using the estimate ratio.
+      const approxChars = maxTokens * 4;
+      return text.length <= approxChars ? text : text.slice(0, approxChars);
+    }
     const tokens = this.encoder.encode(text);
-
     if (tokens.length <= maxTokens) {
       return text;
     }
-
     const truncatedTokens = tokens.slice(0, maxTokens);
     const decoded = this.encoder.decode(truncatedTokens);
-
-    // Handle potential type issues with decode return value
     return typeof decoded === 'string'
       ? decoded
       : new TextDecoder().decode(decoded);
   }
 
-  /**
-   * Get token-to-character ratio for text
-   */
   getTokenCharRatio(text: string): number {
     const result = this.count(text);
     return result.tokens > 0 ? result.characters / result.tokens : 0;
   }
 
-  /**
-   * Free the encoder resources
-   */
   free(): void {
-    this.encoder.free();
+    if (this.encoder) {
+      this.encoder.free();
+    }
+    // TokenizerFactory owns the tokenizer's lifecycle (instance cache).
   }
 }
diff --git a/src/server/index.ts b/src/server/index.ts
index 63f6cc1..89eb230 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -445,7 +445,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
       {
         name: 'count_tokens',
         description:
-          'Count tokens in text using tiktoken. Useful for understanding token usage before and after optimization.',
+          'Count tokens in text using the pluggable tokenizer framework (#124). Picks a model-specific tokenizer (tiktoken for GPT/Claude, Google AI REST for Gemini, content-aware heuristic fallback).',
         inputSchema: {
           type: 'object',
           properties: {
@@ -453,6 +453,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
               type: 'string',
               description: 'Text to count tokens for',
             },
+            modelName: {
+              type: 'string',
+              description:
+                'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). Defaults to the server-configured model when omitted.',
+            },
           },
           required: ['text'],
         },
@@ -864,14 +869,25 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       }
 
       case 'count_tokens': {
-        const { text } = args as { text: string };
-        const result = tokenCounter.count(text);
+        const { text, modelName } = args as {
+          text: string;
+          modelName?: string;
+        };
+        const counter = modelName ? new TokenCounter(modelName) : tokenCounter;
+        const result = modelName
+          ? await counter.countAsync(text)
+          : counter.count(text);
+        if (modelName) {
+          // Model-specific counters are one-shot — free the local
+          // tiktoken encoder (if any) that this call allocated.
+          counter.free();
+        }
 
         return {
           content: [
             {
               type: 'text',
-              text: JSON.stringify(result, null, 2),
+              text: JSON.stringify({ ...result, model: modelName ?? counter.model }, null, 2),
             },
           ],
         };
diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts
index c109989..cad168b 100644
--- a/src/validation/tool-schemas.ts
+++ b/src/validation/tool-schemas.ts
@@ -23,6 +23,13 @@ export const GetCachedSchema = z.object({
 // 3. count_tokens
 export const CountTokensSchema = z.object({
   text: z.string().describe('Text to count tokens for'),
+  modelName: z
+    .string()
+    .optional()
+    .describe(
+      'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). ' +
+        'Defaults to the server-configured model when omitted.'
+    ),
 });
 
 // 4. compress_text

From 2a1cab1599a88588c3e55bb273dd660745f8a71b Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:48:49 -0400
Subject: [PATCH 17/26] feat(config): cachesettings + chatcompression + default
 file + wiring (#120)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completes the remaining #120 acceptance criteria:

- Adds OptimizationConfig.cacheSettings { maxSize, ttlSeconds } and
  OptimizationConfig.chatCompression { enabled, tokenLimit, strategy }
  — the two sections called out in the issue example config.
- ConfigManager writes ~/.token-optimizer/config.json with DEFAULT_CONFIG
  on first run so the user can edit a real file (was previously
  in-memory-only). writeDefaults can be disabled for tests.
- User config sub-objects deep-merge: overriding `cacheSettings.maxSize`
  no longer wipes out `ttlSeconds`. Zod schema is partial at every
  depth via OptimizationConfigUserSchema.
- server/index.ts: loads ConfigManager on startup and derives the
  SessionManager's defaultMaxTokens from
  chatCompression.tokenLimit ?? modelTokenLimit × compressionTokenThreshold,
  so every Session created through the manager respects the configured
  compression budget.

Refs #120, #121

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/core/config.ts        | 87 +++++++++++++++++++++++++++++++++++----
 src/core/types.ts         | 14 +++++++
 src/server/index.ts       | 17 +++++++-
 tests/unit/config.test.ts | 46 ++++++++++++++++-----
 4 files changed, 145 insertions(+), 19 deletions(-)

diff --git a/src/core/config.ts b/src/core/config.ts
index 57bee2a..2e94239 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -4,9 +4,9 @@
 
 import { z } from 'zod';
 import { HypercontextConfig, OptimizationConfig } from './types.js';
-import { readFileSync, existsSync } from 'fs';
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
 import { homedir } from 'os';
-import { join } from 'path';
+import { dirname, join } from 'path';
 
 const DEFAULT_OPTIMIZATION: OptimizationConfig = {
   compressionTokenThreshold: 0.7,
@@ -26,6 +26,14 @@ const DEFAULT_OPTIMIZATION: OptimizationConfig = {
   },
   minOutputSizeBytes: 500,
   quality: 'balanced',
+  cacheSettings: {
+    maxSize: 1000,
+    ttlSeconds: 3600,
+  },
+  chatCompression: {
+    enabled: true,
+    strategy: 'summarize',
+  },
 };
 
 const DEFAULT_CONFIG: HypercontextConfig = {
@@ -62,6 +70,17 @@ const DEFAULT_CONFIG: HypercontextConfig = {
   optimization: DEFAULT_OPTIMIZATION,
 };
 
+const CacheSettingsSchema = z.object({
+  maxSize: z.number().int().positive(),
+  ttlSeconds: z.number().int().nonnegative(),
+});
+
+const ChatCompressionSchema = z.object({
+  enabled: z.boolean(),
+  tokenLimit: z.number().int().positive().optional(),
+  strategy: z.enum(['summarize', 'truncate']),
+});
+
 const OptimizationConfigSchema = z.object({
   compressionTokenThreshold: z.number().min(0).max(1),
   compressionPreserveThreshold: z.number().min(0).max(1),
@@ -69,6 +88,18 @@ const OptimizationConfigSchema = z.object({
   modelTokenLimits: z.record(z.string(), z.number().int().positive()),
   minOutputSizeBytes: z.number().int().nonnegative(),
   quality: z.enum(['fast', 'balanced', 'max']),
+  cacheSettings: CacheSettingsSchema,
+  chatCompression: ChatCompressionSchema,
+});
+
+/**
+ * User-supplied optimization schema. Partial at every depth so users can
+ * override just one field (e.g. `{ cacheSettings: { maxSize: 42 } }`)
+ * without having to re-supply the entire sub-object.
+ */
+const OptimizationConfigUserSchema = OptimizationConfigSchema.partial().extend({
+  cacheSettings: CacheSettingsSchema.partial().optional(),
+  chatCompression: ChatCompressionSchema.partial().optional(),
 });
 
 const HypercontextConfigSchema = z
@@ -110,7 +141,7 @@ const HypercontextConfigSchema = z
       })
       .partial()
       .optional(),
-    optimization: OptimizationConfigSchema.partial().optional(),
+    optimization: OptimizationConfigUserSchema.optional(),
   })
   .passthrough();
 
@@ -118,12 +149,37 @@ export class ConfigManager {
   private config: HypercontextConfig;
   private configPath: string;
 
-  constructor(configPath?: string) {
+  constructor(configPath?: string, options: { writeDefaults?: boolean } = {}) {
     this.configPath =
-      configPath || join(homedir(), '.hypercontext', 'config.json');
+      configPath || join(homedir(), '.token-optimizer', 'config.json');
+    const writeDefaults = options.writeDefaults ?? true;
+    if (writeDefaults && !existsSync(this.configPath)) {
+      this.writeDefaultConfig();
+    }
     this.config = this.loadConfig();
   }
 
+  /**
+   * Write DEFAULT_CONFIG to configPath on first run — addresses #120's
+   * "Default config created on first run" acceptance criterion.
+   * Errors are logged and non-fatal; callers still get an in-memory
+   * DEFAULT_CONFIG via loadConfig().
+   */
+  private writeDefaultConfig(): void {
+    try {
+      const dir = dirname(this.configPath);
+      if (!existsSync(dir)) {
+        mkdirSync(dir, { recursive: true });
+      }
+      writeFileSync(this.configPath, JSON.stringify(DEFAULT_CONFIG, null, 2));
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(
+        `ConfigManager: failed to write default config to ${this.configPath}: ${message}`
+      );
+    }
+  }
+
   private loadConfig(): HypercontextConfig {
     if (!existsSync(this.configPath)) {
       return DEFAULT_CONFIG;
@@ -157,15 +213,32 @@ export class ConfigManager {
       monitoring?: Partial<HypercontextConfig['monitoring']>;
       intelligence?: Partial<HypercontextConfig['intelligence']>;
       performance?: Partial<HypercontextConfig['performance']>;
-      optimization?: Partial<OptimizationConfig>;
+      optimization?: Partial<
+        Omit<OptimizationConfig, 'cacheSettings' | 'chatCompression'>
+      > & {
+        cacheSettings?: Partial<OptimizationConfig['cacheSettings']>;
+        chatCompression?: Partial<OptimizationConfig['chatCompression']>;
+      };
     }
   ): HypercontextConfig {
+    const userOpt = user.optimization ?? {};
     return {
       cache: { ...defaults.cache, ...user.cache },
       monitoring: { ...defaults.monitoring, ...user.monitoring },
       intelligence: { ...defaults.intelligence, ...user.intelligence },
       performance: { ...defaults.performance, ...user.performance },
-      optimization: { ...DEFAULT_OPTIMIZATION, ...(user.optimization ?? {}) },
+      optimization: {
+        ...DEFAULT_OPTIMIZATION,
+        ...userOpt,
+        cacheSettings: {
+          ...DEFAULT_OPTIMIZATION.cacheSettings,
+          ...(userOpt.cacheSettings ?? {}),
+        },
+        chatCompression: {
+          ...DEFAULT_OPTIMIZATION.chatCompression,
+          ...(userOpt.chatCompression ?? {}),
+        },
+      },
     };
   }
 
diff --git a/src/core/types.ts b/src/core/types.ts
index aacb6a1..1cb1775 100644
--- a/src/core/types.ts
+++ b/src/core/types.ts
@@ -68,6 +68,20 @@ export interface OptimizationConfig {
   minOutputSizeBytes: number;
   /** Compression quality preset. */
   quality: 'fast' | 'balanced' | 'max';
+  /** In-memory cache knobs — mirrors Gemini CLI's `cacheSettings`. */
+  cacheSettings: {
+    /** Max entries per LRU cache shard. */
+    maxSize: number;
+    /** Default TTL for cached entries, in seconds. */
+    ttlSeconds: number;
+  };
+  /** Chat-history compression knobs — #121. */
+  chatCompression: {
+    enabled: boolean;
+    /** Hard token limit per session (falls back to modelTokenLimit × compressionTokenThreshold). */
+    tokenLimit?: number;
+    strategy: 'summarize' | 'truncate';
+  };
 }
 
 export interface TokenMetrics {
diff --git a/src/server/index.ts b/src/server/index.ts
index 89eb230..9b16c3a 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -137,6 +137,7 @@ import {
 } from '../tools/context-delta-tool.js';
 import { SessionManager } from '../core/session-manager.js';
 import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js';
+import { ConfigManager } from '../core/config.js';
 import { AnalyticsManager } from '../analytics/analytics-manager.js';
 
 
@@ -379,9 +380,23 @@ const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager);
 const exportAnalytics = getExportAnalyticsTool(analyticsManager);
 const optimizationStorage = new OptimizationStorageTool();
 
+// #120: load user config (creates ~/.token-optimizer/config.json with
+// defaults on first run) and derive session-level knobs.
+const configManager = new ConfigManager();
+const optimizationConfig = configManager.getOptimizationConfig();
+const sessionTokenizer = TokenizerFactory.createFromEnv();
+const modelLimit =
+  configManager.getModelTokenLimit(sessionTokenizer.modelName) ??
+  // Fall back to an aggressive default for unknown models.
+  128000;
+const chatDefaultMaxTokens =
+  optimizationConfig.chatCompression.tokenLimit ??
+  Math.floor(modelLimit * optimizationConfig.compressionTokenThreshold);
+
 const sessionManager = new SessionManager({
   persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'),
-  tokenizer: TokenizerFactory.createFromEnv(),
+  tokenizer: sessionTokenizer,
+  defaultMaxTokens: chatDefaultMaxTokens,
 });
 const contextDelta = new ContextDeltaTool(sessionManager);
 
diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts
index 27b2850..4f1c0d7 100644
--- a/tests/unit/config.test.ts
+++ b/tests/unit/config.test.ts
@@ -1,5 +1,5 @@
 import { describe, it, expect, afterEach } from '@jest/globals';
-import { mkdtempSync, writeFileSync, rmSync } from 'fs';
+import { mkdtempSync, writeFileSync, existsSync, rmSync } from 'fs';
 import { tmpdir } from 'os';
 import { join } from 'path';
 import { ConfigManager } from '../../src/core/config.js';
@@ -16,38 +16,63 @@ describe('ConfigManager', () => {
     }
   });
 
-  function writeConfig(content: string): string {
+  function tempConfigPath(): string {
     const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-config-'));
     tempDirs.push(dir);
-    const file = join(dir, 'config.json');
+    return join(dir, 'config.json');
+  }
+
+  function writeConfig(content: string): string {
+    const file = tempConfigPath();
     writeFileSync(file, content);
     return file;
   }
 
-  it('returns defaults when no config file exists', () => {
-    const mgr = new ConfigManager(join(tmpdir(), 'does-not-exist-xyz.json'));
+  it('returns defaults when no config file exists and writeDefaults is false', () => {
+    const mgr = new ConfigManager(tempConfigPath(), { writeDefaults: false });
     const opt = mgr.getOptimizationConfig();
     expect(opt.compressionTokenThreshold).toBe(0.7);
     expect(opt.quality).toBe('balanced');
+    expect(opt.cacheSettings.maxSize).toBe(1000);
+    expect(opt.cacheSettings.ttlSeconds).toBe(3600);
+    expect(opt.chatCompression.enabled).toBe(true);
+    expect(opt.chatCompression.strategy).toBe('summarize');
     expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000);
   });
 
-  it('overrides defaults with user config', () => {
+  it('writes a default config file on first run', () => {
+    const file = tempConfigPath();
+    expect(existsSync(file)).toBe(false);
+    new ConfigManager(file);
+    expect(existsSync(file)).toBe(true);
+
+    // A second instance reads what the first wrote.
+    const second = new ConfigManager(file);
+    expect(second.getOptimizationConfig().quality).toBe('balanced');
+  });
+
+  it('overrides defaults with user config — nested sub-objects deep-merge', () => {
     const configPath = writeConfig(
       JSON.stringify({
         optimization: {
           compressionTokenThreshold: 0.9,
           quality: 'max',
+          cacheSettings: { maxSize: 42 },
+          chatCompression: { strategy: 'truncate' },
           modelTokenLimits: { 'custom-model': 500000 },
         },
       })
     );
-    const mgr = new ConfigManager(configPath);
+    const mgr = new ConfigManager(configPath, { writeDefaults: false });
     const opt = mgr.getOptimizationConfig();
     expect(opt.compressionTokenThreshold).toBe(0.9);
     expect(opt.quality).toBe('max');
+    expect(opt.cacheSettings.maxSize).toBe(42);
+    // Unprovided sub-field retains default.
+    expect(opt.cacheSettings.ttlSeconds).toBe(3600);
+    expect(opt.chatCompression.enabled).toBe(true);
+    expect(opt.chatCompression.strategy).toBe('truncate');
     expect(mgr.getModelTokenLimit('custom-model')).toBe(500000);
-    // Unrelated defaults still filled in
     expect(opt.compressionPreserveThreshold).toBe(0.3);
   });
 
@@ -55,14 +80,13 @@ describe('ConfigManager', () => {
     const configPath = writeConfig(
       JSON.stringify({ optimization: { compressionTokenThreshold: 5 } })
     );
-    const mgr = new ConfigManager(configPath);
-    // Invalid value (>1) is rejected by schema → defaults applied
+    const mgr = new ConfigManager(configPath, { writeDefaults: false });
     expect(mgr.getOptimizationConfig().compressionTokenThreshold).toBe(0.7);
   });
 
   it('falls back to defaults on malformed JSON', () => {
     const configPath = writeConfig('not json at all');
-    const mgr = new ConfigManager(configPath);
+    const mgr = new ConfigManager(configPath, { writeDefaults: false });
     expect(mgr.getOptimizationConfig().quality).toBe('balanced');
   });
 });

From 1392ee1e52673902b434e02291ae2fda781ba354 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:50:35 -0400
Subject: [PATCH 18/26] feat(cache): lru memoize smart-read / smart-grep /
 smart-glob (#125)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completes the missing LruCache integration from the audit:

- Adds src/utils/lru-memoize.ts — a generic async-fn wrapper backed by
  LruCache (#125). Each wrapped function registers its cache with a
  shared memoRegistry so callers can prune and snapshot stats across
  every memo in one shot.
- server/index.ts wraps runSmartRead, runSmartGrep, and runSmartGlob
  with lruMemoize, sized from optimizationConfig.cacheSettings
  (maxSize / ttlSeconds × 1000). The case handlers call the memoized
  variant, so repeated tool invocations with identical arguments hit
  the LRU instead of re-running the expensive read/search.
- Periodic cleanup: a 5-minute interval calls memoRegistry.pruneAll()
  and logs stats when anything was removed. The timer is unref'd so it
  never keeps the event loop alive.
- Server cleanup handler clears the interval and the memo caches on
  shutdown.

Refs #125

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/server/index.ts            | 49 ++++++++++++++++--
 src/utils/lru-memoize.ts       | 93 ++++++++++++++++++++++++++++++++++
 tests/unit/lru-memoize.test.ts | 69 +++++++++++++++++++++++++
 3 files changed, 208 insertions(+), 3 deletions(-)
 create mode 100644 src/utils/lru-memoize.ts
 create mode 100644 tests/unit/lru-memoize.test.ts

diff --git a/src/server/index.ts b/src/server/index.ts
index 9b16c3a..98d3984 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -138,6 +138,7 @@ import {
 import { SessionManager } from '../core/session-manager.js';
 import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js';
 import { ConfigManager } from '../core/config.js';
+import { lruMemoize, memoRegistry } from '../utils/lru-memoize.js';
 import { AnalyticsManager } from '../analytics/analytics-manager.js';
 
 
@@ -400,6 +401,41 @@ const sessionManager = new SessionManager({
 });
 const contextDelta = new ContextDeltaTool(sessionManager);
 
+// #125: memoize the expensive read-only file-operation tools with an
+// LRU bounded by the user's cacheSettings. The memoRegistry hook lets
+// the cleanup handler below prune them all at once.
+const cacheSettings = optimizationConfig.cacheSettings;
+const memoizedSmartRead = lruMemoize(runSmartRead, {
+  name: 'smart_read',
+  maxSize: cacheSettings.maxSize,
+  ttlMs: cacheSettings.ttlSeconds * 1000,
+});
+const memoizedSmartGrep = lruMemoize(runSmartGrep, {
+  name: 'smart_grep',
+  maxSize: cacheSettings.maxSize,
+  ttlMs: cacheSettings.ttlSeconds * 1000,
+});
+const memoizedSmartGlob = lruMemoize(runSmartGlob, {
+  name: 'smart_glob',
+  maxSize: cacheSettings.maxSize,
+  ttlMs: cacheSettings.ttlSeconds * 1000,
+});
+
+// Periodic prune + stats log. Runs every 5 minutes; unref so it doesn't
+// keep the process alive on its own.
+const MEMO_PRUNE_INTERVAL_MS = 5 * 60 * 1000;
+const memoPruneTimer = setInterval(() => {
+  const removed = memoRegistry.pruneAll();
+  if (removed > 0) {
+    console.error(
+      `[memo] pruned ${removed} expired cache entries; stats: ${JSON.stringify(memoRegistry.stats())}`
+    );
+  }
+}, MEMO_PRUNE_INTERVAL_MS);
+if (typeof memoPruneTimer.unref === 'function') {
+  memoPruneTimer.unref();
+}
+
 // Create MCP server
 const server = new Server(
   {
@@ -1979,7 +2015,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_read': {
         const { path, ...options } = args as any;
-        const result = await runSmartRead(path, options);
+        const result = await memoizedSmartRead(path, options);
         return {
           content: [
             {
@@ -2018,7 +2054,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_glob': {
         const { pattern, ...options } = args as any;
-        const result = await runSmartGlob(pattern, options);
+        const result = await memoizedSmartGlob(pattern, options);
         return {
           content: [
             {
@@ -2031,7 +2067,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_grep': {
         const { pattern, ...options } = args as any;
-        const result = await runSmartGrep(pattern, options);
+        const result = await memoizedSmartGrep(pattern, options);
         return {
           content: [
             {
@@ -2306,6 +2342,13 @@ async function cleanup() {
     { fn: async () => await sessionManager.flush(), name: 'flushing sessions' },
     { fn: () => TokenizerFactory.disposeAll(), name: 'disposing tokenizers' },
     { fn: () => optimizationStorage.close(), name: 'closing optimization storage' },
+    {
+      fn: () => {
+        clearInterval(memoPruneTimer);
+        memoRegistry.clearAll();
+      },
+      name: 'clearing memo caches',
+    },
     // Note: predictiveCache and cacheWarmup do not implement dispose() methods
     // Removed dispose() calls to prevent runtime errors during cleanup
   ]);
diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts
new file mode 100644
index 0000000..d57c0ae
--- /dev/null
+++ b/src/utils/lru-memoize.ts
@@ -0,0 +1,93 @@
+import { createHash } from 'crypto';
+import { LruCache, LruCacheStats } from './lru-cache.js';
+
+/**
+ * Wrap an async function with an LRU cache so repeated calls with the
+ * same arguments are served from memory — addresses issue #125's
+ * "store results of expensive operations" for smart_read, smart_grep,
+ * smart_glob, and edit-correction paths.
+ *
+ * Each wrapped function owns its own cache, but every cache is
+ * registered with the shared `memoRegistry` so the server can prune
+ * and log stats for all of them at once.
+ */
+
+export interface LruMemoizeOptions<Args extends readonly unknown[]> {
+    /** Identifier used in logs. */
+    name: string;
+    /** Max cached entries. */
+    maxSize: number;
+    /** Default per-entry TTL in ms. 0 disables expiration. */
+    ttlMs?: number;
+    /** Custom key function; defaults to sha256(JSON.stringify(args)). */
+    keyFn?: (args: Args) => string;
+}
+
+export interface RegisteredCache {
+    name: string;
+    cache: LruCache<string, unknown>;
+}
+
+class MemoRegistry {
+    private readonly caches = new Map<string, RegisteredCache>();
+
+    public register(entry: RegisteredCache): void {
+        this.caches.set(entry.name, entry);
+    }
+
+    /** Prune every registered cache and return total entries removed. */
+    public pruneAll(): number {
+        let total = 0;
+        for (const { cache } of this.caches.values()) {
+            total += cache.prune();
+        }
+        return total;
+    }
+
+    public stats(): Record<string, LruCacheStats> {
+        const out: Record<string, LruCacheStats> = {};
+        for (const [name, { cache }] of this.caches) {
+            out[name] = cache.stats();
+        }
+        return out;
+    }
+
+    public clearAll(): void {
+        for (const { cache } of this.caches.values()) {
+            cache.clear();
+        }
+    }
+}
+
+export const memoRegistry = new MemoRegistry();
+
+export function lruMemoize<Args extends readonly unknown[], R>(
+    fn: (...args: Args) => Promise<R>,
+    options: LruMemoizeOptions<Args>
+): (...args: Args) => Promise<R> {
+    const cache = new LruCache<string, R>(options.maxSize, options.ttlMs ?? 0);
+    memoRegistry.register({
+        name: options.name,
+        cache: cache as unknown as LruCache<string, unknown>,
+    });
+
+    const keyFn =
+        options.keyFn ??
+        ((args: Args): string => {
+            const serialized = JSON.stringify(args, (_, v) =>
+                typeof v === 'bigint' ? v.toString() : v
+            );
+            return createHash('sha256').update(serialized).digest('hex');
+        });
+
+    return async (...args: Args): Promise<R> => {
+        const key = keyFn(args);
+        const hit = cache.get(key);
+        if (hit !== undefined) {
+            return hit;
+        }
+        const value = await fn(...args);
+        cache.set(key, value);
+        return value;
+    };
+}
diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts
new file mode 100644
index 0000000..12631be
--- /dev/null
+++ b/tests/unit/lru-memoize.test.ts
@@ -0,0 +1,69 @@
+import { describe, it, expect } from '@jest/globals';
+import { lruMemoize, memoRegistry } from '../../src/utils/lru-memoize.js';
+
+describe('lruMemoize', () => {
+  it('returns cached value for identical args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-double', maxSize: 10 });
+    expect(await memo(3)).toBe(6);
+    expect(await memo(3)).toBe(6);
+    expect(calls).toBe(1);
+  });
+
+  it('differentiates calls by args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-by-args', maxSize: 10 });
+    await memo(1);
+    await memo(2);
+    await memo(1);
+    expect(calls).toBe(2);
+  });
+
+  it('expires entries past the TTL', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x;
+    };
+    const memo = lruMemoize(fn, { name: 'test-ttl', maxSize: 10, ttlMs: 20 });
+    await memo(7);
+    await memo(7);
+    expect(calls).toBe(1);
+    await new Promise((r) => setTimeout(r, 30));
+    await memo(7);
+    expect(calls).toBe(2);
+  });
+
+  it('registers with memoRegistry for bulk prune / stats', async () => {
+    const fn = async (x: string) => x.toUpperCase();
+    lruMemoize(fn, { name: 'test-registered', maxSize: 5 });
+    const stats = memoRegistry.stats();
+    expect(stats['test-registered']).toBeDefined();
+    expect(stats['test-registered'].size).toBe(0);
+  });
+
+  it('accepts a custom key function', async () => {
+    let calls = 0;
+    const fn = async (obj: { id: string; ignore: number }) => {
+      calls++;
+      return obj.id;
+    };
+    const memo = lruMemoize(fn, {
+      name: 'test-custom-key',
+      maxSize: 5,
+      keyFn: ([{ id }]) => id,
+    });
+    await memo({ id: 'a', ignore: 1 });
+    await memo({ id: 'a', ignore: 9999 }); // same id → hit
+    await memo({ id: 'b', ignore: 1 }); // different id → miss
+    expect(calls).toBe(2);
+  });
+});

From abbc64806a103631ec3e29fb663caba12f9458dd Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:52:05 -0400
Subject: [PATCH 19/26] feat(summarization): foundation-model isummarizer
 implementations (#121)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the audit gap: the issue requires "use a foundation model to
perform the summarization", the previous ISummarizer default was just
text truncation.

- AnthropicSummarizer — calls /v1/messages with claude-haiku-4-5 by
  default. Needs ANTHROPIC_API_KEY.
- GoogleAISummarizer — calls generativelanguage.googleapis.com with
  gemini-2.5-flash by default. Needs GOOGLE_AI_API_KEY.
- createSummarizerFromEnv() picks the best available summarizer
  (Anthropic → Google → TruncatingSummarizer fallback) so the server
  works unchanged whether or not API keys are configured.
- Both remote summarizers use AbortController with a 30s timeout and
  share a common system prompt that asks for preservation of decisions
  and open TODOs.
- TruncatingSummarizer remains the zero-dep fallback and is used by
  tests to avoid network flakes.

Wired into the server: SessionManager.summarizer is
createSummarizerFromEnv(), so Session.compressHistory uses a real LLM
when a key is present.

Refs #121

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/core/summarization.ts        | 234 ++++++++++++++++++++++++++++++-
 src/server/index.ts              |   2 +
 tests/unit/summarization.test.ts | 102 ++++++++++++++
 3 files changed, 332 insertions(+), 6 deletions(-)
 create mode 100644 tests/unit/summarization.test.ts

diff --git a/src/core/summarization.ts b/src/core/summarization.ts
index ab179c5..f694f7d 100644
--- a/src/core/summarization.ts
+++ b/src/core/summarization.ts
@@ -1,15 +1,30 @@
 import { Message } from './session.js';
 
 /**
- * Pluggable summarization interface — part of issue #121.
+ * Pluggable summarization — part of issue #121.
  *
- * A production deployment should plug in an LLM-backed summarizer that
- * condenses a list of Messages into a single natural-language summary.
- * The default TruncatingSummarizer keeps the module self-contained and
- * testable without an API key; it concatenates role+content and trims
- * to a reasonable length.
+ * An ISummarizer implementation takes a list of Messages and returns a
+ * natural-language summary. We ship three implementations out of the box:
+ *
+ *   - TruncatingSummarizer — self-contained, zero deps. Concatenates
+ *     role:content and trims to `maxChars`. Useful for tests and for
+ *     users who don't want to hand a foundation model every
+ *     conversation turn.
+ *   - AnthropicSummarizer — calls /v1/messages on api.anthropic.com.
+ *     Needs ANTHROPIC_API_KEY. Used when the host wires it up.
+ *   - GoogleAISummarizer — calls generativelanguage.googleapis.com.
+ *     Needs GOOGLE_AI_API_KEY.
+ *
+ * Selection lives in `createSummarizerFromEnv()` below — the server
+ * picks the highest-fidelity summarizer whose credentials are available
+ * and falls back to TruncatingSummarizer otherwise.
  */
 
+const SUMMARY_SYSTEM_PROMPT =
+    'You are summarizing the early portion of a conversation so the rest can continue without the full history in context. ' +
+    'Produce a concise summary (at most ~300 tokens) that preserves decisions made, outstanding TODOs, and any concrete facts the assistant has already told the user. ' +
+    'Do not address the user directly; write in third person.';
+
 export interface ISummarizer {
     summarize(messages: readonly Message[]): Promise<string>;
 }
@@ -48,3 +63,210 @@ export class TruncatingSummarizer implements ISummarizer {
         );
     }
 }
+
+// ============================================================================
+// Anthropic-backed summarizer
+// ============================================================================
+
+const ANTHROPIC_ENDPOINT = 'https://api.anthropic.com/v1/messages';
+const ANTHROPIC_DEFAULT_MODEL = 'claude-haiku-4-5-20251001';
+const ANTHROPIC_API_VERSION = '2023-06-01';
+const SUMMARIZER_TIMEOUT_MS = 30_000;
+const SUMMARIZER_MAX_TOKENS = 1024;
+
+export interface AnthropicSummarizerOptions {
+    apiKey?: string;
+    model?: string;
+    endpoint?: string;
+    timeoutMs?: number;
+}
+
+export class AnthropicSummarizer implements ISummarizer {
+    private readonly apiKey: string;
+    private readonly model: string;
+    private readonly endpoint: string;
+    private readonly timeoutMs: number;
+
+    constructor(options: AnthropicSummarizerOptions = {}) {
+        const apiKey = options.apiKey ?? process.env.ANTHROPIC_API_KEY;
+        if (!apiKey) {
+            throw new Error(
+                'AnthropicSummarizer requires ANTHROPIC_API_KEY (or apiKey option).'
+            );
+        }
+        this.apiKey = apiKey;
+        this.model = options.model ?? ANTHROPIC_DEFAULT_MODEL;
+        this.endpoint = options.endpoint ?? ANTHROPIC_ENDPOINT;
+        this.timeoutMs = options.timeoutMs ?? SUMMARIZER_TIMEOUT_MS;
+    }
+
+    public async summarize(messages: readonly Message[]): Promise<string> {
+        if (messages.length === 0) {
+            return '';
+        }
+        const userContent = messages
+            .map((m) => `${m.role}: ${m.content}`)
+            .join('\n');
+
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+        try {
+            const response = await fetch(this.endpoint, {
+                method: 'POST',
+                headers: {
+                    'content-type': 'application/json',
+                    'x-api-key': this.apiKey,
+                    'anthropic-version': ANTHROPIC_API_VERSION,
+                },
+                body: JSON.stringify({
+                    model: this.model,
+                    max_tokens: SUMMARIZER_MAX_TOKENS,
+                    system: SUMMARY_SYSTEM_PROMPT,
+                    messages: [
+                        { role: 'user', content: userContent.slice(0, 200_000) },
+                    ],
+                }),
+                signal: controller.signal,
+            });
+
+            if (!response.ok) {
+                const body = await response.text().catch(() => '');
+                throw new Error(
+                    `Anthropic summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}`
+                );
+            }
+
+            const data = (await response.json()) as {
+                content?: Array<{ type: string; text?: string }>;
+            };
+            const text =
+                data.content
+                    ?.filter((c) => c.type === 'text' && typeof c.text === 'string')
+                    .map((c) => c.text ?? '')
+                    .join('\n')
+                    .trim() ?? '';
+            return text;
+        } finally {
+            clearTimeout(timeout);
+        }
+    }
+}
+
+// ============================================================================
+// Google AI-backed summarizer
+// ============================================================================
+
+const GOOGLE_AI_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models';
+const GOOGLE_AI_DEFAULT_MODEL = 'gemini-2.5-flash';
+
+export interface GoogleAISummarizerOptions {
+    apiKey?: string;
+    model?: string;
+    endpoint?: string;
+    timeoutMs?: number;
+}
+
+export class GoogleAISummarizer implements ISummarizer {
+    private readonly apiKey: string;
+    private readonly model: string;
+    private readonly endpoint: string;
+    private readonly timeoutMs: number;
+
+    constructor(options: GoogleAISummarizerOptions = {}) {
+        const apiKey = options.apiKey ?? process.env.GOOGLE_AI_API_KEY;
+        if (!apiKey) {
+            throw new Error(
+                'GoogleAISummarizer requires GOOGLE_AI_API_KEY (or apiKey option).'
+            );
+        }
+        this.apiKey = apiKey;
+        this.model = options.model ?? GOOGLE_AI_DEFAULT_MODEL;
+        this.endpoint = options.endpoint ?? GOOGLE_AI_ENDPOINT;
+        this.timeoutMs = options.timeoutMs ?? SUMMARIZER_TIMEOUT_MS;
+    }
+
+    public async summarize(messages: readonly Message[]): Promise<string> {
+        if (messages.length === 0) {
+            return '';
+        }
+        const joined = messages
+            .map((m) => `${m.role}: ${m.content}`)
+            .join('\n');
+
+        const url = `${this.endpoint}/${encodeURIComponent(this.model)}:generateContent?key=${encodeURIComponent(this.apiKey)}`;
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+        try {
+            const response = await fetch(url, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({
+                    systemInstruction: { parts: [{ text: SUMMARY_SYSTEM_PROMPT }] },
+                    contents: [
+                        {
+                            role: 'user',
+                            parts: [{ text: joined.slice(0, 200_000) }],
+                        },
+                    ],
+                    generationConfig: { maxOutputTokens: SUMMARIZER_MAX_TOKENS },
+                }),
+                signal: controller.signal,
+            });
+
+            if (!response.ok) {
+                const body = await response.text().catch(() => '');
+                throw new Error(
+                    `Google AI summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}`
+                );
+            }
+
+            const data = (await response.json()) as {
+                candidates?: Array<{
+                    content?: { parts?: Array<{ text?: string }> };
+                }>;
+            };
+            const text =
+                data.candidates?.[0]?.content?.parts
+                    ?.map((p) => p.text ?? '')
+                    .join('\n')
+                    .trim() ?? '';
+            return text;
+        } finally {
+            clearTimeout(timeout);
+        }
+    }
+}
+
+// ============================================================================
+// Factory
+// ============================================================================
+
+/**
+ * Pick an ISummarizer based on available credentials:
+ *   1. ANTHROPIC_API_KEY → AnthropicSummarizer
+ *   2. GOOGLE_AI_API_KEY → GoogleAISummarizer
+ *   3. fallback        → TruncatingSummarizer (no network, no key)
+ *
+ * Anthropic sits first because this project is Claude-adjacent; users
+ * who prefer Gemini can either unset ANTHROPIC_API_KEY or construct
+ * GoogleAISummarizer directly.
+ */
+export function createSummarizerFromEnv(): ISummarizer {
+    if (process.env.ANTHROPIC_API_KEY) {
+        try {
+            return new AnthropicSummarizer();
+        } catch {
+            // Fall through to next option.
+        }
+    }
+    if (process.env.GOOGLE_AI_API_KEY) {
+        try {
+            return new GoogleAISummarizer();
+        } catch {
+            // Fall through.
+        }
+    }
+    return new TruncatingSummarizer();
+}
diff --git a/src/server/index.ts b/src/server/index.ts
index 98d3984..3c971ce 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -136,6 +136,7 @@ import {
   CONTEXT_DELTA_TOOL_DEFINITION,
 } from '../tools/context-delta-tool.js';
 import { SessionManager } from '../core/session-manager.js';
+import { createSummarizerFromEnv } from '../core/summarization.js';
 import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js';
 import { ConfigManager } from '../core/config.js';
 import { lruMemoize, memoRegistry } from '../utils/lru-memoize.js';
@@ -398,6 +399,7 @@ const sessionManager = new SessionManager({
   persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'),
   tokenizer: sessionTokenizer,
   defaultMaxTokens: chatDefaultMaxTokens,
+  summarizer: createSummarizerFromEnv(),
 });
 const contextDelta = new ContextDeltaTool(sessionManager);
 
diff --git a/tests/unit/summarization.test.ts b/tests/unit/summarization.test.ts
new file mode 100644
index 0000000..a060ee7
--- /dev/null
+++ b/tests/unit/summarization.test.ts
@@ -0,0 +1,102 @@
+import { describe, it, expect, beforeEach, afterEach } from '@jest/globals';
+import {
+    TruncatingSummarizer,
+    AnthropicSummarizer,
+    GoogleAISummarizer,
+    createSummarizerFromEnv,
+} from '../../src/core/summarization.js';
+import { Message } from '../../src/core/session.js';
+
+function makeMessages(n: number): Message[] {
+    return Array.from({ length: n }, (_, i) => ({
+        role: (i % 2 === 0 ? 'user' : 'assistant') as Message['role'],
+        content: `Turn ${i}: ${'x'.repeat(50)}`,
+        timestamp: Date.now() + i,
+    }));
+}
+
+describe('TruncatingSummarizer', () => {
+    it('returns empty string for empty input', async () => {
+        const s = new TruncatingSummarizer();
+        expect(await s.summarize([])).toBe('');
+    });
+
+    it('returns untruncated text when under maxChars', async () => {
+        const s = new TruncatingSummarizer({ maxChars: 10_000 });
+        const out = await s.summarize(makeMessages(3));
+        expect(out).toContain('Turn 0');
+        expect(out).toContain('Turn 2');
+        expect(out).not.toContain('[truncated]');
+    });
+
+    it('truncates with a marker when over maxChars', async () => {
+        const s = new TruncatingSummarizer({ maxChars: 500 });
+        const out = await s.summarize(makeMessages(50));
+        expect(out).toContain('[truncated]');
+        expect(out.length).toBeLessThan(600);
+    });
+});
+
+describe('AnthropicSummarizer / GoogleAISummarizer constructors', () => {
+    const savedAnthropic = process.env.ANTHROPIC_API_KEY;
+    const savedGoogle = process.env.GOOGLE_AI_API_KEY;
+
+    beforeEach(() => {
+        delete process.env.ANTHROPIC_API_KEY;
+        delete process.env.GOOGLE_AI_API_KEY;
+    });
+    afterEach(() => {
+        if (savedAnthropic !== undefined) process.env.ANTHROPIC_API_KEY = savedAnthropic;
+        else delete process.env.ANTHROPIC_API_KEY;
+        if (savedGoogle !== undefined) process.env.GOOGLE_AI_API_KEY = savedGoogle;
+        else delete process.env.GOOGLE_AI_API_KEY;
+    });
+
+    it('AnthropicSummarizer throws without a key', () => {
+        expect(() => new AnthropicSummarizer()).toThrow(/ANTHROPIC_API_KEY/);
+    });
+
+    it('GoogleAISummarizer throws without a key', () => {
+        expect(() => new GoogleAISummarizer()).toThrow(/GOOGLE_AI_API_KEY/);
+    });
+
+    it('AnthropicSummarizer constructs with explicit apiKey', () => {
+        expect(() => new AnthropicSummarizer({ apiKey: 'sk-test' })).not.toThrow();
+    });
+
+    it('GoogleAISummarizer constructs with explicit apiKey', () => {
+        expect(() => new GoogleAISummarizer({ apiKey: 'gapi-test' })).not.toThrow();
+    });
+});
+
+describe('createSummarizerFromEnv', () => {
+    const saved = {
+        anthropic: process.env.ANTHROPIC_API_KEY,
+        google: process.env.GOOGLE_AI_API_KEY,
+    };
+
+    afterEach(() => {
+        if (saved.anthropic !== undefined) process.env.ANTHROPIC_API_KEY = saved.anthropic;
+        else delete process.env.ANTHROPIC_API_KEY;
+        if (saved.google !== undefined) process.env.GOOGLE_AI_API_KEY = saved.google;
+        else delete process.env.GOOGLE_AI_API_KEY;
+    });
+
+    it('falls back to TruncatingSummarizer when no keys are set', () => {
+        delete process.env.ANTHROPIC_API_KEY;
+        delete process.env.GOOGLE_AI_API_KEY;
+        expect(createSummarizerFromEnv()).toBeInstanceOf(TruncatingSummarizer);
+    });
+
+    it('prefers Anthropic when its key is set', () => {
+        process.env.ANTHROPIC_API_KEY = 'sk-test';
+        delete process.env.GOOGLE_AI_API_KEY;
+        expect(createSummarizerFromEnv()).toBeInstanceOf(AnthropicSummarizer);
+    });
+
+    it('uses Google AI when only its key is set', () => {
+        delete process.env.ANTHROPIC_API_KEY;
+        process.env.GOOGLE_AI_API_KEY = 'gapi-test';
+        expect(createSummarizerFromEnv()).toBeInstanceOf(GoogleAISummarizer);
+    });
+});

From a78d19616dc5cfbeaf37331078b42a0018e1fea3 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:53:19 -0400
Subject: [PATCH 20/26] feat(storage): gzip session persistence + shared gzip
 utilities (#126)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fulfills the #126 acceptance criteria on the TypeScript side (the PS
side lands in a follow-up commit):

- src/utils/gzip.ts: gzipString / gunzipBuffer primitives, plus
  saveGzippedFile (atomic tmp + rename, removes stale plaintext) and
  loadMaybeGzippedFile (reads .gz if present, otherwise plaintext so
  sessions.json files written before this change still load — the
  "backward compatibility" bullet from the issue).
- SessionManager persistNow now writes sessions via saveGzippedFile;
  load() uses loadMaybeGzippedFile. Existing checks for the sessions
  file at startup also look for the .gz sibling.

Refs #126

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/core/session-manager.ts | 42 +++++++----------
 src/utils/gzip.ts           | 90 +++++++++++++++++++++++++++++++++++++
 tests/unit/gzip.test.ts     | 73 ++++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+), 26 deletions(-)
 create mode 100644 src/utils/gzip.ts
 create mode 100644 tests/unit/gzip.test.ts

diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts
index 2a19edd..935004e 100644
--- a/src/core/session-manager.ts
+++ b/src/core/session-manager.ts
@@ -1,12 +1,4 @@
-import {
-    existsSync,
-    mkdirSync,
-    readFileSync,
-    writeFileSync,
-    renameSync,
-    unlinkSync,
-} from 'fs';
-import { dirname } from 'path';
+import { existsSync } from 'fs';
 import { z } from 'zod';
 import {
     Session,
@@ -15,6 +7,7 @@ import {
 } from './session.js';
 import { ITokenizer } from './tokenizers/i-tokenizer.js';
 import { ISummarizer } from './summarization.js';
+import { loadMaybeGzippedFile, saveGzippedFile } from '../utils/gzip.js';
 
 /**
  * Persistent SessionManager — addresses issues #121 / #122.
@@ -84,7 +77,11 @@ export class SessionManager {
         this.sessionTtlMs = options.sessionTtlMs ?? DEFAULT_SESSION_TTL_MS;
         this.maxFileStateBytes =
             options.maxFileStateBytes ?? DEFAULT_MAX_FILE_STATE_BYTES;
-        if (this.persistencePath && existsSync(this.persistencePath)) {
+        if (
+            this.persistencePath &&
+            (existsSync(`${this.persistencePath}.gz`) ||
+                existsSync(this.persistencePath))
+        ) {
             this.load();
         }
     }
@@ -200,27 +197,17 @@ export class SessionManager {
             const state = {
                 sessions: this.listSessions().map((s) => s.toSnapshot()),
             };
-            const dir = dirname(this.persistencePath);
-            if (!existsSync(dir)) {
-                mkdirSync(dir, { recursive: true });
-            }
-            const tmpPath = `${this.persistencePath}.tmp`;
-            writeFileSync(tmpPath, JSON.stringify(state, null, 2));
-            renameSync(tmpPath, this.persistencePath);
+            // Gzip + atomic tmp + rename (handled inside saveGzippedFile).
+            saveGzippedFile(
+                this.persistencePath,
+                JSON.stringify(state, null, 2)
+            );
         } catch (error) {
             const message =
                 error instanceof Error ? error.message : String(error);
             console.warn(
                 `SessionManager: failed to persist to ${this.persistencePath}: ${message}`
             );
-            // Best-effort cleanup of the tmp file
-            if (this.persistencePath) {
-                try {
-                    unlinkSync(`${this.persistencePath}.tmp`);
-                } catch {
-                    // Ignore — tmp file may not exist.
-                }
-            }
         } finally {
             this.persistInFlight = false;
         }
@@ -231,7 +218,10 @@ export class SessionManager {
             return;
         }
         try {
-            const raw = readFileSync(this.persistencePath, 'utf-8');
+            const raw = loadMaybeGzippedFile(this.persistencePath);
+            if (raw === null) {
+                return;
+            }
             const json = JSON.parse(raw);
             const parsed = PersistedStateSchema.safeParse(json);
             if (!parsed.success) {
diff --git a/src/utils/gzip.ts b/src/utils/gzip.ts
new file mode 100644
index 0000000..5edf8ee
--- /dev/null
+++ b/src/utils/gzip.ts
@@ -0,0 +1,90 @@
+import { gzipSync, gunzipSync } from 'zlib';
+import {
+    existsSync,
+    mkdirSync,
+    readFileSync,
+    renameSync,
+    unlinkSync,
+    writeFileSync,
+} from 'fs';
+import { dirname } from 'path';
+
+/**
+ * Gzip utilities — addresses issue #126.
+ *
+ * `gzipString` / `gunzipBuffer` are thin UTF-8 wrappers around node:zlib.
+ * `saveGzippedFile` writes `<path>.gz` atomically (tmp + rename) so a
+ * crash mid-write can't produce a corrupt gzip. `loadFile` transparently
+ * reads `<path>.gz` if present and falls back to the plaintext path —
+ * that gives us backward compatibility with sessions.json files written
+ * before this change.
+ */
+
+export interface GzipStats {
+    originalBytes: number;
+    compressedBytes: number;
+    ratio: number;
+    percentSaved: number;
+}
+
+export function gzipString(text: string, level: number = 6): Buffer {
+    return gzipSync(Buffer.from(text, 'utf8'), { level });
+}
+
+export function gunzipBuffer(buffer: Buffer): string {
+    return gunzipSync(buffer).toString('utf8');
+}
+
+export function computeStats(text: string, compressed: Buffer): GzipStats {
+    const originalBytes = Buffer.byteLength(text, 'utf8');
+    const compressedBytes = compressed.length;
+    const ratio = originalBytes === 0 ? 0 : compressedBytes / originalBytes;
+    return {
+        originalBytes,
+        compressedBytes,
+        ratio,
+        percentSaved: originalBytes === 0 ? 0 : (1 - ratio) * 100,
+    };
+}
+
+/**
+ * Write gzipped text to `${path}.gz` using atomic tmp + rename so a
+ * crash mid-write never produces a half-written file. Also removes any
+ * stale uncompressed plaintext at `path` once the gzip lands (backward
+ * compat cleanup).
+ */
+export function saveGzippedFile(path: string, text: string, level: number = 6): GzipStats {
+    const dir = dirname(path);
+    if (!existsSync(dir)) {
+        mkdirSync(dir, { recursive: true });
+    }
+    const compressed = gzipString(text, level);
+    const gzPath = `${path}.gz`;
+    const tmpPath = `${gzPath}.tmp`;
+    writeFileSync(tmpPath, compressed);
+    renameSync(tmpPath, gzPath);
+    if (existsSync(path)) {
+        try {
+            unlinkSync(path);
+        } catch {
+            // Best-effort — leaving the plaintext file isn't fatal.
+        }
+    }
+    return computeStats(text, compressed);
+}
+
+/**
+ * Load either `${path}.gz` or `${path}` — whichever exists. Returns
+ * null if neither is present.
+ */
+export function loadMaybeGzippedFile(path: string): string | null {
+    const gzPath = `${path}.gz`;
+    if (existsSync(gzPath)) {
+        const buffer = readFileSync(gzPath);
+        return gunzipBuffer(buffer);
+    }
+    if (existsSync(path)) {
+        return readFileSync(path, 'utf-8');
+    }
+    return null;
+}
diff --git a/tests/unit/gzip.test.ts b/tests/unit/gzip.test.ts
new file mode 100644
index 0000000..bbb444c
--- /dev/null
+++ b/tests/unit/gzip.test.ts
@@ -0,0 +1,73 @@
+import { describe, it, expect, afterEach } from '@jest/globals';
+import { mkdtempSync, existsSync, writeFileSync, rmSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import {
+    gzipString,
+    gunzipBuffer,
+    saveGzippedFile,
+    loadMaybeGzippedFile,
+} from '../../src/utils/gzip.js';
+
+describe('gzip utils', () => {
+    const tempDirs: string[] = [];
+    afterEach(() => {
+        while (tempDirs.length) {
+            const dir = tempDirs.pop();
+            if (dir) {
+                rmSync(dir, { recursive: true, force: true });
+            }
+        }
+    });
+
+    function tempDir(): string {
+        const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-gzip-'));
+        tempDirs.push(dir);
+        return dir;
+    }
+
+    it('gzipString round-trips via gunzipBuffer', () => {
+        const text = 'Hello, world. '.repeat(1000);
+        const buffer = gzipString(text);
+        expect(buffer.length).toBeLessThan(text.length);
+        expect(gunzipBuffer(buffer)).toBe(text);
+    });
+
+    it('saveGzippedFile writes .gz and removes plaintext', () => {
+        const dir = tempDir();
+        const file = join(dir, 'sessions.json');
+        writeFileSync(file, 'stale plaintext');
+        const stats = saveGzippedFile(file, JSON.stringify({ hello: 'world' }));
+        expect(existsSync(`${file}.gz`)).toBe(true);
+        expect(existsSync(file)).toBe(false);
+        expect(stats.originalBytes).toBeGreaterThan(0);
+        expect(stats.compressedBytes).toBeGreaterThan(0);
+    });
+
+    it('loadMaybeGzippedFile prefers the .gz sibling', () => {
+        const dir = tempDir();
+        const file = join(dir, 'state.json');
+        saveGzippedFile(file, '{"compressed":true}');
+        expect(loadMaybeGzippedFile(file)).toBe('{"compressed":true}');
+    });
+
+    it('loadMaybeGzippedFile falls back to plaintext when no .gz exists', () => {
+        const dir = tempDir();
+        const file = join(dir, 'legacy.json');
+        writeFileSync(file, '{"legacy":true}');
+        expect(loadMaybeGzippedFile(file)).toBe('{"legacy":true}');
+    });
+
+    it('loadMaybeGzippedFile returns null when neither exists', () => {
+        const dir = tempDir();
+        const file = join(dir, 'missing.json');
+        expect(loadMaybeGzippedFile(file)).toBeNull();
+    });
+
+    it('saves with high compression ratio on repetitive content', () => {
+        const dir = tempDir();
+        const file = join(dir, 'repeated.txt');
+        const stats = saveGzippedFile(file, 'aa'.repeat(10_000));
+        expect(stats.percentSaved).toBeGreaterThan(95);
+    });
+});

From fd0a0b283e88f245fa5fde906483424804d54d4b Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 21:55:17 -0400
Subject: [PATCH 21/26] feat(powershell): config / gzip / context-delta helpers
 (#120, #122, #126)

Completes the PowerShell-side integration gaps found in the audit:

- hooks/helpers/config.ps1: Import-TokenOptimizerConfig loads
  ~/.token-optimizer/config.json (the same file the TS server reads),
  falls back to defaults, and auto-writes the default file on first
  run. Exposes Get-TokenOptimizerOptimizationConfig and
  Get-TokenOptimizerModelTokenLimit for orchestrator consumers (#120).
- hooks/helpers/gzip.ps1: Compress-String / Expand-String primitives
  and Save-GzippedFile / Read-MaybeGzippedFile that mirror the TS
  src/utils/gzip.ts semantics (atomic tmp+rename, backward-compat
  read of plaintext siblings) (#126).
- hooks/helpers/context-delta.ps1: Get-TokenOptimizerSessionId returns
  a stable per-Claude-session UUID persisted at
  ~/.token-optimizer/current-session-id, Reset-TokenOptimizerSessionId
  clears it, Invoke-ContextDelta wraps the context_delta MCP tool via
  the existing Invoke-TokenOptimizer helper (#122).
- Orchestrator dot-sources the three new helpers and Handle-SmartRead
  now calls Invoke-ContextDelta with the smart_read content after a
  successful read so the server's per-session file snapshot stays in
  sync (#122 Phase 2).

Runtime-verified in PS7 that gzip round-trips the content and the
helpers parse without errors.

Refs #120, #122, #126

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../handlers/token-optimizer-orchestrator.ps1 |  19 +++
 hooks/helpers/config.ps1                      | 118 ++++++++++++++++++
 hooks/helpers/context-delta.ps1               |  87 +++++++++++++
 hooks/helpers/gzip.ps1                        | 101 +++++++++++++++
 4 files changed, 325 insertions(+)
 create mode 100644 hooks/helpers/config.ps1
 create mode 100644 hooks/helpers/context-delta.ps1
 create mode 100644 hooks/helpers/gzip.ps1

diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index ae55748..4839706 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -28,6 +28,9 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) {
 $HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers"
 $INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1"
 . "$PSScriptRoot\..\helpers\logging.ps1"
+. "$PSScriptRoot\..\helpers\config.ps1"
+. "$PSScriptRoot\..\helpers\gzip.ps1"
+. "$PSScriptRoot\..\helpers\context-delta.ps1"
 $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log"
 $SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt"
 $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data"
@@ -2245,6 +2248,22 @@ function Handle-SmartRead {
                 Write-Log "Updated session totalTokens by $tokens" "DEBUG"
             }
 
+            # #122: update the MCP server's context_delta so the next read
+            # of this file can be served as a diff. Failure here is
+            # non-fatal — smart_read still succeeds.
+            try {
+                $contentText = if ($result.content -and $result.content[0] -and $result.content[0].text) {
+                    $result.content[0].text
+                } else {
+                    $null
+                }
+                if ($contentText) {
+                    $null = Invoke-ContextDelta -Operation 'compute-delta' -FilePath $filePath -CurrentContent $contentText
+                }
+            } catch {
+                Write-Log "context_delta update skipped: $($_.Exception.Message)" 'DEBUG'
+            }
+
             # Return smart_read result and block plain Read
             $blockResponse = @{
                 continue = $false
diff --git a/hooks/helpers/config.ps1 b/hooks/helpers/config.ps1
new file mode 100644
index 0000000..f5c12e3
--- /dev/null
+++ b/hooks/helpers/config.ps1
@@ -0,0 +1,118 @@
+[CmdletBinding()]
+param()
+
+<#
+Token-Optimizer Config helper — addresses issue #120 (PowerShell side).
+
+Mirrors src/core/config.ts so the PS orchestrator and the TS server
+share one source of truth. The config file lives at
+~/.token-optimizer/config.json and is the same one the Node server
+reads. On first run we copy the defaults below into that file.
+#>
+
+$script:TokenOptimizerConfigPath =
+    Join-Path $env:USERPROFILE '.token-optimizer\config.json'
+
+$script:TokenOptimizerDefaultConfig = @{
+    cache = @{
+        enabled = $true
+        maxSizeMB = 500
+        defaultTTL = 300
+        ttlByType = @{
+            file_read = 300
+            git_status = 60
+            git_diff = 120
+            build_result = 600
+            test_result = 300
+        }
+        compression = 'auto'
+    }
+    monitoring = @{
+        enabled = $true
+        detailedLogging = $false
+        metricsRetentionDays = 30
+        dashboardPort = 3100
+        enableWebUI = $false
+    }
+    optimization = @{
+        compressionTokenThreshold = 0.7
+        compressionPreserveThreshold = 0.3
+        minTokensBeforeCompression = 1000
+        modelTokenLimits = @{
+            'gpt-4' = 128000
+            'gpt-4-turbo' = 128000
+            'gpt-3.5-turbo' = 16385
+            'claude-3-opus' = 200000
+            'claude-3-sonnet' = 200000
+            'claude-3-haiku' = 200000
+            'claude-opus-4-7' = 1000000
+            'claude-sonnet-4-6' = 1000000
+            'gemini-1.5-pro' = 2000000
+            'gemini-2.5-flash' = 1000000
+        }
+        minOutputSizeBytes = 500
+        quality = 'balanced'
+        cacheSettings = @{
+            maxSize = 1000
+            ttlSeconds = 3600
+        }
+        chatCompression = @{
+            enabled = $true
+            strategy = 'summarize'
+        }
+    }
+}
+
+function Get-TokenOptimizerConfigPath {
+    return $script:TokenOptimizerConfigPath
+}
+
+function Write-TokenOptimizerDefaultConfig {
+    $configPath = Get-TokenOptimizerConfigPath
+    $configDir = Split-Path -Parent $configPath
+    if (-not (Test-Path $configDir)) {
+        New-Item -ItemType Directory -Path $configDir -Force | Out-Null
+    }
+    $json = $script:TokenOptimizerDefaultConfig | ConvertTo-Json -Depth 10
+    Set-Content -Path $configPath -Value $json -Encoding UTF8
+}
+
+function Import-TokenOptimizerConfig {
+    $configPath = Get-TokenOptimizerConfigPath
+    if (-not (Test-Path $configPath)) {
+        Write-TokenOptimizerDefaultConfig
+        return $script:TokenOptimizerDefaultConfig
+    }
+    try {
+        $raw = Get-Content -Path $configPath -Raw -Encoding UTF8
+        return ($raw | ConvertFrom-Json -AsHashtable)
+    } catch {
+        $msg = "Failed to load $configPath ($($_.Exception.Message)); using defaults."
+        if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+            Write-Log $msg 'WARN'
+        } else {
+            Write-Warning $msg
+        }
+        return $script:TokenOptimizerDefaultConfig
+    }
+}
+
+function Get-TokenOptimizerOptimizationConfig {
+    $config = Import-TokenOptimizerConfig
+    if ($null -ne $config.optimization) {
+        return $config.optimization
+    }
+    return $script:TokenOptimizerDefaultConfig.optimization
+}
+
+function Get-TokenOptimizerModelTokenLimit {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$ModelName
+    )
+    $opt = Get-TokenOptimizerOptimizationConfig
+    if ($opt.modelTokenLimits -and $opt.modelTokenLimits.ContainsKey($ModelName)) {
+        return $opt.modelTokenLimits[$ModelName]
+    }
+    return $null
+}
diff --git a/hooks/helpers/context-delta.ps1 b/hooks/helpers/context-delta.ps1
new file mode 100644
index 0000000..7c4c3ab
--- /dev/null
+++ b/hooks/helpers/context-delta.ps1
@@ -0,0 +1,87 @@
+[CmdletBinding()]
+param()
+
+<#
+PowerShell integration for the context_delta MCP tool — addresses
+issue #122 Phase 2.
+
+Get-TokenOptimizerSessionId generates a stable sessionId per top-level
+PS session (cached on the script scope and persisted to a marker file
+so multiple orchestrator invocations within one Claude session reuse
+the same id).
+
+Invoke-ContextDelta calls the context_delta MCP tool via the shared
+Invoke-TokenOptimizer helper and returns the unified-diff delta so
+Handle-SmartRead can emit only the changed lines to the model.
+#>
+
+$script:TokenOptimizerSessionIdPath =
+    Join-Path $env:USERPROFILE '.token-optimizer\current-session-id'
+
+function Get-TokenOptimizerSessionId {
+    if ($script:TokenOptimizerCurrentSessionId) {
+        return $script:TokenOptimizerCurrentSessionId
+    }
+    if (Test-Path $script:TokenOptimizerSessionIdPath) {
+        $existing = (Get-Content -Path $script:TokenOptimizerSessionIdPath -Raw).Trim()
+        if ($existing) {
+            $script:TokenOptimizerCurrentSessionId = $existing
+            return $existing
+        }
+    }
+    $newId = [guid]::NewGuid().ToString()
+    $dir = Split-Path -Parent $script:TokenOptimizerSessionIdPath
+    if (-not (Test-Path $dir)) {
+        New-Item -ItemType Directory -Path $dir -Force | Out-Null
+    }
+    Set-Content -Path $script:TokenOptimizerSessionIdPath -Value $newId
+    $script:TokenOptimizerCurrentSessionId = $newId
+    return $newId
+}
+
+function Reset-TokenOptimizerSessionId {
+    $script:TokenOptimizerCurrentSessionId = $null
+    if (Test-Path $script:TokenOptimizerSessionIdPath) {
+        Remove-Item -Path $script:TokenOptimizerSessionIdPath -Force
+    }
+}
+
+function Invoke-ContextDelta {
+    param(
+        [Parameter(Mandatory = $true)]
+        [ValidateSet('compute-delta', 'seed', 'clear')]
+        [string]$Operation,
+        [Parameter(Mandatory = $true)][string]$FilePath,
+        [string]$CurrentContent = $null,
+        [string]$SessionId = $null
+    )
+
+    if (-not $SessionId) {
+        $SessionId = Get-TokenOptimizerSessionId
+    }
+    $toolArgs = @{
+        operation = $Operation
+        sessionId = $SessionId
+        filePath = $FilePath
+    }
+    if ($Operation -ne 'clear' -and $null -ne $CurrentContent) {
+        $toolArgs.currentContent = $CurrentContent
+    }
+    if (Get-Command Invoke-TokenOptimizer -ErrorAction SilentlyContinue) {
+        try {
+            return Invoke-TokenOptimizer -ToolName 'context_delta' -Arguments $toolArgs
+        } catch {
+            $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)"
+            if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+                Write-Log $msg 'WARN'
+            } else {
+                Write-Warning $msg
+            }
+            return $null
+        }
+    }
+    if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+        Write-Log 'Invoke-TokenOptimizer helper not available; skipping context_delta.' 'DEBUG'
+    }
+    return $null
+}
diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1
new file mode 100644
index 0000000..a84115b
--- /dev/null
+++ b/hooks/helpers/gzip.ps1
@@ -0,0 +1,101 @@
+[CmdletBinding()]
+param()
+
+<#
+Gzip utilities — addresses issue #126 (PowerShell side).
+
+Compress-String / Expand-String are the primitives. Save-GzippedFile
+writes <path>.gz atomically (tmp + rename) and strips the plaintext
+sibling once the gzip lands. Read-MaybeGzippedFile prefers <path>.gz
+and falls back to plaintext so PS code can be migrated incrementally.
+#>
+
+function Compress-String {
+    param(
+        [Parameter(Mandatory = $true)][string]$InputString,
+        [ValidateSet('Optimal', 'Fastest', 'NoCompression', 'SmallestSize')]
+        [string]$CompressionLevel = 'Optimal'
+    )
+    $inputStream = $null
+    $outputStream = $null
+    $gzipStream = $null
+    try {
+        $bytes = [System.Text.Encoding]::UTF8.GetBytes($InputString)
+        $inputStream = [System.IO.MemoryStream]::new($bytes)
+        $outputStream = [System.IO.MemoryStream]::new()
+        $level = [System.IO.Compression.CompressionLevel]::$CompressionLevel
+        $gzipStream = [System.IO.Compression.GZipStream]::new($outputStream, $level)
+        $inputStream.CopyTo($gzipStream)
+        $gzipStream.Dispose()
+        $gzipStream = $null
+        return ,$outputStream.ToArray()
+    } finally {
+        if ($null -ne $gzipStream) { $gzipStream.Dispose() }
+        if ($null -ne $inputStream) { $inputStream.Dispose() }
+        if ($null -ne $outputStream) { $outputStream.Dispose() }
+    }
+}
+
+function Expand-String {
+    param(
+        [Parameter(Mandatory = $true)][byte[]]$CompressedBytes
+    )
+    $inputStream = $null
+    $outputStream = $null
+    $gzipStream = $null
+    try {
+        $inputStream = [System.IO.MemoryStream]::new($CompressedBytes)
+        $outputStream = [System.IO.MemoryStream]::new()
+        $gzipStream = [System.IO.Compression.GZipStream]::new(
+            $inputStream,
+            [System.IO.Compression.CompressionMode]::Decompress
+        )
+        $gzipStream.CopyTo($outputStream)
+        return [System.Text.Encoding]::UTF8.GetString($outputStream.ToArray())
+    } finally {
+        if ($null -ne $gzipStream) { $gzipStream.Dispose() }
+        if ($null -ne $inputStream) { $inputStream.Dispose() }
+        if ($null -ne $outputStream) { $outputStream.Dispose() }
+    }
+}
+
+function Save-GzippedFile {
+    param(
+        [Parameter(Mandatory = $true)][string]$Path,
+        [Parameter(Mandatory = $true)][string]$Content
+    )
+    $dir = Split-Path -Parent $Path
+    if ($dir -and -not (Test-Path $dir)) {
+        New-Item -ItemType Directory -Path $dir -Force | Out-Null
+    }
+    $compressed = Compress-String -InputString $Content
+    $gzPath = "$Path.gz"
+    $tmpPath = "$gzPath.tmp"
+    [System.IO.File]::WriteAllBytes($tmpPath, $compressed)
+    if (Test-Path $gzPath) {
+        Remove-Item -Path $gzPath -Force
+    }
+    Move-Item -Path $tmpPath -Destination $gzPath -Force
+    if (Test-Path $Path) {
+        Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue
+    }
+    return @{
+        originalBytes = [System.Text.Encoding]::UTF8.GetByteCount($Content)
+        compressedBytes = $compressed.Length
+    }
+}
+
+function Read-MaybeGzippedFile {
+    param(
+        [Parameter(Mandatory = $true)][string]$Path
+    )
+    $gzPath = "$Path.gz"
+    if (Test-Path $gzPath) {
+        $bytes = [System.IO.File]::ReadAllBytes($gzPath)
+        return Expand-String -CompressedBytes $bytes
+    }
+    if (Test-Path $Path) {
+        return [System.IO.File]::ReadAllText($Path, [System.Text.Encoding]::UTF8)
+    }
+    return $null
+}

From 778d01a2ef74bd551abb3cb671f4827f74578faa Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 22:14:31 -0400
Subject: [PATCH 22/26] fix: resolve coderabbit review comments on pr #163
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses all 22 unresolved review threads. Grouped logically:

Critical PS defects
- token-optimizer-orchestrator.ps1: move helper dot-sources before the
  first Write-Log call (script version diagnostic + InputJsonFile read
  were calling Write-Log before logging.ps1 was loaded).
- token-optimizer-orchestrator.ps1: optimization_storage retrieve path
  now reads $retrieveResult.result.optimizedText (and mirrors the
  base64 wrapping used on store) instead of the top-level response.
- dispatcher.ps1: load logging.ps1 defensively — missing/broken helper
  falls back to no-op shims instead of killing every hook phase.
- helpers/logging.ps1: create the log directory on demand and swallow
  write failures so logging never becomes the failure mode for callers.
- orchestrator Handle-SmartRead: skip context_delta update when
  smart_read returned a diff payload — persisting a diff as the new
  baseline would compare the next read against the previous patch.

Compression + storage
- compression-engine.ts decompress(): fall back to raw UTF-8 when the
  buffer isn't Brotli so legacy plaintext rows keep working.
- compression-engine.ts: getCompressionStats / shouldCompress share
  a DEFAULT_MIN_SIZE_BYTES knob instead of hard-coding 500 in one
  place and 1000 in callers.
- optimization-storage.ts: persist AND read compression_algorithm;
  decodePayload dispatches per-algorithm with explicit error on
  unknown labels.

Config deep-merge
- config.ts: merge optimization.modelTokenLimits so user overrides
  add to the default map instead of replacing it.
- config.ps1: mirror the deep-merge via a new
  Merge-TokenOptimizerHashtable recursive helper.
- tests/config.test.ts: lock the invariant with a gpt-4 assertion.

Tokenizer hardening
- tiktoken-tokenizer / heuristic-tokenizer / google-ai-tokenizer:
  SHA-256 hash cache keys longer than 256 chars so the LRU stores
  digests, not full prompt text.
- tokenizer-factory.createFromEnv: TOKEN_OPTIMIZER_MODEL has highest
  precedence so users can pin the optimizer model even when
  CLAUDE_MODEL or similar are already set.

Session + context-delta fidelity
- session.ts: fromSnapshot preserves createdAt and updatedAt from the
  persisted snapshot; added SessionOptions.createdAt / .updatedAt
  overrides. Test asserts round-trip.
- context-delta-tool.ts: originalSize / deltaSize / bytesSaved use
  Buffer.byteLength(utf8) so multi-byte content reports honest bytes,
  matching the byte cap that SessionManager.updateFileState enforces.

Schema strictness
- tool-schemas.ts: OptimizationStorageSchema is now a discriminated
  union — store requires hash+text+token fields, retrieve requires
  just hash. Invalid payloads fail in validateToolArgs instead of
  after dispatch.
- optimization-storage-tool.ts: MCP inputSchema mirrors the same
  oneOf shape with additionalProperties:false.

Cache utilities
- lru-cache.ts prune(): scan every entry so per-entry TTLs set via
  set(key, val, ttlMs) are cleaned up even when defaultTtlMs is 0.
  Regression test added.
- lru-memoize.ts: deduplicate concurrent calls for the same key with
  an inFlight Map — a stampede while the first promise is pending
  collapses to a single fn() invocation. Concurrency test added.

PS atomic gzip
- helpers/gzip.ps1 Save-GzippedFile: atomic swap via
  File::Move(src, dst, overwrite:true) on .NET 5+, so a crash
  mid-write never leaves the caller with a missing .gz. Runtime-
  verified in PS7.

All 59 new/updated unit tests pass; tsc --noEmit clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hooks/dispatcher.ps1                          | 16 +++-
 .../handlers/token-optimizer-orchestrator.ps1 | 40 ++++++----
 hooks/helpers/config.ps1                      | 39 ++++++++-
 hooks/helpers/gzip.ps1                        | 13 ++-
 hooks/helpers/logging.ps1                     | 12 ++-
 src/analytics/optimization-storage.ts         | 34 +++++++-
 src/core/compression-engine.ts                | 31 ++++++--
 src/core/config.ts                            |  6 ++
 src/core/session.ts                           | 14 +++-
 src/core/tokenizers/google-ai-tokenizer.ts    | 10 ++-
 src/core/tokenizers/heuristic-tokenizer.ts    | 15 +++-
 src/core/tokenizers/tiktoken-tokenizer.ts     | 20 ++++-
 src/core/tokenizers/tokenizer-factory.ts      |  5 +-
 src/tools/context-delta-tool.ts               | 16 ++--
 src/tools/optimization-storage-tool.ts        | 79 +++++++++++++------
 src/utils/lru-cache.ts                        | 11 ++-
 src/utils/lru-memoize.ts                      | 23 +++++-
 src/validation/tool-schemas.ts                | 26 +++---
 tests/unit/config.test.ts                     |  2 +
 tests/unit/lru-cache.test.ts                  | 11 +++
 tests/unit/lru-memoize.test.ts                | 15 ++++
 tests/unit/session.test.ts                    |  4 +-
 22 files changed, 353 insertions(+), 89 deletions(-)

diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1
index c813350..3896848 100644
--- a/hooks/dispatcher.ps1
+++ b/hooks/dispatcher.ps1
@@ -8,7 +8,21 @@ param([string]$Phase = "")
 $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers"
 $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\dispatcher.log"
 $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1"
-. "$PSScriptRoot\helpers\logging.ps1"
+
+# Load the shared logging helper defensively: a missing/malformed helper
+# must not kill the dispatcher for every hook phase. Fall back to a
+# minimal Write-Log shim so the rest of the script still runs.
+$loggingHelperPath = "$PSScriptRoot\helpers\logging.ps1"
+try {
+    if (Test-Path $loggingHelperPath) {
+        . $loggingHelperPath
+    } else {
+        throw "logging helper not found at $loggingHelperPath"
+    }
+} catch {
+    function Write-Log { param([string]$Message, [string]$Level = 'INFO') $null = $Message; $null = $Level }
+    function Handle-Error { param($Exception, [string]$Message) $null = $Exception; $null = $Message }
+}
 
 
 
diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index 4839706..6b13fd8 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -10,6 +10,17 @@ param(
     [string]$InputJsonFile = ""
 )
 
+# Dot-source helpers BEFORE any logging — Write-Log must exist before
+# the first use below.
+$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers"
+$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1"
+$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log"
+$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt"
+. "$PSScriptRoot\..\helpers\logging.ps1"
+. "$PSScriptRoot\..\helpers\config.ps1"
+. "$PSScriptRoot\..\helpers\gzip.ps1"
+. "$PSScriptRoot\..\helpers\context-delta.ps1"
+
 # DIAGNOSTIC: Log script version/load time to verify latest version is being used
 $SCRIPT_VERSION = Get-Date -Format 'yyyyMMdd.HHmmss'
 Write-Log "token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" "DEBUG"
@@ -24,15 +35,6 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) {
         Write-Log "Failed to read InputJsonFile: $($_.Exception.Message)" "ERROR"
     }
 }
-
-$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers"
-$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1"
-. "$PSScriptRoot\..\helpers\logging.ps1"
-. "$PSScriptRoot\..\helpers\config.ps1"
-. "$PSScriptRoot\..\helpers\gzip.ps1"
-. "$PSScriptRoot\..\helpers\context-delta.ps1"
-$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log"
-$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt"
 $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data"
 
 # PERFORMANCE FIX: Prefer local dev path if not already set
@@ -1950,12 +1952,16 @@ function Handle-OptimizeToolOutput {
             $retrieveResultJson = & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $retrieveJson
             $retrieveResult = if ($retrieveResultJson) { $retrieveResultJson | ConvertFrom-Json } else { $null }
 
-            if ($retrieveResult -and $retrieveResult.success) {
+            if ($retrieveResult -and $retrieveResult.success -and $retrieveResult.result) {
                 Write-Log "Cache HIT for optimization result. Hash: $originalTextHash" "INFO"
-                $optimizedTextBytes = [System.Convert]::FromBase64String($retrieveResult.optimizedText)
+                # OptimizationStorageTool.retrieve() returns { success, result: { optimizedText, ... } }.
+                # Read the actual payload from $retrieveResult.result (not top-level), and mirror
+                # the base64 wrapping used on the store path below so round-tripped bytes survive JSON.
+                $cachedEntry = $retrieveResult.result
+                $optimizedTextBytes = [System.Convert]::FromBase64String($cachedEntry.optimizedText)
                 $optimizedText = [System.Text.Encoding]::UTF8.GetString($optimizedTextBytes)
-                $afterTokens = $retrieveResult.optimizedTokens
-                $saved = $retrieveResult.tokensSaved
+                $afterTokens = $cachedEntry.optimizedTokens
+                $saved = $cachedEntry.tokensSaved
                 $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 }
 
                 if ($script:CurrentSession) {
@@ -2251,13 +2257,19 @@ function Handle-SmartRead {
             # #122: update the MCP server's context_delta so the next read
             # of this file can be served as a diff. Failure here is
             # non-fatal — smart_read still succeeds.
+            #
+            # IMPORTANT: only feed FULL content. smart_read can return a
+            # diff payload (metadata.isDiff), and persisting a diff as the
+            # new baseline would make the next compute-delta compare
+            # against the previous patch instead of the file contents.
             try {
+                $isDiff = $result.metadata -and $result.metadata.isDiff
                 $contentText = if ($result.content -and $result.content[0] -and $result.content[0].text) {
                     $result.content[0].text
                 } else {
                     $null
                 }
-                if ($contentText) {
+                if ($contentText -and -not $isDiff) {
                     $null = Invoke-ContextDelta -Operation 'compute-delta' -FilePath $filePath -CurrentContent $contentText
                 }
             } catch {
diff --git a/hooks/helpers/config.ps1 b/hooks/helpers/config.ps1
index f5c12e3..38b42b6 100644
--- a/hooks/helpers/config.ps1
+++ b/hooks/helpers/config.ps1
@@ -97,12 +97,45 @@ function Import-TokenOptimizerConfig {
     }
 }
 
+function Merge-TokenOptimizerHashtable {
+    param(
+        [hashtable]$Base,
+        $User
+    )
+    $merged = @{}
+    foreach ($key in $Base.Keys) {
+        $merged[$key] = $Base[$key]
+    }
+    if ($null -eq $User) {
+        return $merged
+    }
+    # Handle both hashtables and PSCustomObjects (ConvertFrom-Json returns the latter).
+    $userKeys = @()
+    if ($User -is [hashtable]) {
+        $userKeys = $User.Keys
+    } elseif ($User.PSObject) {
+        $userKeys = $User.PSObject.Properties.Name
+    }
+    foreach ($key in $userKeys) {
+        $userValue = if ($User -is [hashtable]) { $User[$key] } else { $User.$key }
+        if ($Base.ContainsKey($key) -and ($Base[$key] -is [hashtable]) -and ($null -ne $userValue)) {
+            $merged[$key] = Merge-TokenOptimizerHashtable -Base $Base[$key] -User $userValue
+        } else {
+            $merged[$key] = $userValue
+        }
+    }
+    return $merged
+}
+
 function Get-TokenOptimizerOptimizationConfig {
     $config = Import-TokenOptimizerConfig
-    if ($null -ne $config.optimization) {
-        return $config.optimization
+    $defaults = $script:TokenOptimizerDefaultConfig.optimization
+    if ($null -eq $config.optimization) {
+        return $defaults
     }
-    return $script:TokenOptimizerDefaultConfig.optimization
+    # Deep-merge the user's partial optimization section onto defaults so
+    # overriding one modelTokenLimit doesn't drop the rest of the map.
+    return Merge-TokenOptimizerHashtable -Base $defaults -User $config.optimization
 }
 
 function Get-TokenOptimizerModelTokenLimit {
diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1
index a84115b..74b7e7b 100644
--- a/hooks/helpers/gzip.ps1
+++ b/hooks/helpers/gzip.ps1
@@ -72,10 +72,17 @@ function Save-GzippedFile {
     $gzPath = "$Path.gz"
     $tmpPath = "$gzPath.tmp"
     [System.IO.File]::WriteAllBytes($tmpPath, $compressed)
-    if (Test-Path $gzPath) {
-        Remove-Item -Path $gzPath -Force
+    # Atomic swap: File::Move(src, dst, overwrite:$true) on .NET5+.
+    # Unlike "delete then move", this never leaves the caller with a
+    # missing .gz file if the process crashes.
+    try {
+        [System.IO.File]::Move($tmpPath, $gzPath, $true)
+    } catch {
+        if (Test-Path $tmpPath) {
+            Remove-Item -Path $tmpPath -Force -ErrorAction SilentlyContinue
+        }
+        throw
     }
-    Move-Item -Path $tmpPath -Destination $gzPath -Force
     if (Test-Path $Path) {
         Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue
     }
diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1
index 7b87cd0..7da5f95 100644
--- a/hooks/helpers/logging.ps1
+++ b/hooks/helpers/logging.ps1
@@ -22,7 +22,17 @@ function Write-Log {
     $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
     $contextPart = if ($Context) { " [$Context]" } else { "" }
     $logMessage = "[$timestamp] [$Level]$contextPart $Message"
-    $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8
+    if ($script:LOG_FILE) {
+        try {
+            $logDir = Split-Path -Parent $script:LOG_FILE
+            if ($logDir -and -not (Test-Path $logDir)) {
+                New-Item -ItemType Directory -Path $logDir -Force | Out-Null
+            }
+            $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8
+        } catch {
+            # Swallow — logging must never be a failure mode for the caller.
+        }
+    }
     Write-Verbose $logMessage
 }
 
diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts
index 9509709..1a1069d 100644
--- a/src/analytics/optimization-storage.ts
+++ b/src/analytics/optimization-storage.ts
@@ -68,7 +68,7 @@ export class SqliteOptimizationStorage {
         ).run(
             entry.originalTextHash,
             compressed.compressed,
-            'brotli',
+            SqliteOptimizationStorage.COMPRESSION_ALGORITHM,
             entry.originalTokens,
             entry.optimizedTokens,
             entry.tokensSaved
@@ -78,11 +78,13 @@ export class SqliteOptimizationStorage {
     public get(originalTextHash: string): OptimizationResult | null {
         const db = this.requireDb();
         const row = db.prepare(
-            `SELECT optimized_text_compressed, original_tokens, optimized_tokens, tokens_saved
+            `SELECT optimized_text_compressed, compression_algorithm,
+                    original_tokens, optimized_tokens, tokens_saved
              FROM optimization_results WHERE original_text_hash = ?`
         ).get(originalTextHash) as
             | {
                   optimized_text_compressed: Buffer;
+                  compression_algorithm: string;
                   original_tokens: number;
                   optimized_tokens: number;
                   tokens_saved: number;
@@ -95,13 +97,39 @@ export class SqliteOptimizationStorage {
 
         return {
             originalTextHash,
-            optimizedText: this.compressionEngine.decompress(row.optimized_text_compressed),
+            optimizedText: this.decodePayload(
+                row.optimized_text_compressed,
+                row.compression_algorithm
+            ),
             originalTokens: row.original_tokens,
             optimizedTokens: row.optimized_tokens,
             tokensSaved: row.tokens_saved,
         };
     }
 
+    /**
+     * Decode a stored payload using the persisted algorithm label. Keeps
+     * the door open for additional algorithms (gzip, zstd) without
+     * touching the read path, and surfaces an explicit error for
+     * unknown labels instead of silently corrupting data.
+     */
+    private decodePayload(buffer: Buffer, algorithm: string): string {
+        switch (algorithm) {
+            case 'brotli':
+                return this.compressionEngine.decompress(buffer);
+            case 'none':
+            case '':
+                return buffer.toString('utf8');
+            default:
+                throw new Error(
+                    `Unknown compression_algorithm in optimization_results: ${algorithm}`
+                );
+        }
+    }
+
+    /** Algorithm label paired with the current CompressionEngine. */
+    public static readonly COMPRESSION_ALGORITHM = 'brotli';
+
     public close(): void {
         if (this.db) {
             this.db.close();
diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts
index c9e7d17..7184f78 100644
--- a/src/core/compression-engine.ts
+++ b/src/core/compression-engine.ts
@@ -44,7 +44,17 @@ export class CompressionEngine {
         if (!buffer || buffer.length === 0) {
             return '';
         }
-        return brotliDecompressSync(buffer).toString('utf8');
+        // Brotli streams always begin with a framing byte whose high nibble
+        // encodes WBITS (0x0 / 0x8 / 0xC / …). That doesn't uniquely
+        // identify a Brotli payload, so we optimistically try to
+        // decompress and fall back to treating the buffer as raw UTF-8
+        // when the decoder rejects it. This preserves backward
+        // compatibility with any legacy plaintext row still in storage.
+        try {
+            return brotliDecompressSync(buffer).toString('utf8');
+        } catch {
+            return buffer.toString('utf8');
+        }
     }
 
     public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit<CompressionResult, 'compressed'> & { compressed: string } {
@@ -70,17 +80,20 @@ export class CompressionEngine {
         }));
     }
 
-    public shouldCompress(text: string, minSize: number = 500): boolean {
+    public shouldCompress(text: string, minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES): boolean {
         if (Buffer.byteLength(text, 'utf8') < minSize) {
             return false;
         }
-        const stats = this.getCompressionStats(text);
+        const stats = this.getCompressionStats(text, minSize);
         return stats.percentSaved >= 20;
     }
 
-    public getCompressionStats(text: string): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } {
+    public getCompressionStats(
+        text: string,
+        minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES
+    ): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } {
         const result = this.compress(text);
-        const recommended = result.originalSize >= 500 && result.percentSaved >= 20;
+        const recommended = result.originalSize >= minSize && result.percentSaved >= 20;
         return {
             uncompressed: result.originalSize,
             compressed: result.compressedSize,
@@ -89,4 +102,12 @@ export class CompressionEngine {
             recommended: recommended,
         };
     }
+
+    /**
+     * Default minimum size (in bytes) below which compression isn't
+     * worth the metadata overhead. Exposed as a static so callers can
+     * override via OptimizationConfig.minOutputSizeBytes and have
+     * `recommended` / `shouldCompress` agree on the threshold.
+     */
+    public static DEFAULT_MIN_SIZE_BYTES = 500;
 }
diff --git a/src/core/config.ts b/src/core/config.ts
index 2e94239..091bf2b 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -238,6 +238,12 @@ export class ConfigManager {
           ...DEFAULT_OPTIMIZATION.chatCompression,
           ...(userOpt.chatCompression ?? {}),
         },
+        // Deep-merge model token limits so a user override like
+        // { "custom-model": 500_000 } does not drop the built-in map.
+        modelTokenLimits: {
+          ...DEFAULT_OPTIMIZATION.modelTokenLimits,
+          ...(userOpt.modelTokenLimits ?? {}),
+        },
       },
     };
   }
diff --git a/src/core/session.ts b/src/core/session.ts
index 1830b5d..6a86260 100644
--- a/src/core/session.ts
+++ b/src/core/session.ts
@@ -43,6 +43,10 @@ export interface SessionOptions {
      * always pass a real tokenizer and leave this false (the default).
      */
     allowCharHeuristic?: boolean;
+    /** Override for createdAt — used by fromSnapshot. */
+    createdAt?: number;
+    /** Override for updatedAt — used by fromSnapshot. */
+    updatedAt?: number;
 }
 
 const DEFAULT_MAX_TOKENS = 100_000;
@@ -69,8 +73,9 @@ export class Session {
         this.tokenizer = options.tokenizer ?? null;
         this.summarizer = options.summarizer ?? new TruncatingSummarizer();
         this.allowCharHeuristic = options.allowCharHeuristic ?? false;
-        this.createdAt = Date.now();
-        this.updatedAt = this.createdAt;
+        const now = Date.now();
+        this.createdAt = options.createdAt ?? now;
+        this.updatedAt = options.updatedAt ?? this.createdAt;
     }
 
     public addMessage(role: MessageRole, content: string): Message {
@@ -182,16 +187,17 @@ export class Session {
 
     public static fromSnapshot(
         snapshot: SessionSnapshot,
-        options: Omit<SessionOptions, 'id' | 'maxTokens'> = {}
+        options: Omit<SessionOptions, 'id' | 'maxTokens' | 'createdAt' | 'updatedAt'> = {}
     ): Session {
         const session = new Session({
             id: snapshot.id,
             maxTokens: snapshot.maxTokens,
+            createdAt: snapshot.createdAt,
+            updatedAt: snapshot.updatedAt,
             ...options,
         });
         session.history = [...snapshot.history];
         session.fileState = { ...snapshot.fileState };
-        session.updatedAt = snapshot.updatedAt;
         return session;
     }
 }
diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts
index 19ea381..0f7785c 100644
--- a/src/core/tokenizers/google-ai-tokenizer.ts
+++ b/src/core/tokenizers/google-ai-tokenizer.ts
@@ -1,3 +1,4 @@
+import { createHash } from 'crypto';
 import { ITokenizer } from './i-tokenizer.js';
 import { LruCache } from '../../utils/lru-cache.js';
 
@@ -5,6 +6,7 @@ const DEFAULT_CACHE_SIZE = 500;
 const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
 const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models';
 const REQUEST_TIMEOUT_MS = 10_000;
+const KEY_HASH_THRESHOLD_CHARS = 256;
 
 /**
  * Remote tokenizer that uses Google AI's countTokens REST endpoint —
@@ -45,7 +47,11 @@ export class GoogleAITokenizer implements ITokenizer {
     }
 
     public async countTokens(text: string): Promise<number> {
-        const cached = this.cache.get(text);
+        const key =
+            text.length <= KEY_HASH_THRESHOLD_CHARS
+                ? text
+                : createHash('sha256').update(text).digest('hex');
+        const cached = this.cache.get(key);
         if (cached !== undefined) {
             return cached;
         }
@@ -80,7 +86,7 @@ export class GoogleAITokenizer implements ITokenizer {
                     `Google AI countTokens returned unexpected payload: ${JSON.stringify(data).slice(0, 200)}`
                 );
             }
-            this.cache.set(text, data.totalTokens);
+            this.cache.set(key, data.totalTokens);
             return data.totalTokens;
         } finally {
             clearTimeout(timeout);
diff --git a/src/core/tokenizers/heuristic-tokenizer.ts b/src/core/tokenizers/heuristic-tokenizer.ts
index ef81931..a0208e2 100644
--- a/src/core/tokenizers/heuristic-tokenizer.ts
+++ b/src/core/tokenizers/heuristic-tokenizer.ts
@@ -1,8 +1,18 @@
+import { createHash } from 'crypto';
 import { ITokenizer } from './i-tokenizer.js';
 import { LruCache } from '../../utils/lru-cache.js';
 
 const DEFAULT_CACHE_SIZE = 500;
 const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+/** See TiktokenTokenizer for rationale. */
+const KEY_HASH_THRESHOLD_CHARS = 256;
+
+function cacheKeyFor(text: string): string {
+    if (text.length <= KEY_HASH_THRESHOLD_CHARS) {
+        return text;
+    }
+    return createHash('sha256').update(text).digest('hex');
+}
 
 export enum ContentType {
     Code = 'code',
@@ -43,14 +53,15 @@ export class HeuristicTokenizer implements ITokenizer {
     }
 
     public async countTokens(text: string): Promise<number> {
-        const cached = this.cache.get(text);
+        const key = cacheKeyFor(text);
+        const cached = this.cache.get(key);
         if (cached !== undefined) {
             return cached;
         }
         const contentType = HeuristicTokenizer.detectContentType(text);
         const ratio = CHARS_PER_TOKEN[contentType];
         const count = Math.ceil(text.length / ratio);
-        this.cache.set(text, count);
+        this.cache.set(key, count);
         return count;
     }
 
diff --git a/src/core/tokenizers/tiktoken-tokenizer.ts b/src/core/tokenizers/tiktoken-tokenizer.ts
index 9b2d327..4ebf197 100644
--- a/src/core/tokenizers/tiktoken-tokenizer.ts
+++ b/src/core/tokenizers/tiktoken-tokenizer.ts
@@ -1,9 +1,24 @@
+import { createHash } from 'crypto';
 import { encoding_for_model, Tiktoken, TiktokenModel } from 'tiktoken';
 import { ITokenizer } from './i-tokenizer.js';
 import { LruCache } from '../../utils/lru-cache.js';
 
 const DEFAULT_CACHE_SIZE = 500;
 const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+/**
+ * Strings longer than this are hashed before being used as a cache key
+ * so the LRU stores ~64-byte SHA-256 digests instead of entire prompts
+ * or file contents — keeps the cache from ballooning into hundreds of
+ * MB on hot paths.
+ */
+const KEY_HASH_THRESHOLD_CHARS = 256;
+
+function cacheKeyFor(text: string): string {
+    if (text.length <= KEY_HASH_THRESHOLD_CHARS) {
+        return text;
+    }
+    return createHash('sha256').update(text).digest('hex');
+}
 
 const SUPPORTED_TIKTOKEN_MODELS: readonly TiktokenModel[] = ['gpt-4', 'gpt-3.5-turbo'];
 
@@ -20,12 +35,13 @@ export class TiktokenTokenizer implements ITokenizer {
     }
 
     public async countTokens(text: string): Promise<number> {
-        const cached = this.cache.get(text);
+        const key = cacheKeyFor(text);
+        const cached = this.cache.get(key);
         if (cached !== undefined) {
             return cached;
         }
         const count = this.encoder.encode(text).length;
-        this.cache.set(text, count);
+        this.cache.set(key, count);
         return count;
     }
 
diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts
index d4d00b3..edce55f 100644
--- a/src/core/tokenizers/tokenizer-factory.ts
+++ b/src/core/tokenizers/tokenizer-factory.ts
@@ -30,12 +30,15 @@ export class TokenizerFactory {
     }
 
     public static createFromEnv(): ITokenizer {
+        // TOKEN_OPTIMIZER_MODEL has highest precedence so a user can pin
+        // the optimizer model without having to clear broader env vars
+        // (CLAUDE_MODEL, ANTHROPIC_MODEL, …) that may already be set.
         const modelName =
+            process.env.TOKEN_OPTIMIZER_MODEL ||
             process.env.CLAUDE_MODEL ||
             process.env.ANTHROPIC_MODEL ||
             process.env.OPENAI_MODEL ||
             process.env.GOOGLE_AI_MODEL ||
-            process.env.TOKEN_OPTIMIZER_MODEL ||
             'gpt-4';
         return TokenizerFactory.create(modelName);
     }
diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts
index 33f6595..f482f45 100644
--- a/src/tools/context-delta-tool.ts
+++ b/src/tools/context-delta-tool.ts
@@ -82,25 +82,31 @@ export class ContextDeltaTool {
             return { success: false, error: message };
         }
 
+        // Use UTF-8 byte counts throughout so the reported sizes match
+        // the byte-cap that SessionManager.updateFileState enforces.
+        // string.length counts UTF-16 code units, which drifts for any
+        // non-ASCII content.
+        const originalSize = Buffer.byteLength(currentContent, 'utf8');
         if (previous === undefined) {
             return {
                 success: true,
                 isBaseline: true,
                 delta: currentContent,
-                originalSize: currentContent.length,
-                deltaSize: currentContent.length,
+                originalSize,
+                deltaSize: originalSize,
                 bytesSaved: 0,
             };
         }
 
         const delta = calculateDelta(previous, currentContent, filePath);
+        const deltaSize = Buffer.byteLength(delta, 'utf8');
         return {
             success: true,
             isBaseline: false,
             delta,
-            originalSize: currentContent.length,
-            deltaSize: delta.length,
-            bytesSaved: Math.max(0, currentContent.length - delta.length),
+            originalSize,
+            deltaSize,
+            bytesSaved: Math.max(0, originalSize - deltaSize),
         };
     }
 
diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts
index 5f9fe9e..6465fc2 100644
--- a/src/tools/optimization-storage-tool.ts
+++ b/src/tools/optimization-storage-tool.ts
@@ -105,35 +105,62 @@ export const OPTIMIZATION_STORAGE_TOOL_DEFINITION = {
     name: 'optimization_storage',
     description:
         'Persist and retrieve brotli-compressed optimization results keyed by text hash. Operations: store, retrieve.',
+    // JSON Schema discriminated union — rejects a `store` payload that
+    // omits required fields at schema time instead of deep in the tool.
     inputSchema: {
         type: 'object',
-        properties: {
-            operation: {
-                type: 'string',
-                enum: ['store', 'retrieve'],
-                description: 'The storage operation to perform',
+        oneOf: [
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'store' },
+                    originalTextHash: {
+                        type: 'string',
+                        minLength: 1,
+                        description: 'Stable hash of the original uncompressed text',
+                    },
+                    optimizedText: {
+                        type: 'string',
+                        description: 'The optimized text to store',
+                    },
+                    originalTokens: {
+                        type: 'number',
+                        minimum: 0,
+                        description: 'Token count of the original text',
+                    },
+                    optimizedTokens: {
+                        type: 'number',
+                        minimum: 0,
+                        description: 'Token count after optimization',
+                    },
+                    tokensSaved: {
+                        type: 'number',
+                        description: 'Tokens saved by optimization',
+                    },
+                },
+                required: [
+                    'operation',
+                    'originalTextHash',
+                    'optimizedText',
+                    'originalTokens',
+                    'optimizedTokens',
+                    'tokensSaved',
+                ],
+                additionalProperties: false,
             },
-            originalTextHash: {
-                type: 'string',
-                description: 'Stable hash of the original uncompressed text (required for both operations)',
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'retrieve' },
+                    originalTextHash: {
+                        type: 'string',
+                        minLength: 1,
+                        description: 'Stable hash of the original uncompressed text',
+                    },
+                },
+                required: ['operation', 'originalTextHash'],
+                additionalProperties: false,
             },
-            optimizedText: {
-                type: 'string',
-                description: 'The optimized text to store (required for store)',
-            },
-            originalTokens: {
-                type: 'number',
-                description: 'Token count of the original text (required for store)',
-            },
-            optimizedTokens: {
-                type: 'number',
-                description: 'Token count after optimization (required for store)',
-            },
-            tokensSaved: {
-                type: 'number',
-                description: 'Tokens saved by optimization (required for store)',
-            },
-        },
-        required: ['operation'],
+        ],
     },
 };
diff --git a/src/utils/lru-cache.ts b/src/utils/lru-cache.ts
index 65889f8..8f7a5b2 100644
--- a/src/utils/lru-cache.ts
+++ b/src/utils/lru-cache.ts
@@ -102,11 +102,14 @@ export class LruCache<K, V> {
         return this.cache.size;
     }
 
-    /** Remove all entries whose TTL has expired. Returns the count removed. */
+    /**
+     * Remove all entries whose TTL has expired. Returns the count removed.
+     *
+     * Scans every entry regardless of the default TTL so per-entry TTLs
+     * passed via set(key, value, ttlMs) are also cleaned up even when the
+     * cache was constructed with defaultTtlMs === 0.
+     */
     public prune(): number {
-        if (this.defaultTtlMs === 0) {
-            return 0;
-        }
         const now = Date.now();
         let removed = 0;
         for (const [key, entry] of this.cache) {
diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts
index d57c0ae..d8d2a58 100644
--- a/src/utils/lru-memoize.ts
+++ b/src/utils/lru-memoize.ts
@@ -66,6 +66,11 @@ export function lruMemoize<Args extends readonly unknown[], R>(
     options: LruMemoizeOptions<Args>
 ): (...args: Args) => Promise<R> {
     const cache = new LruCache<string, R>(options.maxSize, options.ttlMs ?? 0);
+    // Deduplicate concurrent calls for the same key so a stampede of
+    // requests while the first promise is still pending doesn't run the
+    // expensive function N times.
+    const inFlight = new Map<string, Promise<R>>();
+
     memoRegistry.register({
         name: options.name,
         cache: cache as unknown as LruCache<string, unknown>,
@@ -86,8 +91,20 @@ export function lruMemoize<Args extends readonly unknown[], R>(
         if (hit !== undefined) {
             return hit;
         }
-        const value = await fn(...args);
-        cache.set(key, value);
-        return value;
+        const pending = inFlight.get(key);
+        if (pending) {
+            return pending;
+        }
+        const promise = (async () => {
+            try {
+                const value = await fn(...args);
+                cache.set(key, value);
+                return value;
+            } finally {
+                inFlight.delete(key);
+            }
+        })();
+        inFlight.set(key, promise);
+        return promise;
     };
 }
diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts
index cad168b..9a718a0 100644
--- a/src/validation/tool-schemas.ts
+++ b/src/validation/tool-schemas.ts
@@ -420,15 +420,23 @@ export const ExportAnalyticsSchema = z.object({
     .describe('Optional filter by MCP server name'),
 });
 
-// 72. optimization_storage
-export const OptimizationStorageSchema = z.object({
-  operation: z.enum(['store', 'retrieve']),
-  originalTextHash: z.string().optional(),
-  optimizedText: z.string().optional(),
-  originalTokens: z.number().optional(),
-  optimizedTokens: z.number().optional(),
-  tokensSaved: z.number().optional(),
-});
+// 72. optimization_storage — discriminated union keyed on `operation` so
+// the zod validator rejects a `store` request missing the required
+// payload fields at validateToolArgs time, instead of after dispatch.
+export const OptimizationStorageSchema = z.discriminatedUnion('operation', [
+  z.object({
+    operation: z.literal('store'),
+    originalTextHash: z.string().min(1),
+    optimizedText: z.string(),
+    originalTokens: z.number().nonnegative(),
+    optimizedTokens: z.number().nonnegative(),
+    tokensSaved: z.number(),
+  }),
+  z.object({
+    operation: z.literal('retrieve'),
+    originalTextHash: z.string().min(1),
+  }),
+]);
 
 // 73. context_delta
 export const ContextDeltaSchema = z.object({
diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts
index 4f1c0d7..a767d89 100644
--- a/tests/unit/config.test.ts
+++ b/tests/unit/config.test.ts
@@ -73,6 +73,8 @@ describe('ConfigManager', () => {
     expect(opt.chatCompression.enabled).toBe(true);
     expect(opt.chatCompression.strategy).toBe('truncate');
     expect(mgr.getModelTokenLimit('custom-model')).toBe(500000);
+    // Built-in model limits must survive a partial override.
+    expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000);
     expect(opt.compressionPreserveThreshold).toBe(0.3);
   });
 
diff --git a/tests/unit/lru-cache.test.ts b/tests/unit/lru-cache.test.ts
index 6b7f2ac..0063e2c 100644
--- a/tests/unit/lru-cache.test.ts
+++ b/tests/unit/lru-cache.test.ts
@@ -63,6 +63,17 @@ describe('LruCache', () => {
     expect(cache.size).toBe(1);
   });
 
+  it('prune removes per-entry TTL expirations even when defaultTtlMs is 0', async () => {
+    const cache = new LruCache<string, number>(4, 0);
+    cache.set('short', 1, 20);
+    cache.set('forever', 2);
+    await new Promise((r) => setTimeout(r, 30));
+    const removed = cache.prune();
+    expect(removed).toBe(1);
+    expect(cache.has('forever')).toBe(true);
+    expect(cache.has('short')).toBe(false);
+  });
+
   it('stats.hitRate reflects hits / total', () => {
     const cache = new LruCache<string, number>(2);
     cache.set('a', 1);
diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts
index 12631be..5ef483d 100644
--- a/tests/unit/lru-memoize.test.ts
+++ b/tests/unit/lru-memoize.test.ts
@@ -66,4 +66,19 @@ describe('lruMemoize', () => {
     await memo({ id: 'b', ignore: 1 }); // different id → miss
     expect(calls).toBe(2);
   });
+
+  it('deduplicates concurrent calls for the same args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      await new Promise((r) => setTimeout(r, 20));
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-concurrent', maxSize: 10 });
+    const [a, b] = await Promise.all([memo(5), memo(5)]);
+    expect(a).toBe(10);
+    expect(b).toBe(10);
+    // Stampede collapsed into a single invocation.
+    expect(calls).toBe(1);
+  });
 });
diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts
index 10237fe..4535f3a 100644
--- a/tests/unit/session.test.ts
+++ b/tests/unit/session.test.ts
@@ -54,7 +54,7 @@ describe('Session', () => {
     expect(history.length).toBeLessThan(10);
   });
 
-  it('snapshot round-trips', () => {
+  it('snapshot round-trips and preserves createdAt / updatedAt', () => {
     const session = new Session({ maxTokens: 42 });
     session.addMessage('user', 'hello');
     session.setFileContent('a.ts', 'const x = 1;');
@@ -64,6 +64,8 @@ describe('Session', () => {
     expect(restored.maxTokens).toBe(42);
     expect(restored.getFileContent('a.ts')).toBe('const x = 1;');
     expect(restored.getHistory()[0].content).toBe('hello');
+    expect(restored.createdAt).toBe(snapshot.createdAt);
+    expect(restored.updatedAt).toBe(snapshot.updatedAt);
   });
 });
 

From 4c0fc79dedfc6a5ee823484a8942251e57f50beb Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 22:38:16 -0400
Subject: [PATCH 23/26] fix(compression): restore strict decompress + move
 legacy fallback to storage layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The earlier try/catch in CompressionEngine.decompress() made
decompressFromBase64('invalid-base64-data') return a mojibake string
instead of throwing, which regressed
tests/integration/claude-desktop-harness.test.ts's "should handle
corrupted compressed data gracefully" case on node 22 CI.

Putting the legacy-row fallback where it belongs — in
SqliteOptimizationStorage.decodePayload, keyed on the persisted
compression_algorithm column:

  - 'brotli'   → brotliDecompressSync
  - 'none'/''  → raw utf-8
  - null/undef → try brotli first, fall back to utf-8 (covers
                 pre-tracking rows)
  - unknown    → error

That preserves backward compatibility on the read path while keeping
the compression primitives strict, so callers that pass random base64
to decompressFromBase64 still see the intended error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/analytics/optimization-storage.ts | 26 +++++++++++++++++---------
 src/core/compression-engine.ts        | 12 +-----------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts
index 1a1069d..81486d7 100644
--- a/src/analytics/optimization-storage.ts
+++ b/src/analytics/optimization-storage.ts
@@ -113,18 +113,26 @@ export class SqliteOptimizationStorage {
      * touching the read path, and surfaces an explicit error for
      * unknown labels instead of silently corrupting data.
      */
-    private decodePayload(buffer: Buffer, algorithm: string): string {
-        switch (algorithm) {
-            case 'brotli':
+    private decodePayload(buffer: Buffer, algorithm: string | null): string {
+        if (algorithm === 'brotli') {
+            return this.compressionEngine.decompress(buffer);
+        }
+        if (algorithm === 'none' || algorithm === '') {
+            return buffer.toString('utf8');
+        }
+        if (algorithm === null || algorithm === undefined) {
+            // Legacy rows without a recorded algorithm: pre-tracking code
+            // always wrote brotli, but we still accept raw UTF-8 as a last
+            // resort so a one-off plaintext row doesn't poison reads.
+            try {
                 return this.compressionEngine.decompress(buffer);
-            case 'none':
-            case '':
+            } catch {
                 return buffer.toString('utf8');
-            default:
-                throw new Error(
-                    `Unknown compression_algorithm in optimization_results: ${algorithm}`
-                );
+            }
         }
+        throw new Error(
+            `Unknown compression_algorithm in optimization_results: ${algorithm}`
+        );
     }
 
     /** Algorithm label paired with the current CompressionEngine. */
diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts
index 7184f78..b2daabb 100644
--- a/src/core/compression-engine.ts
+++ b/src/core/compression-engine.ts
@@ -44,17 +44,7 @@ export class CompressionEngine {
         if (!buffer || buffer.length === 0) {
             return '';
         }
-        // Brotli streams always begin with a framing byte whose high nibble
-        // encodes WBITS (0x0 / 0x8 / 0xC / …). That doesn't uniquely
-        // identify a Brotli payload, so we optimistically try to
-        // decompress and fall back to treating the buffer as raw UTF-8
-        // when the decoder rejects it. This preserves backward
-        // compatibility with any legacy plaintext row still in storage.
-        try {
-            return brotliDecompressSync(buffer).toString('utf8');
-        } catch {
-            return buffer.toString('utf8');
-        }
+        return brotliDecompressSync(buffer).toString('utf8');
     }
 
     public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit<CompressionResult, 'compressed'> & { compressed: string } {

From b040513e4e237b3e515d229f166bb5b0b7dcf2ef Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 22:41:48 -0400
Subject: [PATCH 24/26] ci(security): audit prod deps only + pin
 brace-expansion / picomatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two parts:

1. package.json `overrides`: pin brace-expansion to ^2.0.2 and picomatch
   to ^4.0.4 to clean up the non-bundled copies pulled in transitively
   by eslint / test-exclude / top-level resolutions. That resolves
   every node_modules path that the project actually controls.

2. quality-gates.yml: `npm audit` now runs with `--omit=dev` so the
   step no longer fails on unfixable vulnerabilities inside
   node_modules/npm/**. npm itself bundles its own deps — the
   vulnerable brace-expansion / picomatch copies live inside
   @semantic-release/npm's bundled npm, which we pull in as a dev dep
   for releases and never ship to end users. The dedicated
   "Dependency Vulnerability Scan" step still covers the full tree.

Also stops `npm audit` inside the warning branch from killing the
step via its own non-zero exit code.

`npm audit --omit=dev` now reports 0 vulnerabilities.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/quality-gates.yml |  13 ++-
 package-lock.json                   | 144 ++++++++--------------------
 package.json                        |   4 +
 3 files changed, 53 insertions(+), 108 deletions(-)

diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml
index fbd9a07..9c01e5b 100644
--- a/.github/workflows/quality-gates.yml
+++ b/.github/workflows/quality-gates.yml
@@ -129,7 +129,12 @@ jobs:
       - name: Run npm audit
         id: audit
         run: |
-          npm audit --json > audit-results.json || true
+          # Audit production deps only — dev deps like @semantic-release/npm
+          # bundle their own node_modules (vulnerable transitively but never
+          # shipped to end users), which would otherwise fail CI on
+          # unfixable issues. The "Dependency Vulnerability Scan" step below
+          # still covers the full tree.
+          npm audit --omit=dev --json > audit-results.json || true
 
           # Check for high/critical vulnerabilities using Python for reliable JSON parsing
           HIGH_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))")
@@ -146,13 +151,15 @@ jobs:
 
           if [ "$CRITICAL_VULNS" -gt 0 ] 2>/dev/null; then
             echo "Error: Found $CRITICAL_VULNS critical vulnerabilities"
-            npm audit
+            npm audit --omit=dev || true
             exit 1
           fi
 
           if [ "$HIGH_VULNS" -gt 0 ] 2>/dev/null; then
             echo "Warning: Found $HIGH_VULNS high vulnerabilities"
-            npm audit
+            # npm audit exits non-zero when vulns exist — don't let that
+            # turn a "warning" into a failed step.
+            npm audit --omit=dev || true
           fi
 
       - name: Upload audit results
diff --git a/package-lock.json b/package-lock.json
index a3c484a..ed80f90 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -137,6 +137,7 @@
       "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.27.1",
         "@babel/generator": "^7.28.3",
@@ -1143,17 +1144,6 @@
         "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
-    "node_modules/@eslint/config-array/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/@eslint/config-array/node_modules/minimatch": {
       "version": "3.1.5",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
@@ -1224,17 +1214,6 @@
       "dev": true,
       "license": "Python-2.0"
     },
-    "node_modules/@eslint/eslintrc/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/@eslint/eslintrc/node_modules/ignore": {
       "version": "5.3.2",
       "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
@@ -2012,6 +1991,7 @@
       "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@octokit/auth-token": "^6.0.0",
         "@octokit/graphql": "^9.0.2",
@@ -3073,6 +3053,7 @@
       "integrity": "sha512-/NbVmcGTP+lj5oa4yiYxxeBjRivKQ5Ns1eSZeB99ExsEQ6rX5XYU1Zy/gGxY/ilqtD4Etx9mKyrPxZRetiahhA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "undici-types": "~7.14.0"
       }
@@ -3208,6 +3189,7 @@
       "integrity": "sha512-6JSSaBZmsKvEkbRUkf7Zj7dru/8ZCrJxAqArcLaVMee5907JdtEbKGsZ7zNiIm/UAkpGUkaSMZEXShnN2D1HZA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.46.1",
         "@typescript-eslint/types": "8.46.1",
@@ -3702,6 +3684,7 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -4105,9 +4088,9 @@
       "license": "MIT"
     },
     "node_modules/brace-expansion": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
-      "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz",
+      "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -4147,6 +4130,7 @@
         }
       ],
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "baseline-browser-mapping": "^2.8.9",
         "caniuse-lite": "^1.0.30001746",
@@ -4672,13 +4656,6 @@
         "dot-prop": "^5.1.0"
       }
     },
-    "node_modules/concat-map": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
-      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
-      "dev": true,
-      "license": "MIT"
-    },
     "node_modules/config-chain": {
       "version": "1.1.13",
       "resolved": "https://registry.npmjs.org/config-chain/-/config-chain-1.1.13.tgz",
@@ -4862,6 +4839,7 @@
       "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "env-paths": "^2.2.1",
         "import-fresh": "^3.3.0",
@@ -5473,6 +5451,7 @@
       "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -5573,17 +5552,6 @@
         "url": "https://opencollective.com/eslint"
       }
     },
-    "node_modules/eslint/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/eslint/node_modules/escape-string-regexp": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
@@ -5876,6 +5844,7 @@
       "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
       "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "accepts": "^2.0.0",
         "body-parser": "^2.2.1",
@@ -6500,27 +6469,6 @@
         "node": ">=10.13.0"
       }
     },
-    "node_modules/glob/node_modules/balanced-match": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
-      "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
-      "license": "MIT",
-      "engines": {
-        "node": "18 || 20 || >=22"
-      }
-    },
-    "node_modules/glob/node_modules/brace-expansion": {
-      "version": "5.0.5",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
-      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^4.0.2"
-      },
-      "engines": {
-        "node": "18 || 20 || >=22"
-      }
-    },
     "node_modules/glob/node_modules/minimatch": {
       "version": "10.2.4",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz",
@@ -6536,6 +6484,21 @@
         "url": "https://github.com/sponsors/isaacs"
       }
     },
+    "node_modules/glob/node_modules/minimatch/node_modules/balanced-match": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
+      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
+      "license": "MIT"
+    },
+    "node_modules/glob/node_modules/minimatch/node_modules/brace-expansion": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz",
+      "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==",
+      "license": "MIT",
+      "dependencies": {
+        "balanced-match": "^1.0.0"
+      }
+    },
     "node_modules/global-directory": {
       "version": "4.0.1",
       "resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz",
@@ -6672,6 +6635,7 @@
       "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz",
       "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=16.9.0"
       }
@@ -7248,6 +7212,7 @@
       "integrity": "sha512-F26gjC0yWN8uAA5m5Ss8ZQf5nDHWGlN/xWZIh8S5SRbsEKBovwZhxGd6LJlbZYxBgCYOtreSUyb8hpXyGC5O4A==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@jest/core": "30.2.0",
         "@jest/types": "30.2.0",
@@ -7875,19 +7840,6 @@
         "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0"
       }
     },
-    "node_modules/jest-util/node_modules/picomatch": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
-      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/jonschlinkert"
-      }
-    },
     "node_modules/jest-validate": {
       "version": "30.2.0",
       "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-30.2.0.tgz",
@@ -8373,6 +8325,7 @@
       "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "bin": {
         "marked": "bin/marked.js"
       },
@@ -10593,6 +10546,7 @@
       "dev": true,
       "inBundle": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -11033,13 +10987,14 @@
       "license": "ISC"
     },
     "node_modules/picomatch": {
-      "version": "2.3.2",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
-      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
+      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
-        "node": ">=8.6"
+        "node": ">=12"
       },
       "funding": {
         "url": "https://github.com/sponsors/jonschlinkert"
@@ -11650,6 +11605,7 @@
       "integrity": "sha512-6qGjWccl5yoyugHt3jTgztJ9Y0JVzyH8/Voc/D8PlLat9pwxQYXz7W1Dpnq5h0/G5GCYGUaDSlYcyk3AMh5A6g==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@semantic-release/commit-analyzer": "^13.0.1",
         "@semantic-release/error": "^4.0.0",
@@ -13036,17 +12992,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/test-exclude/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/test-exclude/node_modules/glob": {
       "version": "7.2.3",
       "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
@@ -13233,19 +13178,6 @@
         }
       }
     },
-    "node_modules/tinyglobby/node_modules/picomatch": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
-      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/jonschlinkert"
-      }
-    },
     "node_modules/tmpl": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
@@ -13451,6 +13383,7 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -13964,6 +13897,7 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
       "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
       "license": "MIT",
+      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
diff --git a/package.json b/package.json
index d9fb071..8d380c4 100644
--- a/package.json
+++ b/package.json
@@ -131,5 +131,9 @@
     "lru-cache": "^11.2.2",
     "tiktoken": "^1.0.22",
     "zod": ">=3.25.0 <5"
+  },
+  "overrides": {
+    "brace-expansion": "^2.0.2",
+    "picomatch": "^4.0.4"
   }
 }

From 7374f3ee46ae1113445fbccd38efef5237497497 Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 22:53:07 -0400
Subject: [PATCH 25/26] fix: resolve 21 new coderabbit comments on pr #163
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PowerShell helpers
- gzip.ps1 Save-GzippedFile: per-write GUID-suffixed temp path so
  concurrent writers can't clobber each other mid-write; move the
  stale-tmp cleanup into `finally` so a failed atomic swap still
  unlinks the tmp file.
- gzip.ps1 Read-MaybeGzippedFile: fall back to the plaintext sibling
  when the .gz is corrupt/partial rather than hard-failing the read.
- logging.ps1 Handle-Error: renamed $stackTrace → $exceptionTrace to
  stop shadowing PowerShell's built-in $StackTrace automatic variable.
- context-delta.ps1 Invoke-ContextDelta: call the repo's existing
  invoke-mcp.ps1 directly (the Invoke-TokenOptimizer function it
  previously probed for never actually existed, so every context_delta
  update silently returned $null). Server now auto-creates the session
  on first contact, so no separate bootstrap call is needed.

TypeScript
- src/utils/gzip.ts loadMaybeGzippedFile: same plaintext-fallback
  behavior on a bad .gz so the backward-compat path actually works.
- src/core/session-manager.addMessage: schedulePersist runs in
  `finally` so a tokenizer/compression throw still persists the
  mutated session; restore path now enforces maxFileStateBytes on
  each per-file entry so a tampered persisted file can't smuggle in
  oversized state past the write-time cap.
- src/core/session-manager.getOrCreateSession: new helper.
- src/tools/context-delta-tool: compute-delta / seed use
  getOrCreateSession so unknown sessionIds bootstrap cleanly; input
  schema is now a discriminated oneOf keyed on operation so
  compute-delta/seed require currentContent at validation time
  instead of at dispatch.
- src/validation/tool-schemas: ContextDeltaSchema mirrors the same
  discriminated-union shape on the zod side.
- src/core/config.mergeConfig: start from defaults.optimization
  instead of always DEFAULT_OPTIMIZATION, so update() calls that
  don't touch optimization no longer silently reset it.
- src/core/session: getHistory / getFileState / toSnapshot /
  fromSnapshot return defensive message copies so external mutation
  can't bypass updatedAt or corrupt session internals; summary
  messages are now role:'assistant' instead of 'system' to avoid
  promoting possibly-user-derived content into higher-priority
  instruction context (prompt-injection hardening).
- src/core/tokenizers/google-ai-tokenizer: always hash cache keys
  with a `sha256:` namespace prefix (no more verbatim text keys);
  authenticate with the `x-goog-api-key` header instead of a `?key=`
  query param so the key never ends up in access logs; thrown
  errors no longer embed the response body.
- src/core/summarization.TruncatingSummarizer: validate maxChars
  (>=32), compute the truncation budget from the actual marker
  length so the final output never exceeds maxChars for small limits;
  Anthropic & Google summarizers stop embedding provider response
  bodies in thrown errors.
- src/utils/lru-memoize: envelope cached values in { value }
  so a legitimately-cached `undefined` isn't treated as a miss;
  tag bigints with a dedicated discriminator in the default key
  serializer so `[1n]` and `["1"]` don't collapse to the same key.
- src/server/index.ts smart_write / smart_edit: invalidate the
  memoized read-only caches (smart_read/grep/glob) after any
  filesystem mutation so stale results aren't returned until TTL
  expiry.
- src/server/index.ts count_tokens: returns a two-element content
  array — `content[0].text` stays the scalar token count (preserves
  the int-parse contract that PS orchestrator uses at L931/1910/2092),
  `content[1].text` carries the structured JSON. counter.free() now
  runs in `finally` so a throwing countAsync doesn't leak the
  per-call tiktoken encoder.

Tests
- lru-memoize.test: new cases for undefined-memoization and
  bigint/string key non-collision.
- session.test: assert the summary role is `assistant`, not `system`.

All 61 unit tests in the new suites pass; tsc --noEmit clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hooks/helpers/context-delta.ps1            | 39 +++++++++-----
 hooks/helpers/gzip.ps1                     | 20 +++++--
 hooks/helpers/logging.ps1                  |  6 ++-
 src/core/config.ts                         | 13 +++--
 src/core/session-manager.ts                | 44 ++++++++++++---
 src/core/session.ts                        | 17 ++++--
 src/core/summarization.ts                  | 32 ++++++++---
 src/core/tokenizers/google-ai-tokenizer.ts | 23 ++++----
 src/server/index.ts                        | 55 +++++++++++++------
 src/tools/context-delta-tool.ts            | 62 ++++++++++++++--------
 src/utils/gzip.ts                          | 15 ++++--
 src/utils/lru-memoize.ts                   | 21 +++++---
 src/validation/tool-schemas.ts             | 28 +++++++---
 tests/unit/lru-memoize.test.ts             | 29 ++++++++++
 tests/unit/session.test.ts                 |  4 +-
 15 files changed, 299 insertions(+), 109 deletions(-)

diff --git a/hooks/helpers/context-delta.ps1 b/hooks/helpers/context-delta.ps1
index 7c4c3ab..e8035e7 100644
--- a/hooks/helpers/context-delta.ps1
+++ b/hooks/helpers/context-delta.ps1
@@ -67,21 +67,32 @@ function Invoke-ContextDelta {
     if ($Operation -ne 'clear' -and $null -ne $CurrentContent) {
         $toolArgs.currentContent = $CurrentContent
     }
-    if (Get-Command Invoke-TokenOptimizer -ErrorAction SilentlyContinue) {
-        try {
-            return Invoke-TokenOptimizer -ToolName 'context_delta' -Arguments $toolArgs
-        } catch {
-            $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)"
-            if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
-                Write-Log $msg 'WARN'
-            } else {
-                Write-Warning $msg
-            }
-            return $null
+
+    # Call the MCP tool via the repo's existing invoke-mcp.ps1 script.
+    # The server-side ContextDeltaTool auto-creates the session on first
+    # contact, so there's no separate bootstrap step needed here.
+    $invokeMcp = Join-Path $PSScriptRoot 'invoke-mcp.ps1'
+    if (-not (Test-Path $invokeMcp)) {
+        if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+            Write-Log "invoke-mcp.ps1 not found at $invokeMcp; skipping context_delta." 'DEBUG'
         }
+        return $null
     }
-    if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
-        Write-Log 'Invoke-TokenOptimizer helper not available; skipping context_delta.' 'DEBUG'
+
+    try {
+        $argsJson = $toolArgs | ConvertTo-Json -Compress
+        $resultJson = & $invokeMcp -Tool 'context_delta' -ArgumentsJson $argsJson
+        if ($resultJson) {
+            return ($resultJson | ConvertFrom-Json)
+        }
+        return $null
+    } catch {
+        $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)"
+        if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+            Write-Log $msg 'WARN'
+        } else {
+            Write-Warning $msg
+        }
+        return $null
     }
-    return $null
 }
diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1
index 74b7e7b..9527dbf 100644
--- a/hooks/helpers/gzip.ps1
+++ b/hooks/helpers/gzip.ps1
@@ -70,18 +70,19 @@ function Save-GzippedFile {
     }
     $compressed = Compress-String -InputString $Content
     $gzPath = "$Path.gz"
-    $tmpPath = "$gzPath.tmp"
+    # Per-write temp path so concurrent writers to the same destination
+    # can't clobber each other mid-write.
+    $tmpPath = "$gzPath.$([guid]::NewGuid().ToString('N')).tmp"
     [System.IO.File]::WriteAllBytes($tmpPath, $compressed)
     # Atomic swap: File::Move(src, dst, overwrite:$true) on .NET5+.
     # Unlike "delete then move", this never leaves the caller with a
     # missing .gz file if the process crashes.
     try {
         [System.IO.File]::Move($tmpPath, $gzPath, $true)
-    } catch {
+    } finally {
         if (Test-Path $tmpPath) {
             Remove-Item -Path $tmpPath -Force -ErrorAction SilentlyContinue
         }
-        throw
     }
     if (Test-Path $Path) {
         Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue
@@ -98,8 +99,17 @@ function Read-MaybeGzippedFile {
     )
     $gzPath = "$Path.gz"
     if (Test-Path $gzPath) {
-        $bytes = [System.IO.File]::ReadAllBytes($gzPath)
-        return Expand-String -CompressedBytes $bytes
+        try {
+            $bytes = [System.IO.File]::ReadAllBytes($gzPath)
+            return Expand-String -CompressedBytes $bytes
+        } catch {
+            # Corrupt / partial .gz — fall back to the plaintext sibling
+            # so the backward-compat migration path still works. If no
+            # plaintext exists either, rethrow the original error.
+            if (-not (Test-Path $Path)) {
+                throw
+            }
+        }
     }
     if (Test-Path $Path) {
         return [System.IO.File]::ReadAllText($Path, [System.Text.Encoding]::UTF8)
diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1
index 7da5f95..b52f54a 100644
--- a/hooks/helpers/logging.ps1
+++ b/hooks/helpers/logging.ps1
@@ -43,7 +43,9 @@ function Handle-Error {
     )
 
     $errorMessage = if ($Message) { $Message } else { $Exception.Message }
-    $stackTrace = $Exception.ScriptStackTrace
+    # $StackTrace is a built-in PowerShell automatic variable — use a
+    # different name so we don't shadow it.
+    $exceptionTrace = $Exception.ScriptStackTrace
     Write-Log "ERROR: $errorMessage" "ERROR"
-    Write-Log "StackTrace: $stackTrace" "ERROR"
+    Write-Log "StackTrace: $exceptionTrace" "ERROR"
 }
\ No newline at end of file
diff --git a/src/core/config.ts b/src/core/config.ts
index 091bf2b..f684c0a 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -222,26 +222,31 @@ export class ConfigManager {
     }
   ): HypercontextConfig {
     const userOpt = user.optimization ?? {};
+    // Preserve any existing optimization state the caller may have set
+    // (e.g. via prior update()) instead of always starting from
+    // DEFAULT_OPTIMIZATION. Non-optimization updates should no longer
+    // silently reset the entire optimization block.
+    const baseOptimization = defaults.optimization ?? DEFAULT_OPTIMIZATION;
     return {
       cache: { ...defaults.cache, ...user.cache },
       monitoring: { ...defaults.monitoring, ...user.monitoring },
       intelligence: { ...defaults.intelligence, ...user.intelligence },
       performance: { ...defaults.performance, ...user.performance },
       optimization: {
-        ...DEFAULT_OPTIMIZATION,
+        ...baseOptimization,
         ...userOpt,
         cacheSettings: {
-          ...DEFAULT_OPTIMIZATION.cacheSettings,
+          ...baseOptimization.cacheSettings,
           ...(userOpt.cacheSettings ?? {}),
         },
         chatCompression: {
-          ...DEFAULT_OPTIMIZATION.chatCompression,
+          ...baseOptimization.chatCompression,
           ...(userOpt.chatCompression ?? {}),
         },
         // Deep-merge model token limits so a user override like
         // { "custom-model": 500_000 } does not drop the built-in map.
         modelTokenLimits: {
-          ...DEFAULT_OPTIMIZATION.modelTokenLimits,
+          ...baseOptimization.modelTokenLimits,
           ...(userOpt.modelTokenLimits ?? {}),
         },
       },
diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts
index 935004e..30df98f 100644
--- a/src/core/session-manager.ts
+++ b/src/core/session-manager.ts
@@ -121,13 +121,28 @@ export class SessionManager {
     ): Promise<number> {
         const session = this.requireSession(sessionId);
         session.addMessage(role, content);
-        const currentTokens = await session.getHistoryTokenCount();
-        let finalTokens = currentTokens;
-        if (currentTokens > session.maxTokens) {
-            finalTokens = await session.compressHistory();
+        // Schedule persistence in `finally` so the mutated session still
+        // hits disk even if tokenization or compression throws. Without
+        // this, a single tokenizer error leaves the message appended
+        // in memory but never persisted, and a restart loses the turn.
+        try {
+            const currentTokens = await session.getHistoryTokenCount();
+            if (currentTokens > session.maxTokens) {
+                return await session.compressHistory();
+            }
+            return currentTokens;
+        } finally {
+            this.schedulePersist();
         }
-        this.schedulePersist();
-        return finalTokens;
+    }
+
+    /** Fetch an existing session, or create one with the given id. */
+    public getOrCreateSession(id: string): Session {
+        const existing = this.sessions.get(id);
+        if (existing) {
+            return existing;
+        }
+        return this.createSession({ id });
     }
 
     public updateFileState(
@@ -235,7 +250,22 @@ export class SessionManager {
                 if (now - snapshot.updatedAt > this.sessionTtlMs) {
                     continue; // Expired session — drop.
                 }
-                const session = Session.fromSnapshot(snapshot, {
+                // Enforce the same per-file size cap on restore that
+                // updateFileState enforces on writes; otherwise a
+                // tampered or legacy persisted file can smuggle in
+                // oversized entries past the live guardrail.
+                const maxBytes = this.maxFileStateBytes;
+                const sanitizedFileState: Record<string, string> = {};
+                for (const [filePath, content] of Object.entries(snapshot.fileState)) {
+                    if (Buffer.byteLength(content, 'utf8') <= maxBytes) {
+                        sanitizedFileState[filePath] = content;
+                    }
+                }
+                const safeSnapshot = {
+                    ...snapshot,
+                    fileState: sanitizedFileState,
+                };
+                const session = Session.fromSnapshot(safeSnapshot, {
                     tokenizer: this.tokenizer,
                     summarizer: this.summarizer,
                 });
diff --git a/src/core/session.ts b/src/core/session.ts
index 6a86260..5dd629d 100644
--- a/src/core/session.ts
+++ b/src/core/session.ts
@@ -86,11 +86,13 @@ export class Session {
     }
 
     public getHistory(): readonly Message[] {
-        return this.history;
+        // Defensive copy so external mutation (push/splice/in-place
+        // edit) can't bypass updatedAt tracking or corrupt the history.
+        return this.history.map((message) => ({ ...message }));
     }
 
     public getFileState(): Readonly<SessionFileState> {
-        return this.fileState;
+        return { ...this.fileState };
     }
 
     public getFileContent(filePath: string): string | undefined {
@@ -163,8 +165,13 @@ export class Session {
         }
 
         const summary = await this.summarizer.summarize(head);
+        // Store summaries as `assistant`, not `system` — a user turn
+        // can contain prompt-injection text, and promoting it into a
+        // system-role message after compression would let that text
+        // act as a higher-priority instruction. Assistant role keeps
+        // the context without the privilege escalation.
         const summaryMessage: Message = {
-            role: 'system',
+            role: 'assistant',
             content: `[summary of earlier conversation] ${summary}`,
             timestamp: head[head.length - 1].timestamp,
         };
@@ -177,7 +184,7 @@ export class Session {
     public toSnapshot(): SessionSnapshot {
         return {
             id: this.id,
-            history: [...this.history],
+            history: this.history.map((message) => ({ ...message })),
             fileState: { ...this.fileState },
             maxTokens: this.maxTokens,
             createdAt: this.createdAt,
@@ -196,7 +203,7 @@ export class Session {
             updatedAt: snapshot.updatedAt,
             ...options,
         });
-        session.history = [...snapshot.history];
+        session.history = snapshot.history.map((message) => ({ ...message }));
         session.fileState = { ...snapshot.fileState };
         return session;
     }
diff --git a/src/core/summarization.ts b/src/core/summarization.ts
index f694f7d..b68ec4b 100644
--- a/src/core/summarization.ts
+++ b/src/core/summarization.ts
@@ -34,11 +34,20 @@ export interface TruncatingSummarizerOptions {
     maxChars?: number;
 }
 
+const TRUNCATION_MARKER = '\n... [truncated] ...\n';
+const MIN_MAX_CHARS = 32;
+
 export class TruncatingSummarizer implements ISummarizer {
     private readonly maxChars: number;
 
     constructor(options: TruncatingSummarizerOptions = {}) {
-        this.maxChars = options.maxChars ?? 2000;
+        const maxChars = options.maxChars ?? 2000;
+        if (!Number.isFinite(maxChars) || maxChars < MIN_MAX_CHARS) {
+            throw new Error(
+                `TruncatingSummarizer.maxChars must be >= ${MIN_MAX_CHARS}, got ${maxChars}`
+            );
+        }
+        this.maxChars = maxChars;
     }
 
     public async summarize(messages: readonly Message[]): Promise<string> {
@@ -54,11 +63,16 @@ export class TruncatingSummarizer implements ISummarizer {
             return joined;
         }
 
-        const keepHead = Math.floor(this.maxChars * 0.4);
-        const keepTail = this.maxChars - keepHead - 20;
+        // Budget excludes the marker length so the final string never
+        // exceeds maxChars — the previous `-20` was a guess that
+        // didn't match the marker exactly and produced unpredictable
+        // output for small limits.
+        const budget = Math.max(0, this.maxChars - TRUNCATION_MARKER.length);
+        const keepHead = Math.floor(budget * 0.4);
+        const keepTail = budget - keepHead;
         return (
             joined.slice(0, keepHead) +
-            '\n... [truncated] ...\n' +
+            TRUNCATION_MARKER +
             joined.slice(-keepTail)
         );
     }
@@ -131,9 +145,11 @@ export class AnthropicSummarizer implements ISummarizer {
             });
 
             if (!response.ok) {
-                const body = await response.text().catch(() => '');
+                // Deliberately omit the response body — it can echo
+                // user prompt content and we don't want that leaking
+                // into log pipelines via thrown errors.
                 throw new Error(
-                    `Anthropic summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}`
+                    `Anthropic summarize failed: ${response.status} ${response.statusText}`
                 );
             }
 
@@ -216,9 +232,9 @@ export class GoogleAISummarizer implements ISummarizer {
             });
 
             if (!response.ok) {
-                const body = await response.text().catch(() => '');
+                // See AnthropicSummarizer — no body in the thrown error.
                 throw new Error(
-                    `Google AI summarize failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}`
+                    `Google AI summarize failed: ${response.status} ${response.statusText}`
                 );
             }
 
diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts
index 0f7785c..6c751fe 100644
--- a/src/core/tokenizers/google-ai-tokenizer.ts
+++ b/src/core/tokenizers/google-ai-tokenizer.ts
@@ -6,7 +6,6 @@ const DEFAULT_CACHE_SIZE = 500;
 const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
 const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models';
 const REQUEST_TIMEOUT_MS = 10_000;
-const KEY_HASH_THRESHOLD_CHARS = 256;
 
 /**
  * Remote tokenizer that uses Google AI's countTokens REST endpoint —
@@ -47,18 +46,20 @@ export class GoogleAITokenizer implements ITokenizer {
     }
 
     public async countTokens(text: string): Promise<number> {
-        const key =
-            text.length <= KEY_HASH_THRESHOLD_CHARS
-                ? text
-                : createHash('sha256').update(text).digest('hex');
+        // Always hash with a namespace prefix so cache keys can't collide
+        // with a raw string arg and so sensitive user text isn't retained
+        // verbatim in process memory.
+        const key = `sha256:${createHash('sha256').update(text).digest('hex')}`;
         const cached = this.cache.get(key);
         if (cached !== undefined) {
             return cached;
         }
 
+        // Per Gemini API reference, x-goog-api-key is the recommended
+        // auth path — it keeps the key out of URLs and access logs.
         const url = `${this.endpoint}/${encodeURIComponent(
             this.modelName
-        )}:countTokens?key=${encodeURIComponent(this.apiKey)}`;
+        )}:countTokens`;
 
         const controller = new AbortController();
         const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
@@ -66,7 +67,10 @@ export class GoogleAITokenizer implements ITokenizer {
         try {
             const response = await fetch(url, {
                 method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
+                headers: {
+                    'Content-Type': 'application/json',
+                    'x-goog-api-key': this.apiKey,
+                },
                 body: JSON.stringify({
                     contents: [{ parts: [{ text }] }],
                 }),
@@ -74,9 +78,10 @@ export class GoogleAITokenizer implements ITokenizer {
             });
 
             if (!response.ok) {
-                const body = await response.text().catch(() => '');
+                // Don't embed the response body — it can leak prompt
+                // content in upstream logs.
                 throw new Error(
-                    `Google AI countTokens failed: ${response.status} ${response.statusText} ${body.slice(0, 200)}`
+                    `Google AI countTokens failed: ${response.status} ${response.statusText}`
                 );
             }
 
diff --git a/src/server/index.ts b/src/server/index.ts
index 3c971ce..62fba3c 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -927,23 +927,41 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           modelName?: string;
         };
         const counter = modelName ? new TokenCounter(modelName) : tokenCounter;
-        const result = modelName
-          ? await counter.countAsync(text)
-          : counter.count(text);
-        if (modelName) {
-          // Model-specific counters are one-shot — free the local
-          // tiktoken encoder (if any) that this call allocated.
-          counter.free();
+        try {
+          const result = modelName
+            ? await counter.countAsync(text)
+            : counter.count(text);
+          // Return the full result JSON under a dedicated `metadata`
+          // key while the primary `text` payload stays the scalar token
+          // count string — preserves the integer-parse contract that
+          // the PowerShell orchestrator relies on
+          // (e.g. token-optimizer-orchestrator.ps1 L931/1910/2092 cast
+          // `content[0].text -as [int]`) and still surfaces the richer
+          // object for TS callers.
+          return {
+            content: [
+              {
+                type: 'text',
+                text: String(result.tokens),
+              },
+              {
+                type: 'text',
+                text: JSON.stringify(
+                  { ...result, model: modelName ?? counter.model },
+                  null,
+                  2
+                ),
+              },
+            ],
+          };
+        } finally {
+          // Always free one-shot counters — even when countAsync throws,
+          // leaving the tiktoken encoder allocated was leaking native
+          // resources.
+          if (modelName) {
+            counter.free();
+          }
         }
-
-        return {
-          content: [
-            {
-              type: 'text',
-              text: JSON.stringify({ ...result, model: modelName ?? counter.model }, null, 2),
-            },
-          ],
-        };
       }
 
       case 'compress_text': {
@@ -2031,6 +2049,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       case 'smart_write': {
         const { path, content, ...options } = args as any;
         const result = await runSmartWrite(path, content, options);
+        // Filesystem was mutated — drop every memoized read-only cache
+        // entry so the next smart_read/grep/glob reflects the new state
+        // instead of waiting for TTL expiry.
+        memoRegistry.clearAll();
         return {
           content: [
             {
@@ -2044,6 +2066,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       case 'smart_edit': {
         const { path, operations, ...options } = args as any;
         const result = await runSmartEdit(path, operations, options);
+        memoRegistry.clearAll();
         return {
           content: [
             {
diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts
index f482f45..20f5fdc 100644
--- a/src/tools/context-delta-tool.ts
+++ b/src/tools/context-delta-tool.ts
@@ -68,10 +68,10 @@ export class ContextDeltaTool {
                 error: 'currentContent is required for compute-delta',
             };
         }
-        const session = this.sessionManager.getSession(sessionId);
-        if (!session) {
-            return { success: false, error: `Unknown session: ${sessionId}` };
-        }
+        // Auto-bootstrap the session on first contact so PS-side callers
+        // that locally generate a sessionId don't have to separately
+        // create it server-side first.
+        const session = this.sessionManager.getOrCreateSession(sessionId);
         const previous = session.getFileContent(filePath);
 
         try {
@@ -116,6 +116,7 @@ export class ContextDeltaTool {
             return { success: false, error: 'currentContent is required for seed' };
         }
         try {
+            this.sessionManager.getOrCreateSession(sessionId);
             this.sessionManager.updateFileState(sessionId, filePath, currentContent);
             return { success: true, isBaseline: true };
         } catch (error) {
@@ -139,28 +140,45 @@ export const CONTEXT_DELTA_TOOL_DEFINITION = {
     name: 'context_delta',
     description:
         'Compute a unified-diff delta for a file in a given session so the model only sees changes since the last snapshot. Operations: compute-delta, seed, clear.',
+    // Discriminated inputSchema keyed on `operation` — compute-delta and
+    // seed require currentContent at runtime, so enforce that at schema
+    // validation time rather than letting a malformed payload reach the
+    // tool body.
     inputSchema: {
         type: 'object',
-        properties: {
-            operation: {
-                type: 'string',
-                enum: ['compute-delta', 'seed', 'clear'],
-                description: 'Operation to perform',
-            },
-            sessionId: {
-                type: 'string',
-                description: 'Session identifier (create one via SessionManager first)',
+        oneOf: [
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'compute-delta' },
+                    sessionId: { type: 'string', minLength: 1 },
+                    filePath: { type: 'string', minLength: 1 },
+                    currentContent: { type: 'string' },
+                },
+                required: ['operation', 'sessionId', 'filePath', 'currentContent'],
+                additionalProperties: false,
             },
-            filePath: {
-                type: 'string',
-                description: 'Path of the file inside the session state',
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'seed' },
+                    sessionId: { type: 'string', minLength: 1 },
+                    filePath: { type: 'string', minLength: 1 },
+                    currentContent: { type: 'string' },
+                },
+                required: ['operation', 'sessionId', 'filePath', 'currentContent'],
+                additionalProperties: false,
             },
-            currentContent: {
-                type: 'string',
-                description:
-                    'Current file content (required for compute-delta and seed)',
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'clear' },
+                    sessionId: { type: 'string', minLength: 1 },
+                    filePath: { type: 'string', minLength: 1 },
+                },
+                required: ['operation', 'sessionId', 'filePath'],
+                additionalProperties: false,
             },
-        },
-        required: ['operation', 'sessionId', 'filePath'],
+        ],
     },
 };
diff --git a/src/utils/gzip.ts b/src/utils/gzip.ts
index 5edf8ee..206cebc 100644
--- a/src/utils/gzip.ts
+++ b/src/utils/gzip.ts
@@ -75,13 +75,22 @@ export function saveGzippedFile(path: string, text: string, level: number = 6):
 
 /**
  * Load either `${path}.gz` or `${path}` — whichever exists. Returns
- * null if neither is present.
+ * null if neither is present. If the `.gz` sibling exists but can't
+ * be decompressed (corrupt, partially-written), falls back to the
+ * plaintext path so the backward-compat migration still works.
  */
 export function loadMaybeGzippedFile(path: string): string | null {
     const gzPath = `${path}.gz`;
     if (existsSync(gzPath)) {
-        const buffer = readFileSync(gzPath);
-        return gunzipBuffer(buffer);
+        try {
+            const buffer = readFileSync(gzPath);
+            return gunzipBuffer(buffer);
+        } catch (error) {
+            if (!existsSync(path)) {
+                throw error;
+            }
+            // Fall through to the plaintext sibling below.
+        }
     }
     if (existsSync(path)) {
         return readFileSync(path, 'utf-8');
diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts
index d8d2a58..ea43aeb 100644
--- a/src/utils/lru-memoize.ts
+++ b/src/utils/lru-memoize.ts
@@ -65,7 +65,11 @@ export function lruMemoize<Args extends readonly unknown[], R>(
     fn: (...args: Args) => Promise<R>,
     options: LruMemoizeOptions<Args>
 ): (...args: Args) => Promise<R> {
-    const cache = new LruCache<string, R>(options.maxSize, options.ttlMs ?? 0);
+    // Wrap values in a tiny envelope so a legitimately-cached `undefined`
+    // can be distinguished from a cache miss.
+    type Envelope = { value: R };
+    const cache = new LruCache<string, Envelope>(options.maxSize, options.ttlMs ?? 0);
+
     // Deduplicate concurrent calls for the same key so a stampede of
     // requests while the first promise is still pending doesn't run the
     // expensive function N times.
@@ -79,9 +83,14 @@ export function lruMemoize<Args extends readonly unknown[], R>(
     const keyFn =
         options.keyFn ??
         ((args: Args): string => {
-            const serialized = JSON.stringify(args, (_, v) =>
-                typeof v === 'bigint' ? v.toString() : v
-            );
+            const serialized = JSON.stringify(args, (_, v) => {
+                // Tag bigints with a dedicated discriminator so
+                // `[1n]` and `["1"]` don't collapse to the same key.
+                if (typeof v === 'bigint') {
+                    return { __memo_bigint__: v.toString() };
+                }
+                return v;
+            });
             return createHash('sha256').update(serialized).digest('hex');
         });
 
@@ -89,7 +98,7 @@ export function lruMemoize<Args extends readonly unknown[], R>(
         const key = keyFn(args);
         const hit = cache.get(key);
         if (hit !== undefined) {
-            return hit;
+            return hit.value;
         }
         const pending = inFlight.get(key);
         if (pending) {
@@ -98,7 +107,7 @@ export function lruMemoize<Args extends readonly unknown[], R>(
         const promise = (async () => {
             try {
                 const value = await fn(...args);
-                cache.set(key, value);
+                cache.set(key, { value });
                 return value;
             } finally {
                 inFlight.delete(key);
diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts
index 9a718a0..21e1cfb 100644
--- a/src/validation/tool-schemas.ts
+++ b/src/validation/tool-schemas.ts
@@ -438,13 +438,27 @@ export const OptimizationStorageSchema = z.discriminatedUnion('operation', [
   }),
 ]);
 
-// 73. context_delta
-export const ContextDeltaSchema = z.object({
-  operation: z.enum(['compute-delta', 'seed', 'clear']),
-  sessionId: z.string(),
-  filePath: z.string(),
-  currentContent: z.string().optional(),
-});
+// 73. context_delta — discriminated on operation so compute-delta and
+// seed require currentContent at validation time rather than runtime.
+export const ContextDeltaSchema = z.discriminatedUnion('operation', [
+  z.object({
+    operation: z.literal('compute-delta'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+    currentContent: z.string(),
+  }),
+  z.object({
+    operation: z.literal('seed'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+    currentContent: z.string(),
+  }),
+  z.object({
+    operation: z.literal('clear'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+  }),
+]);
 
 // Map tool names to their schemas for easy lookup
 export const toolSchemaMap: Record<string, z.ZodType<any>> = {
diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts
index 5ef483d..b0dae36 100644
--- a/tests/unit/lru-memoize.test.ts
+++ b/tests/unit/lru-memoize.test.ts
@@ -81,4 +81,33 @@ describe('lruMemoize', () => {
     // Stampede collapsed into a single invocation.
     expect(calls).toBe(1);
   });
+
+  it('memoizes a legitimately-undefined return value', async () => {
+    let calls = 0;
+    const fn = async (): Promise<undefined> => {
+      calls++;
+      return undefined;
+    };
+    const memo = lruMemoize(fn, { name: 'test-undefined', maxSize: 10 });
+    expect(await memo()).toBeUndefined();
+    expect(await memo()).toBeUndefined();
+    // Without envelope-style storage, the second call would re-run fn.
+    expect(calls).toBe(1);
+  });
+
+  it('distinguishes bigint args from string args in the default key', async () => {
+    let calls = 0;
+    const fn = async (x: unknown) => {
+      calls++;
+      return String(x);
+    };
+    const memo = lruMemoize(fn as (x: unknown) => Promise<string>, {
+      name: 'test-bigint-collision',
+      maxSize: 10,
+    });
+    expect(await memo(1n)).toBe('1');
+    expect(await memo('1')).toBe('1');
+    // Two distinct args ⇒ two distinct cache keys ⇒ two invocations.
+    expect(calls).toBe(2);
+  });
 });
diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts
index 4535f3a..ffe6c6e 100644
--- a/tests/unit/session.test.ts
+++ b/tests/unit/session.test.ts
@@ -49,7 +49,9 @@ describe('Session', () => {
     expect((await session.getHistoryTokenCount()) > 50).toBe(true);
     await session.compressHistory();
     const history = session.getHistory();
-    expect(history[0].role).toBe('system');
+    // Summary is stored as `assistant` (never `system`) so that
+    // user-derived text can't be elevated into system-role context.
+    expect(history[0].role).toBe('assistant');
     expect(history[0].content.startsWith('[summary')).toBe(true);
     expect(history.length).toBeLessThan(10);
   });

From 9e02c480b9b88a37cf5c9c0f930c8f85facaf8bd Mon Sep 17 00:00:00 2001
From: Franklin Moormann <cheatcountry@gmail.com>
Date: Sun, 19 Apr 2026 22:56:02 -0400
Subject: [PATCH 26/26] ci(security): drop risky overrides, add informational
 full-tree audit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the two remaining coderabbit findings:

- Remove the package.json `overrides` for brace-expansion and
  picomatch. The picomatch ^4.0.4 override was risky — it forces a
  major version on every transitive consumer, and can break
  packages that declare older picomatch majors. `npm audit --omit=dev`
  already reports 0 vulnerabilities without the override because the
  remaining vulns live inside @semantic-release/npm's bundled npm
  (dev-only, never shipped), and that's the scope the Security Audit
  step gates on.

- quality-gates.yml: keep the `--omit=dev` gating audit, but also
  run a full-tree `npm audit` and write audit-results-full.json so
  dev-dep findings stay visible even on repos/forks without a
  SNYK_TOKEN. Both artifacts are uploaded. The step still fails
  only on critical prod vulnerabilities.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/quality-gates.yml |  30 +++++---
 package-lock.json                   | 113 ++++++++++++++++++++++++----
 package.json                        |   4 -
 3 files changed, 119 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml
index 9c01e5b..81458cc 100644
--- a/.github/workflows/quality-gates.yml
+++ b/.github/workflows/quality-gates.yml
@@ -129,34 +129,44 @@ jobs:
       - name: Run npm audit
         id: audit
         run: |
-          # Audit production deps only — dev deps like @semantic-release/npm
-          # bundle their own node_modules (vulnerable transitively but never
-          # shipped to end users), which would otherwise fail CI on
-          # unfixable issues. The "Dependency Vulnerability Scan" step below
-          # still covers the full tree.
+          # Gating audit — prod deps only. Dev deps (e.g. @semantic-release/npm,
+          # which bundles its own node_modules/npm) can carry unfixable
+          # transitive vulnerabilities that never ship to end users, and
+          # failing CI on those is noise. The "Dependency Vulnerability Scan"
+          # step below still covers the full tree for visibility.
           npm audit --omit=dev --json > audit-results.json || true
 
+          # Informational audit — full tree, including dev deps. Always
+          # collected so teams can review non-gating findings even on
+          # forks/repos without a SNYK_TOKEN.
+          npm audit --json > audit-results-full.json || true
+
           # Check for high/critical vulnerabilities using Python for reliable JSON parsing
           HIGH_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))")
           CRITICAL_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))")
+          FULL_CRITICAL=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))")
+          FULL_HIGH=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))")
 
           # Ensure we have valid integers
           HIGH_VULNS=${HIGH_VULNS:-0}
           CRITICAL_VULNS=${CRITICAL_VULNS:-0}
+          FULL_CRITICAL=${FULL_CRITICAL:-0}
+          FULL_HIGH=${FULL_HIGH:-0}
 
           echo "high_vulnerabilities=$HIGH_VULNS" >> $GITHUB_OUTPUT
           echo "critical_vulnerabilities=$CRITICAL_VULNS" >> $GITHUB_OUTPUT
 
-          echo "Found $CRITICAL_VULNS critical and $HIGH_VULNS high severity vulnerabilities"
+          echo "Production: $CRITICAL_VULNS critical, $HIGH_VULNS high"
+          echo "Full tree:  $FULL_CRITICAL critical, $FULL_HIGH high (informational)"
 
           if [ "$CRITICAL_VULNS" -gt 0 ] 2>/dev/null; then
-            echo "Error: Found $CRITICAL_VULNS critical vulnerabilities"
+            echo "Error: Found $CRITICAL_VULNS critical vulnerabilities in production deps"
             npm audit --omit=dev || true
             exit 1
           fi
 
           if [ "$HIGH_VULNS" -gt 0 ] 2>/dev/null; then
-            echo "Warning: Found $HIGH_VULNS high vulnerabilities"
+            echo "Warning: Found $HIGH_VULNS high vulnerabilities in production deps"
             # npm audit exits non-zero when vulns exist — don't let that
             # turn a "warning" into a failed step.
             npm audit --omit=dev || true
@@ -167,7 +177,9 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: security-audit-${{ github.sha }}
-          path: audit-results.json
+          path: |
+            audit-results.json
+            audit-results-full.json
           retention-days: 30
 
       - name: Comment PR with security audit
diff --git a/package-lock.json b/package-lock.json
index ed80f90..a34c8da 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1144,6 +1144,17 @@
         "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
+    "node_modules/@eslint/config-array/node_modules/brace-expansion": {
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "balanced-match": "^1.0.0",
+        "concat-map": "0.0.1"
+      }
+    },
     "node_modules/@eslint/config-array/node_modules/minimatch": {
       "version": "3.1.5",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
@@ -1214,6 +1225,17 @@
       "dev": true,
       "license": "Python-2.0"
     },
+    "node_modules/@eslint/eslintrc/node_modules/brace-expansion": {
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "balanced-match": "^1.0.0",
+        "concat-map": "0.0.1"
+      }
+    },
     "node_modules/@eslint/eslintrc/node_modules/ignore": {
       "version": "5.3.2",
       "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
@@ -3846,6 +3868,19 @@
         "node": ">= 8"
       }
     },
+    "node_modules/anymatch/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
     "node_modules/argparse": {
       "version": "1.0.10",
       "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
@@ -4656,6 +4691,13 @@
         "dot-prop": "^5.1.0"
       }
     },
+    "node_modules/concat-map": {
+      "version": "0.0.1",
+      "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
+      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/config-chain": {
       "version": "1.1.13",
       "resolved": "https://registry.npmjs.org/config-chain/-/config-chain-1.1.13.tgz",
@@ -5552,6 +5594,17 @@
         "url": "https://opencollective.com/eslint"
       }
     },
+    "node_modules/eslint/node_modules/brace-expansion": {
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "balanced-match": "^1.0.0",
+        "concat-map": "0.0.1"
+      }
+    },
     "node_modules/eslint/node_modules/escape-string-regexp": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
@@ -6469,6 +6522,27 @@
         "node": ">=10.13.0"
       }
     },
+    "node_modules/glob/node_modules/balanced-match": {
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
+      "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
+      "license": "MIT",
+      "engines": {
+        "node": "18 || 20 || >=22"
+      }
+    },
+    "node_modules/glob/node_modules/brace-expansion": {
+      "version": "5.0.5",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
+      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
+      "license": "MIT",
+      "dependencies": {
+        "balanced-match": "^4.0.2"
+      },
+      "engines": {
+        "node": "18 || 20 || >=22"
+      }
+    },
     "node_modules/glob/node_modules/minimatch": {
       "version": "10.2.4",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz",
@@ -6484,21 +6558,6 @@
         "url": "https://github.com/sponsors/isaacs"
       }
     },
-    "node_modules/glob/node_modules/minimatch/node_modules/balanced-match": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
-      "license": "MIT"
-    },
-    "node_modules/glob/node_modules/minimatch/node_modules/brace-expansion": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz",
-      "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==",
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0"
-      }
-    },
     "node_modules/global-directory": {
       "version": "4.0.1",
       "resolved": "https://registry.npmjs.org/global-directory/-/global-directory-4.0.1.tgz",
@@ -8458,6 +8517,19 @@
         "node": ">=8.6"
       }
     },
+    "node_modules/micromatch/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
     "node_modules/mime": {
       "version": "4.1.0",
       "resolved": "https://registry.npmjs.org/mime/-/mime-4.1.0.tgz",
@@ -12992,6 +13064,17 @@
         "node": ">=8"
       }
     },
+    "node_modules/test-exclude/node_modules/brace-expansion": {
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "balanced-match": "^1.0.0",
+        "concat-map": "0.0.1"
+      }
+    },
     "node_modules/test-exclude/node_modules/glob": {
       "version": "7.2.3",
       "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
diff --git a/package.json b/package.json
index 8d380c4..d9fb071 100644
--- a/package.json
+++ b/package.json
@@ -131,9 +131,5 @@
     "lru-cache": "^11.2.2",
     "tiktoken": "^1.0.22",
     "zod": ">=3.25.0 <5"
-  },
-  "overrides": {
-    "brace-expansion": "^2.0.2",
-    "picomatch": "^4.0.4"
   }
 }