diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml
index fbd9a07..81458cc 100644
--- a/.github/workflows/quality-gates.yml
+++ b/.github/workflows/quality-gates.yml
@@ -129,30 +129,47 @@ jobs:
       - name: Run npm audit
         id: audit
         run: |
-          npm audit --json > audit-results.json || true
+          # Gating audit — prod deps only. Dev deps (e.g. @semantic-release/npm,
+          # which bundles its own node_modules/npm) can carry unfixable
+          # transitive vulnerabilities that never ship to end users, and
+          # failing CI on those is noise. The "Dependency Vulnerability Scan"
+          # step below still covers the full tree for visibility.
+          npm audit --omit=dev --json > audit-results.json || true
+
+          # Informational audit — full tree, including dev deps. Always
+          # collected so teams can review non-gating findings even on
+          # forks/repos without a SNYK_TOKEN.
+          npm audit --json > audit-results-full.json || true
 
           # Check for high/critical vulnerabilities using Python for reliable JSON parsing
           HIGH_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))")
           CRITICAL_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))")
+          FULL_CRITICAL=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))")
+          FULL_HIGH=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))")
 
           # Ensure we have valid integers
           HIGH_VULNS=${HIGH_VULNS:-0}
           CRITICAL_VULNS=${CRITICAL_VULNS:-0}
+          FULL_CRITICAL=${FULL_CRITICAL:-0}
+          FULL_HIGH=${FULL_HIGH:-0}
 
           echo "high_vulnerabilities=$HIGH_VULNS" >> $GITHUB_OUTPUT
           echo "critical_vulnerabilities=$CRITICAL_VULNS" >> $GITHUB_OUTPUT
 
-          echo "Found $CRITICAL_VULNS critical and $HIGH_VULNS high severity vulnerabilities"
+          echo "Production: $CRITICAL_VULNS critical, $HIGH_VULNS high"
+          echo "Full tree:  $FULL_CRITICAL critical, $FULL_HIGH high (informational)"
 
           if [ "$CRITICAL_VULNS" -gt 0 ] 2>/dev/null; then
-            echo "Error: Found $CRITICAL_VULNS critical vulnerabilities"
-            npm audit
+            echo "Error: Found $CRITICAL_VULNS critical vulnerabilities in production deps"
+            npm audit --omit=dev || true
             exit 1
           fi
 
           if [ "$HIGH_VULNS" -gt 0 ] 2>/dev/null; then
-            echo "Warning: Found $HIGH_VULNS high vulnerabilities"
-            npm audit
+            echo "Warning: Found $HIGH_VULNS high vulnerabilities in production deps"
+            # npm audit exits non-zero when vulns exist — don't let that
+            # turn a "warning" into a failed step.
+            npm audit --omit=dev || true
           fi
 
       - name: Upload audit results
@@ -160,7 +177,9 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: security-audit-${{ github.sha }}
-          path: audit-results.json
+          path: |
+            audit-results.json
+            audit-results-full.json
           retention-days: 30
 
       - name: Comment PR with security audit
diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1
index acc6187..3896848 100644
--- a/hooks/dispatcher.ps1
+++ b/hooks/dispatcher.ps1
@@ -2,18 +2,30 @@
 # Minimal dispatcher focused on token optimization via MCP
 # Replaces 400+ line mess with clean architecture
 
+[CmdletBinding()]
 param([string]$Phase = "")
 
 $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers"
 $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\dispatcher.log"
 $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1"
 
-function Write-Log {
-    param([string]$Message)
-    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
-    "[$timestamp] [$Phase] $Message" | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8
+# Load the shared logging helper defensively: a missing/malformed helper
+# must not kill the dispatcher for every hook phase. Fall back to a
+# minimal Write-Log shim so the rest of the script still runs.
+$loggingHelperPath = "$PSScriptRoot\helpers\logging.ps1"
+try {
+    if (Test-Path $loggingHelperPath) {
+        . $loggingHelperPath
+    } else {
+        throw "logging helper not found at $loggingHelperPath"
+    }
+} catch {
+    function Write-Log { param([string]$Message, [string]$Level = 'INFO') $null = $Message; $null = $Level }
+    function Handle-Error { param($Exception, [string]$Message) $null = $Exception; $null = $Message }
 }
 
+
+
 function Block-Tool {
     param([string]$Reason)
 
diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1
index 6726ee6..6b13fd8 100644
--- a/hooks/handlers/token-optimizer-orchestrator.ps1
+++ b/hooks/handlers/token-optimizer-orchestrator.ps1
@@ -10,9 +10,20 @@ param(
     [string]$InputJsonFile = ""
 )
 
+# Dot-source helpers BEFORE any logging — Write-Log must exist before
+# the first use below.
+$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers"
+$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1"
+$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log"
+$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt"
+. "$PSScriptRoot\..\helpers\logging.ps1"
+. "$PSScriptRoot\..\helpers\config.ps1"
+. "$PSScriptRoot\..\helpers\gzip.ps1"
+. "$PSScriptRoot\..\helpers\context-delta.ps1"
+
 # DIAGNOSTIC: Log script version/load time to verify latest version is being used
 $SCRIPT_VERSION = Get-Date -Format 'yyyyMMdd.HHmmss'
-Write-Host "DEBUG: token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" -ForegroundColor Cyan
+Write-Log "token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" "DEBUG"
 
 # Read JSON from temp file if provided
 # DO NOT delete temp file - dispatcher will clean it up after all handlers run
@@ -21,14 +32,9 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) {
     try {
         $InputJson = Get-Content -Path $InputJsonFile -Raw -Encoding UTF8
     } catch {
-        Write-Host "ERROR: Failed to read InputJsonFile: $($_.Exception.Message)" -ForegroundColor Red
+        Write-Log "Failed to read InputJsonFile: $($_.Exception.Message)" "ERROR"
     }
 }
-
-$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers"
-$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1"
-$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log"
-$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt"
 $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data"
 
 # PERFORMANCE FIX: Prefer local dev path if not already set
@@ -345,7 +351,7 @@ if (-not ('TokenCounter' -as [type])) {
 if (-not $script:TokenCounter) {
     $apiKey = $env:GOOGLE_AI_API_KEY
     if (-not $apiKey) {
-        Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow
+        Write-Log "GOOGLE_AI_API_KEY not set, falling back to estimation only" "WARN"
     }
     $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" }
     $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName)
@@ -420,7 +426,7 @@ function Read-SessionFile {
             Write-Log "Failed to acquire read lock on session file '$FilePath', retrying... ($($_.Exception.Message))" "WARN"
             Start-Sleep -Milliseconds $retryDelayMs
         } catch {
-            Write-Log "Failed to read session file '$FilePath': $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to read session file '$FilePath'"
             return $null
         }
     }
@@ -452,7 +458,7 @@ function Write-SessionFile {
             Write-Log "Failed to acquire write lock on session file '$FilePath', retrying... ($($_.Exception.Message))" "WARN"
             Start-Sleep -Milliseconds $retryDelayMs
         } catch {
-            Write-Log "Failed to write session file '$FilePath': $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to write session file '$FilePath'"
             return $false
         } finally {
             # Ensure writer and fileStream are disposed even if errors occur
@@ -498,7 +504,7 @@ function Flush-OperationLogs {
             Write-Log "Flushed $($script:OperationLogBuffer.Count) operation logs" "DEBUG"
             $script:OperationLogBuffer = @()
         } catch {
-            Write-Log "Failed to flush operation logs: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to flush operation logs"
         }
     }
 }
@@ -523,33 +529,7 @@ function Start-LogFlushTimer {
     }
 }
 
-function Write-Log {
-    param(
-        [string]$Message,
-        [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO",
-        [string]$Context = ""
-    )
-
-    # Check if debug logging is disabled
-    $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) {
-        $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true'
-    } else {
-        $true  # Default: enabled
-    }
-
-    if ($Level -eq 'DEBUG' -and -not $debugLogging) {
-        return
-    }
 
-    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
-    $contextPart = if ($Context) { " [$Context]" } else { "" }
-    $logEntry = "[$timestamp] [$Level]$contextPart $Message"
-    try {
-        $logEntry | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 -ErrorAction SilentlyContinue
-    } catch {
-        # Silently fail
-    }
-}
 
 # Removed - now using direct invoke-mcp.ps1 calls
 
@@ -559,7 +539,7 @@ function Get-SessionInfo {
             $session = Read-SessionFile -FilePath $SESSION_FILE
             return $session
         } catch {
-            Write-Log "Failed to read session file: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Failed to read session file"
         }
     }
     return $null
@@ -716,7 +696,7 @@ function Handle-LogOperation {
         Write-Log "Logged operation: $toolName ($tokens tokens)" "DEBUG"
 
     } catch {
-        Write-Log "Operation logging failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Operation logging failed"
     }
 }
 
@@ -746,7 +726,7 @@ function Handle-OptimizeSession {
         }
 
     } catch {
-        Write-Log "Session optimization failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Session optimization failed"
     }
 }
 
@@ -822,7 +802,7 @@ function Handle-ContextGuard {
         return 0  # Success - allow operation to proceed
 
     } catch {
-        Write-Log "Context guard failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Context guard failed"
         return 0  # On error, don't block
     }
 }
@@ -852,7 +832,7 @@ function Handle-PeriodicOptimize {
         }
 
     } catch {
-        Write-Log "Periodic optimize failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Periodic optimize failed"
     }
 }
 
@@ -877,7 +857,7 @@ function Handle-CacheWarmup {
         }
 
     } catch {
-        Write-Log "Cache warmup failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Cache warmup failed"
     }
 }
 
@@ -917,7 +897,7 @@ function Handle-SessionReport {
         }
 
     } catch {
-        Write-Log "Session report failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "Session report failed"
     }
 }
 
@@ -999,11 +979,11 @@ function Handle-UserPromptOptimization {
                 Write-Log "Optimized user prompt: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO"
             }
         } catch {
-            Write-Log "Prompt optimization failed: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Prompt optimization failed"
         }
 
     } catch {
-        Write-Log "UserPromptOptimization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "UserPromptOptimization handler failed"
     }
 }
 
@@ -1055,7 +1035,7 @@ function Handle-SessionStartInit {
         }
 
     } catch {
-        Write-Log "SessionStartInit handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SessionStartInit handler failed"
     }
 }
 
@@ -1100,7 +1080,7 @@ function Handle-SmartDiff {
         return $null
 
     } catch {
-        Write-Log "SmartDiff handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SmartDiff handler failed"
         return $null
     }
 }
@@ -1140,7 +1120,7 @@ function Handle-SmartLogs {
         return $null
 
     } catch {
-        Write-Log "SmartLogs handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SmartLogs handler failed"
         return $null
     }
 }
@@ -1195,7 +1175,7 @@ function Handle-ToolSpecificOptimization {
         return $ToolOutput
 
     } catch {
-        Write-Log "ToolSpecificOptimization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "ToolSpecificOptimization handler failed"
         return $ToolOutput
     }
 }
@@ -1233,7 +1213,7 @@ function Handle-MetricCollector {
         return $null
 
     } catch {
-        Write-Log "MetricCollector handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "MetricCollector handler failed"
         return $null
     }
 }
@@ -1273,7 +1253,7 @@ function Handle-AlertManager {
         return $null
 
     } catch {
-        Write-Log "AlertManager handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "AlertManager handler failed"
         return $null
     }
 }
@@ -1305,7 +1285,7 @@ function Handle-HealthMonitor {
         return $null
 
     } catch {
-        Write-Log "HealthMonitor handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "HealthMonitor handler failed"
         return $null
     }
 }
@@ -1343,7 +1323,7 @@ function Handle-MonitoringIntegration {
         return $null
 
     } catch {
-        Write-Log "MonitoringIntegration handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "MonitoringIntegration handler failed"
         return $null
     }
 }
@@ -1379,7 +1359,7 @@ function Handle-AnalyzeOptimization {
         return $null
 
     } catch {
-        Write-Log "AnalyzeOptimization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "AnalyzeOptimization handler failed"
         return $null
     }
 }
@@ -1408,7 +1388,7 @@ function Handle-CacheAnalytics {
         return $null
 
     } catch {
-        Write-Log "CacheAnalytics handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheAnalytics handler failed"
         return $null
     }
 }
@@ -1438,7 +1418,7 @@ function Handle-CacheOptimizer {
         return $null
 
     } catch {
-        Write-Log "CacheOptimizer handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheOptimizer handler failed"
         return $null
     }
 }
@@ -1478,7 +1458,7 @@ function Handle-CacheCompression {
         return $Data
 
     } catch {
-        Write-Log "CacheCompression handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheCompression handler failed"
         return $Data
     }
 }
@@ -1505,7 +1485,7 @@ function Handle-CacheInvalidation {
         Write-Log "Cache invalidation completed for pattern: $Pattern" "DEBUG"
 
     } catch {
-        Write-Log "CacheInvalidation handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "CacheInvalidation handler failed"
     }
 }
 
@@ -1545,7 +1525,7 @@ function Handle-SmartCache {
         return $null
 
     } catch {
-        Write-Log "SmartCache handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "SmartCache handler failed"
         return $null
     }
 }
@@ -1593,7 +1573,7 @@ function Handle-IntelligentSummarization {
         return $Text
 
     } catch {
-        Write-Log "IntelligentSummarization handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "IntelligentSummarization handler failed"
         return $Text
     }
 }
@@ -1639,7 +1619,7 @@ function Handle-PatternRecognition {
         return $null
 
     } catch {
-        Write-Log "PatternRecognition handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "PatternRecognition handler failed"
         return $null
     }
 }
@@ -1682,7 +1662,7 @@ function Handle-PredictiveAnalytics {
         return $Context
 
     } catch {
-        Write-Log "PredictiveAnalytics handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "PredictiveAnalytics handler failed"
         return $Context
     }
 }
@@ -1716,7 +1696,7 @@ function Handle-IntelligentAssistant {
         return $null
 
     } catch {
-        Write-Log "IntelligentAssistant handler failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "IntelligentAssistant handler failed"
         return $null
     }
 }
@@ -1861,7 +1841,7 @@ function Handle-PreToolUseOptimization {
         }
 
     } catch {
-        Write-Log "PreToolUse optimization failed: $($_.Exception.Message)" "ERROR"
+        Handle-Error -Exception $_.Exception -Message "PreToolUse optimization failed"
         return 1
     }
     return 0
@@ -1877,43 +1857,43 @@ function Handle-OptimizeToolOutput {
     $ErrorActionPreference = 'Stop'
 
     try {
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Entered function."
+        Write-Log "[Handle-OptimizeToolOutput] Entered function." "DEBUG"
 
         if (-not $InputJson) {
             Write-Log "No input received for tool output optimization" "WARN"
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] No input received, returning."
+            Write-Log "[Handle-OptimizeToolOutput] No input received, returning." "DEBUG"
             return
         }
 
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Parsing InputJson..."
+        Write-Log "[Handle-OptimizeToolOutput] Parsing InputJson..." "DEBUG"
         $data = $InputJson | ConvertFrom-Json
         $toolName = $data.tool_name
         $toolOutput = $data.tool_response  # FIXED: Claude Code uses tool_response not tool_result
 
         $outputType = if ($toolOutput) { $toolOutput.GetType().Name } else { "null" }
         Write-Log "DEBUG: tool_name=$toolName, tool_response_type=$outputType, has_content=$(-not -not $toolOutput)" "DEBUG"
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. toolName=$toolName, outputType=$outputType"
+        Write-Log "[Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. toolName=$toolName, outputType=$outputType" "DEBUG"
 
         # Skip if no output or if output is already optimized
         Write-Log "DEBUG: Checking if toolOutput is null or empty" "DEBUG"
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check."
+        Write-Log "[Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check." "DEBUG"
         if (-not $toolOutput) {
             Write-Log "No tool output to optimize for: $toolName (toolOutput is null/false)" "DEBUG"
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] toolOutput is null/false, returning."
+            Write-Log "[Handle-OptimizeToolOutput] toolOutput is null/false, returning." "DEBUG"
             return
         }
-        Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists."
+        Write-Log "[Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists." "DEBUG"
 
         # Convert output to string for token counting
         $outputText = ""
         try {
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])"
+            Write-Log "[Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])" "DEBUG"
             $outputText = if ($toolOutput -is [string]) { $toolOutput } else { $toolOutput | ConvertTo-Json -Depth 10 -ErrorAction Stop }
             Write-Log "DEBUG: Converted tool output to string. Length: $($outputText.Length)" "DEBUG"
-            Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. Length: $($outputText.Length)"
+            Write-Log "[Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. Length: $($outputText.Length)" "DEBUG"
         } catch {
             Write-Log "ERROR: Failed to convert tool output to JSON string for ${toolName}: $($_.Exception.Message)" "ERROR"
-            Write-Host "ERROR: [Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)"
+            Write-Log "[Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)" "ERROR"
             return
         }
 
@@ -1934,8 +1914,7 @@ function Handle-OptimizeToolOutput {
                 Write-Log "WARN: count_tokens result did not contain expected content" "WARN"
             }
         } catch {
-            Write-Log "ERROR: Token counting failed for ${toolName}: $($_.Exception.Message)" "ERROR"
-            Write-Log "ERROR: Stack Trace: $($_.ScriptStackTrace)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Token counting failed for ${toolName}"
             return
         }
 
@@ -1958,16 +1937,56 @@ function Handle-OptimizeToolOutput {
             Write-Log "Tool-specific optimization failed: $($_.Exception.Message)" "WARN"
         }
 
-        # Optimize using optimize_text (PHASE 4: Reduced quality for performance)
+        # Calculate SHA256 hash of the output text for caching
+        $hasher = [System.Security.Cryptography.SHA256]::Create()
+        $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText))
+        $originalTextHash = [System.BitConverter]::ToString($hashBytes).Replace("-", "").ToLower()
+
+        # Attempt to retrieve from optimization storage
         try {
-            # PHASE 2 FIX: Use content hash instead of timestamp for cache key
-            $hasher = [System.Security.Cryptography.SHA256]::Create()
-            $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText))
-            $contentHash = [Convert]::ToBase64String($hashBytes).Substring(0, 16)
+            $retrieveArgs = @{
+                operation = "retrieve"
+                originalTextHash = $originalTextHash
+            }
+            $retrieveJson = $retrieveArgs | ConvertTo-Json -Compress
+            $retrieveResultJson = & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $retrieveJson
+            $retrieveResult = if ($retrieveResultJson) { $retrieveResultJson | ConvertFrom-Json } else { $null }
+
+            if ($retrieveResult -and $retrieveResult.success -and $retrieveResult.result) {
+                Write-Log "Cache HIT for optimization result. Hash: $originalTextHash" "INFO"
+                # OptimizationStorageTool.retrieve() returns { success, result: { optimizedText, ... } }.
+                # Read the actual payload from $retrieveResult.result (not top-level), and mirror
+                # the base64 wrapping used on the store path below so round-tripped bytes survive JSON.
+                $cachedEntry = $retrieveResult.result
+                $optimizedTextBytes = [System.Convert]::FromBase64String($cachedEntry.optimizedText)
+                $optimizedText = [System.Text.Encoding]::UTF8.GetString($optimizedTextBytes)
+                $afterTokens = $cachedEntry.optimizedTokens
+                $saved = $cachedEntry.tokensSaved
+                $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 }
 
+                if ($script:CurrentSession) {
+                    $script:CurrentSession.cacheHits++
+                    if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) {
+                        Write-Log "Session stats updated and persisted after cache hit." "DEBUG"
+                    } else {
+                        Write-Log "Failed to persist session stats after cache hit." "ERROR"
+                    }
+                }
+
+                Write-Log "Using cached optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO"
+                Update-SessionOperation -TokensDelta $afterTokens
+                return
+            } else {
+                Write-Log "Cache MISS for optimization result. Hash: $originalTextHash" "DEBUG"
+            }
+        } catch {
+            Handle-Error -Exception $_.Exception -Message "Failed to retrieve from optimization storage"
+        }
+
+        # Optimize using optimize_text (PHASE 4: Reduced quality for performance)
+        try {
             $optimizeArgs = @{
                 text = $outputText
-                key = "tool_output_${toolName}_$contentHash"
                 quality = $script:OPTIMIZATION_QUALITY
             }
             $optimizeJson = $optimizeArgs | ConvertTo-Json -Compress
@@ -1982,34 +2001,43 @@ function Handle-OptimizeToolOutput {
                 $saved = $beforeTokens - $afterTokens
                 $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 }
 
-                # PHASE 1 FIX: Rollback logic - only use optimization if it actually helps
                 if ($afterTokens -ge $beforeTokens) {
                     Write-Log "Optimization made things worse or had no effect ($beforeTokens → $afterTokens tokens), REVERTING to original" "WARN"
-
-                    # PHASE 4 FIX: Track failure and persist immediately
                     if ($script:CurrentSession) {
                         $script:CurrentSession.optimizationFailures++
-                        # CRITICAL: Persist immediately to disk for multi-process visibility
                         if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) {
                             Write-Log "Session stats updated and persisted after optimization failure." "DEBUG"
                         } else {
                             Write-Log "Failed to persist session stats after optimization failure." "ERROR"
                         }
                     }
-
-                    # Don't update session with optimized tokens, skip this optimization
                     return
                 }
 
                 Write-Log "Optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO"
 
-                # PHASE 4 FIX: Track success and detailed stats, persist immediately
+                # Store the new optimization result
+                try {
+                    $storeArgs = @{
+                        operation = "store"
+                        originalTextHash = $originalTextHash
+                        optimizedText = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes($optimizedText))
+                        originalTokens = $beforeTokens
+                        optimizedTokens = $afterTokens
+                        tokensSaved = $saved
+                    }
+                    $storeJson = $storeArgs | ConvertTo-Json -Compress
+                    & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $storeJson
+                    Write-Log "Stored new optimization result. Hash: $originalTextHash" "DEBUG"
+                } catch {
+                    Handle-Error -Exception $_.Exception -Message "Failed to store optimization result"
+                }
+
                 if ($script:CurrentSession) {
                     $script:CurrentSession.optimizationSuccesses++
                     $script:CurrentSession.totalOriginalTokens += $beforeTokens
                     $script:CurrentSession.totalOptimizedTokens += $afterTokens
                     $script:CurrentSession.totalTokensSaved += $saved
-                    # CRITICAL: Persist immediately to disk for multi-process visibility
                     if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) {
                         Write-Log "Session stats updated and persisted after optimization success." "DEBUG"
                     } else {
@@ -2017,11 +2045,10 @@ function Handle-OptimizeToolOutput {
                     }
                 }
 
-                # Update session tokens (only if optimization helped)
                 Update-SessionOperation -TokensDelta $afterTokens
             }
         } catch {
-            Write-Log "Tool output optimization failed: $($_.Exception.Message)" "ERROR"
+            Handle-Error -Exception $_.Exception -Message "Tool output optimization failed"
         }
 
     } catch {
@@ -2227,6 +2254,28 @@ function Handle-SmartRead {
                 Write-Log "Updated session totalTokens by $tokens" "DEBUG"
             }
 
+            # #122: update the MCP server's context_delta so the next read
+            # of this file can be served as a diff. Failure here is
+            # non-fatal — smart_read still succeeds.
+            #
+            # IMPORTANT: only feed FULL content. smart_read can return a
+            # diff payload (metadata.isDiff), and persisting a diff as the
+            # new baseline would make the next compute-delta compare
+            # against the previous patch instead of the file contents.
+            try {
+                $isDiff = $result.metadata -and $result.metadata.isDiff
+                $contentText = if ($result.content -and $result.content[0] -and $result.content[0].text) {
+                    $result.content[0].text
+                } else {
+                    $null
+                }
+                if ($contentText -and -not $isDiff) {
+                    $null = Invoke-ContextDelta -Operation 'compute-delta' -FilePath $filePath -CurrentContent $contentText
+                }
+            } catch {
+                Write-Log "context_delta update skipped: $($_.Exception.Message)" 'DEBUG'
+            }
+
             # Return smart_read result and block plain Read
             $blockResponse = @{
                 continue = $false
diff --git a/hooks/helpers/config.ps1 b/hooks/helpers/config.ps1
new file mode 100644
index 0000000..38b42b6
--- /dev/null
+++ b/hooks/helpers/config.ps1
@@ -0,0 +1,151 @@
+[CmdletBinding()]
+param()
+
+<#
+Token-Optimizer Config helper — addresses issue #120 (PowerShell side).
+
+Mirrors src/core/config.ts so the PS orchestrator and the TS server
+share one source of truth. The config file lives at
+~/.token-optimizer/config.json and is the same one the Node server
+reads. On first run we copy the defaults below into that file.
+#>
+
+$script:TokenOptimizerConfigPath =
+    Join-Path $env:USERPROFILE '.token-optimizer\config.json'
+
+$script:TokenOptimizerDefaultConfig = @{
+    cache = @{
+        enabled = $true
+        maxSizeMB = 500
+        defaultTTL = 300
+        ttlByType = @{
+            file_read = 300
+            git_status = 60
+            git_diff = 120
+            build_result = 600
+            test_result = 300
+        }
+        compression = 'auto'
+    }
+    monitoring = @{
+        enabled = $true
+        detailedLogging = $false
+        metricsRetentionDays = 30
+        dashboardPort = 3100
+        enableWebUI = $false
+    }
+    optimization = @{
+        compressionTokenThreshold = 0.7
+        compressionPreserveThreshold = 0.3
+        minTokensBeforeCompression = 1000
+        modelTokenLimits = @{
+            'gpt-4' = 128000
+            'gpt-4-turbo' = 128000
+            'gpt-3.5-turbo' = 16385
+            'claude-3-opus' = 200000
+            'claude-3-sonnet' = 200000
+            'claude-3-haiku' = 200000
+            'claude-opus-4-7' = 1000000
+            'claude-sonnet-4-6' = 1000000
+            'gemini-1.5-pro' = 2000000
+            'gemini-2.5-flash' = 1000000
+        }
+        minOutputSizeBytes = 500
+        quality = 'balanced'
+        cacheSettings = @{
+            maxSize = 1000
+            ttlSeconds = 3600
+        }
+        chatCompression = @{
+            enabled = $true
+            strategy = 'summarize'
+        }
+    }
+}
+
+function Get-TokenOptimizerConfigPath {
+    return $script:TokenOptimizerConfigPath
+}
+
+function Write-TokenOptimizerDefaultConfig {
+    $configPath = Get-TokenOptimizerConfigPath
+    $configDir = Split-Path -Parent $configPath
+    if (-not (Test-Path $configDir)) {
+        New-Item -ItemType Directory -Path $configDir -Force | Out-Null
+    }
+    $json = $script:TokenOptimizerDefaultConfig | ConvertTo-Json -Depth 10
+    Set-Content -Path $configPath -Value $json -Encoding UTF8
+}
+
+function Import-TokenOptimizerConfig {
+    $configPath = Get-TokenOptimizerConfigPath
+    if (-not (Test-Path $configPath)) {
+        Write-TokenOptimizerDefaultConfig
+        return $script:TokenOptimizerDefaultConfig
+    }
+    try {
+        $raw = Get-Content -Path $configPath -Raw -Encoding UTF8
+        return ($raw | ConvertFrom-Json -AsHashtable)
+    } catch {
+        $msg = "Failed to load $configPath ($($_.Exception.Message)); using defaults."
+        if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+            Write-Log $msg 'WARN'
+        } else {
+            Write-Warning $msg
+        }
+        return $script:TokenOptimizerDefaultConfig
+    }
+}
+
+function Merge-TokenOptimizerHashtable {
+    param(
+        [hashtable]$Base,
+        $User
+    )
+    $merged = @{}
+    foreach ($key in $Base.Keys) {
+        $merged[$key] = $Base[$key]
+    }
+    if ($null -eq $User) {
+        return $merged
+    }
+    # Handle both hashtables and PSCustomObjects (ConvertFrom-Json returns the latter).
+    $userKeys = @()
+    if ($User -is [hashtable]) {
+        $userKeys = $User.Keys
+    } elseif ($User.PSObject) {
+        $userKeys = $User.PSObject.Properties.Name
+    }
+    foreach ($key in $userKeys) {
+        $userValue = if ($User -is [hashtable]) { $User[$key] } else { $User.$key }
+        if ($Base.ContainsKey($key) -and ($Base[$key] -is [hashtable]) -and ($null -ne $userValue)) {
+            $merged[$key] = Merge-TokenOptimizerHashtable -Base $Base[$key] -User $userValue
+        } else {
+            $merged[$key] = $userValue
+        }
+    }
+    return $merged
+}
+
+function Get-TokenOptimizerOptimizationConfig {
+    $config = Import-TokenOptimizerConfig
+    $defaults = $script:TokenOptimizerDefaultConfig.optimization
+    if ($null -eq $config.optimization) {
+        return $defaults
+    }
+    # Deep-merge the user's partial optimization section onto defaults so
+    # overriding one modelTokenLimit doesn't drop the rest of the map.
+    return Merge-TokenOptimizerHashtable -Base $defaults -User $config.optimization
+}
+
+function Get-TokenOptimizerModelTokenLimit {
+    param(
+        [Parameter(Mandatory = $true)]
+        [string]$ModelName
+    )
+    $opt = Get-TokenOptimizerOptimizationConfig
+    if ($opt.modelTokenLimits -and $opt.modelTokenLimits.ContainsKey($ModelName)) {
+        return $opt.modelTokenLimits[$ModelName]
+    }
+    return $null
+}
diff --git a/hooks/helpers/context-delta.ps1 b/hooks/helpers/context-delta.ps1
new file mode 100644
index 0000000..e8035e7
--- /dev/null
+++ b/hooks/helpers/context-delta.ps1
@@ -0,0 +1,98 @@
+[CmdletBinding()]
+param()
+
+<#
+PowerShell integration for the context_delta MCP tool — addresses
+issue #122 Phase 2.
+
+Get-TokenOptimizerSessionId generates a stable sessionId per top-level
+PS session (cached on the script scope and persisted to a marker file
+so multiple orchestrator invocations within one Claude session reuse
+the same id).
+
+Invoke-ContextDelta calls the context_delta MCP tool via the shared
+Invoke-TokenOptimizer helper and returns the unified-diff delta so
+Handle-SmartRead can emit only the changed lines to the model.
+#>
+
+$script:TokenOptimizerSessionIdPath =
+    Join-Path $env:USERPROFILE '.token-optimizer\current-session-id'
+
+function Get-TokenOptimizerSessionId {
+    if ($script:TokenOptimizerCurrentSessionId) {
+        return $script:TokenOptimizerCurrentSessionId
+    }
+    if (Test-Path $script:TokenOptimizerSessionIdPath) {
+        $existing = (Get-Content -Path $script:TokenOptimizerSessionIdPath -Raw).Trim()
+        if ($existing) {
+            $script:TokenOptimizerCurrentSessionId = $existing
+            return $existing
+        }
+    }
+    $newId = [guid]::NewGuid().ToString()
+    $dir = Split-Path -Parent $script:TokenOptimizerSessionIdPath
+    if (-not (Test-Path $dir)) {
+        New-Item -ItemType Directory -Path $dir -Force | Out-Null
+    }
+    Set-Content -Path $script:TokenOptimizerSessionIdPath -Value $newId
+    $script:TokenOptimizerCurrentSessionId = $newId
+    return $newId
+}
+
+function Reset-TokenOptimizerSessionId {
+    $script:TokenOptimizerCurrentSessionId = $null
+    if (Test-Path $script:TokenOptimizerSessionIdPath) {
+        Remove-Item -Path $script:TokenOptimizerSessionIdPath -Force
+    }
+}
+
+function Invoke-ContextDelta {
+    param(
+        [Parameter(Mandatory = $true)]
+        [ValidateSet('compute-delta', 'seed', 'clear')]
+        [string]$Operation,
+        [Parameter(Mandatory = $true)][string]$FilePath,
+        [string]$CurrentContent = $null,
+        [string]$SessionId = $null
+    )
+
+    if (-not $SessionId) {
+        $SessionId = Get-TokenOptimizerSessionId
+    }
+    $toolArgs = @{
+        operation = $Operation
+        sessionId = $SessionId
+        filePath = $FilePath
+    }
+    if ($Operation -ne 'clear' -and $null -ne $CurrentContent) {
+        $toolArgs.currentContent = $CurrentContent
+    }
+
+    # Call the MCP tool via the repo's existing invoke-mcp.ps1 script.
+    # The server-side ContextDeltaTool auto-creates the session on first
+    # contact, so there's no separate bootstrap step needed here.
+    $invokeMcp = Join-Path $PSScriptRoot 'invoke-mcp.ps1'
+    if (-not (Test-Path $invokeMcp)) {
+        if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+            Write-Log "invoke-mcp.ps1 not found at $invokeMcp; skipping context_delta." 'DEBUG'
+        }
+        return $null
+    }
+
+    try {
+        $argsJson = $toolArgs | ConvertTo-Json -Compress
+        $resultJson = & $invokeMcp -Tool 'context_delta' -ArgumentsJson $argsJson
+        if ($resultJson) {
+            return ($resultJson | ConvertFrom-Json)
+        }
+        return $null
+    } catch {
+        $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)"
+        if (Get-Command Write-Log -ErrorAction SilentlyContinue) {
+            Write-Log $msg 'WARN'
+        } else {
+            Write-Warning $msg
+        }
+        return $null
+    }
+}
diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1
new file mode 100644
index 0000000..9527dbf
--- /dev/null
+++ b/hooks/helpers/gzip.ps1
@@ -0,0 +1,118 @@
+[CmdletBinding()]
+param()
+
+<#
+Gzip utilities — addresses issue #126 (PowerShell side).
+
+Compress-String / Expand-String are the primitives. Save-GzippedFile
+writes <path>.gz atomically (tmp + rename) and strips the plaintext
+sibling once the gzip lands. Read-MaybeGzippedFile prefers <path>.gz
+and falls back to plaintext so PS code can be migrated incrementally.
+#>
+
+function Compress-String {
+    param(
+        [Parameter(Mandatory = $true)][string]$InputString,
+        [ValidateSet('Optimal', 'Fastest', 'NoCompression', 'SmallestSize')]
+        [string]$CompressionLevel = 'Optimal'
+    )
+    $inputStream = $null
+    $outputStream = $null
+    $gzipStream = $null
+    try {
+        $bytes = [System.Text.Encoding]::UTF8.GetBytes($InputString)
+        $inputStream = [System.IO.MemoryStream]::new($bytes)
+        $outputStream = [System.IO.MemoryStream]::new()
+        $level = [System.IO.Compression.CompressionLevel]::$CompressionLevel
+        $gzipStream = [System.IO.Compression.GZipStream]::new($outputStream, $level)
+        $inputStream.CopyTo($gzipStream)
+        $gzipStream.Dispose()
+        $gzipStream = $null
+        return ,$outputStream.ToArray()
+    } finally {
+        if ($null -ne $gzipStream) { $gzipStream.Dispose() }
+        if ($null -ne $inputStream) { $inputStream.Dispose() }
+        if ($null -ne $outputStream) { $outputStream.Dispose() }
+    }
+}
+
+function Expand-String {
+    param(
+        [Parameter(Mandatory = $true)][byte[]]$CompressedBytes
+    )
+    $inputStream = $null
+    $outputStream = $null
+    $gzipStream = $null
+    try {
+        $inputStream = [System.IO.MemoryStream]::new($CompressedBytes)
+        $outputStream = [System.IO.MemoryStream]::new()
+        $gzipStream = [System.IO.Compression.GZipStream]::new(
+            $inputStream,
+            [System.IO.Compression.CompressionMode]::Decompress
+        )
+        $gzipStream.CopyTo($outputStream)
+        return [System.Text.Encoding]::UTF8.GetString($outputStream.ToArray())
+    } finally {
+        if ($null -ne $gzipStream) { $gzipStream.Dispose() }
+        if ($null -ne $inputStream) { $inputStream.Dispose() }
+        if ($null -ne $outputStream) { $outputStream.Dispose() }
+    }
+}
+
+function Save-GzippedFile {
+    param(
+        [Parameter(Mandatory = $true)][string]$Path,
+        [Parameter(Mandatory = $true)][string]$Content
+    )
+    $dir = Split-Path -Parent $Path
+    if ($dir -and -not (Test-Path $dir)) {
+        New-Item -ItemType Directory -Path $dir -Force | Out-Null
+    }
+    $compressed = Compress-String -InputString $Content
+    $gzPath = "$Path.gz"
+    # Per-write temp path so concurrent writers to the same destination
+    # can't clobber each other mid-write.
+    $tmpPath = "$gzPath.$([guid]::NewGuid().ToString('N')).tmp"
+    [System.IO.File]::WriteAllBytes($tmpPath, $compressed)
+    # Atomic swap: File::Move(src, dst, overwrite:$true) on .NET5+.
+    # Unlike "delete then move", this never leaves the caller with a
+    # missing .gz file if the process crashes.
+    try {
+        [System.IO.File]::Move($tmpPath, $gzPath, $true)
+    } finally {
+        if (Test-Path $tmpPath) {
+            Remove-Item -Path $tmpPath -Force -ErrorAction SilentlyContinue
+        }
+    }
+    if (Test-Path $Path) {
+        Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue
+    }
+    return @{
+        originalBytes = [System.Text.Encoding]::UTF8.GetByteCount($Content)
+        compressedBytes = $compressed.Length
+    }
+}
+
+function Read-MaybeGzippedFile {
+    param(
+        [Parameter(Mandatory = $true)][string]$Path
+    )
+    $gzPath = "$Path.gz"
+    if (Test-Path $gzPath) {
+        try {
+            $bytes = [System.IO.File]::ReadAllBytes($gzPath)
+            return Expand-String -CompressedBytes $bytes
+        } catch {
+            # Corrupt / partial .gz — fall back to the plaintext sibling
+            # so the backward-compat migration path still works. If no
+            # plaintext exists either, rethrow the original error.
+            if (-not (Test-Path $Path)) {
+                throw
+            }
+        }
+    }
+    if (Test-Path $Path) {
+        return [System.IO.File]::ReadAllText($Path, [System.Text.Encoding]::UTF8)
+    }
+    return $null
+}
diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1
new file mode 100644
index 0000000..b52f54a
--- /dev/null
+++ b/hooks/helpers/logging.ps1
@@ -0,0 +1,51 @@
+[CmdletBinding()]
+param()
+
+function Write-Log {
+    param(
+        [string]$Message,
+        [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO",
+        [string]$Context = ""
+    )
+
+    # Check if debug logging is disabled
+    $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) {
+        $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true'
+    } else {
+        $true  # Default: enabled
+    }
+
+    if ($Level -eq 'DEBUG' -and -not $debugLogging) {
+        return
+    }
+
+    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
+    $contextPart = if ($Context) { " [$Context]" } else { "" }
+    $logMessage = "[$timestamp] [$Level]$contextPart $Message"
+    if ($script:LOG_FILE) {
+        try {
+            $logDir = Split-Path -Parent $script:LOG_FILE
+            if ($logDir -and -not (Test-Path $logDir)) {
+                New-Item -ItemType Directory -Path $logDir -Force | Out-Null
+            }
+            $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8
+        } catch {
+            # Swallow — logging must never be a failure mode for the caller.
+        }
+    }
+    Write-Verbose $logMessage
+}
+
+function Handle-Error {
+    param(
+        [System.Exception]$Exception,
+        [string]$Message = ""
+    )
+
+    $errorMessage = if ($Message) { $Message } else { $Exception.Message }
+    # $StackTrace is a built-in PowerShell automatic variable — use a
+    # different name so we don't shadow it.
+    $exceptionTrace = $Exception.ScriptStackTrace
+    Write-Log "ERROR: $errorMessage" "ERROR"
+    Write-Log "StackTrace: $exceptionTrace" "ERROR"
+}
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
index a3c484a..a34c8da 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -137,6 +137,7 @@
       "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.27.1",
         "@babel/generator": "^7.28.3",
@@ -1144,9 +1145,9 @@
       }
     },
     "node_modules/@eslint/config-array/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -1225,9 +1226,9 @@
       "license": "Python-2.0"
     },
     "node_modules/@eslint/eslintrc/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -2012,6 +2013,7 @@
       "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@octokit/auth-token": "^6.0.0",
         "@octokit/graphql": "^9.0.2",
@@ -3073,6 +3075,7 @@
       "integrity": "sha512-/NbVmcGTP+lj5oa4yiYxxeBjRivKQ5Ns1eSZeB99ExsEQ6rX5XYU1Zy/gGxY/ilqtD4Etx9mKyrPxZRetiahhA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "undici-types": "~7.14.0"
       }
@@ -3208,6 +3211,7 @@
       "integrity": "sha512-6JSSaBZmsKvEkbRUkf7Zj7dru/8ZCrJxAqArcLaVMee5907JdtEbKGsZ7zNiIm/UAkpGUkaSMZEXShnN2D1HZA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.46.1",
         "@typescript-eslint/types": "8.46.1",
@@ -3702,6 +3706,7 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -3863,6 +3868,19 @@
         "node": ">= 8"
       }
     },
+    "node_modules/anymatch/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
     "node_modules/argparse": {
       "version": "1.0.10",
       "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
@@ -4105,9 +4123,9 @@
       "license": "MIT"
     },
     "node_modules/brace-expansion": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
-      "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz",
+      "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -4147,6 +4165,7 @@
         }
       ],
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "baseline-browser-mapping": "^2.8.9",
         "caniuse-lite": "^1.0.30001746",
@@ -4862,6 +4881,7 @@
       "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "env-paths": "^2.2.1",
         "import-fresh": "^3.3.0",
@@ -5473,6 +5493,7 @@
       "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -5574,9 +5595,9 @@
       }
     },
     "node_modules/eslint/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -5876,6 +5897,7 @@
       "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
       "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "accepts": "^2.0.0",
         "body-parser": "^2.2.1",
@@ -6672,6 +6694,7 @@
       "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz",
       "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=16.9.0"
       }
@@ -7248,6 +7271,7 @@
       "integrity": "sha512-F26gjC0yWN8uAA5m5Ss8ZQf5nDHWGlN/xWZIh8S5SRbsEKBovwZhxGd6LJlbZYxBgCYOtreSUyb8hpXyGC5O4A==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@jest/core": "30.2.0",
         "@jest/types": "30.2.0",
@@ -7875,19 +7899,6 @@
         "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0"
       }
     },
-    "node_modules/jest-util/node_modules/picomatch": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
-      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/jonschlinkert"
-      }
-    },
     "node_modules/jest-validate": {
       "version": "30.2.0",
       "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-30.2.0.tgz",
@@ -8373,6 +8384,7 @@
       "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "bin": {
         "marked": "bin/marked.js"
       },
@@ -8505,6 +8517,19 @@
         "node": ">=8.6"
       }
     },
+    "node_modules/micromatch/node_modules/picomatch": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/jonschlinkert"
+      }
+    },
     "node_modules/mime": {
       "version": "4.1.0",
       "resolved": "https://registry.npmjs.org/mime/-/mime-4.1.0.tgz",
@@ -10593,6 +10618,7 @@
       "dev": true,
       "inBundle": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -11033,13 +11059,14 @@
       "license": "ISC"
     },
     "node_modules/picomatch": {
-      "version": "2.3.2",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
-      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
+      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
-        "node": ">=8.6"
+        "node": ">=12"
       },
       "funding": {
         "url": "https://github.com/sponsors/jonschlinkert"
@@ -11650,6 +11677,7 @@
       "integrity": "sha512-6qGjWccl5yoyugHt3jTgztJ9Y0JVzyH8/Voc/D8PlLat9pwxQYXz7W1Dpnq5h0/G5GCYGUaDSlYcyk3AMh5A6g==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@semantic-release/commit-analyzer": "^13.0.1",
         "@semantic-release/error": "^4.0.0",
@@ -13037,9 +13065,9 @@
       }
     },
     "node_modules/test-exclude/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+      "version": "1.1.14",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
+      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -13233,19 +13261,6 @@
         }
       }
     },
-    "node_modules/tinyglobby/node_modules/picomatch": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
-      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/jonschlinkert"
-      }
-    },
     "node_modules/tmpl": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
@@ -13451,6 +13466,7 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -13964,6 +13980,7 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
       "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
       "license": "MIT",
+      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts
new file mode 100644
index 0000000..81486d7
--- /dev/null
+++ b/src/analytics/optimization-storage.ts
@@ -0,0 +1,147 @@
+import Database from 'better-sqlite3';
+import { existsSync, mkdirSync } from 'fs';
+import { homedir } from 'os';
+import { dirname, join } from 'path';
+import { CompressionEngine } from '../core/compression-engine.js';
+
+export interface OptimizationResult {
+    originalTextHash: string;
+    optimizedText: string;
+    originalTokens: number;
+    optimizedTokens: number;
+    tokensSaved: number;
+}
+
+export function getDefaultOptimizationDbPath(): string {
+    return join(homedir(), '.token-optimizer', 'optimization.db');
+}
+
+export class SqliteOptimizationStorage {
+    private db: Database.Database | null = null;
+    private readonly dbPath: string;
+    private readonly compressionEngine: CompressionEngine;
+
+    constructor(dbPath?: string) {
+        this.dbPath = dbPath ?? getDefaultOptimizationDbPath();
+        this.compressionEngine = new CompressionEngine();
+    }
+
+    public initializeDatabase(): void {
+        const dir = dirname(this.dbPath);
+        if (!existsSync(dir)) {
+            mkdirSync(dir, { recursive: true });
+        }
+        this.db = new Database(this.dbPath);
+        this.db.pragma('journal_mode = WAL');
+        this.db.exec(`
+            CREATE TABLE IF NOT EXISTS optimization_results (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                original_text_hash TEXT NOT NULL UNIQUE,
+                optimized_text_compressed BLOB NOT NULL,
+                compression_algorithm TEXT NOT NULL,
+                original_tokens INTEGER NOT NULL,
+                optimized_tokens INTEGER NOT NULL,
+                tokens_saved INTEGER NOT NULL,
+                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+            );
+            CREATE INDEX IF NOT EXISTS idx_optimization_hash
+                ON optimization_results(original_text_hash);
+        `);
+    }
+
+    private requireDb(): Database.Database {
+        if (!this.db) {
+            throw new Error('Optimization storage database is not initialized. Call initializeDatabase() first.');
+        }
+        return this.db;
+    }
+
+    public save(entry: OptimizationResult): void {
+        const db = this.requireDb();
+        const compressed = this.compressionEngine.compress(entry.optimizedText);
+
+        db.prepare(
+            `INSERT OR REPLACE INTO optimization_results
+             (original_text_hash, optimized_text_compressed, compression_algorithm,
+              original_tokens, optimized_tokens, tokens_saved)
+             VALUES (?, ?, ?, ?, ?, ?)`
+        ).run(
+            entry.originalTextHash,
+            compressed.compressed,
+            SqliteOptimizationStorage.COMPRESSION_ALGORITHM,
+            entry.originalTokens,
+            entry.optimizedTokens,
+            entry.tokensSaved
+        );
+    }
+
+    public get(originalTextHash: string): OptimizationResult | null {
+        const db = this.requireDb();
+        const row = db.prepare(
+            `SELECT optimized_text_compressed, compression_algorithm,
+                    original_tokens, optimized_tokens, tokens_saved
+             FROM optimization_results WHERE original_text_hash = ?`
+        ).get(originalTextHash) as
+            | {
+                  optimized_text_compressed: Buffer;
+                  compression_algorithm: string;
+                  original_tokens: number;
+                  optimized_tokens: number;
+                  tokens_saved: number;
+              }
+            | undefined;
+
+        if (!row) {
+            return null;
+        }
+
+        return {
+            originalTextHash,
+            optimizedText: this.decodePayload(
+                row.optimized_text_compressed,
+                row.compression_algorithm
+            ),
+            originalTokens: row.original_tokens,
+            optimizedTokens: row.optimized_tokens,
+            tokensSaved: row.tokens_saved,
+        };
+    }
+
+    /**
+     * Decode a stored payload using the persisted algorithm label. Keeps
+     * the door open for additional algorithms (gzip, zstd) without
+     * touching the read path, and surfaces an explicit error for
+     * unknown labels instead of silently corrupting data.
+     */
+    private decodePayload(buffer: Buffer, algorithm: string | null): string {
+        if (algorithm === 'brotli') {
+            return this.compressionEngine.decompress(buffer);
+        }
+        if (algorithm === 'none' || algorithm === '') {
+            return buffer.toString('utf8');
+        }
+        if (algorithm === null || algorithm === undefined) {
+            // Legacy rows without a recorded algorithm: pre-tracking code
+            // always wrote brotli, but we still accept raw UTF-8 as a last
+            // resort so a one-off plaintext row doesn't poison reads.
+            try {
+                return this.compressionEngine.decompress(buffer);
+            } catch {
+                return buffer.toString('utf8');
+            }
+        }
+        throw new Error(
+            `Unknown compression_algorithm in optimization_results: ${algorithm}`
+        );
+    }
+
+    /** Algorithm label paired with the current CompressionEngine. */
+    public static readonly COMPRESSION_ALGORITHM = 'brotli';
+
+    public close(): void {
+        if (this.db) {
+            this.db.close();
+            this.db = null;
+        }
+    }
+}
diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts
index 2be5b9e..b2daabb 100644
--- a/src/core/compression-engine.ts
+++ b/src/core/compression-engine.ts
@@ -1,167 +1,103 @@
 import { brotliCompressSync, brotliDecompressSync, constants } from 'zlib';
 
 export interface CompressionResult {
-  compressed: Buffer;
-  originalSize: number;
-  compressedSize: number;
-  ratio: number;
-  percentSaved: number;
-}
-
-export interface CompressionOptions {
-  quality?: number; // 0-11, default 11 (max compression)
-  mode?: 'text' | 'font' | 'generic';
-}
-
-export class CompressionEngine {
-  private readonly DEFAULT_QUALITY = 11;
-
-  /**
-   * Compress text using Brotli
-   */
-  compress(text: string, options?: CompressionOptions): CompressionResult {
-    const buffer = Buffer.from(text, 'utf-8');
-    const quality = options?.quality ?? this.DEFAULT_QUALITY;
-    const mode = this.getModeConstant(options?.mode);
-
-    const compressed = brotliCompressSync(buffer, {
-      params: {
-        [constants.BROTLI_PARAM_QUALITY]: quality,
-        [constants.BROTLI_PARAM_MODE]: mode,
-      },
-    });
-
-    const originalSize = buffer.length;
-    const compressedSize = compressed.length;
-    const ratio = originalSize > 0 ? compressedSize / originalSize : 0;
-    const percentSaved =
-      originalSize > 0
-        ? ((originalSize - compressedSize) / originalSize) * 100
-        : 0;
-
-    return {
-      compressed,
-      originalSize,
-      compressedSize,
-      ratio,
-      percentSaved,
-    };
-  }
-
-  /**
-   * Decompress Brotli-compressed data
-   */
-  decompress(compressed: Buffer): string {
-    const decompressed = brotliDecompressSync(compressed);
-    return decompressed.toString('utf-8');
-  }
-
-  /**
-   * Compress to base64 string (for easier storage)
-   */
-  compressToBase64(
-    text: string,
-    options?: CompressionOptions
-  ): {
-    compressed: string;
+    compressed: Buffer;
     originalSize: number;
     compressedSize: number;
     ratio: number;
     percentSaved: number;
-  } {
-    const result = this.compress(text, options);
-
-    return {
-      compressed: result.compressed.toString('base64'),
-      originalSize: result.originalSize,
-      compressedSize: result.compressedSize,
-      ratio: result.ratio,
-      percentSaved: result.percentSaved,
-    };
-  }
-
-  /**
-   * Decompress from base64 string
-   */
-  decompressFromBase64(compressed: string): string {
-    const buffer = Buffer.from(compressed, 'base64');
-    return this.decompress(buffer);
-  }
+}
 
-  /**
-   * Check if compression would be beneficial
-   */
-  shouldCompress(text: string, minSize: number = 1000): boolean {
-    // Don't compress small texts - overhead not worth it
-    if (text.length < minSize) {
-      return false;
+export class CompressionEngine {
+    public compress(text: string, options?: { quality?: number; mode?: string; }): CompressionResult {
+        const originalSize = Buffer.byteLength(text, 'utf8');
+        if (originalSize === 0) {
+            return {
+                compressed: Buffer.alloc(0),
+                originalSize: 0,
+                compressedSize: 0,
+                ratio: 0,
+                percentSaved: 0,
+            };
+        }
+
+        const params = {
+            [constants.BROTLI_PARAM_QUALITY]: options?.quality ?? constants.BROTLI_MAX_QUALITY,
+            [constants.BROTLI_PARAM_MODE]: options?.mode === 'text' ? constants.BROTLI_MODE_TEXT : constants.BROTLI_MODE_GENERIC,
+        };
+
+        const compressed = brotliCompressSync(text, { params });
+        const compressedSize = compressed.length;
+        const ratio = compressedSize / originalSize;
+        const percentSaved = (1 - ratio) * 100;
+
+        return {
+            compressed,
+            originalSize,
+            compressedSize,
+            ratio,
+            percentSaved,
+        };
     }
 
-    // Quick sample compression to check ratio
-    const sample = text.slice(0, Math.min(text.length, 5000));
-    const result = this.compress(sample, { quality: 4 }); // Use lower quality for quick test
+    public decompress(buffer: Buffer): string {
+        if (!buffer || buffer.length === 0) {
+            return '';
+        }
+        return brotliDecompressSync(buffer).toString('utf8');
+    }
 
-    // Only compress if we get at least 20% reduction
-    return result.percentSaved >= 20;
-  }
+    public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit<CompressionResult, 'compressed'> & { compressed: string } {
+        const result = this.compress(text, options);
+        return {
+            originalSize: result.originalSize,
+            compressedSize: result.compressedSize,
+            ratio: result.ratio,
+            percentSaved: result.percentSaved,
+            compressed: result.compressed.toString('base64'),
+        };
+    }
 
-  /**
-   * Batch compress multiple texts
-   */
-  compressBatch(
-    texts: string[],
-    options?: CompressionOptions
-  ): Array<{
-    index: number;
-    compressed: Buffer;
-    originalSize: number;
-    compressedSize: number;
-    ratio: number;
-  }> {
-    return texts.map((text, index) => {
-      const result = this.compress(text, options);
-      return {
-        index,
-        compressed: result.compressed,
-        originalSize: result.originalSize,
-        compressedSize: result.compressedSize,
-        ratio: result.ratio,
-      };
-    });
-  }
+    public decompressFromBase64(base64: string): string {
+        const buffer = Buffer.from(base64, 'base64');
+        return this.decompress(buffer);
+    }
 
-  /**
-   * Get compression statistics for text
-   */
-  getCompressionStats(text: string): {
-    uncompressed: number;
-    compressed: number;
-    ratio: number;
-    percentSaved: number;
-    recommended: boolean;
-  } {
-    const result = this.compress(text);
+    public compressBatch(texts: string[]): (CompressionResult & { index: number; })[] {
+        return texts.map((text, index) => ({
+            ...this.compress(text),
+            index,
+        }));
+    }
 
-    return {
-      uncompressed: result.originalSize,
-      compressed: result.compressedSize,
-      ratio: result.ratio,
-      percentSaved: result.percentSaved,
-      recommended: this.shouldCompress(text),
-    };
-  }
+    public shouldCompress(text: string, minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES): boolean {
+        if (Buffer.byteLength(text, 'utf8') < minSize) {
+            return false;
+        }
+        const stats = this.getCompressionStats(text, minSize);
+        return stats.percentSaved >= 20;
+    }
 
-  /**
-   * Convert mode string to Brotli constant
-   */
-  private getModeConstant(mode?: 'text' | 'font' | 'generic'): number {
-    switch (mode) {
-      case 'text':
-        return constants.BROTLI_MODE_TEXT;
-      case 'font':
-        return constants.BROTLI_MODE_FONT;
-      default:
-        return constants.BROTLI_MODE_GENERIC;
+    public getCompressionStats(
+        text: string,
+        minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES
+    ): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } {
+        const result = this.compress(text);
+        const recommended = result.originalSize >= minSize && result.percentSaved >= 20;
+        return {
+            uncompressed: result.originalSize,
+            compressed: result.compressedSize,
+            ratio: result.ratio,
+            percentSaved: result.percentSaved,
+            recommended: recommended,
+        };
     }
-  }
+
+    /**
+     * Default minimum size (in bytes) below which compression isn't
+     * worth the metadata overhead. Exposed as a static so callers can
+     * override via OptimizationConfig.minOutputSizeBytes and have
+     * `recommended` / `shouldCompress` agree on the threshold.
+     */
+    public static DEFAULT_MIN_SIZE_BYTES = 500;
 }
diff --git a/src/core/config.ts b/src/core/config.ts
index d5cd01c..f684c0a 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -2,10 +2,39 @@
  * Configuration management for Hypercontext MCP
  */
 
-import { HypercontextConfig } from './types.js';
-import { readFileSync, existsSync } from 'fs';
+import { z } from 'zod';
+import { HypercontextConfig, OptimizationConfig } from './types.js';
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
 import { homedir } from 'os';
-import { join } from 'path';
+import { dirname, join } from 'path';
+
+const DEFAULT_OPTIMIZATION: OptimizationConfig = {
+  compressionTokenThreshold: 0.7,
+  compressionPreserveThreshold: 0.3,
+  minTokensBeforeCompression: 1000,
+  modelTokenLimits: {
+    'gpt-4': 128000,
+    'gpt-4-turbo': 128000,
+    'gpt-3.5-turbo': 16385,
+    'claude-3-opus': 200000,
+    'claude-3-sonnet': 200000,
+    'claude-3-haiku': 200000,
+    'claude-opus-4-7': 1000000,
+    'claude-sonnet-4-6': 1000000,
+    'gemini-1.5-pro': 2000000,
+    'gemini-2.5-flash': 1000000,
+  },
+  minOutputSizeBytes: 500,
+  quality: 'balanced',
+  cacheSettings: {
+    maxSize: 1000,
+    ttlSeconds: 3600,
+  },
+  chatCompression: {
+    enabled: true,
+    strategy: 'summarize',
+  },
+};
 
 const DEFAULT_CONFIG: HypercontextConfig = {
   cache: {
@@ -38,18 +67,119 @@ const DEFAULT_CONFIG: HypercontextConfig = {
     streamingThreshold: 1024 * 1024, // 1MB
     enableStreaming: false,
   },
+  optimization: DEFAULT_OPTIMIZATION,
 };
 
+const CacheSettingsSchema = z.object({
+  maxSize: z.number().int().positive(),
+  ttlSeconds: z.number().int().nonnegative(),
+});
+
+const ChatCompressionSchema = z.object({
+  enabled: z.boolean(),
+  tokenLimit: z.number().int().positive().optional(),
+  strategy: z.enum(['summarize', 'truncate']),
+});
+
+const OptimizationConfigSchema = z.object({
+  compressionTokenThreshold: z.number().min(0).max(1),
+  compressionPreserveThreshold: z.number().min(0).max(1),
+  minTokensBeforeCompression: z.number().int().nonnegative(),
+  modelTokenLimits: z.record(z.string(), z.number().int().positive()),
+  minOutputSizeBytes: z.number().int().nonnegative(),
+  quality: z.enum(['fast', 'balanced', 'max']),
+  cacheSettings: CacheSettingsSchema,
+  chatCompression: ChatCompressionSchema,
+});
+
+/**
+ * User-supplied optimization schema. Partial at every depth so users can
+ * override just one field (e.g. `{ cacheSettings: { maxSize: 42 } }`)
+ * without having to re-supply the entire sub-object.
+ */
+const OptimizationConfigUserSchema = OptimizationConfigSchema.partial().extend({
+  cacheSettings: CacheSettingsSchema.partial().optional(),
+  chatCompression: ChatCompressionSchema.partial().optional(),
+});
+
+const HypercontextConfigSchema = z
+  .object({
+    cache: z
+      .object({
+        enabled: z.boolean(),
+        maxSizeMB: z.number().int().positive(),
+        defaultTTL: z.number().int().nonnegative(),
+        ttlByType: z.record(z.string(), z.number().int().nonnegative()),
+        compression: z.enum(['none', 'gzip', 'brotli', 'auto']),
+      })
+      .partial()
+      .optional(),
+    monitoring: z
+      .object({
+        enabled: z.boolean(),
+        detailedLogging: z.boolean(),
+        metricsRetentionDays: z.number().int().nonnegative(),
+        dashboardPort: z.number().int().positive(),
+        enableWebUI: z.boolean(),
+      })
+      .partial()
+      .optional(),
+    intelligence: z
+      .object({
+        enablePatternDetection: z.boolean(),
+        enableWorkflowLearning: z.boolean(),
+        enablePredictiveCaching: z.boolean(),
+        mlModelPath: z.string(),
+      })
+      .partial()
+      .optional(),
+    performance: z
+      .object({
+        maxConcurrentOps: z.number().int().positive(),
+        streamingThreshold: z.number().int().positive(),
+        enableStreaming: z.boolean(),
+      })
+      .partial()
+      .optional(),
+    optimization: OptimizationConfigUserSchema.optional(),
+  })
+  .passthrough();
+
 export class ConfigManager {
   private config: HypercontextConfig;
   private configPath: string;
 
-  constructor(configPath?: string) {
+  constructor(configPath?: string, options: { writeDefaults?: boolean } = {}) {
     this.configPath =
-      configPath || join(homedir(), '.hypercontext', 'config.json');
+      configPath || join(homedir(), '.token-optimizer', 'config.json');
+    const writeDefaults = options.writeDefaults ?? true;
+    if (writeDefaults && !existsSync(this.configPath)) {
+      this.writeDefaultConfig();
+    }
     this.config = this.loadConfig();
   }
 
+  /**
+   * Write DEFAULT_CONFIG to configPath on first run — addresses #120's
+   * "Default config created on first run" acceptance criterion.
+   * Errors are logged and non-fatal; callers still get an in-memory
+   * DEFAULT_CONFIG via loadConfig().
+   */
+  private writeDefaultConfig(): void {
+    try {
+      const dir = dirname(this.configPath);
+      if (!existsSync(dir)) {
+        mkdirSync(dir, { recursive: true });
+      }
+      writeFileSync(this.configPath, JSON.stringify(DEFAULT_CONFIG, null, 2));
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(
+        `ConfigManager: failed to write default config to ${this.configPath}: ${message}`
+      );
+    }
+  }
+
   private loadConfig(): HypercontextConfig {
     if (!existsSync(this.configPath)) {
       return DEFAULT_CONFIG;
@@ -57,26 +187,80 @@ export class ConfigManager {
 
     try {
       const fileContent = readFileSync(this.configPath, 'utf-8');
-      const userConfig = JSON.parse(fileContent);
-      return this.mergeConfig(DEFAULT_CONFIG, userConfig);
+      const rawUserConfig = JSON.parse(fileContent);
+      const parsed = HypercontextConfigSchema.safeParse(rawUserConfig);
+      if (!parsed.success) {
+        const issues = parsed.error.issues
+          .map((i) => `  - ${i.path.join('.') || 'root'}: ${i.message}`)
+          .join('\n');
+        console.warn(
+          `Invalid config at ${this.configPath}, using defaults:\n${issues}`
+        );
+        return DEFAULT_CONFIG;
+      }
+      return this.mergeConfig(DEFAULT_CONFIG, parsed.data);
     } catch (error) {
-      console.warn('Failed to load config, using defaults:', error);
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Failed to load config, using defaults: ${message}`);
       return DEFAULT_CONFIG;
     }
   }
 
   private mergeConfig(
     defaults: HypercontextConfig,
-    user: Partial<HypercontextConfig>
+    user: {
+      cache?: Partial<HypercontextConfig['cache']>;
+      monitoring?: Partial<HypercontextConfig['monitoring']>;
+      intelligence?: Partial<HypercontextConfig['intelligence']>;
+      performance?: Partial<HypercontextConfig['performance']>;
+      optimization?: Partial<
+        Omit<OptimizationConfig, 'cacheSettings' | 'chatCompression'>
+      > & {
+        cacheSettings?: Partial<OptimizationConfig['cacheSettings']>;
+        chatCompression?: Partial<OptimizationConfig['chatCompression']>;
+      };
+    }
   ): HypercontextConfig {
+    const userOpt = user.optimization ?? {};
+    // Preserve any existing optimization state the caller may have set
+    // (e.g. via prior update()) instead of always starting from
+    // DEFAULT_OPTIMIZATION. Non-optimization updates should no longer
+    // silently reset the entire optimization block.
+    const baseOptimization = defaults.optimization ?? DEFAULT_OPTIMIZATION;
     return {
       cache: { ...defaults.cache, ...user.cache },
       monitoring: { ...defaults.monitoring, ...user.monitoring },
       intelligence: { ...defaults.intelligence, ...user.intelligence },
       performance: { ...defaults.performance, ...user.performance },
+      optimization: {
+        ...baseOptimization,
+        ...userOpt,
+        cacheSettings: {
+          ...baseOptimization.cacheSettings,
+          ...(userOpt.cacheSettings ?? {}),
+        },
+        chatCompression: {
+          ...baseOptimization.chatCompression,
+          ...(userOpt.chatCompression ?? {}),
+        },
+        // Deep-merge model token limits so a user override like
+        // { "custom-model": 500_000 } does not drop the built-in map.
+        modelTokenLimits: {
+          ...baseOptimization.modelTokenLimits,
+          ...(userOpt.modelTokenLimits ?? {}),
+        },
+      },
     };
   }
 
+  public getOptimizationConfig(): OptimizationConfig {
+    return this.config.optimization ?? DEFAULT_OPTIMIZATION;
+  }
+
+  public getModelTokenLimit(modelName: string): number | undefined {
+    return this.getOptimizationConfig().modelTokenLimits[modelName];
+  }
+
   get(): HypercontextConfig {
     return { ...this.config };
   }
diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts
new file mode 100644
index 0000000..30df98f
--- /dev/null
+++ b/src/core/session-manager.ts
@@ -0,0 +1,282 @@
+import { existsSync } from 'fs';
+import { z } from 'zod';
+import {
+    Session,
+    SessionOptions,
+    MessageRole,
+} from './session.js';
+import { ITokenizer } from './tokenizers/i-tokenizer.js';
+import { ISummarizer } from './summarization.js';
+import { loadMaybeGzippedFile, saveGzippedFile } from '../utils/gzip.js';
+
+/**
+ * Persistent SessionManager — addresses issues #121 / #122.
+ *
+ * Production behaviors added after the audit:
+ *   - Atomic persistence: write to <path>.tmp then rename so a crash mid-
+ *     write never produces a corrupt sessions.json.
+ *   - Debounced persistence: rapid addMessage calls coalesce into one
+ *     disk write per PERSIST_DEBOUNCE_MS window.
+ *   - Error-isolated persist(): a disk-full or permission error is logged
+ *     and never bubbles up to crash the MCP server.
+ *   - Schema-validated load(): malformed persisted state is rejected with
+ *     a warning instead of being cast blindly.
+ *   - Size / expiry caps: sessions inactive past `sessionTtlMs` are
+ *     evicted on load, and no individual file state entry can exceed
+ *     `maxFileStateBytes`.
+ */
+
+const PERSIST_DEBOUNCE_MS = 250;
+const DEFAULT_SESSION_TTL_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
+const DEFAULT_MAX_FILE_STATE_BYTES = 10 * 1024 * 1024; // 10 MB per file
+
+const MessageSchema = z.object({
+    role: z.enum(['system', 'user', 'assistant', 'tool']),
+    content: z.string(),
+    timestamp: z.number(),
+});
+
+const SessionSnapshotSchema = z.object({
+    id: z.string(),
+    history: z.array(MessageSchema),
+    fileState: z.record(z.string(), z.string()),
+    maxTokens: z.number(),
+    createdAt: z.number(),
+    updatedAt: z.number(),
+});
+
+const PersistedStateSchema = z.object({
+    sessions: z.array(SessionSnapshotSchema),
+});
+
+export interface SessionManagerOptions {
+    persistencePath?: string;
+    tokenizer?: ITokenizer;
+    summarizer?: ISummarizer;
+    defaultMaxTokens?: number;
+    sessionTtlMs?: number;
+    maxFileStateBytes?: number;
+}
+
+export class SessionManager {
+    private readonly sessions = new Map<string, Session>();
+    private readonly persistencePath: string | null;
+    private readonly tokenizer: ITokenizer | undefined;
+    private readonly summarizer: ISummarizer | undefined;
+    private readonly defaultMaxTokens: number | undefined;
+    private readonly sessionTtlMs: number;
+    private readonly maxFileStateBytes: number;
+    private pendingPersistTimer: NodeJS.Timeout | null = null;
+    private persistInFlight = false;
+
+    constructor(options: SessionManagerOptions = {}) {
+        this.persistencePath = options.persistencePath ?? null;
+        this.tokenizer = options.tokenizer;
+        this.summarizer = options.summarizer;
+        this.defaultMaxTokens = options.defaultMaxTokens;
+        this.sessionTtlMs = options.sessionTtlMs ?? DEFAULT_SESSION_TTL_MS;
+        this.maxFileStateBytes =
+            options.maxFileStateBytes ?? DEFAULT_MAX_FILE_STATE_BYTES;
+        if (
+            this.persistencePath &&
+            (existsSync(`${this.persistencePath}.gz`) ||
+                existsSync(this.persistencePath))
+        ) {
+            this.load();
+        }
+    }
+
+    public createSession(options: SessionOptions = {}): Session {
+        const session = new Session({
+            tokenizer: this.tokenizer,
+            summarizer: this.summarizer,
+            maxTokens: options.maxTokens ?? this.defaultMaxTokens,
+            ...options,
+        });
+        this.sessions.set(session.id, session);
+        this.schedulePersist();
+        return session;
+    }
+
+    public getSession(id: string): Session | undefined {
+        return this.sessions.get(id);
+    }
+
+    public listSessions(): Session[] {
+        return Array.from(this.sessions.values());
+    }
+
+    public deleteSession(id: string): boolean {
+        const removed = this.sessions.delete(id);
+        if (removed) {
+            this.schedulePersist();
+        }
+        return removed;
+    }
+
+    public async addMessage(
+        sessionId: string,
+        role: MessageRole,
+        content: string
+    ): Promise<number> {
+        const session = this.requireSession(sessionId);
+        session.addMessage(role, content);
+        // Schedule persistence in `finally` so the mutated session still
+        // hits disk even if tokenization or compression throws. Without
+        // this, a single tokenizer error leaves the message appended
+        // in memory but never persisted, and a restart loses the turn.
+        try {
+            const currentTokens = await session.getHistoryTokenCount();
+            if (currentTokens > session.maxTokens) {
+                return await session.compressHistory();
+            }
+            return currentTokens;
+        } finally {
+            this.schedulePersist();
+        }
+    }
+
+    /** Fetch an existing session, or create one with the given id. */
+    public getOrCreateSession(id: string): Session {
+        const existing = this.sessions.get(id);
+        if (existing) {
+            return existing;
+        }
+        return this.createSession({ id });
+    }
+
+    public updateFileState(
+        sessionId: string,
+        filePath: string,
+        content: string
+    ): void {
+        const session = this.requireSession(sessionId);
+        if (Buffer.byteLength(content, 'utf8') > this.maxFileStateBytes) {
+            throw new Error(
+                `Session file state content exceeds ${this.maxFileStateBytes} bytes for ${filePath}`
+            );
+        }
+        session.setFileContent(filePath, content);
+        this.schedulePersist();
+    }
+
+    public clearFileState(sessionId: string, filePath: string): void {
+        const session = this.requireSession(sessionId);
+        session.clearFileContent(filePath);
+        this.schedulePersist();
+    }
+
+    /**
+     * Flush any pending debounced persist. Call this from the host's
+     * shutdown handler so the last writes survive.
+     */
+    public async flush(): Promise<void> {
+        if (this.pendingPersistTimer) {
+            clearTimeout(this.pendingPersistTimer);
+            this.pendingPersistTimer = null;
+        }
+        this.persistNow();
+    }
+
+    private requireSession(id: string): Session {
+        const session = this.sessions.get(id);
+        if (!session) {
+            throw new Error(`Unknown session: ${id}`);
+        }
+        return session;
+    }
+
+    private schedulePersist(): void {
+        if (!this.persistencePath) {
+            return;
+        }
+        if (this.pendingPersistTimer) {
+            return;
+        }
+        this.pendingPersistTimer = setTimeout(() => {
+            this.pendingPersistTimer = null;
+            this.persistNow();
+        }, PERSIST_DEBOUNCE_MS);
+        // Don't keep the event loop alive just for persistence.
+        if (typeof this.pendingPersistTimer.unref === 'function') {
+            this.pendingPersistTimer.unref();
+        }
+    }
+
+    private persistNow(): void {
+        if (!this.persistencePath || this.persistInFlight) {
+            return;
+        }
+        this.persistInFlight = true;
+        try {
+            const state = {
+                sessions: this.listSessions().map((s) => s.toSnapshot()),
+            };
+            // Gzip + atomic tmp + rename (handled inside saveGzippedFile).
+            saveGzippedFile(
+                this.persistencePath,
+                JSON.stringify(state, null, 2)
+            );
+        } catch (error) {
+            const message =
+                error instanceof Error ? error.message : String(error);
+            console.warn(
+                `SessionManager: failed to persist to ${this.persistencePath}: ${message}`
+            );
+        } finally {
+            this.persistInFlight = false;
+        }
+    }
+
+    private load(): void {
+        if (!this.persistencePath) {
+            return;
+        }
+        try {
+            const raw = loadMaybeGzippedFile(this.persistencePath);
+            if (raw === null) {
+                return;
+            }
+            const json = JSON.parse(raw);
+            const parsed = PersistedStateSchema.safeParse(json);
+            if (!parsed.success) {
+                console.warn(
+                    `SessionManager: invalid persisted state at ${this.persistencePath}, discarding.`
+                );
+                return;
+            }
+            const now = Date.now();
+            for (const snapshot of parsed.data.sessions) {
+                if (now - snapshot.updatedAt > this.sessionTtlMs) {
+                    continue; // Expired session — drop.
+                }
+                // Enforce the same per-file size cap on restore that
+                // updateFileState enforces on writes; otherwise a
+                // tampered or legacy persisted file can smuggle in
+                // oversized entries past the live guardrail.
+                const maxBytes = this.maxFileStateBytes;
+                const sanitizedFileState: Record<string, string> = {};
+                for (const [filePath, content] of Object.entries(snapshot.fileState)) {
+                    if (Buffer.byteLength(content, 'utf8') <= maxBytes) {
+                        sanitizedFileState[filePath] = content;
+                    }
+                }
+                const safeSnapshot = {
+                    ...snapshot,
+                    fileState: sanitizedFileState,
+                };
+                const session = Session.fromSnapshot(safeSnapshot, {
+                    tokenizer: this.tokenizer,
+                    summarizer: this.summarizer,
+                });
+                this.sessions.set(session.id, session);
+            }
+        } catch (error) {
+            const message =
+                error instanceof Error ? error.message : String(error);
+            console.warn(
+                `SessionManager: failed to load sessions from ${this.persistencePath}: ${message}`
+            );
+        }
+    }
+}
diff --git a/src/core/session.ts b/src/core/session.ts
new file mode 100644
index 0000000..5dd629d
--- /dev/null
+++ b/src/core/session.ts
@@ -0,0 +1,210 @@
+import { randomUUID } from 'crypto';
+import { ITokenizer } from './tokenizers/i-tokenizer.js';
+import { ISummarizer, TruncatingSummarizer } from './summarization.js';
+
+/**
+ * Session state — addresses issues #121 and #122.
+ *
+ * A Session holds a single user's conversation history plus a per-file
+ * content snapshot. The history is token-budgeted (see #121) and the file
+ * snapshots feed context-delta tracking (#122).
+ */
+
+export type MessageRole = 'system' | 'user' | 'assistant' | 'tool';
+
+export interface Message {
+    role: MessageRole;
+    content: string;
+    timestamp: number;
+}
+
+export interface SessionFileState {
+    [filePath: string]: string;
+}
+
+export interface SessionSnapshot {
+    id: string;
+    history: Message[];
+    fileState: SessionFileState;
+    maxTokens: number;
+    createdAt: number;
+    updatedAt: number;
+}
+
+export interface SessionOptions {
+    id?: string;
+    maxTokens?: number;
+    preserveTailRatio?: number;
+    tokenizer?: ITokenizer;
+    summarizer?: ISummarizer;
+    /**
+     * When true, getHistoryTokenCount may fall back to a character/4
+     * heuristic if no tokenizer is supplied. Production code should
+     * always pass a real tokenizer and leave this false (the default).
+     */
+    allowCharHeuristic?: boolean;
+    /** Override for createdAt — used by fromSnapshot. */
+    createdAt?: number;
+    /** Override for updatedAt — used by fromSnapshot. */
+    updatedAt?: number;
+}
+
+const DEFAULT_MAX_TOKENS = 100_000;
+const DEFAULT_PRESERVE_TAIL_RATIO = 0.3;
+const CHAR_HEURISTIC_RATIO = 4;
+
+export class Session {
+    public readonly id: string;
+    public maxTokens: number;
+    public readonly createdAt: number;
+    public updatedAt: number;
+
+    private history: Message[] = [];
+    private fileState: SessionFileState = {};
+    private readonly preserveTailRatio: number;
+    private readonly tokenizer: ITokenizer | null;
+    private readonly summarizer: ISummarizer;
+    private readonly allowCharHeuristic: boolean;
+
+    constructor(options: SessionOptions = {}) {
+        this.id = options.id ?? randomUUID();
+        this.maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
+        this.preserveTailRatio = options.preserveTailRatio ?? DEFAULT_PRESERVE_TAIL_RATIO;
+        this.tokenizer = options.tokenizer ?? null;
+        this.summarizer = options.summarizer ?? new TruncatingSummarizer();
+        this.allowCharHeuristic = options.allowCharHeuristic ?? false;
+        const now = Date.now();
+        this.createdAt = options.createdAt ?? now;
+        this.updatedAt = options.updatedAt ?? this.createdAt;
+    }
+
+    public addMessage(role: MessageRole, content: string): Message {
+        const message: Message = { role, content, timestamp: Date.now() };
+        this.history.push(message);
+        this.updatedAt = message.timestamp;
+        return message;
+    }
+
+    public getHistory(): readonly Message[] {
+        // Defensive copy so external mutation (push/splice/in-place
+        // edit) can't bypass updatedAt tracking or corrupt the history.
+        return this.history.map((message) => ({ ...message }));
+    }
+
+    public getFileState(): Readonly<SessionFileState> {
+        return { ...this.fileState };
+    }
+
+    public getFileContent(filePath: string): string | undefined {
+        return this.fileState[filePath];
+    }
+
+    public setFileContent(filePath: string, content: string): void {
+        this.fileState[filePath] = content;
+        this.updatedAt = Date.now();
+    }
+
+    public clearFileContent(filePath: string): void {
+        if (filePath in this.fileState) {
+            delete this.fileState[filePath];
+            this.updatedAt = Date.now();
+        }
+    }
+
+    /**
+     * Total token count of the current history.
+     *
+     * Requires a tokenizer unless the caller opted into the character/4
+     * heuristic via `allowCharHeuristic: true`. We default to requiring a
+     * tokenizer because #124's whole point is eliminating char/4.
+     */
+    public async getHistoryTokenCount(): Promise<number> {
+        if (!this.tokenizer) {
+            if (!this.allowCharHeuristic) {
+                throw new Error(
+                    'Session.getHistoryTokenCount requires a tokenizer. ' +
+                        'Construct the Session with TokenizerFactory.create(...) ' +
+                        'or pass allowCharHeuristic: true to opt into the fallback.'
+                );
+            }
+            return this.history.reduce(
+                (acc, m) => acc + Math.ceil(m.content.length / CHAR_HEURISTIC_RATIO),
+                0
+            );
+        }
+        let total = 0;
+        for (const message of this.history) {
+            total += await this.tokenizer.countTokens(message.content);
+        }
+        return total;
+    }
+
+    /**
+     * Compress the history by summarizing everything except the
+     * preserve-tail fraction. Does nothing if history fits under maxTokens.
+     *
+     * Returns the new token count after compression.
+     */
+    public async compressHistory(): Promise<number> {
+        const currentTokens = await this.getHistoryTokenCount();
+        if (currentTokens <= this.maxTokens) {
+            return currentTokens;
+        }
+        if (this.history.length <= 1) {
+            return currentTokens;
+        }
+
+        const preserveCount = Math.max(
+            1,
+            Math.floor(this.history.length * this.preserveTailRatio)
+        );
+        const tail = this.history.slice(-preserveCount);
+        const head = this.history.slice(0, -preserveCount);
+        if (head.length === 0) {
+            return currentTokens;
+        }
+
+        const summary = await this.summarizer.summarize(head);
+        // Store summaries as `assistant`, not `system` — a user turn
+        // can contain prompt-injection text, and promoting it into a
+        // system-role message after compression would let that text
+        // act as a higher-priority instruction. Assistant role keeps
+        // the context without the privilege escalation.
+        const summaryMessage: Message = {
+            role: 'assistant',
+            content: `[summary of earlier conversation] ${summary}`,
+            timestamp: head[head.length - 1].timestamp,
+        };
+
+        this.history = [summaryMessage, ...tail];
+        this.updatedAt = Date.now();
+        return this.getHistoryTokenCount();
+    }
+
+    public toSnapshot(): SessionSnapshot {
+        return {
+            id: this.id,
+            history: this.history.map((message) => ({ ...message })),
+            fileState: { ...this.fileState },
+            maxTokens: this.maxTokens,
+            createdAt: this.createdAt,
+            updatedAt: this.updatedAt,
+        };
+    }
+
+    public static fromSnapshot(
+        snapshot: SessionSnapshot,
+        options: Omit<SessionOptions, 'id' | 'maxTokens' | 'createdAt' | 'updatedAt'> = {}
+    ): Session {
+        const session = new Session({
+            id: snapshot.id,
+            maxTokens: snapshot.maxTokens,
+            createdAt: snapshot.createdAt,
+            updatedAt: snapshot.updatedAt,
+            ...options,
+        });
+        session.history = snapshot.history.map((message) => ({ ...message }));
+        session.fileState = { ...snapshot.fileState };
+        return session;
+    }
+}
diff --git a/src/core/summarization.ts b/src/core/summarization.ts
new file mode 100644
index 0000000..b68ec4b
--- /dev/null
+++ b/src/core/summarization.ts
@@ -0,0 +1,288 @@
+import { Message } from './session.js';
+
+/**
+ * Pluggable summarization — part of issue #121.
+ *
+ * An ISummarizer implementation takes a list of Messages and returns a
+ * natural-language summary. We ship three implementations out of the box:
+ *
+ *   - TruncatingSummarizer — self-contained, zero deps. Concatenates
+ *     role:content and trims to `maxChars`. Useful for tests and for
+ *     users who don't want to hand a foundation model every
+ *     conversation turn.
+ *   - AnthropicSummarizer — calls /v1/messages on api.anthropic.com.
+ *     Needs ANTHROPIC_API_KEY. Used when the host wires it up.
+ *   - GoogleAISummarizer — calls generativelanguage.googleapis.com.
+ *     Needs GOOGLE_AI_API_KEY.
+ *
+ * Selection lives in `createSummarizerFromEnv()` below — the server
+ * picks the highest-fidelity summarizer whose credentials are available
+ * and falls back to TruncatingSummarizer otherwise.
+ */
+
+const SUMMARY_SYSTEM_PROMPT =
+    'You are summarizing the early portion of a conversation so the rest can continue without the full history in context. ' +
+    'Produce a concise summary (at most ~300 tokens) that preserves decisions made, outstanding TODOs, and any concrete facts the assistant has already told the user. ' +
+    'Do not address the user directly; write in third person.';
+
+export interface ISummarizer {
+    summarize(messages: readonly Message[]): Promise<string>;
+}
+
+export interface TruncatingSummarizerOptions {
+    /** Approximate maximum characters of summary output. Default: 2000. */
+    maxChars?: number;
+}
+
+const TRUNCATION_MARKER = '\n... [truncated] ...\n';
+const MIN_MAX_CHARS = 32;
+
+export class TruncatingSummarizer implements ISummarizer {
+    private readonly maxChars: number;
+
+    constructor(options: TruncatingSummarizerOptions = {}) {
+        const maxChars = options.maxChars ?? 2000;
+        if (!Number.isFinite(maxChars) || maxChars < MIN_MAX_CHARS) {
+            throw new Error(
+                `TruncatingSummarizer.maxChars must be >= ${MIN_MAX_CHARS}, got ${maxChars}`
+            );
+        }
+        this.maxChars = maxChars;
+    }
+
+    public async summarize(messages: readonly Message[]): Promise<string> {
+        if (messages.length === 0) {
+            return '';
+        }
+
+        const joined = messages
+            .map((m) => `${m.role}: ${m.content}`)
+            .join('\n');
+
+        if (joined.length <= this.maxChars) {
+            return joined;
+        }
+
+        // Budget excludes the marker length so the final string never
+        // exceeds maxChars — the previous `-20` was a guess that
+        // didn't match the marker exactly and produced unpredictable
+        // output for small limits.
+        const budget = Math.max(0, this.maxChars - TRUNCATION_MARKER.length);
+        const keepHead = Math.floor(budget * 0.4);
+        const keepTail = budget - keepHead;
+        return (
+            joined.slice(0, keepHead) +
+            TRUNCATION_MARKER +
+            joined.slice(-keepTail)
+        );
+    }
+}
+
+// ============================================================================
+// Anthropic-backed summarizer
+// ============================================================================
+
+const ANTHROPIC_ENDPOINT = 'https://api.anthropic.com/v1/messages';
+const ANTHROPIC_DEFAULT_MODEL = 'claude-haiku-4-5-20251001';
+const ANTHROPIC_API_VERSION = '2023-06-01';
+const SUMMARIZER_TIMEOUT_MS = 30_000;
+const SUMMARIZER_MAX_TOKENS = 1024;
+
+export interface AnthropicSummarizerOptions {
+    apiKey?: string;
+    model?: string;
+    endpoint?: string;
+    timeoutMs?: number;
+}
+
+export class AnthropicSummarizer implements ISummarizer {
+    private readonly apiKey: string;
+    private readonly model: string;
+    private readonly endpoint: string;
+    private readonly timeoutMs: number;
+
+    constructor(options: AnthropicSummarizerOptions = {}) {
+        const apiKey = options.apiKey ?? process.env.ANTHROPIC_API_KEY;
+        if (!apiKey) {
+            throw new Error(
+                'AnthropicSummarizer requires ANTHROPIC_API_KEY (or apiKey option).'
+            );
+        }
+        this.apiKey = apiKey;
+        this.model = options.model ?? ANTHROPIC_DEFAULT_MODEL;
+        this.endpoint = options.endpoint ?? ANTHROPIC_ENDPOINT;
+        this.timeoutMs = options.timeoutMs ?? SUMMARIZER_TIMEOUT_MS;
+    }
+
+    public async summarize(messages: readonly Message[]): Promise<string> {
+        if (messages.length === 0) {
+            return '';
+        }
+        const userContent = messages
+            .map((m) => `${m.role}: ${m.content}`)
+            .join('\n');
+
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+        try {
+            const response = await fetch(this.endpoint, {
+                method: 'POST',
+                headers: {
+                    'content-type': 'application/json',
+                    'x-api-key': this.apiKey,
+                    'anthropic-version': ANTHROPIC_API_VERSION,
+                },
+                body: JSON.stringify({
+                    model: this.model,
+                    max_tokens: SUMMARIZER_MAX_TOKENS,
+                    system: SUMMARY_SYSTEM_PROMPT,
+                    messages: [
+                        { role: 'user', content: userContent.slice(0, 200_000) },
+                    ],
+                }),
+                signal: controller.signal,
+            });
+
+            if (!response.ok) {
+                // Deliberately omit the response body — it can echo
+                // user prompt content and we don't want that leaking
+                // into log pipelines via thrown errors.
+                throw new Error(
+                    `Anthropic summarize failed: ${response.status} ${response.statusText}`
+                );
+            }
+
+            const data = (await response.json()) as {
+                content?: Array<{ type: string; text?: string }>;
+            };
+            const text =
+                data.content
+                    ?.filter((c) => c.type === 'text' && typeof c.text === 'string')
+                    .map((c) => c.text ?? '')
+                    .join('\n')
+                    .trim() ?? '';
+            return text;
+        } finally {
+            clearTimeout(timeout);
+        }
+    }
+}
+
+// ============================================================================
+// Google AI-backed summarizer
+// ============================================================================
+
+const GOOGLE_AI_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models';
+const GOOGLE_AI_DEFAULT_MODEL = 'gemini-2.5-flash';
+
+export interface GoogleAISummarizerOptions {
+    apiKey?: string;
+    model?: string;
+    endpoint?: string;
+    timeoutMs?: number;
+}
+
+export class GoogleAISummarizer implements ISummarizer {
+    private readonly apiKey: string;
+    private readonly model: string;
+    private readonly endpoint: string;
+    private readonly timeoutMs: number;
+
+    constructor(options: GoogleAISummarizerOptions = {}) {
+        const apiKey = options.apiKey ?? process.env.GOOGLE_AI_API_KEY;
+        if (!apiKey) {
+            throw new Error(
+                'GoogleAISummarizer requires GOOGLE_AI_API_KEY (or apiKey option).'
+            );
+        }
+        this.apiKey = apiKey;
+        this.model = options.model ?? GOOGLE_AI_DEFAULT_MODEL;
+        this.endpoint = options.endpoint ?? GOOGLE_AI_ENDPOINT;
+        this.timeoutMs = options.timeoutMs ?? SUMMARIZER_TIMEOUT_MS;
+    }
+
+    public async summarize(messages: readonly Message[]): Promise<string> {
+        if (messages.length === 0) {
+            return '';
+        }
+        const joined = messages
+            .map((m) => `${m.role}: ${m.content}`)
+            .join('\n');
+
+        const url = `${this.endpoint}/${encodeURIComponent(this.model)}:generateContent?key=${encodeURIComponent(this.apiKey)}`;
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+        try {
+            const response = await fetch(url, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({
+                    systemInstruction: { parts: [{ text: SUMMARY_SYSTEM_PROMPT }] },
+                    contents: [
+                        {
+                            role: 'user',
+                            parts: [{ text: joined.slice(0, 200_000) }],
+                        },
+                    ],
+                    generationConfig: { maxOutputTokens: SUMMARIZER_MAX_TOKENS },
+                }),
+                signal: controller.signal,
+            });
+
+            if (!response.ok) {
+                // See AnthropicSummarizer — no body in the thrown error.
+                throw new Error(
+                    `Google AI summarize failed: ${response.status} ${response.statusText}`
+                );
+            }
+
+            const data = (await response.json()) as {
+                candidates?: Array<{
+                    content?: { parts?: Array<{ text?: string }> };
+                }>;
+            };
+            const text =
+                data.candidates?.[0]?.content?.parts
+                    ?.map((p) => p.text ?? '')
+                    .join('\n')
+                    .trim() ?? '';
+            return text;
+        } finally {
+            clearTimeout(timeout);
+        }
+    }
+}
+
+// ============================================================================
+// Factory
+// ============================================================================
+
+/**
+ * Pick an ISummarizer based on available credentials:
+ *   1. ANTHROPIC_API_KEY → AnthropicSummarizer
+ *   2. GOOGLE_AI_API_KEY → GoogleAISummarizer
+ *   3. fallback        → TruncatingSummarizer (no network, no key)
+ *
+ * Anthropic sits first because this project is Claude-adjacent; users
+ * who prefer Gemini can either unset ANTHROPIC_API_KEY or construct
+ * GoogleAISummarizer directly.
+ */
+export function createSummarizerFromEnv(): ISummarizer {
+    if (process.env.ANTHROPIC_API_KEY) {
+        try {
+            return new AnthropicSummarizer();
+        } catch {
+            // Fall through to next option.
+        }
+    }
+    if (process.env.GOOGLE_AI_API_KEY) {
+        try {
+            return new GoogleAISummarizer();
+        } catch {
+            // Fall through.
+        }
+    }
+    return new TruncatingSummarizer();
+}
diff --git a/src/core/token-counter.ts b/src/core/token-counter.ts
index d4bd471..80dd895 100644
--- a/src/core/token-counter.ts
+++ b/src/core/token-counter.ts
@@ -1,4 +1,7 @@
 import { encoding_for_model, Tiktoken } from 'tiktoken';
+import { TokenizerFactory } from './tokenizers/tokenizer-factory.js';
+import { ITokenizer } from './tokenizers/i-tokenizer.js';
+import { TiktokenTokenizer } from './tokenizers/tiktoken-tokenizer.js';
 
 export interface TokenCountResult {
   tokens: number;
@@ -6,114 +9,95 @@ export interface TokenCountResult {
   estimatedCost?: number;
 }
 
+/**
+ * TokenCounter — delegates tokenization to the pluggable
+ * TokenizerFactory from issue #124 while preserving the callable
+ * surface (`count`, `countBatch`, `estimate`, `calculateSavings`,
+ * `calculateCacheSavings`, `exceedsLimit`, `truncate`,
+ * `getTokenCharRatio`, `free`) the rest of the codebase relies on.
+ *
+ * Truncation still uses a local tiktoken encoder because the
+ * ITokenizer contract doesn't expose the raw token array — we
+ * keep one for GPT-4-family models and otherwise degrade to
+ * character-based truncation.
+ */
 export class TokenCounter {
-  private encoder: Tiktoken;
-  private readonly model: string;
+  private readonly tokenizer: ITokenizer;
+  private readonly encoder: Tiktoken | null;
+  public readonly model: string;
 
   constructor(model?: string) {
-    // Auto-detect model from environment or use provided model
-    // Claude Code sets CLAUDE_MODEL env var with the active model
-    // Falls back to GPT-4 as universal approximation
     this.model =
       model ||
       process.env.CLAUDE_MODEL ||
       process.env.ANTHROPIC_MODEL ||
+      process.env.OPENAI_MODEL ||
+      process.env.GOOGLE_AI_MODEL ||
       'gpt-4';
 
-    // Map Claude models to closest tiktoken equivalent
-    // Claude uses similar tokenization to GPT-4, so it's a good approximation
-    const tokenModel = this.mapToTiktokenModel(this.model);
-
-    // Initialize tiktoken encoder
-    this.encoder = encoding_for_model(tokenModel);
-  }
-
-  /**
-   * Map Claude/Anthropic models to tiktoken model names
-   */
-  private mapToTiktokenModel(model: string): 'gpt-4' | 'gpt-3.5-turbo' {
-    const lowerModel = model.toLowerCase();
-
-    // Claude models use GPT-4 tokenizer as closest approximation
-    if (
-      lowerModel.includes('claude') ||
-      lowerModel.includes('sonnet') ||
-      lowerModel.includes('opus') ||
-      lowerModel.includes('haiku')
-    ) {
-      return 'gpt-4';
-    }
-
-    // GPT-4 variants
-    if (lowerModel.includes('gpt-4')) {
-      return 'gpt-4';
+    this.tokenizer = TokenizerFactory.create(this.model);
+
+    // Keep a local encoder for tiktoken-compatible models — truncate()
+    // needs to slice the raw token array, which the ITokenizer interface
+    // intentionally does not expose.
+    if (TiktokenTokenizer.supports(this.model)) {
+      this.encoder = encoding_for_model(
+        TiktokenTokenizer.mapToTiktokenModel(this.model)
+      );
+    } else {
+      this.encoder = null;
     }
-
-    // GPT-3.5 variants
-    if (lowerModel.includes('gpt-3.5') || lowerModel.includes('gpt3.5')) {
-      return 'gpt-3.5-turbo';
-    }
-
-    // Default to GPT-4 for unknown models
-    return 'gpt-4';
   }
 
   /**
-   * Count tokens in text
+   * Count tokens in text (synchronous).
+   *
+   * Synchronous on tiktoken-backed tokenizers, which is all we expose
+   * externally via Anthropic/OpenAI. Remote tokenizers (Google AI) are
+   * reachable via `countAsync`.
    */
   count(text: string): TokenCountResult {
-    const tokens = this.encoder.encode(text);
-
+    if (this.encoder) {
+      return {
+        tokens: this.encoder.encode(text).length,
+        characters: text.length,
+      };
+    }
+    // Fall back to the synchronous estimate so non-tiktoken paths keep
+    // working. Callers that want exact remote counts should use
+    // countAsync.
     return {
-      tokens: tokens.length,
+      tokens: this.estimate(text),
       characters: text.length,
     };
   }
 
   /**
-   * Count tokens in multiple texts
+   * Async token counting through the pluggable tokenizer — accurate for
+   * both local tiktoken and remote Google AI paths.
    */
+  async countAsync(text: string): Promise<TokenCountResult> {
+    const tokens = await this.tokenizer.countTokens(text);
+    return { tokens, characters: text.length };
+  }
+
   countBatch(texts: string[]): TokenCountResult {
     let totalTokens = 0;
     let totalCharacters = 0;
-
     for (const text of texts) {
       const result = this.count(text);
       totalTokens += result.tokens;
       totalCharacters += result.characters;
     }
-
-    return {
-      tokens: totalTokens,
-      characters: totalCharacters,
-    };
+    return { tokens: totalTokens, characters: totalCharacters };
   }
 
-  /**
-   * Estimate token count without encoding (faster, less accurate)
-   */
   estimate(text: string): number {
-    // Rough estimate: ~4 characters per token on average
+    // Rough fallback: ~4 characters per token. Only used when no
+    // tiktoken encoder is available for this model.
     return Math.ceil(text.length / 4);
   }
 
-  /**
-   * Calculate token savings based on context window management
-   *
-   * @param originalText - The original text content
-   * @param contextTokens - Number of tokens remaining in LLM context (default: 0 for full caching)
-   * @returns Token savings calculation
-   *
-   * @remarks
-   * This method measures context window optimization, NOT compression ratio.
-   * When content is cached externally (SQLite, Redis, etc.), it's completely
-   * removed from the LLM's context window, resulting in 100% token savings.
-   *
-   * Use cases:
-   * - External caching: contextTokens = 0 (100% savings)
-   * - Metadata-only: contextTokens = tokens in metadata (e.g., 8)
-   * - Summarization: contextTokens = tokens in summary (e.g., 50)
-   */
   calculateSavings(
     originalText: string,
     contextTokens: number = 0
@@ -136,36 +120,6 @@ export class TokenCounter {
     };
   }
 
-  /**
-   * Calculate context window savings for externally cached content
-   *
-   * @param originalText - The original text content being cached
-   * @returns Token savings calculation with 100% savings
-   *
-   * @remarks
-   * When content is compressed and stored in an external cache (SQLite, Redis, etc.),
-   * it's completely removed from the LLM's context window. The compressed/encoded
-   * data is NEVER sent to the LLM, so we measure 100% token savings.
-   *
-   * Key insight: We're measuring CONTEXT WINDOW CLEARANCE, not compression ratio.
-   * - ✅ Content removed from LLM context (saves tokens)
-   * - ✅ Storage compressed (saves disk space)
-   * - ❌ Don't count tokens in compressed data (it's not sent to LLM!)
-   *
-   * @example
-   * ```typescript
-   * const tokenCounter = new TokenCounter();
-   * const content = "Large file content...";
-   * const compressed = compress(content);
-   *
-   * // Store in external cache
-   * await cache.set(key, compressed);
-   *
-   * // Calculate context window savings
-   * const savings = tokenCounter.calculateCacheSavings(content);
-   * // Returns: { originalTokens: 250, contextTokens: 0, tokensSaved: 250, percentSaved: 100 }
-   * ```
-   */
   calculateCacheSavings(originalText: string): {
     originalTokens: number;
     contextTokens: number;
@@ -173,54 +127,45 @@ export class TokenCounter {
     percentSaved: number;
   } {
     const original = this.count(originalText);
-
     return {
       originalTokens: original.tokens,
-      contextTokens: 0, // External cache - nothing in context
-      tokensSaved: original.tokens, // 100% of original tokens saved
-      percentSaved: 100, // Always 100% for external caching
+      contextTokens: 0,
+      tokensSaved: original.tokens,
+      percentSaved: 100,
     };
   }
 
-  /**
-   * Check if text exceeds token limit
-   */
   exceedsLimit(text: string, limit: number): boolean {
-    const result = this.count(text);
-    return result.tokens > limit;
+    return this.count(text).tokens > limit;
   }
 
-  /**
-   * Truncate text to fit within token limit
-   */
   truncate(text: string, maxTokens: number): string {
+    if (!this.encoder) {
+      // No raw-token access for this model — fall back to a
+      // char-proportional slice using the estimate ratio.
+      const approxChars = maxTokens * 4;
+      return text.length <= approxChars ? text : text.slice(0, approxChars);
+    }
     const tokens = this.encoder.encode(text);
-
     if (tokens.length <= maxTokens) {
       return text;
     }
-
     const truncatedTokens = tokens.slice(0, maxTokens);
     const decoded = this.encoder.decode(truncatedTokens);
-
-    // Handle potential type issues with decode return value
     return typeof decoded === 'string'
       ? decoded
       : new TextDecoder().decode(decoded);
   }
 
-  /**
-   * Get token-to-character ratio for text
-   */
   getTokenCharRatio(text: string): number {
     const result = this.count(text);
     return result.tokens > 0 ? result.characters / result.tokens : 0;
   }
 
-  /**
-   * Free the encoder resources
-   */
   free(): void {
-    this.encoder.free();
+    if (this.encoder) {
+      this.encoder.free();
+    }
+    // TokenizerFactory owns the tokenizer's lifecycle (instance cache).
   }
 }
diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts
new file mode 100644
index 0000000..6c751fe
--- /dev/null
+++ b/src/core/tokenizers/google-ai-tokenizer.ts
@@ -0,0 +1,104 @@
+import { createHash } from 'crypto';
+import { ITokenizer } from './i-tokenizer.js';
+import { LruCache } from '../../utils/lru-cache.js';
+
+const DEFAULT_CACHE_SIZE = 500;
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models';
+const REQUEST_TIMEOUT_MS = 10_000;
+
+/**
+ * Remote tokenizer that uses Google AI's countTokens REST endpoint —
+ * addresses issue #124's GoogleAITokenizer requirement.
+ *
+ * Network calls are memoized in an LruCache with a TTL so repeated
+ * token counts don't re-hit the API. If the request fails (network,
+ * 4xx, 5xx) we surface the error to the caller — TokenCounter above
+ * is responsible for deciding whether to fall back to a local
+ * tokenizer.
+ */
+export class GoogleAITokenizer implements ITokenizer {
+    public readonly modelName: string;
+    private readonly apiKey: string;
+    private readonly endpoint: string;
+    private readonly cache: LruCache<string, number>;
+    private readonly timeoutMs: number;
+
+    constructor(
+        modelName: string,
+        apiKey: string,
+        options: {
+            endpoint?: string;
+            cache?: LruCache<string, number>;
+            timeoutMs?: number;
+        } = {}
+    ) {
+        if (!apiKey) {
+            throw new Error('GoogleAITokenizer requires an apiKey');
+        }
+        this.modelName = modelName;
+        this.apiKey = apiKey;
+        this.endpoint = options.endpoint ?? DEFAULT_ENDPOINT;
+        this.cache =
+            options.cache ??
+            new LruCache<string, number>(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS);
+        this.timeoutMs = options.timeoutMs ?? REQUEST_TIMEOUT_MS;
+    }
+
+    public async countTokens(text: string): Promise<number> {
+        // Always hash with a namespace prefix so cache keys can't collide
+        // with a raw string arg and so sensitive user text isn't retained
+        // verbatim in process memory.
+        const key = `sha256:${createHash('sha256').update(text).digest('hex')}`;
+        const cached = this.cache.get(key);
+        if (cached !== undefined) {
+            return cached;
+        }
+
+        // Per Gemini API reference, x-goog-api-key is the recommended
+        // auth path — it keeps the key out of URLs and access logs.
+        const url = `${this.endpoint}/${encodeURIComponent(
+            this.modelName
+        )}:countTokens`;
+
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+        try {
+            const response = await fetch(url, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'x-goog-api-key': this.apiKey,
+                },
+                body: JSON.stringify({
+                    contents: [{ parts: [{ text }] }],
+                }),
+                signal: controller.signal,
+            });
+
+            if (!response.ok) {
+                // Don't embed the response body — it can leak prompt
+                // content in upstream logs.
+                throw new Error(
+                    `Google AI countTokens failed: ${response.status} ${response.statusText}`
+                );
+            }
+
+            const data = (await response.json()) as { totalTokens?: number };
+            if (typeof data.totalTokens !== 'number') {
+                throw new Error(
+                    `Google AI countTokens returned unexpected payload: ${JSON.stringify(data).slice(0, 200)}`
+                );
+            }
+            this.cache.set(key, data.totalTokens);
+            return data.totalTokens;
+        } finally {
+            clearTimeout(timeout);
+        }
+    }
+
+    public free(): void {
+        this.cache.clear();
+    }
+}
diff --git a/src/core/tokenizers/heuristic-tokenizer.ts b/src/core/tokenizers/heuristic-tokenizer.ts
new file mode 100644
index 0000000..a0208e2
--- /dev/null
+++ b/src/core/tokenizers/heuristic-tokenizer.ts
@@ -0,0 +1,89 @@
+import { createHash } from 'crypto';
+import { ITokenizer } from './i-tokenizer.js';
+import { LruCache } from '../../utils/lru-cache.js';
+
+const DEFAULT_CACHE_SIZE = 500;
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+/** See TiktokenTokenizer for rationale. */
+const KEY_HASH_THRESHOLD_CHARS = 256;
+
+function cacheKeyFor(text: string): string {
+    if (text.length <= KEY_HASH_THRESHOLD_CHARS) {
+        return text;
+    }
+    return createHash('sha256').update(text).digest('hex');
+}
+
+export enum ContentType {
+    Code = 'code',
+    Json = 'json',
+    Markdown = 'markdown',
+    Text = 'text',
+}
+
+/**
+ * Content-aware character-to-token ratios derived from tiktoken encoding
+ * on typical samples:
+ *
+ * | Content   | chars/token |
+ * | --------- | ----------- |
+ * | code      | 2.5         |
+ * | json      | 2.8         |
+ * | markdown  | 3.5         |
+ * | text      | 4.0         |
+ */
+const CHARS_PER_TOKEN: Readonly<Record<ContentType, number>> = {
+    [ContentType.Code]: 2.5,
+    [ContentType.Json]: 2.8,
+    [ContentType.Markdown]: 3.5,
+    [ContentType.Text]: 4.0,
+};
+
+const CODE_PATTERN = /\b(function|class|const|import|export|return|await|=>)\b/;
+const JSON_PATTERN = /^[\s\n]*[{[]/;
+const MARKDOWN_PATTERN = /^#{1,6}\s|^\s*[-*+]\s|\[[^\]]+\]\([^)]+\)/m;
+
+export class HeuristicTokenizer implements ITokenizer {
+    public readonly modelName: string;
+    private readonly cache: LruCache<string, number>;
+
+    constructor(modelName: string = 'heuristic', cache?: LruCache<string, number>) {
+        this.modelName = modelName;
+        this.cache = cache ?? new LruCache<string, number>(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS);
+    }
+
+    public async countTokens(text: string): Promise<number> {
+        const key = cacheKeyFor(text);
+        const cached = this.cache.get(key);
+        if (cached !== undefined) {
+            return cached;
+        }
+        const contentType = HeuristicTokenizer.detectContentType(text);
+        const ratio = CHARS_PER_TOKEN[contentType];
+        const count = Math.ceil(text.length / ratio);
+        this.cache.set(key, count);
+        return count;
+    }
+
+    public free(): void {
+        // No native resources to free.
+    }
+
+    public static detectContentType(text: string): ContentType {
+        if (JSON_PATTERN.test(text)) {
+            try {
+                JSON.parse(text);
+                return ContentType.Json;
+            } catch {
+                // Not actually JSON; fall through to other detection.
+            }
+        }
+        if (CODE_PATTERN.test(text)) {
+            return ContentType.Code;
+        }
+        if (MARKDOWN_PATTERN.test(text)) {
+            return ContentType.Markdown;
+        }
+        return ContentType.Text;
+    }
+}
diff --git a/src/core/tokenizers/i-tokenizer.ts b/src/core/tokenizers/i-tokenizer.ts
new file mode 100644
index 0000000..57f23fd
--- /dev/null
+++ b/src/core/tokenizers/i-tokenizer.ts
@@ -0,0 +1,19 @@
+/**
+ * Pluggable tokenizer interface — addresses issue #124.
+ *
+ * Implementations:
+ * - TiktokenTokenizer: uses the local tiktoken library (GPT-4 / GPT-3.5-turbo).
+ * - HeuristicTokenizer: content-aware local fallback for unknown models.
+ *
+ * The factory picks an implementation based on model name. All implementations
+ * memoize counts via an injected LruCache so repeated inputs don't re-tokenize.
+ */
+
+export interface ITokenizer {
+    readonly modelName: string;
+
+    countTokens(text: string): Promise<number>;
+
+    /** Free any native resources. */
+    free(): void;
+}
diff --git a/src/core/tokenizers/tiktoken-tokenizer.ts b/src/core/tokenizers/tiktoken-tokenizer.ts
new file mode 100644
index 0000000..4ebf197
--- /dev/null
+++ b/src/core/tokenizers/tiktoken-tokenizer.ts
@@ -0,0 +1,85 @@
+import { createHash } from 'crypto';
+import { encoding_for_model, Tiktoken, TiktokenModel } from 'tiktoken';
+import { ITokenizer } from './i-tokenizer.js';
+import { LruCache } from '../../utils/lru-cache.js';
+
+const DEFAULT_CACHE_SIZE = 500;
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+/**
+ * Strings longer than this are hashed before being used as a cache key
+ * so the LRU stores ~64-byte SHA-256 digests instead of entire prompts
+ * or file contents — keeps the cache from ballooning into hundreds of
+ * MB on hot paths.
+ */
+const KEY_HASH_THRESHOLD_CHARS = 256;
+
+function cacheKeyFor(text: string): string {
+    if (text.length <= KEY_HASH_THRESHOLD_CHARS) {
+        return text;
+    }
+    return createHash('sha256').update(text).digest('hex');
+}
+
+const SUPPORTED_TIKTOKEN_MODELS: readonly TiktokenModel[] = ['gpt-4', 'gpt-3.5-turbo'];
+
+export class TiktokenTokenizer implements ITokenizer {
+    public readonly modelName: string;
+    private readonly encoder: Tiktoken;
+    private readonly cache: LruCache<string, number>;
+
+    constructor(modelName: string, cache?: LruCache<string, number>) {
+        this.modelName = modelName;
+        this.cache = cache ?? new LruCache<string, number>(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS);
+        const tiktokenModel = TiktokenTokenizer.mapToTiktokenModel(modelName);
+        this.encoder = encoding_for_model(tiktokenModel);
+    }
+
+    public async countTokens(text: string): Promise<number> {
+        const key = cacheKeyFor(text);
+        const cached = this.cache.get(key);
+        if (cached !== undefined) {
+            return cached;
+        }
+        const count = this.encoder.encode(text).length;
+        this.cache.set(key, count);
+        return count;
+    }
+
+    public free(): void {
+        this.encoder.free();
+    }
+
+    public static supports(modelName: string): boolean {
+        const mapped = TiktokenTokenizer.tryMap(modelName);
+        return mapped !== null;
+    }
+
+    public static mapToTiktokenModel(modelName: string): TiktokenModel {
+        const mapped = TiktokenTokenizer.tryMap(modelName);
+        if (mapped === null) {
+            // Default: GPT-4 tokenizer is the closest available for Claude/unknown models.
+            return 'gpt-4';
+        }
+        return mapped;
+    }
+
+    private static tryMap(modelName: string): TiktokenModel | null {
+        const lower = modelName.toLowerCase();
+        if (
+            lower.includes('claude') ||
+            lower.includes('sonnet') ||
+            lower.includes('opus') ||
+            lower.includes('haiku') ||
+            lower.includes('gpt-4')
+        ) {
+            return 'gpt-4';
+        }
+        if (lower.includes('gpt-3.5') || lower.includes('gpt3.5')) {
+            return 'gpt-3.5-turbo';
+        }
+        if (SUPPORTED_TIKTOKEN_MODELS.includes(lower as TiktokenModel)) {
+            return lower as TiktokenModel;
+        }
+        return null;
+    }
+}
diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts
new file mode 100644
index 0000000..edce55f
--- /dev/null
+++ b/src/core/tokenizers/tokenizer-factory.ts
@@ -0,0 +1,75 @@
+import { ITokenizer } from './i-tokenizer.js';
+import { TiktokenTokenizer } from './tiktoken-tokenizer.js';
+import { HeuristicTokenizer } from './heuristic-tokenizer.js';
+import { GoogleAITokenizer } from './google-ai-tokenizer.js';
+
+/**
+ * Pluggable tokenizer factory — addresses issues #123 / #124.
+ *
+ * Resolution order:
+ *   1. Google AI models (`gemini-*`) — GoogleAITokenizer when
+ *      GOOGLE_AI_API_KEY is set, else HeuristicTokenizer.
+ *   2. Tiktoken-compatible families (GPT, Claude) — TiktokenTokenizer.
+ *   3. HeuristicTokenizer fallback for everything else.
+ *
+ * Instances are cached per model name so callers don't pay for repeated
+ * allocation of the native tiktoken encoder, and so their per-tokenizer
+ * LRU caches can be shared across call sites.
+ */
+export class TokenizerFactory {
+    private static readonly instances = new Map<string, ITokenizer>();
+
+    public static create(modelName: string): ITokenizer {
+        const cached = TokenizerFactory.instances.get(modelName);
+        if (cached) {
+            return cached;
+        }
+        const tokenizer = TokenizerFactory.build(modelName);
+        TokenizerFactory.instances.set(modelName, tokenizer);
+        return tokenizer;
+    }
+
+    public static createFromEnv(): ITokenizer {
+        // TOKEN_OPTIMIZER_MODEL has highest precedence so a user can pin
+        // the optimizer model without having to clear broader env vars
+        // (CLAUDE_MODEL, ANTHROPIC_MODEL, …) that may already be set.
+        const modelName =
+            process.env.TOKEN_OPTIMIZER_MODEL ||
+            process.env.CLAUDE_MODEL ||
+            process.env.ANTHROPIC_MODEL ||
+            process.env.OPENAI_MODEL ||
+            process.env.GOOGLE_AI_MODEL ||
+            'gpt-4';
+        return TokenizerFactory.create(modelName);
+    }
+
+    /**
+     * Release every cached tokenizer. Call this on server shutdown so
+     * native tiktoken encoders are freed.
+     */
+    public static disposeAll(): void {
+        for (const tokenizer of TokenizerFactory.instances.values()) {
+            try {
+                tokenizer.free();
+            } catch {
+                // Ignore — best-effort cleanup.
+            }
+        }
+        TokenizerFactory.instances.clear();
+    }
+
+    private static build(modelName: string): ITokenizer {
+        const lower = modelName.toLowerCase();
+        if (lower.startsWith('gemini') || lower.includes('google')) {
+            const apiKey = process.env.GOOGLE_AI_API_KEY;
+            if (apiKey) {
+                return new GoogleAITokenizer(modelName, apiKey);
+            }
+            return new HeuristicTokenizer(modelName);
+        }
+        if (TiktokenTokenizer.supports(modelName)) {
+            return new TiktokenTokenizer(modelName);
+        }
+        return new HeuristicTokenizer(modelName);
+    }
+}
diff --git a/src/core/types.ts b/src/core/types.ts
index 9d36be2..1cb1775 100644
--- a/src/core/types.ts
+++ b/src/core/types.ts
@@ -48,6 +48,40 @@ export interface HypercontextConfig {
     streamingThreshold: number;
     enableStreaming: boolean;
   };
+  optimization?: OptimizationConfig;
+}
+
+/**
+ * Configuration-driven compression thresholds — addresses issue #120.
+ * Mirrors the fields exposed by Gemini CLI's settingsSchema.ts.
+ */
+export interface OptimizationConfig {
+  /** Fraction of model context at which compression kicks in (0-1). */
+  compressionTokenThreshold: number;
+  /** Fraction of chat history to keep uncompressed at the tail (0-1). */
+  compressionPreserveThreshold: number;
+  /** Minimum token count before an optimizer considers compressing. */
+  minTokensBeforeCompression: number;
+  /** Per-model total context window size, in tokens. */
+  modelTokenLimits: Record<string, number>;
+  /** Minimum output bytes before optimization emits a stored entry. */
+  minOutputSizeBytes: number;
+  /** Compression quality preset. */
+  quality: 'fast' | 'balanced' | 'max';
+  /** In-memory cache knobs — mirrors Gemini CLI's `cacheSettings`. */
+  cacheSettings: {
+    /** Max entries per LRU cache shard. */
+    maxSize: number;
+    /** Default TTL for cached entries, in seconds. */
+    ttlSeconds: number;
+  };
+  /** Chat-history compression knobs — #121. */
+  chatCompression: {
+    enabled: boolean;
+    /** Hard token limit per session (falls back to modelTokenLimit × compressionTokenThreshold). */
+    tokenLimit?: number;
+    strategy: 'summarize' | 'truncate';
+  };
 }
 
 export interface TokenMetrics {
diff --git a/src/server/index.ts b/src/server/index.ts
index 7c23370..62fba3c 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -126,12 +126,23 @@ import {
   getMcpServerAnalyticsTool,
   GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION,
 } from '../tools/analytics/get-mcp-server-analytics.js';
+import { getExportAnalyticsTool, EXPORT_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/export-analytics.js';
 import {
-  getExportAnalyticsTool,
-  EXPORT_ANALYTICS_TOOL_DEFINITION,
-} from '../tools/analytics/export-analytics.js';
+  OptimizationStorageTool,
+  OPTIMIZATION_STORAGE_TOOL_DEFINITION,
+} from '../tools/optimization-storage-tool.js';
+import {
+  ContextDeltaTool,
+  CONTEXT_DELTA_TOOL_DEFINITION,
+} from '../tools/context-delta-tool.js';
+import { SessionManager } from '../core/session-manager.js';
+import { createSummarizerFromEnv } from '../core/summarization.js';
+import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js';
+import { ConfigManager } from '../core/config.js';
+import { lruMemoize, memoRegistry } from '../utils/lru-memoize.js';
 import { AnalyticsManager } from '../analytics/analytics-manager.js';
 
+
 // API & Database tools
 import {
   getSmartSql,
@@ -369,6 +380,63 @@ const getHookAnalytics = getHookAnalyticsTool(analyticsManager);
 const getActionAnalytics = getActionAnalyticsTool(analyticsManager);
 const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager);
 const exportAnalytics = getExportAnalyticsTool(analyticsManager);
+const optimizationStorage = new OptimizationStorageTool();
+
+// #120: load user config (creates ~/.token-optimizer/config.json with
+// defaults on first run) and derive session-level knobs.
+const configManager = new ConfigManager();
+const optimizationConfig = configManager.getOptimizationConfig();
+const sessionTokenizer = TokenizerFactory.createFromEnv();
+const modelLimit =
+  configManager.getModelTokenLimit(sessionTokenizer.modelName) ??
+  // Fall back to an aggressive default for unknown models.
+  128000;
+const chatDefaultMaxTokens =
+  optimizationConfig.chatCompression.tokenLimit ??
+  Math.floor(modelLimit * optimizationConfig.compressionTokenThreshold);
+
+const sessionManager = new SessionManager({
+  persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'),
+  tokenizer: sessionTokenizer,
+  defaultMaxTokens: chatDefaultMaxTokens,
+  summarizer: createSummarizerFromEnv(),
+});
+const contextDelta = new ContextDeltaTool(sessionManager);
+
+// #125: memoize the expensive read-only file-operation tools with an
+// LRU bounded by the user's cacheSettings. The memoRegistry hook lets
+// the cleanup handler below prune them all at once.
+const cacheSettings = optimizationConfig.cacheSettings;
+const memoizedSmartRead = lruMemoize(runSmartRead, {
+  name: 'smart_read',
+  maxSize: cacheSettings.maxSize,
+  ttlMs: cacheSettings.ttlSeconds * 1000,
+});
+const memoizedSmartGrep = lruMemoize(runSmartGrep, {
+  name: 'smart_grep',
+  maxSize: cacheSettings.maxSize,
+  ttlMs: cacheSettings.ttlSeconds * 1000,
+});
+const memoizedSmartGlob = lruMemoize(runSmartGlob, {
+  name: 'smart_glob',
+  maxSize: cacheSettings.maxSize,
+  ttlMs: cacheSettings.ttlSeconds * 1000,
+});
+
+// Periodic prune + stats log. Runs every 5 minutes; unref so it doesn't
+// keep the process alive on its own.
+const MEMO_PRUNE_INTERVAL_MS = 5 * 60 * 1000;
+const memoPruneTimer = setInterval(() => {
+  const removed = memoRegistry.pruneAll();
+  if (removed > 0) {
+    console.error(
+      `[memo] pruned ${removed} expired cache entries; stats: ${JSON.stringify(memoRegistry.stats())}`
+    );
+  }
+}, MEMO_PRUNE_INTERVAL_MS);
+if (typeof memoPruneTimer.unref === 'function') {
+  memoPruneTimer.unref();
+}
 
 // Create MCP server
 const server = new Server(
@@ -430,7 +498,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
       {
         name: 'count_tokens',
         description:
-          'Count tokens in text using tiktoken. Useful for understanding token usage before and after optimization.',
+          'Count tokens in text using the pluggable tokenizer framework (#124). Picks a model-specific tokenizer (tiktoken for GPT/Claude, Google AI REST for Gemini, content-aware heuristic fallback).',
         inputSchema: {
           type: 'object',
           properties: {
@@ -438,6 +506,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
               type: 'string',
               description: 'Text to count tokens for',
             },
+            modelName: {
+              type: 'string',
+              description:
+                'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). Defaults to the server-configured model when omitted.',
+            },
           },
           required: ['text'],
         },
@@ -655,6 +728,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
       GET_ACTION_ANALYTICS_TOOL_DEFINITION,
       GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION,
       EXPORT_ANALYTICS_TOOL_DEFINITION,
+      OPTIMIZATION_STORAGE_TOOL_DEFINITION,
+      CONTEXT_DELTA_TOOL_DEFINITION,
     ],
   };
 });
@@ -847,17 +922,46 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       }
 
       case 'count_tokens': {
-        const { text } = args as { text: string };
-        const result = tokenCounter.count(text);
-
-        return {
-          content: [
-            {
-              type: 'text',
-              text: JSON.stringify(result, null, 2),
-            },
-          ],
+        const { text, modelName } = args as {
+          text: string;
+          modelName?: string;
         };
+        const counter = modelName ? new TokenCounter(modelName) : tokenCounter;
+        try {
+          const result = modelName
+            ? await counter.countAsync(text)
+            : counter.count(text);
+          // Return the full result JSON under a dedicated `metadata`
+          // key while the primary `text` payload stays the scalar token
+          // count string — preserves the integer-parse contract that
+          // the PowerShell orchestrator relies on
+          // (e.g. token-optimizer-orchestrator.ps1 L931/1910/2092 cast
+          // `content[0].text -as [int]`) and still surfaces the richer
+          // object for TS callers.
+          return {
+            content: [
+              {
+                type: 'text',
+                text: String(result.tokens),
+              },
+              {
+                type: 'text',
+                text: JSON.stringify(
+                  { ...result, model: modelName ?? counter.model },
+                  null,
+                  2
+                ),
+              },
+            ],
+          };
+        } finally {
+          // Always free one-shot counters — even when countAsync throws,
+          // leaving the tiktoken encoder allocated was leaking native
+          // resources.
+          if (modelName) {
+            counter.free();
+          }
+        }
       }
 
       case 'compress_text': {
@@ -1931,7 +2035,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_read': {
         const { path, ...options } = args as any;
-        const result = await runSmartRead(path, options);
+        const result = await memoizedSmartRead(path, options);
         return {
           content: [
             {
@@ -1945,6 +2049,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       case 'smart_write': {
         const { path, content, ...options } = args as any;
         const result = await runSmartWrite(path, content, options);
+        // Filesystem was mutated — drop every memoized read-only cache
+        // entry so the next smart_read/grep/glob reflects the new state
+        // instead of waiting for TTL expiry.
+        memoRegistry.clearAll();
         return {
           content: [
             {
@@ -1958,6 +2066,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       case 'smart_edit': {
         const { path, operations, ...options } = args as any;
         const result = await runSmartEdit(path, operations, options);
+        memoRegistry.clearAll();
         return {
           content: [
             {
@@ -1970,7 +2079,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_glob': {
         const { pattern, ...options } = args as any;
-        const result = await runSmartGlob(pattern, options);
+        const result = await memoizedSmartGlob(pattern, options);
         return {
           content: [
             {
@@ -1983,7 +2092,31 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 
       case 'smart_grep': {
         const { pattern, ...options } = args as any;
-        const result = await runSmartGrep(pattern, options);
+        const result = await memoizedSmartGrep(pattern, options);
+        return {
+          content: [
+            {
+              type: 'text',
+              text: JSON.stringify(result, null, 2),
+            },
+          ],
+        };
+      }
+
+      case 'optimization_storage': {
+        const result = optimizationStorage.run(args as any);
+        return {
+          content: [
+            {
+              type: 'text',
+              text: JSON.stringify(result, null, 2),
+            },
+          ],
+        };
+      }
+
+      case 'context_delta': {
+        const result = contextDelta.run(args as any);
         return {
           content: [
             {
@@ -2231,6 +2364,16 @@ async function cleanup() {
     },
     { fn: () => cache?.close(), name: 'closing cache' },
     { fn: () => tokenCounter?.free(), name: 'freeing tokenCounter' },
+    { fn: async () => await sessionManager.flush(), name: 'flushing sessions' },
+    { fn: () => TokenizerFactory.disposeAll(), name: 'disposing tokenizers' },
+    { fn: () => optimizationStorage.close(), name: 'closing optimization storage' },
+    {
+      fn: () => {
+        clearInterval(memoPruneTimer);
+        memoRegistry.clearAll();
+      },
+      name: 'clearing memo caches',
+    },
     // Note: predictiveCache and cacheWarmup do not implement dispose() methods
     // Removed dispose() calls to prevent runtime errors during cleanup
   ]);
diff --git a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts
new file mode 100644
index 0000000..20f5fdc
--- /dev/null
+++ b/src/tools/context-delta-tool.ts
@@ -0,0 +1,184 @@
+import { SessionManager } from '../core/session-manager.js';
+import { calculateDelta } from '../utils/diff.js';
+
+/**
+ * context_delta MCP tool — addresses issue #122.
+ *
+ * Given (sessionId, filePath, currentContent) this tool:
+ *   1. Looks up the session from the SessionManager.
+ *   2. Diffs the current content against the session's last snapshot of
+ *      that file.
+ *   3. Updates the session's file state.
+ *   4. Returns a unified-diff delta — the caller can send ONLY the delta
+ *      to the model instead of the whole file, which is the token win.
+ *
+ * On first invocation for a given filePath the full content is treated
+ * as "the delta" (there is no baseline to diff against).
+ */
+
+export type ContextDeltaOperation = 'compute-delta' | 'seed' | 'clear';
+
+export interface ContextDeltaOptions {
+    operation: ContextDeltaOperation;
+    sessionId: string;
+    filePath: string;
+    currentContent?: string;
+}
+
+export interface ContextDeltaResponse {
+    success: boolean;
+    error?: string;
+    delta?: string;
+    isBaseline?: boolean;
+    originalSize?: number;
+    deltaSize?: number;
+    bytesSaved?: number;
+}
+
+export class ContextDeltaTool {
+    public readonly name = 'context_delta';
+    public readonly description =
+        'Compute a unified-diff delta between a file’s previous session snapshot and its current content, so the model only receives what changed.';
+
+    constructor(private readonly sessionManager: SessionManager) {}
+
+    public run(options: ContextDeltaOptions): ContextDeltaResponse {
+        switch (options.operation) {
+            case 'compute-delta':
+                return this.computeDelta(options);
+            case 'seed':
+                return this.seed(options);
+            case 'clear':
+                return this.clear(options);
+            default:
+                return {
+                    success: false,
+                    error: `Unknown operation: ${String(
+                        (options as { operation: unknown }).operation
+                    )}`,
+                };
+        }
+    }
+
+    private computeDelta(options: ContextDeltaOptions): ContextDeltaResponse {
+        const { sessionId, filePath, currentContent } = options;
+        if (currentContent === undefined) {
+            return {
+                success: false,
+                error: 'currentContent is required for compute-delta',
+            };
+        }
+        // Auto-bootstrap the session on first contact so PS-side callers
+        // that locally generate a sessionId don't have to separately
+        // create it server-side first.
+        const session = this.sessionManager.getOrCreateSession(sessionId);
+        const previous = session.getFileContent(filePath);
+
+        try {
+            // Goes through SessionManager so the new state hits disk.
+            this.sessionManager.updateFileState(sessionId, filePath, currentContent);
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: message };
+        }
+
+        // Use UTF-8 byte counts throughout so the reported sizes match
+        // the byte-cap that SessionManager.updateFileState enforces.
+        // string.length counts UTF-16 code units, which drifts for any
+        // non-ASCII content.
+        const originalSize = Buffer.byteLength(currentContent, 'utf8');
+        if (previous === undefined) {
+            return {
+                success: true,
+                isBaseline: true,
+                delta: currentContent,
+                originalSize,
+                deltaSize: originalSize,
+                bytesSaved: 0,
+            };
+        }
+
+        const delta = calculateDelta(previous, currentContent, filePath);
+        const deltaSize = Buffer.byteLength(delta, 'utf8');
+        return {
+            success: true,
+            isBaseline: false,
+            delta,
+            originalSize,
+            deltaSize,
+            bytesSaved: Math.max(0, originalSize - deltaSize),
+        };
+    }
+
+    private seed(options: ContextDeltaOptions): ContextDeltaResponse {
+        const { sessionId, filePath, currentContent } = options;
+        if (currentContent === undefined) {
+            return { success: false, error: 'currentContent is required for seed' };
+        }
+        try {
+            this.sessionManager.getOrCreateSession(sessionId);
+            this.sessionManager.updateFileState(sessionId, filePath, currentContent);
+            return { success: true, isBaseline: true };
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: message };
+        }
+    }
+
+    private clear(options: ContextDeltaOptions): ContextDeltaResponse {
+        try {
+            this.sessionManager.clearFileState(options.sessionId, options.filePath);
+            return { success: true };
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: message };
+        }
+    }
+}
+
+export const CONTEXT_DELTA_TOOL_DEFINITION = {
+    name: 'context_delta',
+    description:
+        'Compute a unified-diff delta for a file in a given session so the model only sees changes since the last snapshot. Operations: compute-delta, seed, clear.',
+    // Discriminated inputSchema keyed on `operation` — compute-delta and
+    // seed require currentContent at runtime, so enforce that at schema
+    // validation time rather than letting a malformed payload reach the
+    // tool body.
+    inputSchema: {
+        type: 'object',
+        oneOf: [
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'compute-delta' },
+                    sessionId: { type: 'string', minLength: 1 },
+                    filePath: { type: 'string', minLength: 1 },
+                    currentContent: { type: 'string' },
+                },
+                required: ['operation', 'sessionId', 'filePath', 'currentContent'],
+                additionalProperties: false,
+            },
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'seed' },
+                    sessionId: { type: 'string', minLength: 1 },
+                    filePath: { type: 'string', minLength: 1 },
+                    currentContent: { type: 'string' },
+                },
+                required: ['operation', 'sessionId', 'filePath', 'currentContent'],
+                additionalProperties: false,
+            },
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'clear' },
+                    sessionId: { type: 'string', minLength: 1 },
+                    filePath: { type: 'string', minLength: 1 },
+                },
+                required: ['operation', 'sessionId', 'filePath'],
+                additionalProperties: false,
+            },
+        ],
+    },
+};
diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts
new file mode 100644
index 0000000..6465fc2
--- /dev/null
+++ b/src/tools/optimization-storage-tool.ts
@@ -0,0 +1,166 @@
+import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage.js';
+
+export type OptimizationStorageOperation = 'store' | 'retrieve';
+
+export interface OptimizationStorageOptions {
+    operation: OptimizationStorageOperation;
+    originalTextHash?: string;
+    optimizedText?: string;
+    originalTokens?: number;
+    optimizedTokens?: number;
+    tokensSaved?: number;
+}
+
+export interface OptimizationStorageResponse {
+    success: boolean;
+    error?: string;
+    result?: OptimizationResult;
+}
+
+export class OptimizationStorageTool {
+    public readonly name = 'optimization_storage';
+    public readonly description =
+        'Persist and retrieve brotli-compressed optimization results keyed by text hash.';
+
+    private readonly storage: SqliteOptimizationStorage;
+
+    constructor(storage?: SqliteOptimizationStorage) {
+        this.storage = storage ?? new SqliteOptimizationStorage();
+        this.storage.initializeDatabase();
+    }
+
+    public run(options: OptimizationStorageOptions): OptimizationStorageResponse {
+        switch (options.operation) {
+            case 'store':
+                return this.store(options);
+            case 'retrieve':
+                return this.retrieve(options);
+            default:
+                return {
+                    success: false,
+                    error: `Unknown operation: ${String((options as { operation: unknown }).operation)}`,
+                };
+        }
+    }
+
+    private store(options: OptimizationStorageOptions): OptimizationStorageResponse {
+        const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = options;
+
+        if (
+            !originalTextHash ||
+            !optimizedText ||
+            originalTokens === undefined ||
+            optimizedTokens === undefined ||
+            tokensSaved === undefined
+        ) {
+            return {
+                success: false,
+                error: 'Missing required arguments for store operation: originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved.',
+            };
+        }
+
+        try {
+            this.storage.save({
+                originalTextHash,
+                optimizedText,
+                originalTokens,
+                optimizedTokens,
+                tokensSaved,
+            });
+            return { success: true };
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: `Failed to store optimization result: ${message}` };
+        }
+    }
+
+    private retrieve(options: OptimizationStorageOptions): OptimizationStorageResponse {
+        const { originalTextHash } = options;
+
+        if (!originalTextHash) {
+            return {
+                success: false,
+                error: 'Missing required argument for retrieve operation: originalTextHash.',
+            };
+        }
+
+        try {
+            const result = this.storage.get(originalTextHash);
+            if (!result) {
+                return { success: false, error: 'Not found' };
+            }
+            return { success: true, result };
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { success: false, error: `Failed to retrieve optimization result: ${message}` };
+        }
+    }
+
+    public close(): void {
+        this.storage.close();
+    }
+}
+
+export const OPTIMIZATION_STORAGE_TOOL_DEFINITION = {
+    name: 'optimization_storage',
+    description:
+        'Persist and retrieve brotli-compressed optimization results keyed by text hash. Operations: store, retrieve.',
+    // JSON Schema discriminated union — rejects a `store` payload that
+    // omits required fields at schema time instead of deep in the tool.
+    inputSchema: {
+        type: 'object',
+        oneOf: [
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'store' },
+                    originalTextHash: {
+                        type: 'string',
+                        minLength: 1,
+                        description: 'Stable hash of the original uncompressed text',
+                    },
+                    optimizedText: {
+                        type: 'string',
+                        description: 'The optimized text to store',
+                    },
+                    originalTokens: {
+                        type: 'number',
+                        minimum: 0,
+                        description: 'Token count of the original text',
+                    },
+                    optimizedTokens: {
+                        type: 'number',
+                        minimum: 0,
+                        description: 'Token count after optimization',
+                    },
+                    tokensSaved: {
+                        type: 'number',
+                        description: 'Tokens saved by optimization',
+                    },
+                },
+                required: [
+                    'operation',
+                    'originalTextHash',
+                    'optimizedText',
+                    'originalTokens',
+                    'optimizedTokens',
+                    'tokensSaved',
+                ],
+                additionalProperties: false,
+            },
+            {
+                type: 'object',
+                properties: {
+                    operation: { type: 'string', const: 'retrieve' },
+                    originalTextHash: {
+                        type: 'string',
+                        minLength: 1,
+                        description: 'Stable hash of the original uncompressed text',
+                    },
+                },
+                required: ['operation', 'originalTextHash'],
+                additionalProperties: false,
+            },
+        ],
+    },
+};
diff --git a/src/utils/diff.ts b/src/utils/diff.ts
new file mode 100644
index 0000000..3c032e2
--- /dev/null
+++ b/src/utils/diff.ts
@@ -0,0 +1,39 @@
+import { createPatch, applyPatch } from 'diff';
+
+/**
+ * Delta-based context helpers — addresses issue #122.
+ *
+ * Uses the unified-diff format from the existing `diff` dependency so the
+ * resulting deltas are human-readable and round-trippable via applyDelta.
+ */
+
+/**
+ * Compute a unified-diff delta from `previous` to `current`.
+ * Returns the empty string when the inputs are identical (callers can use
+ * that to skip transmitting a no-op delta).
+ */
+export function calculateDelta(
+    previous: string,
+    current: string,
+    fileName: string = 'content'
+): string {
+    if (previous === current) {
+        return '';
+    }
+    return createPatch(fileName, previous, current, '', '');
+}
+
+/**
+ * Apply a unified-diff `delta` to `previous`, returning the reconstructed
+ * `current`. Throws if the patch cannot be applied cleanly.
+ */
+export function applyDelta(previous: string, delta: string): string {
+    if (delta === '') {
+        return previous;
+    }
+    const result = applyPatch(previous, delta);
+    if (result === false) {
+        throw new Error('Failed to apply delta: patch did not apply cleanly');
+    }
+    return result;
+}
diff --git a/src/utils/gzip.ts b/src/utils/gzip.ts
new file mode 100644
index 0000000..206cebc
--- /dev/null
+++ b/src/utils/gzip.ts
@@ -0,0 +1,99 @@
+import { gzipSync, gunzipSync } from 'zlib';
+import {
+    existsSync,
+    mkdirSync,
+    readFileSync,
+    renameSync,
+    unlinkSync,
+    writeFileSync,
+} from 'fs';
+import { dirname } from 'path';
+
+/**
+ * Gzip utilities — addresses issue #126.
+ *
+ * `gzipString` / `gunzipBuffer` are thin UTF-8 wrappers around node:zlib.
+ * `saveGzippedFile` writes `<path>.gz` atomically (tmp + rename) so a
+ * crash mid-write can't produce a corrupt gzip. `loadFile` transparently
+ * reads `<path>.gz` if present and falls back to the plaintext path —
+ * that gives us backward compatibility with sessions.json files written
+ * before this change.
+ */
+
+export interface GzipStats {
+    originalBytes: number;
+    compressedBytes: number;
+    ratio: number;
+    percentSaved: number;
+}
+
+export function gzipString(text: string, level: number = 6): Buffer {
+    return gzipSync(Buffer.from(text, 'utf8'), { level });
+}
+
+export function gunzipBuffer(buffer: Buffer): string {
+    return gunzipSync(buffer).toString('utf8');
+}
+
+export function computeStats(text: string, compressed: Buffer): GzipStats {
+    const originalBytes = Buffer.byteLength(text, 'utf8');
+    const compressedBytes = compressed.length;
+    const ratio = originalBytes === 0 ? 0 : compressedBytes / originalBytes;
+    return {
+        originalBytes,
+        compressedBytes,
+        ratio,
+        percentSaved: originalBytes === 0 ? 0 : (1 - ratio) * 100,
+    };
+}
+
+/**
+ * Write gzipped text to `${path}.gz` using atomic tmp + rename so a
+ * crash mid-write never produces a half-written file. Also removes any
+ * stale uncompressed plaintext at `path` once the gzip lands (backward
+ * compat cleanup).
+ */
+export function saveGzippedFile(path: string, text: string, level: number = 6): GzipStats {
+    const dir = dirname(path);
+    if (!existsSync(dir)) {
+        mkdirSync(dir, { recursive: true });
+    }
+    const compressed = gzipString(text, level);
+    const gzPath = `${path}.gz`;
+    const tmpPath = `${gzPath}.tmp`;
+    writeFileSync(tmpPath, compressed);
+    renameSync(tmpPath, gzPath);
+    if (existsSync(path)) {
+        try {
+            unlinkSync(path);
+        } catch {
+            // Best-effort — leaving the plaintext file isn't fatal.
+        }
+    }
+    return computeStats(text, compressed);
+}
+
+/**
+ * Load either `${path}.gz` or `${path}` — whichever exists. Returns
+ * null if neither is present. If the `.gz` sibling exists but can't
+ * be decompressed (corrupt, partially-written), falls back to the
+ * plaintext path so the backward-compat migration still works.
+ */
+export function loadMaybeGzippedFile(path: string): string | null {
+    const gzPath = `${path}.gz`;
+    if (existsSync(gzPath)) {
+        try {
+            const buffer = readFileSync(gzPath);
+            return gunzipBuffer(buffer);
+        } catch (error) {
+            if (!existsSync(path)) {
+                throw error;
+            }
+            // Fall through to the plaintext sibling below.
+        }
+    }
+    if (existsSync(path)) {
+        return readFileSync(path, 'utf-8');
+    }
+    return null;
+}
diff --git a/src/utils/lru-cache.ts b/src/utils/lru-cache.ts
new file mode 100644
index 0000000..8f7a5b2
--- /dev/null
+++ b/src/utils/lru-cache.ts
@@ -0,0 +1,137 @@
+/**
+ * Generic LRU cache with optional per-entry TTL — addresses issue #125.
+ *
+ * Unlike CacheEngine (token-aware, persistent SQLite cache), this is an
+ * in-memory LRU intended for hot paths: file-search results, token counts,
+ * MCP correction responses, etc. Eviction is O(1) via Map insertion order.
+ */
+
+export interface LruCacheStats {
+    size: number;
+    maxSize: number;
+    hits: number;
+    misses: number;
+    evictions: number;
+    expired: number;
+    hitRate: number;
+}
+
+interface LruCacheEntry<V> {
+    value: V;
+    expiresAt: number;
+}
+
+export class LruCache<K, V> {
+    private readonly cache = new Map<K, LruCacheEntry<V>>();
+    private readonly maxSize: number;
+    private readonly defaultTtlMs: number;
+    private hits = 0;
+    private misses = 0;
+    private evictions = 0;
+    private expired = 0;
+
+    constructor(maxSize: number, defaultTtlMs: number = 0) {
+        if (maxSize <= 0) {
+            throw new Error(`LruCache maxSize must be > 0, got ${maxSize}`);
+        }
+        this.maxSize = maxSize;
+        this.defaultTtlMs = defaultTtlMs;
+    }
+
+    public get(key: K): V | undefined {
+        const entry = this.cache.get(key);
+        if (!entry) {
+            this.misses++;
+            return undefined;
+        }
+
+        if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) {
+            this.cache.delete(key);
+            this.expired++;
+            this.misses++;
+            return undefined;
+        }
+
+        // Refresh recency: remove + re-insert moves to the tail.
+        this.cache.delete(key);
+        this.cache.set(key, entry);
+        this.hits++;
+        return entry.value;
+    }
+
+    public set(key: K, value: V, ttlMs?: number): void {
+        if (this.cache.has(key)) {
+            this.cache.delete(key);
+        } else if (this.cache.size >= this.maxSize) {
+            const oldestKey = this.cache.keys().next().value as K | undefined;
+            if (oldestKey !== undefined) {
+                this.cache.delete(oldestKey);
+                this.evictions++;
+            }
+        }
+
+        const effectiveTtl = ttlMs ?? this.defaultTtlMs;
+        this.cache.set(key, {
+            value,
+            expiresAt: effectiveTtl > 0 ? Date.now() + effectiveTtl : 0,
+        });
+    }
+
+    public has(key: K): boolean {
+        const entry = this.cache.get(key);
+        if (!entry) {
+            return false;
+        }
+        if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) {
+            this.cache.delete(key);
+            this.expired++;
+            return false;
+        }
+        return true;
+    }
+
+    public delete(key: K): boolean {
+        return this.cache.delete(key);
+    }
+
+    public clear(): void {
+        this.cache.clear();
+    }
+
+    public get size(): number {
+        return this.cache.size;
+    }
+
+    /**
+     * Remove all entries whose TTL has expired. Returns the count removed.
+     *
+     * Scans every entry regardless of the default TTL so per-entry TTLs
+     * passed via set(key, value, ttlMs) are also cleaned up even when the
+     * cache was constructed with defaultTtlMs === 0.
+     */
+    public prune(): number {
+        const now = Date.now();
+        let removed = 0;
+        for (const [key, entry] of this.cache) {
+            if (entry.expiresAt !== 0 && now > entry.expiresAt) {
+                this.cache.delete(key);
+                removed++;
+            }
+        }
+        this.expired += removed;
+        return removed;
+    }
+
+    public stats(): LruCacheStats {
+        const total = this.hits + this.misses;
+        return {
+            size: this.cache.size,
+            maxSize: this.maxSize,
+            hits: this.hits,
+            misses: this.misses,
+            evictions: this.evictions,
+            expired: this.expired,
+            hitRate: total === 0 ? 0 : this.hits / total,
+        };
+    }
+}
diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts
new file mode 100644
index 0000000..ea43aeb
--- /dev/null
+++ b/src/utils/lru-memoize.ts
@@ -0,0 +1,119 @@
+import { createHash } from 'crypto';
+import { LruCache, LruCacheStats } from './lru-cache.js';
+
+/**
+ * Wrap an async function with an LRU cache so repeated calls with the
+ * same arguments are served from memory — addresses issue #125's
+ * "store results of expensive operations" for smart_read, smart_grep,
+ * smart_glob, and edit-correction paths.
+ *
+ * Each wrapped function owns its own cache, but every cache is
+ * registered with the shared `memoRegistry` so the server can prune
+ * and log stats for all of them at once.
+ */
+
+export interface LruMemoizeOptions<Args extends readonly unknown[]> {
+    /** Identifier used in logs. */
+    name: string;
+    /** Max cached entries. */
+    maxSize: number;
+    /** Default per-entry TTL in ms. 0 disables expiration. */
+    ttlMs?: number;
+    /** Custom key function; defaults to sha256(JSON.stringify(args)). */
+    keyFn?: (args: Args) => string;
+}
+
+export interface RegisteredCache {
+    name: string;
+    cache: LruCache<string, unknown>;
+}
+
+class MemoRegistry {
+    private readonly caches = new Map<string, RegisteredCache>();
+
+    public register(entry: RegisteredCache): void {
+        this.caches.set(entry.name, entry);
+    }
+
+    /** Prune every registered cache and return total entries removed. */
+    public pruneAll(): number {
+        let total = 0;
+        for (const { cache } of this.caches.values()) {
+            total += cache.prune();
+        }
+        return total;
+    }
+
+    public stats(): Record<string, LruCacheStats> {
+        const out: Record<string, LruCacheStats> = {};
+        for (const [name, { cache }] of this.caches) {
+            out[name] = cache.stats();
+        }
+        return out;
+    }
+
+    public clearAll(): void {
+        for (const { cache } of this.caches.values()) {
+            cache.clear();
+        }
+    }
+}
+
+export const memoRegistry = new MemoRegistry();
+
+export function lruMemoize<Args extends readonly unknown[], R>(
+    fn: (...args: Args) => Promise<R>,
+    options: LruMemoizeOptions<Args>
+): (...args: Args) => Promise<R> {
+    // Wrap values in a tiny envelope so a legitimately-cached `undefined`
+    // can be distinguished from a cache miss.
+    type Envelope = { value: R };
+    const cache = new LruCache<string, Envelope>(options.maxSize, options.ttlMs ?? 0);
+
+    // Deduplicate concurrent calls for the same key so a stampede of
+    // requests while the first promise is still pending doesn't run the
+    // expensive function N times.
+    const inFlight = new Map<string, Promise<R>>();
+
+    memoRegistry.register({
+        name: options.name,
+        cache: cache as unknown as LruCache<string, unknown>,
+    });
+
+    const keyFn =
+        options.keyFn ??
+        ((args: Args): string => {
+            const serialized = JSON.stringify(args, (_, v) => {
+                // Tag bigints with a dedicated discriminator so
+                // `[1n]` and `["1"]` don't collapse to the same key.
+                if (typeof v === 'bigint') {
+                    return { __memo_bigint__: v.toString() };
+                }
+                return v;
+            });
+            return createHash('sha256').update(serialized).digest('hex');
+        });
+
+    return async (...args: Args): Promise<R> => {
+        const key = keyFn(args);
+        const hit = cache.get(key);
+        if (hit !== undefined) {
+            return hit.value;
+        }
+        const pending = inFlight.get(key);
+        if (pending) {
+            return pending;
+        }
+        const promise = (async () => {
+            try {
+                const value = await fn(...args);
+                cache.set(key, { value });
+                return value;
+            } finally {
+                inFlight.delete(key);
+            }
+        })();
+        inFlight.set(key, promise);
+        return promise;
+    };
+}
diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts
index b09ecb4..21e1cfb 100644
--- a/src/validation/tool-schemas.ts
+++ b/src/validation/tool-schemas.ts
@@ -23,6 +23,13 @@ export const GetCachedSchema = z.object({
 // 3. count_tokens
 export const CountTokensSchema = z.object({
   text: z.string().describe('Text to count tokens for'),
+  modelName: z
+    .string()
+    .optional()
+    .describe(
+      'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). ' +
+        'Defaults to the server-configured model when omitted.'
+    ),
 });
 
 // 4. compress_text
@@ -413,6 +420,46 @@ export const ExportAnalyticsSchema = z.object({
     .describe('Optional filter by MCP server name'),
 });
 
+// 72. optimization_storage — discriminated union keyed on `operation` so
+// the zod validator rejects a `store` request missing the required
+// payload fields at validateToolArgs time, instead of after dispatch.
+export const OptimizationStorageSchema = z.discriminatedUnion('operation', [
+  z.object({
+    operation: z.literal('store'),
+    originalTextHash: z.string().min(1),
+    optimizedText: z.string(),
+    originalTokens: z.number().nonnegative(),
+    optimizedTokens: z.number().nonnegative(),
+    tokensSaved: z.number(),
+  }),
+  z.object({
+    operation: z.literal('retrieve'),
+    originalTextHash: z.string().min(1),
+  }),
+]);
+
+// 73. context_delta — discriminated on operation so compute-delta and
+// seed require currentContent at validation time rather than runtime.
+export const ContextDeltaSchema = z.discriminatedUnion('operation', [
+  z.object({
+    operation: z.literal('compute-delta'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+    currentContent: z.string(),
+  }),
+  z.object({
+    operation: z.literal('seed'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+    currentContent: z.string(),
+  }),
+  z.object({
+    operation: z.literal('clear'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+  }),
+]);
+
 // Map tool names to their schemas for easy lookup
 export const toolSchemaMap: Record<string, z.ZodType<any>> = {
   optimize_text: OptimizeTextSchema,
@@ -486,4 +533,6 @@ export const toolSchemaMap: Record<string, z.ZodType<any>> = {
   get_action_analytics: GetActionAnalyticsSchema,
   get_mcp_server_analytics: GetMcpServerAnalyticsSchema,
   export_analytics: ExportAnalyticsSchema,
+  optimization_storage: OptimizationStorageSchema,
+  context_delta: ContextDeltaSchema,
 };
diff --git a/tests/benchmarks/results.json b/tests/benchmarks/results.json
index b54d5be..dd72b7a 100644
--- a/tests/benchmarks/results.json
+++ b/tests/benchmarks/results.json
@@ -1,314 +1,314 @@
 [
   {
     "operation": "token-count-small",
-    "avgDuration": 0.21655369999999988,
-    "minDuration": 0.1389,
-    "maxDuration": 6.4655,
-    "p50": 0.1805,
-    "p90": 0.2406,
-    "p95": 0.2933,
-    "p99": 0.7547,
-    "throughput": 4617.792261226664,
-    "memoryUsed": 643496
+    "avgDuration": 0.3614560000000002,
+    "minDuration": 0.1311,
+    "maxDuration": 8.1673,
+    "p50": 0.1678,
+    "p90": 0.3182,
+    "p95": 1.7474,
+    "p99": 4.338,
+    "throughput": 2766.5884644327366,
+    "memoryUsed": 653224
   },
   {
     "operation": "token-count-medium",
-    "avgDuration": 1.3377186000000005,
-    "minDuration": 0.8123,
-    "maxDuration": 18.5582,
-    "p50": 1.0083,
-    "p90": 1.5469,
-    "p95": 3.0017,
-    "p99": 9.8616,
-    "throughput": 747.5413737986446,
-    "memoryUsed": 235800
+    "avgDuration": 1.9391590000000005,
+    "minDuration": 0.7915,
+    "maxDuration": 8.2937,
+    "p50": 1.1356,
+    "p90": 4.2806,
+    "p95": 5.0478,
+    "p99": 6.6412,
+    "throughput": 515.6874707025054,
+    "memoryUsed": 227808
   },
   {
     "operation": "token-count-large",
-    "avgDuration": 21.130661,
-    "minDuration": 15.5,
-    "maxDuration": 61.1893,
-    "p50": 19.1065,
-    "p90": 27.2373,
-    "p95": 29.4197,
-    "p99": 61.1893,
-    "throughput": 47.32459623482673,
-    "memoryUsed": 123872
+    "avgDuration": 30.857169999999996,
+    "minDuration": 16.9689,
+    "maxDuration": 62.7117,
+    "p50": 28.8884,
+    "p90": 44.6519,
+    "p95": 52.7573,
+    "p99": 62.7117,
+    "throughput": 32.40737890091671,
+    "memoryUsed": 163384
   },
   {
     "operation": "token-count-batch",
-    "avgDuration": 5.449602000000001,
-    "minDuration": 3.8459,
-    "maxDuration": 13.5479,
-    "p50": 4.8888,
-    "p90": 7.3654,
-    "p95": 9.2773,
-    "p99": 13.5479,
-    "throughput": 183.49963905620996,
-    "memoryUsed": 418480
+    "avgDuration": 8.504128,
+    "minDuration": 3.7843,
+    "maxDuration": 25.0622,
+    "p50": 6.9115,
+    "p90": 14.5447,
+    "p95": 17.9337,
+    "p99": 25.0622,
+    "throughput": 117.58995160938312,
+    "memoryUsed": 418424
   },
   {
     "operation": "token-estimate",
-    "avgDuration": 0.0012522999999999961,
-    "minDuration": 0.001,
-    "maxDuration": 0.0564,
-    "p50": 0.0012,
-    "p90": 0.0013,
-    "p95": 0.0014,
-    "p99": 0.0018,
-    "throughput": 798530.7035055521,
-    "memoryUsed": 149920
+    "avgDuration": 0.0014780999999999996,
+    "minDuration": 0.0011,
+    "maxDuration": 0.0561,
+    "p50": 0.0014,
+    "p90": 0.0016,
+    "p95": 0.0017,
+    "p99": 0.0021,
+    "throughput": 676544.2121642651,
+    "memoryUsed": 149912
   },
   {
     "operation": "compress-small",
-    "avgDuration": 0.9627775000000005,
-    "minDuration": 0.426,
-    "maxDuration": 28.6613,
-    "p50": 0.5542,
-    "p90": 0.8703,
-    "p95": 1.9429,
-    "p99": 13.3281,
-    "throughput": 1038.661580687126,
-    "memoryUsed": -2487288
+    "avgDuration": 1.2691879999999993,
+    "minDuration": 0.4314,
+    "maxDuration": 24.3388,
+    "p50": 0.5745,
+    "p90": 3.0667,
+    "p95": 4.4366,
+    "p99": 9.2609,
+    "throughput": 787.9053379010835,
+    "memoryUsed": -4992648
   },
   {
     "operation": "compress-medium",
-    "avgDuration": 0.7440725999999998,
-    "minDuration": 0.4942,
-    "maxDuration": 14.3456,
-    "p50": 0.6042,
-    "p90": 0.8371,
-    "p95": 1.1362,
-    "p99": 4.4296,
-    "throughput": 1343.9548775213605,
-    "memoryUsed": -1654048
+    "avgDuration": 1.5216334000000002,
+    "minDuration": 0.5063,
+    "maxDuration": 13.7982,
+    "p50": 0.6649,
+    "p90": 3.5498,
+    "p95": 6.0332,
+    "p99": 9.4093,
+    "throughput": 657.1885186011294,
+    "memoryUsed": -1948624
   },
   {
     "operation": "compress-large",
-    "avgDuration": 2.7067430000000003,
-    "minDuration": 1.9602,
-    "maxDuration": 12.7897,
-    "p50": 2.2941,
-    "p90": 3.2423,
-    "p95": 4.5883,
-    "p99": 12.7897,
-    "throughput": 369.4477089254503,
-    "memoryUsed": 337712
+    "avgDuration": 4.812509,
+    "minDuration": 2.2623,
+    "maxDuration": 13.9499,
+    "p50": 4.4611,
+    "p90": 7.8466,
+    "p95": 9.0442,
+    "p99": 13.9499,
+    "throughput": 207.79181919451992,
+    "memoryUsed": 359296
   },
   {
     "operation": "decompress",
-    "avgDuration": 0.031263400000000004,
-    "minDuration": 0.018,
-    "maxDuration": 2.1778,
-    "p50": 0.0243,
-    "p90": 0.0363,
-    "p95": 0.0642,
-    "p99": 0.0877,
-    "throughput": 31986.28428130018,
-    "memoryUsed": 3302040
+    "avgDuration": 0.05447509999999999,
+    "minDuration": 0.0181,
+    "maxDuration": 3.3958,
+    "p50": 0.0238,
+    "p90": 0.0394,
+    "p95": 0.0754,
+    "p99": 2.0722,
+    "throughput": 18357.01081778648,
+    "memoryUsed": 3294192
   },
   {
     "operation": "compress-base64",
-    "avgDuration": 0.9150339999999996,
-    "minDuration": 0.5028,
-    "maxDuration": 16.1137,
-    "p50": 0.6502,
-    "p90": 0.9426,
-    "p95": 1.6786,
-    "p99": 9.5861,
-    "throughput": 1092.855566022684,
-    "memoryUsed": -4893280
+    "avgDuration": 1.3982006000000007,
+    "minDuration": 0.5093,
+    "maxDuration": 17.5377,
+    "p50": 0.6728,
+    "p90": 3.5295,
+    "p95": 4.8434,
+    "p99": 9.593,
+    "throughput": 715.2049569997321,
+    "memoryUsed": -4899848
   },
   {
     "operation": "compress-quality-1",
-    "avgDuration": 0.0402535,
-    "minDuration": 0.0182,
-    "maxDuration": 1.2449,
-    "p50": 0.025,
-    "p90": 0.0459,
-    "p95": 0.0879,
-    "p99": 0.3411,
-    "throughput": 24842.56027426187,
-    "memoryUsed": 568648
+    "avgDuration": 0.08870700000000004,
+    "minDuration": 0.0176,
+    "maxDuration": 4.9948,
+    "p50": 0.0227,
+    "p90": 0.0636,
+    "p95": 0.0849,
+    "p99": 3.4525,
+    "throughput": 11273.06751440134,
+    "memoryUsed": 575952
   },
   {
     "operation": "compress-quality-11",
-    "avgDuration": 1.4276415,
-    "minDuration": 0.7075,
-    "maxDuration": 19.9989,
-    "p50": 0.807,
-    "p90": 1.2711,
-    "p95": 6.5825,
-    "p99": 18.9334,
-    "throughput": 700.4559618083391,
-    "memoryUsed": 560544
+    "avgDuration": 1.7096004999999996,
+    "minDuration": 0.6727,
+    "maxDuration": 8.1428,
+    "p50": 0.9267,
+    "p90": 3.7587,
+    "p95": 5.583,
+    "p99": 7.3277,
+    "throughput": 584.9319767980883,
+    "memoryUsed": 566880
   },
   {
     "operation": "cache-write",
-    "avgDuration": 0.28219179999999994,
-    "minDuration": 0.1069,
-    "maxDuration": 16.9523,
-    "p50": 0.1543,
-    "p90": 0.2669,
-    "p95": 0.3756,
-    "p99": 2.9096,
-    "throughput": 3543.689079555112,
-    "memoryUsed": 646560
+    "avgDuration": 0.6586279000000003,
+    "minDuration": 0.1024,
+    "maxDuration": 14.5587,
+    "p50": 0.1522,
+    "p90": 1.0068,
+    "p95": 4.7546,
+    "p99": 7.3433,
+    "throughput": 1518.3079854345672,
+    "memoryUsed": 640856
   },
   {
     "operation": "cache-read-memory",
-    "avgDuration": 0.25163070000000015,
-    "minDuration": 0.0793,
-    "maxDuration": 28.8506,
-    "p50": 0.1203,
-    "p90": 0.1493,
-    "p95": 0.2349,
-    "p99": 4.1812,
-    "throughput": 3974.077884773199,
-    "memoryUsed": 478016
+    "avgDuration": 0.4997243,
+    "minDuration": 0.079,
+    "maxDuration": 15.3762,
+    "p50": 0.1012,
+    "p90": 0.2858,
+    "p95": 4.201,
+    "p99": 8.698,
+    "throughput": 2001.1034084194023,
+    "memoryUsed": 478008
   },
   {
     "operation": "cache-read-disk",
-    "avgDuration": 0.45986899999999997,
-    "minDuration": 0.0884,
-    "maxDuration": 39.4532,
-    "p50": 0.1262,
-    "p90": 0.1936,
-    "p95": 0.2933,
-    "p99": 19.9866,
-    "throughput": 2174.532312462897,
-    "memoryUsed": 297264
+    "avgDuration": 0.47186479999999986,
+    "minDuration": 0.0742,
+    "maxDuration": 12.5767,
+    "p50": 0.1039,
+    "p90": 0.2734,
+    "p95": 4.0729,
+    "p99": 8.0254,
+    "throughput": 2119.2511075206294,
+    "memoryUsed": 297344
   },
   {
     "operation": "cache-delete",
-    "avgDuration": 0.5386263000000001,
-    "minDuration": 0.0733,
-    "maxDuration": 99.0395,
-    "p50": 0.1153,
-    "p90": 0.1729,
-    "p95": 0.2314,
-    "p99": 5.5755,
-    "throughput": 1856.5747717851873,
-    "memoryUsed": 444360
+    "avgDuration": 0.37161569999999977,
+    "minDuration": 0.0764,
+    "maxDuration": 18.4903,
+    "p50": 0.1064,
+    "p90": 0.2381,
+    "p95": 1.8399,
+    "p99": 5.8565,
+    "throughput": 2690.951970005575,
+    "memoryUsed": 442720
   },
   {
     "operation": "cache-stats",
-    "avgDuration": 0.32004450000000007,
-    "minDuration": 0.1652,
-    "maxDuration": 48.5593,
-    "p50": 0.2046,
-    "p90": 0.2563,
-    "p95": 0.3068,
-    "p99": 2.1103,
-    "throughput": 3124.5654901115304,
-    "memoryUsed": 765656
+    "avgDuration": 0.6591104,
+    "minDuration": 0.1651,
+    "maxDuration": 10.2148,
+    "p50": 0.197,
+    "p90": 1.7847,
+    "p95": 4.4389,
+    "p99": 7.1797,
+    "throughput": 1517.1965121472822,
+    "memoryUsed": 770976
   },
   {
     "operation": "metrics-record",
-    "avgDuration": 0.0027574999999999657,
-    "minDuration": 0.0019,
-    "maxDuration": 0.1329,
-    "p50": 0.0025,
-    "p90": 0.003,
-    "p95": 0.0033,
-    "p99": 0.0056,
-    "throughput": 362647.32547597913,
-    "memoryUsed": 459872
+    "avgDuration": 0.01021149999999998,
+    "minDuration": 0.0017,
+    "maxDuration": 4.0691,
+    "p50": 0.0022,
+    "p90": 0.0028,
+    "p95": 0.003,
+    "p99": 0.0075,
+    "throughput": 97928.80575821397,
+    "memoryUsed": 459840
   },
   {
     "operation": "metrics-cache-stats",
-    "avgDuration": 0.5644068000000001,
-    "minDuration": 0.0897,
-    "maxDuration": 113.9726,
-    "p50": 0.1075,
-    "p90": 0.1357,
-    "p95": 0.1805,
-    "p99": 18.041,
-    "throughput": 1771.7717079241424,
-    "memoryUsed": -5707968
+    "avgDuration": 0.5884749999999999,
+    "minDuration": 0.0814,
+    "maxDuration": 101.3263,
+    "p50": 0.0959,
+    "p90": 0.146,
+    "p95": 2.1334,
+    "p99": 8.5735,
+    "throughput": 1699.3075321806366,
+    "memoryUsed": -5767600
   },
   {
     "operation": "metrics-breakdown",
-    "avgDuration": 2.7876345,
-    "minDuration": 0.6644,
-    "maxDuration": 53.726,
-    "p50": 0.8498,
-    "p90": 2.1808,
-    "p95": 24.6884,
-    "p99": 40.5231,
-    "throughput": 358.7270856347918,
-    "memoryUsed": 4166392
+    "avgDuration": 3.030816000000001,
+    "minDuration": 0.6429,
+    "maxDuration": 14.331,
+    "p50": 0.9025,
+    "p90": 8.4685,
+    "p95": 10.3432,
+    "p99": 12.5062,
+    "throughput": 329.94414705478647,
+    "memoryUsed": 3432896
   },
   {
     "operation": "metrics-percentiles",
-    "avgDuration": 0.19757700000000003,
-    "minDuration": 0.0665,
-    "maxDuration": 22.145,
-    "p50": 0.0782,
-    "p90": 0.1113,
-    "p95": 0.1406,
-    "p99": 0.3571,
-    "throughput": 5061.3178659459345,
-    "memoryUsed": 6890256
+    "avgDuration": 0.2575195,
+    "minDuration": 0.0656,
+    "maxDuration": 5.1485,
+    "p50": 0.0749,
+    "p90": 0.1152,
+    "p95": 0.2302,
+    "p99": 4.9353,
+    "throughput": 3883.201077976619,
+    "memoryUsed": 6877416
   },
   {
     "operation": "e2e-optimization",
-    "avgDuration": 4.089156000000001,
-    "minDuration": 1.7855,
-    "maxDuration": 36.6479,
-    "p50": 2.4161,
-    "p90": 7.4475,
-    "p95": 13.5108,
-    "p99": 36.6479,
-    "throughput": 244.54924194625977,
-    "memoryUsed": 843760
+    "avgDuration": 10.268875999999999,
+    "minDuration": 2.0887,
+    "maxDuration": 20.2826,
+    "p50": 9.5857,
+    "p90": 16.0762,
+    "p95": 18.8724,
+    "p99": 20.2826,
+    "throughput": 97.38164137925126,
+    "memoryUsed": 848648
   },
   {
     "operation": "e2e-cache-hit",
-    "avgDuration": 0.5933224999999999,
-    "minDuration": 0.1019,
-    "maxDuration": 55.8155,
-    "p50": 0.1439,
-    "p90": 0.2178,
-    "p95": 0.3251,
-    "p99": 21.812,
-    "throughput": 1685.424031618555,
-    "memoryUsed": -14126360
+    "avgDuration": 0.5115652000000002,
+    "minDuration": 0.0905,
+    "maxDuration": 18.1006,
+    "p50": 0.1224,
+    "p90": 0.3575,
+    "p95": 3.4573,
+    "p99": 8.1231,
+    "throughput": 1954.785040108279,
+    "memoryUsed": 2419984
   },
   {
     "operation": "regression-token-count",
-    "avgDuration": 1.0176798,
-    "minDuration": 0.4497,
-    "maxDuration": 19.4484,
-    "p50": 0.5622,
-    "p90": 0.7359,
-    "p95": 3.6875,
-    "p99": 15.1693,
-    "throughput": 982.6273450647246,
-    "memoryUsed": 263416
+    "avgDuration": 1.9268184000000008,
+    "minDuration": 0.455,
+    "maxDuration": 17.2354,
+    "p50": 0.6255,
+    "p90": 5.6609,
+    "p95": 6.9407,
+    "p99": 12.8987,
+    "throughput": 518.9902691400496,
+    "memoryUsed": 263464
   },
   {
     "operation": "regression-compress",
-    "avgDuration": 1.1226146666666665,
-    "minDuration": 0.7638,
-    "maxDuration": 28.5129,
-    "p50": 0.8377,
-    "p90": 1.0199,
-    "p95": 1.281,
-    "p99": 10.5418,
-    "throughput": 890.7776013378293,
-    "memoryUsed": -5355728
+    "avgDuration": 5.232439333333333,
+    "minDuration": 0.7827,
+    "maxDuration": 61.2683,
+    "p50": 2.3238,
+    "p90": 12.9448,
+    "p95": 14.6136,
+    "p99": 17.48,
+    "throughput": 191.11545042280474,
+    "memoryUsed": -4833328
   },
   {
     "operation": "regression-cache",
-    "avgDuration": 0.4148695999999999,
-    "minDuration": 0.1768,
-    "maxDuration": 17.4546,
-    "p50": 0.2465,
-    "p90": 0.3658,
-    "p95": 0.5651,
-    "p99": 5.4624,
-    "throughput": 2410.395941278899,
-    "memoryUsed": -447896
+    "avgDuration": 2.2698532000000005,
+    "minDuration": 0.1812,
+    "maxDuration": 44.9064,
+    "p50": 0.2844,
+    "p90": 10.6146,
+    "p95": 13.1184,
+    "p99": 19.7883,
+    "throughput": 440.5571250158379,
+    "memoryUsed": -467608
   }
 ]
\ No newline at end of file
diff --git a/tests/unit/cache-engine.test.ts b/tests/unit/cache-engine.test.ts
index 8a99374..f8cabc2 100644
--- a/tests/unit/cache-engine.test.ts
+++ b/tests/unit/cache-engine.test.ts
@@ -48,7 +48,7 @@ describe('CacheEngine', () => {
     cache = new CacheEngine(testDbPath, 100);
   });
 
-  afterEach(() => {
+  afterEach(async () => {
     // Restore original environment variable
     if (originalEnv !== undefined) {
       process.env.TOKEN_OPTIMIZER_CACHE_DIR = originalEnv;
@@ -58,6 +58,7 @@ describe('CacheEngine', () => {
 
     // Clean up
     cache.close();
+    await new Promise(resolve => setTimeout(resolve, 100)); // Add a small delay
     if (fs.existsSync(testDbPath)) {
       fs.unlinkSync(testDbPath);
     }
diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts
new file mode 100644
index 0000000..a767d89
--- /dev/null
+++ b/tests/unit/config.test.ts
@@ -0,0 +1,94 @@
+import { describe, it, expect, afterEach } from '@jest/globals';
+import { mkdtempSync, writeFileSync, existsSync, rmSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { ConfigManager } from '../../src/core/config.js';
+
+describe('ConfigManager', () => {
+  const tempDirs: string[] = [];
+
+  afterEach(() => {
+    while (tempDirs.length) {
+      const dir = tempDirs.pop();
+      if (dir) {
+        rmSync(dir, { recursive: true, force: true });
+      }
+    }
+  });
+
+  function tempConfigPath(): string {
+    const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-config-'));
+    tempDirs.push(dir);
+    return join(dir, 'config.json');
+  }
+
+  function writeConfig(content: string): string {
+    const file = tempConfigPath();
+    writeFileSync(file, content);
+    return file;
+  }
+
+  it('returns defaults when no config file exists and writeDefaults is false', () => {
+    const mgr = new ConfigManager(tempConfigPath(), { writeDefaults: false });
+    const opt = mgr.getOptimizationConfig();
+    expect(opt.compressionTokenThreshold).toBe(0.7);
+    expect(opt.quality).toBe('balanced');
+    expect(opt.cacheSettings.maxSize).toBe(1000);
+    expect(opt.cacheSettings.ttlSeconds).toBe(3600);
+    expect(opt.chatCompression.enabled).toBe(true);
+    expect(opt.chatCompression.strategy).toBe('summarize');
+    expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000);
+  });
+
+  it('writes a default config file on first run', () => {
+    const file = tempConfigPath();
+    expect(existsSync(file)).toBe(false);
+    new ConfigManager(file);
+    expect(existsSync(file)).toBe(true);
+
+    // A second instance reads what the first wrote.
+    const second = new ConfigManager(file);
+    expect(second.getOptimizationConfig().quality).toBe('balanced');
+  });
+
+  it('overrides defaults with user config — nested sub-objects deep-merge', () => {
+    const configPath = writeConfig(
+      JSON.stringify({
+        optimization: {
+          compressionTokenThreshold: 0.9,
+          quality: 'max',
+          cacheSettings: { maxSize: 42 },
+          chatCompression: { strategy: 'truncate' },
+          modelTokenLimits: { 'custom-model': 500000 },
+        },
+      })
+    );
+    const mgr = new ConfigManager(configPath, { writeDefaults: false });
+    const opt = mgr.getOptimizationConfig();
+    expect(opt.compressionTokenThreshold).toBe(0.9);
+    expect(opt.quality).toBe('max');
+    expect(opt.cacheSettings.maxSize).toBe(42);
+    // Unprovided sub-field retains default.
+    expect(opt.cacheSettings.ttlSeconds).toBe(3600);
+    expect(opt.chatCompression.enabled).toBe(true);
+    expect(opt.chatCompression.strategy).toBe('truncate');
+    expect(mgr.getModelTokenLimit('custom-model')).toBe(500000);
+    // Built-in model limits must survive a partial override.
+    expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000);
+    expect(opt.compressionPreserveThreshold).toBe(0.3);
+  });
+
+  it('falls back to defaults on invalid config', () => {
+    const configPath = writeConfig(
+      JSON.stringify({ optimization: { compressionTokenThreshold: 5 } })
+    );
+    const mgr = new ConfigManager(configPath, { writeDefaults: false });
+    expect(mgr.getOptimizationConfig().compressionTokenThreshold).toBe(0.7);
+  });
+
+  it('falls back to defaults on malformed JSON', () => {
+    const configPath = writeConfig('not json at all');
+    const mgr = new ConfigManager(configPath, { writeDefaults: false });
+    expect(mgr.getOptimizationConfig().quality).toBe('balanced');
+  });
+});
diff --git a/tests/unit/diff.test.ts b/tests/unit/diff.test.ts
new file mode 100644
index 0000000..0780b1f
--- /dev/null
+++ b/tests/unit/diff.test.ts
@@ -0,0 +1,33 @@
+import { describe, it, expect } from '@jest/globals';
+import { calculateDelta, applyDelta } from '../../src/utils/diff.js';
+
+describe('diff utils', () => {
+  it('returns empty delta when inputs are identical', () => {
+    expect(calculateDelta('hello', 'hello')).toBe('');
+  });
+
+  it('round-trips a simple change', () => {
+    const prev = 'line1\nline2\nline3\n';
+    const next = 'line1\nline2 changed\nline3\n';
+    const delta = calculateDelta(prev, next);
+    expect(delta).not.toBe('');
+    expect(applyDelta(prev, delta)).toBe(next);
+  });
+
+  it('applyDelta on an empty delta is a no-op', () => {
+    expect(applyDelta('anything', '')).toBe('anything');
+  });
+
+  it('produces a meaningfully smaller delta than the full content for small edits', () => {
+    const prev = 'a\n'.repeat(500);
+    const next = prev + 'appended line\n';
+    const delta = calculateDelta(prev, next);
+    expect(delta.length).toBeLessThan(next.length);
+  });
+
+  it('throws when the patch targets a different baseline than supplied', () => {
+    const patch = calculateDelta('original\ntext\n', 'original\nchanged\n');
+    // Applying the patch against completely different content fails.
+    expect(() => applyDelta('totally different input\n', patch)).toThrow();
+  });
+});
diff --git a/tests/unit/gzip.test.ts b/tests/unit/gzip.test.ts
new file mode 100644
index 0000000..bbb444c
--- /dev/null
+++ b/tests/unit/gzip.test.ts
@@ -0,0 +1,73 @@
+import { describe, it, expect, afterEach } from '@jest/globals';
+import { mkdtempSync, existsSync, writeFileSync, rmSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import {
+    gzipString,
+    gunzipBuffer,
+    saveGzippedFile,
+    loadMaybeGzippedFile,
+} from '../../src/utils/gzip.js';
+
+describe('gzip utils', () => {
+    const tempDirs: string[] = [];
+    afterEach(() => {
+        while (tempDirs.length) {
+            const dir = tempDirs.pop();
+            if (dir) {
+                rmSync(dir, { recursive: true, force: true });
+            }
+        }
+    });
+
+    function tempDir(): string {
+        const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-gzip-'));
+        tempDirs.push(dir);
+        return dir;
+    }
+
+    it('gzipString round-trips via gunzipBuffer', () => {
+        const text = 'Hello, world. '.repeat(1000);
+        const buffer = gzipString(text);
+        expect(buffer.length).toBeLessThan(text.length);
+        expect(gunzipBuffer(buffer)).toBe(text);
+    });
+
+    it('saveGzippedFile writes .gz and removes plaintext', () => {
+        const dir = tempDir();
+        const file = join(dir, 'sessions.json');
+        writeFileSync(file, 'stale plaintext');
+        const stats = saveGzippedFile(file, JSON.stringify({ hello: 'world' }));
+        expect(existsSync(`${file}.gz`)).toBe(true);
+        expect(existsSync(file)).toBe(false);
+        expect(stats.originalBytes).toBeGreaterThan(0);
+        expect(stats.compressedBytes).toBeGreaterThan(0);
+    });
+
+    it('loadMaybeGzippedFile prefers the .gz sibling', () => {
+        const dir = tempDir();
+        const file = join(dir, 'state.json');
+        saveGzippedFile(file, '{"compressed":true}');
+        expect(loadMaybeGzippedFile(file)).toBe('{"compressed":true}');
+    });
+
+    it('loadMaybeGzippedFile falls back to plaintext when no .gz exists', () => {
+        const dir = tempDir();
+        const file = join(dir, 'legacy.json');
+        writeFileSync(file, '{"legacy":true}');
+        expect(loadMaybeGzippedFile(file)).toBe('{"legacy":true}');
+    });
+
+    it('loadMaybeGzippedFile returns null when neither exists', () => {
+        const dir = tempDir();
+        const file = join(dir, 'missing.json');
+        expect(loadMaybeGzippedFile(file)).toBeNull();
+    });
+
+    it('saves with high compression ratio on repetitive content', () => {
+        const dir = tempDir();
+        const file = join(dir, 'repeated.txt');
+        const stats = saveGzippedFile(file, 'aa'.repeat(10_000));
+        expect(stats.percentSaved).toBeGreaterThan(95);
+    });
+});
diff --git a/tests/unit/lru-cache.test.ts b/tests/unit/lru-cache.test.ts
new file mode 100644
index 0000000..0063e2c
--- /dev/null
+++ b/tests/unit/lru-cache.test.ts
@@ -0,0 +1,88 @@
+import { describe, it, expect } from '@jest/globals';
+import { LruCache } from '../../src/utils/lru-cache.js';
+
+describe('LruCache', () => {
+  it('rejects non-positive maxSize', () => {
+    expect(() => new LruCache<string, number>(0)).toThrow();
+    expect(() => new LruCache<string, number>(-1)).toThrow();
+  });
+
+  it('get returns undefined on miss and counts it', () => {
+    const cache = new LruCache<string, number>(2);
+    expect(cache.get('x')).toBeUndefined();
+    expect(cache.stats().misses).toBe(1);
+  });
+
+  it('set/get round-trips and counts hits', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    expect(cache.get('a')).toBe(1);
+    expect(cache.stats().hits).toBe(1);
+  });
+
+  it('evicts the least recently used entry when full', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    cache.get('a');
+    cache.set('c', 3);
+
+    expect(cache.get('a')).toBe(1);
+    expect(cache.get('b')).toBeUndefined();
+    expect(cache.get('c')).toBe(3);
+    expect(cache.stats().evictions).toBe(1);
+  });
+
+  it('refreshes recency on get', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    cache.get('a');
+    cache.set('c', 3);
+
+    expect(cache.has('b')).toBe(false);
+    expect(cache.has('a')).toBe(true);
+  });
+
+  it('expires entries past the TTL', async () => {
+    const cache = new LruCache<string, number>(2, 20);
+    cache.set('a', 1);
+    await new Promise((r) => setTimeout(r, 30));
+    expect(cache.get('a')).toBeUndefined();
+    expect(cache.stats().expired).toBe(1);
+  });
+
+  it('prune removes expired entries', async () => {
+    const cache = new LruCache<string, number>(4, 20);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    await new Promise((r) => setTimeout(r, 30));
+    cache.set('c', 3);
+    const removed = cache.prune();
+    expect(removed).toBe(2);
+    expect(cache.size).toBe(1);
+  });
+
+  it('prune removes per-entry TTL expirations even when defaultTtlMs is 0', async () => {
+    const cache = new LruCache<string, number>(4, 0);
+    cache.set('short', 1, 20);
+    cache.set('forever', 2);
+    await new Promise((r) => setTimeout(r, 30));
+    const removed = cache.prune();
+    expect(removed).toBe(1);
+    expect(cache.has('forever')).toBe(true);
+    expect(cache.has('short')).toBe(false);
+  });
+
+  it('stats.hitRate reflects hits / total', () => {
+    const cache = new LruCache<string, number>(2);
+    cache.set('a', 1);
+    cache.get('a');
+    cache.get('a');
+    cache.get('missing');
+    const stats = cache.stats();
+    expect(stats.hits).toBe(2);
+    expect(stats.misses).toBe(1);
+    expect(stats.hitRate).toBeCloseTo(2 / 3);
+  });
+});
diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts
new file mode 100644
index 0000000..b0dae36
--- /dev/null
+++ b/tests/unit/lru-memoize.test.ts
@@ -0,0 +1,113 @@
+import { describe, it, expect } from '@jest/globals';
+import { lruMemoize, memoRegistry } from '../../src/utils/lru-memoize.js';
+
+describe('lruMemoize', () => {
+  it('returns cached value for identical args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-double', maxSize: 10 });
+    expect(await memo(3)).toBe(6);
+    expect(await memo(3)).toBe(6);
+    expect(calls).toBe(1);
+  });
+
+  it('differentiates calls by args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-by-args', maxSize: 10 });
+    await memo(1);
+    await memo(2);
+    await memo(1);
+    expect(calls).toBe(2);
+  });
+
+  it('expires entries past the TTL', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x;
+    };
+    const memo = lruMemoize(fn, { name: 'test-ttl', maxSize: 10, ttlMs: 20 });
+    await memo(7);
+    await memo(7);
+    expect(calls).toBe(1);
+    await new Promise((r) => setTimeout(r, 30));
+    await memo(7);
+    expect(calls).toBe(2);
+  });
+
+  it('registers with memoRegistry for bulk prune / stats', async () => {
+    const fn = async (x: string) => x.toUpperCase();
+    lruMemoize(fn, { name: 'test-registered', maxSize: 5 });
+    const stats = memoRegistry.stats();
+    expect(stats['test-registered']).toBeDefined();
+    expect(stats['test-registered'].size).toBe(0);
+  });
+
+  it('accepts a custom key function', async () => {
+    let calls = 0;
+    const fn = async (obj: { id: string; ignore: number }) => {
+      calls++;
+      return obj.id;
+    };
+    const memo = lruMemoize(fn, {
+      name: 'test-custom-key',
+      maxSize: 5,
+      keyFn: ([{ id }]) => id,
+    });
+    await memo({ id: 'a', ignore: 1 });
+    await memo({ id: 'a', ignore: 9999 }); // same id → hit
+    await memo({ id: 'b', ignore: 1 }); // different id → miss
+    expect(calls).toBe(2);
+  });
+
+  it('deduplicates concurrent calls for the same args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      await new Promise((r) => setTimeout(r, 20));
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-concurrent', maxSize: 10 });
+    const [a, b] = await Promise.all([memo(5), memo(5)]);
+    expect(a).toBe(10);
+    expect(b).toBe(10);
+    // Stampede collapsed into a single invocation.
+    expect(calls).toBe(1);
+  });
+
+  it('memoizes a legitimately-undefined return value', async () => {
+    let calls = 0;
+    const fn = async (): Promise<undefined> => {
+      calls++;
+      return undefined;
+    };
+    const memo = lruMemoize(fn, { name: 'test-undefined', maxSize: 10 });
+    expect(await memo()).toBeUndefined();
+    expect(await memo()).toBeUndefined();
+    // Without envelope-style storage, the second call would re-run fn.
+    expect(calls).toBe(1);
+  });
+
+  it('distinguishes bigint args from string args in the default key', async () => {
+    let calls = 0;
+    const fn = async (x: unknown) => {
+      calls++;
+      return String(x);
+    };
+    const memo = lruMemoize(fn as (x: unknown) => Promise<string>, {
+      name: 'test-bigint-collision',
+      maxSize: 10,
+    });
+    expect(await memo(1n)).toBe('1');
+    expect(await memo('1')).toBe('1');
+    // Two distinct args ⇒ two distinct cache keys ⇒ two invocations.
+    expect(calls).toBe(2);
+  });
+});
diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts
new file mode 100644
index 0000000..ffe6c6e
--- /dev/null
+++ b/tests/unit/session.test.ts
@@ -0,0 +1,98 @@
+import { describe, it, expect } from '@jest/globals';
+import { Session } from '../../src/core/session.js';
+import { SessionManager } from '../../src/core/session-manager.js';
+import { HeuristicTokenizer } from '../../src/core/tokenizers/heuristic-tokenizer.js';
+
+describe('Session', () => {
+  it('appends messages and tracks updatedAt', async () => {
+    const session = new Session({ allowCharHeuristic: true });
+    const before = session.updatedAt;
+    await new Promise((r) => setTimeout(r, 5));
+    session.addMessage('user', 'hi');
+    expect(session.getHistory().length).toBe(1);
+    expect(session.updatedAt).toBeGreaterThan(before);
+  });
+
+  it('compressHistory is a no-op under the budget', async () => {
+    const session = new Session({
+      maxTokens: 10_000,
+      allowCharHeuristic: true,
+    });
+    session.addMessage('user', 'short');
+    const before = session.getHistory().length;
+    await session.compressHistory();
+    expect(session.getHistory().length).toBe(before);
+  });
+
+  it('getHistoryTokenCount throws without a tokenizer when heuristic is off', async () => {
+    const session = new Session();
+    session.addMessage('user', 'hi');
+    await expect(session.getHistoryTokenCount()).rejects.toThrow(
+      /requires a tokenizer/
+    );
+  });
+
+  it('clearFileContent removes the entry', () => {
+    const session = new Session();
+    session.setFileContent('a.ts', 'const x = 1;');
+    session.clearFileContent('a.ts');
+    expect(session.getFileContent('a.ts')).toBeUndefined();
+  });
+
+  it('compressHistory summarizes head when over budget', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const session = new Session({ maxTokens: 50, tokenizer });
+    // Each long message is several hundred chars → easily over 50 tokens.
+    for (let i = 0; i < 10; i++) {
+      session.addMessage('user', 'a'.repeat(400) + ` turn=${i}`);
+    }
+    expect((await session.getHistoryTokenCount()) > 50).toBe(true);
+    await session.compressHistory();
+    const history = session.getHistory();
+    // Summary is stored as `assistant` (never `system`) so that
+    // user-derived text can't be elevated into system-role context.
+    expect(history[0].role).toBe('assistant');
+    expect(history[0].content.startsWith('[summary')).toBe(true);
+    expect(history.length).toBeLessThan(10);
+  });
+
+  it('snapshot round-trips and preserves createdAt / updatedAt', () => {
+    const session = new Session({ maxTokens: 42 });
+    session.addMessage('user', 'hello');
+    session.setFileContent('a.ts', 'const x = 1;');
+    const snapshot = session.toSnapshot();
+    const restored = Session.fromSnapshot(snapshot);
+    expect(restored.id).toBe(session.id);
+    expect(restored.maxTokens).toBe(42);
+    expect(restored.getFileContent('a.ts')).toBe('const x = 1;');
+    expect(restored.getHistory()[0].content).toBe('hello');
+    expect(restored.createdAt).toBe(snapshot.createdAt);
+    expect(restored.updatedAt).toBe(snapshot.updatedAt);
+  });
+});
+
+describe('SessionManager', () => {
+  it('create/get/delete lifecycle', () => {
+    const manager = new SessionManager();
+    const session = manager.createSession();
+    expect(manager.getSession(session.id)).toBe(session);
+    expect(manager.deleteSession(session.id)).toBe(true);
+    expect(manager.getSession(session.id)).toBeUndefined();
+  });
+
+  it('addMessage auto-compresses when over budget', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const manager = new SessionManager({ tokenizer, defaultMaxTokens: 30 });
+    const session = manager.createSession();
+    for (let i = 0; i < 8; i++) {
+      await manager.addMessage(session.id, 'user', 'x'.repeat(300));
+    }
+    const history = session.getHistory();
+    expect(history[0].content.startsWith('[summary')).toBe(true);
+  });
+
+  it('throws for unknown session ids', async () => {
+    const manager = new SessionManager();
+    await expect(manager.addMessage('bogus', 'user', 'hi')).rejects.toThrow();
+  });
+});
diff --git a/tests/unit/summarization.test.ts b/tests/unit/summarization.test.ts
new file mode 100644
index 0000000..a060ee7
--- /dev/null
+++ b/tests/unit/summarization.test.ts
@@ -0,0 +1,102 @@
+import { describe, it, expect, beforeEach, afterEach } from '@jest/globals';
+import {
+    TruncatingSummarizer,
+    AnthropicSummarizer,
+    GoogleAISummarizer,
+    createSummarizerFromEnv,
+} from '../../src/core/summarization.js';
+import { Message } from '../../src/core/session.js';
+
+function makeMessages(n: number): Message[] {
+    return Array.from({ length: n }, (_, i) => ({
+        role: (i % 2 === 0 ? 'user' : 'assistant') as Message['role'],
+        content: `Turn ${i}: ${'x'.repeat(50)}`,
+        timestamp: Date.now() + i,
+    }));
+}
+
+describe('TruncatingSummarizer', () => {
+    it('returns empty string for empty input', async () => {
+        const s = new TruncatingSummarizer();
+        expect(await s.summarize([])).toBe('');
+    });
+
+    it('returns untruncated text when under maxChars', async () => {
+        const s = new TruncatingSummarizer({ maxChars: 10_000 });
+        const out = await s.summarize(makeMessages(3));
+        expect(out).toContain('Turn 0');
+        expect(out).toContain('Turn 2');
+        expect(out).not.toContain('[truncated]');
+    });
+
+    it('truncates with a marker when over maxChars', async () => {
+        const s = new TruncatingSummarizer({ maxChars: 500 });
+        const out = await s.summarize(makeMessages(50));
+        expect(out).toContain('[truncated]');
+        expect(out.length).toBeLessThan(600);
+    });
+});
+
+describe('AnthropicSummarizer / GoogleAISummarizer constructors', () => {
+    const savedAnthropic = process.env.ANTHROPIC_API_KEY;
+    const savedGoogle = process.env.GOOGLE_AI_API_KEY;
+
+    beforeEach(() => {
+        delete process.env.ANTHROPIC_API_KEY;
+        delete process.env.GOOGLE_AI_API_KEY;
+    });
+    afterEach(() => {
+        if (savedAnthropic !== undefined) process.env.ANTHROPIC_API_KEY = savedAnthropic;
+        else delete process.env.ANTHROPIC_API_KEY;
+        if (savedGoogle !== undefined) process.env.GOOGLE_AI_API_KEY = savedGoogle;
+        else delete process.env.GOOGLE_AI_API_KEY;
+    });
+
+    it('AnthropicSummarizer throws without a key', () => {
+        expect(() => new AnthropicSummarizer()).toThrow(/ANTHROPIC_API_KEY/);
+    });
+
+    it('GoogleAISummarizer throws without a key', () => {
+        expect(() => new GoogleAISummarizer()).toThrow(/GOOGLE_AI_API_KEY/);
+    });
+
+    it('AnthropicSummarizer constructs with explicit apiKey', () => {
+        expect(() => new AnthropicSummarizer({ apiKey: 'sk-test' })).not.toThrow();
+    });
+
+    it('GoogleAISummarizer constructs with explicit apiKey', () => {
+        expect(() => new GoogleAISummarizer({ apiKey: 'gapi-test' })).not.toThrow();
+    });
+});
+
+describe('createSummarizerFromEnv', () => {
+    const saved = {
+        anthropic: process.env.ANTHROPIC_API_KEY,
+        google: process.env.GOOGLE_AI_API_KEY,
+    };
+
+    afterEach(() => {
+        if (saved.anthropic !== undefined) process.env.ANTHROPIC_API_KEY = saved.anthropic;
+        else delete process.env.ANTHROPIC_API_KEY;
+        if (saved.google !== undefined) process.env.GOOGLE_AI_API_KEY = saved.google;
+        else delete process.env.GOOGLE_AI_API_KEY;
+    });
+
+    it('falls back to TruncatingSummarizer when no keys are set', () => {
+        delete process.env.ANTHROPIC_API_KEY;
+        delete process.env.GOOGLE_AI_API_KEY;
+        expect(createSummarizerFromEnv()).toBeInstanceOf(TruncatingSummarizer);
+    });
+
+    it('prefers Anthropic when its key is set', () => {
+        process.env.ANTHROPIC_API_KEY = 'sk-test';
+        delete process.env.GOOGLE_AI_API_KEY;
+        expect(createSummarizerFromEnv()).toBeInstanceOf(AnthropicSummarizer);
+    });
+
+    it('uses Google AI when only its key is set', () => {
+        delete process.env.ANTHROPIC_API_KEY;
+        process.env.GOOGLE_AI_API_KEY = 'gapi-test';
+        expect(createSummarizerFromEnv()).toBeInstanceOf(GoogleAISummarizer);
+    });
+});
diff --git a/tests/unit/tokenizers.test.ts b/tests/unit/tokenizers.test.ts
new file mode 100644
index 0000000..ed2f2a3
--- /dev/null
+++ b/tests/unit/tokenizers.test.ts
@@ -0,0 +1,64 @@
+import { describe, it, expect } from '@jest/globals';
+import { HeuristicTokenizer, ContentType } from '../../src/core/tokenizers/heuristic-tokenizer.js';
+import { TokenizerFactory } from '../../src/core/tokenizers/tokenizer-factory.js';
+import { TiktokenTokenizer } from '../../src/core/tokenizers/tiktoken-tokenizer.js';
+
+describe('HeuristicTokenizer', () => {
+  it('detects JSON content', () => {
+    const json = '{"a": 1, "b": [1, 2, 3]}';
+    expect(HeuristicTokenizer.detectContentType(json)).toBe(ContentType.Json);
+  });
+
+  it('detects code content', () => {
+    const code = 'function foo() { return 42; }';
+    expect(HeuristicTokenizer.detectContentType(code)).toBe(ContentType.Code);
+  });
+
+  it('detects markdown content', () => {
+    const md = '# Heading\n\n- item one\n- item two';
+    expect(HeuristicTokenizer.detectContentType(md)).toBe(ContentType.Markdown);
+  });
+
+  it('defaults to text content', () => {
+    const text = 'Just a short plain sentence.';
+    expect(HeuristicTokenizer.detectContentType(text)).toBe(ContentType.Text);
+  });
+
+  it('uses a lower chars/token ratio for code than text', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const code = 'function foo() { return 42; }';
+    const text = 'A sentence of roughly similar length here.';
+    const codeTokens = await tokenizer.countTokens(code);
+    const textTokens = await tokenizer.countTokens(text);
+    // Code has ratio 2.5 vs text 4.0 → for strings of similar length, code tokens > text tokens.
+    expect(codeTokens / code.length).toBeGreaterThan(textTokens / text.length);
+  });
+
+  it('caches repeated inputs', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const input = 'cache me';
+    const first = await tokenizer.countTokens(input);
+    const second = await tokenizer.countTokens(input);
+    expect(first).toBe(second);
+  });
+});
+
+describe('TokenizerFactory', () => {
+  it('returns a TiktokenTokenizer for gpt-4', () => {
+    const t = TokenizerFactory.create('gpt-4');
+    expect(t).toBeInstanceOf(TiktokenTokenizer);
+    t.free();
+  });
+
+  it('returns a TiktokenTokenizer for Claude models (maps to gpt-4)', () => {
+    const t = TokenizerFactory.create('claude-opus-4-7');
+    expect(t).toBeInstanceOf(TiktokenTokenizer);
+    t.free();
+  });
+
+  it('falls back to HeuristicTokenizer for unknown models', () => {
+    const t = TokenizerFactory.create('some-unknown-local-model');
+    expect(t).toBeInstanceOf(HeuristicTokenizer);
+    t.free();
+  });
+});