diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml index fbd9a07..81458cc 100644 --- a/.github/workflows/quality-gates.yml +++ b/.github/workflows/quality-gates.yml @@ -129,30 +129,47 @@ jobs: - name: Run npm audit id: audit run: | - npm audit --json > audit-results.json || true + # Gating audit — prod deps only. Dev deps (e.g. @semantic-release/npm, + # which bundles its own node_modules/npm) can carry unfixable + # transitive vulnerabilities that never ship to end users, and + # failing CI on those is noise. The "Dependency Vulnerability Scan" + # step below still covers the full tree for visibility. + npm audit --omit=dev --json > audit-results.json || true + + # Informational audit — full tree, including dev deps. Always + # collected so teams can review non-gating findings even on + # forks/repos without a SNYK_TOKEN. + npm audit --json > audit-results-full.json || true # Check for high/critical vulnerabilities using Python for reliable JSON parsing HIGH_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))") CRITICAL_VULNS=$(python3 -c "import json; data = json.load(open('audit-results.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))") + FULL_CRITICAL=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('critical', 0))") + FULL_HIGH=$(python3 -c "import json; data = json.load(open('audit-results-full.json')); print(data.get('metadata', {}).get('vulnerabilities', {}).get('high', 0))") # Ensure we have valid integers HIGH_VULNS=${HIGH_VULNS:-0} CRITICAL_VULNS=${CRITICAL_VULNS:-0} + FULL_CRITICAL=${FULL_CRITICAL:-0} + FULL_HIGH=${FULL_HIGH:-0} echo "high_vulnerabilities=$HIGH_VULNS" >> $GITHUB_OUTPUT echo "critical_vulnerabilities=$CRITICAL_VULNS" >> $GITHUB_OUTPUT - echo "Found $CRITICAL_VULNS critical and 
$HIGH_VULNS high severity vulnerabilities" + echo "Production: $CRITICAL_VULNS critical, $HIGH_VULNS high" + echo "Full tree: $FULL_CRITICAL critical, $FULL_HIGH high (informational)" if [ "$CRITICAL_VULNS" -gt 0 ] 2>/dev/null; then - echo "Error: Found $CRITICAL_VULNS critical vulnerabilities" - npm audit + echo "Error: Found $CRITICAL_VULNS critical vulnerabilities in production deps" + npm audit --omit=dev || true exit 1 fi if [ "$HIGH_VULNS" -gt 0 ] 2>/dev/null; then - echo "Warning: Found $HIGH_VULNS high vulnerabilities" - npm audit + echo "Warning: Found $HIGH_VULNS high vulnerabilities in production deps" + # npm audit exits non-zero when vulns exist — don't let that + # turn a "warning" into a failed step. + npm audit --omit=dev || true fi - name: Upload audit results @@ -160,7 +177,9 @@ jobs: uses: actions/upload-artifact@v4 with: name: security-audit-${{ github.sha }} - path: audit-results.json + path: | + audit-results.json + audit-results-full.json retention-days: 30 - name: Comment PR with security audit diff --git a/hooks/dispatcher.ps1 b/hooks/dispatcher.ps1 index acc6187..3896848 100644 --- a/hooks/dispatcher.ps1 +++ b/hooks/dispatcher.ps1 @@ -2,18 +2,30 @@ # Minimal dispatcher focused on token optimization via MCP # Replaces 400+ line mess with clean architecture +[CmdletBinding()] param([string]$Phase = "") $HANDLERS_DIR = "C:\Users\cheat\.claude-global\hooks\handlers" $LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\dispatcher.log" $ORCHESTRATOR = "$HANDLERS_DIR\token-optimizer-orchestrator.ps1" -function Write-Log { - param([string]$Message) - $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" - "[$timestamp] [$Phase] $Message" | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 +# Load the shared logging helper defensively: a missing/malformed helper +# must not kill the dispatcher for every hook phase. Fall back to a +# minimal Write-Log shim so the rest of the script still runs. 
+$loggingHelperPath = "$PSScriptRoot\helpers\logging.ps1" +try { + if (Test-Path $loggingHelperPath) { + . $loggingHelperPath + } else { + throw "logging helper not found at $loggingHelperPath" + } +} catch { + function Write-Log { param([string]$Message, [string]$Level = 'INFO') $null = $Message; $null = $Level } + function Handle-Error { param($Exception, [string]$Message) $null = $Exception; $null = $Message } } + + function Block-Tool { param([string]$Reason) diff --git a/hooks/handlers/token-optimizer-orchestrator.ps1 b/hooks/handlers/token-optimizer-orchestrator.ps1 index 6726ee6..6b13fd8 100644 --- a/hooks/handlers/token-optimizer-orchestrator.ps1 +++ b/hooks/handlers/token-optimizer-orchestrator.ps1 @@ -10,9 +10,20 @@ param( [string]$InputJsonFile = "" ) +# Dot-source helpers BEFORE any logging — Write-Log must exist before +# the first use below. +$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers" +$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1" +$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log" +$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt" +. "$PSScriptRoot\..\helpers\logging.ps1" +. "$PSScriptRoot\..\helpers\config.ps1" +. "$PSScriptRoot\..\helpers\gzip.ps1" +. "$PSScriptRoot\..\helpers\context-delta.ps1" + # DIAGNOSTIC: Log script version/load time to verify latest version is being used $SCRIPT_VERSION = Get-Date -Format 'yyyyMMdd.HHmmss' -Write-Host "DEBUG: token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. Phase=$Phase, Action=$Action" -ForegroundColor Cyan +Write-Log "token-optimizer-orchestrator.ps1 version $SCRIPT_VERSION loaded. 
Phase=$Phase, Action=$Action" "DEBUG" # Read JSON from temp file if provided # DO NOT delete temp file - dispatcher will clean it up after all handlers run @@ -21,14 +32,9 @@ if ($InputJsonFile -and (Test-Path $InputJsonFile)) { try { $InputJson = Get-Content -Path $InputJsonFile -Raw -Encoding UTF8 } catch { - Write-Host "ERROR: Failed to read InputJsonFile: $($_.Exception.Message)" -ForegroundColor Red + Write-Log "Failed to read InputJsonFile: $($_.Exception.Message)" "ERROR" } } - -$HELPERS_DIR = "C:\Users\cheat\.claude-global\hooks\helpers" -$INVOKE_MCP = "$HELPERS_DIR\invoke-mcp.ps1" -$LOG_FILE = "C:\Users\cheat\.claude-global\hooks\logs\token-optimizer-orchestrator.log" -$SESSION_FILE = "C:\Users\cheat\.claude-global\hooks\data\current-session.txt" $OPERATIONS_DIR = "C:\Users\cheat\.claude-global\hooks\data" # PERFORMANCE FIX: Prefer local dev path if not already set @@ -345,7 +351,7 @@ if (-not ('TokenCounter' -as [type])) { if (-not $script:TokenCounter) { $apiKey = $env:GOOGLE_AI_API_KEY if (-not $apiKey) { - Write-Host "WARN: GOOGLE_AI_API_KEY not set, falling back to estimation only" -ForegroundColor Yellow + Write-Log "GOOGLE_AI_API_KEY not set, falling back to estimation only" "WARN" } $modelName = if ($env:GOOGLE_AI_MODEL) { $env:GOOGLE_AI_MODEL } else { "gemini-2.0-flash-exp" } $script:TokenCounter = [TokenCounter]::new($apiKey, $modelName) @@ -420,7 +426,7 @@ function Read-SessionFile { Write-Log "Failed to acquire read lock on session file '$FilePath', retrying... ($($_.Exception.Message))" "WARN" Start-Sleep -Milliseconds $retryDelayMs } catch { - Write-Log "Failed to read session file '$FilePath': $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to read session file '$FilePath'" return $null } } @@ -452,7 +458,7 @@ function Write-SessionFile { Write-Log "Failed to acquire write lock on session file '$FilePath', retrying... 
($($_.Exception.Message))" "WARN" Start-Sleep -Milliseconds $retryDelayMs } catch { - Write-Log "Failed to write session file '$FilePath': $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to write session file '$FilePath'" return $false } finally { # Ensure writer and fileStream are disposed even if errors occur @@ -498,7 +504,7 @@ function Flush-OperationLogs { Write-Log "Flushed $($script:OperationLogBuffer.Count) operation logs" "DEBUG" $script:OperationLogBuffer = @() } catch { - Write-Log "Failed to flush operation logs: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to flush operation logs" } } } @@ -523,33 +529,7 @@ function Start-LogFlushTimer { } } -function Write-Log { - param( - [string]$Message, - [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO", - [string]$Context = "" - ) - - # Check if debug logging is disabled - $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) { - $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true' - } else { - $true # Default: enabled - } - - if ($Level -eq 'DEBUG' -and -not $debugLogging) { - return - } - $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" - $contextPart = if ($Context) { " [$Context]" } else { "" } - $logEntry = "[$timestamp] [$Level]$contextPart $Message" - try { - $logEntry | Out-File -FilePath $LOG_FILE -Append -Encoding UTF8 -ErrorAction SilentlyContinue - } catch { - # Silently fail - } -} # Removed - now using direct invoke-mcp.ps1 calls @@ -559,7 +539,7 @@ function Get-SessionInfo { $session = Read-SessionFile -FilePath $SESSION_FILE return $session } catch { - Write-Log "Failed to read session file: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Failed to read session file" } } return $null @@ -716,7 +696,7 @@ function Handle-LogOperation { Write-Log "Logged operation: $toolName ($tokens tokens)" "DEBUG" } catch { - Write-Log "Operation logging failed: 
$($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Operation logging failed" } } @@ -746,7 +726,7 @@ function Handle-OptimizeSession { } } catch { - Write-Log "Session optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Session optimization failed" } } @@ -822,7 +802,7 @@ function Handle-ContextGuard { return 0 # Success - allow operation to proceed } catch { - Write-Log "Context guard failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Context guard failed" return 0 # On error, don't block } } @@ -852,7 +832,7 @@ function Handle-PeriodicOptimize { } } catch { - Write-Log "Periodic optimize failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Periodic optimize failed" } } @@ -877,7 +857,7 @@ function Handle-CacheWarmup { } } catch { - Write-Log "Cache warmup failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Cache warmup failed" } } @@ -917,7 +897,7 @@ function Handle-SessionReport { } } catch { - Write-Log "Session report failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Session report failed" } } @@ -999,11 +979,11 @@ function Handle-UserPromptOptimization { Write-Log "Optimized user prompt: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO" } } catch { - Write-Log "Prompt optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Prompt optimization failed" } } catch { - Write-Log "UserPromptOptimization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "UserPromptOptimization handler failed" } } @@ -1055,7 +1035,7 @@ function Handle-SessionStartInit { } } catch { - Write-Log "SessionStartInit handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SessionStartInit handler failed" } } 
@@ -1100,7 +1080,7 @@ function Handle-SmartDiff { return $null } catch { - Write-Log "SmartDiff handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SmartDiff handler failed" return $null } } @@ -1140,7 +1120,7 @@ function Handle-SmartLogs { return $null } catch { - Write-Log "SmartLogs handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SmartLogs handler failed" return $null } } @@ -1195,7 +1175,7 @@ function Handle-ToolSpecificOptimization { return $ToolOutput } catch { - Write-Log "ToolSpecificOptimization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "ToolSpecificOptimization handler failed" return $ToolOutput } } @@ -1233,7 +1213,7 @@ function Handle-MetricCollector { return $null } catch { - Write-Log "MetricCollector handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "MetricCollector handler failed" return $null } } @@ -1273,7 +1253,7 @@ function Handle-AlertManager { return $null } catch { - Write-Log "AlertManager handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "AlertManager handler failed" return $null } } @@ -1305,7 +1285,7 @@ function Handle-HealthMonitor { return $null } catch { - Write-Log "HealthMonitor handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "HealthMonitor handler failed" return $null } } @@ -1343,7 +1323,7 @@ function Handle-MonitoringIntegration { return $null } catch { - Write-Log "MonitoringIntegration handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "MonitoringIntegration handler failed" return $null } } @@ -1379,7 +1359,7 @@ function Handle-AnalyzeOptimization { return $null } catch { - Write-Log "AnalyzeOptimization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception 
-Message "AnalyzeOptimization handler failed" return $null } } @@ -1408,7 +1388,7 @@ function Handle-CacheAnalytics { return $null } catch { - Write-Log "CacheAnalytics handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheAnalytics handler failed" return $null } } @@ -1438,7 +1418,7 @@ function Handle-CacheOptimizer { return $null } catch { - Write-Log "CacheOptimizer handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheOptimizer handler failed" return $null } } @@ -1478,7 +1458,7 @@ function Handle-CacheCompression { return $Data } catch { - Write-Log "CacheCompression handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheCompression handler failed" return $Data } } @@ -1505,7 +1485,7 @@ function Handle-CacheInvalidation { Write-Log "Cache invalidation completed for pattern: $Pattern" "DEBUG" } catch { - Write-Log "CacheInvalidation handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "CacheInvalidation handler failed" } } @@ -1545,7 +1525,7 @@ function Handle-SmartCache { return $null } catch { - Write-Log "SmartCache handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "SmartCache handler failed" return $null } } @@ -1593,7 +1573,7 @@ function Handle-IntelligentSummarization { return $Text } catch { - Write-Log "IntelligentSummarization handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "IntelligentSummarization handler failed" return $Text } } @@ -1639,7 +1619,7 @@ function Handle-PatternRecognition { return $null } catch { - Write-Log "PatternRecognition handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "PatternRecognition handler failed" return $null } } @@ -1682,7 +1662,7 @@ function Handle-PredictiveAnalytics { return $Context } 
catch { - Write-Log "PredictiveAnalytics handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "PredictiveAnalytics handler failed" return $Context } } @@ -1716,7 +1696,7 @@ function Handle-IntelligentAssistant { return $null } catch { - Write-Log "IntelligentAssistant handler failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "IntelligentAssistant handler failed" return $null } } @@ -1861,7 +1841,7 @@ function Handle-PreToolUseOptimization { } } catch { - Write-Log "PreToolUse optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "PreToolUse optimization failed" return 1 } return 0 @@ -1877,43 +1857,43 @@ function Handle-OptimizeToolOutput { $ErrorActionPreference = 'Stop' try { - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Entered function." + Write-Log "[Handle-OptimizeToolOutput] Entered function." "DEBUG" if (-not $InputJson) { Write-Log "No input received for tool output optimization" "WARN" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] No input received, returning." + Write-Log "[Handle-OptimizeToolOutput] No input received, returning." "DEBUG" return } - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Parsing InputJson..." + Write-Log "[Handle-OptimizeToolOutput] Parsing InputJson..." "DEBUG" $data = $InputJson | ConvertFrom-Json $toolName = $data.tool_name $toolOutput = $data.tool_response # FIXED: Claude Code uses tool_response not tool_result $outputType = if ($toolOutput) { $toolOutput.GetType().Name } else { "null" } Write-Log "DEBUG: tool_name=$toolName, tool_response_type=$outputType, has_content=$(-not -not $toolOutput)" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. toolName=$toolName, outputType=$outputType" + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 1 - After line 1564 log. 
toolName=$toolName, outputType=$outputType" "DEBUG" # Skip if no output or if output is already optimized Write-Log "DEBUG: Checking if toolOutput is null or empty" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check." + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 2 - Before null/empty check." "DEBUG" if (-not $toolOutput) { Write-Log "No tool output to optimize for: $toolName (toolOutput is null/false)" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] toolOutput is null/false, returning." + Write-Log "[Handle-OptimizeToolOutput] toolOutput is null/false, returning." "DEBUG" return } - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists." + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 3 - After null/empty check, toolOutput exists." "DEBUG" # Convert output to string for token counting $outputText = "" try { - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])" + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 4 - Attempting to convert toolOutput to string. Is string: $($toolOutput -is [string])" "DEBUG" $outputText = if ($toolOutput -is [string]) { $toolOutput } else { $toolOutput | ConvertTo-Json -Depth 10 -ErrorAction Stop } Write-Log "DEBUG: Converted tool output to string. Length: $($outputText.Length)" "DEBUG" - Write-Host "DEBUG: [Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. Length: $($outputText.Length)" + Write-Log "[Handle-OptimizeToolOutput] Checkpoint 5 - toolOutput converted. 
Length: $($outputText.Length)" "DEBUG" } catch { Write-Log "ERROR: Failed to convert tool output to JSON string for ${toolName}: $($_.Exception.Message)" "ERROR" - Write-Host "ERROR: [Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)" + Write-Log "[Handle-OptimizeToolOutput] Failed to convert: $($_.Exception.Message)" "ERROR" return } @@ -1934,8 +1914,7 @@ function Handle-OptimizeToolOutput { Write-Log "WARN: count_tokens result did not contain expected content" "WARN" } } catch { - Write-Log "ERROR: Token counting failed for ${toolName}: $($_.Exception.Message)" "ERROR" - Write-Log "ERROR: Stack Trace: $($_.ScriptStackTrace)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Token counting failed for ${toolName}" return } @@ -1958,16 +1937,56 @@ function Handle-OptimizeToolOutput { Write-Log "Tool-specific optimization failed: $($_.Exception.Message)" "WARN" } - # Optimize using optimize_text (PHASE 4: Reduced quality for performance) + # Calculate SHA256 hash of the output text for caching + $hasher = [System.Security.Cryptography.SHA256]::Create() + $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText)) + $originalTextHash = [System.BitConverter]::ToString($hashBytes).Replace("-", "").ToLower() + + # Attempt to retrieve from optimization storage try { - # PHASE 2 FIX: Use content hash instead of timestamp for cache key - $hasher = [System.Security.Cryptography.SHA256]::Create() - $hashBytes = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($outputText)) - $contentHash = [Convert]::ToBase64String($hashBytes).Substring(0, 16) + $retrieveArgs = @{ + operation = "retrieve" + originalTextHash = $originalTextHash + } + $retrieveJson = $retrieveArgs | ConvertTo-Json -Compress + $retrieveResultJson = & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $retrieveJson + $retrieveResult = if ($retrieveResultJson) { $retrieveResultJson | ConvertFrom-Json } else { $null } + + if 
($retrieveResult -and $retrieveResult.success -and $retrieveResult.result) { + Write-Log "Cache HIT for optimization result. Hash: $originalTextHash" "INFO" + # OptimizationStorageTool.retrieve() returns { success, result: { optimizedText, ... } }. + # Read the actual payload from $retrieveResult.result (not top-level), and mirror + # the base64 wrapping used on the store path below so round-tripped bytes survive JSON. + $cachedEntry = $retrieveResult.result + $optimizedTextBytes = [System.Convert]::FromBase64String($cachedEntry.optimizedText) + $optimizedText = [System.Text.Encoding]::UTF8.GetString($optimizedTextBytes) + $afterTokens = $cachedEntry.optimizedTokens + $saved = $cachedEntry.tokensSaved + $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 } + if ($script:CurrentSession) { + $script:CurrentSession.cacheHits++ + if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) { + Write-Log "Session stats updated and persisted after cache hit." "DEBUG" + } else { + Write-Log "Failed to persist session stats after cache hit." "ERROR" + } + } + + Write-Log "Using cached optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO" + Update-SessionOperation -TokensDelta $afterTokens + return + } else { + Write-Log "Cache MISS for optimization result. 
Hash: $originalTextHash" "DEBUG" + } + } catch { + Handle-Error -Exception $_.Exception -Message "Failed to retrieve from optimization storage" + } + + # Optimize using optimize_text (PHASE 4: Reduced quality for performance) + try { $optimizeArgs = @{ text = $outputText - key = "tool_output_${toolName}_$contentHash" quality = $script:OPTIMIZATION_QUALITY } $optimizeJson = $optimizeArgs | ConvertTo-Json -Compress @@ -1982,34 +2001,43 @@ function Handle-OptimizeToolOutput { $saved = $beforeTokens - $afterTokens $percent = if ($beforeTokens -gt 0) { [math]::Round(($saved / $beforeTokens) * 100, 1) } else { 0 } - # PHASE 1 FIX: Rollback logic - only use optimization if it actually helps if ($afterTokens -ge $beforeTokens) { Write-Log "Optimization made things worse or had no effect ($beforeTokens → $afterTokens tokens), REVERTING to original" "WARN" - - # PHASE 4 FIX: Track failure and persist immediately if ($script:CurrentSession) { $script:CurrentSession.optimizationFailures++ - # CRITICAL: Persist immediately to disk for multi-process visibility if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) { Write-Log "Session stats updated and persisted after optimization failure." "DEBUG" } else { Write-Log "Failed to persist session stats after optimization failure." 
"ERROR" } } - - # Don't update session with optimized tokens, skip this optimization return } Write-Log "Optimized $toolName output: $beforeTokens → $afterTokens tokens ($percent% reduction)" "INFO" - # PHASE 4 FIX: Track success and detailed stats, persist immediately + # Store the new optimization result + try { + $storeArgs = @{ + operation = "store" + originalTextHash = $originalTextHash + optimizedText = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes($optimizedText)) + originalTokens = $beforeTokens + optimizedTokens = $afterTokens + tokensSaved = $saved + } + $storeJson = $storeArgs | ConvertTo-Json -Compress + & "$HELPERS_DIR\invoke-mcp.ps1" -Tool "optimization_storage" -ArgumentsJson $storeJson + Write-Log "Stored new optimization result. Hash: $originalTextHash" "DEBUG" + } catch { + Handle-Error -Exception $_.Exception -Message "Failed to store optimization result" + } + if ($script:CurrentSession) { $script:CurrentSession.optimizationSuccesses++ $script:CurrentSession.totalOriginalTokens += $beforeTokens $script:CurrentSession.totalOptimizedTokens += $afterTokens $script:CurrentSession.totalTokensSaved += $saved - # CRITICAL: Persist immediately to disk for multi-process visibility if (Write-SessionFile -FilePath $SESSION_FILE -SessionObject $script:CurrentSession) { Write-Log "Session stats updated and persisted after optimization success." "DEBUG" } else { @@ -2017,11 +2045,10 @@ function Handle-OptimizeToolOutput { } } - # Update session tokens (only if optimization helped) Update-SessionOperation -TokensDelta $afterTokens } } catch { - Write-Log "Tool output optimization failed: $($_.Exception.Message)" "ERROR" + Handle-Error -Exception $_.Exception -Message "Tool output optimization failed" } } catch { @@ -2227,6 +2254,28 @@ function Handle-SmartRead { Write-Log "Updated session totalTokens by $tokens" "DEBUG" } + # #122: update the MCP server's context_delta so the next read + # of this file can be served as a diff. 
Failure here is + # non-fatal — smart_read still succeeds. + # + # IMPORTANT: only feed FULL content. smart_read can return a + # diff payload (metadata.isDiff), and persisting a diff as the + # new baseline would make the next compute-delta compare + # against the previous patch instead of the file contents. + try { + $isDiff = $result.metadata -and $result.metadata.isDiff + $contentText = if ($result.content -and $result.content[0] -and $result.content[0].text) { + $result.content[0].text + } else { + $null + } + if ($contentText -and -not $isDiff) { + $null = Invoke-ContextDelta -Operation 'compute-delta' -FilePath $filePath -CurrentContent $contentText + } + } catch { + Write-Log "context_delta update skipped: $($_.Exception.Message)" 'DEBUG' + } + # Return smart_read result and block plain Read $blockResponse = @{ continue = $false diff --git a/hooks/helpers/config.ps1 b/hooks/helpers/config.ps1 new file mode 100644 index 0000000..38b42b6 --- /dev/null +++ b/hooks/helpers/config.ps1 @@ -0,0 +1,151 @@ +[CmdletBinding()] +param() + +<# +Token-Optimizer Config helper — addresses issue #120 (PowerShell side). + +Mirrors src/core/config.ts so the PS orchestrator and the TS server +share one source of truth. The config file lives at +~/.token-optimizer/config.json and is the same one the Node server +reads. On first run we copy the defaults below into that file. 
+#> + +$script:TokenOptimizerConfigPath = + Join-Path $env:USERPROFILE '.token-optimizer\config.json' + +$script:TokenOptimizerDefaultConfig = @{ + cache = @{ + enabled = $true + maxSizeMB = 500 + defaultTTL = 300 + ttlByType = @{ + file_read = 300 + git_status = 60 + git_diff = 120 + build_result = 600 + test_result = 300 + } + compression = 'auto' + } + monitoring = @{ + enabled = $true + detailedLogging = $false + metricsRetentionDays = 30 + dashboardPort = 3100 + enableWebUI = $false + } + optimization = @{ + compressionTokenThreshold = 0.7 + compressionPreserveThreshold = 0.3 + minTokensBeforeCompression = 1000 + modelTokenLimits = @{ + 'gpt-4' = 128000 + 'gpt-4-turbo' = 128000 + 'gpt-3.5-turbo' = 16385 + 'claude-3-opus' = 200000 + 'claude-3-sonnet' = 200000 + 'claude-3-haiku' = 200000 + 'claude-opus-4-7' = 1000000 + 'claude-sonnet-4-6' = 1000000 + 'gemini-1.5-pro' = 2000000 + 'gemini-2.5-flash' = 1000000 + } + minOutputSizeBytes = 500 + quality = 'balanced' + cacheSettings = @{ + maxSize = 1000 + ttlSeconds = 3600 + } + chatCompression = @{ + enabled = $true + strategy = 'summarize' + } + } +} + +function Get-TokenOptimizerConfigPath { + return $script:TokenOptimizerConfigPath +} + +function Write-TokenOptimizerDefaultConfig { + $configPath = Get-TokenOptimizerConfigPath + $configDir = Split-Path -Parent $configPath + if (-not (Test-Path $configDir)) { + New-Item -ItemType Directory -Path $configDir -Force | Out-Null + } + $json = $script:TokenOptimizerDefaultConfig | ConvertTo-Json -Depth 10 + Set-Content -Path $configPath -Value $json -Encoding UTF8 +} + +function Import-TokenOptimizerConfig { + $configPath = Get-TokenOptimizerConfigPath + if (-not (Test-Path $configPath)) { + Write-TokenOptimizerDefaultConfig + return $script:TokenOptimizerDefaultConfig + } + try { + $raw = Get-Content -Path $configPath -Raw -Encoding UTF8 + return ($raw | ConvertFrom-Json -AsHashtable) + } catch { + $msg = "Failed to load $configPath ($($_.Exception.Message)); using 
defaults." + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log $msg 'WARN' + } else { + Write-Warning $msg + } + return $script:TokenOptimizerDefaultConfig + } +} + +function Merge-TokenOptimizerHashtable { + param( + [hashtable]$Base, + $User + ) + $merged = @{} + foreach ($key in $Base.Keys) { + $merged[$key] = $Base[$key] + } + if ($null -eq $User) { + return $merged + } + # Handle both hashtables and PSCustomObjects (ConvertFrom-Json returns the latter). + $userKeys = @() + if ($User -is [hashtable]) { + $userKeys = $User.Keys + } elseif ($User.PSObject) { + $userKeys = $User.PSObject.Properties.Name + } + foreach ($key in $userKeys) { + $userValue = if ($User -is [hashtable]) { $User[$key] } else { $User.$key } + if ($Base.ContainsKey($key) -and ($Base[$key] -is [hashtable]) -and ($null -ne $userValue)) { + $merged[$key] = Merge-TokenOptimizerHashtable -Base $Base[$key] -User $userValue + } else { + $merged[$key] = $userValue + } + } + return $merged +} + +function Get-TokenOptimizerOptimizationConfig { + $config = Import-TokenOptimizerConfig + $defaults = $script:TokenOptimizerDefaultConfig.optimization + if ($null -eq $config.optimization) { + return $defaults + } + # Deep-merge the user's partial optimization section onto defaults so + # overriding one modelTokenLimit doesn't drop the rest of the map. 
+ return Merge-TokenOptimizerHashtable -Base $defaults -User $config.optimization +} + +function Get-TokenOptimizerModelTokenLimit { + param( + [Parameter(Mandatory = $true)] + [string]$ModelName + ) + $opt = Get-TokenOptimizerOptimizationConfig + if ($opt.modelTokenLimits -and $opt.modelTokenLimits.ContainsKey($ModelName)) { + return $opt.modelTokenLimits[$ModelName] + } + return $null +} diff --git a/hooks/helpers/context-delta.ps1 b/hooks/helpers/context-delta.ps1 new file mode 100644 index 0000000..e8035e7 --- /dev/null +++ b/hooks/helpers/context-delta.ps1 @@ -0,0 +1,98 @@ +[CmdletBinding()] +param() + +<# +PowerShell integration for the context_delta MCP tool — addresses +issue #122 Phase 2. + +Get-TokenOptimizerSessionId generates a stable sessionId per top-level +PS session (cached on the script scope and persisted to a marker file +so multiple orchestrator invocations within one Claude session reuse +the same id). + +Invoke-ContextDelta calls the context_delta MCP tool via the shared +Invoke-TokenOptimizer helper and returns the unified-diff delta so +Handle-SmartRead can emit only the changed lines to the model. 
+#> + +$script:TokenOptimizerSessionIdPath = + Join-Path $env:USERPROFILE '.token-optimizer\current-session-id' + +function Get-TokenOptimizerSessionId { + if ($script:TokenOptimizerCurrentSessionId) { + return $script:TokenOptimizerCurrentSessionId + } + if (Test-Path $script:TokenOptimizerSessionIdPath) { + $existing = (Get-Content -Path $script:TokenOptimizerSessionIdPath -Raw).Trim() + if ($existing) { + $script:TokenOptimizerCurrentSessionId = $existing + return $existing + } + } + $newId = [guid]::NewGuid().ToString() + $dir = Split-Path -Parent $script:TokenOptimizerSessionIdPath + if (-not (Test-Path $dir)) { + New-Item -ItemType Directory -Path $dir -Force | Out-Null + } + Set-Content -Path $script:TokenOptimizerSessionIdPath -Value $newId + $script:TokenOptimizerCurrentSessionId = $newId + return $newId +} + +function Reset-TokenOptimizerSessionId { + $script:TokenOptimizerCurrentSessionId = $null + if (Test-Path $script:TokenOptimizerSessionIdPath) { + Remove-Item -Path $script:TokenOptimizerSessionIdPath -Force + } +} + +function Invoke-ContextDelta { + param( + [Parameter(Mandatory = $true)] + [ValidateSet('compute-delta', 'seed', 'clear')] + [string]$Operation, + [Parameter(Mandatory = $true)][string]$FilePath, + [string]$CurrentContent = $null, + [string]$SessionId = $null + ) + + if (-not $SessionId) { + $SessionId = Get-TokenOptimizerSessionId + } + $toolArgs = @{ + operation = $Operation + sessionId = $SessionId + filePath = $FilePath + } + if ($Operation -ne 'clear' -and $null -ne $CurrentContent) { + $toolArgs.currentContent = $CurrentContent + } + + # Call the MCP tool via the repo's existing invoke-mcp.ps1 script. + # The server-side ContextDeltaTool auto-creates the session on first + # contact, so there's no separate bootstrap step needed here. 
+ $invokeMcp = Join-Path $PSScriptRoot 'invoke-mcp.ps1' + if (-not (Test-Path $invokeMcp)) { + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log "invoke-mcp.ps1 not found at $invokeMcp; skipping context_delta." 'DEBUG' + } + return $null + } + + try { + $argsJson = $toolArgs | ConvertTo-Json -Compress + $resultJson = & $invokeMcp -Tool 'context_delta' -ArgumentsJson $argsJson + if ($resultJson) { + return ($resultJson | ConvertFrom-Json) + } + return $null + } catch { + $msg = "Invoke-ContextDelta failed: $($_.Exception.Message)" + if (Get-Command Write-Log -ErrorAction SilentlyContinue) { + Write-Log $msg 'WARN' + } else { + Write-Warning $msg + } + return $null + } +} diff --git a/hooks/helpers/gzip.ps1 b/hooks/helpers/gzip.ps1 new file mode 100644 index 0000000..9527dbf --- /dev/null +++ b/hooks/helpers/gzip.ps1 @@ -0,0 +1,118 @@ +[CmdletBinding()] +param() + +<# +Gzip utilities — addresses issue #126 (PowerShell side). + +Compress-String / Expand-String are the primitives. Save-GzippedFile +writes .gz atomically (tmp + rename) and strips the plaintext +sibling once the gzip lands. Read-MaybeGzippedFile prefers .gz +and falls back to plaintext so PS code can be migrated incrementally. 
+#> + +function Compress-String { + param( + [Parameter(Mandatory = $true)][string]$InputString, + [ValidateSet('Optimal', 'Fastest', 'NoCompression', 'SmallestSize')] + [string]$CompressionLevel = 'Optimal' + ) + $inputStream = $null + $outputStream = $null + $gzipStream = $null + try { + $bytes = [System.Text.Encoding]::UTF8.GetBytes($InputString) + $inputStream = [System.IO.MemoryStream]::new($bytes) + $outputStream = [System.IO.MemoryStream]::new() + $level = [System.IO.Compression.CompressionLevel]::$CompressionLevel + $gzipStream = [System.IO.Compression.GZipStream]::new($outputStream, $level) + $inputStream.CopyTo($gzipStream) + $gzipStream.Dispose() + $gzipStream = $null + return ,$outputStream.ToArray() + } finally { + if ($null -ne $gzipStream) { $gzipStream.Dispose() } + if ($null -ne $inputStream) { $inputStream.Dispose() } + if ($null -ne $outputStream) { $outputStream.Dispose() } + } +} + +function Expand-String { + param( + [Parameter(Mandatory = $true)][byte[]]$CompressedBytes + ) + $inputStream = $null + $outputStream = $null + $gzipStream = $null + try { + $inputStream = [System.IO.MemoryStream]::new($CompressedBytes) + $outputStream = [System.IO.MemoryStream]::new() + $gzipStream = [System.IO.Compression.GZipStream]::new( + $inputStream, + [System.IO.Compression.CompressionMode]::Decompress + ) + $gzipStream.CopyTo($outputStream) + return [System.Text.Encoding]::UTF8.GetString($outputStream.ToArray()) + } finally { + if ($null -ne $gzipStream) { $gzipStream.Dispose() } + if ($null -ne $inputStream) { $inputStream.Dispose() } + if ($null -ne $outputStream) { $outputStream.Dispose() } + } +} + +function Save-GzippedFile { + param( + [Parameter(Mandatory = $true)][string]$Path, + [Parameter(Mandatory = $true)][string]$Content + ) + $dir = Split-Path -Parent $Path + if ($dir -and -not (Test-Path $dir)) { + New-Item -ItemType Directory -Path $dir -Force | Out-Null + } + $compressed = Compress-String -InputString $Content + $gzPath = "$Path.gz" + # 
Per-write temp path so concurrent writers to the same destination + # can't clobber each other mid-write. + $tmpPath = "$gzPath.$([guid]::NewGuid().ToString('N')).tmp" + [System.IO.File]::WriteAllBytes($tmpPath, $compressed) + # Atomic swap: File::Move(src, dst, overwrite:$true) on .NET5+. + # Unlike "delete then move", this never leaves the caller with a + # missing .gz file if the process crashes. + try { + [System.IO.File]::Move($tmpPath, $gzPath, $true) + } finally { + if (Test-Path $tmpPath) { + Remove-Item -Path $tmpPath -Force -ErrorAction SilentlyContinue + } + } + if (Test-Path $Path) { + Remove-Item -Path $Path -Force -ErrorAction SilentlyContinue + } + return @{ + originalBytes = [System.Text.Encoding]::UTF8.GetByteCount($Content) + compressedBytes = $compressed.Length + } +} + +function Read-MaybeGzippedFile { + param( + [Parameter(Mandatory = $true)][string]$Path + ) + $gzPath = "$Path.gz" + if (Test-Path $gzPath) { + try { + $bytes = [System.IO.File]::ReadAllBytes($gzPath) + return Expand-String -CompressedBytes $bytes + } catch { + # Corrupt / partial .gz — fall back to the plaintext sibling + # so the backward-compat migration path still works. If no + # plaintext exists either, rethrow the original error. 
+ if (-not (Test-Path $Path)) { + throw + } + } + } + if (Test-Path $Path) { + return [System.IO.File]::ReadAllText($Path, [System.Text.Encoding]::UTF8) + } + return $null +} diff --git a/hooks/helpers/logging.ps1 b/hooks/helpers/logging.ps1 new file mode 100644 index 0000000..b52f54a --- /dev/null +++ b/hooks/helpers/logging.ps1 @@ -0,0 +1,51 @@ +[CmdletBinding()] +param() + +function Write-Log { + param( + [string]$Message, + [ValidateSet('DEBUG','INFO','WARN','ERROR')][string]$Level = "INFO", + [string]$Context = "" + ) + + # Check if debug logging is disabled + $debugLogging = if ($env:TOKEN_OPTIMIZER_DEBUG_LOGGING) { + $env:TOKEN_OPTIMIZER_DEBUG_LOGGING -eq 'true' + } else { + $true # Default: enabled + } + + if ($Level -eq 'DEBUG' -and -not $debugLogging) { + return + } + + $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss" + $contextPart = if ($Context) { " [$Context]" } else { "" } + $logMessage = "[$timestamp] [$Level]$contextPart $Message" + if ($script:LOG_FILE) { + try { + $logDir = Split-Path -Parent $script:LOG_FILE + if ($logDir -and -not (Test-Path $logDir)) { + New-Item -ItemType Directory -Path $logDir -Force | Out-Null + } + $logMessage | Out-File -FilePath $script:LOG_FILE -Append -Encoding UTF8 + } catch { + # Swallow — logging must never be a failure mode for the caller. + } + } + Write-Verbose $logMessage +} + +function Handle-Error { + param( + [System.Exception]$Exception, + [string]$Message = "" + ) + + $errorMessage = if ($Message) { $Message } else { $Exception.Message } + # $StackTrace is a built-in PowerShell automatic variable — use a + # different name so we don't shadow it. 
+ $exceptionTrace = $Exception.ScriptStackTrace + Write-Log "ERROR: $errorMessage" "ERROR" + Write-Log "StackTrace: $exceptionTrace" "ERROR" +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index a3c484a..a34c8da 100644 --- a/package-lock.json +++ b/package-lock.json @@ -137,6 +137,7 @@ "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -1144,9 +1145,9 @@ } }, "node_modules/@eslint/config-array/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", "dev": true, "license": "MIT", "dependencies": { @@ -1225,9 +1226,9 @@ "license": "Python-2.0" }, "node_modules/@eslint/eslintrc/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", "dev": true, "license": "MIT", "dependencies": { @@ -2012,6 +2013,7 @@ "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": 
"^6.0.0", "@octokit/graphql": "^9.0.2", @@ -3073,6 +3075,7 @@ "integrity": "sha512-/NbVmcGTP+lj5oa4yiYxxeBjRivKQ5Ns1eSZeB99ExsEQ6rX5XYU1Zy/gGxY/ilqtD4Etx9mKyrPxZRetiahhA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.14.0" } @@ -3208,6 +3211,7 @@ "integrity": "sha512-6JSSaBZmsKvEkbRUkf7Zj7dru/8ZCrJxAqArcLaVMee5907JdtEbKGsZ7zNiIm/UAkpGUkaSMZEXShnN2D1HZA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.46.1", "@typescript-eslint/types": "8.46.1", @@ -3702,6 +3706,7 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -3863,6 +3868,19 @@ "node": ">= 8" } }, + "node_modules/anymatch/node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -4105,9 +4123,9 @@ "license": "MIT" }, "node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", + "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", "dev": true, "license": "MIT", "dependencies": { @@ -4147,6 +4165,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { 
"baseline-browser-mapping": "^2.8.9", "caniuse-lite": "^1.0.30001746", @@ -4862,6 +4881,7 @@ "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -5473,6 +5493,7 @@ "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -5574,9 +5595,9 @@ } }, "node_modules/eslint/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", "dev": true, "license": "MIT", "dependencies": { @@ -5876,6 +5897,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -6672,6 +6694,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.14.tgz", "integrity": "sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -7248,6 +7271,7 @@ "integrity": "sha512-F26gjC0yWN8uAA5m5Ss8ZQf5nDHWGlN/xWZIh8S5SRbsEKBovwZhxGd6LJlbZYxBgCYOtreSUyb8hpXyGC5O4A==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@jest/core": "30.2.0", "@jest/types": 
"30.2.0", @@ -7875,19 +7899,6 @@ "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" } }, - "node_modules/jest-util/node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, "node_modules/jest-validate": { "version": "30.2.0", "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-30.2.0.tgz", @@ -8373,6 +8384,7 @@ "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==", "dev": true, "license": "MIT", + "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -8505,6 +8517,19 @@ "node": ">=8.6" } }, + "node_modules/micromatch/node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/mime": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/mime/-/mime-4.1.0.tgz", @@ -10593,6 +10618,7 @@ "dev": true, "inBundle": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -11033,13 +11059,14 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", - "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": 
"sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", + "peer": true, "engines": { - "node": ">=8.6" + "node": ">=12" }, "funding": { "url": "https://github.com/sponsors/jonschlinkert" @@ -11650,6 +11677,7 @@ "integrity": "sha512-6qGjWccl5yoyugHt3jTgztJ9Y0JVzyH8/Voc/D8PlLat9pwxQYXz7W1Dpnq5h0/G5GCYGUaDSlYcyk3AMh5A6g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@semantic-release/commit-analyzer": "^13.0.1", "@semantic-release/error": "^4.0.0", @@ -13037,9 +13065,9 @@ } }, "node_modules/test-exclude/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", "dev": true, "license": "MIT", "dependencies": { @@ -13233,19 +13261,6 @@ } } }, - "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -13451,6 +13466,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -13964,6 +13980,7 @@ "resolved": 
"https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/src/analytics/optimization-storage.ts b/src/analytics/optimization-storage.ts new file mode 100644 index 0000000..81486d7 --- /dev/null +++ b/src/analytics/optimization-storage.ts @@ -0,0 +1,147 @@ +import Database from 'better-sqlite3'; +import { existsSync, mkdirSync } from 'fs'; +import { homedir } from 'os'; +import { dirname, join } from 'path'; +import { CompressionEngine } from '../core/compression-engine.js'; + +export interface OptimizationResult { + originalTextHash: string; + optimizedText: string; + originalTokens: number; + optimizedTokens: number; + tokensSaved: number; +} + +export function getDefaultOptimizationDbPath(): string { + return join(homedir(), '.token-optimizer', 'optimization.db'); +} + +export class SqliteOptimizationStorage { + private db: Database.Database | null = null; + private readonly dbPath: string; + private readonly compressionEngine: CompressionEngine; + + constructor(dbPath?: string) { + this.dbPath = dbPath ?? 
getDefaultOptimizationDbPath(); + this.compressionEngine = new CompressionEngine(); + } + + public initializeDatabase(): void { + const dir = dirname(this.dbPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + this.db = new Database(this.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.exec(` + CREATE TABLE IF NOT EXISTS optimization_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + original_text_hash TEXT NOT NULL UNIQUE, + optimized_text_compressed BLOB NOT NULL, + compression_algorithm TEXT NOT NULL, + original_tokens INTEGER NOT NULL, + optimized_tokens INTEGER NOT NULL, + tokens_saved INTEGER NOT NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + CREATE INDEX IF NOT EXISTS idx_optimization_hash + ON optimization_results(original_text_hash); + `); + } + + private requireDb(): Database.Database { + if (!this.db) { + throw new Error('Optimization storage database is not initialized. Call initializeDatabase() first.'); + } + return this.db; + } + + public save(entry: OptimizationResult): void { + const db = this.requireDb(); + const compressed = this.compressionEngine.compress(entry.optimizedText); + + db.prepare( + `INSERT OR REPLACE INTO optimization_results + (original_text_hash, optimized_text_compressed, compression_algorithm, + original_tokens, optimized_tokens, tokens_saved) + VALUES (?, ?, ?, ?, ?, ?)` + ).run( + entry.originalTextHash, + compressed.compressed, + SqliteOptimizationStorage.COMPRESSION_ALGORITHM, + entry.originalTokens, + entry.optimizedTokens, + entry.tokensSaved + ); + } + + public get(originalTextHash: string): OptimizationResult | null { + const db = this.requireDb(); + const row = db.prepare( + `SELECT optimized_text_compressed, compression_algorithm, + original_tokens, optimized_tokens, tokens_saved + FROM optimization_results WHERE original_text_hash = ?` + ).get(originalTextHash) as + | { + optimized_text_compressed: Buffer; + compression_algorithm: string; + original_tokens: number; + 
optimized_tokens: number; + tokens_saved: number; + } + | undefined; + + if (!row) { + return null; + } + + return { + originalTextHash, + optimizedText: this.decodePayload( + row.optimized_text_compressed, + row.compression_algorithm + ), + originalTokens: row.original_tokens, + optimizedTokens: row.optimized_tokens, + tokensSaved: row.tokens_saved, + }; + } + + /** + * Decode a stored payload using the persisted algorithm label. Keeps + * the door open for additional algorithms (gzip, zstd) without + * touching the read path, and surfaces an explicit error for + * unknown labels instead of silently corrupting data. + */ + private decodePayload(buffer: Buffer, algorithm: string | null): string { + if (algorithm === 'brotli') { + return this.compressionEngine.decompress(buffer); + } + if (algorithm === 'none' || algorithm === '') { + return buffer.toString('utf8'); + } + if (algorithm === null || algorithm === undefined) { + // Legacy rows without a recorded algorithm: pre-tracking code + // always wrote brotli, but we still accept raw UTF-8 as a last + // resort so a one-off plaintext row doesn't poison reads. + try { + return this.compressionEngine.decompress(buffer); + } catch { + return buffer.toString('utf8'); + } + } + throw new Error( + `Unknown compression_algorithm in optimization_results: ${algorithm}` + ); + } + + /** Algorithm label paired with the current CompressionEngine. 
*/ + public static readonly COMPRESSION_ALGORITHM = 'brotli'; + + public close(): void { + if (this.db) { + this.db.close(); + this.db = null; + } + } +} diff --git a/src/core/compression-engine.ts b/src/core/compression-engine.ts index 2be5b9e..b2daabb 100644 --- a/src/core/compression-engine.ts +++ b/src/core/compression-engine.ts @@ -1,167 +1,103 @@ import { brotliCompressSync, brotliDecompressSync, constants } from 'zlib'; export interface CompressionResult { - compressed: Buffer; - originalSize: number; - compressedSize: number; - ratio: number; - percentSaved: number; -} - -export interface CompressionOptions { - quality?: number; // 0-11, default 11 (max compression) - mode?: 'text' | 'font' | 'generic'; -} - -export class CompressionEngine { - private readonly DEFAULT_QUALITY = 11; - - /** - * Compress text using Brotli - */ - compress(text: string, options?: CompressionOptions): CompressionResult { - const buffer = Buffer.from(text, 'utf-8'); - const quality = options?.quality ?? this.DEFAULT_QUALITY; - const mode = this.getModeConstant(options?.mode); - - const compressed = brotliCompressSync(buffer, { - params: { - [constants.BROTLI_PARAM_QUALITY]: quality, - [constants.BROTLI_PARAM_MODE]: mode, - }, - }); - - const originalSize = buffer.length; - const compressedSize = compressed.length; - const ratio = originalSize > 0 ? compressedSize / originalSize : 0; - const percentSaved = - originalSize > 0 - ? 
((originalSize - compressedSize) / originalSize) * 100 - : 0; - - return { - compressed, - originalSize, - compressedSize, - ratio, - percentSaved, - }; - } - - /** - * Decompress Brotli-compressed data - */ - decompress(compressed: Buffer): string { - const decompressed = brotliDecompressSync(compressed); - return decompressed.toString('utf-8'); - } - - /** - * Compress to base64 string (for easier storage) - */ - compressToBase64( - text: string, - options?: CompressionOptions - ): { - compressed: string; + compressed: Buffer; originalSize: number; compressedSize: number; ratio: number; percentSaved: number; - } { - const result = this.compress(text, options); - - return { - compressed: result.compressed.toString('base64'), - originalSize: result.originalSize, - compressedSize: result.compressedSize, - ratio: result.ratio, - percentSaved: result.percentSaved, - }; - } - - /** - * Decompress from base64 string - */ - decompressFromBase64(compressed: string): string { - const buffer = Buffer.from(compressed, 'base64'); - return this.decompress(buffer); - } +} - /** - * Check if compression would be beneficial - */ - shouldCompress(text: string, minSize: number = 1000): boolean { - // Don't compress small texts - overhead not worth it - if (text.length < minSize) { - return false; +export class CompressionEngine { + public compress(text: string, options?: { quality?: number; mode?: string; }): CompressionResult { + const originalSize = Buffer.byteLength(text, 'utf8'); + if (originalSize === 0) { + return { + compressed: Buffer.alloc(0), + originalSize: 0, + compressedSize: 0, + ratio: 0, + percentSaved: 0, + }; + } + + const params = { + [constants.BROTLI_PARAM_QUALITY]: options?.quality ?? constants.BROTLI_MAX_QUALITY, + [constants.BROTLI_PARAM_MODE]: options?.mode === 'text' ? 
constants.BROTLI_MODE_TEXT : constants.BROTLI_MODE_GENERIC, + }; + + const compressed = brotliCompressSync(text, { params }); + const compressedSize = compressed.length; + const ratio = compressedSize / originalSize; + const percentSaved = (1 - ratio) * 100; + + return { + compressed, + originalSize, + compressedSize, + ratio, + percentSaved, + }; } - // Quick sample compression to check ratio - const sample = text.slice(0, Math.min(text.length, 5000)); - const result = this.compress(sample, { quality: 4 }); // Use lower quality for quick test + public decompress(buffer: Buffer): string { + if (!buffer || buffer.length === 0) { + return ''; + } + return brotliDecompressSync(buffer).toString('utf8'); + } - // Only compress if we get at least 20% reduction - return result.percentSaved >= 20; - } + public compressToBase64(text: string, options?: { quality?: number; mode?: string; }): Omit & { compressed: string } { + const result = this.compress(text, options); + return { + originalSize: result.originalSize, + compressedSize: result.compressedSize, + ratio: result.ratio, + percentSaved: result.percentSaved, + compressed: result.compressed.toString('base64'), + }; + } - /** - * Batch compress multiple texts - */ - compressBatch( - texts: string[], - options?: CompressionOptions - ): Array<{ - index: number; - compressed: Buffer; - originalSize: number; - compressedSize: number; - ratio: number; - }> { - return texts.map((text, index) => { - const result = this.compress(text, options); - return { - index, - compressed: result.compressed, - originalSize: result.originalSize, - compressedSize: result.compressedSize, - ratio: result.ratio, - }; - }); - } + public decompressFromBase64(base64: string): string { + const buffer = Buffer.from(base64, 'base64'); + return this.decompress(buffer); + } - /** - * Get compression statistics for text - */ - getCompressionStats(text: string): { - uncompressed: number; - compressed: number; - ratio: number; - percentSaved: number; - 
recommended: boolean; - } { - const result = this.compress(text); + public compressBatch(texts: string[]): (CompressionResult & { index: number; })[] { + return texts.map((text, index) => ({ + ...this.compress(text), + index, + })); + } - return { - uncompressed: result.originalSize, - compressed: result.compressedSize, - ratio: result.ratio, - percentSaved: result.percentSaved, - recommended: this.shouldCompress(text), - }; - } + public shouldCompress(text: string, minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES): boolean { + if (Buffer.byteLength(text, 'utf8') < minSize) { + return false; + } + const stats = this.getCompressionStats(text, minSize); + return stats.percentSaved >= 20; + } - /** - * Convert mode string to Brotli constant - */ - private getModeConstant(mode?: 'text' | 'font' | 'generic'): number { - switch (mode) { - case 'text': - return constants.BROTLI_MODE_TEXT; - case 'font': - return constants.BROTLI_MODE_FONT; - default: - return constants.BROTLI_MODE_GENERIC; + public getCompressionStats( + text: string, + minSize: number = CompressionEngine.DEFAULT_MIN_SIZE_BYTES + ): { uncompressed: number; compressed: number; ratio: number; percentSaved: number; recommended: boolean; } { + const result = this.compress(text); + const recommended = result.originalSize >= minSize && result.percentSaved >= 20; + return { + uncompressed: result.originalSize, + compressed: result.compressedSize, + ratio: result.ratio, + percentSaved: result.percentSaved, + recommended: recommended, + }; } - } + + /** + * Default minimum size (in bytes) below which compression isn't + * worth the metadata overhead. Exposed as a static so callers can + * override via OptimizationConfig.minOutputSizeBytes and have + * `recommended` / `shouldCompress` agree on the threshold. 
+ */ + public static DEFAULT_MIN_SIZE_BYTES = 500; } diff --git a/src/core/config.ts b/src/core/config.ts index d5cd01c..f684c0a 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -2,10 +2,39 @@ * Configuration management for Hypercontext MCP */ -import { HypercontextConfig } from './types.js'; -import { readFileSync, existsSync } from 'fs'; +import { z } from 'zod'; +import { HypercontextConfig, OptimizationConfig } from './types.js'; +import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs'; import { homedir } from 'os'; -import { join } from 'path'; +import { dirname, join } from 'path'; + +const DEFAULT_OPTIMIZATION: OptimizationConfig = { + compressionTokenThreshold: 0.7, + compressionPreserveThreshold: 0.3, + minTokensBeforeCompression: 1000, + modelTokenLimits: { + 'gpt-4': 128000, + 'gpt-4-turbo': 128000, + 'gpt-3.5-turbo': 16385, + 'claude-3-opus': 200000, + 'claude-3-sonnet': 200000, + 'claude-3-haiku': 200000, + 'claude-opus-4-7': 1000000, + 'claude-sonnet-4-6': 1000000, + 'gemini-1.5-pro': 2000000, + 'gemini-2.5-flash': 1000000, + }, + minOutputSizeBytes: 500, + quality: 'balanced', + cacheSettings: { + maxSize: 1000, + ttlSeconds: 3600, + }, + chatCompression: { + enabled: true, + strategy: 'summarize', + }, +}; const DEFAULT_CONFIG: HypercontextConfig = { cache: { @@ -38,18 +67,119 @@ const DEFAULT_CONFIG: HypercontextConfig = { streamingThreshold: 1024 * 1024, // 1MB enableStreaming: false, }, + optimization: DEFAULT_OPTIMIZATION, }; +const CacheSettingsSchema = z.object({ + maxSize: z.number().int().positive(), + ttlSeconds: z.number().int().nonnegative(), +}); + +const ChatCompressionSchema = z.object({ + enabled: z.boolean(), + tokenLimit: z.number().int().positive().optional(), + strategy: z.enum(['summarize', 'truncate']), +}); + +const OptimizationConfigSchema = z.object({ + compressionTokenThreshold: z.number().min(0).max(1), + compressionPreserveThreshold: z.number().min(0).max(1), + minTokensBeforeCompression: 
z.number().int().nonnegative(), + modelTokenLimits: z.record(z.string(), z.number().int().positive()), + minOutputSizeBytes: z.number().int().nonnegative(), + quality: z.enum(['fast', 'balanced', 'max']), + cacheSettings: CacheSettingsSchema, + chatCompression: ChatCompressionSchema, +}); + +/** + * User-supplied optimization schema. Partial at every depth so users can + * override just one field (e.g. `{ cacheSettings: { maxSize: 42 } }`) + * without having to re-supply the entire sub-object. + */ +const OptimizationConfigUserSchema = OptimizationConfigSchema.partial().extend({ + cacheSettings: CacheSettingsSchema.partial().optional(), + chatCompression: ChatCompressionSchema.partial().optional(), +}); + +const HypercontextConfigSchema = z + .object({ + cache: z + .object({ + enabled: z.boolean(), + maxSizeMB: z.number().int().positive(), + defaultTTL: z.number().int().nonnegative(), + ttlByType: z.record(z.string(), z.number().int().nonnegative()), + compression: z.enum(['none', 'gzip', 'brotli', 'auto']), + }) + .partial() + .optional(), + monitoring: z + .object({ + enabled: z.boolean(), + detailedLogging: z.boolean(), + metricsRetentionDays: z.number().int().nonnegative(), + dashboardPort: z.number().int().positive(), + enableWebUI: z.boolean(), + }) + .partial() + .optional(), + intelligence: z + .object({ + enablePatternDetection: z.boolean(), + enableWorkflowLearning: z.boolean(), + enablePredictiveCaching: z.boolean(), + mlModelPath: z.string(), + }) + .partial() + .optional(), + performance: z + .object({ + maxConcurrentOps: z.number().int().positive(), + streamingThreshold: z.number().int().positive(), + enableStreaming: z.boolean(), + }) + .partial() + .optional(), + optimization: OptimizationConfigUserSchema.optional(), + }) + .passthrough(); + export class ConfigManager { private config: HypercontextConfig; private configPath: string; - constructor(configPath?: string) { + constructor(configPath?: string, options: { writeDefaults?: boolean } = {}) { 
this.configPath = - configPath || join(homedir(), '.hypercontext', 'config.json'); + configPath || join(homedir(), '.token-optimizer', 'config.json'); + const writeDefaults = options.writeDefaults ?? true; + if (writeDefaults && !existsSync(this.configPath)) { + this.writeDefaultConfig(); + } this.config = this.loadConfig(); } + /** + * Write DEFAULT_CONFIG to configPath on first run — addresses #120's + * "Default config created on first run" acceptance criterion. + * Errors are logged and non-fatal; callers still get an in-memory + * DEFAULT_CONFIG via loadConfig(). + */ + private writeDefaultConfig(): void { + try { + const dir = dirname(this.configPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + writeFileSync(this.configPath, JSON.stringify(DEFAULT_CONFIG, null, 2)); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn( + `ConfigManager: failed to write default config to ${this.configPath}: ${message}` + ); + } + } + private loadConfig(): HypercontextConfig { if (!existsSync(this.configPath)) { return DEFAULT_CONFIG; @@ -57,26 +187,80 @@ export class ConfigManager { try { const fileContent = readFileSync(this.configPath, 'utf-8'); - const userConfig = JSON.parse(fileContent); - return this.mergeConfig(DEFAULT_CONFIG, userConfig); + const rawUserConfig = JSON.parse(fileContent); + const parsed = HypercontextConfigSchema.safeParse(rawUserConfig); + if (!parsed.success) { + const issues = parsed.error.issues + .map((i) => ` - ${i.path.join('.') || 'root'}: ${i.message}`) + .join('\n'); + console.warn( + `Invalid config at ${this.configPath}, using defaults:\n${issues}` + ); + return DEFAULT_CONFIG; + } + return this.mergeConfig(DEFAULT_CONFIG, parsed.data); } catch (error) { - console.warn('Failed to load config, using defaults:', error); + const message = error instanceof Error ? 
error.message : String(error); + console.warn(`Failed to load config, using defaults: ${message}`); return DEFAULT_CONFIG; } } private mergeConfig( defaults: HypercontextConfig, - user: Partial + user: { + cache?: Partial; + monitoring?: Partial; + intelligence?: Partial; + performance?: Partial; + optimization?: Partial< + Omit + > & { + cacheSettings?: Partial; + chatCompression?: Partial; + }; + } ): HypercontextConfig { + const userOpt = user.optimization ?? {}; + // Preserve any existing optimization state the caller may have set + // (e.g. via prior update()) instead of always starting from + // DEFAULT_OPTIMIZATION. Non-optimization updates should no longer + // silently reset the entire optimization block. + const baseOptimization = defaults.optimization ?? DEFAULT_OPTIMIZATION; return { cache: { ...defaults.cache, ...user.cache }, monitoring: { ...defaults.monitoring, ...user.monitoring }, intelligence: { ...defaults.intelligence, ...user.intelligence }, performance: { ...defaults.performance, ...user.performance }, + optimization: { + ...baseOptimization, + ...userOpt, + cacheSettings: { + ...baseOptimization.cacheSettings, + ...(userOpt.cacheSettings ?? {}), + }, + chatCompression: { + ...baseOptimization.chatCompression, + ...(userOpt.chatCompression ?? {}), + }, + // Deep-merge model token limits so a user override like + // { "custom-model": 500_000 } does not drop the built-in map. + modelTokenLimits: { + ...baseOptimization.modelTokenLimits, + ...(userOpt.modelTokenLimits ?? {}), + }, + }, }; } + public getOptimizationConfig(): OptimizationConfig { + return this.config.optimization ?? 
DEFAULT_OPTIMIZATION; + } + + public getModelTokenLimit(modelName: string): number | undefined { + return this.getOptimizationConfig().modelTokenLimits[modelName]; + } + get(): HypercontextConfig { return { ...this.config }; } diff --git a/src/core/session-manager.ts b/src/core/session-manager.ts new file mode 100644 index 0000000..30df98f --- /dev/null +++ b/src/core/session-manager.ts @@ -0,0 +1,282 @@ +import { existsSync } from 'fs'; +import { z } from 'zod'; +import { + Session, + SessionOptions, + MessageRole, +} from './session.js'; +import { ITokenizer } from './tokenizers/i-tokenizer.js'; +import { ISummarizer } from './summarization.js'; +import { loadMaybeGzippedFile, saveGzippedFile } from '../utils/gzip.js'; + +/** + * Persistent SessionManager — addresses issues #121 / #122. + * + * Production behaviors added after the audit: + * - Atomic persistence: write to .tmp then rename so a crash mid- + * write never produces a corrupt sessions.json. + * - Debounced persistence: rapid addMessage calls coalesce into one + * disk write per PERSIST_DEBOUNCE_MS window. + * - Error-isolated persist(): a disk-full or permission error is logged + * and never bubbles up to crash the MCP server. + * - Schema-validated load(): malformed persisted state is rejected with + * a warning instead of being cast blindly. + * - Size / expiry caps: sessions inactive past `sessionTtlMs` are + * evicted on load, and no individual file state entry can exceed + * `maxFileStateBytes`. 
+ */ + +const PERSIST_DEBOUNCE_MS = 250; +const DEFAULT_SESSION_TTL_MS = 30 * 24 * 60 * 60 * 1000; // 30 days +const DEFAULT_MAX_FILE_STATE_BYTES = 10 * 1024 * 1024; // 10 MB per file + +const MessageSchema = z.object({ + role: z.enum(['system', 'user', 'assistant', 'tool']), + content: z.string(), + timestamp: z.number(), +}); + +const SessionSnapshotSchema = z.object({ + id: z.string(), + history: z.array(MessageSchema), + fileState: z.record(z.string(), z.string()), + maxTokens: z.number(), + createdAt: z.number(), + updatedAt: z.number(), +}); + +const PersistedStateSchema = z.object({ + sessions: z.array(SessionSnapshotSchema), +}); + +export interface SessionManagerOptions { + persistencePath?: string; + tokenizer?: ITokenizer; + summarizer?: ISummarizer; + defaultMaxTokens?: number; + sessionTtlMs?: number; + maxFileStateBytes?: number; +} + +export class SessionManager { + private readonly sessions = new Map(); + private readonly persistencePath: string | null; + private readonly tokenizer: ITokenizer | undefined; + private readonly summarizer: ISummarizer | undefined; + private readonly defaultMaxTokens: number | undefined; + private readonly sessionTtlMs: number; + private readonly maxFileStateBytes: number; + private pendingPersistTimer: NodeJS.Timeout | null = null; + private persistInFlight = false; + + constructor(options: SessionManagerOptions = {}) { + this.persistencePath = options.persistencePath ?? null; + this.tokenizer = options.tokenizer; + this.summarizer = options.summarizer; + this.defaultMaxTokens = options.defaultMaxTokens; + this.sessionTtlMs = options.sessionTtlMs ?? DEFAULT_SESSION_TTL_MS; + this.maxFileStateBytes = + options.maxFileStateBytes ?? 
DEFAULT_MAX_FILE_STATE_BYTES;
+    if (
+      this.persistencePath &&
+      (existsSync(`${this.persistencePath}.gz`) ||
+        existsSync(this.persistencePath))
+    ) {
+      this.load();
+    }
+  }
+
+  /**
+   * Create a session, register it under its id, and schedule a persist.
+   *
+   * Caller-supplied options win, but any of `tokenizer`, `summarizer`
+   * or `maxTokens` that is missing OR explicitly `undefined` falls back
+   * to the manager-level value.
+   *
+   * @param options Per-session overrides (id, maxTokens, tokenizer, ...).
+   * @returns The newly created session.
+   */
+  public createSession(options: SessionOptions = {}): Session {
+    const session = new Session({
+      // Spread first: a later `...options` would let a key that is
+      // present-but-undefined (e.g. `{ maxTokens: args.maxTokens }`)
+      // clobber the manager-level fallbacks computed below.
+      ...options,
+      tokenizer: options.tokenizer ?? this.tokenizer,
+      summarizer: options.summarizer ?? this.summarizer,
+      maxTokens: options.maxTokens ?? this.defaultMaxTokens,
+    });
+    this.sessions.set(session.id, session);
+    this.schedulePersist();
+    return session;
+  }
+
+  public getSession(id: string): Session | undefined {
+    return this.sessions.get(id);
+  }
+
+  public listSessions(): Session[] {
+    return Array.from(this.sessions.values());
+  }
+
+  public deleteSession(id: string): boolean {
+    const removed = this.sessions.delete(id);
+    if (removed) {
+      this.schedulePersist();
+    }
+    return removed;
+  }
+
+  public async addMessage(
+    sessionId: string,
+    role: MessageRole,
+    content: string
+  ): Promise<number> {
+    const session = this.requireSession(sessionId);
+    session.addMessage(role, content);
+    // Schedule persistence in `finally` so the mutated session still
+    // hits disk even if tokenization or compression throws. Without
+    // this, a single tokenizer error leaves the message appended
+    // in memory but never persisted, and a restart loses the turn.
+    try {
+      const currentTokens = await session.getHistoryTokenCount();
+      if (currentTokens > session.maxTokens) {
+        return await session.compressHistory();
+      }
+      return currentTokens;
+    } finally {
+      this.schedulePersist();
+    }
+  }
+
+  /** Fetch an existing session, or create one with the given id.
*/ + public getOrCreateSession(id: string): Session { + const existing = this.sessions.get(id); + if (existing) { + return existing; + } + return this.createSession({ id }); + } + + public updateFileState( + sessionId: string, + filePath: string, + content: string + ): void { + const session = this.requireSession(sessionId); + if (Buffer.byteLength(content, 'utf8') > this.maxFileStateBytes) { + throw new Error( + `Session file state content exceeds ${this.maxFileStateBytes} bytes for ${filePath}` + ); + } + session.setFileContent(filePath, content); + this.schedulePersist(); + } + + public clearFileState(sessionId: string, filePath: string): void { + const session = this.requireSession(sessionId); + session.clearFileContent(filePath); + this.schedulePersist(); + } + + /** + * Flush any pending debounced persist. Call this from the host's + * shutdown handler so the last writes survive. + */ + public async flush(): Promise { + if (this.pendingPersistTimer) { + clearTimeout(this.pendingPersistTimer); + this.pendingPersistTimer = null; + } + this.persistNow(); + } + + private requireSession(id: string): Session { + const session = this.sessions.get(id); + if (!session) { + throw new Error(`Unknown session: ${id}`); + } + return session; + } + + private schedulePersist(): void { + if (!this.persistencePath) { + return; + } + if (this.pendingPersistTimer) { + return; + } + this.pendingPersistTimer = setTimeout(() => { + this.pendingPersistTimer = null; + this.persistNow(); + }, PERSIST_DEBOUNCE_MS); + // Don't keep the event loop alive just for persistence. + if (typeof this.pendingPersistTimer.unref === 'function') { + this.pendingPersistTimer.unref(); + } + } + + private persistNow(): void { + if (!this.persistencePath || this.persistInFlight) { + return; + } + this.persistInFlight = true; + try { + const state = { + sessions: this.listSessions().map((s) => s.toSnapshot()), + }; + // Gzip + atomic tmp + rename (handled inside saveGzippedFile). 
+ saveGzippedFile( + this.persistencePath, + JSON.stringify(state, null, 2) + ); + } catch (error) { + const message = + error instanceof Error ? error.message : String(error); + console.warn( + `SessionManager: failed to persist to ${this.persistencePath}: ${message}` + ); + } finally { + this.persistInFlight = false; + } + } + + private load(): void { + if (!this.persistencePath) { + return; + } + try { + const raw = loadMaybeGzippedFile(this.persistencePath); + if (raw === null) { + return; + } + const json = JSON.parse(raw); + const parsed = PersistedStateSchema.safeParse(json); + if (!parsed.success) { + console.warn( + `SessionManager: invalid persisted state at ${this.persistencePath}, discarding.` + ); + return; + } + const now = Date.now(); + for (const snapshot of parsed.data.sessions) { + if (now - snapshot.updatedAt > this.sessionTtlMs) { + continue; // Expired session — drop. + } + // Enforce the same per-file size cap on restore that + // updateFileState enforces on writes; otherwise a + // tampered or legacy persisted file can smuggle in + // oversized entries past the live guardrail. + const maxBytes = this.maxFileStateBytes; + const sanitizedFileState: Record = {}; + for (const [filePath, content] of Object.entries(snapshot.fileState)) { + if (Buffer.byteLength(content, 'utf8') <= maxBytes) { + sanitizedFileState[filePath] = content; + } + } + const safeSnapshot = { + ...snapshot, + fileState: sanitizedFileState, + }; + const session = Session.fromSnapshot(safeSnapshot, { + tokenizer: this.tokenizer, + summarizer: this.summarizer, + }); + this.sessions.set(session.id, session); + } + } catch (error) { + const message = + error instanceof Error ? 
error.message : String(error); + console.warn( + `SessionManager: failed to load sessions from ${this.persistencePath}: ${message}` + ); + } + } +} diff --git a/src/core/session.ts b/src/core/session.ts new file mode 100644 index 0000000..5dd629d --- /dev/null +++ b/src/core/session.ts @@ -0,0 +1,210 @@ +import { randomUUID } from 'crypto'; +import { ITokenizer } from './tokenizers/i-tokenizer.js'; +import { ISummarizer, TruncatingSummarizer } from './summarization.js'; + +/** + * Session state — addresses issues #121 and #122. + * + * A Session holds a single user's conversation history plus a per-file + * content snapshot. The history is token-budgeted (see #121) and the file + * snapshots feed context-delta tracking (#122). + */ + +export type MessageRole = 'system' | 'user' | 'assistant' | 'tool'; + +export interface Message { + role: MessageRole; + content: string; + timestamp: number; +} + +export interface SessionFileState { + [filePath: string]: string; +} + +export interface SessionSnapshot { + id: string; + history: Message[]; + fileState: SessionFileState; + maxTokens: number; + createdAt: number; + updatedAt: number; +} + +export interface SessionOptions { + id?: string; + maxTokens?: number; + preserveTailRatio?: number; + tokenizer?: ITokenizer; + summarizer?: ISummarizer; + /** + * When true, getHistoryTokenCount may fall back to a character/4 + * heuristic if no tokenizer is supplied. Production code should + * always pass a real tokenizer and leave this false (the default). + */ + allowCharHeuristic?: boolean; + /** Override for createdAt — used by fromSnapshot. */ + createdAt?: number; + /** Override for updatedAt — used by fromSnapshot. 
*/ + updatedAt?: number; +} + +const DEFAULT_MAX_TOKENS = 100_000; +const DEFAULT_PRESERVE_TAIL_RATIO = 0.3; +const CHAR_HEURISTIC_RATIO = 4; + +export class Session { + public readonly id: string; + public maxTokens: number; + public readonly createdAt: number; + public updatedAt: number; + + private history: Message[] = []; + private fileState: SessionFileState = {}; + private readonly preserveTailRatio: number; + private readonly tokenizer: ITokenizer | null; + private readonly summarizer: ISummarizer; + private readonly allowCharHeuristic: boolean; + + constructor(options: SessionOptions = {}) { + this.id = options.id ?? randomUUID(); + this.maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS; + this.preserveTailRatio = options.preserveTailRatio ?? DEFAULT_PRESERVE_TAIL_RATIO; + this.tokenizer = options.tokenizer ?? null; + this.summarizer = options.summarizer ?? new TruncatingSummarizer(); + this.allowCharHeuristic = options.allowCharHeuristic ?? false; + const now = Date.now(); + this.createdAt = options.createdAt ?? now; + this.updatedAt = options.updatedAt ?? this.createdAt; + } + + public addMessage(role: MessageRole, content: string): Message { + const message: Message = { role, content, timestamp: Date.now() }; + this.history.push(message); + this.updatedAt = message.timestamp; + return message; + } + + public getHistory(): readonly Message[] { + // Defensive copy so external mutation (push/splice/in-place + // edit) can't bypass updatedAt tracking or corrupt the history. 
+ return this.history.map((message) => ({ ...message })); + } + + public getFileState(): Readonly { + return { ...this.fileState }; + } + + public getFileContent(filePath: string): string | undefined { + return this.fileState[filePath]; + } + + public setFileContent(filePath: string, content: string): void { + this.fileState[filePath] = content; + this.updatedAt = Date.now(); + } + + public clearFileContent(filePath: string): void { + if (filePath in this.fileState) { + delete this.fileState[filePath]; + this.updatedAt = Date.now(); + } + } + + /** + * Total token count of the current history. + * + * Requires a tokenizer unless the caller opted into the character/4 + * heuristic via `allowCharHeuristic: true`. We default to requiring a + * tokenizer because #124's whole point is eliminating char/4. + */ + public async getHistoryTokenCount(): Promise { + if (!this.tokenizer) { + if (!this.allowCharHeuristic) { + throw new Error( + 'Session.getHistoryTokenCount requires a tokenizer. ' + + 'Construct the Session with TokenizerFactory.create(...) ' + + 'or pass allowCharHeuristic: true to opt into the fallback.' + ); + } + return this.history.reduce( + (acc, m) => acc + Math.ceil(m.content.length / CHAR_HEURISTIC_RATIO), + 0 + ); + } + let total = 0; + for (const message of this.history) { + total += await this.tokenizer.countTokens(message.content); + } + return total; + } + + /** + * Compress the history by summarizing everything except the + * preserve-tail fraction. Does nothing if history fits under maxTokens. + * + * Returns the new token count after compression. 
+ */ + public async compressHistory(): Promise { + const currentTokens = await this.getHistoryTokenCount(); + if (currentTokens <= this.maxTokens) { + return currentTokens; + } + if (this.history.length <= 1) { + return currentTokens; + } + + const preserveCount = Math.max( + 1, + Math.floor(this.history.length * this.preserveTailRatio) + ); + const tail = this.history.slice(-preserveCount); + const head = this.history.slice(0, -preserveCount); + if (head.length === 0) { + return currentTokens; + } + + const summary = await this.summarizer.summarize(head); + // Store summaries as `assistant`, not `system` — a user turn + // can contain prompt-injection text, and promoting it into a + // system-role message after compression would let that text + // act as a higher-priority instruction. Assistant role keeps + // the context without the privilege escalation. + const summaryMessage: Message = { + role: 'assistant', + content: `[summary of earlier conversation] ${summary}`, + timestamp: head[head.length - 1].timestamp, + }; + + this.history = [summaryMessage, ...tail]; + this.updatedAt = Date.now(); + return this.getHistoryTokenCount(); + } + + public toSnapshot(): SessionSnapshot { + return { + id: this.id, + history: this.history.map((message) => ({ ...message })), + fileState: { ...this.fileState }, + maxTokens: this.maxTokens, + createdAt: this.createdAt, + updatedAt: this.updatedAt, + }; + } + + public static fromSnapshot( + snapshot: SessionSnapshot, + options: Omit = {} + ): Session { + const session = new Session({ + id: snapshot.id, + maxTokens: snapshot.maxTokens, + createdAt: snapshot.createdAt, + updatedAt: snapshot.updatedAt, + ...options, + }); + session.history = snapshot.history.map((message) => ({ ...message })); + session.fileState = { ...snapshot.fileState }; + return session; + } +} diff --git a/src/core/summarization.ts b/src/core/summarization.ts new file mode 100644 index 0000000..b68ec4b --- /dev/null +++ b/src/core/summarization.ts @@ -0,0 
+1,288 @@ +import { Message } from './session.js'; + +/** + * Pluggable summarization — part of issue #121. + * + * An ISummarizer implementation takes a list of Messages and returns a + * natural-language summary. We ship three implementations out of the box: + * + * - TruncatingSummarizer — self-contained, zero deps. Concatenates + * role:content and trims to `maxChars`. Useful for tests and for + * users who don't want to hand a foundation model every + * conversation turn. + * - AnthropicSummarizer — calls /v1/messages on api.anthropic.com. + * Needs ANTHROPIC_API_KEY. Used when the host wires it up. + * - GoogleAISummarizer — calls generativelanguage.googleapis.com. + * Needs GOOGLE_AI_API_KEY. + * + * Selection lives in `createSummarizerFromEnv()` below — the server + * picks the highest-fidelity summarizer whose credentials are available + * and falls back to TruncatingSummarizer otherwise. + */ + +const SUMMARY_SYSTEM_PROMPT = + 'You are summarizing the early portion of a conversation so the rest can continue without the full history in context. ' + + 'Produce a concise summary (at most ~300 tokens) that preserves decisions made, outstanding TODOs, and any concrete facts the assistant has already told the user. ' + + 'Do not address the user directly; write in third person.'; + +export interface ISummarizer { + summarize(messages: readonly Message[]): Promise; +} + +export interface TruncatingSummarizerOptions { + /** Approximate maximum characters of summary output. Default: 2000. */ + maxChars?: number; +} + +const TRUNCATION_MARKER = '\n... [truncated] ...\n'; +const MIN_MAX_CHARS = 32; + +export class TruncatingSummarizer implements ISummarizer { + private readonly maxChars: number; + + constructor(options: TruncatingSummarizerOptions = {}) { + const maxChars = options.maxChars ?? 
2000; + if (!Number.isFinite(maxChars) || maxChars < MIN_MAX_CHARS) { + throw new Error( + `TruncatingSummarizer.maxChars must be >= ${MIN_MAX_CHARS}, got ${maxChars}` + ); + } + this.maxChars = maxChars; + } + + public async summarize(messages: readonly Message[]): Promise { + if (messages.length === 0) { + return ''; + } + + const joined = messages + .map((m) => `${m.role}: ${m.content}`) + .join('\n'); + + if (joined.length <= this.maxChars) { + return joined; + } + + // Budget excludes the marker length so the final string never + // exceeds maxChars — the previous `-20` was a guess that + // didn't match the marker exactly and produced unpredictable + // output for small limits. + const budget = Math.max(0, this.maxChars - TRUNCATION_MARKER.length); + const keepHead = Math.floor(budget * 0.4); + const keepTail = budget - keepHead; + return ( + joined.slice(0, keepHead) + + TRUNCATION_MARKER + + joined.slice(-keepTail) + ); + } +} + +// ============================================================================ +// Anthropic-backed summarizer +// ============================================================================ + +const ANTHROPIC_ENDPOINT = 'https://api.anthropic.com/v1/messages'; +const ANTHROPIC_DEFAULT_MODEL = 'claude-haiku-4-5-20251001'; +const ANTHROPIC_API_VERSION = '2023-06-01'; +const SUMMARIZER_TIMEOUT_MS = 30_000; +const SUMMARIZER_MAX_TOKENS = 1024; + +export interface AnthropicSummarizerOptions { + apiKey?: string; + model?: string; + endpoint?: string; + timeoutMs?: number; +} + +export class AnthropicSummarizer implements ISummarizer { + private readonly apiKey: string; + private readonly model: string; + private readonly endpoint: string; + private readonly timeoutMs: number; + + constructor(options: AnthropicSummarizerOptions = {}) { + const apiKey = options.apiKey ?? process.env.ANTHROPIC_API_KEY; + if (!apiKey) { + throw new Error( + 'AnthropicSummarizer requires ANTHROPIC_API_KEY (or apiKey option).' 
+ ); + } + this.apiKey = apiKey; + this.model = options.model ?? ANTHROPIC_DEFAULT_MODEL; + this.endpoint = options.endpoint ?? ANTHROPIC_ENDPOINT; + this.timeoutMs = options.timeoutMs ?? SUMMARIZER_TIMEOUT_MS; + } + + public async summarize(messages: readonly Message[]): Promise { + if (messages.length === 0) { + return ''; + } + const userContent = messages + .map((m) => `${m.role}: ${m.content}`) + .join('\n'); + + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), this.timeoutMs); + + try { + const response = await fetch(this.endpoint, { + method: 'POST', + headers: { + 'content-type': 'application/json', + 'x-api-key': this.apiKey, + 'anthropic-version': ANTHROPIC_API_VERSION, + }, + body: JSON.stringify({ + model: this.model, + max_tokens: SUMMARIZER_MAX_TOKENS, + system: SUMMARY_SYSTEM_PROMPT, + messages: [ + { role: 'user', content: userContent.slice(0, 200_000) }, + ], + }), + signal: controller.signal, + }); + + if (!response.ok) { + // Deliberately omit the response body — it can echo + // user prompt content and we don't want that leaking + // into log pipelines via thrown errors. + throw new Error( + `Anthropic summarize failed: ${response.status} ${response.statusText}` + ); + } + + const data = (await response.json()) as { + content?: Array<{ type: string; text?: string }>; + }; + const text = + data.content + ?.filter((c) => c.type === 'text' && typeof c.text === 'string') + .map((c) => c.text ?? '') + .join('\n') + .trim() ?? 
''; + return text; + } finally { + clearTimeout(timeout); + } + } +} + +// ============================================================================ +// Google AI-backed summarizer +// ============================================================================ + +const GOOGLE_AI_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models'; +const GOOGLE_AI_DEFAULT_MODEL = 'gemini-2.5-flash'; + +export interface GoogleAISummarizerOptions { + apiKey?: string; + model?: string; + endpoint?: string; + timeoutMs?: number; +} + +export class GoogleAISummarizer implements ISummarizer { + private readonly apiKey: string; + private readonly model: string; + private readonly endpoint: string; + private readonly timeoutMs: number; + + constructor(options: GoogleAISummarizerOptions = {}) { + const apiKey = options.apiKey ?? process.env.GOOGLE_AI_API_KEY; + if (!apiKey) { + throw new Error( + 'GoogleAISummarizer requires GOOGLE_AI_API_KEY (or apiKey option).' + ); + } + this.apiKey = apiKey; + this.model = options.model ?? GOOGLE_AI_DEFAULT_MODEL; + this.endpoint = options.endpoint ?? GOOGLE_AI_ENDPOINT; + this.timeoutMs = options.timeoutMs ?? 
SUMMARIZER_TIMEOUT_MS;
+  }
+
+  /**
+   * Summarize `messages` via the Gemini `generateContent` endpoint.
+   *
+   * @param messages Conversation turns to condense. An empty list
+   *   returns `''` without making a network call.
+   * @returns The text parts of the first candidate, joined with
+   *   newlines and trimmed; `''` when the response has no parts.
+   * @throws Error when the HTTP response is not ok (status line only,
+   *   never the body). Rejections from `fetch` itself, including the
+   *   abort fired after `timeoutMs`, propagate unchanged.
+   */
+  public async summarize(messages: readonly Message[]): Promise<string> {
+    if (messages.length === 0) {
+      return '';
+    }
+    const joined = messages
+      .map((m) => `${m.role}: ${m.content}`)
+      .join('\n');
+
+    // The API key travels in the `x-goog-api-key` header below, not as
+    // a `?key=` query parameter, so it never becomes part of a URL
+    // that proxies, access logs or error reports may record.
+    const url = `${this.endpoint}/${encodeURIComponent(this.model)}:generateContent`;
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+    try {
+      const response = await fetch(url, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'x-goog-api-key': this.apiKey,
+        },
+        body: JSON.stringify({
+          systemInstruction: { parts: [{ text: SUMMARY_SYSTEM_PROMPT }] },
+          contents: [
+            {
+              role: 'user',
+              parts: [{ text: joined.slice(0, 200_000) }],
+            },
+          ],
+          generationConfig: { maxOutputTokens: SUMMARIZER_MAX_TOKENS },
+        }),
+        signal: controller.signal,
+      });
+
+      if (!response.ok) {
+        // See AnthropicSummarizer — no body in the thrown error.
+        throw new Error(
+          `Google AI summarize failed: ${response.status} ${response.statusText}`
+        );
+      }
+
+      const data = (await response.json()) as {
+        candidates?: Array<{
+          content?: { parts?: Array<{ text?: string }> };
+        }>;
+      };
+      const text =
+        data.candidates?.[0]?.content?.parts
+          ?.map((p) => p.text ?? '')
+          .join('\n')
+          .trim() ?? '';
+      return text;
+    } finally {
+      // Always disarm the abort timer, on success and on failure.
+      clearTimeout(timeout);
+    }
+  }
+}
+
+// ============================================================================
+// Factory
+// ============================================================================
+
+/**
+ * Pick an ISummarizer based on available credentials:
+ * 1. ANTHROPIC_API_KEY → AnthropicSummarizer
+ * 2. GOOGLE_AI_API_KEY → GoogleAISummarizer
+ * 3. fallback → TruncatingSummarizer (no network, no key)
+ *
+ * Anthropic sits first because this project is Claude-adjacent; users
+ * who prefer Gemini can either unset ANTHROPIC_API_KEY or construct
+ * GoogleAISummarizer directly.
+ */ +export function createSummarizerFromEnv(): ISummarizer { + if (process.env.ANTHROPIC_API_KEY) { + try { + return new AnthropicSummarizer(); + } catch { + // Fall through to next option. + } + } + if (process.env.GOOGLE_AI_API_KEY) { + try { + return new GoogleAISummarizer(); + } catch { + // Fall through. + } + } + return new TruncatingSummarizer(); +} diff --git a/src/core/token-counter.ts b/src/core/token-counter.ts index d4bd471..80dd895 100644 --- a/src/core/token-counter.ts +++ b/src/core/token-counter.ts @@ -1,4 +1,7 @@ import { encoding_for_model, Tiktoken } from 'tiktoken'; +import { TokenizerFactory } from './tokenizers/tokenizer-factory.js'; +import { ITokenizer } from './tokenizers/i-tokenizer.js'; +import { TiktokenTokenizer } from './tokenizers/tiktoken-tokenizer.js'; export interface TokenCountResult { tokens: number; @@ -6,114 +9,95 @@ export interface TokenCountResult { estimatedCost?: number; } +/** + * TokenCounter — delegates tokenization to the pluggable + * TokenizerFactory from issue #124 while preserving the callable + * surface (`count`, `countBatch`, `estimate`, `calculateSavings`, + * `calculateCacheSavings`, `exceedsLimit`, `truncate`, + * `getTokenCharRatio`, `free`) the rest of the codebase relies on. + * + * Truncation still uses a local tiktoken encoder because the + * ITokenizer contract doesn't expose the raw token array — we + * keep one for GPT-4-family models and otherwise degrade to + * character-based truncation. 
+ */ export class TokenCounter { - private encoder: Tiktoken; - private readonly model: string; + private readonly tokenizer: ITokenizer; + private readonly encoder: Tiktoken | null; + public readonly model: string; constructor(model?: string) { - // Auto-detect model from environment or use provided model - // Claude Code sets CLAUDE_MODEL env var with the active model - // Falls back to GPT-4 as universal approximation this.model = model || process.env.CLAUDE_MODEL || process.env.ANTHROPIC_MODEL || + process.env.OPENAI_MODEL || + process.env.GOOGLE_AI_MODEL || 'gpt-4'; - // Map Claude models to closest tiktoken equivalent - // Claude uses similar tokenization to GPT-4, so it's a good approximation - const tokenModel = this.mapToTiktokenModel(this.model); - - // Initialize tiktoken encoder - this.encoder = encoding_for_model(tokenModel); - } - - /** - * Map Claude/Anthropic models to tiktoken model names - */ - private mapToTiktokenModel(model: string): 'gpt-4' | 'gpt-3.5-turbo' { - const lowerModel = model.toLowerCase(); - - // Claude models use GPT-4 tokenizer as closest approximation - if ( - lowerModel.includes('claude') || - lowerModel.includes('sonnet') || - lowerModel.includes('opus') || - lowerModel.includes('haiku') - ) { - return 'gpt-4'; - } - - // GPT-4 variants - if (lowerModel.includes('gpt-4')) { - return 'gpt-4'; + this.tokenizer = TokenizerFactory.create(this.model); + + // Keep a local encoder for tiktoken-compatible models — truncate() + // needs to slice the raw token array, which the ITokenizer interface + // intentionally does not expose. 
+ if (TiktokenTokenizer.supports(this.model)) { + this.encoder = encoding_for_model( + TiktokenTokenizer.mapToTiktokenModel(this.model) + ); + } else { + this.encoder = null; } - - // GPT-3.5 variants - if (lowerModel.includes('gpt-3.5') || lowerModel.includes('gpt3.5')) { - return 'gpt-3.5-turbo'; - } - - // Default to GPT-4 for unknown models - return 'gpt-4'; } /** - * Count tokens in text + * Count tokens in text (synchronous). + * + * Synchronous on tiktoken-backed tokenizers, which is all we expose + * externally via Anthropic/OpenAI. Remote tokenizers (Google AI) are + * reachable via `countAsync`. */ count(text: string): TokenCountResult { - const tokens = this.encoder.encode(text); - + if (this.encoder) { + return { + tokens: this.encoder.encode(text).length, + characters: text.length, + }; + } + // Fall back to the synchronous estimate so non-tiktoken paths keep + // working. Callers that want exact remote counts should use + // countAsync. return { - tokens: tokens.length, + tokens: this.estimate(text), characters: text.length, }; } /** - * Count tokens in multiple texts + * Async token counting through the pluggable tokenizer — accurate for + * both local tiktoken and remote Google AI paths. */ + async countAsync(text: string): Promise { + const tokens = await this.tokenizer.countTokens(text); + return { tokens, characters: text.length }; + } + countBatch(texts: string[]): TokenCountResult { let totalTokens = 0; let totalCharacters = 0; - for (const text of texts) { const result = this.count(text); totalTokens += result.tokens; totalCharacters += result.characters; } - - return { - tokens: totalTokens, - characters: totalCharacters, - }; + return { tokens: totalTokens, characters: totalCharacters }; } - /** - * Estimate token count without encoding (faster, less accurate) - */ estimate(text: string): number { - // Rough estimate: ~4 characters per token on average + // Rough fallback: ~4 characters per token. 
Only used when no + // tiktoken encoder is available for this model. return Math.ceil(text.length / 4); } - /** - * Calculate token savings based on context window management - * - * @param originalText - The original text content - * @param contextTokens - Number of tokens remaining in LLM context (default: 0 for full caching) - * @returns Token savings calculation - * - * @remarks - * This method measures context window optimization, NOT compression ratio. - * When content is cached externally (SQLite, Redis, etc.), it's completely - * removed from the LLM's context window, resulting in 100% token savings. - * - * Use cases: - * - External caching: contextTokens = 0 (100% savings) - * - Metadata-only: contextTokens = tokens in metadata (e.g., 8) - * - Summarization: contextTokens = tokens in summary (e.g., 50) - */ calculateSavings( originalText: string, contextTokens: number = 0 @@ -136,36 +120,6 @@ export class TokenCounter { }; } - /** - * Calculate context window savings for externally cached content - * - * @param originalText - The original text content being cached - * @returns Token savings calculation with 100% savings - * - * @remarks - * When content is compressed and stored in an external cache (SQLite, Redis, etc.), - * it's completely removed from the LLM's context window. The compressed/encoded - * data is NEVER sent to the LLM, so we measure 100% token savings. - * - * Key insight: We're measuring CONTEXT WINDOW CLEARANCE, not compression ratio. - * - ✅ Content removed from LLM context (saves tokens) - * - ✅ Storage compressed (saves disk space) - * - ❌ Don't count tokens in compressed data (it's not sent to LLM!) 
- * - * @example - * ```typescript - * const tokenCounter = new TokenCounter(); - * const content = "Large file content..."; - * const compressed = compress(content); - * - * // Store in external cache - * await cache.set(key, compressed); - * - * // Calculate context window savings - * const savings = tokenCounter.calculateCacheSavings(content); - * // Returns: { originalTokens: 250, contextTokens: 0, tokensSaved: 250, percentSaved: 100 } - * ``` - */ calculateCacheSavings(originalText: string): { originalTokens: number; contextTokens: number; @@ -173,54 +127,45 @@ export class TokenCounter { percentSaved: number; } { const original = this.count(originalText); - return { originalTokens: original.tokens, - contextTokens: 0, // External cache - nothing in context - tokensSaved: original.tokens, // 100% of original tokens saved - percentSaved: 100, // Always 100% for external caching + contextTokens: 0, + tokensSaved: original.tokens, + percentSaved: 100, }; } - /** - * Check if text exceeds token limit - */ exceedsLimit(text: string, limit: number): boolean { - const result = this.count(text); - return result.tokens > limit; + return this.count(text).tokens > limit; } - /** - * Truncate text to fit within token limit - */ truncate(text: string, maxTokens: number): string { + if (!this.encoder) { + // No raw-token access for this model — fall back to a + // char-proportional slice using the estimate ratio. + const approxChars = maxTokens * 4; + return text.length <= approxChars ? text : text.slice(0, approxChars); + } const tokens = this.encoder.encode(text); - if (tokens.length <= maxTokens) { return text; } - const truncatedTokens = tokens.slice(0, maxTokens); const decoded = this.encoder.decode(truncatedTokens); - - // Handle potential type issues with decode return value return typeof decoded === 'string' ? 
decoded : new TextDecoder().decode(decoded); } - /** - * Get token-to-character ratio for text - */ getTokenCharRatio(text: string): number { const result = this.count(text); return result.tokens > 0 ? result.characters / result.tokens : 0; } - /** - * Free the encoder resources - */ free(): void { - this.encoder.free(); + if (this.encoder) { + this.encoder.free(); + } + // TokenizerFactory owns the tokenizer's lifecycle (instance cache). } } diff --git a/src/core/tokenizers/google-ai-tokenizer.ts b/src/core/tokenizers/google-ai-tokenizer.ts new file mode 100644 index 0000000..6c751fe --- /dev/null +++ b/src/core/tokenizers/google-ai-tokenizer.ts @@ -0,0 +1,104 @@ +import { createHash } from 'crypto'; +import { ITokenizer } from './i-tokenizer.js'; +import { LruCache } from '../../utils/lru-cache.js'; + +const DEFAULT_CACHE_SIZE = 500; +const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; +const DEFAULT_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models'; +const REQUEST_TIMEOUT_MS = 10_000; + +/** + * Remote tokenizer that uses Google AI's countTokens REST endpoint — + * addresses issue #124's GoogleAITokenizer requirement. + * + * Network calls are memoized in an LruCache with a TTL so repeated + * token counts don't re-hit the API. If the request fails (network, + * 4xx, 5xx) we surface the error to the caller — TokenCounter above + * is responsible for deciding whether to fall back to a local + * tokenizer. 
+ */ +export class GoogleAITokenizer implements ITokenizer { + public readonly modelName: string; + private readonly apiKey: string; + private readonly endpoint: string; + private readonly cache: LruCache; + private readonly timeoutMs: number; + + constructor( + modelName: string, + apiKey: string, + options: { + endpoint?: string; + cache?: LruCache; + timeoutMs?: number; + } = {} + ) { + if (!apiKey) { + throw new Error('GoogleAITokenizer requires an apiKey'); + } + this.modelName = modelName; + this.apiKey = apiKey; + this.endpoint = options.endpoint ?? DEFAULT_ENDPOINT; + this.cache = + options.cache ?? + new LruCache(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS); + this.timeoutMs = options.timeoutMs ?? REQUEST_TIMEOUT_MS; + } + + public async countTokens(text: string): Promise { + // Always hash with a namespace prefix so cache keys can't collide + // with a raw string arg and so sensitive user text isn't retained + // verbatim in process memory. + const key = `sha256:${createHash('sha256').update(text).digest('hex')}`; + const cached = this.cache.get(key); + if (cached !== undefined) { + return cached; + } + + // Per Gemini API reference, x-goog-api-key is the recommended + // auth path — it keeps the key out of URLs and access logs. + const url = `${this.endpoint}/${encodeURIComponent( + this.modelName + )}:countTokens`; + + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), this.timeoutMs); + + try { + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': this.apiKey, + }, + body: JSON.stringify({ + contents: [{ parts: [{ text }] }], + }), + signal: controller.signal, + }); + + if (!response.ok) { + // Don't embed the response body — it can leak prompt + // content in upstream logs. 
+ throw new Error( + `Google AI countTokens failed: ${response.status} ${response.statusText}` + ); + } + + const data = (await response.json()) as { totalTokens?: number }; + if (typeof data.totalTokens !== 'number') { + throw new Error( + `Google AI countTokens returned unexpected payload: ${JSON.stringify(data).slice(0, 200)}` + ); + } + this.cache.set(key, data.totalTokens); + return data.totalTokens; + } finally { + clearTimeout(timeout); + } + } + + public free(): void { + this.cache.clear(); + } +} diff --git a/src/core/tokenizers/heuristic-tokenizer.ts b/src/core/tokenizers/heuristic-tokenizer.ts new file mode 100644 index 0000000..a0208e2 --- /dev/null +++ b/src/core/tokenizers/heuristic-tokenizer.ts @@ -0,0 +1,89 @@ +import { createHash } from 'crypto'; +import { ITokenizer } from './i-tokenizer.js'; +import { LruCache } from '../../utils/lru-cache.js'; + +const DEFAULT_CACHE_SIZE = 500; +const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; +/** See TiktokenTokenizer for rationale. */ +const KEY_HASH_THRESHOLD_CHARS = 256; + +function cacheKeyFor(text: string): string { + if (text.length <= KEY_HASH_THRESHOLD_CHARS) { + return text; + } + return createHash('sha256').update(text).digest('hex'); +} + +export enum ContentType { + Code = 'code', + Json = 'json', + Markdown = 'markdown', + Text = 'text', +} + +/** + * Content-aware character-to-token ratios derived from tiktoken encoding + * on typical samples: + * + * | Content | chars/token | + * | --------- | ----------- | + * | code | 2.5 | + * | json | 2.8 | + * | markdown | 3.5 | + * | text | 4.0 | + */ +const CHARS_PER_TOKEN: Readonly> = { + [ContentType.Code]: 2.5, + [ContentType.Json]: 2.8, + [ContentType.Markdown]: 3.5, + [ContentType.Text]: 4.0, +}; + +const CODE_PATTERN = /\b(function|class|const|import|export|return|await|=>)\b/; +const JSON_PATTERN = /^[\s\n]*[{[]/; +const MARKDOWN_PATTERN = /^#{1,6}\s|^\s*[-*+]\s|\[[^\]]+\]\([^)]+\)/m; + +export class HeuristicTokenizer implements ITokenizer { + public 
readonly modelName: string; + private readonly cache: LruCache; + + constructor(modelName: string = 'heuristic', cache?: LruCache) { + this.modelName = modelName; + this.cache = cache ?? new LruCache(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS); + } + + public async countTokens(text: string): Promise { + const key = cacheKeyFor(text); + const cached = this.cache.get(key); + if (cached !== undefined) { + return cached; + } + const contentType = HeuristicTokenizer.detectContentType(text); + const ratio = CHARS_PER_TOKEN[contentType]; + const count = Math.ceil(text.length / ratio); + this.cache.set(key, count); + return count; + } + + public free(): void { + // No native resources to free. + } + + public static detectContentType(text: string): ContentType { + if (JSON_PATTERN.test(text)) { + try { + JSON.parse(text); + return ContentType.Json; + } catch { + // Not actually JSON; fall through to other detection. + } + } + if (CODE_PATTERN.test(text)) { + return ContentType.Code; + } + if (MARKDOWN_PATTERN.test(text)) { + return ContentType.Markdown; + } + return ContentType.Text; + } +} diff --git a/src/core/tokenizers/i-tokenizer.ts b/src/core/tokenizers/i-tokenizer.ts new file mode 100644 index 0000000..57f23fd --- /dev/null +++ b/src/core/tokenizers/i-tokenizer.ts @@ -0,0 +1,19 @@ +/** + * Pluggable tokenizer interface — addresses issue #124. + * + * Implementations: + * - TiktokenTokenizer: uses the local tiktoken library (GPT-4 / GPT-3.5-turbo). + * - HeuristicTokenizer: content-aware local fallback for unknown models. + * + * The factory picks an implementation based on model name. All implementations + * memoize counts via an injected LruCache so repeated inputs don't re-tokenize. + */ + +export interface ITokenizer { + readonly modelName: string; + + countTokens(text: string): Promise; + + /** Free any native resources. 
*/ + free(): void; +} diff --git a/src/core/tokenizers/tiktoken-tokenizer.ts b/src/core/tokenizers/tiktoken-tokenizer.ts new file mode 100644 index 0000000..4ebf197 --- /dev/null +++ b/src/core/tokenizers/tiktoken-tokenizer.ts @@ -0,0 +1,85 @@ +import { createHash } from 'crypto'; +import { encoding_for_model, Tiktoken, TiktokenModel } from 'tiktoken'; +import { ITokenizer } from './i-tokenizer.js'; +import { LruCache } from '../../utils/lru-cache.js'; + +const DEFAULT_CACHE_SIZE = 500; +const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; +/** + * Strings longer than this are hashed before being used as a cache key + * so the LRU stores ~64-byte SHA-256 digests instead of entire prompts + * or file contents — keeps the cache from ballooning into hundreds of + * MB on hot paths. + */ +const KEY_HASH_THRESHOLD_CHARS = 256; + +function cacheKeyFor(text: string): string { + if (text.length <= KEY_HASH_THRESHOLD_CHARS) { + return text; + } + return createHash('sha256').update(text).digest('hex'); +} + +const SUPPORTED_TIKTOKEN_MODELS: readonly TiktokenModel[] = ['gpt-4', 'gpt-3.5-turbo']; + +export class TiktokenTokenizer implements ITokenizer { + public readonly modelName: string; + private readonly encoder: Tiktoken; + private readonly cache: LruCache; + + constructor(modelName: string, cache?: LruCache) { + this.modelName = modelName; + this.cache = cache ?? 
new LruCache(DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL_MS); + const tiktokenModel = TiktokenTokenizer.mapToTiktokenModel(modelName); + this.encoder = encoding_for_model(tiktokenModel); + } + + public async countTokens(text: string): Promise { + const key = cacheKeyFor(text); + const cached = this.cache.get(key); + if (cached !== undefined) { + return cached; + } + const count = this.encoder.encode(text).length; + this.cache.set(key, count); + return count; + } + + public free(): void { + this.encoder.free(); + } + + public static supports(modelName: string): boolean { + const mapped = TiktokenTokenizer.tryMap(modelName); + return mapped !== null; + } + + public static mapToTiktokenModel(modelName: string): TiktokenModel { + const mapped = TiktokenTokenizer.tryMap(modelName); + if (mapped === null) { + // Default: GPT-4 tokenizer is the closest available for Claude/unknown models. + return 'gpt-4'; + } + return mapped; + } + + private static tryMap(modelName: string): TiktokenModel | null { + const lower = modelName.toLowerCase(); + if ( + lower.includes('claude') || + lower.includes('sonnet') || + lower.includes('opus') || + lower.includes('haiku') || + lower.includes('gpt-4') + ) { + return 'gpt-4'; + } + if (lower.includes('gpt-3.5') || lower.includes('gpt3.5')) { + return 'gpt-3.5-turbo'; + } + if (SUPPORTED_TIKTOKEN_MODELS.includes(lower as TiktokenModel)) { + return lower as TiktokenModel; + } + return null; + } +} diff --git a/src/core/tokenizers/tokenizer-factory.ts b/src/core/tokenizers/tokenizer-factory.ts new file mode 100644 index 0000000..edce55f --- /dev/null +++ b/src/core/tokenizers/tokenizer-factory.ts @@ -0,0 +1,75 @@ +import { ITokenizer } from './i-tokenizer.js'; +import { TiktokenTokenizer } from './tiktoken-tokenizer.js'; +import { HeuristicTokenizer } from './heuristic-tokenizer.js'; +import { GoogleAITokenizer } from './google-ai-tokenizer.js'; + +/** + * Pluggable tokenizer factory — addresses issues #123 / #124. + * + * Resolution order: + * 1. 
Google AI models (`gemini-*`) — GoogleAITokenizer when + * GOOGLE_AI_API_KEY is set, else HeuristicTokenizer. + * 2. Tiktoken-compatible families (GPT, Claude) — TiktokenTokenizer. + * 3. HeuristicTokenizer fallback for everything else. + * + * Instances are cached per model name so callers don't pay for repeated + * allocation of the native tiktoken encoder, and so their per-tokenizer + * LRU caches can be shared across call sites. + */ +export class TokenizerFactory { + private static readonly instances = new Map(); + + public static create(modelName: string): ITokenizer { + const cached = TokenizerFactory.instances.get(modelName); + if (cached) { + return cached; + } + const tokenizer = TokenizerFactory.build(modelName); + TokenizerFactory.instances.set(modelName, tokenizer); + return tokenizer; + } + + public static createFromEnv(): ITokenizer { + // TOKEN_OPTIMIZER_MODEL has highest precedence so a user can pin + // the optimizer model without having to clear broader env vars + // (CLAUDE_MODEL, ANTHROPIC_MODEL, …) that may already be set. + const modelName = + process.env.TOKEN_OPTIMIZER_MODEL || + process.env.CLAUDE_MODEL || + process.env.ANTHROPIC_MODEL || + process.env.OPENAI_MODEL || + process.env.GOOGLE_AI_MODEL || + 'gpt-4'; + return TokenizerFactory.create(modelName); + } + + /** + * Release every cached tokenizer. Call this on server shutdown so + * native tiktoken encoders are freed. + */ + public static disposeAll(): void { + for (const tokenizer of TokenizerFactory.instances.values()) { + try { + tokenizer.free(); + } catch { + // Ignore — best-effort cleanup. 
+ } + } + TokenizerFactory.instances.clear(); + } + + private static build(modelName: string): ITokenizer { + const lower = modelName.toLowerCase(); + if (lower.startsWith('gemini') || lower.includes('google')) { + const apiKey = process.env.GOOGLE_AI_API_KEY; + if (apiKey) { + return new GoogleAITokenizer(modelName, apiKey); + } + return new HeuristicTokenizer(modelName); + } + if (TiktokenTokenizer.supports(modelName)) { + return new TiktokenTokenizer(modelName); + } + return new HeuristicTokenizer(modelName); + } +} diff --git a/src/core/types.ts b/src/core/types.ts index 9d36be2..1cb1775 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -48,6 +48,40 @@ export interface HypercontextConfig { streamingThreshold: number; enableStreaming: boolean; }; + optimization?: OptimizationConfig; +} + +/** + * Configuration-driven compression thresholds — addresses issue #120. + * Mirrors the fields exposed by Gemini CLI's settingsSchema.ts. + */ +export interface OptimizationConfig { + /** Fraction of model context at which compression kicks in (0-1). */ + compressionTokenThreshold: number; + /** Fraction of chat history to keep uncompressed at the tail (0-1). */ + compressionPreserveThreshold: number; + /** Minimum token count before an optimizer considers compressing. */ + minTokensBeforeCompression: number; + /** Per-model total context window size, in tokens. */ + modelTokenLimits: Record; + /** Minimum output bytes before optimization emits a stored entry. */ + minOutputSizeBytes: number; + /** Compression quality preset. */ + quality: 'fast' | 'balanced' | 'max'; + /** In-memory cache knobs — mirrors Gemini CLI's `cacheSettings`. */ + cacheSettings: { + /** Max entries per LRU cache shard. */ + maxSize: number; + /** Default TTL for cached entries, in seconds. */ + ttlSeconds: number; + }; + /** Chat-history compression knobs — #121. 
*/ + chatCompression: { + enabled: boolean; + /** Hard token limit per session (falls back to modelTokenLimit × compressionTokenThreshold). */ + tokenLimit?: number; + strategy: 'summarize' | 'truncate'; + }; } export interface TokenMetrics { diff --git a/src/server/index.ts b/src/server/index.ts index 7c23370..62fba3c 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -126,12 +126,23 @@ import { getMcpServerAnalyticsTool, GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/get-mcp-server-analytics.js'; +import { getExportAnalyticsTool, EXPORT_ANALYTICS_TOOL_DEFINITION, } from '../tools/analytics/export-analytics.js'; import { - getExportAnalyticsTool, - EXPORT_ANALYTICS_TOOL_DEFINITION, -} from '../tools/analytics/export-analytics.js'; + OptimizationStorageTool, + OPTIMIZATION_STORAGE_TOOL_DEFINITION, +} from '../tools/optimization-storage-tool.js'; +import { + ContextDeltaTool, + CONTEXT_DELTA_TOOL_DEFINITION, +} from '../tools/context-delta-tool.js'; +import { SessionManager } from '../core/session-manager.js'; +import { createSummarizerFromEnv } from '../core/summarization.js'; +import { TokenizerFactory } from '../core/tokenizers/tokenizer-factory.js'; +import { ConfigManager } from '../core/config.js'; +import { lruMemoize, memoRegistry } from '../utils/lru-memoize.js'; import { AnalyticsManager } from '../analytics/analytics-manager.js'; + // API & Database tools import { getSmartSql, @@ -369,6 +380,63 @@ const getHookAnalytics = getHookAnalyticsTool(analyticsManager); const getActionAnalytics = getActionAnalyticsTool(analyticsManager); const getMcpServerAnalytics = getMcpServerAnalyticsTool(analyticsManager); const exportAnalytics = getExportAnalyticsTool(analyticsManager); +const optimizationStorage = new OptimizationStorageTool(); + +// #120: load user config (creates ~/.token-optimizer/config.json with +// defaults on first run) and derive session-level knobs. 
+const configManager = new ConfigManager(); +const optimizationConfig = configManager.getOptimizationConfig(); +const sessionTokenizer = TokenizerFactory.createFromEnv(); +const modelLimit = + configManager.getModelTokenLimit(sessionTokenizer.modelName) ?? + // Fall back to an aggressive default for unknown models. + 128000; +const chatDefaultMaxTokens = + optimizationConfig.chatCompression.tokenLimit ?? + Math.floor(modelLimit * optimizationConfig.compressionTokenThreshold); + +const sessionManager = new SessionManager({ + persistencePath: path.join(os.homedir(), '.token-optimizer', 'sessions.json'), + tokenizer: sessionTokenizer, + defaultMaxTokens: chatDefaultMaxTokens, + summarizer: createSummarizerFromEnv(), +}); +const contextDelta = new ContextDeltaTool(sessionManager); + +// #125: memoize the expensive read-only file-operation tools with an +// LRU bounded by the user's cacheSettings. The memoRegistry hook lets +// the cleanup handler below prune them all at once. +const cacheSettings = optimizationConfig.cacheSettings; +const memoizedSmartRead = lruMemoize(runSmartRead, { + name: 'smart_read', + maxSize: cacheSettings.maxSize, + ttlMs: cacheSettings.ttlSeconds * 1000, +}); +const memoizedSmartGrep = lruMemoize(runSmartGrep, { + name: 'smart_grep', + maxSize: cacheSettings.maxSize, + ttlMs: cacheSettings.ttlSeconds * 1000, +}); +const memoizedSmartGlob = lruMemoize(runSmartGlob, { + name: 'smart_glob', + maxSize: cacheSettings.maxSize, + ttlMs: cacheSettings.ttlSeconds * 1000, +}); + +// Periodic prune + stats log. Runs every 5 minutes; unref so it doesn't +// keep the process alive on its own. 
+const MEMO_PRUNE_INTERVAL_MS = 5 * 60 * 1000; +const memoPruneTimer = setInterval(() => { + const removed = memoRegistry.pruneAll(); + if (removed > 0) { + console.error( + `[memo] pruned ${removed} expired cache entries; stats: ${JSON.stringify(memoRegistry.stats())}` + ); + } +}, MEMO_PRUNE_INTERVAL_MS); +if (typeof memoPruneTimer.unref === 'function') { + memoPruneTimer.unref(); +} // Create MCP server const server = new Server( @@ -430,7 +498,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { { name: 'count_tokens', description: - 'Count tokens in text using tiktoken. Useful for understanding token usage before and after optimization.', + 'Count tokens in text using the pluggable tokenizer framework (#124). Picks a model-specific tokenizer (tiktoken for GPT/Claude, Google AI REST for Gemini, content-aware heuristic fallback).', inputSchema: { type: 'object', properties: { @@ -438,6 +506,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { type: 'string', description: 'Text to count tokens for', }, + modelName: { + type: 'string', + description: + 'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). Defaults to the server-configured model when omitted.', + }, }, required: ['text'], }, @@ -655,6 +728,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { GET_ACTION_ANALYTICS_TOOL_DEFINITION, GET_MCP_SERVER_ANALYTICS_TOOL_DEFINITION, EXPORT_ANALYTICS_TOOL_DEFINITION, + OPTIMIZATION_STORAGE_TOOL_DEFINITION, + CONTEXT_DELTA_TOOL_DEFINITION, ], }; }); @@ -847,17 +922,46 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } case 'count_tokens': { - const { text } = args as { text: string }; - const result = tokenCounter.count(text); - - return { - content: [ - { - type: 'text', - text: JSON.stringify(result, null, 2), - }, - ], + const { text, modelName } = args as { + text: string; + modelName?: string; }; + const counter = modelName ? 
new TokenCounter(modelName) : tokenCounter; + try { + const result = modelName + ? await counter.countAsync(text) + : counter.count(text); + // Return the full result JSON under a dedicated `metadata` + // key while the primary `text` payload stays the scalar token + // count string — preserves the integer-parse contract that + // the PowerShell orchestrator relies on + // (e.g. token-optimizer-orchestrator.ps1 L931/1910/2092 cast + // `content[0].text -as [int]`) and still surfaces the richer + // object for TS callers. + return { + content: [ + { + type: 'text', + text: String(result.tokens), + }, + { + type: 'text', + text: JSON.stringify( + { ...result, model: modelName ?? counter.model }, + null, + 2 + ), + }, + ], + }; + } finally { + // Always free one-shot counters — even when countAsync throws, + // leaving the tiktoken encoder allocated was leaking native + // resources. + if (modelName) { + counter.free(); + } + } } case 'compress_text': { @@ -1931,7 +2035,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_read': { const { path, ...options } = args as any; - const result = await runSmartRead(path, options); + const result = await memoizedSmartRead(path, options); return { content: [ { @@ -1945,6 +2049,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_write': { const { path, content, ...options } = args as any; const result = await runSmartWrite(path, content, options); + // Filesystem was mutated — drop every memoized read-only cache + // entry so the next smart_read/grep/glob reflects the new state + // instead of waiting for TTL expiry. 
+ memoRegistry.clearAll(); return { content: [ { @@ -1958,6 +2066,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_edit': { const { path, operations, ...options } = args as any; const result = await runSmartEdit(path, operations, options); + memoRegistry.clearAll(); return { content: [ { @@ -1970,7 +2079,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_glob': { const { pattern, ...options } = args as any; - const result = await runSmartGlob(pattern, options); + const result = await memoizedSmartGlob(pattern, options); return { content: [ { @@ -1983,7 +2092,31 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { case 'smart_grep': { const { pattern, ...options } = args as any; - const result = await runSmartGrep(pattern, options); + const result = await memoizedSmartGrep(pattern, options); + return { + content: [ + { + type: 'text', + text: JSON.stringify(result, null, 2), + }, + ], + }; + } + + case 'optimization_storage': { + const result = optimizationStorage.run(args as any); + return { + content: [ + { + type: 'text', + text: JSON.stringify(result, null, 2), + }, + ], + }; + } + + case 'context_delta': { + const result = contextDelta.run(args as any); return { content: [ { @@ -2231,6 +2364,16 @@ async function cleanup() { }, { fn: () => cache?.close(), name: 'closing cache' }, { fn: () => tokenCounter?.free(), name: 'freeing tokenCounter' }, + { fn: async () => await sessionManager.flush(), name: 'flushing sessions' }, + { fn: () => TokenizerFactory.disposeAll(), name: 'disposing tokenizers' }, + { fn: () => optimizationStorage.close(), name: 'closing optimization storage' }, + { + fn: () => { + clearInterval(memoPruneTimer); + memoRegistry.clearAll(); + }, + name: 'clearing memo caches', + }, // Note: predictiveCache and cacheWarmup do not implement dispose() methods // Removed dispose() calls to prevent runtime errors during cleanup ]); diff --git 
a/src/tools/context-delta-tool.ts b/src/tools/context-delta-tool.ts new file mode 100644 index 0000000..20f5fdc --- /dev/null +++ b/src/tools/context-delta-tool.ts @@ -0,0 +1,184 @@ +import { SessionManager } from '../core/session-manager.js'; +import { calculateDelta } from '../utils/diff.js'; + +/** + * context_delta MCP tool — addresses issue #122. + * + * Given (sessionId, filePath, currentContent) this tool: + * 1. Looks up the session from the SessionManager. + * 2. Diffs the current content against the session's last snapshot of + * that file. + * 3. Updates the session's file state. + * 4. Returns a unified-diff delta — the caller can send ONLY the delta + * to the model instead of the whole file, which is the token win. + * + * On first invocation for a given filePath the full content is treated + * as "the delta" (there is no baseline to diff against). + */ + +export type ContextDeltaOperation = 'compute-delta' | 'seed' | 'clear'; + +export interface ContextDeltaOptions { + operation: ContextDeltaOperation; + sessionId: string; + filePath: string; + currentContent?: string; +} + +export interface ContextDeltaResponse { + success: boolean; + error?: string; + delta?: string; + isBaseline?: boolean; + originalSize?: number; + deltaSize?: number; + bytesSaved?: number; +} + +export class ContextDeltaTool { + public readonly name = 'context_delta'; + public readonly description = + 'Compute a unified-diff delta between a file’s previous session snapshot and its current content, so the model only receives what changed.'; + + constructor(private readonly sessionManager: SessionManager) {} + + public run(options: ContextDeltaOptions): ContextDeltaResponse { + switch (options.operation) { + case 'compute-delta': + return this.computeDelta(options); + case 'seed': + return this.seed(options); + case 'clear': + return this.clear(options); + default: + return { + success: false, + error: `Unknown operation: ${String( + (options as { operation: unknown }).operation + 
)}`, + }; + } + } + + private computeDelta(options: ContextDeltaOptions): ContextDeltaResponse { + const { sessionId, filePath, currentContent } = options; + if (currentContent === undefined) { + return { + success: false, + error: 'currentContent is required for compute-delta', + }; + } + // Auto-bootstrap the session on first contact so PS-side callers + // that locally generate a sessionId don't have to separately + // create it server-side first. + const session = this.sessionManager.getOrCreateSession(sessionId); + const previous = session.getFileContent(filePath); + + try { + // Goes through SessionManager so the new state hits disk. + this.sessionManager.updateFileState(sessionId, filePath, currentContent); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + + // Use UTF-8 byte counts throughout so the reported sizes match + // the byte-cap that SessionManager.updateFileState enforces. + // string.length counts UTF-16 code units, which drifts for any + // non-ASCII content. 
+ const originalSize = Buffer.byteLength(currentContent, 'utf8'); + if (previous === undefined) { + return { + success: true, + isBaseline: true, + delta: currentContent, + originalSize, + deltaSize: originalSize, + bytesSaved: 0, + }; + } + + const delta = calculateDelta(previous, currentContent, filePath); + const deltaSize = Buffer.byteLength(delta, 'utf8'); + return { + success: true, + isBaseline: false, + delta, + originalSize, + deltaSize, + bytesSaved: Math.max(0, originalSize - deltaSize), + }; + } + + private seed(options: ContextDeltaOptions): ContextDeltaResponse { + const { sessionId, filePath, currentContent } = options; + if (currentContent === undefined) { + return { success: false, error: 'currentContent is required for seed' }; + } + try { + this.sessionManager.getOrCreateSession(sessionId); + this.sessionManager.updateFileState(sessionId, filePath, currentContent); + return { success: true, isBaseline: true }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + } + + private clear(options: ContextDeltaOptions): ContextDeltaResponse { + try { + this.sessionManager.clearFileState(options.sessionId, options.filePath); + return { success: true }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + } +} + +export const CONTEXT_DELTA_TOOL_DEFINITION = { + name: 'context_delta', + description: + 'Compute a unified-diff delta for a file in a given session so the model only sees changes since the last snapshot. Operations: compute-delta, seed, clear.', + // Discriminated inputSchema keyed on `operation` — compute-delta and + // seed require currentContent at runtime, so enforce that at schema + // validation time rather than letting a malformed payload reach the + // tool body. 
+ inputSchema: { + type: 'object', + oneOf: [ + { + type: 'object', + properties: { + operation: { type: 'string', const: 'compute-delta' }, + sessionId: { type: 'string', minLength: 1 }, + filePath: { type: 'string', minLength: 1 }, + currentContent: { type: 'string' }, + }, + required: ['operation', 'sessionId', 'filePath', 'currentContent'], + additionalProperties: false, + }, + { + type: 'object', + properties: { + operation: { type: 'string', const: 'seed' }, + sessionId: { type: 'string', minLength: 1 }, + filePath: { type: 'string', minLength: 1 }, + currentContent: { type: 'string' }, + }, + required: ['operation', 'sessionId', 'filePath', 'currentContent'], + additionalProperties: false, + }, + { + type: 'object', + properties: { + operation: { type: 'string', const: 'clear' }, + sessionId: { type: 'string', minLength: 1 }, + filePath: { type: 'string', minLength: 1 }, + }, + required: ['operation', 'sessionId', 'filePath'], + additionalProperties: false, + }, + ], + }, +}; diff --git a/src/tools/optimization-storage-tool.ts b/src/tools/optimization-storage-tool.ts new file mode 100644 index 0000000..6465fc2 --- /dev/null +++ b/src/tools/optimization-storage-tool.ts @@ -0,0 +1,166 @@ +import { SqliteOptimizationStorage, OptimizationResult } from '../analytics/optimization-storage.js'; + +export type OptimizationStorageOperation = 'store' | 'retrieve'; + +export interface OptimizationStorageOptions { + operation: OptimizationStorageOperation; + originalTextHash?: string; + optimizedText?: string; + originalTokens?: number; + optimizedTokens?: number; + tokensSaved?: number; +} + +export interface OptimizationStorageResponse { + success: boolean; + error?: string; + result?: OptimizationResult; +} + +export class OptimizationStorageTool { + public readonly name = 'optimization_storage'; + public readonly description = + 'Persist and retrieve brotli-compressed optimization results keyed by text hash.'; + + private readonly storage: SqliteOptimizationStorage; 
+ + constructor(storage?: SqliteOptimizationStorage) { + this.storage = storage ?? new SqliteOptimizationStorage(); + this.storage.initializeDatabase(); + } + + public run(options: OptimizationStorageOptions): OptimizationStorageResponse { + switch (options.operation) { + case 'store': + return this.store(options); + case 'retrieve': + return this.retrieve(options); + default: + return { + success: false, + error: `Unknown operation: ${String((options as { operation: unknown }).operation)}`, + }; + } + } + + private store(options: OptimizationStorageOptions): OptimizationStorageResponse { + const { originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved } = options; + + if ( + !originalTextHash || + !optimizedText || + originalTokens === undefined || + optimizedTokens === undefined || + tokensSaved === undefined + ) { + return { + success: false, + error: 'Missing required arguments for store operation: originalTextHash, optimizedText, originalTokens, optimizedTokens, tokensSaved.', + }; + } + + try { + this.storage.save({ + originalTextHash, + optimizedText, + originalTokens, + optimizedTokens, + tokensSaved, + }); + return { success: true }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: `Failed to store optimization result: ${message}` }; + } + } + + private retrieve(options: OptimizationStorageOptions): OptimizationStorageResponse { + const { originalTextHash } = options; + + if (!originalTextHash) { + return { + success: false, + error: 'Missing required argument for retrieve operation: originalTextHash.', + }; + } + + try { + const result = this.storage.get(originalTextHash); + if (!result) { + return { success: false, error: 'Not found' }; + } + return { success: true, result }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: `Failed to retrieve optimization result: ${message}` }; + } + } + + public close(): void { + this.storage.close(); + } +} + +export const OPTIMIZATION_STORAGE_TOOL_DEFINITION = { + name: 'optimization_storage', + description: + 'Persist and retrieve brotli-compressed optimization results keyed by text hash. Operations: store, retrieve.', + // JSON Schema discriminated union — rejects a `store` payload that + // omits required fields at schema time instead of deep in the tool. + inputSchema: { + type: 'object', + oneOf: [ + { + type: 'object', + properties: { + operation: { type: 'string', const: 'store' }, + originalTextHash: { + type: 'string', + minLength: 1, + description: 'Stable hash of the original uncompressed text', + }, + optimizedText: { + type: 'string', + description: 'The optimized text to store', + }, + originalTokens: { + type: 'number', + minimum: 0, + description: 'Token count of the original text', + }, + optimizedTokens: { + type: 'number', + minimum: 0, + description: 'Token count after optimization', + }, + tokensSaved: { + type: 'number', + description: 'Tokens saved by optimization', + }, + }, + required: [ + 'operation', + 'originalTextHash', + 'optimizedText', + 'originalTokens', + 'optimizedTokens', + 'tokensSaved', + ], + additionalProperties: false, + }, + { + type: 'object', + properties: { + operation: { type: 'string', const: 'retrieve' }, + originalTextHash: { + type: 'string', + minLength: 1, + description: 'Stable hash of the original uncompressed text', + }, + }, + required: ['operation', 'originalTextHash'], + additionalProperties: false, + }, + ], + }, +}; diff --git a/src/utils/diff.ts b/src/utils/diff.ts new file mode 100644 index 0000000..3c032e2 --- /dev/null +++ b/src/utils/diff.ts @@ -0,0 +1,39 @@ +import { createPatch, applyPatch } from 'diff'; + +/** + * Delta-based context helpers — addresses issue #122. 
+ *
+ * Uses the unified-diff format from the existing `diff` dependency so the
+ * resulting deltas are human-readable and round-trippable via applyDelta.
+ */
+
+/**
+ * Compute a unified-diff delta from `previous` to `current`.
+ * Returns the empty string when the inputs are identical (callers can use
+ * that to skip transmitting a no-op delta).
+ *
+ * @param previous - Baseline text.
+ * @param current - Text to reach from the baseline.
+ * @param fileName - Name passed to `createPatch` as the patch's file name;
+ *   defaults to `'content'`.
+ * @returns The patch text, or `''` when `previous === current`.
+ */
+export function calculateDelta(
+  previous: string,
+  current: string,
+  fileName: string = 'content'
+): string {
+  if (previous === current) {
+    return '';
+  }
+  // The two trailing empty strings are createPatch's old/new header
+  // arguments — presumably left blank so no extra header text is emitted
+  // (confirm against the `diff` package docs).
+  return createPatch(fileName, previous, current, '', '');
+}
+
+/**
+ * Apply a unified-diff `delta` to `previous`, returning the reconstructed
+ * `current`. Throws if the patch cannot be applied cleanly.
+ *
+ * @param previous - Baseline text the delta was computed against.
+ * @param delta - Patch text from `calculateDelta`; `''` means "no change".
+ * @returns The patched text (or `previous` itself for an empty delta).
+ * @throws Error when `applyPatch` reports failure by returning `false`.
+ */
+export function applyDelta(previous: string, delta: string): string {
+  if (delta === '') {
+    return previous;
+  }
+  const result = applyPatch(previous, delta);
+  if (result === false) {
+    throw new Error('Failed to apply delta: patch did not apply cleanly');
+  }
+  return result;
+}
diff --git a/src/utils/gzip.ts b/src/utils/gzip.ts
new file mode 100644
index 0000000..206cebc
--- /dev/null
+++ b/src/utils/gzip.ts
@@ -0,0 +1,99 @@
+import { gzipSync, gunzipSync } from 'zlib';
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  renameSync,
+  unlinkSync,
+  writeFileSync,
+} from 'fs';
+import { dirname } from 'path';
+
+/**
+ * Gzip utilities — addresses issue #126.
+ *
+ * `gzipString` / `gunzipBuffer` are thin UTF-8 wrappers around node:zlib.
+ * `saveGzippedFile` writes `${path}.gz` atomically (tmp + rename) so a
+ * crash mid-write can't produce a corrupt gzip. `loadMaybeGzippedFile`
+ * transparently reads `${path}.gz` if present and falls back to the
+ * plaintext path — that gives us backward compatibility with
+ * sessions.json files written before this change.
+ */
+
+/** Size figures for one compression run. */
+export interface GzipStats {
+  /** UTF-8 byte length of the uncompressed text. */
+  originalBytes: number;
+  /** Byte length of the gzip output. */
+  compressedBytes: number;
+  /** compressedBytes / originalBytes, or 0 for empty input. */
+  ratio: number;
+  /** (1 - ratio) * 100, or 0 for empty input. */
+  percentSaved: number;
+}
+
+/** Gzip `text` (encoded as UTF-8) at the given zlib compression `level`. */
+export function gzipString(text: string, level: number = 6): Buffer {
+  return gzipSync(Buffer.from(text, 'utf8'), { level });
+}
+
+/** Gunzip `buffer` and decode the result as UTF-8. */
+export function gunzipBuffer(buffer: Buffer): string {
+  return gunzipSync(buffer).toString('utf8');
+}
+
+/**
+ * Derive size statistics for `text` and its `compressed` form. Empty input
+ * yields a ratio and percentSaved of 0 instead of dividing by zero.
+ */
+export function computeStats(text: string, compressed: Buffer): GzipStats {
+  const originalBytes = Buffer.byteLength(text, 'utf8');
+  const compressedBytes = compressed.length;
+  const ratio = originalBytes === 0 ? 0 : compressedBytes / originalBytes;
+  return {
+    originalBytes,
+    compressedBytes,
+    ratio,
+    percentSaved: originalBytes === 0 ? 0 : (1 - ratio) * 100,
+  };
+}
+
+/**
+ * Write gzipped text to `${path}.gz` using atomic tmp + rename so a
+ * crash mid-write never produces a half-written file. Also removes any
+ * stale uncompressed plaintext at `path` once the gzip lands (backward
+ * compat cleanup).
+ *
+ * If the write or the rename throws, the temporary file is removed
+ * (best-effort) and the original error is rethrown.
+ *
+ * @param path - Logical (uncompressed) file path; output goes to `${path}.gz`.
+ * @param text - Content to compress and store.
+ * @param level - zlib compression level, forwarded to `gzipString`.
+ * @returns Size statistics for the written payload.
+ */
+export function saveGzippedFile(path: string, text: string, level: number = 6): GzipStats {
+  // Recursive mkdir is a no-op for an existing directory, so no
+  // check-then-create race with a separate existsSync call.
+  mkdirSync(dirname(path), { recursive: true });
+  const compressed = gzipString(text, level);
+  const gzPath = `${path}.gz`;
+  const tmpPath = `${gzPath}.tmp`;
+  try {
+    writeFileSync(tmpPath, compressed);
+    renameSync(tmpPath, gzPath);
+  } catch (error) {
+    // Don't leave a stray tmp file behind when the write or rename fails.
+    try {
+      unlinkSync(tmpPath);
+    } catch {
+      // Nothing to clean up (or cleanup itself failed) — surface the
+      // original error below.
+    }
+    throw error;
+  }
+  if (existsSync(path)) {
+    try {
+      unlinkSync(path);
+    } catch {
+      // Best-effort — leaving the plaintext file isn't fatal.
+    }
+  }
+  return computeStats(text, compressed);
+}
+
+/**
+ * Load either `${path}.gz` or `${path}` — whichever exists. Returns
+ * null if neither is present. If the `.gz` sibling exists but can't
+ * be decompressed (corrupt, partially-written), falls back to the
+ * plaintext path so the backward-compat migration still works.
+ */
+export function loadMaybeGzippedFile(path: string): string | null {
+  const gzPath = `${path}.gz`;
+  if (existsSync(gzPath)) {
+    try {
+      const buffer = readFileSync(gzPath);
+      return gunzipBuffer(buffer);
+    } catch (error) {
+      // With no plaintext sibling there is nothing to fall back to, so the
+      // read/decompress failure is propagated to the caller.
+      if (!existsSync(path)) {
+        throw error;
+      }
+      // Fall through to the plaintext sibling below.
+    }
+  }
+  if (existsSync(path)) {
+    return readFileSync(path, 'utf-8');
+  }
+  return null;
+}
diff --git a/src/utils/lru-cache.ts b/src/utils/lru-cache.ts
new file mode 100644
index 0000000..8f7a5b2
--- /dev/null
+++ b/src/utils/lru-cache.ts
@@ -0,0 +1,137 @@
+/**
+ * Generic LRU cache with optional per-entry TTL — addresses issue #125.
+ *
+ * Unlike CacheEngine (token-aware, persistent SQLite cache), this is an
+ * in-memory LRU intended for hot paths: file-search results, token counts,
+ * MCP correction responses, etc. Eviction is O(1) via Map insertion order.
+ */
+
+/** Counters and derived hit rate reported by `LruCache.stats()`. */
+export interface LruCacheStats {
+  size: number;
+  maxSize: number;
+  hits: number;
+  misses: number;
+  evictions: number;
+  expired: number;
+  hitRate: number;
+}
+
+/** Stored value plus its absolute expiry time (0 = never expires). */
+interface LruCacheEntry<V> {
+  value: V;
+  expiresAt: number;
+}
+
+export class LruCache<K, V> {
+  // Map iteration order is insertion order, so the first key is always the
+  // least recently used entry.
+  private readonly cache = new Map<K, LruCacheEntry<V>>();
+  private readonly maxSize: number;
+  private readonly defaultTtlMs: number;
+  private hits = 0;
+  private misses = 0;
+  private evictions = 0;
+  private expired = 0;
+
+  /**
+   * @param maxSize - Maximum number of entries; must be greater than 0.
+   * @param defaultTtlMs - TTL applied when `set` is called without one;
+   *   values <= 0 mean entries never expire.
+   * @throws Error when `maxSize` is not greater than 0 (including NaN).
+   */
+  constructor(maxSize: number, defaultTtlMs: number = 0) {
+    // Written as a negated `>` so NaN is rejected too: `NaN <= 0` is false,
+    // and a NaN limit would make `size >= maxSize` never true, i.e. an
+    // unbounded cache.
+    if (!(maxSize > 0)) {
+      throw new Error(`LruCache maxSize must be > 0, got ${maxSize}`);
+    }
+    this.maxSize = maxSize;
+    this.defaultTtlMs = defaultTtlMs;
+  }
+
+  /**
+   * Return the cached value for `key`, or `undefined` on a miss. An entry
+   * past its TTL is removed and counted as both expired and a miss.
+   */
+  public get(key: K): V | undefined {
+    const entry = this.cache.get(key);
+    if (!entry) {
+      this.misses++;
+      return undefined;
+    }
+
+    if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) {
+      this.cache.delete(key);
+      this.expired++;
+      this.misses++;
+      return undefined;
+    }
+
+    // Refresh recency: remove + re-insert moves to the tail.
+    this.cache.delete(key);
+    this.cache.set(key, entry);
+    this.hits++;
+    return entry.value;
+  }
+
+  /**
+   * Insert or replace `key`. Replacing an existing key moves it to the
+   * most-recently-used position; inserting into a full cache first evicts
+   * the least recently used entry.
+   *
+   * @param ttlMs - Per-entry TTL in ms; falls back to the default TTL when
+   *   omitted. A resulting TTL <= 0 means the entry never expires.
+   */
+  public set(key: K, value: V, ttlMs?: number): void {
+    if (this.cache.has(key)) {
+      this.cache.delete(key);
+    } else if (this.cache.size >= this.maxSize) {
+      // Decide on the iterator's `done` flag rather than on the key's
+      // value: `undefined` is a legal Map key, and comparing the key with
+      // undefined would skip eviction and let the cache outgrow maxSize.
+      const oldest = this.cache.keys().next();
+      if (!oldest.done) {
+        this.cache.delete(oldest.value);
+        this.evictions++;
+      }
+    }
+
+    const effectiveTtl = ttlMs ?? this.defaultTtlMs;
+    this.cache.set(key, {
+      value,
+      expiresAt: effectiveTtl > 0 ? Date.now() + effectiveTtl : 0,
+    });
+  }
+
+  /**
+   * Report whether a live entry exists for `key`. An expired entry is
+   * removed and counted as expired; hit/miss counters and recency are
+   * left untouched.
+   */
+  public has(key: K): boolean {
+    const entry = this.cache.get(key);
+    if (!entry) {
+      return false;
+    }
+    if (entry.expiresAt !== 0 && Date.now() > entry.expiresAt) {
+      this.cache.delete(key);
+      this.expired++;
+      return false;
+    }
+    return true;
+  }
+
+  /** Remove `key`; returns whether an entry was present. */
+  public delete(key: K): boolean {
+    return this.cache.delete(key);
+  }
+
+  /** Drop every entry. The hit/miss/eviction counters are not reset. */
+  public clear(): void {
+    this.cache.clear();
+  }
+
+  /** Number of stored entries, including expired ones not yet removed. */
+  public get size(): number {
+    return this.cache.size;
+  }
+
+  /**
+   * Remove all entries whose TTL has expired. Returns the count removed.
+   *
+   * Scans every entry regardless of the default TTL so per-entry TTLs
+   * passed via set(key, value, ttlMs) are also cleaned up even when the
+   * cache was constructed with defaultTtlMs === 0.
+   */
+  public prune(): number {
+    const now = Date.now();
+    let removed = 0;
+    for (const [key, entry] of this.cache) {
+      if (entry.expiresAt !== 0 && now > entry.expiresAt) {
+        this.cache.delete(key);
+        removed++;
+      }
+    }
+    this.expired += removed;
+    return removed;
+  }
+
+  /** Snapshot of the counters; hitRate is 0 until the first get(). */
+  public stats(): LruCacheStats {
+    const total = this.hits + this.misses;
+    return {
+      size: this.cache.size,
+      maxSize: this.maxSize,
+      hits: this.hits,
+      misses: this.misses,
+      evictions: this.evictions,
+      expired: this.expired,
+      hitRate: total === 0 ?
0 : this.hits / total,
+    };
+  }
+}
diff --git a/src/utils/lru-memoize.ts b/src/utils/lru-memoize.ts
new file mode 100644
index 0000000..ea43aeb
--- /dev/null
+++ b/src/utils/lru-memoize.ts
@@ -0,0 +1,119 @@
+import { createHash } from 'crypto';
+import { LruCache, LruCacheStats } from './lru-cache.js';
+
+/**
+ * Wrap an async function with an LRU cache so repeated calls with the
+ * same arguments are served from memory — addresses issue #125's
+ * "store results of expensive operations" for smart_read, smart_grep,
+ * smart_glob, and edit-correction paths.
+ *
+ * Each wrapped function owns its own cache, but every cache is
+ * registered with the shared `memoRegistry` so the server can prune
+ * and log stats for all of them at once.
+ */
+
+// NOTE(review): generic type parameters/arguments appear to be missing in
+// this file as shown here (e.g. `Args` below is never declared and
+// `LruCache` carries no type arguments) — confirm against the real source.
+export interface LruMemoizeOptions {
+  /** Identifier used in logs; also the key the cache is registered under. */
+  name: string;
+  /** Max cached entries. */
+  maxSize: number;
+  /** Default per-entry TTL in ms. 0 disables expiration. */
+  ttlMs?: number;
+  /** Custom key function; defaults to sha256(JSON.stringify(args)). */
+  keyFn?: (args: Args) => string;
+}
+
+/** One registry entry: a cache and the name it was registered under. */
+export interface RegisteredCache {
+  name: string;
+  cache: LruCache;
+}
+
+/** Name-indexed collection of caches for bulk prune / stats / clear. */
+class MemoRegistry {
+  private readonly caches = new Map();
+
+  /**
+   * Add `entry` under `entry.name`. Registering a second cache with an
+   * already-used name replaces the earlier entry.
+   */
+  public register(entry: RegisteredCache): void {
+    this.caches.set(entry.name, entry);
+  }
+
+  /** Prune every registered cache and return total entries removed.
*/
+  public pruneAll(): number {
+    let total = 0;
+    for (const { cache } of this.caches.values()) {
+      total += cache.prune();
+    }
+    return total;
+  }
+
+  /** Collect `stats()` from every registered cache, keyed by cache name. */
+  public stats(): Record {
+    const out: Record = {};
+    for (const [name, { cache }] of this.caches) {
+      out[name] = cache.stats();
+    }
+    return out;
+  }
+
+  /** Empty every registered cache; the caches stay registered. */
+  public clearAll(): void {
+    for (const { cache } of this.caches.values()) {
+      cache.clear();
+    }
+  }
+}
+
+/** Shared registry instance that every `lruMemoize` cache is added to. */
+export const memoRegistry = new MemoRegistry();
+
+/**
+ * Return a memoized version of the async function `fn`.
+ *
+ * The wrapper owns a dedicated LruCache sized by `options.maxSize` /
+ * `options.ttlMs` and registers it with `memoRegistry` under
+ * `options.name`.
+ *
+ * NOTE(review): the generic parameter list (`Args`, `R`) and several type
+ * arguments look stripped from this signature as shown — confirm against
+ * the real source.
+ */
+export function lruMemoize(
+  fn: (...args: Args) => Promise,
+  options: LruMemoizeOptions
+): (...args: Args) => Promise {
+  // Wrap values in a tiny envelope so a legitimately-cached `undefined`
+  // can be distinguished from a cache miss.
+  type Envelope = { value: R };
+  const cache = new LruCache(options.maxSize, options.ttlMs ?? 0);
+
+  // Deduplicate concurrent calls for the same key so a stampede of
+  // requests while the first promise is still pending doesn't run the
+  // expensive function N times.
+  const inFlight = new Map>();
+
+  memoRegistry.register({
+    name: options.name,
+    cache: cache as unknown as LruCache,
+  });
+
+  // Default key: SHA-256 hex digest of the JSON-serialized argument list.
+  const keyFn =
+    options.keyFn ??
+    ((args: Args): string => {
+      const serialized = JSON.stringify(args, (_, v) => {
+        // Tag bigints with a dedicated discriminator so
+        // `[1n]` and `["1"]` don't collapse to the same key.
+        if (typeof v === 'bigint') {
+          return { __memo_bigint__: v.toString() };
+        }
+        return v;
+      });
+      return createHash('sha256').update(serialized).digest('hex');
+    });
+
+  // NOTE(review): the `Promise` return annotation below looks like it lost
+  // its type argument — confirm against the real source.
+  return async (...args: Args): Promise => {
+    const key = keyFn(args);
+    // 1) Serve from the cache when an unexpired envelope exists.
+    const hit = cache.get(key);
+    if (hit !== undefined) {
+      return hit.value;
+    }
+    // 2) Join a call that is already running for the same key.
+    const pending = inFlight.get(key);
+    if (pending) {
+      return pending;
+    }
+    // 3) Run `fn`. Only a resolved value is cached — if `fn` rejects,
+    //    cache.set is never reached — and the in-flight marker is cleared
+    //    on success and failure alike.
+    const promise = (async () => {
+      try {
+        const value = await fn(...args);
+        cache.set(key, { value });
+        return value;
+      } finally {
+        inFlight.delete(key);
+      }
+    })();
+    inFlight.set(key, promise);
+    return promise;
+  };
+}
diff --git a/src/validation/tool-schemas.ts b/src/validation/tool-schemas.ts
index b09ecb4..21e1cfb 100644
--- a/src/validation/tool-schemas.ts
+++ b/src/validation/tool-schemas.ts
@@ -23,6 +23,13 @@ export const GetCachedSchema = z.object({
 // 3. count_tokens
 export const CountTokensSchema = z.object({
   text: z.string().describe('Text to count tokens for'),
+  modelName: z
+    .string()
+    .optional()
+    .describe(
+      'Model name (e.g. gpt-4, claude-opus-4-7, gemini-2.5-flash). ' +
+        'Defaults to the server-configured model when omitted.'
+    ),
 });
 
 // 4. compress_text
@@ -413,6 +420,46 @@ export const ExportAnalyticsSchema = z.object({
     .describe('Optional filter by MCP server name'),
 });
 
+// 72. optimization_storage — discriminated union keyed on `operation` so
+// the zod validator rejects a `store` request missing the required
+// payload fields at validateToolArgs time, instead of after dispatch.
+export const OptimizationStorageSchema = z.discriminatedUnion('operation', [
+  z.object({
+    operation: z.literal('store'),
+    originalTextHash: z.string().min(1),
+    optimizedText: z.string(),
+    originalTokens: z.number().nonnegative(),
+    optimizedTokens: z.number().nonnegative(),
+    tokensSaved: z.number(),
+  }),
+  z.object({
+    operation: z.literal('retrieve'),
+    originalTextHash: z.string().min(1),
+  }),
+]);
+
+// 73.
context_delta — discriminated on operation so compute-delta and
+// seed require currentContent at validation time rather than runtime.
+export const ContextDeltaSchema = z.discriminatedUnion('operation', [
+  // compute-delta: session, file and the current content are all required.
+  z.object({
+    operation: z.literal('compute-delta'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+    currentContent: z.string(),
+  }),
+  // seed: same fields as compute-delta.
+  z.object({
+    operation: z.literal('seed'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+    currentContent: z.string(),
+  }),
+  // clear: identifies the session/file only; no content field.
+  z.object({
+    operation: z.literal('clear'),
+    sessionId: z.string().min(1),
+    filePath: z.string().min(1),
+  }),
+]);
+
 // Map tool names to their schemas for easy lookup
 export const toolSchemaMap: Record> = {
   optimize_text: OptimizeTextSchema,
@@ -486,4 +533,6 @@ export const toolSchemaMap: Record> = {
   get_action_analytics: GetActionAnalyticsSchema,
   get_mcp_server_analytics: GetMcpServerAnalyticsSchema,
   export_analytics: ExportAnalyticsSchema,
+  optimization_storage: OptimizationStorageSchema,
+  context_delta: ContextDeltaSchema,
 };
diff --git a/tests/benchmarks/results.json b/tests/benchmarks/results.json
index b54d5be..dd72b7a 100644
--- a/tests/benchmarks/results.json
+++ b/tests/benchmarks/results.json
@@ -1,314 +1,314 @@
 [
   {
     "operation": "token-count-small",
-    "avgDuration": 0.21655369999999988,
-    "minDuration": 0.1389,
-    "maxDuration": 6.4655,
-    "p50": 0.1805,
-    "p90": 0.2406,
-    "p95": 0.2933,
-    "p99": 0.7547,
-    "throughput": 4617.792261226664,
-    "memoryUsed": 643496
+    "avgDuration": 0.3614560000000002,
+    "minDuration": 0.1311,
+    "maxDuration": 8.1673,
+    "p50": 0.1678,
+    "p90": 0.3182,
+    "p95": 1.7474,
+    "p99": 4.338,
+    "throughput": 2766.5884644327366,
+    "memoryUsed": 653224
   },
   {
     "operation": "token-count-medium",
-    "avgDuration": 1.3377186000000005,
-    "minDuration": 0.8123,
-    "maxDuration": 18.5582,
-    "p50": 1.0083,
-    "p90": 1.5469,
-    "p95": 3.0017,
-    "p99": 9.8616,
-    "throughput": 747.5413737986446,
-    "memoryUsed": 235800
+    "avgDuration": 1.9391590000000005,
+ "minDuration": 0.7915, + "maxDuration": 8.2937, + "p50": 1.1356, + "p90": 4.2806, + "p95": 5.0478, + "p99": 6.6412, + "throughput": 515.6874707025054, + "memoryUsed": 227808 }, { "operation": "token-count-large", - "avgDuration": 21.130661, - "minDuration": 15.5, - "maxDuration": 61.1893, - "p50": 19.1065, - "p90": 27.2373, - "p95": 29.4197, - "p99": 61.1893, - "throughput": 47.32459623482673, - "memoryUsed": 123872 + "avgDuration": 30.857169999999996, + "minDuration": 16.9689, + "maxDuration": 62.7117, + "p50": 28.8884, + "p90": 44.6519, + "p95": 52.7573, + "p99": 62.7117, + "throughput": 32.40737890091671, + "memoryUsed": 163384 }, { "operation": "token-count-batch", - "avgDuration": 5.449602000000001, - "minDuration": 3.8459, - "maxDuration": 13.5479, - "p50": 4.8888, - "p90": 7.3654, - "p95": 9.2773, - "p99": 13.5479, - "throughput": 183.49963905620996, - "memoryUsed": 418480 + "avgDuration": 8.504128, + "minDuration": 3.7843, + "maxDuration": 25.0622, + "p50": 6.9115, + "p90": 14.5447, + "p95": 17.9337, + "p99": 25.0622, + "throughput": 117.58995160938312, + "memoryUsed": 418424 }, { "operation": "token-estimate", - "avgDuration": 0.0012522999999999961, - "minDuration": 0.001, - "maxDuration": 0.0564, - "p50": 0.0012, - "p90": 0.0013, - "p95": 0.0014, - "p99": 0.0018, - "throughput": 798530.7035055521, - "memoryUsed": 149920 + "avgDuration": 0.0014780999999999996, + "minDuration": 0.0011, + "maxDuration": 0.0561, + "p50": 0.0014, + "p90": 0.0016, + "p95": 0.0017, + "p99": 0.0021, + "throughput": 676544.2121642651, + "memoryUsed": 149912 }, { "operation": "compress-small", - "avgDuration": 0.9627775000000005, - "minDuration": 0.426, - "maxDuration": 28.6613, - "p50": 0.5542, - "p90": 0.8703, - "p95": 1.9429, - "p99": 13.3281, - "throughput": 1038.661580687126, - "memoryUsed": -2487288 + "avgDuration": 1.2691879999999993, + "minDuration": 0.4314, + "maxDuration": 24.3388, + "p50": 0.5745, + "p90": 3.0667, + "p95": 4.4366, + "p99": 9.2609, + "throughput": 
787.9053379010835, + "memoryUsed": -4992648 }, { "operation": "compress-medium", - "avgDuration": 0.7440725999999998, - "minDuration": 0.4942, - "maxDuration": 14.3456, - "p50": 0.6042, - "p90": 0.8371, - "p95": 1.1362, - "p99": 4.4296, - "throughput": 1343.9548775213605, - "memoryUsed": -1654048 + "avgDuration": 1.5216334000000002, + "minDuration": 0.5063, + "maxDuration": 13.7982, + "p50": 0.6649, + "p90": 3.5498, + "p95": 6.0332, + "p99": 9.4093, + "throughput": 657.1885186011294, + "memoryUsed": -1948624 }, { "operation": "compress-large", - "avgDuration": 2.7067430000000003, - "minDuration": 1.9602, - "maxDuration": 12.7897, - "p50": 2.2941, - "p90": 3.2423, - "p95": 4.5883, - "p99": 12.7897, - "throughput": 369.4477089254503, - "memoryUsed": 337712 + "avgDuration": 4.812509, + "minDuration": 2.2623, + "maxDuration": 13.9499, + "p50": 4.4611, + "p90": 7.8466, + "p95": 9.0442, + "p99": 13.9499, + "throughput": 207.79181919451992, + "memoryUsed": 359296 }, { "operation": "decompress", - "avgDuration": 0.031263400000000004, - "minDuration": 0.018, - "maxDuration": 2.1778, - "p50": 0.0243, - "p90": 0.0363, - "p95": 0.0642, - "p99": 0.0877, - "throughput": 31986.28428130018, - "memoryUsed": 3302040 + "avgDuration": 0.05447509999999999, + "minDuration": 0.0181, + "maxDuration": 3.3958, + "p50": 0.0238, + "p90": 0.0394, + "p95": 0.0754, + "p99": 2.0722, + "throughput": 18357.01081778648, + "memoryUsed": 3294192 }, { "operation": "compress-base64", - "avgDuration": 0.9150339999999996, - "minDuration": 0.5028, - "maxDuration": 16.1137, - "p50": 0.6502, - "p90": 0.9426, - "p95": 1.6786, - "p99": 9.5861, - "throughput": 1092.855566022684, - "memoryUsed": -4893280 + "avgDuration": 1.3982006000000007, + "minDuration": 0.5093, + "maxDuration": 17.5377, + "p50": 0.6728, + "p90": 3.5295, + "p95": 4.8434, + "p99": 9.593, + "throughput": 715.2049569997321, + "memoryUsed": -4899848 }, { "operation": "compress-quality-1", - "avgDuration": 0.0402535, - "minDuration": 0.0182, - 
"maxDuration": 1.2449, - "p50": 0.025, - "p90": 0.0459, - "p95": 0.0879, - "p99": 0.3411, - "throughput": 24842.56027426187, - "memoryUsed": 568648 + "avgDuration": 0.08870700000000004, + "minDuration": 0.0176, + "maxDuration": 4.9948, + "p50": 0.0227, + "p90": 0.0636, + "p95": 0.0849, + "p99": 3.4525, + "throughput": 11273.06751440134, + "memoryUsed": 575952 }, { "operation": "compress-quality-11", - "avgDuration": 1.4276415, - "minDuration": 0.7075, - "maxDuration": 19.9989, - "p50": 0.807, - "p90": 1.2711, - "p95": 6.5825, - "p99": 18.9334, - "throughput": 700.4559618083391, - "memoryUsed": 560544 + "avgDuration": 1.7096004999999996, + "minDuration": 0.6727, + "maxDuration": 8.1428, + "p50": 0.9267, + "p90": 3.7587, + "p95": 5.583, + "p99": 7.3277, + "throughput": 584.9319767980883, + "memoryUsed": 566880 }, { "operation": "cache-write", - "avgDuration": 0.28219179999999994, - "minDuration": 0.1069, - "maxDuration": 16.9523, - "p50": 0.1543, - "p90": 0.2669, - "p95": 0.3756, - "p99": 2.9096, - "throughput": 3543.689079555112, - "memoryUsed": 646560 + "avgDuration": 0.6586279000000003, + "minDuration": 0.1024, + "maxDuration": 14.5587, + "p50": 0.1522, + "p90": 1.0068, + "p95": 4.7546, + "p99": 7.3433, + "throughput": 1518.3079854345672, + "memoryUsed": 640856 }, { "operation": "cache-read-memory", - "avgDuration": 0.25163070000000015, - "minDuration": 0.0793, - "maxDuration": 28.8506, - "p50": 0.1203, - "p90": 0.1493, - "p95": 0.2349, - "p99": 4.1812, - "throughput": 3974.077884773199, - "memoryUsed": 478016 + "avgDuration": 0.4997243, + "minDuration": 0.079, + "maxDuration": 15.3762, + "p50": 0.1012, + "p90": 0.2858, + "p95": 4.201, + "p99": 8.698, + "throughput": 2001.1034084194023, + "memoryUsed": 478008 }, { "operation": "cache-read-disk", - "avgDuration": 0.45986899999999997, - "minDuration": 0.0884, - "maxDuration": 39.4532, - "p50": 0.1262, - "p90": 0.1936, - "p95": 0.2933, - "p99": 19.9866, - "throughput": 2174.532312462897, - "memoryUsed": 297264 + 
"avgDuration": 0.47186479999999986, + "minDuration": 0.0742, + "maxDuration": 12.5767, + "p50": 0.1039, + "p90": 0.2734, + "p95": 4.0729, + "p99": 8.0254, + "throughput": 2119.2511075206294, + "memoryUsed": 297344 }, { "operation": "cache-delete", - "avgDuration": 0.5386263000000001, - "minDuration": 0.0733, - "maxDuration": 99.0395, - "p50": 0.1153, - "p90": 0.1729, - "p95": 0.2314, - "p99": 5.5755, - "throughput": 1856.5747717851873, - "memoryUsed": 444360 + "avgDuration": 0.37161569999999977, + "minDuration": 0.0764, + "maxDuration": 18.4903, + "p50": 0.1064, + "p90": 0.2381, + "p95": 1.8399, + "p99": 5.8565, + "throughput": 2690.951970005575, + "memoryUsed": 442720 }, { "operation": "cache-stats", - "avgDuration": 0.32004450000000007, - "minDuration": 0.1652, - "maxDuration": 48.5593, - "p50": 0.2046, - "p90": 0.2563, - "p95": 0.3068, - "p99": 2.1103, - "throughput": 3124.5654901115304, - "memoryUsed": 765656 + "avgDuration": 0.6591104, + "minDuration": 0.1651, + "maxDuration": 10.2148, + "p50": 0.197, + "p90": 1.7847, + "p95": 4.4389, + "p99": 7.1797, + "throughput": 1517.1965121472822, + "memoryUsed": 770976 }, { "operation": "metrics-record", - "avgDuration": 0.0027574999999999657, - "minDuration": 0.0019, - "maxDuration": 0.1329, - "p50": 0.0025, - "p90": 0.003, - "p95": 0.0033, - "p99": 0.0056, - "throughput": 362647.32547597913, - "memoryUsed": 459872 + "avgDuration": 0.01021149999999998, + "minDuration": 0.0017, + "maxDuration": 4.0691, + "p50": 0.0022, + "p90": 0.0028, + "p95": 0.003, + "p99": 0.0075, + "throughput": 97928.80575821397, + "memoryUsed": 459840 }, { "operation": "metrics-cache-stats", - "avgDuration": 0.5644068000000001, - "minDuration": 0.0897, - "maxDuration": 113.9726, - "p50": 0.1075, - "p90": 0.1357, - "p95": 0.1805, - "p99": 18.041, - "throughput": 1771.7717079241424, - "memoryUsed": -5707968 + "avgDuration": 0.5884749999999999, + "minDuration": 0.0814, + "maxDuration": 101.3263, + "p50": 0.0959, + "p90": 0.146, + "p95": 2.1334, + 
"p99": 8.5735, + "throughput": 1699.3075321806366, + "memoryUsed": -5767600 }, { "operation": "metrics-breakdown", - "avgDuration": 2.7876345, - "minDuration": 0.6644, - "maxDuration": 53.726, - "p50": 0.8498, - "p90": 2.1808, - "p95": 24.6884, - "p99": 40.5231, - "throughput": 358.7270856347918, - "memoryUsed": 4166392 + "avgDuration": 3.030816000000001, + "minDuration": 0.6429, + "maxDuration": 14.331, + "p50": 0.9025, + "p90": 8.4685, + "p95": 10.3432, + "p99": 12.5062, + "throughput": 329.94414705478647, + "memoryUsed": 3432896 }, { "operation": "metrics-percentiles", - "avgDuration": 0.19757700000000003, - "minDuration": 0.0665, - "maxDuration": 22.145, - "p50": 0.0782, - "p90": 0.1113, - "p95": 0.1406, - "p99": 0.3571, - "throughput": 5061.3178659459345, - "memoryUsed": 6890256 + "avgDuration": 0.2575195, + "minDuration": 0.0656, + "maxDuration": 5.1485, + "p50": 0.0749, + "p90": 0.1152, + "p95": 0.2302, + "p99": 4.9353, + "throughput": 3883.201077976619, + "memoryUsed": 6877416 }, { "operation": "e2e-optimization", - "avgDuration": 4.089156000000001, - "minDuration": 1.7855, - "maxDuration": 36.6479, - "p50": 2.4161, - "p90": 7.4475, - "p95": 13.5108, - "p99": 36.6479, - "throughput": 244.54924194625977, - "memoryUsed": 843760 + "avgDuration": 10.268875999999999, + "minDuration": 2.0887, + "maxDuration": 20.2826, + "p50": 9.5857, + "p90": 16.0762, + "p95": 18.8724, + "p99": 20.2826, + "throughput": 97.38164137925126, + "memoryUsed": 848648 }, { "operation": "e2e-cache-hit", - "avgDuration": 0.5933224999999999, - "minDuration": 0.1019, - "maxDuration": 55.8155, - "p50": 0.1439, - "p90": 0.2178, - "p95": 0.3251, - "p99": 21.812, - "throughput": 1685.424031618555, - "memoryUsed": -14126360 + "avgDuration": 0.5115652000000002, + "minDuration": 0.0905, + "maxDuration": 18.1006, + "p50": 0.1224, + "p90": 0.3575, + "p95": 3.4573, + "p99": 8.1231, + "throughput": 1954.785040108279, + "memoryUsed": 2419984 }, { "operation": "regression-token-count", - "avgDuration": 
1.0176798, - "minDuration": 0.4497, - "maxDuration": 19.4484, - "p50": 0.5622, - "p90": 0.7359, - "p95": 3.6875, - "p99": 15.1693, - "throughput": 982.6273450647246, - "memoryUsed": 263416 + "avgDuration": 1.9268184000000008, + "minDuration": 0.455, + "maxDuration": 17.2354, + "p50": 0.6255, + "p90": 5.6609, + "p95": 6.9407, + "p99": 12.8987, + "throughput": 518.9902691400496, + "memoryUsed": 263464 }, { "operation": "regression-compress", - "avgDuration": 1.1226146666666665, - "minDuration": 0.7638, - "maxDuration": 28.5129, - "p50": 0.8377, - "p90": 1.0199, - "p95": 1.281, - "p99": 10.5418, - "throughput": 890.7776013378293, - "memoryUsed": -5355728 + "avgDuration": 5.232439333333333, + "minDuration": 0.7827, + "maxDuration": 61.2683, + "p50": 2.3238, + "p90": 12.9448, + "p95": 14.6136, + "p99": 17.48, + "throughput": 191.11545042280474, + "memoryUsed": -4833328 }, { "operation": "regression-cache", - "avgDuration": 0.4148695999999999, - "minDuration": 0.1768, - "maxDuration": 17.4546, - "p50": 0.2465, - "p90": 0.3658, - "p95": 0.5651, - "p99": 5.4624, - "throughput": 2410.395941278899, - "memoryUsed": -447896 + "avgDuration": 2.2698532000000005, + "minDuration": 0.1812, + "maxDuration": 44.9064, + "p50": 0.2844, + "p90": 10.6146, + "p95": 13.1184, + "p99": 19.7883, + "throughput": 440.5571250158379, + "memoryUsed": -467608 } ] \ No newline at end of file diff --git a/tests/unit/cache-engine.test.ts b/tests/unit/cache-engine.test.ts index 8a99374..f8cabc2 100644 --- a/tests/unit/cache-engine.test.ts +++ b/tests/unit/cache-engine.test.ts @@ -48,7 +48,7 @@ describe('CacheEngine', () => { cache = new CacheEngine(testDbPath, 100); }); - afterEach(() => { + afterEach(async () => { // Restore original environment variable if (originalEnv !== undefined) { process.env.TOKEN_OPTIMIZER_CACHE_DIR = originalEnv; @@ -58,6 +58,7 @@ describe('CacheEngine', () => { // Clean up cache.close(); + await new Promise(resolve => setTimeout(resolve, 100)); // Add a small delay if 
(fs.existsSync(testDbPath)) { fs.unlinkSync(testDbPath); } diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts new file mode 100644 index 0000000..a767d89 --- /dev/null +++ b/tests/unit/config.test.ts @@ -0,0 +1,94 @@ +import { describe, it, expect, afterEach } from '@jest/globals'; +import { mkdtempSync, writeFileSync, existsSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { ConfigManager } from '../../src/core/config.js'; + +describe('ConfigManager', () => { + const tempDirs: string[] = []; + + afterEach(() => { + while (tempDirs.length) { + const dir = tempDirs.pop(); + if (dir) { + rmSync(dir, { recursive: true, force: true }); + } + } + }); + + function tempConfigPath(): string { + const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-config-')); + tempDirs.push(dir); + return join(dir, 'config.json'); + } + + function writeConfig(content: string): string { + const file = tempConfigPath(); + writeFileSync(file, content); + return file; + } + + it('returns defaults when no config file exists and writeDefaults is false', () => { + const mgr = new ConfigManager(tempConfigPath(), { writeDefaults: false }); + const opt = mgr.getOptimizationConfig(); + expect(opt.compressionTokenThreshold).toBe(0.7); + expect(opt.quality).toBe('balanced'); + expect(opt.cacheSettings.maxSize).toBe(1000); + expect(opt.cacheSettings.ttlSeconds).toBe(3600); + expect(opt.chatCompression.enabled).toBe(true); + expect(opt.chatCompression.strategy).toBe('summarize'); + expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000); + }); + + it('writes a default config file on first run', () => { + const file = tempConfigPath(); + expect(existsSync(file)).toBe(false); + new ConfigManager(file); + expect(existsSync(file)).toBe(true); + + // A second instance reads what the first wrote. 
+ const second = new ConfigManager(file); + expect(second.getOptimizationConfig().quality).toBe('balanced'); + }); + + it('overrides defaults with user config — nested sub-objects deep-merge', () => { + const configPath = writeConfig( + JSON.stringify({ + optimization: { + compressionTokenThreshold: 0.9, + quality: 'max', + cacheSettings: { maxSize: 42 }, + chatCompression: { strategy: 'truncate' }, + modelTokenLimits: { 'custom-model': 500000 }, + }, + }) + ); + const mgr = new ConfigManager(configPath, { writeDefaults: false }); + const opt = mgr.getOptimizationConfig(); + expect(opt.compressionTokenThreshold).toBe(0.9); + expect(opt.quality).toBe('max'); + expect(opt.cacheSettings.maxSize).toBe(42); + // Unprovided sub-field retains default. + expect(opt.cacheSettings.ttlSeconds).toBe(3600); + expect(opt.chatCompression.enabled).toBe(true); + expect(opt.chatCompression.strategy).toBe('truncate'); + expect(mgr.getModelTokenLimit('custom-model')).toBe(500000); + // Built-in model limits must survive a partial override. 
+ expect(mgr.getModelTokenLimit('gpt-4')).toBe(128000); + expect(opt.compressionPreserveThreshold).toBe(0.3); + }); + + it('falls back to defaults on invalid config', () => { + const configPath = writeConfig( + JSON.stringify({ optimization: { compressionTokenThreshold: 5 } }) + ); + const mgr = new ConfigManager(configPath, { writeDefaults: false }); + expect(mgr.getOptimizationConfig().compressionTokenThreshold).toBe(0.7); + }); + + it('falls back to defaults on malformed JSON', () => { + const configPath = writeConfig('not json at all'); + const mgr = new ConfigManager(configPath, { writeDefaults: false }); + expect(mgr.getOptimizationConfig().quality).toBe('balanced'); + }); +}); diff --git a/tests/unit/diff.test.ts b/tests/unit/diff.test.ts new file mode 100644 index 0000000..0780b1f --- /dev/null +++ b/tests/unit/diff.test.ts @@ -0,0 +1,33 @@ +import { describe, it, expect } from '@jest/globals'; +import { calculateDelta, applyDelta } from '../../src/utils/diff.js'; + +describe('diff utils', () => { + it('returns empty delta when inputs are identical', () => { + expect(calculateDelta('hello', 'hello')).toBe(''); + }); + + it('round-trips a simple change', () => { + const prev = 'line1\nline2\nline3\n'; + const next = 'line1\nline2 changed\nline3\n'; + const delta = calculateDelta(prev, next); + expect(delta).not.toBe(''); + expect(applyDelta(prev, delta)).toBe(next); + }); + + it('applyDelta on an empty delta is a no-op', () => { + expect(applyDelta('anything', '')).toBe('anything'); + }); + + it('produces a meaningfully smaller delta than the full content for small edits', () => { + const prev = 'a\n'.repeat(500); + const next = prev + 'appended line\n'; + const delta = calculateDelta(prev, next); + expect(delta.length).toBeLessThan(next.length); + }); + + it('throws when the patch targets a different baseline than supplied', () => { + const patch = calculateDelta('original\ntext\n', 'original\nchanged\n'); + // Applying the patch against completely 
different content fails.
+    expect(() => applyDelta('totally different input\n', patch)).toThrow();
+  });
+});
diff --git a/tests/unit/gzip.test.ts b/tests/unit/gzip.test.ts
new file mode 100644
index 0000000..bbb444c
--- /dev/null
+++ b/tests/unit/gzip.test.ts
@@ -0,0 +1,77 @@
+// Unit tests for the gzip helpers: in-memory round-trip plus on-disk
+// save/load (.gz sibling preferred, plaintext fallback, null when absent).
+import { describe, it, expect, afterEach } from '@jest/globals';
+import { mkdtempSync, existsSync, writeFileSync, rmSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import {
+  gzipString,
+  gunzipBuffer,
+  saveGzippedFile,
+  loadMaybeGzippedFile,
+} from '../../src/utils/gzip.js';
+
+describe('gzip utils', () => {
+  const tempDirs: string[] = [];
+  // Remove every temp directory registered by the test that just ran.
+  afterEach(() => {
+    while (tempDirs.length) {
+      const dir = tempDirs.pop();
+      if (dir) {
+        rmSync(dir, { recursive: true, force: true });
+      }
+    }
+  });
+
+  // Creates a unique temp directory and registers it for cleanup.
+  function tempDir(): string {
+    const dir = mkdtempSync(join(tmpdir(), 'token-optimizer-gzip-'));
+    tempDirs.push(dir);
+    return dir;
+  }
+
+  it('gzipString round-trips via gunzipBuffer', () => {
+    const text = 'Hello, world. '.repeat(1000);
+    const buffer = gzipString(text);
+    expect(buffer.length).toBeLessThan(text.length);
+    expect(gunzipBuffer(buffer)).toBe(text);
+  });
+
+  it('saveGzippedFile writes .gz and removes plaintext', () => {
+    const dir = tempDir();
+    const file = join(dir, 'sessions.json');
+    writeFileSync(file, 'stale plaintext');
+    const stats = saveGzippedFile(file, JSON.stringify({ hello: 'world' }));
+    expect(existsSync(`${file}.gz`)).toBe(true);
+    expect(existsSync(file)).toBe(false);
+    expect(stats.originalBytes).toBeGreaterThan(0);
+    expect(stats.compressedBytes).toBeGreaterThan(0);
+  });
+
+  it('loadMaybeGzippedFile prefers the .gz sibling', () => {
+    const dir = tempDir();
+    const file = join(dir, 'state.json');
+    saveGzippedFile(file, '{"compressed":true}');
+    expect(loadMaybeGzippedFile(file)).toBe('{"compressed":true}');
+  });
+
+  it('loadMaybeGzippedFile falls back to plaintext when no .gz exists', () => {
+    const dir = tempDir();
+    const file = join(dir, 'legacy.json');
+    writeFileSync(file, '{"legacy":true}');
+    expect(loadMaybeGzippedFile(file)).toBe('{"legacy":true}');
+  });
+
+  it('loadMaybeGzippedFile returns null when neither exists', () => {
+    const dir = tempDir();
+    const file = join(dir, 'missing.json');
+    expect(loadMaybeGzippedFile(file)).toBeNull();
+  });
+
+  it('saves with high compression ratio on repetitive content', () => {
+    const dir = tempDir();
+    const file = join(dir, 'repeated.txt');
+    const stats = saveGzippedFile(file, 'aa'.repeat(10_000));
+    expect(stats.percentSaved).toBeGreaterThan(95);
+  });
+});
diff --git a/tests/unit/lru-cache.test.ts b/tests/unit/lru-cache.test.ts
new file mode 100644
index 0000000..0063e2c
--- /dev/null
+++ b/tests/unit/lru-cache.test.ts
@@ -0,0 +1,93 @@
+// Unit tests for LruCache: construction guards, hit/miss accounting,
+// LRU eviction order, TTL expiry and prune().
+import { describe, it, expect } from '@jest/globals';
+import { LruCache } from '../../src/utils/lru-cache.js';
+
+describe('LruCache', () => {
+  it('rejects non-positive maxSize', () => {
+    expect(() => new LruCache(0)).toThrow();
+    expect(() => new LruCache(-1)).toThrow();
+  });
+
+  it('get returns undefined on miss and counts it', () => {
+    const cache = new LruCache(2);
+    expect(cache.get('x')).toBeUndefined();
+    expect(cache.stats().misses).toBe(1);
+  });
+
+  it('set/get round-trips and counts hits', () => {
+    const cache = new LruCache(2);
+    cache.set('a', 1);
+    expect(cache.get('a')).toBe(1);
+    expect(cache.stats().hits).toBe(1);
+  });
+
+  it('evicts the least recently used entry when full', () => {
+    const cache = new LruCache(2);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    // Touch 'a' so that 'b' becomes the least recently used entry.
+    cache.get('a');
+    cache.set('c', 3);
+
+    expect(cache.get('a')).toBe(1);
+    expect(cache.get('b')).toBeUndefined();
+    expect(cache.get('c')).toBe(3);
+    expect(cache.stats().evictions).toBe(1);
+  });
+
+  it('refreshes recency on get', () => {
+    const cache = new LruCache(2);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    // Touch 'a' so that 'b' becomes the least recently used entry.
+    cache.get('a');
+    cache.set('c', 3);
+
+    expect(cache.has('b')).toBe(false);
+    expect(cache.has('a')).toBe(true);
+  });
+
+  it('expires entries past the TTL', async () => {
+    const cache = new LruCache(2, 20);
+    cache.set('a', 1);
+    await new Promise((r) => setTimeout(r, 30));
+    expect(cache.get('a')).toBeUndefined();
+    expect(cache.stats().expired).toBe(1);
+  });
+
+  it('prune removes expired entries', async () => {
+    const cache = new LruCache(4, 20);
+    cache.set('a', 1);
+    cache.set('b', 2);
+    await new Promise((r) => setTimeout(r, 30));
+    // 'c' is inserted after the sleep, so only 'a' and 'b' are stale.
+    cache.set('c', 3);
+    const removed = cache.prune();
+    expect(removed).toBe(2);
+    expect(cache.size).toBe(1);
+  });
+
+  it('prune removes per-entry TTL expirations even when defaultTtlMs is 0', async () => {
+    const cache = new LruCache(4, 0);
+    cache.set('short', 1, 20);
+    cache.set('forever', 2);
+    await new Promise((r) => setTimeout(r, 30));
+    const removed = cache.prune();
+    expect(removed).toBe(1);
+    expect(cache.has('forever')).toBe(true);
+    expect(cache.has('short')).toBe(false);
+  });
+
+  it('stats.hitRate reflects hits / total', () => {
+    const cache = new LruCache(2);
+    cache.set('a', 1);
+    cache.get('a');
+    cache.get('a');
+    cache.get('missing');
+    const stats = cache.stats();
+    expect(stats.hits).toBe(2);
+    expect(stats.misses).toBe(1);
+    expect(stats.hitRate).toBeCloseTo(2 / 3);
+  });
+});
diff --git a/tests/unit/lru-memoize.test.ts b/tests/unit/lru-memoize.test.ts
new file mode 100644
index 0000000..b0dae36
--- /dev/null
+++ b/tests/unit/lru-memoize.test.ts
@@ -0,0 +1,115 @@
+// Unit tests for lruMemoize: argument-keyed caching, TTL expiry, registry
+// integration, custom keys, in-flight de-duplication and key edge cases.
+import { describe, it, expect } from '@jest/globals';
+import { lruMemoize, memoRegistry } from '../../src/utils/lru-memoize.js';
+
+describe('lruMemoize', () => {
+  it('returns cached value for identical args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-double', maxSize: 10 });
+    expect(await memo(3)).toBe(6);
+    expect(await memo(3)).toBe(6);
+    expect(calls).toBe(1);
+  });
+
+  it('differentiates calls by args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-by-args', maxSize: 10 });
+    await memo(1);
+    await memo(2);
+    await memo(1);
+    expect(calls).toBe(2);
+  });
+
+  it('expires entries past the TTL', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      return x;
+    };
+    const memo = lruMemoize(fn, { name: 'test-ttl', maxSize: 10, ttlMs: 20 });
+    await memo(7);
+    await memo(7);
+    expect(calls).toBe(1);
+    await new Promise((r) => setTimeout(r, 30));
+    await memo(7);
+    expect(calls).toBe(2);
+  });
+
+  it('registers with memoRegistry for bulk prune / stats', async () => {
+    const fn = async (x: string) => x.toUpperCase();
+    lruMemoize(fn, { name: 'test-registered', maxSize: 5 });
+    const stats = memoRegistry.stats();
+    expect(stats['test-registered']).toBeDefined();
+    expect(stats['test-registered'].size).toBe(0);
+  });
+
+  it('accepts a custom key function', async () => {
+    let calls = 0;
+    const fn = async (obj: { id: string; ignore: number }) => {
+      calls++;
+      return obj.id;
+    };
+    const memo = lruMemoize(fn, {
+      name: 'test-custom-key',
+      maxSize: 5,
+      keyFn: ([{ id }]) => id,
+    });
+    await memo({ id: 'a', ignore: 1 });
+    await memo({ id: 'a', ignore: 9999 }); // same id → hit
+    await memo({ id: 'b', ignore: 1 }); // different id → miss
+    expect(calls).toBe(2);
+  });
+
+  it('deduplicates concurrent calls for the same args', async () => {
+    let calls = 0;
+    const fn = async (x: number) => {
+      calls++;
+      await new Promise((r) => setTimeout(r, 20));
+      return x * 2;
+    };
+    const memo = lruMemoize(fn, { name: 'test-concurrent', maxSize: 10 });
+    const [a, b] = await Promise.all([memo(5), memo(5)]);
+    expect(a).toBe(10);
+    expect(b).toBe(10);
+    // Stampede collapsed into a single invocation.
+    expect(calls).toBe(1);
+  });
+
+  it('memoizes a legitimately-undefined return value', async () => {
+    let calls = 0;
+    const fn = async (): Promise<undefined> => {
+      calls++;
+      return undefined;
+    };
+    const memo = lruMemoize(fn, { name: 'test-undefined', maxSize: 10 });
+    expect(await memo()).toBeUndefined();
+    expect(await memo()).toBeUndefined();
+    // Without envelope-style storage, the second call would re-run fn.
+    expect(calls).toBe(1);
+  });
+
+  it('distinguishes bigint args from string args in the default key', async () => {
+    let calls = 0;
+    const fn = async (x: unknown) => {
+      calls++;
+      return String(x);
+    };
+    const memo = lruMemoize(fn as (x: unknown) => Promise<string>, {
+      name: 'test-bigint-collision',
+      maxSize: 10,
+    });
+    expect(await memo(1n)).toBe('1');
+    expect(await memo('1')).toBe('1');
+    // Two distinct args ⇒ two distinct cache keys ⇒ two invocations.
+    expect(calls).toBe(2);
+  });
+});
diff --git a/tests/unit/session.test.ts b/tests/unit/session.test.ts
new file mode 100644
index 0000000..ffe6c6e
--- /dev/null
+++ b/tests/unit/session.test.ts
@@ -0,0 +1,100 @@
+// Unit tests for Session (history, token budget, compression, snapshots)
+// and for the SessionManager lifecycle / auto-compression path.
+import { describe, it, expect } from '@jest/globals';
+import { Session } from '../../src/core/session.js';
+import { SessionManager } from '../../src/core/session-manager.js';
+import { HeuristicTokenizer } from '../../src/core/tokenizers/heuristic-tokenizer.js';
+
+describe('Session', () => {
+  it('appends messages and tracks updatedAt', async () => {
+    const session = new Session({ allowCharHeuristic: true });
+    const before = session.updatedAt;
+    await new Promise((r) => setTimeout(r, 5));
+    session.addMessage('user', 'hi');
+    expect(session.getHistory().length).toBe(1);
+    expect(session.updatedAt).toBeGreaterThan(before);
+  });
+
+  it('compressHistory is a no-op under the budget', async () => {
+    const session = new Session({
+      maxTokens: 10_000,
+      allowCharHeuristic: true,
+    });
+    session.addMessage('user', 'short');
+    const before = session.getHistory().length;
+    await session.compressHistory();
+    expect(session.getHistory().length).toBe(before);
+  });
+
+  it('getHistoryTokenCount throws without a tokenizer when heuristic is off', async () => {
+    const session = new Session();
+    session.addMessage('user', 'hi');
+    await expect(session.getHistoryTokenCount()).rejects.toThrow(
+      /requires a tokenizer/
+    );
+  });
+
+  it('clearFileContent removes the entry', () => {
+    const session = new Session();
+    session.setFileContent('a.ts', 'const x = 1;');
+    session.clearFileContent('a.ts');
+    expect(session.getFileContent('a.ts')).toBeUndefined();
+  });
+
+  it('compressHistory summarizes head when over budget', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const session = new Session({ maxTokens: 50, tokenizer });
+    // Each long message is several hundred chars → easily over 50 tokens.
+    for (let i = 0; i < 10; i++) {
+      session.addMessage('user', 'a'.repeat(400) + ` turn=${i}`);
+    }
+    expect(await session.getHistoryTokenCount()).toBeGreaterThan(50);
+    await session.compressHistory();
+    const history = session.getHistory();
+    // Summary is stored as `assistant` (never `system`) so that
+    // user-derived text can't be elevated into system-role context.
+    expect(history[0].role).toBe('assistant');
+    expect(history[0].content).toMatch(/^\[summary/);
+    expect(history.length).toBeLessThan(10);
+  });
+
+  it('snapshot round-trips and preserves createdAt / updatedAt', () => {
+    const session = new Session({ maxTokens: 42 });
+    session.addMessage('user', 'hello');
+    session.setFileContent('a.ts', 'const x = 1;');
+    const snapshot = session.toSnapshot();
+    const restored = Session.fromSnapshot(snapshot);
+    expect(restored.id).toBe(session.id);
+    expect(restored.maxTokens).toBe(42);
+    expect(restored.getFileContent('a.ts')).toBe('const x = 1;');
+    expect(restored.getHistory()[0].content).toBe('hello');
+    expect(restored.createdAt).toBe(snapshot.createdAt);
+    expect(restored.updatedAt).toBe(snapshot.updatedAt);
+  });
+});
+
+describe('SessionManager', () => {
+  it('create/get/delete lifecycle', () => {
+    const manager = new SessionManager();
+    const session = manager.createSession();
+    expect(manager.getSession(session.id)).toBe(session);
+    expect(manager.deleteSession(session.id)).toBe(true);
+    expect(manager.getSession(session.id)).toBeUndefined();
+  });
+
+  it('addMessage auto-compresses when over budget', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const manager = new SessionManager({ tokenizer, defaultMaxTokens: 30 });
+    const session = manager.createSession();
+    for (let i = 0; i < 8; i++) {
+      await manager.addMessage(session.id, 'user', 'x'.repeat(300));
+    }
+    const history = session.getHistory();
+    expect(history[0].content).toMatch(/^\[summary/);
+  });
+
+  it('throws for unknown session ids', async () => {
+    const manager = new SessionManager();
+    await expect(manager.addMessage('bogus', 'user', 'hi')).rejects.toThrow();
+  });
+});
diff --git a/tests/unit/summarization.test.ts b/tests/unit/summarization.test.ts
new file mode 100644
index 0000000..a060ee7
--- /dev/null
+++ b/tests/unit/summarization.test.ts
@@ -0,0 +1,108 @@
+// Unit tests for the summarizers: TruncatingSummarizer output, API-key
+// checks in the provider constructors, and createSummarizerFromEnv selection.
+import { describe, it, expect, beforeEach, afterEach } from '@jest/globals';
+import {
+  TruncatingSummarizer,
+  AnthropicSummarizer,
+  GoogleAISummarizer,
+  createSummarizerFromEnv,
+} from '../../src/core/summarization.js';
+import { Message } from '../../src/core/session.js';
+
+// Builds n messages alternating user/assistant, each with a 50-char filler body.
+function makeMessages(n: number): Message[] {
+  return Array.from({ length: n }, (_, i) => ({
+    role: (i % 2 === 0 ? 'user' : 'assistant') as Message['role'],
+    content: `Turn ${i}: ${'x'.repeat(50)}`,
+    timestamp: Date.now() + i,
+  }));
+}
+
+describe('TruncatingSummarizer', () => {
+  it('returns empty string for empty input', async () => {
+    const s = new TruncatingSummarizer();
+    expect(await s.summarize([])).toBe('');
+  });
+
+  it('returns untruncated text when under maxChars', async () => {
+    const s = new TruncatingSummarizer({ maxChars: 10_000 });
+    const out = await s.summarize(makeMessages(3));
+    expect(out).toContain('Turn 0');
+    expect(out).toContain('Turn 2');
+    expect(out).not.toContain('[truncated]');
+  });
+
+  it('truncates with a marker when over maxChars', async () => {
+    const s = new TruncatingSummarizer({ maxChars: 500 });
+    const out = await s.summarize(makeMessages(50));
+    expect(out).toContain('[truncated]');
+    expect(out.length).toBeLessThan(600);
+  });
+});
+
+describe('AnthropicSummarizer / GoogleAISummarizer constructors', () => {
+  const savedAnthropic = process.env.ANTHROPIC_API_KEY;
+  const savedGoogle = process.env.GOOGLE_AI_API_KEY;
+
+  // Start every test with both API keys unset.
+  beforeEach(() => {
+    delete process.env.ANTHROPIC_API_KEY;
+    delete process.env.GOOGLE_AI_API_KEY;
+  });
+  // Put back whatever the environment held when the suite was defined.
+  afterEach(() => {
+    if (savedAnthropic !== undefined) process.env.ANTHROPIC_API_KEY = savedAnthropic;
+    else delete process.env.ANTHROPIC_API_KEY;
+    if (savedGoogle !== undefined) process.env.GOOGLE_AI_API_KEY = savedGoogle;
+    else delete process.env.GOOGLE_AI_API_KEY;
+  });
+
+  it('AnthropicSummarizer throws without a key', () => {
+    expect(() => new AnthropicSummarizer()).toThrow(/ANTHROPIC_API_KEY/);
+  });
+
+  it('GoogleAISummarizer throws without a key', () => {
+    expect(() => new GoogleAISummarizer()).toThrow(/GOOGLE_AI_API_KEY/);
+  });
+
+  it('AnthropicSummarizer constructs with explicit apiKey', () => {
+    expect(() => new AnthropicSummarizer({ apiKey: 'sk-test' })).not.toThrow();
+  });
+
+  it('GoogleAISummarizer constructs with explicit apiKey', () => {
+    expect(() => new GoogleAISummarizer({ apiKey: 'gapi-test' })).not.toThrow();
+  });
+});
+
+describe('createSummarizerFromEnv', () => {
+  const saved = {
+    anthropic: process.env.ANTHROPIC_API_KEY,
+    google: process.env.GOOGLE_AI_API_KEY,
+  };
+
+  // Put back whatever the environment held when the suite was defined.
+  afterEach(() => {
+    if (saved.anthropic !== undefined) process.env.ANTHROPIC_API_KEY = saved.anthropic;
+    else delete process.env.ANTHROPIC_API_KEY;
+    if (saved.google !== undefined) process.env.GOOGLE_AI_API_KEY = saved.google;
+    else delete process.env.GOOGLE_AI_API_KEY;
+  });
+
+  it('falls back to TruncatingSummarizer when no keys are set', () => {
+    delete process.env.ANTHROPIC_API_KEY;
+    delete process.env.GOOGLE_AI_API_KEY;
+    expect(createSummarizerFromEnv()).toBeInstanceOf(TruncatingSummarizer);
+  });
+
+  it('prefers Anthropic when its key is set', () => {
+    process.env.ANTHROPIC_API_KEY = 'sk-test';
+    delete process.env.GOOGLE_AI_API_KEY;
+    expect(createSummarizerFromEnv()).toBeInstanceOf(AnthropicSummarizer);
+  });
+
+  it('uses Google AI when only its key is set', () => {
+    delete process.env.ANTHROPIC_API_KEY;
+    process.env.GOOGLE_AI_API_KEY = 'gapi-test';
+    expect(createSummarizerFromEnv()).toBeInstanceOf(GoogleAISummarizer);
+  });
+});
diff --git a/tests/unit/tokenizers.test.ts b/tests/unit/tokenizers.test.ts
new file mode 100644
index 0000000..ed2f2a3
--- /dev/null
+++ b/tests/unit/tokenizers.test.ts
@@ -0,0 +1,79 @@
+// Unit tests for HeuristicTokenizer content-type detection / counting and
+// for the model-name dispatch done by TokenizerFactory.
+import { describe, it, expect } from '@jest/globals';
+import { HeuristicTokenizer, ContentType } from '../../src/core/tokenizers/heuristic-tokenizer.js';
+import { TokenizerFactory } from '../../src/core/tokenizers/tokenizer-factory.js';
+import { TiktokenTokenizer } from '../../src/core/tokenizers/tiktoken-tokenizer.js';
+
+describe('HeuristicTokenizer', () => {
+  it('detects JSON content', () => {
+    const json = '{"a": 1, "b": [1, 2, 3]}';
+    expect(HeuristicTokenizer.detectContentType(json)).toBe(ContentType.Json);
+  });
+
+  it('detects code content', () => {
+    const code = 'function foo() { return 42; }';
+    expect(HeuristicTokenizer.detectContentType(code)).toBe(ContentType.Code);
+  });
+
+  it('detects markdown content', () => {
+    const md = '# Heading\n\n- item one\n- item two';
+    expect(HeuristicTokenizer.detectContentType(md)).toBe(ContentType.Markdown);
+  });
+
+  it('defaults to text content', () => {
+    const text = 'Just a short plain sentence.';
+    expect(HeuristicTokenizer.detectContentType(text)).toBe(ContentType.Text);
+  });
+
+  it('uses a lower chars/token ratio for code than text', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const code = 'function foo() { return 42; }';
+    const text = 'A sentence of roughly similar length here.';
+    const codeTokens = await tokenizer.countTokens(code);
+    const textTokens = await tokenizer.countTokens(text);
+    // Code has ratio 2.5 vs text 4.0 → for strings of similar length, code tokens > text tokens.
+    expect(codeTokens / code.length).toBeGreaterThan(textTokens / text.length);
+  });
+
+  // NOTE(review): this only checks that two calls agree; it does not
+  // observe a cache hit directly.
+  it('caches repeated inputs', async () => {
+    const tokenizer = new HeuristicTokenizer();
+    const input = 'cache me';
+    const first = await tokenizer.countTokens(input);
+    const second = await tokenizer.countTokens(input);
+    expect(first).toBe(second);
+  });
+});
+
+describe('TokenizerFactory', () => {
+  // Each test frees its tokenizer in `finally` so it is released even
+  // when the assertion throws.
+  it('returns a TiktokenTokenizer for gpt-4', () => {
+    const t = TokenizerFactory.create('gpt-4');
+    try {
+      expect(t).toBeInstanceOf(TiktokenTokenizer);
+    } finally {
+      t.free();
+    }
+  });
+
+  it('returns a TiktokenTokenizer for Claude models (maps to gpt-4)', () => {
+    const t = TokenizerFactory.create('claude-opus-4-7');
+    try {
+      expect(t).toBeInstanceOf(TiktokenTokenizer);
+    } finally {
+      t.free();
+    }
+  });
+
+  it('falls back to HeuristicTokenizer for unknown models', () => {
+    const t = TokenizerFactory.create('some-unknown-local-model');
+    try {
+      expect(t).toBeInstanceOf(HeuristicTokenizer);
+    } finally {
+      t.free();
+    }
+  });
+});