diff --git a/.claude/settings.local.json b/.claude/settings.local.json index e518c4f..69052df 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -77,7 +77,18 @@ "Bash(kill:*)", "Bash(grep:*)", "WebFetch(domain:opencode.ai)", - "Bash(find:*)" + "Bash(find:*)", + "WebFetch(domain:www.databricks.com)", + "WebFetch(domain:docs.databricks.com)", + "Bash(env:*)", + "Bash(DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test:*)", + "Bash(DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node:*)", + "Bash(gh pr list:*)", + "Bash(gh pr diff:*)", + "Bash(PREFER_OLLAMA=true node:*)", + "Bash(DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com MODEL_PROVIDER=azure-openai AZURE_OPENAI_ENDPOINT=https://test.openai.azure.com AZURE_OPENAI_API_KEY=test-key node:*)", + "Bash(git stash:*)", + "WebFetch(domain:docs.ollama.com)" ], "deny": [], "ask": [] diff --git a/.env.example b/.env.example index 644358f..7c37a1f 100644 --- a/.env.example +++ b/.env.example @@ -1,90 +1,62 @@ -# Lynkr Configuration +# ============================================================================== +# LYNKR CONFIGURATION - All Environment Variables # Copy this file to .env and fill in your values +# ============================================================================== # ============================================================================== # Model Provider Configuration # ============================================================================== -# Primary model provider to use -# Options: databricks, azure-anthropic, azure-openai, openrouter, openai, ollama, llamacpp, lmstudio, bedrock, zai, vertex -# Default: databricks +# Primary model provider — controls routing when TIER_* vars are NOT configured (static mode). +# When all 4 TIER_* vars are set, tier routing overrides this for request routing. +# Even with tier routing active, MODEL_PROVIDER is still used for: +# - Startup checks (e.g. waiting for Ollama to be reachable) +# - Provider discovery API (/v1/providers) +# - Default provider when a TIER_* value has no "provider:" prefix +# Options: databricks, azure-anthropic, azure-openai, openrouter, openai, ollama, llamacpp, lmstudio, bedrock, zai, vertex, moonshot +# Note: PREFER_OLLAMA is deprecated and has no effect. Use TIER_SIMPLE=ollama: instead. MODEL_PROVIDER=ollama # ============================================================================== -# Ollama Configuration (Hybrid Routing) +# Databricks Configuration # ============================================================================== -# Enable Ollama preference for simple requests -PREFER_OLLAMA=false +# DATABRICKS_API_BASE=https://your-workspace.cloud.databricks.com +# DATABRICKS_API_KEY=dapi1234567890abcdef +# DATABRICKS_ENDPOINT_PATH=/serving-endpoints/your-endpoint/invocations -# Ollama model to use (must be compatible with tool calling) -# Options: qwen2.5-coder:latest, llama3.1, mistral-nemo, nemotron-3-nano:30b-cloud, etc. 
-OLLAMA_MODEL=qwen2.5-coder:latest +# ============================================================================== +# Ollama Configuration (Local Models) +# ============================================================================== -# Ollama endpoint (default: http://localhost:11434) +# Ollama endpoint OLLAMA_ENDPOINT=http://localhost:11434 -# Ollama embeddings configuration (for Cursor @Codebase semantic search) -# Embedding models for local, privacy-first semantic search -# Popular models: -# - nomic-embed-text (768 dim, 137M params, best all-around) - RECOMMENDED -# - mxbai-embed-large (1024 dim, 335M params, higher quality) -# - all-minilm (384 dim, 23M params, fastest/smallest) -# -# Pull model: ollama pull nomic-embed-text -# OLLAMA_EMBEDDINGS_MODEL=nomic-embed-text -# OLLAMA_EMBEDDINGS_ENDPOINT=http://localhost:11434/api/embeddings - -# Fallback provider when primary provider fails or for complex requests -# Options: databricks, azure-anthropic, azure-openai, openrouter, openai, bedrock -# Note: Local providers (ollama, llamacpp, lmstudio) cannot be used as fallback -FALLBACK_PROVIDER=databricks +# Ollama timeout in milliseconds +OLLAMA_TIMEOUT_MS=120000 -# Enable automatic fallback (true = transparent fallback, false = fail on provider error) -FALLBACK_ENABLED=false +# Ollama model to use (must be compatible with tool calling) +# Options: qwen2.5-coder:latest, llama3.1, mistral-nemo, etc. +OLLAMA_MODEL=qwen2.5-coder:latest # Max tools for routing to Ollama (requests with more tools go to cloud) OLLAMA_MAX_TOOLS_FOR_ROUTING=3 -# ============================================================================== -# Databricks Configuration -# ============================================================================== - -# DATABRICKS_API_BASE=https://your-workspace.cloud.databricks.com -# DATABRICKS_API_KEY=dapi1234567890abcdef - -# ============================================================================== -# AWS Bedrock Configuration -# ============================================================================== - -# Bedrock API Key (Bearer token) - REQUIRED -# Generate from AWS Console → Bedrock → API Keys -# See: https://docs.aws.amazon.com/bedrock/latest/userguide/api-keys-generate.html -# AWS_BEDROCK_API_KEY=your-bedrock-api-key-here - -# AWS region (default: us-east-1) -# Available regions: us-east-1, us-west-2, us-east-2, ap-southeast-1, ap-northeast-1, eu-central-1 -# AWS_BEDROCK_REGION=us-east-2 - -# Bedrock model ID to use -# Claude models (recommended): -# - anthropic.claude-3-5-sonnet-20241022-v2:0 (best for tool calling) -# - anthropic.claude-3-opus-20240229-v1:0 (most capable) -# - anthropic.claude-3-haiku-20240307-v1:0 (fast, cheap) -# Other models: -# - us.deepseek.r1-v1:0 (DeepSeek R1 - reasoning model) -# - qwen.qwen3-coder-480b-a35b-v1:0 (coding specialist) -# - minimax.minimax-m2 (MiniMax M2) -# - amazon.titan-text-express-v1 -# - meta.llama3-1-70b-instruct-v1:0 -# AWS_BEDROCK_MODEL_ID=anthropic.claude-3-5-sonnet-20241022-v2:0 +# Ollama embeddings configuration (for Cursor @Codebase semantic search) +# Pull model: ollama pull nomic-embed-text +OLLAMA_EMBEDDINGS_MODEL=nomic-embed-text +OLLAMA_EMBEDDINGS_ENDPOINT=http://localhost:11434/api/embeddings # ============================================================================== -# Azure Anthropic Configuration +# OpenRouter Configuration (100+ Models via Single API) # ============================================================================== -# 
AZURE_ANTHROPIC_ENDPOINT=https://your-anthropic.openai.azure.com -# AZURE_ANTHROPIC_API_KEY=your-azure-key +# Get your API key from: https://openrouter.ai/keys +# OPENROUTER_API_KEY=sk-or-v1-your-key-here +OPENROUTER_MODEL=openai/gpt-4o-mini +OPENROUTER_EMBEDDINGS_MODEL=openai/text-embedding-ada-002 +OPENROUTER_ENDPOINT=https://openrouter.ai/api/v1/chat/completions +OPENROUTER_MAX_TOOLS_FOR_ROUTING=15 # ============================================================================== # Azure OpenAI Configuration @@ -92,15 +64,22 @@ OLLAMA_MAX_TOOLS_FOR_ROUTING=3 # Azure OpenAI endpoint (supports both standard and AI Foundry formats) # Standard: https://.openai.azure.com -# AI Foundry: https://.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview +# AI Foundry: https://.services.ai.azure.com/models/chat/completions?api-version=... # AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com - # AZURE_OPENAI_API_KEY=your-azure-openai-key # AZURE_OPENAI_DEPLOYMENT=gpt-4o -# AZURE_OPENAI_API_VERSION=2024-05-01-preview +# AZURE_OPENAI_API_VERSION=2024-08-01-preview # ============================================================================== -# OpenAI Configuration (Direct OpenAI API) +# Azure Anthropic Configuration +# ============================================================================== + +# AZURE_ANTHROPIC_ENDPOINT=https://your-anthropic.openai.azure.com +# AZURE_ANTHROPIC_API_KEY=your-azure-key +# AZURE_ANTHROPIC_VERSION=2023-06-01 + +# ============================================================================== +# OpenAI Configuration (Direct) # ============================================================================== # OPENAI_API_KEY=sk-your-openai-api-key @@ -109,88 +88,76 @@ OLLAMA_MAX_TOOLS_FOR_ROUTING=3 # OPENAI_ORGANIZATION=org-your-org-id # ============================================================================== -# OpenRouter Configuration (100+ Models via Single API) +# AWS Bedrock Configuration # ============================================================================== -# Get your API key from: https://openrouter.ai/keys -# OPENROUTER_API_KEY=sk-or-v1-your-key-here - -# Model to use (default: openai/gpt-4o-mini) -# Popular options: -# - nvidia/nemotron-3-nano-30b-a3b:free (FREE) -# - anthropic/claude-3.5-sonnet ($3/$15 per 1M) -# - openai/gpt-4o-mini ($0.15/$0.60 per 1M) -# OPENROUTER_MODEL=openai/gpt-4o-mini +# Generate from AWS Console → Bedrock → API Keys +# AWS_BEDROCK_API_KEY=your-bedrock-bearer-token +# AWS_BEDROCK_REGION=us-east-1 +# AWS_BEDROCK_MODEL_ID=anthropic.claude-3-5-sonnet-20241022-v2:0 # ============================================================================== # llama.cpp Configuration (Local GGUF Models) # ============================================================================== -# LLAMACPP_ENDPOINT=http://localhost:8080 -# LLAMACPP_MODEL=default -# LLAMACPP_TIMEOUT_MS=120000 +LLAMACPP_ENDPOINT=http://localhost:8080 +LLAMACPP_MODEL=default +LLAMACPP_TIMEOUT_MS=120000 # LLAMACPP_API_KEY=your-optional-api-key - -# llama.cpp embeddings configuration -# LLAMACPP_EMBEDDINGS_ENDPOINT=http://localhost:8080/embeddings +LLAMACPP_EMBEDDINGS_ENDPOINT=http://localhost:8080/embeddings # ============================================================================== -# LM Studio Configuration (Local Models with GUI) +# LM Studio Configuration # ============================================================================== -# LMSTUDIO_ENDPOINT=http://localhost:1234 -# LMSTUDIO_MODEL=default -# 
LMSTUDIO_TIMEOUT_MS=120000 +LMSTUDIO_ENDPOINT=http://localhost:1234 +LMSTUDIO_MODEL=default +LMSTUDIO_TIMEOUT_MS=120000 # LMSTUDIO_API_KEY=your-optional-api-key # ============================================================================== # Z.AI (Zhipu AI) Configuration - ~1/7 cost of Anthropic # ============================================================================== -# Z.AI provides GLM models through an Anthropic-compatible API -# Get your API key from: https://z.ai/ or https://open.bigmodel.cn/ # ZAI_API_KEY=your-zai-api-key - -# Z.AI endpoint (default: https://api.z.ai/api/anthropic/v1/messages) # ZAI_ENDPOINT=https://api.z.ai/api/anthropic/v1/messages - -# Model to use (GLM-4.7 is equivalent to Claude Sonnet, GLM-4.5-Air is like Haiku) -# Options: GLM-4.7, GLM-4.5-Air, GLM-4-Plus # ZAI_MODEL=GLM-4.7 +# ============================================================================== +# Moonshot AI (Kimi) Configuration +# ============================================================================== + +# MOONSHOT_API_KEY=your-moonshot-api-key +# MOONSHOT_ENDPOINT=https://api.moonshot.ai/v1/chat/completions +# MOONSHOT_MODEL=kimi-k2-turbo-preview + # ============================================================================== # Google Vertex AI Configuration (Gemini Models) # ============================================================================== -# Google AI API Key (required) # Get your API key from: https://aistudio.google.com/app/apikey # VERTEX_API_KEY=your-google-api-key -# or use: GOOGLE_API_KEY=your-google-api-key - -# Gemini model to use (default: gemini-2.0-flash) -# Options: -# - gemini-2.0-flash (fast, good for most tasks) - DEFAULT -# - gemini-2.0-flash-lite (fastest, cheapest) -# - gemini-2.5-pro (most capable, best quality) -# - gemini-1.5-pro (previous generation) -# - gemini-1.5-flash (previous generation, fast) +# GOOGLE_API_KEY=your-google-api-key # VERTEX_MODEL=gemini-2.0-flash -# Model mapping from Claude names: -# claude-sonnet-* → gemini-2.0-flash -# claude-haiku-* → gemini-2.0-flash-lite -# claude-opus-* → gemini-2.5-pro +# ============================================================================== +# Fallback Configuration +# ============================================================================== + +# Enable automatic fallback when tier provider fails +FALLBACK_ENABLED=false + +# Fallback provider (cannot be local: ollama, llamacpp, lmstudio) +# Options: databricks, azure-anthropic, azure-openai, openrouter, openai, bedrock +FALLBACK_PROVIDER=databricks # ============================================================================== # Embeddings Provider Override # ============================================================================== -# By default, embeddings use same provider as MODEL_PROVIDER (if supported) -# To force a specific provider, set: -# EMBEDDINGS_PROVIDER=ollama # Use Ollama embeddings -# EMBEDDINGS_PROVIDER=llamacpp # Use llama.cpp embeddings -# EMBEDDINGS_PROVIDER=openrouter # Use OpenRouter embeddings -# EMBEDDINGS_PROVIDER=openai # Use OpenAI embeddings +# Force a specific embeddings provider (default: same as MODEL_PROVIDER) +# Options: ollama, llamacpp, openrouter, openai +# EMBEDDINGS_PROVIDER=ollama # ============================================================================== # Server Configuration @@ -198,83 +165,165 @@ OLLAMA_MAX_TOOLS_FOR_ROUTING=3 PORT=8081 LOG_LEVEL=info -WEB_SEARCH_ENDPOINT=http://localhost:8888/search +# NODE_ENV=development -# Policy Configuration -POLICY_MAX_STEPS=20 
-POLICY_MAX_TOOL_CALLS=12 +# File logging (persistent logs with automatic rotation via pino-roll) +# LOG_FILE_ENABLED=true +# LOG_FILE_PATH=./logs/lynkr.log +# LOG_FILE_LEVEL=debug +# LOG_FILE_FREQUENCY=daily +# LOG_FILE_MAX_FILES=14 -# Tool loop guard - max tool results in conversation before force-terminating -# Prevents infinite tool loops. Set higher for complex multi-step tasks. -POLICY_TOOL_LOOP_THRESHOLD=10 +# Maximum JSON request body size +REQUEST_JSON_LIMIT=1gb + +# Session database path +SESSION_DB_PATH=./data/sessions.db -# Workspace for embeddings/indexing +# Workspace root directory WORKSPACE_ROOT=/path/to/your/workspace -WORKSPACE_INDEX_ENABLED=true -# Tool execution mode: where to execute tools (Write, Read, Bash, etc.) -# - server: Execute tools on the server (default, for standalone proxy use) +# ============================================================================== +# Tool Execution Mode +# ============================================================================== + +# Where to execute tools +# - server: Execute tools on the proxy server (default) # - client/passthrough: Return tool calls to CLI for local execution TOOL_EXECUTION_MODE=server -# Suggestion mode model override -# Controls which model handles suggestion mode (predicting next user input). -# Values: -# default - Use the same model as MODEL_PROVIDER (no change) -# none - Skip suggestion mode LLM calls entirely (saves GPU time) -# - Use a specific model (e.g. "llama3.1" for a lighter model) -SUGGESTION_MODE_MODEL=default - # Enable/disable automatic tool injection for local models INJECT_TOOLS_LLAMACPP=true INJECT_TOOLS_OLLAMA=true +# Suggestion mode model override +# Values: default (same as MODEL_PROVIDER), none (skip), or name +SUGGESTION_MODE_MODEL=default + +# ============================================================================== +# Rate Limiting +# ============================================================================== + +RATE_LIMIT_ENABLED=true +RATE_LIMIT_WINDOW_MS=60000 +RATE_LIMIT_MAX=100 +RATE_LIMIT_KEY_BY=session + +# ============================================================================== +# Web Search Configuration +# ============================================================================== + +WEB_SEARCH_ENDPOINT=http://localhost:8888/search +# WEB_SEARCH_API_KEY= +WEB_SEARCH_ALLOW_ALL=true +# WEB_SEARCH_ALLOWED_HOSTS=localhost,127.0.0.1 +WEB_SEARCH_TIMEOUT_MS=10000 +WEB_FETCH_BODY_PREVIEW_MAX=10000 +WEB_SEARCH_RETRY_ENABLED=true +WEB_SEARCH_MAX_RETRIES=2 + +# ============================================================================== +# Policy Configuration +# ============================================================================== + +POLICY_MAX_STEPS=20 +POLICY_MAX_TOOL_CALLS=12 +# POLICY_DISALLOWED_TOOLS=dangerous_tool1,dangerous_tool2 + +# Tool loop guard threshold +POLICY_TOOL_LOOP_THRESHOLD=10 + +# Git policy +POLICY_GIT_ALLOW_PUSH=false +POLICY_GIT_ALLOW_PULL=true +POLICY_GIT_ALLOW_COMMIT=true +# POLICY_GIT_TEST_COMMAND=npm test +POLICY_GIT_REQUIRE_TESTS=false +# POLICY_GIT_COMMIT_REGEX=^(feat|fix|docs|style|refactor|test|chore): +POLICY_GIT_AUTOSTASH=false + +# File access policy +# POLICY_FILE_ALLOWED_PATHS=/path1,/path2 +POLICY_FILE_BLOCKED_PATHS=/.env,.env,/etc/passwd,/etc/shadow + +# Safe commands +POLICY_SAFE_COMMANDS_ENABLED=true +# POLICY_SAFE_COMMANDS_CONFIG={"allowed":["ls","cat","grep"]} + +# ============================================================================== +# Agents Configuration +# 
============================================================================== + +AGENTS_ENABLED=true +AGENTS_MAX_CONCURRENT=10 +AGENTS_DEFAULT_MODEL=haiku +AGENTS_MAX_STEPS=15 +AGENTS_TIMEOUT=300000 + +# ============================================================================== +# MCP Sandbox Configuration +# ============================================================================== + +MCP_SANDBOX_ENABLED=true +# MCP_SANDBOX_IMAGE=node:20-alpine +MCP_SANDBOX_RUNTIME=docker +MCP_SANDBOX_CONTAINER_WORKSPACE=/workspace +MCP_SANDBOX_MOUNT_WORKSPACE=true +MCP_SANDBOX_ALLOW_NETWORKING=false +MCP_SANDBOX_NETWORK_MODE=none +MCP_SANDBOX_PASSTHROUGH_ENV=PATH,LANG,LC_ALL,TERM,HOME +# MCP_SANDBOX_EXTRA_MOUNTS=/host/path:/container/path:ro +MCP_SANDBOX_TIMEOUT_MS=20000 +# MCP_SANDBOX_USER=node +# MCP_SANDBOX_ENTRYPOINT=/bin/sh +MCP_SANDBOX_REUSE_SESSION=true +MCP_SANDBOX_READ_ONLY_ROOT=false +MCP_SANDBOX_NO_NEW_PRIVILEGES=true +MCP_SANDBOX_DROP_CAPABILITIES=ALL +# MCP_SANDBOX_ADD_CAPABILITIES=NET_BIND_SERVICE +MCP_SANDBOX_MEMORY_LIMIT=512m +MCP_SANDBOX_CPU_LIMIT=1.0 +MCP_SANDBOX_PIDS_LIMIT=100 + +# MCP permissions +MCP_SANDBOX_PERMISSION_MODE=auto +# MCP_SANDBOX_PERMISSION_ALLOW=tool1,tool2 +# MCP_SANDBOX_PERMISSION_DENY=tool3,tool4 + +# MCP server manifest +# MCP_SERVER_MANIFEST=~/.claude/mcp/servers.json +MCP_MANIFEST_DIRS=~/.claude/mcp + +# ============================================================================== +# Prompt Cache Configuration +# ============================================================================== + +PROMPT_CACHE_ENABLED=true +PROMPT_CACHE_MAX_ENTRIES=1000 +PROMPT_CACHE_TTL_MS=300000 + # ============================================================================== # Semantic Response Cache # ============================================================================== -# Enable semantic caching (requires embeddings provider) -# Caches LLM responses and returns them for semantically similar prompts -# Requires: Ollama with nomic-embed-text, or another embeddings provider +# Requires an embeddings provider SEMANTIC_CACHE_ENABLED=false - -# Similarity threshold for cache hits (0.0-1.0, higher = stricter matching) -# 0.95 = very similar prompts only, 0.90 = more lenient SEMANTIC_CACHE_THRESHOLD=0.95 # ============================================================================== # Long-Term Memory System (Titans-Inspired) # ============================================================================== -# Enable/disable the entire memory system MEMORY_ENABLED=true - -# Maximum number of memories to inject into each request MEMORY_RETRIEVAL_LIMIT=5 - -# Minimum surprise score (0.0-1.0) required to store a memory MEMORY_SURPRISE_THRESHOLD=0.3 - -# Auto-delete memories older than this many days MEMORY_MAX_AGE_DAYS=90 - -# Maximum total memories to keep MEMORY_MAX_COUNT=10000 - -# Enable importance decay over time -MEMORY_DECAY_ENABLED=true - -# Days for importance to decay by 50% -MEMORY_DECAY_HALF_LIFE=30 - -# Include global memories in all sessions MEMORY_INCLUDE_GLOBAL=true - -# Where to inject memories (system or assistant_preamble) MEMORY_INJECTION_FORMAT=system - -# Enable automatic extraction MEMORY_EXTRACTION_ENABLED=true +MEMORY_DECAY_ENABLED=true +MEMORY_DECAY_HALF_LIFE=30 # ============================================================================== # Token Optimization Settings (60-80% Cost Reduction) @@ -301,107 +350,49 @@ TOON_FAIL_OPEN=true TOON_LOG_STATS=true # ============================================================================== -# Smart 
Tool Selection (Advanced Token Optimization) +# Smart Tool Selection # ============================================================================== # Selection strategy: heuristic, aggressive, or conservative SMART_TOOL_SELECTION_MODE=heuristic - -# Maximum token budget for tools per request SMART_TOOL_SELECTION_TOKEN_BUDGET=2500 # ============================================================================== -# Performance & Security +# Test Configuration # ============================================================================== -# API retry configuration -API_RETRY_MAX_RETRIES=3 -API_RETRY_INITIAL_DELAY=1000 -API_RETRY_MAX_DELAY=30000 - -# Load shedding thresholds -LOAD_SHEDDING_HEAP_THRESHOLD=90 -LOAD_SHEDDING_EVENT_LOOP_DELAY=100 +# WORKSPACE_TEST_COMMAND=npm test +# WORKSPACE_TEST_ARGS=--coverage +WORKSPACE_TEST_TIMEOUT_MS=600000 +WORKSPACE_TEST_SANDBOX=auto +WORKSPACE_TEST_COVERAGE_FILES=coverage/coverage-summary.json +# WORKSPACE_TEST_PROFILES=[{"name":"unit","command":"npm test"}] # ============================================================================== # Hot Reload Configuration # ============================================================================== -# Enable hot reload of configuration (default: true) -# When enabled, changes to .env are applied without restart HOT_RELOAD_ENABLED=true - -# Debounce delay in ms (prevents rapid reloads) HOT_RELOAD_DEBOUNCE_MS=1000 -# ============================================================================== -# Quick Start Examples -# ============================================================================== - -# 100% Local (FREE) - Ollama: -# MODEL_PROVIDER=ollama -# OLLAMA_MODEL=qwen2.5-coder:latest -# npm start - -# AWS Bedrock: -# MODEL_PROVIDER=bedrock -# AWS_BEDROCK_API_KEY=your-key -# AWS_BEDROCK_MODEL_ID=anthropic.claude-3-5-sonnet-20241022-v2:0 -# npm start - -# OpenRouter (simplest cloud): -# MODEL_PROVIDER=openrouter -# OPENROUTER_API_KEY=sk-or-v1-your-key -# npm start - -# Azure OpenAI: -# MODEL_PROVIDER=azure-openai -# AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com -# AZURE_OPENAI_API_KEY=your-key -# AZURE_OPENAI_DEPLOYMENT=gpt-4o -# npm start - -# Z.AI (Zhipu - ~1/7 cost of Anthropic): -# MODEL_PROVIDER=zai -# ZAI_API_KEY=your-zai-api-key -# ZAI_MODEL=GLM-4.7 -# npm start - -# Google Gemini (via Vertex AI): -# MODEL_PROVIDER=vertex -# VERTEX_API_KEY=your-google-api-key -# VERTEX_MODEL=gemini-2.0-flash -# npm start - # ============================================================================== # Headroom Context Compression (Sidecar) # ============================================================================== -# Headroom provides 47-92% token reduction through intelligent context compression. -# It runs as a Python sidecar container managed automatically by Lynkr via Docker. 
-# -# Features: -# - Smart Crusher: Statistical JSON compression for tool outputs -# - Cache Aligner: Stabilizes dynamic content for provider cache hits -# - CCR (Compress-Cache-Retrieve): Reversible compression with on-demand retrieval -# - Rolling Window: Token budget enforcement with turn-based windowing -# - LLMLingua (optional): ML-based 20x compression with GPU acceleration - -# Enable/disable Headroom compression (default: false) + +# Enable Headroom compression (47-92% token reduction) HEADROOM_ENABLED=false -# Sidecar endpoint (auto-configured when Docker is enabled) +# Sidecar endpoint HEADROOM_ENDPOINT=http://localhost:8787 -# Request timeout in milliseconds +# Request timeout and minimum tokens HEADROOM_TIMEOUT_MS=5000 - -# Minimum tokens to trigger compression (skip small requests) HEADROOM_MIN_TOKENS=500 -# Operating mode: "audit" (observe only) or "optimize" (apply transforms) +# Operating mode: audit (observe) or optimize (apply) HEADROOM_MODE=optimize -# Provider for cache optimization hints: anthropic, openai, google +# Provider for cache hints: anthropic, openai, google HEADROOM_PROVIDER=anthropic # Log level: debug, info, warning, error @@ -410,50 +401,27 @@ HEADROOM_LOG_LEVEL=info # ============================================================================== # Headroom Docker Configuration # ============================================================================== -# When enabled, Lynkr automatically manages the Headroom container lifecycle -# Enable Docker container management (default: true when HEADROOM_ENABLED=true) HEADROOM_DOCKER_ENABLED=true - -# Docker image to use HEADROOM_DOCKER_IMAGE=lynkr/headroom-sidecar:latest - -# Container name HEADROOM_DOCKER_CONTAINER_NAME=lynkr-headroom - -# Port mapping HEADROOM_DOCKER_PORT=8787 - -# Resource limits HEADROOM_DOCKER_MEMORY_LIMIT=512m HEADROOM_DOCKER_CPU_LIMIT=1.0 - -# Restart policy: no, always, unless-stopped, on-failure HEADROOM_DOCKER_RESTART_POLICY=unless-stopped - -# Docker network (optional, leave empty for default bridge) # HEADROOM_DOCKER_NETWORK=lynkr-network - -# Build from local source instead of pulling image -# HEADROOM_DOCKER_AUTO_BUILD=true # HEADROOM_DOCKER_BUILD_CONTEXT=./headroom-sidecar +# HEADROOM_DOCKER_AUTO_BUILD=true # ============================================================================== # Headroom Transform Settings # ============================================================================== -# Smart Crusher (statistical JSON compression) HEADROOM_SMART_CRUSHER=true HEADROOM_SMART_CRUSHER_MIN_TOKENS=200 HEADROOM_SMART_CRUSHER_MAX_ITEMS=15 - -# Tool Crusher (fixed-rules compression for tool outputs) HEADROOM_TOOL_CRUSHER=true - -# Cache Aligner (stabilize dynamic content like UUIDs, timestamps) HEADROOM_CACHE_ALIGNER=true - -# Rolling Window (context overflow management) HEADROOM_ROLLING_WINDOW=true HEADROOM_KEEP_TURNS=3 @@ -461,29 +429,32 @@ HEADROOM_KEEP_TURNS=3 # Headroom CCR (Compress-Cache-Retrieve) # ============================================================================== -# Enable CCR for reversible compression with on-demand retrieval HEADROOM_CCR=true - -# TTL for cached content in seconds (default: 5 minutes) HEADROOM_CCR_TTL=300 # ============================================================================== -# Headroom LLMLingua (Optional ML Compression) +# Headroom LLMLingua (ML Compression - Requires GPU) # ============================================================================== -# LLMLingua-2 provides ML-based 20x compression using BERT token 
classification. -# Requires GPU for reasonable performance, or use CPU with longer timeouts. -# Enable LLMLingua (default: false, requires GPU recommended) HEADROOM_LLMLINGUA=false - -# Device: cuda, cpu, auto HEADROOM_LLMLINGUA_DEVICE=auto # ============================================================================== -# Prompt Cache Configuration +# Tiered Model Routing (Recommended for Cost Optimization) # ============================================================================== - -# Enable prompt caching (caches exact prompts) -PROMPT_CACHE_ENABLED=true -PROMPT_CACHE_MAX_ENTRIES=1000 -PROMPT_CACHE_TTL_MS=300000 +# When all 4 TIER_* vars are set, they OVERRIDE MODEL_PROVIDER for routing. +# Each request is scored for complexity (0-100) and routed to the matching tier: +# SIMPLE (0-25) → cheap/local models COMPLEX (51-75) → capable cloud models +# MEDIUM (26-50) → mid-range models REASONING (76-100) → best available +# +# Format: TIER_=provider:model +# All 4 tiers must be configured to enable tiered routing. +# If any are missing, tiered routing is disabled and MODEL_PROVIDER is used directly. +# +# Supported providers: ollama, openai, azure-openai, openrouter, +# databricks, bedrock, vertex, zai, moonshot, llamacpp, lmstudio +# +TIER_SIMPLE=ollama:llama3.2 +TIER_MEDIUM=openrouter:openai/gpt-4o-mini +TIER_COMPLEX=azure-openai:gpt-4o +TIER_REASONING=azure-openai:gpt-4o diff --git a/Dockerfile b/Dockerfile index 59c57a1..691a885 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,7 +56,7 @@ COPY --from=build --chown=node:node /app/index.js /app/package.json ./ COPY --from=build --chown=node:node /app/node_modules ./node_modules COPY --from=build --chown=node:node /app/src ./src -VOLUME ["/app/data"] +VOLUME ["/app/data", "/app/logs"] EXPOSE 8081 @@ -75,17 +75,27 @@ ENV MODEL_PROVIDER="databricks" \ LOG_LEVEL="info" \ WORKSPACE_ROOT="/workspace" \ WEB_SEARCH_ENDPOINT="http://searxng:8888/search" \ - NODE_ENV="production" + NODE_ENV="production" \ + REQUEST_JSON_LIMIT="1gb" \ + SESSION_DB_PATH="/app/data/sessions.db" + +# File Logging (persistent logs with pino-roll rotation) +ENV LOG_FILE_ENABLED="false" \ + LOG_FILE_PATH="/app/logs/lynkr.log" \ + LOG_FILE_LEVEL="debug" \ + LOG_FILE_FREQUENCY="daily" \ + LOG_FILE_MAX_FILES="14" # Databricks Configuration (default provider) ENV DATABRICKS_API_BASE="https://example.cloud.databricks.com" \ DATABRICKS_API_KEY="replace-with-databricks-pat" -# Ollama Configuration (for hybrid routing) +# Ollama Configuration (for tier-based routing) # Recommended models: llama3.1:8b, llama3.2, qwen2.5:14b, mistral:7b-instruct -ENV PREFER_OLLAMA="false" \ - OLLAMA_ENDPOINT="http://localhost:11434" \ +# Configure via TIER_* env vars: TIER_SIMPLE=ollama:llama3.2 +ENV OLLAMA_ENDPOINT="http://localhost:11434" \ OLLAMA_MODEL="llama3.1:8b" \ + OLLAMA_TIMEOUT_MS="120000" \ OLLAMA_MAX_TOOLS_FOR_ROUTING="3" \ OLLAMA_EMBEDDINGS_MODEL="nomic-embed-text" \ OLLAMA_EMBEDDINGS_ENDPOINT="http://localhost:11434/api/embeddings" @@ -99,45 +109,99 @@ ENV OPENROUTER_API_KEY="" \ OPENROUTER_MAX_TOOLS_FOR_ROUTING="15" # Azure OpenAI Configuration (optional) -# IMPORTANT: Set full endpoint URL including deployment path -# Example: https://your-resource.openai.azure.com/openai/deployments/YOUR-DEPLOYMENT/chat/completions?api-version=2025-01-01-preview -# Deployment options: gpt-4o, gpt-4o-mini, gpt-5-chat, o1-preview, o3-mini ENV AZURE_OPENAI_ENDPOINT="" \ AZURE_OPENAI_API_KEY="" \ - AZURE_OPENAI_DEPLOYMENT="gpt-4o" + AZURE_OPENAI_DEPLOYMENT="gpt-4o" \ + 
AZURE_OPENAI_API_VERSION="2024-08-01-preview" # Hybrid Routing & Fallback Configuration -# Options: databricks, azure-openai, azure-anthropic, openrouter, bedrock, openai -# Note: Local providers (ollama, llamacpp, lmstudio) cannot be used as fallback ENV FALLBACK_ENABLED="true" \ FALLBACK_PROVIDER="databricks" # Azure Anthropic Configuration (optional) ENV AZURE_ANTHROPIC_ENDPOINT="" \ - AZURE_ANTHROPIC_API_KEY="" + AZURE_ANTHROPIC_API_KEY="" \ + AZURE_ANTHROPIC_VERSION="2023-06-01" # AWS Bedrock Configuration (optional) -# Supports Claude, Titan, Llama, Jurassic, Cohere, Mistral models ENV AWS_BEDROCK_API_KEY="" \ AWS_BEDROCK_REGION="us-east-1" \ AWS_BEDROCK_MODEL_ID="anthropic.claude-3-5-sonnet-20241022-v2:0" -# llama.cpp Configuration (optional - for local GGUF models) +# llama.cpp Configuration (optional) ENV LLAMACPP_ENDPOINT="http://localhost:8080" \ LLAMACPP_MODEL="default" \ LLAMACPP_EMBEDDINGS_ENDPOINT="http://localhost:8080/embeddings" \ LLAMACPP_TIMEOUT_MS="120000" +# LM Studio Configuration (optional) +ENV LMSTUDIO_ENDPOINT="http://localhost:1234" \ + LMSTUDIO_MODEL="default" \ + LMSTUDIO_TIMEOUT_MS="120000" + # OpenAI Configuration (optional) ENV OPENAI_API_KEY="" \ OPENAI_MODEL="gpt-4o" \ OPENAI_ENDPOINT="https://api.openai.com/v1/chat/completions" +# Z.AI Configuration (optional) +ENV ZAI_API_KEY="" \ + ZAI_ENDPOINT="https://api.z.ai/api/anthropic/v1/messages" \ + ZAI_MODEL="GLM-4.7" + +# Google Vertex AI Configuration (optional) +ENV VERTEX_API_KEY="" \ + VERTEX_MODEL="gemini-2.0-flash" + # Embeddings Provider Override (optional) -# Options: ollama, llamacpp, openrouter, openai -# By default, uses same provider as MODEL_PROVIDER ENV EMBEDDINGS_PROVIDER="" +# Tool Injection & Suggestion Mode +ENV INJECT_TOOLS_LLAMACPP="true" \ + INJECT_TOOLS_OLLAMA="true" \ + SUGGESTION_MODE_MODEL="default" + +# Rate Limiting +ENV RATE_LIMIT_ENABLED="true" \ + RATE_LIMIT_WINDOW_MS="60000" \ + RATE_LIMIT_MAX="100" \ + RATE_LIMIT_KEY_BY="session" + +# Web Search Configuration +ENV WEB_SEARCH_ALLOW_ALL="true" \ + WEB_SEARCH_TIMEOUT_MS="10000" \ + WEB_FETCH_BODY_PREVIEW_MAX="10000" \ + WEB_SEARCH_RETRY_ENABLED="true" \ + WEB_SEARCH_MAX_RETRIES="2" + +# Policy Configuration +ENV POLICY_MAX_STEPS="20" \ + POLICY_MAX_TOOL_CALLS="12" \ + POLICY_TOOL_LOOP_THRESHOLD="10" \ + POLICY_GIT_ALLOW_PUSH="false" \ + POLICY_GIT_ALLOW_PULL="true" \ + POLICY_GIT_ALLOW_COMMIT="true" \ + POLICY_GIT_REQUIRE_TESTS="false" \ + POLICY_GIT_AUTOSTASH="false" \ + POLICY_FILE_BLOCKED_PATHS="/.env,.env,/etc/passwd,/etc/shadow" \ + POLICY_SAFE_COMMANDS_ENABLED="true" + +# Agents Configuration +ENV AGENTS_ENABLED="true" \ + AGENTS_MAX_CONCURRENT="10" \ + AGENTS_DEFAULT_MODEL="haiku" \ + AGENTS_MAX_STEPS="15" \ + AGENTS_TIMEOUT="300000" + +# Prompt Cache Configuration +ENV PROMPT_CACHE_ENABLED="true" \ + PROMPT_CACHE_MAX_ENTRIES="1000" \ + PROMPT_CACHE_TTL_MS="300000" + +# Semantic Response Cache +ENV SEMANTIC_CACHE_ENABLED="false" \ + SEMANTIC_CACHE_THRESHOLD="0.95" + # Production Hardening Defaults ENV CIRCUIT_BREAKER_FAILURE_THRESHOLD="5" \ CIRCUIT_BREAKER_SUCCESS_THRESHOLD="2" \ @@ -160,6 +224,34 @@ ENV MEMORY_ENABLED="true" \ MEMORY_DEDUP_ENABLED="true" \ MEMORY_DEDUP_LOOKBACK="5" +# Token Optimization +ENV TOKEN_TRACKING_ENABLED="true" \ + TOOL_TRUNCATION_ENABLED="true" \ + SYSTEM_PROMPT_MODE="dynamic" \ + TOOL_DESCRIPTIONS="minimal" \ + HISTORY_COMPRESSION_ENABLED="true" \ + HISTORY_KEEP_RECENT_TURNS="10" \ + HISTORY_SUMMARIZE_OLDER="true" \ + TOKEN_BUDGET_WARNING="100000" \ + TOKEN_BUDGET_MAX="180000" \ + 
TOKEN_BUDGET_ENFORCEMENT="true" + +# Smart Tool Selection +ENV SMART_TOOL_SELECTION_MODE="heuristic" \ + SMART_TOOL_SELECTION_TOKEN_BUDGET="2500" + +# Hot Reload +ENV HOT_RELOAD_ENABLED="true" \ + HOT_RELOAD_DEBOUNCE_MS="1000" + +# Tiered Model Routing (optional) +# Format: TIER_=provider:model +# All 4 tiers must be set to enable tiered routing +# ENV TIER_SIMPLE="ollama:llama3.2" \ +# TIER_MEDIUM="openrouter:openai/gpt-4o-mini" \ +# TIER_COMPLEX="azure-openai:gpt-4o" \ +# TIER_REASONING="azure-openai:gpt-4o" + # Switch to non-root user USER node diff --git a/README.md b/README.md index 69d017a..89dd677 100644 --- a/README.md +++ b/README.md @@ -238,7 +238,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c ### Getting Started - 📦 **[Installation Guide](documentation/installation.md)** - Detailed installation for all methods -- ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 9+ providers +- ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 12+ providers - 🎯 **[Quick Start Examples](documentation/installation.md#quick-start-examples)** - Copy-paste configs ### IDE & CLI Integration @@ -277,7 +277,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c ## Key Features Highlights -- ✅ **Multi-Provider Support** - 9+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter) +- ✅ **Multi-Provider Support** - 12+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter, Moonshot AI) - ✅ **60-80% Cost Reduction** - Token optimization with smart tool selection, prompt caching, memory deduplication - ✅ **100% Local Option** - Run completely offline with Ollama/llama.cpp (zero cloud dependencies) - ✅ **OpenAI Compatible** - Works with Cursor IDE, Continue.dev, and any OpenAI-compatible client diff --git a/config/model-tiers.json b/config/model-tiers.json new file mode 100644 index 0000000..94c3812 --- /dev/null +++ b/config/model-tiers.json @@ -0,0 +1,89 @@ +{ + "tiers": { + "SIMPLE": { + "description": "Greetings, simple Q&A, confirmations, basic lookups", + "range": [0, 25], + "priority": 1, + "preferred": { + "ollama": ["llama3.2", "gemma2", "phi3", "qwen2.5:7b", "mistral"], + "llamacpp": ["default"], + "lmstudio": ["default"], + "openai": ["gpt-4o-mini", "gpt-3.5-turbo"], + "azure-openai": ["gpt-4o-mini", "gpt-35-turbo"], + "anthropic": ["claude-3-haiku-20240307", "claude-3-5-haiku-20241022"], + "bedrock": ["anthropic.claude-3-haiku-20240307-v1:0", "amazon.nova-lite-v1:0"], + "databricks": ["databricks-claude-haiku-4-5", "databricks-gpt-5-nano"], + "google": ["gemini-2.0-flash", "gemini-1.5-flash"], + "openrouter": ["google/gemini-flash-1.5", "deepseek/deepseek-chat"], + "zai": ["GLM-4-Flash"], + "moonshot": ["kimi-k2-turbo-preview"] + } + }, + "MEDIUM": { + "description": "Code reading, simple edits, research, documentation", + "range": [26, 50], + "priority": 2, + "preferred": { + "ollama": ["qwen2.5:32b", "deepseek-coder:33b", "codellama:34b"], + "llamacpp": ["default"], + "lmstudio": ["default"], + "openai": ["gpt-4o", "gpt-4-turbo"], + "azure-openai": ["gpt-4o", "gpt-4"], + "anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"], + "bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0", "amazon.nova-pro-v1:0"], + "databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1"], + "google": ["gemini-1.5-pro", "gemini-2.0-pro"], + "openrouter": 
["anthropic/claude-3.5-sonnet", "openai/gpt-4o"], + "zai": ["GLM-4.7"], + "moonshot": ["kimi-k2-turbo-preview"] + } + }, + "COMPLEX": { + "description": "Multi-file changes, debugging, architecture, refactoring", + "range": [51, 75], + "priority": 3, + "preferred": { + "ollama": ["qwen2.5:72b", "llama3.1:70b", "deepseek-coder-v2:236b"], + "openai": ["o1-mini", "o3-mini", "gpt-4o"], + "azure-openai": ["o1-mini", "gpt-4o"], + "anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"], + "bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0"], + "databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1-codex-max"], + "google": ["gemini-2.5-pro", "gemini-1.5-pro"], + "openrouter": ["anthropic/claude-3.5-sonnet", "meta-llama/llama-3.1-405b"], + "zai": ["GLM-4.7"], + "moonshot": ["kimi-k2-turbo-preview"] + } + }, + "REASONING": { + "description": "Complex analysis, security audits, novel problems, deep thinking", + "range": [76, 100], + "priority": 4, + "preferred": { + "openai": ["o1", "o1-pro", "o3"], + "azure-openai": ["o1", "o1-pro"], + "anthropic": ["claude-opus-4-20250514", "claude-3-opus-20240229"], + "bedrock": ["anthropic.claude-3-opus-20240229-v1:0"], + "databricks": ["databricks-claude-opus-4-6", "databricks-claude-opus-4-5", "databricks-gpt-5-2"], + "google": ["gemini-2.5-pro"], + "openrouter": ["anthropic/claude-3-opus", "deepseek/deepseek-reasoner", "openai/o1"], + "deepseek": ["deepseek-reasoner", "deepseek-r1"], + "moonshot": ["kimi-k2-thinking", "kimi-k2-turbo-preview"] + } + } + }, + "localProviders": { + "ollama": { "free": true, "defaultTier": "SIMPLE" }, + "llamacpp": { "free": true, "defaultTier": "SIMPLE" }, + "lmstudio": { "free": true, "defaultTier": "SIMPLE" } + }, + "providerAliases": { + "azure": "azure-openai", + "aws": "bedrock", + "amazon": "bedrock", + "claude": "anthropic", + "gemini": "google", + "vertex": "google", + "kimi": "moonshot" + } +} diff --git a/docker-compose.yml b/docker-compose.yml index 8b7c3d0..8d18466 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,7 +40,11 @@ services: # - llama3.2 (latest) # - qwen2.5:14b (strong reasoning, 7b struggles with tools) # - mistral:7b-instruct (fast and capable) - PREFER_OLLAMA: ${PREFER_OLLAMA:-true} + # Tier-based routing (set all 4 to enable) + TIER_SIMPLE: ${TIER_SIMPLE:-} + TIER_MEDIUM: ${TIER_MEDIUM:-} + TIER_COMPLEX: ${TIER_COMPLEX:-} + TIER_REASONING: ${TIER_REASONING:-} # OLLAMA_ENDPOINT: http://ollama:11434 OLLAMA_ENDPOINT: http://host.docker.internal:11434 OLLAMA_MODEL: ${OLLAMA_MODEL:-llama3.1:8b} @@ -64,23 +68,15 @@ services: # ============================================================ # AZURE OPENAI CONFIGURATION # ============================================================ - # Required when MODEL_PROVIDER=azure-openai - # IMPORTANT: Use FULL endpoint URL including deployment path and API version - # Format: https://YOUR-RESOURCE.openai.azure.com/openai/deployments/YOUR-DEPLOYMENT/chat/completions?api-version=2025-01-01-preview - # Get credentials from: https://portal.azure.com → Azure OpenAI → Keys and Endpoint - # Deployment options: gpt-4o, gpt-4o-mini, gpt-5-chat, o1-preview, o3-mini AZURE_OPENAI_ENDPOINT: ${AZURE_OPENAI_ENDPOINT:-} AZURE_OPENAI_API_KEY: ${AZURE_OPENAI_API_KEY:-} AZURE_OPENAI_DEPLOYMENT: ${AZURE_OPENAI_DEPLOYMENT:-gpt-4o} + AZURE_OPENAI_API_VERSION: ${AZURE_OPENAI_API_VERSION:-2024-08-01-preview} # ============================================================ # HYBRID ROUTING & FALLBACK # 
============================================================ - # Enable/disable fallback to cloud providers FALLBACK_ENABLED: ${FALLBACK_ENABLED:-true} - # Fallback provider when Ollama can't handle request - # Options: databricks, azure-openai, azure-anthropic, openrouter, bedrock, openai - # Note: Local providers (ollama, llamacpp, lmstudio) cannot be used as fallback FALLBACK_PROVIDER: ${FALLBACK_PROVIDER:-databricks} # ============================================================ @@ -94,12 +90,11 @@ services: # ============================================================ AZURE_ANTHROPIC_ENDPOINT: ${AZURE_ANTHROPIC_ENDPOINT:-} AZURE_ANTHROPIC_API_KEY: ${AZURE_ANTHROPIC_API_KEY:-} + AZURE_ANTHROPIC_VERSION: ${AZURE_ANTHROPIC_VERSION:-2023-06-01} # ============================================================ # AWS BEDROCK CONFIGURATION (OPTIONAL) # ============================================================ - # Supports Claude, Titan, Llama, Jurassic, Cohere, Mistral models - # Get API key from AWS Console → Bedrock → API Keys AWS_BEDROCK_API_KEY: ${AWS_BEDROCK_API_KEY:-} AWS_BEDROCK_REGION: ${AWS_BEDROCK_REGION:-us-east-1} AWS_BEDROCK_MODEL_ID: ${AWS_BEDROCK_MODEL_ID:-anthropic.claude-3-5-sonnet-20241022-v2:0} @@ -107,12 +102,18 @@ services: # ============================================================ # LLAMA.CPP CONFIGURATION (OPTIONAL) # ============================================================ - # For local GGUF models LLAMACPP_ENDPOINT: ${LLAMACPP_ENDPOINT:-http://localhost:8080} LLAMACPP_MODEL: ${LLAMACPP_MODEL:-default} LLAMACPP_EMBEDDINGS_ENDPOINT: ${LLAMACPP_EMBEDDINGS_ENDPOINT:-http://localhost:8080/embeddings} LLAMACPP_TIMEOUT_MS: ${LLAMACPP_TIMEOUT_MS:-120000} + # ============================================================ + # LM STUDIO CONFIGURATION (OPTIONAL) + # ============================================================ + LMSTUDIO_ENDPOINT: ${LMSTUDIO_ENDPOINT:-http://localhost:1234} + LMSTUDIO_MODEL: ${LMSTUDIO_MODEL:-default} + LMSTUDIO_TIMEOUT_MS: ${LMSTUDIO_TIMEOUT_MS:-120000} + # ============================================================ # OPENAI CONFIGURATION (OPTIONAL) # ============================================================ @@ -120,11 +121,22 @@ services: OPENAI_MODEL: ${OPENAI_MODEL:-gpt-4o} OPENAI_ENDPOINT: ${OPENAI_ENDPOINT:-https://api.openai.com/v1/chat/completions} + # ============================================================ + # Z.AI CONFIGURATION (OPTIONAL) + # ============================================================ + ZAI_API_KEY: ${ZAI_API_KEY:-} + ZAI_ENDPOINT: ${ZAI_ENDPOINT:-https://api.z.ai/api/anthropic/v1/messages} + ZAI_MODEL: ${ZAI_MODEL:-GLM-4.7} + + # ============================================================ + # GOOGLE VERTEX AI CONFIGURATION (OPTIONAL) + # ============================================================ + VERTEX_API_KEY: ${VERTEX_API_KEY:-} + VERTEX_MODEL: ${VERTEX_MODEL:-gemini-2.0-flash} + # ============================================================ # EMBEDDINGS PROVIDER OVERRIDE (OPTIONAL) # ============================================================ - # Options: ollama, llamacpp, openrouter, openai - # By default, uses same provider as MODEL_PROVIDER EMBEDDINGS_PROVIDER: ${EMBEDDINGS_PROVIDER:-} # ============================================================ @@ -132,16 +144,127 @@ services: # ============================================================ PORT: ${PORT:-8081} LOG_LEVEL: ${LOG_LEVEL:-info} + NODE_ENV: ${NODE_ENV:-production} + REQUEST_JSON_LIMIT: ${REQUEST_JSON_LIMIT:-1gb} + 
SESSION_DB_PATH: /app/data/sessions.db WEB_SEARCH_ENDPOINT: ${WEB_SEARCH_ENDPOINT:-http://searxng:8888/search} WORKSPACE_ROOT: /workspace # ============================================================ - # PRODUCTION HARDENING (OPTIONAL) + # FILE LOGGING (pino-roll rotation) + # ============================================================ + LOG_FILE_ENABLED: ${LOG_FILE_ENABLED:-false} + LOG_FILE_PATH: /app/logs/lynkr.log + LOG_FILE_LEVEL: ${LOG_FILE_LEVEL:-debug} + LOG_FILE_FREQUENCY: ${LOG_FILE_FREQUENCY:-daily} + LOG_FILE_MAX_FILES: ${LOG_FILE_MAX_FILES:-14} + + # ============================================================ + # TOOL INJECTION & SUGGESTION MODE + # ============================================================ + INJECT_TOOLS_LLAMACPP: ${INJECT_TOOLS_LLAMACPP:-true} + INJECT_TOOLS_OLLAMA: ${INJECT_TOOLS_OLLAMA:-true} + SUGGESTION_MODE_MODEL: ${SUGGESTION_MODE_MODEL:-default} + + # ============================================================ + # RATE LIMITING + # ============================================================ + RATE_LIMIT_ENABLED: ${RATE_LIMIT_ENABLED:-true} + RATE_LIMIT_WINDOW_MS: ${RATE_LIMIT_WINDOW_MS:-60000} + RATE_LIMIT_MAX: ${RATE_LIMIT_MAX:-100} + RATE_LIMIT_KEY_BY: ${RATE_LIMIT_KEY_BY:-session} + + # ============================================================ + # WEB SEARCH + # ============================================================ + WEB_SEARCH_ALLOW_ALL: ${WEB_SEARCH_ALLOW_ALL:-true} + WEB_SEARCH_TIMEOUT_MS: ${WEB_SEARCH_TIMEOUT_MS:-10000} + WEB_FETCH_BODY_PREVIEW_MAX: ${WEB_FETCH_BODY_PREVIEW_MAX:-10000} + WEB_SEARCH_RETRY_ENABLED: ${WEB_SEARCH_RETRY_ENABLED:-true} + WEB_SEARCH_MAX_RETRIES: ${WEB_SEARCH_MAX_RETRIES:-2} + + # ============================================================ + # POLICY CONFIGURATION + # ============================================================ + POLICY_MAX_STEPS: ${POLICY_MAX_STEPS:-20} + POLICY_MAX_TOOL_CALLS: ${POLICY_MAX_TOOL_CALLS:-12} + POLICY_TOOL_LOOP_THRESHOLD: ${POLICY_TOOL_LOOP_THRESHOLD:-10} + POLICY_GIT_ALLOW_PUSH: ${POLICY_GIT_ALLOW_PUSH:-false} + POLICY_GIT_ALLOW_PULL: ${POLICY_GIT_ALLOW_PULL:-true} + POLICY_GIT_ALLOW_COMMIT: ${POLICY_GIT_ALLOW_COMMIT:-true} + POLICY_GIT_REQUIRE_TESTS: ${POLICY_GIT_REQUIRE_TESTS:-false} + POLICY_GIT_AUTOSTASH: ${POLICY_GIT_AUTOSTASH:-false} + POLICY_FILE_BLOCKED_PATHS: ${POLICY_FILE_BLOCKED_PATHS:-/.env,.env,/etc/passwd,/etc/shadow} + POLICY_SAFE_COMMANDS_ENABLED: ${POLICY_SAFE_COMMANDS_ENABLED:-true} + + # ============================================================ + # AGENTS CONFIGURATION + # ============================================================ + AGENTS_ENABLED: ${AGENTS_ENABLED:-true} + AGENTS_MAX_CONCURRENT: ${AGENTS_MAX_CONCURRENT:-10} + AGENTS_DEFAULT_MODEL: ${AGENTS_DEFAULT_MODEL:-haiku} + AGENTS_MAX_STEPS: ${AGENTS_MAX_STEPS:-15} + AGENTS_TIMEOUT: ${AGENTS_TIMEOUT:-300000} + + # ============================================================ + # PROMPT & SEMANTIC CACHE + # ============================================================ + PROMPT_CACHE_ENABLED: ${PROMPT_CACHE_ENABLED:-true} + PROMPT_CACHE_MAX_ENTRIES: ${PROMPT_CACHE_MAX_ENTRIES:-1000} + PROMPT_CACHE_TTL_MS: ${PROMPT_CACHE_TTL_MS:-300000} + SEMANTIC_CACHE_ENABLED: ${SEMANTIC_CACHE_ENABLED:-false} + SEMANTIC_CACHE_THRESHOLD: ${SEMANTIC_CACHE_THRESHOLD:-0.95} + + # ============================================================ + # PRODUCTION HARDENING # ============================================================ CIRCUIT_BREAKER_FAILURE_THRESHOLD: ${CIRCUIT_BREAKER_FAILURE_THRESHOLD:-5} 
CIRCUIT_BREAKER_TIMEOUT: ${CIRCUIT_BREAKER_TIMEOUT:-60000} LOAD_SHEDDING_MEMORY_THRESHOLD: ${LOAD_SHEDDING_MEMORY_THRESHOLD:-0.85} + # ============================================================ + # LONG-TERM MEMORY (Titans-inspired) + # ============================================================ + MEMORY_ENABLED: ${MEMORY_ENABLED:-true} + MEMORY_RETRIEVAL_LIMIT: ${MEMORY_RETRIEVAL_LIMIT:-5} + MEMORY_SURPRISE_THRESHOLD: ${MEMORY_SURPRISE_THRESHOLD:-0.3} + MEMORY_MAX_AGE_DAYS: ${MEMORY_MAX_AGE_DAYS:-90} + MEMORY_MAX_COUNT: ${MEMORY_MAX_COUNT:-10000} + MEMORY_INCLUDE_GLOBAL: ${MEMORY_INCLUDE_GLOBAL:-true} + MEMORY_INJECTION_FORMAT: ${MEMORY_INJECTION_FORMAT:-system} + MEMORY_EXTRACTION_ENABLED: ${MEMORY_EXTRACTION_ENABLED:-true} + MEMORY_DECAY_ENABLED: ${MEMORY_DECAY_ENABLED:-true} + MEMORY_DECAY_HALF_LIFE: ${MEMORY_DECAY_HALF_LIFE:-30} + + # ============================================================ + # TOKEN OPTIMIZATION (60-80% cost reduction) + # ============================================================ + TOKEN_TRACKING_ENABLED: ${TOKEN_TRACKING_ENABLED:-true} + TOOL_TRUNCATION_ENABLED: ${TOOL_TRUNCATION_ENABLED:-true} + MEMORY_FORMAT: ${MEMORY_FORMAT:-compact} + MEMORY_DEDUP_ENABLED: ${MEMORY_DEDUP_ENABLED:-true} + MEMORY_DEDUP_LOOKBACK: ${MEMORY_DEDUP_LOOKBACK:-5} + SYSTEM_PROMPT_MODE: ${SYSTEM_PROMPT_MODE:-dynamic} + TOOL_DESCRIPTIONS: ${TOOL_DESCRIPTIONS:-minimal} + HISTORY_COMPRESSION_ENABLED: ${HISTORY_COMPRESSION_ENABLED:-true} + HISTORY_KEEP_RECENT_TURNS: ${HISTORY_KEEP_RECENT_TURNS:-10} + HISTORY_SUMMARIZE_OLDER: ${HISTORY_SUMMARIZE_OLDER:-true} + TOKEN_BUDGET_WARNING: ${TOKEN_BUDGET_WARNING:-100000} + TOKEN_BUDGET_MAX: ${TOKEN_BUDGET_MAX:-180000} + TOKEN_BUDGET_ENFORCEMENT: ${TOKEN_BUDGET_ENFORCEMENT:-true} + + # ============================================================ + # SMART TOOL SELECTION + # ============================================================ + SMART_TOOL_SELECTION_MODE: ${SMART_TOOL_SELECTION_MODE:-heuristic} + SMART_TOOL_SELECTION_TOKEN_BUDGET: ${SMART_TOOL_SELECTION_TOKEN_BUDGET:-2500} + + # ============================================================ + # HOT RELOAD + # ============================================================ + HOT_RELOAD_ENABLED: ${HOT_RELOAD_ENABLED:-true} + HOT_RELOAD_DEBOUNCE_MS: ${HOT_RELOAD_DEBOUNCE_MS:-1000} + # ============================================================ # HEADROOM CONTEXT COMPRESSION (OPTIONAL) # ============================================================ @@ -163,9 +286,20 @@ services: HEADROOM_CCR_TTL: ${HEADROOM_CCR_TTL:-300} HEADROOM_LLMLINGUA: ${HEADROOM_LLMLINGUA:-false} + # ============================================================ + # TIERED MODEL ROUTING (OPTIONAL) + # ============================================================ + # Format: TIER_=provider:model + # All 4 must be set to enable tiered routing + # TIER_SIMPLE: ${TIER_SIMPLE:-} + # TIER_MEDIUM: ${TIER_MEDIUM:-} + # TIER_COMPLEX: ${TIER_COMPLEX:-} + # TIER_REASONING: ${TIER_REASONING:-} + volumes: - - ./data:/app/data # Persist SQLite databases - - .:/workspace # Mount workspace + - ./data:/app/data # Persist SQLite databases + - ./logs:/app/logs # Persist log files + - .:/workspace # Mount workspace restart: unless-stopped networks: - lynkr-network diff --git a/docs/docs.html b/docs/docs.html index 71d5d12..22fe761 100644 --- a/docs/docs.html +++ b/docs/docs.html @@ -51,6 +51,7 @@
Features
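
Editor's note on the tiered routing contract introduced above (.env.example, Dockerfile, docker-compose.yml): the sketch below is a hypothetical illustration of how a provider:model TIER_* value and a 0-100 complexity score could be combined, written in plain Node.js. The function and variable names are invented for the example; this is not Lynkr's actual router.

// Hedged sketch of the documented TIER_* contract (names invented here).
function parseTierValue(value, defaultProvider) {
  // "ollama:llama3.2" -> { provider: "ollama", model: "llama3.2" }
  // A value without a "provider:" prefix falls back to MODEL_PROVIDER.
  const idx = value.indexOf(":");
  if (idx === -1) return { provider: defaultProvider, model: value };
  return { provider: value.slice(0, idx), model: value.slice(idx + 1) };
}

const tiers = {
  SIMPLE: process.env.TIER_SIMPLE,        // complexity 0-25
  MEDIUM: process.env.TIER_MEDIUM,        // complexity 26-50
  COMPLEX: process.env.TIER_COMPLEX,      // complexity 51-75
  REASONING: process.env.TIER_REASONING,  // complexity 76-100
};

// Tier routing is only active when all four values are set;
// otherwise MODEL_PROVIDER is used directly (static mode).
const tierRoutingEnabled = Object.values(tiers).every(Boolean);

function routeByComplexity(score) {
  if (!tierRoutingEnabled) return { provider: process.env.MODEL_PROVIDER };
  const tier =
    score <= 25 ? "SIMPLE" :
    score <= 50 ? "MEDIUM" :
    score <= 75 ? "COMPLEX" : "REASONING";
  return parseTierValue(tiers[tier], process.env.MODEL_PROVIDER);
}

With the sample values above, routeByComplexity(10) stays on the local ollama llama3.2 model, while routeByComplexity(40) resolves to openrouter with openai/gpt-4o-mini.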
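
A similar hedged sketch for the fallback settings (FALLBACK_ENABLED, FALLBACK_PROVIDER) documented above: the routed provider is tried first, and the request is retried against the configured cloud fallback only when fallback is enabled; local providers (ollama, llamacpp, lmstudio) are not valid fallback targets. This is an assumption drawn from the comments above, not the project's actual implementation; callProvider is a placeholder for whatever function performs the provider call.

// Hedged sketch of the documented fallback behaviour (names invented here).
const LOCAL_PROVIDERS = new Set(["ollama", "llamacpp", "lmstudio"]);

async function completeWithFallback(request, callProvider) {
  const primary = routeByComplexity(request.complexityScore);
  try {
    return await callProvider(primary, request);
  } catch (err) {
    const fallbackEnabled = process.env.FALLBACK_ENABLED === "true";
    const fallbackProvider = process.env.FALLBACK_PROVIDER;
    // Per .env.example: the fallback must be a cloud provider.
    if (!fallbackEnabled || !fallbackProvider || LOCAL_PROVIDERS.has(fallbackProvider)) {
      throw err;
    }
    return callProvider({ provider: fallbackProvider }, request);
  }
}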