# Patch summary (text before the first "diff --git" header is ignored by
# `git apply` / `patch`).
#
# .gitignore
#   - Ignores dotted env variants (*.env.*). The negation `!*.env.sample`
#     follows it because `*.env.*` on its own also matches committed
#     templates such as the .env.sample changed below. The post-image blob
#     id on the .gitignore index line predates that extra line.
#
# docker/swarm/stacks/socrates/.env.sample
#   - Drops SERVER_URL, DOCS_BASIC_AUTH_USER and DOCS_BASIC_AUTH_PASS.
#
# docker/swarm/stacks/socrates/stack-socrates.yml
#   - Moves healthcheck, environment and extra_hosts into the shared
#     x-deploy-config anchor; the app port changes from 3000 to 3001.
#   - Replaces svc-socrates with svc-socrates-alpha (published 4010) and
#     svc-socrates-bravo (published 4011); replicas go from 2 to 3.
#   - Adds `monitor: 30s` and `max_failure_ratio: 0.3` next to
#     `failure_action: rollback`.
#   - svc-redis becomes a global service published on host port 6379 with
#     a 200mb allkeys-lru memory cap; apps reach it through
#     host.docker.internal.
#
# NOTE(review): svc-redis binds 0.0.0.0, disables protected mode, sets no
#   requirepass and publishes 6379 in host mode — confirm the port is
#   firewalled from untrusted networks on every labelled node.
# NOTE(review): confirm the target Docker Engine resolves the
#   `host-gateway` value of extra_hosts for Swarm services — TODO verify.
# NOTE(review): healthcheck `start_interval` presumably needs a recent
#   engine / stack schema — TODO confirm against the deployed version.
#
diff --git a/.gitignore b/.gitignore
index e9557c031..a90d91cfb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,6 +47,8 @@ ansible/inventory/hosts
 
 # Secrets
 *.env
+*.env.*
+!*.env.sample
 .envrc
 .kubeconfig.yaml
 
diff --git a/docker/swarm/stacks/socrates/.env.sample b/docker/swarm/stacks/socrates/.env.sample
index 66f83f87b..bcd11d85b 100644
--- a/docker/swarm/stacks/socrates/.env.sample
+++ b/docker/swarm/stacks/socrates/.env.sample
@@ -5,14 +5,11 @@ DEPLOYMENT_VERSION=
 
 # -- Runtime
 NODE_ENV=production
-SERVER_URL=
 # ALLOWED_ORIGINS=*
 # LOG_LEVEL=info
 
 # -- Authentication
 API_KEY=
-DOCS_BASIC_AUTH_USER=
-DOCS_BASIC_AUTH_PASS=
 
 # -- Groq AI
 GROQ_API_KEY=
diff --git a/docker/swarm/stacks/socrates/stack-socrates.yml b/docker/swarm/stacks/socrates/stack-socrates.yml
index 757fec4ee..7a01ba9e1 100644
--- a/docker/swarm/stacks/socrates/stack-socrates.yml
+++ b/docker/swarm/stacks/socrates/stack-socrates.yml
@@ -1,6 +1,6 @@
 x-deploy-config: &deploy-config
   deploy:
-    replicas: 2
+    replicas: 3
     placement:
       max_replicas_per_node: 1
       constraints:
@@ -12,6 +12,8 @@ x-deploy-config: &deploy-config
       parallelism: 1
       delay: 10s
       failure_action: rollback
+      monitor: 30s
+      max_failure_ratio: 0.3
     resources:
       limits:
         cpus: ${CPU_LIMIT:-1}
@@ -22,67 +24,92 @@ x-deploy-config: &deploy-config
       condition: any
     labels:
       - org.freecodecamp.autoupdate=true
+  extra_hosts:
+    - "host.docker.internal:host-gateway"
+  healthcheck:
+    test:
+      [
+        "CMD-SHELL",
+        "wget --no-verbose --spider http://localhost:3001/health || exit 1",
+      ]
+    interval: 30s
+    timeout: 5s
+    start_period: 15s
+    start_interval: 5s
+    retries: 3
+  environment:
+    # Runtime
+    - NODE_ENV=${NODE_ENV:-production}
+    - PORT=3001
+    - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-*}
+    - LOG_LEVEL=${LOG_LEVEL:-info}
+    # Redis (local — each app connects to co-located Redis via host gateway)
+    - REDIS_URL=redis://host.docker.internal:6379
+    # Authentication
+    - API_KEY=${API_KEY}
+    # Groq AI
+    - GROQ_API_KEY=${GROQ_API_KEY}
+    - GROQ_MODEL=${GROQ_MODEL:-openai/gpt-oss-20b}
+    - GROQ_MODEL_HTML=${GROQ_MODEL_HTML:-openai/gpt-oss-20b}
+    - GROQ_MODEL_CSS=${GROQ_MODEL_CSS:-openai/gpt-oss-20b}
+    - GROQ_MODEL_JAVASCRIPT=${GROQ_MODEL_JAVASCRIPT:-openai/gpt-oss-120b}
+    - GROQ_MODEL_PYTHON=${GROQ_MODEL_PYTHON:-openai/gpt-oss-120b}
+    - GROQ_TIMEOUT_MS=${GROQ_TIMEOUT_MS:-30000}
+    - GROQ_MAX_RETRIES=${GROQ_MAX_RETRIES:-2}
+    - GROQ_BACKOFF_BASE_MS=${GROQ_BACKOFF_BASE_MS:-500}
+    - GROQ_MAX_TOKENS=${GROQ_MAX_TOKENS:-1024}
+    - GROQ_MAX_TOKENS_RETRY=${GROQ_MAX_TOKENS_RETRY:-2048}
+    - GROQ_EMPTY_RESPONSE_RETRIES=${GROQ_EMPTY_RESPONSE_RETRIES:-1}
+    # Circuit Breaker
+    - MODEL_CB_FAILURES=${MODEL_CB_FAILURES:-3}
+    - MODEL_CB_COOLDOWN_MS=${MODEL_CB_COOLDOWN_MS:-30000}
+    # Health Check
+    - ENABLE_EXTENDED_HEALTH=${ENABLE_EXTENDED_HEALTH:-false}
+    # Rate Limiting
+    - PER_USER_LIMIT=${PER_USER_LIMIT:-10}
+    - GLOBAL_LIMIT=${GLOBAL_LIMIT:-1000}
 
 x-port-config: &port-config
-  target: 3000
+  target: 3001
   protocol: tcp
   mode: host
 
 services:
-  svc-socrates:
+  svc-socrates-alpha:
     image: ${DOCKER_REGISTRY}/${DEPLOYMENT_ENV}/socrates:${DEPLOYMENT_VERSION}
     ports:
       - published: 4010
         <<: *port-config
     <<: *deploy-config
-    healthcheck:
-      test:
-        [
-          "CMD-SHELL",
-          "wget --no-verbose --spider http://localhost:3000/health || exit 1",
-        ]
-      interval: 30s
-      timeout: 5s
-      start_period: 15s
-      retries: 3
-    environment:
-      # Runtime
-      - NODE_ENV=${NODE_ENV:-production}
-      - PORT=3000
-      - SERVER_URL=${SERVER_URL}
-      - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-*}
-      - LOG_LEVEL=${LOG_LEVEL:-info}
-      # Redis (overlay network service discovery)
-      - REDIS_URL=redis://svc-redis:6379
-      # Authentication
-      - API_KEY=${API_KEY}
-      - DOCS_BASIC_AUTH_USER=${DOCS_BASIC_AUTH_USER}
-      - DOCS_BASIC_AUTH_PASS=${DOCS_BASIC_AUTH_PASS}
-      # Groq AI
-      - GROQ_API_KEY=${GROQ_API_KEY}
-      - GROQ_MODEL=${GROQ_MODEL:-openai/gpt-oss-20b}
-      - GROQ_MODEL_HTML=${GROQ_MODEL_HTML:-openai/gpt-oss-20b}
-      - GROQ_MODEL_CSS=${GROQ_MODEL_CSS:-openai/gpt-oss-20b}
-      - GROQ_MODEL_JAVASCRIPT=${GROQ_MODEL_JAVASCRIPT:-openai/gpt-oss-120b}
-      - GROQ_MODEL_PYTHON=${GROQ_MODEL_PYTHON:-openai/gpt-oss-120b}
-      - GROQ_TIMEOUT_MS=${GROQ_TIMEOUT_MS:-30000}
-      - GROQ_MAX_RETRIES=${GROQ_MAX_RETRIES:-2}
-      - GROQ_BACKOFF_BASE_MS=${GROQ_BACKOFF_BASE_MS:-500}
-      - GROQ_MAX_TOKENS=${GROQ_MAX_TOKENS:-1024}
-      - GROQ_MAX_TOKENS_RETRY=${GROQ_MAX_TOKENS_RETRY:-2048}
-      - GROQ_EMPTY_RESPONSE_RETRIES=${GROQ_EMPTY_RESPONSE_RETRIES:-1}
-      # Circuit Breaker
-      - MODEL_CB_FAILURES=${MODEL_CB_FAILURES:-3}
-      - MODEL_CB_COOLDOWN_MS=${MODEL_CB_COOLDOWN_MS:-30000}
-      # Health Check
-      - ENABLE_EXTENDED_HEALTH=${ENABLE_EXTENDED_HEALTH:-false}
-      # Rate Limiting
-      - PER_USER_LIMIT=${PER_USER_LIMIT:-10}
-      - GLOBAL_LIMIT=${GLOBAL_LIMIT:-1000}
+
+  svc-socrates-bravo:
+    image: ${DOCKER_REGISTRY}/${DEPLOYMENT_ENV}/socrates:${DEPLOYMENT_VERSION}
+    ports:
+      - published: 4011
+        <<: *port-config
+    <<: *deploy-config
 
   svc-redis:
     image: redis:7.4-alpine
-    command: ["redis-server", "--appendonly", "yes"]
+    ports:
+      - published: 6379
+        target: 6379
+        protocol: tcp
+        mode: host
+    command:
+      [
+        "redis-server",
+        "--appendonly",
+        "yes",
+        "--maxmemory",
+        "200mb",
+        "--maxmemory-policy",
+        "allkeys-lru",
+        "--bind",
+        "0.0.0.0",
+        "--protected-mode",
+        "no",
+      ]
     volumes:
       - redis_data:/data
     healthcheck:
@@ -90,9 +117,10 @@ services:
       interval: 10s
       timeout: 5s
       start_period: 5s
+      start_interval: 5s
       retries: 3
     deploy:
-      replicas: 1
+      mode: global
       placement:
         constraints:
           - node.labels.socrates.enabled == true