diff --git a/.env.example b/.env.example index f8a6555..ac272e4 100644 --- a/.env.example +++ b/.env.example @@ -6,16 +6,31 @@ API_KEYS=dev-key:tenant_dev:predict,read_models;admin-key:tenant_admin:predict,r # Set to "production" to enforce API_KEYS at startup # ENV=production +# ── Postgres credentials — CHANGE IN PRODUCTION ────────────────────────────── +# POSTGRES_PASSWORD is required; the compose file will refuse to start without it. +POSTGRES_USER=inference +POSTGRES_PASSWORD=change-me-in-production +POSTGRES_DB=inference_engine + +# ── Redis password (optional) — set to enable requirepass ──────────────────── +# Leave unset (or empty) for no-auth Redis (acceptable for local dev only). +# CHANGE IN PRODUCTION. +REDIS_PASSWORD=change-me-in-production + +# ── API service memory limit (default: 4g) ─────────────────────────────────── +# Tune to the largest model artifact you expect to load. +# API_MEMORY_LIMIT=4g + # PostgreSQL — when running via docker compose, use the service name "postgres" # Leave unset to fall back to SQLite (zero-dependency quickstart) -DATABASE_URL=postgresql://inference:inference@postgres:5432/inference_engine +DATABASE_URL=postgresql://inference:change-me-in-production@postgres:5432/inference_engine # Redis — when running via docker compose, use the service name "redis" # Leave unset to use in-process thread pool REDIS_URL=redis://redis:6379/0 # For local host-only runs (uvicorn on the host, not in Docker): -# DATABASE_URL=postgresql://inference:inference@127.0.0.1:15432/inference_engine +# DATABASE_URL=postgresql://inference:change-me-in-production@127.0.0.1:15432/inference_engine # REDIS_URL=redis://127.0.0.1:6379/0 # ── CLI ────────────────────────────────────────────────────────────────────── diff --git a/docker-compose.yml b/docker-compose.yml index da5b2f7..7380fe4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,9 +5,9 @@ services: postgres: image: postgres:16 environment: - POSTGRES_USER: inference - POSTGRES_PASSWORD: inference - POSTGRES_DB: inference_engine + 
POSTGRES_USER: ${POSTGRES_USER:-inference} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set} + POSTGRES_DB: ${POSTGRES_DB:-inference_engine} ports: - "15432:5432" volumes: @@ -15,19 +15,20 @@ services: networks: - inference-net healthcheck: - test: ["CMD-SHELL", "pg_isready -U inference -d inference_engine"] + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-inference} -d ${POSTGRES_DB:-inference_engine}"] interval: 5s timeout: 5s retries: 10 redis: image: redis:7-alpine + command: ["sh", "-c", "redis-server --requirepass \"${REDIS_PASSWORD:-}\""] ports: - "6379:6379" networks: - inference-net healthcheck: - test: ["CMD", "redis-cli", "ping"] + test: ["CMD-SHELL", "redis-cli -a \"${REDIS_PASSWORD:-}\" ping"] interval: 5s timeout: 3s retries: 10 @@ -41,8 +42,8 @@ services: - "8000:8000" env_file: .env environment: - DATABASE_URL: postgresql://inference:inference@postgres:5432/inference_engine - REDIS_URL: redis://redis:6379/0 + DATABASE_URL: postgresql://${POSTGRES_USER:-inference}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-inference_engine} + REDIS_URL: redis://:${REDIS_PASSWORD:-}@redis:6379/0 volumes: - models:/app/models networks: @@ -53,14 +54,18 @@ services: redis: condition: service_healthy restart: unless-stopped + deploy: + resources: + limits: + memory: ${API_MEMORY_LIMIT:-4g} worker: image: inference-engine:latest command: ["arq", "app.infra.queue.worker.WorkerSettings"] env_file: .env environment: - DATABASE_URL: postgresql://inference:inference@postgres:5432/inference_engine - REDIS_URL: redis://redis:6379/0 + DATABASE_URL: postgresql://${POSTGRES_USER:-inference}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-inference_engine} + REDIS_URL: redis://:${REDIS_PASSWORD:-}@redis:6379/0 volumes: - models:/app/models networks: diff --git a/docs/integrations/docker-compose.md b/docs/integrations/docker-compose.md index 94f27b8..93448e0 100644 --- a/docs/integrations/docker-compose.md +++ b/docs/integrations/docker-compose.md 
@@ -125,14 +125,14 @@ Prometheus scrapes `/metrics` on the `api` service (no authentication required The `api` and `worker` services load `.env` via `env_file: .env` and then override two variables unconditionally: ```yaml -DATABASE_URL: postgresql://inference:inference@postgres:5432/inference_engine -REDIS_URL: redis://redis:6379/0 +DATABASE_URL: postgresql://${POSTGRES_USER:-inference}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-inference_engine} +REDIS_URL: redis://:${REDIS_PASSWORD:-}@redis:6379/0 # empty password = no-auth; set REDIS_PASSWORD to enable requirepass ``` These use Docker's internal DNS (`postgres`, `redis`) — not `localhost`. If you run the API on the host while Postgres/Redis are in Docker, use the host-mapped ports instead: ```bash -DATABASE_URL=postgresql://inference:inference@127.0.0.1:15432/inference_engine +DATABASE_URL=postgresql://inference:your-password@127.0.0.1:15432/inference_engine REDIS_URL=redis://127.0.0.1:6379/0 ``` @@ -208,6 +208,55 @@ Tune this value to match the largest model artifact you expect to load. The limi --- +## API resource limits + +The `api` service has a memory limit controlled by `API_MEMORY_LIMIT` (default `4g`): + +```yaml +deploy: + resources: + limits: + memory: ${API_MEMORY_LIMIT:-4g} +``` + +Set `API_MEMORY_LIMIT` in `.env` to tune it for your largest loaded model. Without a limit, runaway memory growth in the API container can exhaust host memory and take down all co-located services. + +--- + +## Postgres credentials + +Postgres credentials are read from environment variables. `POSTGRES_USER` and `POSTGRES_DB` fall back to defaults, but no default password is baked into the compose file: + +```yaml +POSTGRES_USER: ${POSTGRES_USER:-inference} +POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set} +POSTGRES_DB: ${POSTGRES_DB:-inference_engine} +``` + +`POSTGRES_PASSWORD` uses the `:?` modifier: Compose will refuse to start if it is unset or empty. 
Set it in `.env`: + +```bash +POSTGRES_PASSWORD=change-me-in-production +``` + +> **Warning** +> Never commit a `.env` file that contains a real password. The `.env.example` ships with a placeholder value (`change-me-in-production`) as a reminder. + +--- + +## Redis password + +Redis authentication is optional and controlled by `REDIS_PASSWORD`. When set, the Redis container starts with `--requirepass` and the `REDIS_URL` passed to `api` and `worker` includes the credential automatically: + +```bash +# .env +REDIS_PASSWORD=change-me-in-production +``` + +Leave `REDIS_PASSWORD` unset (or commented out) for no-auth Redis — acceptable for local development on a trusted network, but not for any shared or production deployment. + +--- + ## Grafana admin password The Grafana admin password is configurable via the `GRAFANA_PASSWORD` environment variable: @@ -235,3 +284,5 @@ GRAFANA_PASSWORD=change-me-in-production | Models not found in worker | Ensure both `api` and `worker` mount the same `models` volume | | Source changes not reflected | Confirm `docker-compose.override.yml` is being loaded (`docker compose config` to verify) | | `network not found` on Prometheus start | Run `docker compose down` then `bash dev.sh --observability` to let Compose create the named network before attaching profile services | +| `POSTGRES_PASSWORD must be set` on startup | Add `POSTGRES_PASSWORD=...` to your `.env` file — the compose file requires it explicitly | +| Redis `WRONGPASS` / auth errors | Set `REDIS_PASSWORD` in `.env` to match the value the running Redis container was started with; or run `docker compose down -v` to reset (warning: `-v` also deletes the named volumes, including stored data) | diff --git a/scripts/curl_results.md b/scripts/curl_results.md index f6110a6..edb7694 100644 --- a/scripts/curl_results.md +++ b/scripts/curl_results.md @@ -1,6 +1,6 @@ # Curl Test Results Base: http://localhost:8000 -Run: Sun May 17 11:33:12 IST 2026 +Run: Wed May 20 13:17:46 IST 2026 ## GET /health (no auth) ``` @@ -59,12 +59,12 @@ HTTP 200 ## POST /predict/async ``` HTTP 200 
-{"job_id":"b1e65c99-606c-463b-bfed-502a758af849"} +{"job_id":"010897c0-e61b-4bde-b1f2-29dbe3713c33"} ``` ## GET /predict/async/:id ``` -{"job_id":"b1e65c99-606c-463b-bfed-502a758af849","status":"succeeded","model":"echo","version":"v1","created_at":"2026-05-17T06:03:12.455825Z","result":"async-test","error_message":null} +{"job_id":"010897c0-e61b-4bde-b1f2-29dbe3713c33","status":"succeeded","model":"echo","version":"v1","created_at":"2026-05-20T07:47:47.136068Z","result":"async-test","error_message":null} ``` ## GET /predict/async/unknown-id -> 404 @@ -75,8 +75,8 @@ HTTP 404 ## X-Request-ID ``` -Sent: test-1778997793 -Got: test-1778997793 +Sent: test-1779263267 +Got: test-1779263267 ``` ## GET /metrics (no auth — public) @@ -89,18 +89,18 @@ inference_requests_total{model="echo",tenant="tenant_dev",version="v2"} 1.0 inference_requests_total{model="ghost",tenant="tenant_dev",version="v1"} 1.0 # HELP inference_requests_created Total inference requests # TYPE inference_requests_created gauge -inference_requests_created{model="echo",tenant="tenant_dev",version="v1"} 1.7789977923317099e+09 -inference_requests_created{model="echo",tenant="tenant_dev",version="v2"} 1.7789977923721476e+09 -inference_requests_created{model="ghost",tenant="tenant_dev",version="v1"} 1.7789977924149942e+09 +inference_requests_created{model="echo",tenant="tenant_dev",version="v1"} 1.7792632670171075e+09 +inference_requests_created{model="echo",tenant="tenant_dev",version="v2"} 1.7792632670413818e+09 +inference_requests_created{model="ghost",tenant="tenant_dev",version="v1"} 1.7792632670738883e+09 # HELP inference_errors_total Total inference errors # TYPE inference_errors_total counter inference_errors_total{error_type="model_not_found",model="ghost",tenant="tenant_dev",version="v1"} 1.0 # HELP inference_errors_created Total inference errors # TYPE inference_errors_created gauge -inference_errors_created{error_type="model_not_found",model="ghost",tenant="tenant_dev",version="v1"} 
1.7789977924150174e+09 +inference_errors_created{error_type="model_not_found",model="ghost",tenant="tenant_dev",version="v1"} 1.7792632670739152e+09 # HELP inference_latency_seconds Inference latency # TYPE inference_latency_seconds histogram -inference_latency_seconds_bucket{le="0.005",model="echo",tenant="tenant_dev",version="v1"} 1.0 +inference_latency_seconds_bucket{le="0.005",model="echo",tenant="tenant_dev",version="v1"} 2.0 inference_latency_seconds_bucket{le="0.01",model="echo",tenant="tenant_dev",version="v1"} 2.0 inference_latency_seconds_bucket{le="0.02",model="echo",tenant="tenant_dev",version="v1"} 2.0 inference_latency_seconds_bucket{le="0.05",model="echo",tenant="tenant_dev",version="v1"} 2.0 @@ -113,7 +113,7 @@ inference_latency_seconds_bucket{le="5.0",model="echo",tenant="tenant_dev",versi inference_latency_seconds_bucket{le="10.0",model="echo",tenant="tenant_dev",version="v1"} 2.0 inference_latency_seconds_bucket{le="+Inf",model="echo",tenant="tenant_dev",version="v1"} 2.0 inference_latency_seconds_count{model="echo",tenant="tenant_dev",version="v1"} 2.0 -inference_latency_seconds_sum{model="echo",tenant="tenant_dev",version="v1"} 0.008782148361206055 +inference_latency_seconds_sum{model="echo",tenant="tenant_dev",version="v1"} 0.007155179977416992 inference_latency_seconds_bucket{le="0.005",model="echo",tenant="tenant_dev",version="v2"} 1.0 inference_latency_seconds_bucket{le="0.01",model="echo",tenant="tenant_dev",version="v2"} 1.0 inference_latency_seconds_bucket{le="0.02",model="echo",tenant="tenant_dev",version="v2"} 1.0 @@ -127,11 +127,11 @@ inference_latency_seconds_bucket{le="5.0",model="echo",tenant="tenant_dev",versi inference_latency_seconds_bucket{le="10.0",model="echo",tenant="tenant_dev",version="v2"} 1.0 inference_latency_seconds_bucket{le="+Inf",model="echo",tenant="tenant_dev",version="v2"} 1.0 inference_latency_seconds_count{model="echo",tenant="tenant_dev",version="v2"} 1.0 
-inference_latency_seconds_sum{model="echo",tenant="tenant_dev",version="v2"} 0.0024755001068115234 +inference_latency_seconds_sum{model="echo",tenant="tenant_dev",version="v2"} 0.0027306079864501953 # HELP inference_latency_seconds_created Inference latency # TYPE inference_latency_seconds_created gauge -inference_latency_seconds_created{model="echo",tenant="tenant_dev",version="v1"} 1.778997792338559e+09 -inference_latency_seconds_created{model="echo",tenant="tenant_dev",version="v2"} 1.7789977923746636e+09 +inference_latency_seconds_created{model="echo",tenant="tenant_dev",version="v1"} 1.7792632670205004e+09 +inference_latency_seconds_created{model="echo",tenant="tenant_dev",version="v2"} 1.7792632670441573e+09 # HELP executor_inflight Number of in-flight inference executions # TYPE executor_inflight gauge executor_inflight{device="gpu"} 0.0 @@ -158,21 +158,21 @@ Request 8: HTTP 429 Request 9: HTTP 429 Request 10: HTTP 429 Request 11: HTTP 429 -Request 12: HTTP 429 +Request 12: HTTP 200 Request 13: HTTP 429 -Request 14: HTTP 429 +Request 14: HTTP 200 Request 15: HTTP 429 ``` ## POST /predict/async/batch ``` HTTP 200 -{"job_id":"90047139-95ff-4bcd-a611-9a479b31fa58"} +{"job_id":"93107503-679d-40ce-b845-f9acb301dcf7"} ``` ## GET /predict/async/:id (batch) ``` -{"job_id":"90047139-95ff-4bcd-a611-9a479b31fa58","status":"succeeded","model":"echo","version":"v1","created_at":"2026-05-17T06:03:13.183552Z","result":["x","y","z"],"error_message":null} +{"job_id":"93107503-679d-40ce-b845-f9acb301dcf7","status":"succeeded","model":"echo","version":"v1","created_at":"2026-05-20T07:47:48.081914Z","result":["x","y","z"],"error_message":null} ``` ## POST /predict (tenant_dev) @@ -189,7 +189,7 @@ HTTP 200 ## Metrics tenant label check ``` -inference_requests_total{model="echo",tenant="tenant_dev",version="v1"} 10.0 +inference_requests_total{model="echo",tenant="tenant_dev",version="v1"} 12.0 inference_requests_total{model="echo",tenant="tenant_dev",version="v2"} 1.0 
inference_requests_total{model="ghost",tenant="tenant_dev",version="v1"} 1.0 ``` @@ -220,7 +220,7 @@ HTTP 403 ## GET /jobs/:id ``` HTTP 200 -{"job_id":"b1e65c99-606c-463b-bfed-502a758af849","status":"succeeded","model":"echo","version":"v1","created_at":"2026-05-17T06:03:12.455825+00:00"} +{"job_id":"010897c0-e61b-4bde-b1f2-29dbe3713c33","status":"succeeded","model":"echo","version":"v1","created_at":"2026-05-20T07:47:47.136068+00:00"} ``` ## POST /admin/models/echo/v1/reload (admin) @@ -266,8 +266,8 @@ HTTP 403 ## X-Request-ID (phase 4) ``` -Sent: p4-1778997798 -Got: p4-1778997798 +Sent: p4-1779263273 +Got: p4-1779263273 ```