From 94d0a2c0349166e06e9f426a5c5d64044cd51a36 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:18 +0300 Subject: [PATCH 1/5] Add local stack management workflow --- .dockerignore | 34 ++++ .gitignore | 2 + docker-compose.yml | 34 +++- scripts/braindb-manage.sh | 357 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 425 insertions(+), 2 deletions(-) create mode 100644 .dockerignore create mode 100755 scripts/braindb-manage.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..165786d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,34 @@ +# Secrets and local environment +.env +.env.* +!.env.example + +# VCS and local agent/editor state +.git/ +.gitignore +.claude/ +.codex +.codex/ +.vscode/ +.idea/ + +# Python/test/build artifacts +__pycache__/ +*.py[cod] +*$py.class +*.egg-info/ +.pytest_cache/ +.mypy_cache/ +.coverage +dist/ +build/ + +# Runtime/user data +data/sources/* +!data/sources/.gitkeep +!data/sources/README.md + +# OS noise +.DS_Store +Thumbs.db +*.stackdump diff --git a/.gitignore b/.gitignore index 005c3b6..2ba4b43 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ # Claude Code — settings.json, scheduled_tasks.lock, and any future state .claude/ +.codex +.codex/ # Python __pycache__/ diff --git a/docker-compose.yml b/docker-compose.yml index 491fb6d..e7ec46b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,32 @@ services: + postgres: + image: pgvector/pgvector:pg16 + container_name: braindb_postgres + restart: unless-stopped + networks: + local-network: + aliases: + - braindb_postgres + environment: + POSTGRES_USER: ${POSTGRES_USER:-braindb} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-braindb} + POSTGRES_DB: ${POSTGRES_DB:-braindb} + volumes: + - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"] + interval: 5s + timeout: 5s + retries: 12 + api: build: . 
+ image: braindb:local container_name: braindb_api restart: unless-stopped + depends_on: + postgres: + condition: service_healthy networks: - local-network environment: @@ -11,6 +35,9 @@ services: HF_TOKEN: ${HF_TOKEN:-} LLM_PROFILE: ${LLM_PROFILE:-deepinfra} AGENT_MODEL: ${AGENT_MODEL:-} + AGENT_BASE_URL: ${AGENT_BASE_URL:-} + AGENT_API_KEY: ${AGENT_API_KEY:-} + OPENAI_API_KEY: ${OPENAI_API_KEY:-} NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY:-} DEEPINFRA_API_KEY: ${DEEPINFRA_API_KEY:-} VLLM_API_KEY: ${VLLM_API_KEY:-} @@ -25,10 +52,10 @@ services: volumes: - .:/app command: > - sh -c "alembic upgrade head && uvicorn braindb.main:app --host 0.0.0.0 --port ${API_PORT:-8000} --reload" + sh -c "until python -c 'import os, psycopg2; conn = psycopg2.connect(os.environ[\"DATABASE_URL\"]); conn.close()'; do echo waiting for database; sleep 2; done; alembic upgrade head && uvicorn braindb.main:app --host 0.0.0.0 --port ${API_PORT:-8000} --reload" watcher: - build: . + image: braindb:local container_name: braindb_watcher restart: unless-stopped depends_on: @@ -45,3 +72,6 @@ services: networks: local-network: external: true + +volumes: + postgres-data: diff --git a/scripts/braindb-manage.sh b/scripts/braindb-manage.sh new file mode 100755 index 0000000..b1d8350 --- /dev/null +++ b/scripts/braindb-manage.sh @@ -0,0 +1,357 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "$SCRIPT_DIR/.." && pwd)" +COMPOSE_FILE="$REPO_ROOT/docker-compose.yml" +ENV_FILE="$REPO_ROOT/.env" +ENV_EXAMPLE="$REPO_ROOT/.env.example" +DOCKER_BIN="${DOCKER_BIN:-docker}" + +log() { printf '%s\n' "$*"; } +warn() { printf 'warn: %s\n' "$*" >&2; } +die() { printf 'error: %s\n' "$*" >&2; exit 1; } + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || die "missing required command: $1" +} + +compose() { + "$DOCKER_BIN" compose -f "$COMPOSE_FILE" "$@" +} + +ensure_env_file() { + if [[ ! 
-f "$ENV_FILE" ]]; then + [[ -f "$ENV_EXAMPLE" ]] || die "missing .env.example; cannot create .env" + cp "$ENV_EXAMPLE" "$ENV_FILE" + warn "created .env from .env.example" + fi +} + +env_value() { + local key="$1" + local default_value="${2:-}" + [[ -f "$ENV_FILE" ]] || { printf '%s' "$default_value"; return; } + + python - "$ENV_FILE" "$key" "$default_value" <<'PY' +import sys +from pathlib import Path + +path = Path(sys.argv[1]) +key = sys.argv[2] +default = sys.argv[3] + +for line in path.read_text().splitlines(): + if not line or line.lstrip().startswith('#') or '=' not in line: + continue + k, v = line.split('=', 1) + if k.strip() == key: + print(v.strip()) + raise SystemExit(0) + +print(default) +PY +} + +env_set() { + local key="$1" + local value="$2" + + python - "$ENV_FILE" "$key" "$value" <<'PY' +import sys +from pathlib import Path + +path = Path(sys.argv[1]) +key = sys.argv[2] +value = sys.argv[3] + +lines = path.read_text().splitlines() +updated = False +out = [] + +for line in lines: + if line.startswith(f"{key}="): + out.append(f"{key}={value}") + updated = True + else: + out.append(line) + +if not updated: + out.append(f"{key}={value}") + +path.write_text("\n".join(out) + "\n") +PY +} + +database_url() { + env_value DATABASE_URL +} + +ensure_database_url() { + local url + url="$(database_url)" + [[ -n "$url" ]] || die ".env must set DATABASE_URL" +} + +ensure_network() { + "$DOCKER_BIN" network inspect local-network >/dev/null 2>&1 || "$DOCKER_BIN" network create local-network >/dev/null +} + +health_port() { + env_value API_PORT 8100 +} + +health_url() { + printf 'http://localhost:%s/health' "$(health_port)" +} + +wait_for_health() { + require_cmd curl + local url attempts sleep_s response + url="$(health_url)" + attempts=30 + sleep_s=2 + + while (( attempts > 0 )); do + response="$(curl -fsS "$url" 2>/dev/null || true)" + if [[ "$response" == *'"status":"ok"'* ]]; then + log "health: ok ($url)" + return 0 + fi + sleep "$sleep_s" + ((attempts--)) + 
done + + warn "health check failed after waiting; try: curl -s $(health_url)" + return 1 +} + +openai_compatible_base_url() { + env_value AGENT_BASE_URL +} + +openai_compatible_root_url() { + local base + base="$(openai_compatible_base_url)" + [[ -n "$base" ]] || return 1 + base="${base%/}" + case "$base" in + */v1) + printf '%s\n' "${base%/v1}" + ;; + *) + printf '%s\n' "$base" + ;; + esac +} + +fetch_openai_compatible_models() { + require_cmd curl + local base root payload + base="$(openai_compatible_base_url)" + [[ -n "$base" ]] || return 1 + + root="$(openai_compatible_root_url)" + + payload="$( + curl -fsS --max-time 4 "$base/models" 2>/dev/null || \ + curl -fsS --max-time 4 "$root/api/tags" 2>/dev/null || \ + curl -fsS --max-time 4 "$root/v1/models" 2>/dev/null || true + )" + + [[ -n "$payload" ]] || return 1 + + python - "$payload" <<'PY' +import json +import sys + +raw = sys.argv[1] +try: + data = json.loads(raw) +except Exception: + raise SystemExit(1) + +models = [] +if isinstance(data, dict): + if isinstance(data.get('models'), list): + for item in data['models']: + if isinstance(item, dict): + name = item.get('name') or item.get('model') or item.get('id') + if name: + models.append(name) + if isinstance(data.get('data'), list): + for item in data['data']: + if isinstance(item, dict): + name = item.get('id') or item.get('name') + if name: + models.append(name) + +seen = set() +for model in models: + model = model.strip() + if not model: + continue + if not model.startswith('openai/'): + model = f'openai/{model}' + if model not in seen: + seen.add(model) + print(model) +PY +} + +maybe_set_openai_compatible_model() { + local existing models count model + existing="$(env_value AGENT_MODEL)" + [[ -n "$existing" ]] && return 0 + + if ! models="$(fetch_openai_compatible_models)"; then + models="" + fi + if [[ -z "$models" ]]; then + die "LLM_PROFILE=openai_compatible/local_ollama needs AGENT_MODEL, and auto-discovery failed. 
Run: ./scripts/braindb-manage.sh models"
+    fi
+
+    count=0
+    while IFS= read -r model; do
+        [[ -n "$model" ]] && count=$((count + 1))
+    done <<<"$models"
+    if [[ "$count" == "1" ]]; then
+        model="$(printf '%s\n' "$models")"
+        env_set AGENT_MODEL "$model"
+        log "set AGENT_MODEL=$model"
+        return 0
+    fi
+
+    die "LLM_PROFILE=openai_compatible/local_ollama needs AGENT_MODEL and discovery found multiple models. Run: ./scripts/braindb-manage.sh models; then set AGENT_MODEL=openai/<model> in .env"
+}
+
+warn_if_unconfigured() {
+    local database_url llm_profile deepinfra_key nim_key openai_key agent_model agent_base_url
+    database_url="$(env_value DATABASE_URL)"
+    llm_profile="$(env_value LLM_PROFILE deepinfra)"
+    deepinfra_key="$(env_value DEEPINFRA_API_KEY)"
+    nim_key="$(env_value NVIDIA_NIM_API_KEY)"
+    openai_key="$(env_value OPENAI_API_KEY)"
+    agent_model="$(env_value AGENT_MODEL)"
+    agent_base_url="$(env_value AGENT_BASE_URL)"
+
+    case "$database_url" in
+        ""|postgresql://user:password@host:5432/braindb)
+            warn "DATABASE_URL still looks like the example; update .env before expecting a successful start"
+            ;;
+    esac
+
+    case "$llm_profile" in
+        deepinfra)
+            [[ -n "$deepinfra_key" ]] || warn "LLM_PROFILE=deepinfra but DEEPINFRA_API_KEY is empty"
+            ;;
+        nim)
+            [[ -n "$nim_key" ]] || warn "LLM_PROFILE=nim but NVIDIA_NIM_API_KEY is empty"
+            ;;
+        codex)
+            [[ -n "$openai_key" ]] || warn "LLM_PROFILE=codex but OPENAI_API_KEY is empty"
+            ;;
+        openai_compatible|local_ollama)
+            [[ -n "$agent_base_url" ]] || die "LLM_PROFILE=openai_compatible/local_ollama requires AGENT_BASE_URL"
+            if [[ -z "$agent_model" ]]; then
+                maybe_set_openai_compatible_model
+            fi
+            ;;
+    esac
+}
+
+print_openai_compatible_models() {
+    local models
+    if ! 
models="$(fetch_openai_compatible_models)"; then + die "could not reach OpenAI-compatible models endpoint from AGENT_BASE_URL" + fi + [[ -n "$models" ]] || die "no OpenAI-compatible models found at AGENT_BASE_URL" + printf '%s\n' "$models" +} + +start_stack() { + ensure_env_file + ensure_database_url + warn_if_unconfigured + ensure_network + compose up -d --build + wait_for_health +} + +update_stack() { + ensure_env_file + ensure_database_url + warn_if_unconfigured + ensure_network + compose up -d --build --force-recreate + wait_for_health +} + +status_stack() { + ensure_env_file + ensure_database_url + ensure_network + compose ps + if command -v curl >/dev/null 2>&1; then + curl -fsS "$(health_url)" || true + printf '\n' + fi +} + +logs_stack() { + ensure_env_file + ensure_database_url + ensure_network + compose logs -f --tail="${TAIL_LINES:-200}" "$@" +} + +usage() { + cat <<'EOF' +Usage: braindb-manage.sh + +Commands: + start, bootstrap, up Ensure .env/network and start the stack + update, upgrade Recreate services + status Show compose status and health + logs [service...] 
Follow service logs (default tail=200) + models List models from AGENT_BASE_URL + help Show this help + +Env overrides: + DOCKER_BIN=docker|podman Docker-compatible CLI to use + TAIL_LINES=200 Lines shown by logs +EOF +} + +main() { + require_cmd "$DOCKER_BIN" + local cmd="${1:-help}" + shift || true + + case "$cmd" in + start|bootstrap|up) + start_stack "$@" + ;; + update|upgrade) + update_stack "$@" + ;; + status) + status_stack "$@" + ;; + logs) + logs_stack "$@" + ;; + models) + ensure_env_file + print_openai_compatible_models + ;; + help|-h|--help) + usage + ;; + *) + die "unknown command: $cmd (try: help)" + ;; + esac +} + +main "$@" From b8f2bf8518ace21b5fe034d6605678ec983d7e18 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:24 +0300 Subject: [PATCH 2/5] Add OpenAI-compatible agent profiles --- .env.example | 8 ++-- BRAINDB_GUIDE.md | 3 +- CLAUDE.md | 4 +- CONTRIBUTING.md | 6 +-- README.md | 42 +++++++++++++++++---- braindb/config.py | 56 ++++++++++++++++++++++++---- tests/test_config_profiles.py | 70 +++++++++++++++++++++++++++++++++++ 7 files changed, 164 insertions(+), 25 deletions(-) create mode 100644 tests/test_config_profiles.py diff --git a/.env.example b/.env.example index c26571d..47e5c14 100644 --- a/.env.example +++ b/.env.example @@ -17,8 +17,10 @@ HF_TOKEN= # (currently: nim, deepinfra, vllm_workstation). LLM_PROFILE=deepinfra -# Provider API keys — fill in whichever profile you're using. -# Get a NIM key at https://build.nvidia.com/, a DeepInfra key at https://deepinfra.com/ +# Provider API keys — fill in whichever hosted profile you're using. +# Get an OpenAI key for Codex, a NIM key at https://build.nvidia.com/, +# or a DeepInfra key at https://deepinfra.com/ +OPENAI_API_KEY= NVIDIA_NIM_API_KEY= DEEPINFRA_API_KEY= @@ -28,7 +30,7 @@ DEEPINFRA_API_KEY= VLLM_API_KEY= # Optional: override the profile's default model string (e.g. to try a smaller variant). -# Leave blank to use the profile's built-in default. 
+# Leave blank to use the profile's built-in default. Required for openai_compatible/local_ollama. AGENT_MODEL= # Agent verbosity — when true, every tool call is logged to stdout diff --git a/BRAINDB_GUIDE.md b/BRAINDB_GUIDE.md index 8917c31..5c8c2b0 100644 --- a/BRAINDB_GUIDE.md +++ b/BRAINDB_GUIDE.md @@ -306,7 +306,7 @@ curl -X POST http://localhost:8000/api/v1/entities/datasources/ingest \ ### BrainDB Agent — natural language queries -`POST /api/v1/agent/query` — instead of orchestrating individual API calls, send a plain English request and let BrainDB's internal agent handle it. The agent uses the OpenAI Agents SDK with LiteLLM (provider pluggable via `LLM_PROFILE` — default `deepinfra`, `nim` also supported) and has access to all 21 BrainDB operations as function tools. +`POST /api/v1/agent/query` — instead of orchestrating individual API calls, send a plain English request and let BrainDB's internal agent handle it. The agent uses the OpenAI Agents SDK with LiteLLM (provider pluggable via `LLM_PROFILE` — default `deepinfra`, with `nim`, `codex`, and generic OpenAI-compatible local endpoints also supported) and has access to all 21 BrainDB operations as function tools. ```bash curl -X POST http://localhost:8000/api/v1/agent/query \ @@ -340,6 +340,7 @@ The agent has these tools internally: `recall_memory`, `quick_search`, `save_fac - **Self-hosted vLLM**: set `LLM_PROFILE=vllm_workstation` for a vLLM server bound to the Docker host's loopback at `:8002`. No API key needed if the server runs without auth. See [CONTRIBUTING.md](CONTRIBUTING.md) for how to add your own self-hosted profile. - Profiles live in `braindb/config.py::_LLM_PROFILES`. Add new providers there (e.g. `together`, `openai`) by adding a dict entry — no code change required. - Optional override: set `AGENT_MODEL=` in `.env` to use a non-default model for the active profile. 
+- Optional auth override: set `AGENT_API_KEY=` only if your OpenAI-compatible endpoint requires auth; copilot-api and Ollama can run without it when local auth is disabled. **Verbose logging**: set `AGENT_VERBOSE=true` in `.env` to log every tool call to stdout (visible via `docker logs braindb_api -f`). The HTTP response stays clean — only `answer` and `max_turns`. diff --git a/CLAUDE.md b/CLAUDE.md index f79b079..dd61c64 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -127,7 +127,7 @@ curl -s -X DELETE http://localhost:8000/api/v1/entities/ **Direct API** (what's shown above) — call individual endpoints yourself. Full control, more verbose context. Good when you want to be precise about what's saved or recalled. -**Agent endpoint** — `POST /api/v1/agent/query` — send a natural language request and let BrainDB's internal agent handle it. The agent (LiteLLM with pluggable provider via `LLM_PROFILE` — default `deepinfra/google/gemma-4-31B-it`, NIM also supported) has all 21 BrainDB operations as tools. Cleaner conversation context, but slower (5-30 seconds for a query). +**Agent endpoint** — `POST /api/v1/agent/query` — send a natural language request and let BrainDB's internal agent handle it. The agent (LiteLLM with pluggable provider via `LLM_PROFILE` — default `deepinfra/google/gemma-4-31B-it`, NIM, Codex, and generic OpenAI-compatible endpoints such as copilot-api or Ollama also supported) has all 21 BrainDB operations as tools. Cleaner conversation context, but slower (5-30 seconds for a query). ```bash # Recall via the agent @@ -156,7 +156,7 @@ When debugging the agent: set `AGENT_VERBOSE=true` in `.env` and watch `docker l ## Important Notes -- `.env` contains real DB credentials and provider API keys (`DEEPINFRA_API_KEY`, `NVIDIA_NIM_API_KEY`, etc.) — **never commit it**, it is in `.gitignore`. Active provider is picked by `LLM_PROFILE` (see `braindb/config.py::_LLM_PROFILES`). 
+- `.env` contains real DB credentials and provider API keys (`DEEPINFRA_API_KEY`, `NVIDIA_NIM_API_KEY`, `OPENAI_API_KEY`, `AGENT_API_KEY`, etc.) — **never commit it**, it is in `.gitignore`. Active provider is picked by `LLM_PROFILE` (see `braindb/config.py::_LLM_PROFILES`). - Always-on rules (priority 100, `always_on: true`) are returned on every `/memory/context` call - `notes` field on any entity or relation is for running commentary — append observations over time - Keywords are stored as both a `TEXT[]` column on the entity AND as separate keyword entities linked via `tagged_with` relations (the keyword entities carry the embeddings for semantic search) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 396cec8..102184d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ Prerequisites: Docker Desktop (or any Docker Engine), Python 3.12, a Postgres 16 git clone braindb cd braindb cp .env.example .env -# edit .env — set DATABASE_URL, pick an LLM_PROFILE, fill in the matching API key +# edit .env — set DATABASE_URL, pick an LLM_PROFILE, fill in the matching API key or OpenAI-compatible endpoint docker network create local-network # one-time; docker-compose expects this docker compose up -d --build @@ -45,8 +45,8 @@ LiteLLM does the heavy lifting — providers are selected by a prefix in the mod "api_key_env": "MY_PROVIDER_API_KEY", }, ``` -2. Add `MY_PROVIDER_API_KEY=` to [`.env.example`](.env.example). -3. Add the env passthrough to [`docker-compose.yml`](docker-compose.yml) under the `api` service. +2. Add `MY_PROVIDER_API_KEY=` to [`.env.example`](.env.example) if the provider needs auth. +3. Add the env passthrough to [`docker-compose.yml`](docker-compose.yml) under the `api` service. OpenAI-compatible endpoints can use `LLM_PROFILE=openai_compatible` plus `AGENT_BASE_URL` / `AGENT_API_KEY` variables. 4. (Optional) Document the provider in the README and BRAINDB_GUIDE. 
No other code changes required — the agent resolves model and key through `settings.resolved_agent_model` and `settings.resolved_api_key`, which read the active profile. diff --git a/README.md b/README.md index 79854dc..eb33c7b 100644 --- a/README.md +++ b/README.md @@ -72,18 +72,28 @@ Any reachable hostname/IP works — the connecting user just needs network acces ### 4. Pick an LLM provider (for the internal agent) -The agent talks to any LiteLLM-supported backend. BrainDB ships with two profiles pre-configured: **DeepInfra** (default, fast, paid) and **NVIDIA NIM** (free tier, can be flaky). +The agent talks to any LiteLLM-supported backend. BrainDB ships with four profiles pre-configured: **DeepInfra** (default, fast, paid), **NVIDIA NIM** (free tier, can be flaky), **Codex** (`gpt-5.3-codex-spark` via OpenAI routing), and **openai_compatible** for local OpenAI-compatible APIs such as copilot-api or Ollama (`local_ollama` remains as a legacy alias). In `.env`: ``` -LLM_PROFILE=deepinfra # or 'nim' — default is 'deepinfra' +LLM_PROFILE=deepinfra # or 'codex'/'nim'/'openai_compatible' — default is 'deepinfra' DEEPINFRA_API_KEY=... # if profile=deepinfra — get from https://deepinfra.com/ NVIDIA_NIM_API_KEY=... # if profile=nim — get from https://build.nvidia.com/ +OPENAI_API_KEY=... # if profile=codex — OpenAI API key for Codex ``` -Only the key matching your chosen profile needs to be filled. Leave the other blank or absent. +For a local OpenAI-compatible server such as `copilot-api`: -Adding a third provider (Together, OpenAI, local vLLM, whatever) is a two-line entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + an env var — no other code changes. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe. 
+``` +LLM_PROFILE=openai_compatible +AGENT_BASE_URL=http://:4141/v1 # copilot-api default port +AGENT_MODEL=openai/gpt-5-mini +AGENT_API_KEY= # optional; only set if your endpoint requires auth +``` + +Only the key matching your chosen hosted profile needs to be filled. Leave the other blank or absent. For OpenAI-compatible local endpoints with auth disabled, leave `AGENT_API_KEY` blank. + +Adding another hosted provider (Together, OpenAI, local vLLM, whatever) is usually a small entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + env passthrough — see [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe. ### 5. Create the Docker network, then bring the stack up @@ -110,6 +120,19 @@ API at `http://localhost:8000`. Swagger UI at `http://localhost:8000/docs`. Data Drop a markdown file into `data/sources/` and the watcher sidecar picks it up within ~7 seconds — see [File Ingestion](#file-ingestion) below. +### Operational helper + +For a safer one-command workflow, use `scripts/braindb-manage.sh`: + +```bash +./scripts/braindb-manage.sh start +./scripts/braindb-manage.sh update +./scripts/braindb-manage.sh status +./scripts/braindb-manage.sh logs api +``` + +It creates `.env` from `.env.example` if needed, ensures the `local-network` Docker network exists, starts/recreates the Compose services, and checks `http://localhost:8000/health`. + --- ## Key Endpoints @@ -162,7 +185,7 @@ Single `query` (string) still works for backward compatibility. Instead of orchestrating individual API calls, you can talk to BrainDB in plain English via `POST /api/v1/agent/query`. The agent (built on the OpenAI Agents SDK + LiteLLM) decides which tools to call and returns a summary. 
```bash -curl -X POST http://localhost:8000/api/v1/agent/query \ +curl -X POST http://localhost:8100/api/v1/agent/query \ -H "Content-Type: application/json" \ -d '{"query":"What do you know about the user role and recent projects?"}' @@ -173,15 +196,18 @@ The agent has 21 tools — every single BrainDB endpoint plus `delegate_to_subag **LLM provider — pluggable via `.env`**: -`LLM_PROFILE` selects the backend. Profiles are defined in [braindb/config.py](braindb/config.py) (`_LLM_PROFILES`) — currently `deepinfra` (default, model `google/gemma-4-31B-it`) and `nim` (NVIDIA NIM, model `google/gemma-4-31b-it`). Each profile is a model-prefix + env-var pair; adding a new one is a dict entry. +`LLM_PROFILE` selects the backend. Profiles are defined in [braindb/config.py](braindb/config.py) (`_LLM_PROFILES`) — currently `deepinfra` (default, model `google/gemma-4-31B-it`), `nim` (NVIDIA NIM, model `google/gemma-4-31b-it`), `codex` (OpenAI Codex, model `gpt-5.3-codex-spark`), and `openai_compatible` (generic OpenAI-compatible `/v1` endpoints; `local_ollama` is a legacy alias). ``` -LLM_PROFILE=deepinfra # or nim — default is deepinfra +LLM_PROFILE=deepinfra # or codex/nim/openai_compatible — default is deepinfra DEEPINFRA_API_KEY=... # required if profile=deepinfra (https://deepinfra.com/) NVIDIA_NIM_API_KEY=... # required if profile=nim (https://build.nvidia.com/) +OPENAI_API_KEY=... # required if profile=codex AGENT_MODEL= # optional: override the profile's default model ``` +For copilot-api, set `AGENT_BASE_URL=http://:4141/v1` and `AGENT_MODEL=openai/gpt-5-mini`. For Ollama, use `AGENT_BASE_URL=http://:11434/v1` and an Ollama model such as `AGENT_MODEL=openai/llama3.2:3b`. `AGENT_API_KEY` is optional and only needed if your OpenAI-compatible endpoint enforces auth. + **Verbose logging**: set `AGENT_VERBOSE=true` in `.env` to log every tool call (entry args + exit elapsed/result) to stdout, visible via `docker logs braindb_api -f`. 
--- @@ -276,5 +302,5 @@ It's idempotent by content hash — re-calling with the same bytes returns 200 ( - PostgreSQL 16 with `pg_trgm` and `pgvector` - Alembic migrations - `sentence-transformers` + `Qwen/Qwen3-Embedding-0.6B` for keyword embeddings -- `openai-agents[litellm]` + LiteLLM for the internal agent (DeepInfra / NIM / others pluggable via `LLM_PROFILE`) +- `openai-agents[litellm]` + LiteLLM for the internal agent (DeepInfra / NIM / Codex / others pluggable via `LLM_PROFILE`) - Docker Compose — `api` + `watcher` services, external PostgreSQL diff --git a/braindb/config.py b/braindb/config.py index c27eb08..40d2fdd 100644 --- a/braindb/config.py +++ b/braindb/config.py @@ -7,6 +7,10 @@ # plus an optional base_url for self-hosted OpenAI-compatible servers (vLLM, # Ollama, llama.cpp). Adding a new provider is a dict entry, no code change. _LLM_PROFILES: dict[str, dict[str, str]] = { + "codex": { + "model": "openai/gpt-5.3-codex-spark", + "api_key_env": "OPENAI_API_KEY", + }, "nim": { "model": "nvidia_nim/google/gemma-4-31b-it", "api_key_env": "NVIDIA_NIM_API_KEY", @@ -15,12 +19,18 @@ "model": "deepinfra/google/gemma-4-31B-it", "api_key_env": "DEEPINFRA_API_KEY", }, + "openai_compatible": { + "model": "", + "api_key_env": "AGENT_API_KEY", + "default_api_key": "ollama", + }, "vllm_workstation": { "model": "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit", "api_key_env": "VLLM_API_KEY", "base_url": "http://host.docker.internal:8002/v1", }, } +_LLM_PROFILES["local_ollama"] = _LLM_PROFILES["openai_compatible"] class Settings(BaseSettings): @@ -50,27 +60,57 @@ class Settings(BaseSettings): # Agent (LiteLLM — provider selected via llm_profile) llm_profile: str = "deepinfra" agent_model: str = "" # blank = use profile's default model + agent_base_url: str = "" # OpenAI-compatible base URL, e.g. 
http://host:11434/v1 + agent_api_key: str = "" # optional generic key for OpenAI-compatible endpoints + openai_api_key: str = "" + deepinfra_api_key: str = "" + nvidia_nim_api_key: str = "" agent_max_turns: int = 15 agent_subagent_max_turns: int = 30 agent_verbose: bool = False + @property + def _active_llm_profile(self) -> dict[str, str]: + try: + return _LLM_PROFILES[self.llm_profile] + except KeyError as exc: + known = ", ".join(sorted(_LLM_PROFILES)) + raise ValueError(f"Unknown LLM_PROFILE={self.llm_profile!r}. Expected one of: {known}") from exc + + def _env_setting(self, env_name: str) -> str: + field_name = env_name.lower() + return getattr(self, field_name, "") or os.getenv(env_name, "") + @property def resolved_agent_model(self) -> str: - return self.agent_model or _LLM_PROFILES[self.llm_profile]["model"] + model = self.agent_model or self._active_llm_profile["model"] + if not model: + raise ValueError( + f"AGENT_MODEL must be set for LLM_PROFILE={self.llm_profile!r}; " + "for OpenAI-compatible endpoints use AGENT_MODEL=openai/ " + "(for example, openai/gpt-5-mini for copilot-api)." + ) + return model @property def resolved_api_key(self) -> str: - profile = _LLM_PROFILES[self.llm_profile] - key = os.getenv(profile["api_key_env"], "") - # Self-hosted profiles (vLLM/Ollama) may run without auth, but the - # OpenAI client still needs a non-empty key — supply a placeholder. 
- if not key and profile.get("base_url"): + profile = self._active_llm_profile + key = self._env_setting(profile["api_key_env"]) + if key: + return key + if "default_api_key" in profile: + return profile["default_api_key"] + if self.resolved_base_url: return "EMPTY" - return key + return "" @property def resolved_base_url(self) -> str | None: - return _LLM_PROFILES[self.llm_profile].get("base_url") + return self.agent_base_url or self._active_llm_profile.get("base_url") + + @property + def resolved_agent_base_url(self) -> str | None: + return self.resolved_base_url settings = Settings() diff --git a/tests/test_config_profiles.py b/tests/test_config_profiles.py new file mode 100644 index 0000000..dabd391 --- /dev/null +++ b/tests/test_config_profiles.py @@ -0,0 +1,70 @@ +import pytest + +from braindb.config import Settings + + +pytestmark = pytest.mark.unit + + +def test_codex_profile_resolves_default_model(): + settings = Settings(_env_file=None, llm_profile="codex", openai_api_key="test-key") + + assert settings.resolved_agent_model == "openai/gpt-5.3-codex-spark" + + +def test_codex_profile_resolves_api_key_from_field(): + settings = Settings(_env_file=None, llm_profile="codex", openai_api_key="test-key") + + assert settings.resolved_api_key == "test-key" + + +def test_codex_profile_resolves_api_key_from_environment(monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "env-key") + settings = Settings(_env_file=None, llm_profile="codex") + + assert settings.resolved_api_key == "env-key" + + +def test_agent_model_override_wins_for_codex_profile(): + settings = Settings( + _env_file=None, + llm_profile="codex", + agent_model="openai/alternate-model", + openai_api_key="test-key", + ) + + assert settings.resolved_agent_model == "openai/alternate-model" + + +def test_unknown_profile_error_lists_known_profiles(): + settings = Settings(_env_file=None, llm_profile="missing") + + with pytest.raises(ValueError, match="openai_compatible"): + _ = settings.resolved_agent_model 
+ + +def test_openai_compatible_profile_default_api_key(): + settings = Settings( + _env_file=None, + llm_profile="openai_compatible", + agent_model="openai/gpt-5-mini", + agent_base_url="http://localhost:4141/v1", + ) + + assert settings.resolved_agent_model == "openai/gpt-5-mini" + assert settings.resolved_api_key == "ollama" + assert settings.resolved_agent_base_url == "http://localhost:4141/v1" + + +def test_local_ollama_alias_matches_openai_compatible(): + from braindb.config import _LLM_PROFILES + + assert _LLM_PROFILES["local_ollama"] is _LLM_PROFILES["openai_compatible"] + settings = Settings( + _env_file=None, + llm_profile="local_ollama", + agent_model="openai/llama3.2:3b", + ) + + assert settings.resolved_agent_model == "openai/llama3.2:3b" + assert settings.resolved_api_key == "ollama" From 969a55e57b470e45964f88878bc1e36f65449071 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:30 +0300 Subject: [PATCH 3/5] Harden agent tools and SQL logging --- braindb/agent/tools.py | 138 ++++++++++++++++++++++----- braindb/routers/memory.py | 38 ++++---- braindb/services/activity_log.py | 22 +++++ pyproject.toml | 3 + tests/conftest.py | 6 +- tests/test_agent_tools_validation.py | 108 +++++++++++++++++++++ tests/test_memory_sql.py | 41 ++++++++ 7 files changed, 311 insertions(+), 45 deletions(-) create mode 100644 tests/test_agent_tools_validation.py create mode 100644 tests/test_memory_sql.py diff --git a/braindb/agent/tools.py b/braindb/agent/tools.py index 3bc3b6f..a2ae145 100644 --- a/braindb/agent/tools.py +++ b/braindb/agent/tools.py @@ -16,17 +16,19 @@ import json import logging import time -from typing import Optional +from typing import Optional, get_args from uuid import UUID +import psycopg2 import psycopg2.extras from agents import function_tool from braindb.config import settings from braindb.db import get_conn +from braindb.schemas.relations import RELATION_TYPES from braindb.schemas.search import ContextRequest -from 
braindb.services.activity_log import log_activity, query_log -from braindb.services.context import assemble_context, effective_importance, track_access +from braindb.services.activity_log import log_activity, log_activity_in_new_transaction, query_log +from braindb.services.context import assemble_context, track_access from braindb.services.embedding_service import get_embedding_service from braindb.services.keyword_service import ( ensure_keyword_entities, @@ -52,6 +54,38 @@ def _err(msg: str) -> str: return f"ERROR: {msg}" +_PLACEHOLDER_UUIDS = { + "", + "", + "search-mode-context", +} + +_ALLOWED_RELATION_TYPES = frozenset(get_args(RELATION_TYPES)) + + +def _validate_uuid_string(value: str | UUID | None, field_name: str) -> str: + """Return a normalized UUID string or raise ValueError before SQL sees it.""" + text = str(value).strip() if value is not None else "" + if not text: + raise ValueError(f"{field_name} is required") + if ( + text in _PLACEHOLDER_UUIDS + or (text.startswith("<") and text.endswith(">")) + or text.startswith("entity-id-of-") + ): + raise ValueError(f"{field_name} must be a real UUID, got placeholder {text!r}") + try: + return str(UUID(text)) + except ValueError as exc: + raise ValueError(f"{field_name} must be a valid UUID, got {text!r}") from exc + + +def _entity_exists(conn, entity_id: str) -> bool: + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM entities WHERE id = %s", (entity_id,)) + return cur.fetchone() is not None + + def _verbose(name: str): """Decorator that logs tool entry and exit when settings.agent_verbose is True. Placed BELOW @function_tool so the SDK still introspects the real signature. 
@@ -181,6 +215,27 @@ def _insert_entity_raw(conn, entity_type: str, content: str, keywords: list[str] return str(eid) +def _save_fact_impl( + content: str, + keywords: list[str], + source: str = "user-stated", + certainty: float = 0.8, + importance: float = 0.6, + notes: Optional[str] = None, +) -> str: + try: + with get_conn() as conn: + eid = _insert_entity_raw(conn, "fact", content, keywords, source, importance, notes) + with conn.cursor() as cur: + cur.execute( + "INSERT INTO facts_ext (entity_id, certainty, is_verified) VALUES (%s, %s, FALSE)", + (eid, certainty), + ) + return f"Saved fact id={eid}" + except Exception as e: + return _err(str(e)) + + @function_tool @_verbose("save_fact") async def save_fact( @@ -201,17 +256,7 @@ async def save_fact( importance: Weight 0-1 (default 0.6). notes: Optional running commentary. """ - try: - with get_conn() as conn: - eid = _insert_entity_raw(conn, "fact", content, keywords, source, importance, notes) - with conn.cursor() as cur: - cur.execute( - "INSERT INTO facts_ext (entity_id, certainty, is_verified) VALUES (%s, %s, FALSE)", - (eid, certainty), - ) - return f"Saved fact id={eid}" - except Exception as e: - return _err(str(e)) + return _save_fact_impl(content, keywords, source, certainty, importance, notes) @function_tool @@ -276,6 +321,27 @@ async def save_source( return _err(str(e)) +def _save_rule_impl( + content: str, + category: str = "behavior", + priority: int = 50, + always_on: bool = False, + keywords: Optional[list[str]] = None, + importance: float = 0.8, +) -> str: + try: + with get_conn() as conn: + eid = _insert_entity_raw(conn, "rule", content, keywords or [], "user-stated", importance, None) + with conn.cursor() as cur: + cur.execute( + "INSERT INTO rules_ext (entity_id, always_on, category, priority, is_active) VALUES (%s, %s, %s, %s, TRUE)", + (eid, always_on, category, priority), + ) + return f"Saved rule id={eid}" + except Exception as e: + return _err(str(e)) + + @function_tool 
@_verbose("save_rule") async def save_rule( @@ -296,17 +362,7 @@ async def save_rule( keywords: Optional topic keywords. importance: Weight 0-1 (default 0.8). """ - try: - with get_conn() as conn: - eid = _insert_entity_raw(conn, "rule", content, keywords or [], "user-stated", importance, None) - with conn.cursor() as cur: - cur.execute( - "INSERT INTO rules_ext (entity_id, always_on, category, priority, is_active) VALUES (%s, %s, %s, %s, TRUE)", - (eid, always_on, category, priority), - ) - return f"Saved rule id={eid}" - except Exception as e: - return _err(str(e)) + return _save_rule_impl(content, category, priority, always_on, keywords, importance) # ====================================================================== # @@ -322,6 +378,7 @@ async def get_entity(entity_id: str) -> str: entity_id: UUID of the entity. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute("SELECT * FROM entities WHERE id = %s", (entity_id,)) @@ -413,6 +470,7 @@ async def update_entity( importance: New importance 0-1. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") # Datasource guardrail — look up type and strip content if protected. content_dropped = False with get_conn() as conn: @@ -466,6 +524,7 @@ async def delete_entity(entity_id: str) -> str: entity_id: UUID to delete. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute("SELECT entity_type FROM entities WHERE id = %s", (entity_id,)) @@ -502,8 +561,21 @@ async def create_relation( description: Why this relation exists. 
""" try: + from_entity_id = _validate_uuid_string(from_entity_id, "from_entity_id") + to_entity_id = _validate_uuid_string(to_entity_id, "to_entity_id") + if relation_type not in _ALLOWED_RELATION_TYPES: + allowed = ", ".join(sorted(_ALLOWED_RELATION_TYPES)) + return _err(f"relation_type must be one of: {allowed}") with get_conn() as conn: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute( + "SELECT id FROM entities WHERE id IN (%s, %s)", + (from_entity_id, to_entity_id), + ) + found_ids = {str(r["id"]) for r in cur.fetchall()} + missing_ids = [eid for eid in (from_entity_id, to_entity_id) if eid not in found_ids] + if missing_ids: + return _err(f"entity {missing_ids[0]} not found") try: cur.execute( """INSERT INTO relations (from_entity_id, to_entity_id, relation_type, relevance_score, description) @@ -514,6 +586,10 @@ async def create_relation( except psycopg2.errors.UniqueViolation: conn.rollback() return _err(f"relation {relation_type} already exists between these entities") + except psycopg2.IntegrityError as e: + conn.rollback() + message = getattr(getattr(e, "diag", None), "message_primary", None) or str(e) + return _err(f"could not create relation: {message}") log_activity(conn, "create", "relation", rid, details={ "from": from_entity_id, "to": to_entity_id, "type": relation_type, }) @@ -531,7 +607,10 @@ async def view_entity_relations(entity_id: str) -> str: entity_id: UUID of the entity. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: + if not _entity_exists(conn, entity_id): + return _err(f"entity {entity_id} not found") with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute( """SELECT id, from_entity_id, to_entity_id, relation_type, relevance_score, description @@ -563,6 +642,7 @@ async def delete_relation(relation_id: str) -> str: relation_id: UUID of the relation. 
""" try: + relation_id = _validate_uuid_string(relation_id, "relation_id") with get_conn() as conn: with conn.cursor() as cur: cur.execute("DELETE FROM relations WHERE id = %s RETURNING id", (relation_id,)) @@ -588,7 +668,10 @@ async def view_tree(entity_id: str, max_depth: int = 2) -> str: max_depth: How far to traverse (1-3, default 2). """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: + if not _entity_exists(conn, entity_id): + return _err(f"entity {entity_id} not found") with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute( """SELECT e.*, r.relation_type, r.relevance_score, r.description AS rel_desc, @@ -631,10 +714,11 @@ async def search_sql(query: str) -> str: cur.execute(query) columns = [d[0] for d in cur.description] if cur.description else [] rows = cur.fetchmany(1000) - log_activity(conn, "sql_query", details={"query": query[:500], "rows": len(rows)}) + log_activity_in_new_transaction("sql_query", details={"query": query[:500], "rows": len(rows)}) result = {"columns": columns, "rows": [[str(v) if v is not None else None for v in r] for r in rows], "row_count": len(rows)} return _truncate(json.dumps(result, default=str, indent=2)) except Exception as e: + log_activity_in_new_transaction("sql_query", details={"query": query[:500], "error": str(e)}) return _err(str(e)) @@ -653,6 +737,8 @@ async def view_log( limit: Max entries (default 30). 
""" try: + if entity_id is not None: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: rows = query_log(conn, operation=operation, entity_id=entity_id, limit=limit) if not rows: diff --git a/braindb/routers/memory.py b/braindb/routers/memory.py index c680d39..ac2752f 100644 --- a/braindb/routers/memory.py +++ b/braindb/routers/memory.py @@ -11,17 +11,15 @@ from braindb.db import get_conn from braindb.schemas.search import ContextRequest, ContextResponse, SearchRequest, SearchResultItem -from braindb.services.activity_log import log_activity, query_log +from braindb.services.activity_log import log_activity, log_activity_in_new_transaction, query_log from braindb.services.embedding_service import get_embedding_service from braindb.services.keyword_service import generate_missing_embeddings from braindb.services.context import ( assemble_context, effective_importance, - fetch_always_on_rules, fetch_ext, track_access, ) -from braindb.services.graph import graph_expand from braindb.services.search import fuzzy_search router = APIRouter(prefix="/api/v1/memory", tags=["memory"]) @@ -180,9 +178,9 @@ def read_only_sql(body: SqlRequest): raise HTTPException(400, "Only SELECT or WITH queries are allowed") start = time.perf_counter() - with get_conn() as conn: - with conn.cursor() as cur: - try: + try: + with get_conn() as conn: + with conn.cursor() as cur: cur.execute("SET LOCAL statement_timeout = '5s'") cur.execute("SET LOCAL transaction_read_only = on") cur.execute(body.query) @@ -190,21 +188,27 @@ def read_only_sql(body: SqlRequest): rows = cur.fetchmany(1000) # Convert rows to JSON-safe format safe_rows = [[_to_safe(v) for v in row] for row in rows] - except Exception as e: - raise HTTPException(400, f"Query error: {e}") - + except Exception as e: elapsed_ms = int((time.perf_counter() - start) * 1000) - log_activity(conn, "sql_query", details={ + log_activity_in_new_transaction("sql_query", details={ "query": body.query[:500], - 
"rows": len(safe_rows), + "error": str(e), "elapsed_ms": elapsed_ms, }) - return { - "columns": columns, - "rows": safe_rows, - "row_count": len(safe_rows), - "elapsed_ms": elapsed_ms, - } + raise HTTPException(400, f"Query error: {e}") + + elapsed_ms = int((time.perf_counter() - start) * 1000) + log_activity_in_new_transaction("sql_query", details={ + "query": body.query[:500], + "rows": len(safe_rows), + "elapsed_ms": elapsed_ms, + }) + return { + "columns": columns, + "rows": safe_rows, + "row_count": len(safe_rows), + "elapsed_ms": elapsed_ms, + } def _to_safe(value): diff --git a/braindb/services/activity_log.py b/braindb/services/activity_log.py index 3a24cd8..e80c445 100644 --- a/braindb/services/activity_log.py +++ b/braindb/services/activity_log.py @@ -4,8 +4,15 @@ The log_activity function is fire-and-forget: it must never fail the main operation. """ +import logging + import psycopg2.extras +from braindb.db import get_conn + + +logger = logging.getLogger(__name__) + def log_activity( conn, @@ -35,6 +42,21 @@ def log_activity( pass +def log_activity_in_new_transaction( + operation: str, + entity_type: str | None = None, + entity_id: str | None = None, + details: dict | None = None, + context_note: str | None = None, +) -> None: + """Write an activity log entry using its own normal transaction.""" + try: + with get_conn() as conn: + log_activity(conn, operation, entity_type, entity_id, details, context_note) + except Exception as exc: + logger.warning("Activity log write failed in separate transaction: %s", exc) + + def query_log( conn, operation: str | None = None, diff --git a/pyproject.toml b/pyproject.toml index 011c379..23d82d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,3 +61,6 @@ packages = ["braindb"] testpaths = ["tests"] timeout = 60 addopts = "-ra" +markers = [ + "unit: tests that do not require a live BrainDB stack", +] diff --git a/tests/conftest.py b/tests/conftest.py index 3f74f3e..65ad8da 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py
@@ -42,8 +42,10 @@ def _wait_for_health(url: str, timeout: int = 30) -> bool:
 
 
 @pytest.fixture(scope="session", autouse=True)
-def _require_live_api() -> None:
-    """Fail fast and loud if the stack isn't up — tests have nothing to run against."""
+def _require_live_api(request: pytest.FixtureRequest) -> None:
+    """Fail fast for integration tests; pure unit tests do not need the stack."""
+    if request.session.items and all(item.get_closest_marker("unit") for item in request.session.items):
+        return
     if not _wait_for_health(API_URL):
         pytest.fail(
             f"BrainDB API not healthy at {API_URL}. "
diff --git a/tests/test_agent_tools_validation.py b/tests/test_agent_tools_validation.py
new file mode 100644
index 0000000..d510c12
--- /dev/null
+++ b/tests/test_agent_tools_validation.py
@@ -0,0 +1,108 @@
+"""Targeted tests for agent tool ID validation."""
+from __future__ import annotations
+
+import asyncio
+import json
+import uuid
+from contextlib import contextmanager
+
+from agents.tool import ToolContext
+
+from braindb.agent import tools
+
+
+def _invoke_tool(tool, args: dict) -> str:
+    payload = json.dumps(args)
+    ctx = ToolContext(
+        context=None,
+        tool_name=tool.name,
+        tool_call_id="pytest",
+        tool_arguments=payload,
+    )
+    return asyncio.run(tool.on_invoke_tool(ctx, payload))
+
+
+def test_get_entity_rejects_placeholder_before_db(monkeypatch):
+    def fail_get_conn():
+        raise AssertionError("database should not be queried for placeholder IDs")
+
+    monkeypatch.setattr(tools, "get_conn", fail_get_conn)
+
+    result = _invoke_tool(tools.get_entity, {"entity_id": "<entity-id>"})
+
+    assert result.startswith("ERROR:")
+    assert "placeholder" in result
+
+
+def test_view_log_rejects_non_uuid_filter_before_db(monkeypatch):
+    def fail_get_conn():
+        raise AssertionError("database should not be queried for invalid IDs")
+
+    monkeypatch.setattr(tools, "get_conn", fail_get_conn)
+
+    result = _invoke_tool(tools.view_log, {
+        "operation": None,
+        "entity_id": 
"search-mode-context", + "limit": 30, + }) + + assert result.startswith("ERROR:") + assert "placeholder" in result + + +def test_create_relation_rejects_invalid_type_before_db(monkeypatch): + def fail_get_conn(): + raise AssertionError("database should not be queried for invalid relation types") + + monkeypatch.setattr(tools, "get_conn", fail_get_conn) + + result = _invoke_tool(tools.create_relation, { + "from_entity_id": str(uuid.uuid4()), + "to_entity_id": str(uuid.uuid4()), + "relation_type": "i_like_it", + "relevance_score": 0.7, + "description": None, + }) + + assert result.startswith("ERROR:") + assert "relation_type" in result + + +def test_create_relation_prechecks_missing_entities_before_insert(monkeypatch): + queries: list[str] = [] + + class FakeCursor: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def execute(self, query, params=None): + queries.append(query) + + def fetchall(self): + return [] + + class FakeConn: + def cursor(self, *args, **kwargs): + return FakeCursor() + + @contextmanager + def fake_get_conn(): + yield FakeConn() + + monkeypatch.setattr(tools, "get_conn", fake_get_conn) + missing_from = str(uuid.uuid4()) + + result = _invoke_tool(tools.create_relation, { + "from_entity_id": missing_from, + "to_entity_id": str(uuid.uuid4()), + "relation_type": "supports", + "relevance_score": 0.7, + "description": None, + }) + + assert result == f"ERROR: entity {missing_from} not found" + assert len(queries) == 1 + assert "SELECT id FROM entities" in queries[0] diff --git a/tests/test_memory_sql.py b/tests/test_memory_sql.py new file mode 100644 index 0000000..1d8b04d --- /dev/null +++ b/tests/test_memory_sql.py @@ -0,0 +1,41 @@ +"""Targeted tests for read-only SQL execution and activity logging.""" +from __future__ import annotations + +import uuid + +import requests + + +def _recent_sql_log_details(api: str, limit: int = 20) -> list[dict]: + r = requests.get(f"{api}/api/v1/memory/log", 
params={"operation": "sql_query", "limit": limit}, timeout=10) + assert r.status_code == 200, f"activity log query failed: {r.status_code} {r.text[:200]}" + return [entry.get("details") or {} for entry in r.json()] + + +def test_read_only_sql_success_logs_in_separate_transaction(api): + marker = f"pytest_sql_{uuid.uuid4().hex}" + query = f"SELECT 1 AS ok /* {marker} */" + + r = requests.post(f"{api}/api/v1/memory/sql", json={"query": query}, timeout=10) + + assert r.status_code == 200, f"sql query failed: {r.status_code} {r.text[:200]}" + body = r.json() + assert body["columns"] == ["ok"] + assert body["rows"] == [[1]] + assert body["row_count"] == 1 + + details = _recent_sql_log_details(api) + assert any(d.get("query") == query and d.get("rows") == 1 for d in details) + + +def test_read_only_sql_failure_logs_in_separate_transaction(api): + marker = f"pytest_sql_{uuid.uuid4().hex}" + query = f"SELECT definitely_missing_column /* {marker} */" + + r = requests.post(f"{api}/api/v1/memory/sql", json={"query": query}, timeout=10) + + assert r.status_code == 400 + assert "Query error" in r.text + + details = _recent_sql_log_details(api) + assert any(d.get("query") == query and "error" in d for d in details) From 75be7f7b7789e75b127ba6635c71751e6d5b3845 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:35 +0300 Subject: [PATCH 4/5] Add agent save fast path --- braindb/agent/fast_path.py | 47 +++++++++++++++++++ braindb/routers/agent.py | 45 +++++++++++++++--- tests/test_agent.py | 9 ++++ tests/test_agent_fast_path.py | 86 +++++++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+), 7 deletions(-) create mode 100644 braindb/agent/fast_path.py create mode 100644 tests/test_agent_fast_path.py diff --git a/braindb/agent/fast_path.py b/braindb/agent/fast_path.py new file mode 100644 index 0000000..a258cb0 --- /dev/null +++ b/braindb/agent/fast_path.py @@ -0,0 +1,47 @@ +"""Deterministic fast paths for simple BrainDB agent requests.""" +import re 
+from typing import Any
+
+from braindb.agent.tools import _save_fact_impl, _save_rule_impl
+
+_SAVE_RE = re.compile(r"^\s*Save:\s+(?P<content>.+?)\s*$", re.IGNORECASE | re.DOTALL)
+_SAVE_RULE_RE = re.compile(r"^\s*Save as rule:\s+(?P<content>.+?)\s*$", re.IGNORECASE | re.DOTALL)
+_MAX_FAST_PATH_CHARS = 2000
+
+
+def _content_is_safe_for_fast_path(content: str) -> bool:
+    return bool(content) and "?" not in content and len(content) <= _MAX_FAST_PATH_CHARS
+
+
+def try_fast_path(query: str) -> dict[str, Any] | None:
+    """Handle simple save requests without invoking the LLM agent loop."""
+    rule_match = _SAVE_RULE_RE.match(query)
+    if rule_match:
+        content = rule_match.group("content").strip()
+        if not _content_is_safe_for_fast_path(content):
+            return None
+        answer = _save_rule_impl(
+            content=content,
+            keywords=[],
+            importance=0.8,
+        )
+        status = "fast_path_error" if answer.startswith("ERROR:") else "fast_path"
+        return {"answer": answer, "max_turns": 0, "status": status}
+
+    save_match = _SAVE_RE.match(query)
+    if save_match:
+        content = save_match.group("content").strip()
+        if not _content_is_safe_for_fast_path(content):
+            return None
+        answer = _save_fact_impl(
+            content=content,
+            keywords=[],
+            source="user-stated",
+            certainty=0.9,
+            importance=0.7,
+            notes="Saved via agent fast path.",
+        )
+        status = "fast_path_error" if answer.startswith("ERROR:") else "fast_path"
+        return {"answer": answer, "max_turns": 0, "status": status}
+
+    return None
diff --git a/braindb/routers/agent.py b/braindb/routers/agent.py
index 3337bf2..68f8218 100644
--- a/braindb/routers/agent.py
+++ b/braindb/routers/agent.py
@@ -6,13 +6,16 @@ its internal tools and returns a summary.
 
""" import logging +from typing import Any +from agents.exceptions import MaxTurnsExceeded from fastapi import APIRouter, HTTPException from pydantic import BaseModel, Field from braindb.agent.agent import run_agent_query -from braindb.db import get_conn -from braindb.services.activity_log import log_activity +from braindb.agent.fast_path import try_fast_path +from braindb.config import settings +from braindb.services.activity_log import log_activity_in_new_transaction logger = logging.getLogger(__name__) @@ -24,6 +27,11 @@ class AgentQueryRequest(BaseModel): max_turns: int | None = Field(default=None, ge=1, le=60) +def _log_agent_query(query: str, details: dict[str, Any]) -> None: + payload = {"query": query[:500], **details} + log_activity_in_new_transaction("agent_query", details=payload) + + @router.post("/query") async def agent_query(body: AgentQueryRequest): """Run a natural-language query through the BrainDB agent. @@ -31,13 +39,36 @@ async def agent_query(body: AgentQueryRequest): When AGENT_VERBOSE=true is set in the server environment, every tool call is logged to stdout and visible via `docker logs braindb_api`. 
""" + turns = body.max_turns or settings.agent_max_turns + fast_path_result = try_fast_path(body.query) + if fast_path_result is not None: + _log_agent_query(body.query, { + "max_turns": 0, + "status": fast_path_result.get("status"), + }) + return fast_path_result + try: result = await run_agent_query(body.query, max_turns=body.max_turns) - with get_conn() as conn: - log_activity(conn, "agent_query", details={ - "query": body.query[:500], - "max_turns": result.get("max_turns"), - }) + result.setdefault("status", "ok") + _log_agent_query(body.query, { + "max_turns": result.get("max_turns"), + "status": result.get("status"), + }) + return result + except MaxTurnsExceeded as e: + logger.warning("Agent query exceeded max_turns=%s: %s", turns, e) + result = { + "answer": f"Agent exceeded max_turns={turns} before calling submit_result.", + "max_turns": turns, + "turns_used": turns, + "status": "max_turns_exceeded", + } + _log_agent_query(body.query, { + "max_turns": turns, + "turns_used": turns, + "status": "max_turns_exceeded", + }) return result except Exception as e: logger.exception("Agent query failed") diff --git a/tests/test_agent.py b/tests/test_agent.py index e7f3ebd..72d02e3 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -49,3 +49,12 @@ def test_agent_endpoint_rejects_missing_query(api): timeout=10, ) assert r.status_code in (400, 422), f"expected 4xx for missing query, got {r.status_code}" + + +@pytest.mark.unit +def test_agent_builder_stops_at_submit_result(): + from braindb.agent.agent import create_braindb_agent + + agent = create_braindb_agent() + + assert agent.tool_use_behavior["stop_at_tool_names"] == ["submit_result"] diff --git a/tests/test_agent_fast_path.py b/tests/test_agent_fast_path.py new file mode 100644 index 0000000..a5e5e08 --- /dev/null +++ b/tests/test_agent_fast_path.py @@ -0,0 +1,86 @@ +"""Agent endpoint fast-path tests.""" +import re + +import requests + + +_ID_RE = re.compile(r"id=([0-9a-f-]{36})") + + +def 
_extract_entity_id(answer: str) -> str: + match = _ID_RE.search(answer) + assert match, f"missing entity id in answer: {answer!r}" + return match.group(1) + + +def test_agent_fast_path_saves_fact(api, test_tag, created_entities): + content = f"{test_tag} user prefers deterministic BrainDB save fast paths" + response = requests.post( + f"{api}/api/v1/agent/query", + json={"query": f"Save: {content}"}, + timeout=10, + ) + + assert response.status_code == 200, response.text + body = response.json() + assert body["status"] == "fast_path" + assert body["max_turns"] == 0 + entity_id = _extract_entity_id(body["answer"]) + created_entities.append(entity_id) + + entity = requests.get(f"{api}/api/v1/entities/{entity_id}", timeout=10) + assert entity.status_code == 200, entity.text + payload = entity.json() + assert payload["entity_type"] == "fact" + assert payload["content"] == content + + +def test_agent_fast_path_saves_rule(api, test_tag, created_entities): + content = f"{test_tag} always prefer deterministic fast paths for simple memory saves" + response = requests.post( + f"{api}/api/v1/agent/query", + json={"query": f"Save as rule: {content}"}, + timeout=10, + ) + + assert response.status_code == 200, response.text + body = response.json() + assert body["status"] == "fast_path" + assert body["max_turns"] == 0 + entity_id = _extract_entity_id(body["answer"]) + created_entities.append(entity_id) + + entity = requests.get(f"{api}/api/v1/entities/{entity_id}", timeout=10) + assert entity.status_code == 200, entity.text + payload = entity.json() + assert payload["entity_type"] == "rule" + assert payload["content"] == content + + +def test_agent_save_question_bypasses_fast_path(api): + response = requests.post( + f"{api}/api/v1/agent/query", + json={"query": "Save: what does the user prefer?", "max_turns": 1}, + timeout=80, + ) + + assert response.status_code == 200, response.text + assert response.json().get("status") != "fast_path" + + +def 
test_agent_max_turns_returns_structured_status(api): + response = requests.post( + f"{api}/api/v1/agent/query", + json={ + "query": "Recall everything about the user, then save a thought summarizing it, then connect them with relations.", + "max_turns": 1, + }, + timeout=80, + ) + + assert response.status_code == 200, response.text + body = response.json() + assert body["status"] == "max_turns_exceeded" + assert body["max_turns"] == 1 + assert body["turns_used"] == 1 + assert body["answer"] From 9e240f6b26ebf10fe1ed463ccc027bb482b1642a Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:17:01 +0300 Subject: [PATCH 5/5] Validate configured LLM profiles --- braindb/config.py | 3 ++- tests/test_config_profiles.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/braindb/config.py b/braindb/config.py index 40d2fdd..18c7ef8 100644 --- a/braindb/config.py +++ b/braindb/config.py @@ -83,7 +83,8 @@ def _env_setting(self, env_name: str) -> str: @property def resolved_agent_model(self) -> str: - model = self.agent_model or self._active_llm_profile["model"] + profile = self._active_llm_profile + model = self.agent_model or profile["model"] if not model: raise ValueError( f"AGENT_MODEL must be set for LLM_PROFILE={self.llm_profile!r}; " diff --git a/tests/test_config_profiles.py b/tests/test_config_profiles.py index dabd391..dff2279 100644 --- a/tests/test_config_profiles.py +++ b/tests/test_config_profiles.py @@ -7,7 +7,7 @@ def test_codex_profile_resolves_default_model(): - settings = Settings(_env_file=None, llm_profile="codex", openai_api_key="test-key") + settings = Settings(_env_file=None, llm_profile="codex", agent_model="", openai_api_key="test-key") assert settings.resolved_agent_model == "openai/gpt-5.3-codex-spark" @@ -37,7 +37,7 @@ def test_agent_model_override_wins_for_codex_profile(): def test_unknown_profile_error_lists_known_profiles(): - settings = Settings(_env_file=None, llm_profile="missing") + settings = 
Settings(_env_file=None, llm_profile="missing", agent_model="") with pytest.raises(ValueError, match="openai_compatible"): _ = settings.resolved_agent_model