From 94d0a2c0349166e06e9f426a5c5d64044cd51a36 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:18 +0300 Subject: [PATCH 1/5] Add local stack management workflow --- .dockerignore | 34 ++++ .gitignore | 2 + docker-compose.yml | 34 +++- scripts/braindb-manage.sh | 357 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 425 insertions(+), 2 deletions(-) create mode 100644 .dockerignore create mode 100755 scripts/braindb-manage.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..165786d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,34 @@ +# Secrets and local environment +.env +.env.* +!.env.example + +# VCS and local agent/editor state +.git/ +.gitignore +.claude/ +.codex +.codex/ +.vscode/ +.idea/ + +# Python/test/build artifacts +__pycache__/ +*.py[cod] +*$py.class +*.egg-info/ +.pytest_cache/ +.mypy_cache/ +.coverage +dist/ +build/ + +# Runtime/user data +data/sources/* +!data/sources/.gitkeep +!data/sources/README.md + +# OS noise +.DS_Store +Thumbs.db +*.stackdump diff --git a/.gitignore b/.gitignore index 005c3b6..2ba4b43 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ # Claude Code — settings.json, scheduled_tasks.lock, and any future state .claude/ +.codex +.codex/ # Python __pycache__/ diff --git a/docker-compose.yml b/docker-compose.yml index 491fb6d..e7ec46b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,32 @@ services: + postgres: + image: pgvector/pgvector:pg16 + container_name: braindb_postgres + restart: unless-stopped + networks: + local-network: + aliases: + - braindb_postgres + environment: + POSTGRES_USER: ${POSTGRES_USER:-braindb} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-braindb} + POSTGRES_DB: ${POSTGRES_DB:-braindb} + volumes: + - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"] + interval: 5s + timeout: 5s + retries: 12 + api: build: . 
+ image: braindb:local container_name: braindb_api restart: unless-stopped + depends_on: + postgres: + condition: service_healthy networks: - local-network environment: @@ -11,6 +35,9 @@ services: HF_TOKEN: ${HF_TOKEN:-} LLM_PROFILE: ${LLM_PROFILE:-deepinfra} AGENT_MODEL: ${AGENT_MODEL:-} + AGENT_BASE_URL: ${AGENT_BASE_URL:-} + AGENT_API_KEY: ${AGENT_API_KEY:-} + OPENAI_API_KEY: ${OPENAI_API_KEY:-} NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY:-} DEEPINFRA_API_KEY: ${DEEPINFRA_API_KEY:-} VLLM_API_KEY: ${VLLM_API_KEY:-} @@ -25,10 +52,10 @@ services: volumes: - .:/app command: > - sh -c "alembic upgrade head && uvicorn braindb.main:app --host 0.0.0.0 --port ${API_PORT:-8000} --reload" + sh -c "until python -c 'import os, psycopg2; conn = psycopg2.connect(os.environ[\"DATABASE_URL\"]); conn.close()'; do echo waiting for database; sleep 2; done; alembic upgrade head && uvicorn braindb.main:app --host 0.0.0.0 --port ${API_PORT:-8000} --reload" watcher: - build: . + image: braindb:local container_name: braindb_watcher restart: unless-stopped depends_on: @@ -45,3 +72,6 @@ services: networks: local-network: external: true + +volumes: + postgres-data: diff --git a/scripts/braindb-manage.sh b/scripts/braindb-manage.sh new file mode 100755 index 0000000..b1d8350 --- /dev/null +++ b/scripts/braindb-manage.sh @@ -0,0 +1,357 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "$SCRIPT_DIR/.." && pwd)" +COMPOSE_FILE="$REPO_ROOT/docker-compose.yml" +ENV_FILE="$REPO_ROOT/.env" +ENV_EXAMPLE="$REPO_ROOT/.env.example" +DOCKER_BIN="${DOCKER_BIN:-docker}" + +log() { printf '%s\n' "$*"; } +warn() { printf 'warn: %s\n' "$*" >&2; } +die() { printf 'error: %s\n' "$*" >&2; exit 1; } + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || die "missing required command: $1" +} + +compose() { + "$DOCKER_BIN" compose -f "$COMPOSE_FILE" "$@" +} + +ensure_env_file() { + if [[ ! 
-f "$ENV_FILE" ]]; then + [[ -f "$ENV_EXAMPLE" ]] || die "missing .env.example; cannot create .env" + cp "$ENV_EXAMPLE" "$ENV_FILE" + warn "created .env from .env.example" + fi +} + +env_value() { + local key="$1" + local default_value="${2:-}" + [[ -f "$ENV_FILE" ]] || { printf '%s' "$default_value"; return; } + + python - "$ENV_FILE" "$key" "$default_value" <<'PY' +import sys +from pathlib import Path + +path = Path(sys.argv[1]) +key = sys.argv[2] +default = sys.argv[3] + +for line in path.read_text().splitlines(): + if not line or line.lstrip().startswith('#') or '=' not in line: + continue + k, v = line.split('=', 1) + if k.strip() == key: + print(v.strip()) + raise SystemExit(0) + +print(default) +PY +} + +env_set() { + local key="$1" + local value="$2" + + python - "$ENV_FILE" "$key" "$value" <<'PY' +import sys +from pathlib import Path + +path = Path(sys.argv[1]) +key = sys.argv[2] +value = sys.argv[3] + +lines = path.read_text().splitlines() +updated = False +out = [] + +for line in lines: + if line.startswith(f"{key}="): + out.append(f"{key}={value}") + updated = True + else: + out.append(line) + +if not updated: + out.append(f"{key}={value}") + +path.write_text("\n".join(out) + "\n") +PY +} + +database_url() { + env_value DATABASE_URL +} + +ensure_database_url() { + local url + url="$(database_url)" + [[ -n "$url" ]] || die ".env must set DATABASE_URL" +} + +ensure_network() { + "$DOCKER_BIN" network inspect local-network >/dev/null 2>&1 || "$DOCKER_BIN" network create local-network >/dev/null +} + +health_port() { + env_value API_PORT 8100 +} + +health_url() { + printf 'http://localhost:%s/health' "$(health_port)" +} + +wait_for_health() { + require_cmd curl + local url attempts sleep_s response + url="$(health_url)" + attempts=30 + sleep_s=2 + + while (( attempts > 0 )); do + response="$(curl -fsS "$url" 2>/dev/null || true)" + if [[ "$response" == *'"status":"ok"'* ]]; then + log "health: ok ($url)" + return 0 + fi + sleep "$sleep_s" + ((attempts--)) + 
done + + warn "health check failed after waiting; try: curl -s $(health_url)" + return 1 +} + +openai_compatible_base_url() { + env_value AGENT_BASE_URL +} + +openai_compatible_root_url() { + local base + base="$(openai_compatible_base_url)" + [[ -n "$base" ]] || return 1 + base="${base%/}" + case "$base" in + */v1) + printf '%s\n' "${base%/v1}" + ;; + *) + printf '%s\n' "$base" + ;; + esac +} + +fetch_openai_compatible_models() { + require_cmd curl + local base root payload + base="$(openai_compatible_base_url)" + [[ -n "$base" ]] || return 1 + + root="$(openai_compatible_root_url)" + + payload="$( + curl -fsS --max-time 4 "$base/models" 2>/dev/null || \ + curl -fsS --max-time 4 "$root/api/tags" 2>/dev/null || \ + curl -fsS --max-time 4 "$root/v1/models" 2>/dev/null || true + )" + + [[ -n "$payload" ]] || return 1 + + python - "$payload" <<'PY' +import json +import sys + +raw = sys.argv[1] +try: + data = json.loads(raw) +except Exception: + raise SystemExit(1) + +models = [] +if isinstance(data, dict): + if isinstance(data.get('models'), list): + for item in data['models']: + if isinstance(item, dict): + name = item.get('name') or item.get('model') or item.get('id') + if name: + models.append(name) + if isinstance(data.get('data'), list): + for item in data['data']: + if isinstance(item, dict): + name = item.get('id') or item.get('name') + if name: + models.append(name) + +seen = set() +for model in models: + model = model.strip() + if not model: + continue + if not model.startswith('openai/'): + model = f'openai/{model}' + if model not in seen: + seen.add(model) + print(model) +PY +} + +maybe_set_openai_compatible_model() { + local existing models count model + existing="$(env_value AGENT_MODEL)" + [[ -n "$existing" ]] && return 0 + + if ! models="$(fetch_openai_compatible_models)"; then + models="" + fi + if [[ -z "$models" ]]; then + die "LLM_PROFILE=openai_compatible/local_ollama needs AGENT_MODEL, and auto-discovery failed. 
Run: ./scripts/braindb-manage.sh models"
+    fi
+
+    count=0
+    while IFS= read -r model; do
+        [[ -n "$model" ]] && count=$((count + 1))
+    done <<<"$models"
+    if [[ "$count" == "1" ]]; then
+        model="$(printf '%s\n' "$models")"
+        env_set AGENT_MODEL "$model"
+        log "set AGENT_MODEL=$model"
+        return 0
+    fi
+
+    die "LLM_PROFILE=openai_compatible/local_ollama needs AGENT_MODEL and discovery found multiple models. Run: ./scripts/braindb-manage.sh models; then set AGENT_MODEL=openai/<model> in .env"
+}
+
+warn_if_unconfigured() {
+    local database_url llm_profile deepinfra_key nim_key openai_key agent_model agent_base_url
+    database_url="$(env_value DATABASE_URL)"
+    llm_profile="$(env_value LLM_PROFILE deepinfra)"
+    deepinfra_key="$(env_value DEEPINFRA_API_KEY)"
+    nim_key="$(env_value NVIDIA_NIM_API_KEY)"
+    openai_key="$(env_value OPENAI_API_KEY)"
+    agent_model="$(env_value AGENT_MODEL)"
+    agent_base_url="$(env_value AGENT_BASE_URL)"
+
+    case "$database_url" in
+        ""|postgresql://user:password@host:5432/braindb)
+            warn "DATABASE_URL still looks like the example; update .env before expecting a successful start"
+            ;;
+    esac
+
+    case "$llm_profile" in
+        deepinfra)
+            [[ -n "$deepinfra_key" ]] || warn "LLM_PROFILE=deepinfra but DEEPINFRA_API_KEY is empty"
+            ;;
+        nim)
+            [[ -n "$nim_key" ]] || warn "LLM_PROFILE=nim but NVIDIA_NIM_API_KEY is empty"
+            ;;
+        codex)
+            [[ -n "$openai_key" ]] || warn "LLM_PROFILE=codex but OPENAI_API_KEY is empty"
+            ;;
+        openai_compatible|local_ollama)
+            [[ -n "$agent_base_url" ]] || die "LLM_PROFILE=openai_compatible/local_ollama requires AGENT_BASE_URL"
+            if [[ -z "$agent_model" ]]; then
+                maybe_set_openai_compatible_model
+            fi
+            ;;
+    esac
+}
+
+print_openai_compatible_models() {
+    local models
+    if ! 
models="$(fetch_openai_compatible_models)"; then + die "could not reach OpenAI-compatible models endpoint from AGENT_BASE_URL" + fi + [[ -n "$models" ]] || die "no OpenAI-compatible models found at AGENT_BASE_URL" + printf '%s\n' "$models" +} + +start_stack() { + ensure_env_file + ensure_database_url + warn_if_unconfigured + ensure_network + compose up -d --build + wait_for_health +} + +update_stack() { + ensure_env_file + ensure_database_url + warn_if_unconfigured + ensure_network + compose up -d --build --force-recreate + wait_for_health +} + +status_stack() { + ensure_env_file + ensure_database_url + ensure_network + compose ps + if command -v curl >/dev/null 2>&1; then + curl -fsS "$(health_url)" || true + printf '\n' + fi +} + +logs_stack() { + ensure_env_file + ensure_database_url + ensure_network + compose logs -f --tail="${TAIL_LINES:-200}" "$@" +} + +usage() { + cat <<'EOF' +Usage: braindb-manage.sh + +Commands: + start, bootstrap, up Ensure .env/network and start the stack + update, upgrade Recreate services + status Show compose status and health + logs [service...] 
Follow service logs (default tail=200) + models List models from AGENT_BASE_URL + help Show this help + +Env overrides: + DOCKER_BIN=docker|podman Docker-compatible CLI to use + TAIL_LINES=200 Lines shown by logs +EOF +} + +main() { + require_cmd "$DOCKER_BIN" + local cmd="${1:-help}" + shift || true + + case "$cmd" in + start|bootstrap|up) + start_stack "$@" + ;; + update|upgrade) + update_stack "$@" + ;; + status) + status_stack "$@" + ;; + logs) + logs_stack "$@" + ;; + models) + ensure_env_file + print_openai_compatible_models + ;; + help|-h|--help) + usage + ;; + *) + die "unknown command: $cmd (try: help)" + ;; + esac +} + +main "$@" From b8f2bf8518ace21b5fe034d6605678ec983d7e18 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:24 +0300 Subject: [PATCH 2/5] Add OpenAI-compatible agent profiles --- .env.example | 8 ++-- BRAINDB_GUIDE.md | 3 +- CLAUDE.md | 4 +- CONTRIBUTING.md | 6 +-- README.md | 42 +++++++++++++++++---- braindb/config.py | 56 ++++++++++++++++++++++++---- tests/test_config_profiles.py | 70 +++++++++++++++++++++++++++++++++++ 7 files changed, 164 insertions(+), 25 deletions(-) create mode 100644 tests/test_config_profiles.py diff --git a/.env.example b/.env.example index c26571d..47e5c14 100644 --- a/.env.example +++ b/.env.example @@ -17,8 +17,10 @@ HF_TOKEN= # (currently: nim, deepinfra, vllm_workstation). LLM_PROFILE=deepinfra -# Provider API keys — fill in whichever profile you're using. -# Get a NIM key at https://build.nvidia.com/, a DeepInfra key at https://deepinfra.com/ +# Provider API keys — fill in whichever hosted profile you're using. +# Get an OpenAI key for Codex, a NIM key at https://build.nvidia.com/, +# or a DeepInfra key at https://deepinfra.com/ +OPENAI_API_KEY= NVIDIA_NIM_API_KEY= DEEPINFRA_API_KEY= @@ -28,7 +30,7 @@ DEEPINFRA_API_KEY= VLLM_API_KEY= # Optional: override the profile's default model string (e.g. to try a smaller variant). -# Leave blank to use the profile's built-in default. 
+# Leave blank to use the profile's built-in default. Required for openai_compatible/local_ollama. AGENT_MODEL= # Agent verbosity — when true, every tool call is logged to stdout diff --git a/BRAINDB_GUIDE.md b/BRAINDB_GUIDE.md index 8917c31..5c8c2b0 100644 --- a/BRAINDB_GUIDE.md +++ b/BRAINDB_GUIDE.md @@ -306,7 +306,7 @@ curl -X POST http://localhost:8000/api/v1/entities/datasources/ingest \ ### BrainDB Agent — natural language queries -`POST /api/v1/agent/query` — instead of orchestrating individual API calls, send a plain English request and let BrainDB's internal agent handle it. The agent uses the OpenAI Agents SDK with LiteLLM (provider pluggable via `LLM_PROFILE` — default `deepinfra`, `nim` also supported) and has access to all 21 BrainDB operations as function tools. +`POST /api/v1/agent/query` — instead of orchestrating individual API calls, send a plain English request and let BrainDB's internal agent handle it. The agent uses the OpenAI Agents SDK with LiteLLM (provider pluggable via `LLM_PROFILE` — default `deepinfra`, with `nim`, `codex`, and generic OpenAI-compatible local endpoints also supported) and has access to all 21 BrainDB operations as function tools. ```bash curl -X POST http://localhost:8000/api/v1/agent/query \ @@ -340,6 +340,7 @@ The agent has these tools internally: `recall_memory`, `quick_search`, `save_fac - **Self-hosted vLLM**: set `LLM_PROFILE=vllm_workstation` for a vLLM server bound to the Docker host's loopback at `:8002`. No API key needed if the server runs without auth. See [CONTRIBUTING.md](CONTRIBUTING.md) for how to add your own self-hosted profile. - Profiles live in `braindb/config.py::_LLM_PROFILES`. Add new providers there (e.g. `together`, `openai`) by adding a dict entry — no code change required. - Optional override: set `AGENT_MODEL=` in `.env` to use a non-default model for the active profile. 
+- Optional auth override: set `AGENT_API_KEY=` only if your OpenAI-compatible endpoint requires auth; copilot-api and Ollama can run without it when local auth is disabled. **Verbose logging**: set `AGENT_VERBOSE=true` in `.env` to log every tool call to stdout (visible via `docker logs braindb_api -f`). The HTTP response stays clean — only `answer` and `max_turns`. diff --git a/CLAUDE.md b/CLAUDE.md index f79b079..dd61c64 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -127,7 +127,7 @@ curl -s -X DELETE http://localhost:8000/api/v1/entities/ **Direct API** (what's shown above) — call individual endpoints yourself. Full control, more verbose context. Good when you want to be precise about what's saved or recalled. -**Agent endpoint** — `POST /api/v1/agent/query` — send a natural language request and let BrainDB's internal agent handle it. The agent (LiteLLM with pluggable provider via `LLM_PROFILE` — default `deepinfra/google/gemma-4-31B-it`, NIM also supported) has all 21 BrainDB operations as tools. Cleaner conversation context, but slower (5-30 seconds for a query). +**Agent endpoint** — `POST /api/v1/agent/query` — send a natural language request and let BrainDB's internal agent handle it. The agent (LiteLLM with pluggable provider via `LLM_PROFILE` — default `deepinfra/google/gemma-4-31B-it`, NIM, Codex, and generic OpenAI-compatible endpoints such as copilot-api or Ollama also supported) has all 21 BrainDB operations as tools. Cleaner conversation context, but slower (5-30 seconds for a query). ```bash # Recall via the agent @@ -156,7 +156,7 @@ When debugging the agent: set `AGENT_VERBOSE=true` in `.env` and watch `docker l ## Important Notes -- `.env` contains real DB credentials and provider API keys (`DEEPINFRA_API_KEY`, `NVIDIA_NIM_API_KEY`, etc.) — **never commit it**, it is in `.gitignore`. Active provider is picked by `LLM_PROFILE` (see `braindb/config.py::_LLM_PROFILES`). 
+- `.env` contains real DB credentials and provider API keys (`DEEPINFRA_API_KEY`, `NVIDIA_NIM_API_KEY`, `OPENAI_API_KEY`, `AGENT_API_KEY`, etc.) — **never commit it**, it is in `.gitignore`. Active provider is picked by `LLM_PROFILE` (see `braindb/config.py::_LLM_PROFILES`). - Always-on rules (priority 100, `always_on: true`) are returned on every `/memory/context` call - `notes` field on any entity or relation is for running commentary — append observations over time - Keywords are stored as both a `TEXT[]` column on the entity AND as separate keyword entities linked via `tagged_with` relations (the keyword entities carry the embeddings for semantic search) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 396cec8..102184d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ Prerequisites: Docker Desktop (or any Docker Engine), Python 3.12, a Postgres 16 git clone braindb cd braindb cp .env.example .env -# edit .env — set DATABASE_URL, pick an LLM_PROFILE, fill in the matching API key +# edit .env — set DATABASE_URL, pick an LLM_PROFILE, fill in the matching API key or OpenAI-compatible endpoint docker network create local-network # one-time; docker-compose expects this docker compose up -d --build @@ -45,8 +45,8 @@ LiteLLM does the heavy lifting — providers are selected by a prefix in the mod "api_key_env": "MY_PROVIDER_API_KEY", }, ``` -2. Add `MY_PROVIDER_API_KEY=` to [`.env.example`](.env.example). -3. Add the env passthrough to [`docker-compose.yml`](docker-compose.yml) under the `api` service. +2. Add `MY_PROVIDER_API_KEY=` to [`.env.example`](.env.example) if the provider needs auth. +3. Add the env passthrough to [`docker-compose.yml`](docker-compose.yml) under the `api` service. OpenAI-compatible endpoints can use `LLM_PROFILE=openai_compatible` plus `AGENT_BASE_URL` / `AGENT_API_KEY` variables. 4. (Optional) Document the provider in the README and BRAINDB_GUIDE. 
No other code changes required — the agent resolves model and key through `settings.resolved_agent_model` and `settings.resolved_api_key`, which read the active profile. diff --git a/README.md b/README.md index 79854dc..eb33c7b 100644 --- a/README.md +++ b/README.md @@ -72,18 +72,28 @@ Any reachable hostname/IP works — the connecting user just needs network acces ### 4. Pick an LLM provider (for the internal agent) -The agent talks to any LiteLLM-supported backend. BrainDB ships with two profiles pre-configured: **DeepInfra** (default, fast, paid) and **NVIDIA NIM** (free tier, can be flaky). +The agent talks to any LiteLLM-supported backend. BrainDB ships with four profiles pre-configured: **DeepInfra** (default, fast, paid), **NVIDIA NIM** (free tier, can be flaky), **Codex** (`gpt-5.3-codex-spark` via OpenAI routing), and **openai_compatible** for local OpenAI-compatible APIs such as copilot-api or Ollama (`local_ollama` remains as a legacy alias). In `.env`: ``` -LLM_PROFILE=deepinfra # or 'nim' — default is 'deepinfra' +LLM_PROFILE=deepinfra # or 'codex'/'nim'/'openai_compatible' — default is 'deepinfra' DEEPINFRA_API_KEY=... # if profile=deepinfra — get from https://deepinfra.com/ NVIDIA_NIM_API_KEY=... # if profile=nim — get from https://build.nvidia.com/ +OPENAI_API_KEY=... # if profile=codex — OpenAI API key for Codex ``` -Only the key matching your chosen profile needs to be filled. Leave the other blank or absent. +For a local OpenAI-compatible server such as `copilot-api`: -Adding a third provider (Together, OpenAI, local vLLM, whatever) is a two-line entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + an env var — no other code changes. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe. 
+``` +LLM_PROFILE=openai_compatible +AGENT_BASE_URL=http://:4141/v1 # copilot-api default port +AGENT_MODEL=openai/gpt-5-mini +AGENT_API_KEY= # optional; only set if your endpoint requires auth +``` + +Only the key matching your chosen hosted profile needs to be filled. Leave the other blank or absent. For OpenAI-compatible local endpoints with auth disabled, leave `AGENT_API_KEY` blank. + +Adding another hosted provider (Together, OpenAI, local vLLM, whatever) is usually a small entry in [`braindb/config.py::_LLM_PROFILES`](braindb/config.py) + env passthrough — see [`CONTRIBUTING.md`](CONTRIBUTING.md) for the recipe. ### 5. Create the Docker network, then bring the stack up @@ -110,6 +120,19 @@ API at `http://localhost:8000`. Swagger UI at `http://localhost:8000/docs`. Data Drop a markdown file into `data/sources/` and the watcher sidecar picks it up within ~7 seconds — see [File Ingestion](#file-ingestion) below. +### Operational helper + +For a safer one-command workflow, use `scripts/braindb-manage.sh`: + +```bash +./scripts/braindb-manage.sh start +./scripts/braindb-manage.sh update +./scripts/braindb-manage.sh status +./scripts/braindb-manage.sh logs api +``` + +It creates `.env` from `.env.example` if needed, ensures the `local-network` Docker network exists, starts/recreates the Compose services, and checks `http://localhost:8000/health`. + --- ## Key Endpoints @@ -162,7 +185,7 @@ Single `query` (string) still works for backward compatibility. Instead of orchestrating individual API calls, you can talk to BrainDB in plain English via `POST /api/v1/agent/query`. The agent (built on the OpenAI Agents SDK + LiteLLM) decides which tools to call and returns a summary. 
```bash -curl -X POST http://localhost:8000/api/v1/agent/query \ +curl -X POST http://localhost:8100/api/v1/agent/query \ -H "Content-Type: application/json" \ -d '{"query":"What do you know about the user role and recent projects?"}' @@ -173,15 +196,18 @@ The agent has 21 tools — every single BrainDB endpoint plus `delegate_to_subag **LLM provider — pluggable via `.env`**: -`LLM_PROFILE` selects the backend. Profiles are defined in [braindb/config.py](braindb/config.py) (`_LLM_PROFILES`) — currently `deepinfra` (default, model `google/gemma-4-31B-it`) and `nim` (NVIDIA NIM, model `google/gemma-4-31b-it`). Each profile is a model-prefix + env-var pair; adding a new one is a dict entry. +`LLM_PROFILE` selects the backend. Profiles are defined in [braindb/config.py](braindb/config.py) (`_LLM_PROFILES`) — currently `deepinfra` (default, model `google/gemma-4-31B-it`), `nim` (NVIDIA NIM, model `google/gemma-4-31b-it`), `codex` (OpenAI Codex, model `gpt-5.3-codex-spark`), and `openai_compatible` (generic OpenAI-compatible `/v1` endpoints; `local_ollama` is a legacy alias). ``` -LLM_PROFILE=deepinfra # or nim — default is deepinfra +LLM_PROFILE=deepinfra # or codex/nim/openai_compatible — default is deepinfra DEEPINFRA_API_KEY=... # required if profile=deepinfra (https://deepinfra.com/) NVIDIA_NIM_API_KEY=... # required if profile=nim (https://build.nvidia.com/) +OPENAI_API_KEY=... # required if profile=codex AGENT_MODEL= # optional: override the profile's default model ``` +For copilot-api, set `AGENT_BASE_URL=http://:4141/v1` and `AGENT_MODEL=openai/gpt-5-mini`. For Ollama, use `AGENT_BASE_URL=http://:11434/v1` and an Ollama model such as `AGENT_MODEL=openai/llama3.2:3b`. `AGENT_API_KEY` is optional and only needed if your OpenAI-compatible endpoint enforces auth. + **Verbose logging**: set `AGENT_VERBOSE=true` in `.env` to log every tool call (entry args + exit elapsed/result) to stdout, visible via `docker logs braindb_api -f`. 
--- @@ -276,5 +302,5 @@ It's idempotent by content hash — re-calling with the same bytes returns 200 ( - PostgreSQL 16 with `pg_trgm` and `pgvector` - Alembic migrations - `sentence-transformers` + `Qwen/Qwen3-Embedding-0.6B` for keyword embeddings -- `openai-agents[litellm]` + LiteLLM for the internal agent (DeepInfra / NIM / others pluggable via `LLM_PROFILE`) +- `openai-agents[litellm]` + LiteLLM for the internal agent (DeepInfra / NIM / Codex / others pluggable via `LLM_PROFILE`) - Docker Compose — `api` + `watcher` services, external PostgreSQL diff --git a/braindb/config.py b/braindb/config.py index c27eb08..40d2fdd 100644 --- a/braindb/config.py +++ b/braindb/config.py @@ -7,6 +7,10 @@ # plus an optional base_url for self-hosted OpenAI-compatible servers (vLLM, # Ollama, llama.cpp). Adding a new provider is a dict entry, no code change. _LLM_PROFILES: dict[str, dict[str, str]] = { + "codex": { + "model": "openai/gpt-5.3-codex-spark", + "api_key_env": "OPENAI_API_KEY", + }, "nim": { "model": "nvidia_nim/google/gemma-4-31b-it", "api_key_env": "NVIDIA_NIM_API_KEY", @@ -15,12 +19,18 @@ "model": "deepinfra/google/gemma-4-31B-it", "api_key_env": "DEEPINFRA_API_KEY", }, + "openai_compatible": { + "model": "", + "api_key_env": "AGENT_API_KEY", + "default_api_key": "ollama", + }, "vllm_workstation": { "model": "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit", "api_key_env": "VLLM_API_KEY", "base_url": "http://host.docker.internal:8002/v1", }, } +_LLM_PROFILES["local_ollama"] = _LLM_PROFILES["openai_compatible"] class Settings(BaseSettings): @@ -50,27 +60,57 @@ class Settings(BaseSettings): # Agent (LiteLLM — provider selected via llm_profile) llm_profile: str = "deepinfra" agent_model: str = "" # blank = use profile's default model + agent_base_url: str = "" # OpenAI-compatible base URL, e.g. 
http://host:11434/v1 + agent_api_key: str = "" # optional generic key for OpenAI-compatible endpoints + openai_api_key: str = "" + deepinfra_api_key: str = "" + nvidia_nim_api_key: str = "" agent_max_turns: int = 15 agent_subagent_max_turns: int = 30 agent_verbose: bool = False + @property + def _active_llm_profile(self) -> dict[str, str]: + try: + return _LLM_PROFILES[self.llm_profile] + except KeyError as exc: + known = ", ".join(sorted(_LLM_PROFILES)) + raise ValueError(f"Unknown LLM_PROFILE={self.llm_profile!r}. Expected one of: {known}") from exc + + def _env_setting(self, env_name: str) -> str: + field_name = env_name.lower() + return getattr(self, field_name, "") or os.getenv(env_name, "") + @property def resolved_agent_model(self) -> str: - return self.agent_model or _LLM_PROFILES[self.llm_profile]["model"] + model = self.agent_model or self._active_llm_profile["model"] + if not model: + raise ValueError( + f"AGENT_MODEL must be set for LLM_PROFILE={self.llm_profile!r}; " + "for OpenAI-compatible endpoints use AGENT_MODEL=openai/ " + "(for example, openai/gpt-5-mini for copilot-api)." + ) + return model @property def resolved_api_key(self) -> str: - profile = _LLM_PROFILES[self.llm_profile] - key = os.getenv(profile["api_key_env"], "") - # Self-hosted profiles (vLLM/Ollama) may run without auth, but the - # OpenAI client still needs a non-empty key — supply a placeholder. 
- if not key and profile.get("base_url"): + profile = self._active_llm_profile + key = self._env_setting(profile["api_key_env"]) + if key: + return key + if "default_api_key" in profile: + return profile["default_api_key"] + if self.resolved_base_url: return "EMPTY" - return key + return "" @property def resolved_base_url(self) -> str | None: - return _LLM_PROFILES[self.llm_profile].get("base_url") + return self.agent_base_url or self._active_llm_profile.get("base_url") + + @property + def resolved_agent_base_url(self) -> str | None: + return self.resolved_base_url settings = Settings() diff --git a/tests/test_config_profiles.py b/tests/test_config_profiles.py new file mode 100644 index 0000000..dabd391 --- /dev/null +++ b/tests/test_config_profiles.py @@ -0,0 +1,70 @@ +import pytest + +from braindb.config import Settings + + +pytestmark = pytest.mark.unit + + +def test_codex_profile_resolves_default_model(): + settings = Settings(_env_file=None, llm_profile="codex", openai_api_key="test-key") + + assert settings.resolved_agent_model == "openai/gpt-5.3-codex-spark" + + +def test_codex_profile_resolves_api_key_from_field(): + settings = Settings(_env_file=None, llm_profile="codex", openai_api_key="test-key") + + assert settings.resolved_api_key == "test-key" + + +def test_codex_profile_resolves_api_key_from_environment(monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "env-key") + settings = Settings(_env_file=None, llm_profile="codex") + + assert settings.resolved_api_key == "env-key" + + +def test_agent_model_override_wins_for_codex_profile(): + settings = Settings( + _env_file=None, + llm_profile="codex", + agent_model="openai/alternate-model", + openai_api_key="test-key", + ) + + assert settings.resolved_agent_model == "openai/alternate-model" + + +def test_unknown_profile_error_lists_known_profiles(): + settings = Settings(_env_file=None, llm_profile="missing") + + with pytest.raises(ValueError, match="openai_compatible"): + _ = settings.resolved_agent_model 
+ + +def test_openai_compatible_profile_default_api_key(): + settings = Settings( + _env_file=None, + llm_profile="openai_compatible", + agent_model="openai/gpt-5-mini", + agent_base_url="http://localhost:4141/v1", + ) + + assert settings.resolved_agent_model == "openai/gpt-5-mini" + assert settings.resolved_api_key == "ollama" + assert settings.resolved_agent_base_url == "http://localhost:4141/v1" + + +def test_local_ollama_alias_matches_openai_compatible(): + from braindb.config import _LLM_PROFILES + + assert _LLM_PROFILES["local_ollama"] is _LLM_PROFILES["openai_compatible"] + settings = Settings( + _env_file=None, + llm_profile="local_ollama", + agent_model="openai/llama3.2:3b", + ) + + assert settings.resolved_agent_model == "openai/llama3.2:3b" + assert settings.resolved_api_key == "ollama" From 969a55e57b470e45964f88878bc1e36f65449071 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:30 +0300 Subject: [PATCH 3/5] Harden agent tools and SQL logging --- braindb/agent/tools.py | 138 ++++++++++++++++++++++----- braindb/routers/memory.py | 38 ++++---- braindb/services/activity_log.py | 22 +++++ pyproject.toml | 3 + tests/conftest.py | 6 +- tests/test_agent_tools_validation.py | 108 +++++++++++++++++++++ tests/test_memory_sql.py | 41 ++++++++ 7 files changed, 311 insertions(+), 45 deletions(-) create mode 100644 tests/test_agent_tools_validation.py create mode 100644 tests/test_memory_sql.py diff --git a/braindb/agent/tools.py b/braindb/agent/tools.py index 3bc3b6f..a2ae145 100644 --- a/braindb/agent/tools.py +++ b/braindb/agent/tools.py @@ -16,17 +16,19 @@ import json import logging import time -from typing import Optional +from typing import Optional, get_args from uuid import UUID +import psycopg2 import psycopg2.extras from agents import function_tool from braindb.config import settings from braindb.db import get_conn +from braindb.schemas.relations import RELATION_TYPES from braindb.schemas.search import ContextRequest -from 
braindb.services.activity_log import log_activity, query_log -from braindb.services.context import assemble_context, effective_importance, track_access +from braindb.services.activity_log import log_activity, log_activity_in_new_transaction, query_log +from braindb.services.context import assemble_context, track_access from braindb.services.embedding_service import get_embedding_service from braindb.services.keyword_service import ( ensure_keyword_entities, @@ -52,6 +54,38 @@ def _err(msg: str) -> str: return f"ERROR: {msg}" +_PLACEHOLDER_UUIDS = { + "", + "", + "search-mode-context", +} + +_ALLOWED_RELATION_TYPES = frozenset(get_args(RELATION_TYPES)) + + +def _validate_uuid_string(value: str | UUID | None, field_name: str) -> str: + """Return a normalized UUID string or raise ValueError before SQL sees it.""" + text = str(value).strip() if value is not None else "" + if not text: + raise ValueError(f"{field_name} is required") + if ( + text in _PLACEHOLDER_UUIDS + or (text.startswith("<") and text.endswith(">")) + or text.startswith("entity-id-of-") + ): + raise ValueError(f"{field_name} must be a real UUID, got placeholder {text!r}") + try: + return str(UUID(text)) + except ValueError as exc: + raise ValueError(f"{field_name} must be a valid UUID, got {text!r}") from exc + + +def _entity_exists(conn, entity_id: str) -> bool: + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM entities WHERE id = %s", (entity_id,)) + return cur.fetchone() is not None + + def _verbose(name: str): """Decorator that logs tool entry and exit when settings.agent_verbose is True. Placed BELOW @function_tool so the SDK still introspects the real signature. 
@@ -181,6 +215,27 @@ def _insert_entity_raw(conn, entity_type: str, content: str, keywords: list[str] return str(eid) +def _save_fact_impl( + content: str, + keywords: list[str], + source: str = "user-stated", + certainty: float = 0.8, + importance: float = 0.6, + notes: Optional[str] = None, +) -> str: + try: + with get_conn() as conn: + eid = _insert_entity_raw(conn, "fact", content, keywords, source, importance, notes) + with conn.cursor() as cur: + cur.execute( + "INSERT INTO facts_ext (entity_id, certainty, is_verified) VALUES (%s, %s, FALSE)", + (eid, certainty), + ) + return f"Saved fact id={eid}" + except Exception as e: + return _err(str(e)) + + @function_tool @_verbose("save_fact") async def save_fact( @@ -201,17 +256,7 @@ async def save_fact( importance: Weight 0-1 (default 0.6). notes: Optional running commentary. """ - try: - with get_conn() as conn: - eid = _insert_entity_raw(conn, "fact", content, keywords, source, importance, notes) - with conn.cursor() as cur: - cur.execute( - "INSERT INTO facts_ext (entity_id, certainty, is_verified) VALUES (%s, %s, FALSE)", - (eid, certainty), - ) - return f"Saved fact id={eid}" - except Exception as e: - return _err(str(e)) + return _save_fact_impl(content, keywords, source, certainty, importance, notes) @function_tool @@ -276,6 +321,27 @@ async def save_source( return _err(str(e)) +def _save_rule_impl( + content: str, + category: str = "behavior", + priority: int = 50, + always_on: bool = False, + keywords: Optional[list[str]] = None, + importance: float = 0.8, +) -> str: + try: + with get_conn() as conn: + eid = _insert_entity_raw(conn, "rule", content, keywords or [], "user-stated", importance, None) + with conn.cursor() as cur: + cur.execute( + "INSERT INTO rules_ext (entity_id, always_on, category, priority, is_active) VALUES (%s, %s, %s, %s, TRUE)", + (eid, always_on, category, priority), + ) + return f"Saved rule id={eid}" + except Exception as e: + return _err(str(e)) + + @function_tool 
@_verbose("save_rule") async def save_rule( @@ -296,17 +362,7 @@ async def save_rule( keywords: Optional topic keywords. importance: Weight 0-1 (default 0.8). """ - try: - with get_conn() as conn: - eid = _insert_entity_raw(conn, "rule", content, keywords or [], "user-stated", importance, None) - with conn.cursor() as cur: - cur.execute( - "INSERT INTO rules_ext (entity_id, always_on, category, priority, is_active) VALUES (%s, %s, %s, %s, TRUE)", - (eid, always_on, category, priority), - ) - return f"Saved rule id={eid}" - except Exception as e: - return _err(str(e)) + return _save_rule_impl(content, category, priority, always_on, keywords, importance) # ====================================================================== # @@ -322,6 +378,7 @@ async def get_entity(entity_id: str) -> str: entity_id: UUID of the entity. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute("SELECT * FROM entities WHERE id = %s", (entity_id,)) @@ -413,6 +470,7 @@ async def update_entity( importance: New importance 0-1. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") # Datasource guardrail — look up type and strip content if protected. content_dropped = False with get_conn() as conn: @@ -466,6 +524,7 @@ async def delete_entity(entity_id: str) -> str: entity_id: UUID to delete. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute("SELECT entity_type FROM entities WHERE id = %s", (entity_id,)) @@ -502,8 +561,21 @@ async def create_relation( description: Why this relation exists. 
""" try: + from_entity_id = _validate_uuid_string(from_entity_id, "from_entity_id") + to_entity_id = _validate_uuid_string(to_entity_id, "to_entity_id") + if relation_type not in _ALLOWED_RELATION_TYPES: + allowed = ", ".join(sorted(_ALLOWED_RELATION_TYPES)) + return _err(f"relation_type must be one of: {allowed}") with get_conn() as conn: with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute( + "SELECT id FROM entities WHERE id IN (%s, %s)", + (from_entity_id, to_entity_id), + ) + found_ids = {str(r["id"]) for r in cur.fetchall()} + missing_ids = [eid for eid in (from_entity_id, to_entity_id) if eid not in found_ids] + if missing_ids: + return _err(f"entity {missing_ids[0]} not found") try: cur.execute( """INSERT INTO relations (from_entity_id, to_entity_id, relation_type, relevance_score, description) @@ -514,6 +586,10 @@ async def create_relation( except psycopg2.errors.UniqueViolation: conn.rollback() return _err(f"relation {relation_type} already exists between these entities") + except psycopg2.IntegrityError as e: + conn.rollback() + message = getattr(getattr(e, "diag", None), "message_primary", None) or str(e) + return _err(f"could not create relation: {message}") log_activity(conn, "create", "relation", rid, details={ "from": from_entity_id, "to": to_entity_id, "type": relation_type, }) @@ -531,7 +607,10 @@ async def view_entity_relations(entity_id: str) -> str: entity_id: UUID of the entity. """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: + if not _entity_exists(conn, entity_id): + return _err(f"entity {entity_id} not found") with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute( """SELECT id, from_entity_id, to_entity_id, relation_type, relevance_score, description @@ -563,6 +642,7 @@ async def delete_relation(relation_id: str) -> str: relation_id: UUID of the relation. 
""" try: + relation_id = _validate_uuid_string(relation_id, "relation_id") with get_conn() as conn: with conn.cursor() as cur: cur.execute("DELETE FROM relations WHERE id = %s RETURNING id", (relation_id,)) @@ -588,7 +668,10 @@ async def view_tree(entity_id: str, max_depth: int = 2) -> str: max_depth: How far to traverse (1-3, default 2). """ try: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: + if not _entity_exists(conn, entity_id): + return _err(f"entity {entity_id} not found") with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute( """SELECT e.*, r.relation_type, r.relevance_score, r.description AS rel_desc, @@ -631,10 +714,11 @@ async def search_sql(query: str) -> str: cur.execute(query) columns = [d[0] for d in cur.description] if cur.description else [] rows = cur.fetchmany(1000) - log_activity(conn, "sql_query", details={"query": query[:500], "rows": len(rows)}) + log_activity_in_new_transaction("sql_query", details={"query": query[:500], "rows": len(rows)}) result = {"columns": columns, "rows": [[str(v) if v is not None else None for v in r] for r in rows], "row_count": len(rows)} return _truncate(json.dumps(result, default=str, indent=2)) except Exception as e: + log_activity_in_new_transaction("sql_query", details={"query": query[:500], "error": str(e)}) return _err(str(e)) @@ -653,6 +737,8 @@ async def view_log( limit: Max entries (default 30). 
""" try: + if entity_id is not None: + entity_id = _validate_uuid_string(entity_id, "entity_id") with get_conn() as conn: rows = query_log(conn, operation=operation, entity_id=entity_id, limit=limit) if not rows: diff --git a/braindb/routers/memory.py b/braindb/routers/memory.py index c680d39..ac2752f 100644 --- a/braindb/routers/memory.py +++ b/braindb/routers/memory.py @@ -11,17 +11,15 @@ from braindb.db import get_conn from braindb.schemas.search import ContextRequest, ContextResponse, SearchRequest, SearchResultItem -from braindb.services.activity_log import log_activity, query_log +from braindb.services.activity_log import log_activity, log_activity_in_new_transaction, query_log from braindb.services.embedding_service import get_embedding_service from braindb.services.keyword_service import generate_missing_embeddings from braindb.services.context import ( assemble_context, effective_importance, - fetch_always_on_rules, fetch_ext, track_access, ) -from braindb.services.graph import graph_expand from braindb.services.search import fuzzy_search router = APIRouter(prefix="/api/v1/memory", tags=["memory"]) @@ -180,9 +178,9 @@ def read_only_sql(body: SqlRequest): raise HTTPException(400, "Only SELECT or WITH queries are allowed") start = time.perf_counter() - with get_conn() as conn: - with conn.cursor() as cur: - try: + try: + with get_conn() as conn: + with conn.cursor() as cur: cur.execute("SET LOCAL statement_timeout = '5s'") cur.execute("SET LOCAL transaction_read_only = on") cur.execute(body.query) @@ -190,21 +188,27 @@ def read_only_sql(body: SqlRequest): rows = cur.fetchmany(1000) # Convert rows to JSON-safe format safe_rows = [[_to_safe(v) for v in row] for row in rows] - except Exception as e: - raise HTTPException(400, f"Query error: {e}") - + except Exception as e: elapsed_ms = int((time.perf_counter() - start) * 1000) - log_activity(conn, "sql_query", details={ + log_activity_in_new_transaction("sql_query", details={ "query": body.query[:500], - 
"rows": len(safe_rows), + "error": str(e), "elapsed_ms": elapsed_ms, }) - return { - "columns": columns, - "rows": safe_rows, - "row_count": len(safe_rows), - "elapsed_ms": elapsed_ms, - } + raise HTTPException(400, f"Query error: {e}") + + elapsed_ms = int((time.perf_counter() - start) * 1000) + log_activity_in_new_transaction("sql_query", details={ + "query": body.query[:500], + "rows": len(safe_rows), + "elapsed_ms": elapsed_ms, + }) + return { + "columns": columns, + "rows": safe_rows, + "row_count": len(safe_rows), + "elapsed_ms": elapsed_ms, + } def _to_safe(value): diff --git a/braindb/services/activity_log.py b/braindb/services/activity_log.py index 3a24cd8..e80c445 100644 --- a/braindb/services/activity_log.py +++ b/braindb/services/activity_log.py @@ -4,8 +4,15 @@ The log_activity function is fire-and-forget: it must never fail the main operation. """ +import logging + import psycopg2.extras +from braindb.db import get_conn + + +logger = logging.getLogger(__name__) + def log_activity( conn, @@ -35,6 +42,21 @@ def log_activity( pass +def log_activity_in_new_transaction( + operation: str, + entity_type: str | None = None, + entity_id: str | None = None, + details: dict | None = None, + context_note: str | None = None, +) -> None: + """Write an activity log entry using its own normal transaction.""" + try: + with get_conn() as conn: + log_activity(conn, operation, entity_type, entity_id, details, context_note) + except Exception as exc: + logger.warning("Activity log write failed in separate transaction: %s", exc) + + def query_log( conn, operation: str | None = None, diff --git a/pyproject.toml b/pyproject.toml index 011c379..23d82d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,3 +61,6 @@ packages = ["braindb"] testpaths = ["tests"] timeout = 60 addopts = "-ra" +markers = [ + "unit: tests that do not require a live BrainDB stack", +] diff --git a/tests/conftest.py b/tests/conftest.py index 3f74f3e..65ad8da 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py
@@ -42,8 +42,10 @@ def _wait_for_health(url: str, timeout: int = 30) -> bool:
 
 
 @pytest.fixture(scope="session", autouse=True)
-def _require_live_api() -> None:
-    """Fail fast and loud if the stack isn't up — tests have nothing to run against."""
+def _require_live_api(request: pytest.FixtureRequest) -> None:
+    """Fail fast for integration tests; pure unit tests do not need the stack."""
+    if request.session.items and all(item.get_closest_marker("unit") for item in request.session.items):
+        return
     if not _wait_for_health(API_URL):
         pytest.fail(
             f"BrainDB API not healthy at {API_URL}. "
diff --git a/tests/test_agent_tools_validation.py b/tests/test_agent_tools_validation.py
new file mode 100644
index 0000000..d510c12
--- /dev/null
+++ b/tests/test_agent_tools_validation.py
@@ -0,0 +1,108 @@
+"""Targeted tests for agent tool ID validation."""
+from __future__ import annotations
+
+import asyncio
+import json
+import uuid
+from contextlib import contextmanager
+
+from agents.tool import ToolContext
+
+from braindb.agent import tools
+
+
+def _invoke_tool(tool, args: dict) -> str:
+    payload = json.dumps(args)
+    ctx = ToolContext(
+        context=None,
+        tool_name=tool.name,
+        tool_call_id="pytest",
+        tool_arguments=payload,
+    )
+    return asyncio.run(tool.on_invoke_tool(ctx, payload))
+
+
+def test_get_entity_rejects_placeholder_before_db(monkeypatch):
+    def fail_get_conn():
+        raise AssertionError("database should not be queried for placeholder IDs")
+
+    monkeypatch.setattr(tools, "get_conn", fail_get_conn)
+
+    result = _invoke_tool(tools.get_entity, {"entity_id": "<entity-id>"})
+
+    assert result.startswith("ERROR:")
+    assert "placeholder" in result
+
+
+def test_view_log_rejects_non_uuid_filter_before_db(monkeypatch):
+    def fail_get_conn():
+        raise AssertionError("database should not be queried for invalid IDs")
+
+    monkeypatch.setattr(tools, "get_conn", fail_get_conn)
+
+    result = _invoke_tool(tools.view_log, {
+        "operation": None,
+        "entity_id": 
"search-mode-context", + "limit": 30, + }) + + assert result.startswith("ERROR:") + assert "placeholder" in result + + +def test_create_relation_rejects_invalid_type_before_db(monkeypatch): + def fail_get_conn(): + raise AssertionError("database should not be queried for invalid relation types") + + monkeypatch.setattr(tools, "get_conn", fail_get_conn) + + result = _invoke_tool(tools.create_relation, { + "from_entity_id": str(uuid.uuid4()), + "to_entity_id": str(uuid.uuid4()), + "relation_type": "i_like_it", + "relevance_score": 0.7, + "description": None, + }) + + assert result.startswith("ERROR:") + assert "relation_type" in result + + +def test_create_relation_prechecks_missing_entities_before_insert(monkeypatch): + queries: list[str] = [] + + class FakeCursor: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def execute(self, query, params=None): + queries.append(query) + + def fetchall(self): + return [] + + class FakeConn: + def cursor(self, *args, **kwargs): + return FakeCursor() + + @contextmanager + def fake_get_conn(): + yield FakeConn() + + monkeypatch.setattr(tools, "get_conn", fake_get_conn) + missing_from = str(uuid.uuid4()) + + result = _invoke_tool(tools.create_relation, { + "from_entity_id": missing_from, + "to_entity_id": str(uuid.uuid4()), + "relation_type": "supports", + "relevance_score": 0.7, + "description": None, + }) + + assert result == f"ERROR: entity {missing_from} not found" + assert len(queries) == 1 + assert "SELECT id FROM entities" in queries[0] diff --git a/tests/test_memory_sql.py b/tests/test_memory_sql.py new file mode 100644 index 0000000..1d8b04d --- /dev/null +++ b/tests/test_memory_sql.py @@ -0,0 +1,41 @@ +"""Targeted tests for read-only SQL execution and activity logging.""" +from __future__ import annotations + +import uuid + +import requests + + +def _recent_sql_log_details(api: str, limit: int = 20) -> list[dict]: + r = requests.get(f"{api}/api/v1/memory/log", 
params={"operation": "sql_query", "limit": limit}, timeout=10) + assert r.status_code == 200, f"activity log query failed: {r.status_code} {r.text[:200]}" + return [entry.get("details") or {} for entry in r.json()] + + +def test_read_only_sql_success_logs_in_separate_transaction(api): + marker = f"pytest_sql_{uuid.uuid4().hex}" + query = f"SELECT 1 AS ok /* {marker} */" + + r = requests.post(f"{api}/api/v1/memory/sql", json={"query": query}, timeout=10) + + assert r.status_code == 200, f"sql query failed: {r.status_code} {r.text[:200]}" + body = r.json() + assert body["columns"] == ["ok"] + assert body["rows"] == [[1]] + assert body["row_count"] == 1 + + details = _recent_sql_log_details(api) + assert any(d.get("query") == query and d.get("rows") == 1 for d in details) + + +def test_read_only_sql_failure_logs_in_separate_transaction(api): + marker = f"pytest_sql_{uuid.uuid4().hex}" + query = f"SELECT definitely_missing_column /* {marker} */" + + r = requests.post(f"{api}/api/v1/memory/sql", json={"query": query}, timeout=10) + + assert r.status_code == 400 + assert "Query error" in r.text + + details = _recent_sql_log_details(api) + assert any(d.get("query") == query and "error" in d for d in details) From 75be7f7b7789e75b127ba6635c71751e6d5b3845 Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:15:35 +0300 Subject: [PATCH 4/5] Add agent save fast path --- braindb/agent/fast_path.py | 47 +++++++++++++++++++ braindb/routers/agent.py | 45 +++++++++++++++--- tests/test_agent.py | 9 ++++ tests/test_agent_fast_path.py | 86 +++++++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+), 7 deletions(-) create mode 100644 braindb/agent/fast_path.py create mode 100644 tests/test_agent_fast_path.py diff --git a/braindb/agent/fast_path.py b/braindb/agent/fast_path.py new file mode 100644 index 0000000..a258cb0 --- /dev/null +++ b/braindb/agent/fast_path.py @@ -0,0 +1,47 @@ +"""Deterministic fast paths for simple BrainDB agent requests.""" +import re 
+from typing import Any
+
+from braindb.agent.tools import _save_fact_impl, _save_rule_impl
+
+_SAVE_RE = re.compile(r"^\s*Save:\s+(?P<content>.+?)\s*$", re.IGNORECASE | re.DOTALL)
+_SAVE_RULE_RE = re.compile(r"^\s*Save as rule:\s+(?P<content>.+?)\s*$", re.IGNORECASE | re.DOTALL)
+_MAX_FAST_PATH_CHARS = 2000
+
+
+def _content_is_safe_for_fast_path(content: str) -> bool:
+    return bool(content) and "?" not in content and len(content) <= _MAX_FAST_PATH_CHARS
+
+
+def try_fast_path(query: str) -> dict[str, Any] | None:
+    """Handle simple save requests without invoking the LLM agent loop."""
+    rule_match = _SAVE_RULE_RE.match(query)
+    if rule_match:
+        content = rule_match.group("content").strip()
+        if not _content_is_safe_for_fast_path(content):
+            return None
+        answer = _save_rule_impl(
+            content=content,
+            keywords=[],
+            importance=0.8,
+        )
+        status = "fast_path_error" if answer.startswith("ERROR:") else "fast_path"
+        return {"answer": answer, "max_turns": 0, "status": status}
+
+    save_match = _SAVE_RE.match(query)
+    if save_match:
+        content = save_match.group("content").strip()
+        if not _content_is_safe_for_fast_path(content):
+            return None
+        answer = _save_fact_impl(
+            content=content,
+            keywords=[],
+            source="user-stated",
+            certainty=0.9,
+            importance=0.7,
+            notes="Saved via agent fast path.",
+        )
+        status = "fast_path_error" if answer.startswith("ERROR:") else "fast_path"
+        return {"answer": answer, "max_turns": 0, "status": status}
+
+    return None
diff --git a/braindb/routers/agent.py b/braindb/routers/agent.py
index 3337bf2..68f8218 100644
--- a/braindb/routers/agent.py
+++ b/braindb/routers/agent.py
@@ -6,13 +6,16 @@ its internal tools and returns a summary.
 
""" import logging +from typing import Any +from agents.exceptions import MaxTurnsExceeded from fastapi import APIRouter, HTTPException from pydantic import BaseModel, Field from braindb.agent.agent import run_agent_query -from braindb.db import get_conn -from braindb.services.activity_log import log_activity +from braindb.agent.fast_path import try_fast_path +from braindb.config import settings +from braindb.services.activity_log import log_activity_in_new_transaction logger = logging.getLogger(__name__) @@ -24,6 +27,11 @@ class AgentQueryRequest(BaseModel): max_turns: int | None = Field(default=None, ge=1, le=60) +def _log_agent_query(query: str, details: dict[str, Any]) -> None: + payload = {"query": query[:500], **details} + log_activity_in_new_transaction("agent_query", details=payload) + + @router.post("/query") async def agent_query(body: AgentQueryRequest): """Run a natural-language query through the BrainDB agent. @@ -31,13 +39,36 @@ async def agent_query(body: AgentQueryRequest): When AGENT_VERBOSE=true is set in the server environment, every tool call is logged to stdout and visible via `docker logs braindb_api`. 
""" + turns = body.max_turns or settings.agent_max_turns + fast_path_result = try_fast_path(body.query) + if fast_path_result is not None: + _log_agent_query(body.query, { + "max_turns": 0, + "status": fast_path_result.get("status"), + }) + return fast_path_result + try: result = await run_agent_query(body.query, max_turns=body.max_turns) - with get_conn() as conn: - log_activity(conn, "agent_query", details={ - "query": body.query[:500], - "max_turns": result.get("max_turns"), - }) + result.setdefault("status", "ok") + _log_agent_query(body.query, { + "max_turns": result.get("max_turns"), + "status": result.get("status"), + }) + return result + except MaxTurnsExceeded as e: + logger.warning("Agent query exceeded max_turns=%s: %s", turns, e) + result = { + "answer": f"Agent exceeded max_turns={turns} before calling submit_result.", + "max_turns": turns, + "turns_used": turns, + "status": "max_turns_exceeded", + } + _log_agent_query(body.query, { + "max_turns": turns, + "turns_used": turns, + "status": "max_turns_exceeded", + }) return result except Exception as e: logger.exception("Agent query failed") diff --git a/tests/test_agent.py b/tests/test_agent.py index e7f3ebd..72d02e3 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -49,3 +49,12 @@ def test_agent_endpoint_rejects_missing_query(api): timeout=10, ) assert r.status_code in (400, 422), f"expected 4xx for missing query, got {r.status_code}" + + +@pytest.mark.unit +def test_agent_builder_stops_at_submit_result(): + from braindb.agent.agent import create_braindb_agent + + agent = create_braindb_agent() + + assert agent.tool_use_behavior["stop_at_tool_names"] == ["submit_result"] diff --git a/tests/test_agent_fast_path.py b/tests/test_agent_fast_path.py new file mode 100644 index 0000000..a5e5e08 --- /dev/null +++ b/tests/test_agent_fast_path.py @@ -0,0 +1,86 @@ +"""Agent endpoint fast-path tests.""" +import re + +import requests + + +_ID_RE = re.compile(r"id=([0-9a-f-]{36})") + + +def 
_extract_entity_id(answer: str) -> str: + match = _ID_RE.search(answer) + assert match, f"missing entity id in answer: {answer!r}" + return match.group(1) + + +def test_agent_fast_path_saves_fact(api, test_tag, created_entities): + content = f"{test_tag} user prefers deterministic BrainDB save fast paths" + response = requests.post( + f"{api}/api/v1/agent/query", + json={"query": f"Save: {content}"}, + timeout=10, + ) + + assert response.status_code == 200, response.text + body = response.json() + assert body["status"] == "fast_path" + assert body["max_turns"] == 0 + entity_id = _extract_entity_id(body["answer"]) + created_entities.append(entity_id) + + entity = requests.get(f"{api}/api/v1/entities/{entity_id}", timeout=10) + assert entity.status_code == 200, entity.text + payload = entity.json() + assert payload["entity_type"] == "fact" + assert payload["content"] == content + + +def test_agent_fast_path_saves_rule(api, test_tag, created_entities): + content = f"{test_tag} always prefer deterministic fast paths for simple memory saves" + response = requests.post( + f"{api}/api/v1/agent/query", + json={"query": f"Save as rule: {content}"}, + timeout=10, + ) + + assert response.status_code == 200, response.text + body = response.json() + assert body["status"] == "fast_path" + assert body["max_turns"] == 0 + entity_id = _extract_entity_id(body["answer"]) + created_entities.append(entity_id) + + entity = requests.get(f"{api}/api/v1/entities/{entity_id}", timeout=10) + assert entity.status_code == 200, entity.text + payload = entity.json() + assert payload["entity_type"] == "rule" + assert payload["content"] == content + + +def test_agent_save_question_bypasses_fast_path(api): + response = requests.post( + f"{api}/api/v1/agent/query", + json={"query": "Save: what does the user prefer?", "max_turns": 1}, + timeout=80, + ) + + assert response.status_code == 200, response.text + assert response.json().get("status") != "fast_path" + + +def 
test_agent_max_turns_returns_structured_status(api): + response = requests.post( + f"{api}/api/v1/agent/query", + json={ + "query": "Recall everything about the user, then save a thought summarizing it, then connect them with relations.", + "max_turns": 1, + }, + timeout=80, + ) + + assert response.status_code == 200, response.text + body = response.json() + assert body["status"] == "max_turns_exceeded" + assert body["max_turns"] == 1 + assert body["turns_used"] == 1 + assert body["answer"] From 9e240f6b26ebf10fe1ed463ccc027bb482b1642a Mon Sep 17 00:00:00 2001 From: Nikita Bige Date: Sun, 3 May 2026 23:17:01 +0300 Subject: [PATCH 5/5] Validate configured LLM profiles --- braindb/config.py | 3 ++- tests/test_config_profiles.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/braindb/config.py b/braindb/config.py index 40d2fdd..18c7ef8 100644 --- a/braindb/config.py +++ b/braindb/config.py @@ -83,7 +83,8 @@ def _env_setting(self, env_name: str) -> str: @property def resolved_agent_model(self) -> str: - model = self.agent_model or self._active_llm_profile["model"] + profile = self._active_llm_profile + model = self.agent_model or profile["model"] if not model: raise ValueError( f"AGENT_MODEL must be set for LLM_PROFILE={self.llm_profile!r}; " diff --git a/tests/test_config_profiles.py b/tests/test_config_profiles.py index dabd391..dff2279 100644 --- a/tests/test_config_profiles.py +++ b/tests/test_config_profiles.py @@ -7,7 +7,7 @@ def test_codex_profile_resolves_default_model(): - settings = Settings(_env_file=None, llm_profile="codex", openai_api_key="test-key") + settings = Settings(_env_file=None, llm_profile="codex", agent_model="", openai_api_key="test-key") assert settings.resolved_agent_model == "openai/gpt-5.3-codex-spark" @@ -37,7 +37,7 @@ def test_agent_model_override_wins_for_codex_profile(): def test_unknown_profile_error_lists_known_profiles(): - settings = Settings(_env_file=None, llm_profile="missing") + settings = 
Settings(_env_file=None, llm_profile="missing", agent_model="") with pytest.raises(ValueError, match="openai_compatible"): _ = settings.resolved_agent_model