23 changes: 18 additions & 5 deletions backend/.env.example
@@ -6,13 +6,26 @@
# `python api.py` when running the backend on the host.

# ---- LLM provider (profile-driven via LiteLLM) ----
# Picks an entry from backend/API/config.py::_LLM_PROFILES.
# Currently only `deepinfra` ships out of the box.
# Flip LLM_PROFILE to switch the whole stack. Six profiles ship in
# backend/API/config.py::_LLM_PROFILES — see the README for the full table.
# gemini | deepinfra | nim | together | local | local_gemma
LLM_PROFILE=deepinfra
DEEPINFRA_API_KEY=your-deepinfra-api-key-here

# Optional: override the model string for the active profile
# AGENT_MODEL=deepinfra/google/gemma-4-31B-it
# Provider keys — uncomment/fill the one(s) matching the active LLM_PROFILE:
DEEPINFRA_API_KEY=your-deepinfra-api-key-here
# GEMINI_API_KEY=
# NVIDIA_NIM_API_KEY=
# TOGETHER_API_KEY=

# Local OpenAI-compatible servers (llama.cpp, vLLM, LM Studio, etc.).
# Inside Docker, use host.docker.internal instead of localhost.
# LOCAL_API_KEY=sk-noauth # any non-empty value
# LOCAL_API_BASE=http://host.docker.internal:8003/v1 # for LLM_PROFILE=local
# LOCAL_API_BASE_GEMMA=http://host.docker.internal:8002/v1 # for LLM_PROFILE=local_gemma

# Optional overrides (rarely needed — profile defaults usually suffice):
# AGENT_MODEL= # override the active profile's model id
# AGENT_API_BASE= # global base_url override

# ---- Agent loop ----
AGENT_MAX_TURNS=15
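A quick way to sanity-check the local-server values above before flipping `LLM_PROFILE`: hit the endpoint directly with the OpenAI Python client, using the same base URL and key. A minimal sketch, assuming the server answers on `:8003` and reports the model as `qwen` (adjust both for your setup; from inside Docker use the `host.docker.internal` URL):

```python
# Sketch: verify LOCAL_API_BASE / LOCAL_API_KEY point at a working
# OpenAI-compatible server before setting LLM_PROFILE=local.
# Assumes `pip install openai` and a model served under the name "qwen".
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8003/v1",  # LOCAL_API_BASE (host run)
    api_key="sk-noauth",                  # LOCAL_API_KEY, any non-empty value
)

reply = client.chat.completions.create(
    model="qwen",  # the name your server reports, without the "openai/" prefix
    messages=[{"role": "user", "content": "Say 'ok' and nothing else."}],
    max_tokens=5,
)
print(reply.choices[0].message.content)
```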
11 changes: 7 additions & 4 deletions backend/API/agent.py
@@ -28,10 +28,13 @@


def _create_model() -> LitellmModel:
return LitellmModel(
model=settings.resolved_agent_model,
api_key=settings.resolved_api_key,
)
kwargs: dict = {
"model": settings.resolved_agent_model,
"api_key": settings.resolved_api_key,
}
if settings.resolved_api_base:
kwargs["base_url"] = settings.resolved_api_base
return LitellmModel(**kwargs)


def _build_instructions(user_task: str, output_schema: list[dict], scrape_backend: str) -> str:
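To see what the `_create_model()` factory in this hunk will actually send, the same three resolved values can be exercised directly through LiteLLM. A rough sketch, not part of the codebase: it assumes `litellm` is installed and that the backend package is importable (run it from `backend/` so `API.config` resolves).

```python
# Sketch: call the resolved provider with the exact values _create_model()
# hands to LitellmModel. Passing None (rather than "") for api_base keeps
# LiteLLM's provider defaults intact, which is the same reason the factory
# only adds base_url when settings.resolved_api_base is non-empty.
import litellm
from API.config import settings  # assumed import path; run from backend/

response = litellm.completion(
    model=settings.resolved_agent_model,
    api_key=settings.resolved_api_key or None,
    api_base=settings.resolved_api_base or None,
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,
)
print(response.choices[0].message.content)
```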
46 changes: 43 additions & 3 deletions backend/API/config.py
@@ -12,9 +12,40 @@

# Adding a provider = one dict entry. No code change elsewhere.
_LLM_PROFILES: dict[str, dict[str, str]] = {
"gemini": {
"model": "gemini/gemini-3.1-flash-lite-preview",
"api_key_env": "GEMINI_API_KEY",
"api_base_env": "",
},
"deepinfra": {
"model": "deepinfra/google/gemma-4-31B-it",
"api_key_env": "DEEPINFRA_API_KEY",
"api_base_env": "",
},
"nim": {
"model": "nvidia_nim/google/gemma-4-31b-it",
"api_key_env": "NVIDIA_NIM_API_KEY",
"api_base_env": "",
},
"together": {
"model": "together_ai/google/gemma-4-31B-it",
"api_key_env": "TOGETHER_API_KEY",
"api_base_env": "",
},
"local": {
# llama.cpp llama-server (OpenAI-compatible) hosting Qwen on :8003.
# The "openai/<name>" prefix tells LiteLLM to route via api_base.
"model": "openai/qwen",
"api_key_env": "LOCAL_API_KEY",
"api_base_env": "LOCAL_API_BASE",
},
"local_gemma": {
# OpenAI-compatible server hosting Gemma 4 31B (AWQ) on :8002.
# Edit this string if your server exposes the model under a different
# name, or override at runtime with AGENT_MODEL.
"model": "openai/cyankiwi/gemma-4-31B-it-AWQ-4bit",
"api_key_env": "LOCAL_API_KEY",
"api_base_env": "LOCAL_API_BASE_GEMMA",
},
}

@@ -25,8 +56,9 @@ class Settings(BaseSettings):
# --- Master switch ---
llm_profile: str = "deepinfra"

# --- Optional model override (blank = use profile default) ---
# --- Optional overrides (blank = use profile default) ---
agent_model: str = ""
agent_api_base: str = ""

# --- Agent loop ---
agent_max_turns: int = 15
@@ -57,8 +89,15 @@ def resolved_agent_model(self) -> str:

@property
def resolved_api_key(self) -> str:
env = _LLM_PROFILES[self.llm_profile]["api_key_env"]
return os.getenv(env, "")
env = _LLM_PROFILES[self.llm_profile].get("api_key_env", "")
return os.getenv(env, "") if env else ""

@property
def resolved_api_base(self) -> str:
if self.agent_api_base:
return self.agent_api_base
env = _LLM_PROFILES[self.llm_profile].get("api_base_env", "")
return os.getenv(env, "") if env else ""

@property
def running_in_docker(self) -> bool:
@@ -79,6 +118,7 @@ def log_config_summary(self, logger: Optional[logging.Logger] = None) -> None:
logger.info(" LLM profile: %s", self.llm_profile)
logger.info(" Resolved model: %s", self.resolved_agent_model)
logger.info(" API key set: %s", bool(self.resolved_api_key))
logger.info(" API base: %s", self.resolved_api_base or "(provider default)")
logger.info(" Firecrawl key set: %s", bool(self.firecrawl_api_key))
logger.info(" Running in Docker: %s", self.running_in_docker)
logger.info(" Browser-visible supported: %s", self.browser_visible_supported)
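The precedence the new properties implement (explicit `AGENT_*` override first, then the profile's `*_env` variable, then empty) can be checked with a short script. A sketch under the assumption that no conflicting values are already set in `backend/.env`:

```python
# Sketch: resolution order for resolved_api_base.
import os
from API.config import Settings  # assumed import path; run from backend/

os.environ["LLM_PROFILE"] = "local"
os.environ["LOCAL_API_BASE"] = "http://localhost:8003/v1"
os.environ.pop("AGENT_API_BASE", None)
assert Settings().resolved_api_base == "http://localhost:8003/v1"  # per-profile env var

os.environ["AGENT_API_BASE"] = "http://localhost:9999/v1"
assert Settings().resolved_api_base == "http://localhost:9999/v1"  # global override wins

os.environ["LLM_PROFILE"] = "deepinfra"
os.environ.pop("AGENT_API_BASE", None)
assert Settings().resolved_api_base == ""  # cloud profiles keep the provider default
```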
69 changes: 51 additions & 18 deletions backend/README.md
@@ -8,7 +8,7 @@ The backend exposes a small HTTP API. Each call accepts one row of input data, a

### Key Features

- **Profile-based LLM provider** — `LLM_PROFILE=deepinfra` runs `deepinfra/google/gemma-4-31B-it`. Adding a provider is one entry in `_LLM_PROFILES` (see `config.py`).
- **Profile-based LLM provider** — `LLM_PROFILE` switches the whole stack; six profiles ship out of the box (`gemini`, `deepinfra`, `nim`, `together`, `local`, `local_gemma`). Cloud providers (Gemini 3.1 Flash Lite, DeepInfra, NVIDIA NIM, Together) and OpenAI-compatible local servers (llama.cpp, vLLM, LM Studio) all work via the same dict in `config.py`.
- **Two scrape backends, picked per request**:
- `local` (default) — Playwright (`search_google`) + crawl4ai (`visit_webpage`). Chromium baked into the image.
- `firecrawl` — Firecrawl MCP (`firecrawl_scrape`, `firecrawl_search`).
@@ -58,7 +58,7 @@ backend/
### Prerequisites

- Python 3.13+
- DeepInfra API key (default profile) — or any LiteLLM-supported provider after adding it to `_LLM_PROFILES`
- API key for one of the six built-in profiles (`gemini`, `deepinfra`, `nim`, `together`, `local`, `local_gemma`) — or any LiteLLM-supported provider after adding it to `_LLM_PROFILES`
- Firecrawl API key (only required if you'll use `scrape_backend=firecrawl`)
- For host run: `playwright install chromium` after `pip install`

@@ -71,17 +71,32 @@ pip install -r requirements.txt
playwright install chromium # only needed for host runs
```

2. **Configure environment** (create `.env`):
2. **Configure environment** (create `.env` — pick the profile you want):
```bash
# Required — LLM provider profile + key
# Required — LLM provider profile + matching API key
LLM_PROFILE=deepinfra
DEEPINFRA_API_KEY=your-deepinfra-api-key
# GEMINI_API_KEY=your-gemini-api-key # for LLM_PROFILE=gemini
# NVIDIA_NIM_API_KEY=your-nim-api-key # for LLM_PROFILE=nim
# TOGETHER_API_KEY=your-together-api-key # for LLM_PROFILE=together

# Local OpenAI-compatible server (llama.cpp / vLLM / LM Studio):
# LLM_PROFILE=local # Qwen on :8003 by default
# LOCAL_API_KEY=sk-noauth # any non-empty value
# LOCAL_API_BASE=http://localhost:8003/v1
# (inside Docker: http://host.docker.internal:8003/v1)

# LLM_PROFILE=local_gemma # Gemma 4 31B AWQ on :8002
# LOCAL_API_KEY=sk-noauth
# LOCAL_API_BASE_GEMMA=http://localhost:8002/v1
# AGENT_MODEL= # only needed if your server uses a different model id

# Required if you'll use the firecrawl backend
FIRECRAWL_API_KEY=your-firecrawl-api-key

# Optional overrides
# AGENT_MODEL=deepinfra/google/gemma-4-31B-it
# AGENT_MODEL= # override the profile's model string
# AGENT_API_BASE= # global base_url override
# AGENT_MAX_TURNS=15
# FIRECRAWL_MAX_CONTENT_LENGTH=20000
# DEBUG_MODE=true
@@ -114,10 +129,17 @@ wsl -- bash -c 'cd /mnt/c/.../knowledge-robot && \

| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `LLM_PROFILE` | Yes | `deepinfra` | Picks an entry from `_LLM_PROFILES` in `config.py` |
| `LLM_PROFILE` | Yes | `deepinfra` | Picks an entry from `_LLM_PROFILES` in `config.py`. Ships with `gemini` / `deepinfra` / `nim` / `together` / `local` / `local_gemma` |
| `GEMINI_API_KEY` | Yes (for `gemini` profile) | - | Google AI Studio key |
| `DEEPINFRA_API_KEY` | Yes (for `deepinfra` profile) | - | DeepInfra API key |
| `NVIDIA_NIM_API_KEY` | Yes (for `nim` profile) | - | NVIDIA NIM API key |
| `TOGETHER_API_KEY` | Yes (for `together` profile) | - | Together AI API key |
| `LOCAL_API_KEY` | Yes (for `local` / `local_gemma`) | - | Any non-empty string; the OpenAI client requires a value client-side, while the local server typically ignores it |
| `LOCAL_API_BASE` | Yes (for `local` profile) | - | OpenAI-compatible URL, e.g. `http://localhost:8003/v1` (or `http://host.docker.internal:8003/v1` from inside Docker) |
| `LOCAL_API_BASE_GEMMA` | Yes (for `local_gemma` profile) | - | OpenAI-compatible URL for the Gemma server, e.g. `http://localhost:8002/v1` |
| `FIRECRAWL_API_KEY` | When `scrape_backend=firecrawl` | - | Firecrawl API key |
| `AGENT_MODEL` | No | (profile default) | Override the profile's model string, e.g. `deepinfra/google/gemma-4-31B-it` |
| `AGENT_MODEL` | No | (profile default) | Override the profile's model string, e.g. `openai/cyankiwi/gemma-4-31B-it-AWQ-4bit` |
| `AGENT_API_BASE` | No | (profile default) | Global `base_url` override (wins over the per-profile `*_API_BASE`) |
| `AGENT_MAX_TURNS` | No | `15` | Max tool calls per row |
| `AGENT_SUBAGENT_MAX_TURNS` | No | `30` | Max tool calls inside a subagent |
| `TOOL_OUTPUT_MAX_CHARS` | No | `8000` | Per-tool truncation cap for local-backend tools |
@@ -137,23 +159,33 @@

### Supported LLM Providers

The default profile is `deepinfra`. To add a provider, append to `_LLM_PROFILES` in [API/config.py](API/config.py):
Six profiles ship in [API/config.py](API/config.py):

| Profile | Default model | Required env |
|---------|---------------|--------------|
| `gemini` | `gemini/gemini-3.1-flash-lite-preview` | `GEMINI_API_KEY` |
| `deepinfra` | `deepinfra/google/gemma-4-31B-it` | `DEEPINFRA_API_KEY` |
| `nim` | `nvidia_nim/google/gemma-4-31b-it` | `NVIDIA_NIM_API_KEY` |
| `together` | `together_ai/google/gemma-4-31B-it` | `TOGETHER_API_KEY` |
| `local` | `openai/qwen` (llama.cpp Qwen on :8003) | `LOCAL_API_KEY` + `LOCAL_API_BASE` |
| `local_gemma` | `openai/cyankiwi/gemma-4-31B-it-AWQ-4bit` (Gemma 4 31B AWQ on :8002) | `LOCAL_API_KEY` + `LOCAL_API_BASE_GEMMA` |

Set `LLM_PROFILE=<key>` in `.env` (plus the matching key/base variables from the table above) to switch; no code changes are needed. If your local server exposes the model under a different name, edit the `model` field of the profile in [API/config.py](API/config.py), or override at runtime with `AGENT_MODEL` as sketched below.
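If you are not sure which name your local server exposes, ask it: the `/v1/models` listing is what the `openai/<name>` model string has to match. A minimal sketch, assuming the `openai` package and a Gemma server on `:8002`:

```python
# Sketch: discover the served model id, then derive the override.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8002/v1", api_key="sk-noauth")
served = [m.id for m in client.models.list().data]
print(served)  # e.g. ["cyankiwi/gemma-4-31B-it-AWQ-4bit"]

# LiteLLM routes via api_base when the model string is prefixed with "openai/",
# so the matching override would be:
#   AGENT_MODEL=openai/<one of the ids printed above>
```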

To add another LiteLLM-supported provider, append to `_LLM_PROFILES`:

```python
_LLM_PROFILES = {
"deepinfra": {
"model": "deepinfra/google/gemma-4-31B-it",
"api_key_env": "DEEPINFRA_API_KEY",
# ... existing six ...
"anthropic": {
"model": "anthropic/claude-sonnet-4-5",
"api_key_env": "ANTHROPIC_API_KEY",
"api_base_env": "",
},
# Examples (not currently wired):
# "gemini": {"model": "gemini/gemini-2.5-flash", "api_key_env": "GEMINI_API_KEY"},
# "anthropic":{"model": "anthropic/claude-3-5-sonnet-latest", "api_key_env": "ANTHROPIC_API_KEY"},
# "openai": {"model": "openai/gpt-4o-mini", "api_key_env": "OPENAI_API_KEY"},
# "nim": {"model": "nvidia_nim/google/gemma-4-31b-it", "api_key_env": "NVIDIA_NIM_API_KEY"},
}
```

Set `LLM_PROFILE=<key>` in `.env` to switch. **Note**: `ModelSettings()` is bare in this codebase — passing `reasoning_effort` raises `UnsupportedParamsError` on DeepInfra/Gemma. If you want reasoning on a Gemini-class profile, set `litellm.drop_params=True` at module load.
**Note**: `ModelSettings()` is bare in this codebase — passing `reasoning_effort` raises `UnsupportedParamsError` on DeepInfra/Gemma. If you want reasoning on a Gemini-class profile, set `litellm.drop_params=True` at module load so providers that don't support the parameter silently drop it instead of erroring.
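A sketch of what that looks like in practice: one module-level line, placed for example next to `set_tracing_disabled` at the top of `agent.py` (hypothetical placement).

```python
# Sketch: let LiteLLM drop parameters a provider doesn't support instead of
# raising UnsupportedParamsError, so reasoning-style settings can be passed
# on profiles that understand them without breaking the others.
import litellm

litellm.drop_params = True
```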

## API Endpoints

@@ -265,7 +297,7 @@ Module-level `process_row()` validates the request, builds the dynamic Pydantic
### 2. Agent factory + dispatcher (`agent.py`)

- `set_tracing_disabled(disabled=True)` at module load (silences OpenAI tracing service spam).
- `_create_model()` returns a `LitellmModel` from `settings.resolved_agent_model` + `settings.resolved_api_key`.
- `_create_model()` returns a `LitellmModel` from `settings.resolved_agent_model` + `settings.resolved_api_key`, plus `base_url=settings.resolved_api_base` whenever that resolves to a non-empty value (the `local` / `local_gemma` profiles, or an explicit `AGENT_API_BASE` override).
- `_build_instructions(user_task, output_schema, scrape_backend)` returns the system prompt — branches by backend.
- `_run_firecrawl(...)` and `_run_local(...)` build the Agent with the right tool list and run it. Both end with `StopAtTools(["submit_result"])`.
- `run_agent(...)` is the public dispatcher; raises `ValueError` for anything other than `"firecrawl"` / `"local"`.
@@ -290,6 +322,7 @@ Module-level `process_row()` validates the request, builds the dynamic Pydantic
`Settings(BaseSettings)` from `pydantic-settings`. Reads `.env` once at import. Singleton `settings` exposed via `get_config()` shim. Computed properties:
- `resolved_agent_model` — `agent_model` override OR `_LLM_PROFILES[llm_profile]["model"]`
- `resolved_api_key` — env var named by `_LLM_PROFILES[llm_profile]["api_key_env"]`
- `resolved_api_base` — `agent_api_base` override OR env var named by `_LLM_PROFILES[llm_profile]["api_base_env"]` (empty for cloud profiles, set for `local` / `local_gemma`)
- `running_in_docker` — checks `/.dockerenv` or `RUNNING_IN_DOCKER=true`
- `browser_visible_supported` — true on host, true in WSLg overlay, false in plain Docker

6 changes: 6 additions & 0 deletions docker-compose.local.yml
@@ -10,6 +10,12 @@ services:
- "8080:8080"
env_file:
- ./backend/.env
extra_hosts:
# Lets the container reach a local LLM server running on the host
# via http://host.docker.internal:<port>/v1 (LLM_PROFILE=local* path).
# Docker Desktop on Windows/Mac resolves this automatically; this
# explicit mapping makes the same setup work on native Linux too.
- "host.docker.internal:host-gateway"
environment:
# Development settings (override defaults from env_file if needed)
- DEBUG_MODE=true
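To confirm the mapping works on your machine, a plain reachability check from inside the container is enough; no model call needed. A sketch (hypothetical helper, run it inside the backend container, e.g. via `docker compose -f docker-compose.local.yml exec <service> python`):

```python
# Sketch: verify the container can reach an LLM server on the Docker host.
# Assumes the host serves LLM_PROFILE=local on port 8003 (use 8002 for local_gemma).
import socket

host, port = "host.docker.internal", 8003
try:
    with socket.create_connection((host, port), timeout=3):
        print(f"{host}:{port} is reachable from the container")
except OSError as exc:
    print(f"cannot reach {host}:{port}: {exc}; check extra_hosts and the host server")
```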