Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,14 @@ TENDERLY_PROJECT=sam
ETHERSCAN_TOKEN=your-etherscan-api-key

# LLM provider for AI transaction explanations
# Supported: venice (default), openai, anthropic, or any OpenAI-compatible provider
# Supported: venice (default), openai, anthropic, codex, or any OpenAI-compatible provider
LLM_PROVIDER=venice
# Optional for LLM_PROVIDER=codex when existing Codex auth is available.
LLM_API_KEY=your-llm-api-key
# LLM_BASE_URL=https://api.venice.ai/api/v1 # auto-set for known providers
# LLM_MODEL=deepseek-v4-flash # auto-set for known providers
# LLM_CODEX_MODEL_PROVIDER=openai # optional Codex SDK override
# LLM_CODEX_CWD=/path/to/workspace # optional Codex runtime cwd

# Dune (hourly large-transfer monitor)
DUNE_API_KEY=your-dune-api-key
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ dependencies = [
[project.optional-dependencies]
ai = [
"anthropic>=0.40.0",
"openai-codex>=0.1.0b2",
"openai>=1.0.0",
]
dev = [
"anthropic>=0.40.0",
"openai-codex>=0.1.0b2",
"openai>=1.0.0",
"mdformat==1.0.0",
"mypy==2.0.0",
Expand All @@ -86,6 +88,9 @@ dev = [
# or ISO 8601 (e.g. "P7D"). Applies to `uv pip install`, `uv lock`,
# `uv sync`, etc. Also settable via `UV_EXCLUDE_NEWER=1 week`.
exclude-newer = "1 week"
# Narrow exception for the beta OpenAI Codex SDK used by the `codex` LLM provider.
# The SDK pins an exact version of its runtime package (openai-codex-cli-bin),
# so that package needs its own exception as well.
exclude-newer-package = { openai-codex = "2026-05-29T00:00:00Z", openai-codex-cli-bin = "2026-05-21T00:00:00Z" }

[build-system]
requires = ["setuptools>=42", "wheel"]
Expand Down
116 changes: 116 additions & 0 deletions tests/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,107 @@ def test_complete_structured_uses_forced_tool(self, mock_anthropic_cls: MagicMoc
self.assertEqual(kwargs["tool_choice"]["type"], "tool")


class TestCodexProvider(unittest.TestCase):
    """Exercise ``CodexProvider`` against a mocked ``openai_codex.Codex`` client."""

    @staticmethod
    def _wire_codex(codex_cls: MagicMock, final_response: str | None) -> tuple[MagicMock, MagicMock]:
        """Wire the patched ``Codex`` class to a client/thread pair.

        The returned thread's ``run`` yields a result whose ``final_response``
        is *final_response*.

        Returns:
            The ``(client, thread)`` mocks so tests can inspect recorded calls.
        """
        client = MagicMock()
        codex_cls.return_value = client
        thread = MagicMock()
        client.thread_start.return_value = thread
        turn = MagicMock()
        turn.final_response = final_response
        thread.run.return_value = turn
        return client, thread

    @patch("openai_codex.Codex")
    def test_complete_success(self, mock_codex_cls: MagicMock) -> None:
        client, thread = self._wire_codex(mock_codex_cls, " Updates the cap. LOW. ")

        from utils.llm.codex_provider import CodexProvider

        provider = CodexProvider(api_key="sk-test", model="gpt-5.2-codex", model_provider="openai")
        answer = provider.complete("prompt", system_prompt="sys")

        # Surrounding whitespace is trimmed and the key is used to log in once.
        self.assertEqual(answer, "Updates the cap. LOW.")
        client.login_api_key.assert_called_once_with("sk-test")

        # The thread is created ephemeral, locked down, and carries the system prompt.
        start_kwargs = client.thread_start.call_args.kwargs
        self.assertEqual(start_kwargs["model"], "gpt-5.2-codex")
        self.assertEqual(start_kwargs["model_provider"], "openai")
        self.assertTrue(start_kwargs["ephemeral"])
        self.assertIn("sys", start_kwargs["developer_instructions"])
        self.assertEqual(start_kwargs["approval_mode"].value, "deny_all")
        self.assertEqual(start_kwargs["sandbox"].value, "read-only")

        # The turn repeats the same restrictions and requests plain text.
        turn_kwargs = thread.run.call_args.kwargs
        self.assertEqual(turn_kwargs["model"], "gpt-5.2-codex")
        self.assertIsNone(turn_kwargs["output_schema"])
        self.assertEqual(turn_kwargs["approval_mode"].value, "deny_all")
        self.assertEqual(turn_kwargs["sandbox"].value, "read-only")

    @patch("openai_codex.Codex")
    def test_complete_without_api_key_reuses_existing_auth(self, mock_codex_cls: MagicMock) -> None:
        client, _ = self._wire_codex(mock_codex_cls, "OK")

        from utils.llm.codex_provider import CodexProvider

        provider = CodexProvider(api_key=None, model="gpt-5.2-codex")
        self.assertEqual(provider.complete("prompt"), "OK")
        client.login_api_key.assert_not_called()

    @patch("openai_codex.Codex")
    def test_complete_empty_response_raises(self, mock_codex_cls: MagicMock) -> None:
        self._wire_codex(mock_codex_cls, None)

        from utils.llm.codex_provider import CodexProvider

        provider = CodexProvider(api_key=None, model="gpt-5.2-codex")
        with self.assertRaises(LLMError):
            provider.complete("prompt")

    @patch("openai_codex.Codex")
    def test_complete_structured_parses_json(self, mock_codex_cls: MagicMock) -> None:
        _, thread = self._wire_codex(
            mock_codex_cls,
            '{"summary": "Updates. LOW.", "detail": "d", "risk_tag": "LOW"}',
        )

        from utils.llm.codex_provider import CodexProvider

        provider = CodexProvider(api_key=None, model="gpt-5.2-codex")
        payload = provider.complete_structured("prompt", {"type": "object"})

        self.assertEqual(payload["risk_tag"], "LOW")
        # The schema must be forwarded untouched to the Codex turn.
        self.assertEqual(thread.run.call_args.kwargs["output_schema"], {"type": "object"})

    @patch("openai_codex.Codex")
    def test_complete_structured_invalid_json_raises(self, mock_codex_cls: MagicMock) -> None:
        self._wire_codex(mock_codex_cls, "not json")

        from utils.llm.codex_provider import CodexProvider

        provider = CodexProvider(api_key=None, model="gpt-5.2-codex")
        with self.assertRaises(LLMError):
            provider.complete_structured("prompt", {"type": "object"})


class TestFactory(unittest.TestCase):
"""Tests for the LLM provider factory."""

Expand Down Expand Up @@ -264,6 +365,15 @@ def test_anthropic_defaults(self, mock_anthropic_cls: MagicMock) -> None:
provider = get_llm_provider()
self.assertEqual(provider.model_name, "claude-haiku-4-5-20251001")

@patch("openai_codex.Codex")
def test_codex_defaults_without_api_key(self, mock_codex_cls: MagicMock) -> None:
    """Codex builds with its default model and no login when no API key is set."""
    with patch.dict(os.environ, {"LLM_PROVIDER": "codex"}, clear=True):
        codex = get_llm_provider()
        self.assertEqual(codex.model_name, "gpt-5.2-codex")
        self.assertTrue(codex.supports_structured_output)
        # Without LLM_API_KEY the SDK client must never be asked to log in.
        client = mock_codex_cls.return_value
        client.login_api_key.assert_not_called()

@patch("anthropic.Anthropic")
def test_anthropic_custom_model(self, mock_anthropic_cls: MagicMock) -> None:
env = {"LLM_PROVIDER": "anthropic", "LLM_API_KEY": "sk-ant-test", "LLM_MODEL": "claude-sonnet-4-6"}
Expand Down Expand Up @@ -326,6 +436,12 @@ def test_structured_output_on_by_default_for_anthropic(self, mock_anthropic_cls:
with patch.dict(os.environ, env, clear=True):
self.assertTrue(get_llm_provider().supports_structured_output)

@patch("openai_codex.Codex")
def test_structured_output_on_by_default_for_codex(self, mock_codex_cls: MagicMock) -> None:
    """Codex advertises structured output when ``LLM_STRUCTURED_OUTPUT`` is unset."""
    with patch.dict(os.environ, {"LLM_PROVIDER": "codex"}, clear=True):
        codex = get_llm_provider()
        self.assertTrue(codex.supports_structured_output)


# Allow running this test module directly as a script (python tests/test_llm.py).
if __name__ == "__main__":
    unittest.main()
14 changes: 9 additions & 5 deletions utils/llm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ Calls upgradeTo(address) on the AAVE pool proxy...

`_parse_explanation()` splits this with tolerant regex (handles `### DETAIL`, `**TLDR:**`, etc.); if the format isn't followed, the whole response becomes the summary (backward compatible).

Structured output is controlled by `LLM_STRUCTURED_OUTPUT` (per-provider default: on for `anthropic`/`openai`/`venice` — all verified live — off for `groq`/custom, since JSON-schema support varies by backend). The refine pass (step 7) always uses the text path.
Structured output is controlled by `LLM_STRUCTURED_OUTPUT` (per-provider default: on for `anthropic`/`codex`/`openai`/`venice`; off for `groq`/custom, since JSON-schema support varies by backend). The refine pass (step 7) always uses the text path.

### 9. Output Formatting

Expand All @@ -253,11 +253,13 @@ All configuration is via environment variables:

| Variable | Default | Description |
|---|---|---|
| `LLM_PROVIDER` | `venice` | Provider name: `venice`, `groq`, `openai`, `anthropic`, or custom |
| `LLM_API_KEY` | *(required)* | API key for the LLM provider |
| `LLM_PROVIDER` | `venice` | Provider name: `venice`, `groq`, `openai`, `anthropic`, `codex`, or custom |
| `LLM_API_KEY` | *(required except codex)* | API key for the LLM provider. Optional for `codex`: when omitted, existing Codex auth (e.g. from `codex login`) is reused |
| `LLM_MODEL` | `deepseek-v4-flash` | Model identifier |
| `LLM_BASE_URL` | *(per provider)* | API base URL (not needed for anthropic or codex, which use a native API/SDK) |
| `LLM_STRUCTURED_OUTPUT` | *(per provider)* | `true`/`false` to force JSON-schema output. Default: on for anthropic/openai/venice (all verified live), off for groq/custom |
| `LLM_STRUCTURED_OUTPUT` | *(per provider)* | `true`/`false` to force JSON-schema output. Default: on for anthropic/codex/openai/venice, off for groq/custom |
| `LLM_CODEX_MODEL_PROVIDER` | *(unset)* | Optional Codex SDK model-provider override |
| `LLM_CODEX_CWD` | *(current process cwd)* | Optional Codex runtime working directory |
| `ETHERSCAN_TOKEN` | *(optional)* | Etherscan v2 multichain API key for source context |
| `TENDERLY_API_KEY` | *(optional)* | Tenderly API key for simulation |
| `TENDERLY_ACCOUNT` | `yearn` | Tenderly account slug |
Expand All @@ -271,9 +273,10 @@ All configuration is via environment variables:
| Groq | `https://api.groq.com/openai/v1` | `openai/gpt-oss-safeguard-20b` | `openai` |
| OpenAI | `https://api.openai.com/v1` | `gpt-4o-mini` | `openai` |
| Anthropic | *(native API)* | `claude-haiku-4-5-20251001` | `anthropic` |
| Codex | *(native SDK)* | `gpt-5.2-codex` | `openai-codex` |
| Custom | Set `LLM_BASE_URL` | Set `LLM_MODEL` | `openai` |

The `openai` and `anthropic` packages are optional dependencies. Install with:
The `openai`, `anthropic`, and `openai-codex` packages are optional dependencies. Install with:

```bash
uv pip install 'monitoring-scripts-py[ai]'
Expand All @@ -287,6 +290,7 @@ utils/llm/
├── ai_explainer.py # Orchestrator: decode → fetch context → prompt → explain
├── anthropic_provider.py # Anthropic (Claude) native API provider
├── base.py # Abstract LLMProvider base class + LLMError
├── codex_provider.py # OpenAI Codex Python SDK provider
├── factory.py # Provider factory with env-based config + singleton
├── openai_compat.py # OpenAI-compatible provider (Venice, OpenAI, etc.)
└── README.md # This file
Expand Down
132 changes: 132 additions & 0 deletions utils/llm/codex_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""OpenAI Codex Python SDK LLM provider.

Uses the native ``openai-codex`` SDK rather than the OpenAI HTTP API. Codex is
an agent runtime, so this adapter constrains it to direct text completion:
read-only sandbox, denied approvals, and one ephemeral thread per request.
"""

import json
from typing import Any

from utils.llm.base import LLMError, LLMProvider
from utils.logging import get_logger

# Module-level logger for the Codex provider.
logger = get_logger("utils.llm.codex_provider")

# Adapter-level instructions sent as (part of) every thread's developer
# instructions. Codex is an agent runtime; this text tells it to answer the
# prompt directly instead of exploring the workspace or running tools.
_COMPLETION_INSTRUCTIONS = """Act as a direct LLM completion backend for this application.
Use only the prompt content supplied in the turn. Do not inspect the workspace,
run commands, edit files, or describe tool limitations."""


class CodexProvider(LLMProvider):
    """LLM provider backed by the OpenAI Codex Python SDK.

    Every request runs on a fresh ephemeral thread with a read-only sandbox
    and all approvals denied, and is steered by ``_COMPLETION_INSTRUCTIONS``
    to answer the prompt directly.
    """

    def __init__(
        self,
        api_key: str | None,
        model: str,
        structured_output: bool = True,
        model_provider: str | None = None,
        cwd: str | None = None,
    ) -> None:
        """Initialize the provider.

        Args:
            api_key: Optional OpenAI API key. If omitted, Codex reuses existing
                Codex authentication (for example from ``codex login``).
            model: Codex model identifier.
            structured_output: Whether to advertise Codex ``output_schema``.
            model_provider: Optional Codex model provider override.
            cwd: Optional runtime working directory for the Codex app-server.

        Raises:
            LLMError: If the ``openai-codex`` package is not installed.
            Exception: Any error raised by ``login_api_key`` is re-raised
                unchanged after the client has been closed.
        """
        try:
            from openai_codex import ApprovalMode, Codex, CodexConfig, Sandbox
        except ImportError as e:
            # Chain the cause so the original import failure stays visible.
            raise LLMError(
                "openai-codex package not installed. Install with: uv pip install 'monitoring-scripts-py[ai]'"
            ) from e

        self._model = model
        self._model_provider = model_provider
        self._structured_output = structured_output
        # Lock the agent down: never approve actions, never write to disk.
        self._approval_mode = ApprovalMode.deny_all
        self._sandbox = Sandbox.read_only
        config = CodexConfig(cwd=cwd) if cwd else None
        client = Codex(config)
        try:
            if api_key:
                client.login_api_key(api_key)
        except Exception:
            # Do not leave a half-initialized client open if login fails.
            client.close()
            raise
        self._client = client
        logger.info(
            "Initialized Codex provider: model=%s model_provider=%s structured=%s cwd=%s",
            model,
            model_provider,
            structured_output,
            cwd,
        )

    def complete(self, prompt: str, system_prompt: str = "") -> str:
        """Generate a completion using a fresh Codex thread.

        Args:
            prompt: User prompt sent as the turn input.
            system_prompt: Optional instructions appended to the adapter's own.

        Returns:
            The final response text with surrounding whitespace removed.

        Raises:
            LLMError: If the SDK call fails or the response is empty.
        """
        try:
            # ``_run`` already returns stripped, non-empty text.
            return self._run(prompt, system_prompt)
        except LLMError:
            raise
        except Exception as e:
            raise LLMError(f"Codex SDK call failed: {e}") from e

    @property
    def supports_structured_output(self) -> bool:
        """Return whether Codex ``output_schema`` is enabled."""
        return self._structured_output

    def complete_structured(self, prompt: str, schema: dict[str, Any], system_prompt: str = "") -> dict[str, Any]:
        """Request a schema-constrained Codex response and return it parsed.

        Args:
            prompt: User prompt sent as the turn input.
            schema: JSON schema passed to Codex as ``output_schema``.
            system_prompt: Optional instructions appended to the adapter's own.

        Returns:
            The response decoded into a dictionary.

        Raises:
            LLMError: If the SDK call fails, the response is empty, is not
                valid JSON, or decodes to something other than a JSON object.
        """
        try:
            content = self._run(prompt, system_prompt, output_schema=schema)
            parsed = json.loads(content)
        except LLMError:
            raise
        except json.JSONDecodeError as e:
            raise LLMError(f"Structured Codex response was not valid JSON: {e}") from e
        except Exception as e:
            raise LLMError(f"Codex structured call failed: {e}") from e
        # Valid JSON can still be a list/string/number; callers expect a mapping.
        if not isinstance(parsed, dict):
            raise LLMError(f"Structured Codex response was not a JSON object: got {type(parsed).__name__}")
        return parsed

    def close(self) -> None:
        """Close the Codex runtime process."""
        self._client.close()

    def _run(self, prompt: str, system_prompt: str, output_schema: dict[str, Any] | None = None) -> str:
        """Run one stateless Codex turn and return the final response text.

        Args:
            prompt: User prompt sent as the turn input.
            system_prompt: Caller instructions merged into the developer instructions.
            output_schema: Optional JSON schema constraining the response.

        Returns:
            The final response with surrounding whitespace removed.

        Raises:
            LLMError: If the final response is missing or only whitespace.
        """
        thread = self._client.thread_start(
            approval_mode=self._approval_mode,
            developer_instructions=self._build_instructions(system_prompt),
            ephemeral=True,
            model=self._model,
            model_provider=self._model_provider,
            sandbox=self._sandbox,
        )
        result = thread.run(
            prompt,
            approval_mode=self._approval_mode,
            model=self._model,
            output_schema=output_schema,
            sandbox=self._sandbox,
        )
        # Strip before the emptiness check so a whitespace-only response is
        # reported as empty instead of being returned as "".
        text = (result.final_response or "").strip()
        if not text:
            raise LLMError("Empty response from Codex")
        return text

    def _build_instructions(self, system_prompt: str) -> str:
        """Combine adapter-level constraints with the caller's system prompt."""
        if not system_prompt:
            return _COMPLETION_INSTRUCTIONS
        return f"{_COMPLETION_INSTRUCTIONS}\n\n{system_prompt}"

    @property
    def model_name(self) -> str:
        """Return the model identifier."""
        return self._model
Loading
Loading