diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7f964b2..cecca53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,13 +43,13 @@ jobs: run: | pytest tests/unit/ -v --tb=short - - name: Run integration tests + - name: Run integration tests (excluding smoke) run: | - pytest tests/integration/ -v --tb=short + pytest tests/integration/ -v --tb=short -m "not smoke" - name: Generate coverage report run: | - pytest tests/ --cov=cyberai --cov-report=term-missing --cov-report=xml + pytest tests/ --cov=cyberai --cov-report=term-missing --cov-report=xml -m "not smoke" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -57,6 +57,26 @@ jobs: file: ./coverage.xml fail_ci_if_error: false + smoke: + name: Smoke Tests (end-to-end) + runs-on: ubuntu-latest + continue-on-error: true # smoke tests are xfail until day 7; don't block PRs yet + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + pip install -e . + - name: Run smoke tests + run: | + pytest tests/ -v --tb=short -m smoke + lint: name: Lint runs-on: ubuntu-latest diff --git a/docs/architecture/known-issues.md b/docs/architecture/known-issues.md new file mode 100644 index 0000000..1d2479a --- /dev/null +++ b/docs/architecture/known-issues.md @@ -0,0 +1,99 @@ +# Known Issues — Pre-W1 Baseline + +This document captures the broken state of CyberAI **as of the start of +the 30-day STANDOFF rewrite**. Each item is fixed by a specific day in +the plan; see `STANDOFF.md` for the schedule. + +When all items are checked off, days 1–7 (Reanimation week) are done +and `cyberai scan --dry-run` will work end-to-end. + +## How this was verified + +Smoke tests in `tests/integration/test_cli_smoke.py` reproduce the broken +state via `CliRunner().invoke(cli, ["scan", ..., "--dry-run"])`. 
The two +`scan` tests are marked `@pytest.mark.xfail` until day 7, then un-xfailed to provide +regression protection; the `--help` sanity test is expected to pass throughout. + +## The Issues + +### 🔴 KI-1 — CLI ↔ Orchestrator API mismatch +- **What's broken:** `__main__.py` calls `Orchestrator(config)` and + `orchestrator.run_pipeline(session)`. Neither matches the actual API: + `Orchestrator.__init__(phases, authorized_scope, dry_run)` does not + accept `config`, and the method is named `run(target)`. +- **Symptom:** `TypeError` on any `cyberai scan` invocation. +- **Fixed by:** Day 5 (`refactor/orchestrator-v2`) +- **Status:** ❌ broken + +### 🔴 KI-2 — Two competing session classes +- **What's broken:** `PentestSession` (in `core/session.py`) and + `ScanSession` (in `core/scan_session.py`) coexist with different + fields and methods. `__main__.py` uses `PentestSession`; `Orchestrator` + creates `ScanSession`. +- **Fixed by:** Day 3 (`refactor/unify-session`) +- **Status:** ❌ broken + +### 🔴 KI-3 — BaseAgent doesn't match what agents use +- **What's broken:** `BaseAgent.__init__(config, audit, session_id)` is + what's declared, but agents access `self.session`, `self.kb`, + `self.memory`, `self.llm` — none of which exist on `BaseAgent`. The + Orchestrator constructs agents as `ReconAgent(kb=session.kb)`, which + also doesn't match. +- **Fixed by:** Day 4 (`refactor/base-agent-contract`) +- **Status:** ❌ broken + +### 🔴 KI-4 — Agents call non-existent methods +- **What's broken:** Several agents call `self._check_iteration_limit()`, + `self._log(...)`, `self.llm.chat(...)` — none of these exist. +- **Fixed by:** Day 4 + Day 6 +- **Status:** ❌ broken + +### 🔴 KI-5 — `Finding` signature mismatch +- **What's broken:** `ReconAgent` builds `Finding(title=..., target=..., + evidence=[...])`, but the `Finding` dataclass has no `target` or + `evidence` fields. 
+- **Fixed by:** Day 3 +- **Status:** ❌ broken + +### 🔴 KI-6 — `Tool` param name mismatch +- **What's broken:** `Tool` dataclass field is `params`, but every + `_register_tools()` call uses `parameters=...`. +- **Fixed by:** Day 4 +- **Status:** ❌ broken + +### 🔴 KI-7 — `LLMClient.chat()` doesn't exist +- **What's broken:** `ExploitAgent` calls `self.llm.chat(messages=..., + system=...)`. The actual `LLMClient` method is `call()`. +- **Fixed by:** Day 6 +- **Status:** ❌ broken + +### 🔴 KI-8 — `conftest.session_with_recon` accesses non-existent field +- **What's broken:** The original `session_with_recon` fixture in `conftest.py` did + `fresh_session.knowledge_base["recon.nmap"] = ...` but `PentestSession` + has no `knowledge_base` field — only `recon_data` / `intel_data` / + `exploit_data`. +- **Fixed by:** Day 2 (this PR) — temporarily redirected to `recon_data["nmap"]` +- **Status:** ✅ patched (full unification in day 3) + +## Reproduction + +```bash +# Will raise TypeError before any real work happens: +python -m cyberai scan 127.0.0.1 --dry-run + +# Smoke tests reproduce this state: +pytest tests/integration/test_cli_smoke.py -v +# Expected: 2 xfailed, 1 passed +``` + +## Progress tracker + +| Day | Issue(s) addressed | Status | +|-----|-------------------|--------| +| 1 | (rebrand only) | ✅ | +| 2 | KI-8 | ✅ | +| 3 | KI-2, KI-5 | ⏳ | +| 4 | KI-3, KI-4, KI-6 | ⏳ | +| 5 | KI-1 | ⏳ | +| 6 | KI-7, KI-4 | ⏳ | +| 7 | All checked | ⏳ | diff --git a/pytest.ini b/pytest.ini index 636004c..2797fb5 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,8 +3,10 @@ testpaths = tests python_files = test_*.py python_classes = Test* python_functions = test_* -addopts = -v --tb=short +addopts = -v --tb=short --strict-markers markers = unit: Unit tests (fast, no external calls) integration: Integration tests (may use mocks) - slow: Slow tests (real network calls) + smoke: End-to-end smoke tests for CLI and pipeline + slow: Slow tests (real network calls, NVD/etc.) 
+ network: Tests that require live network access diff --git a/tests/conftest.py b/tests/conftest.py index f02f26b..812150f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,24 +1,140 @@ +""" +Shared pytest fixtures for CyberAI test suite. + +Note: The `fresh_session` fixture currently uses PentestSession. +This will change to ScanSession in day 3 of the STANDOFF plan, +when the two competing session types are unified. +""" +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock + import pytest + from cyberai.core.config import CyberAIConfig from cyberai.core.session import PentestSession + +# --------------------------------------------------------------------------- +# Config & sessions +# --------------------------------------------------------------------------- + @pytest.fixture(scope="session") -def base_config(): - """Shared config for all tests — no real API keys needed""" +def base_config() -> CyberAIConfig: + """Shared config for all tests — no real API keys needed.""" return CyberAIConfig() + @pytest.fixture -def fresh_session(): - """Fresh session for each test""" +def fresh_session() -> PentestSession: + """A clean session for each test that needs one.""" return PentestSession(target="testhost.local") + @pytest.fixture -def session_with_recon(fresh_session): - """Session pre-loaded with recon data""" - fresh_session.knowledge_base["recon.nmap"] = { +def session_with_recon(fresh_session: PentestSession) -> PentestSession: + """Session pre-loaded with synthetic recon data.""" + fresh_session.recon_data["nmap"] = { "ports": [ - {"port": 80, "service": "http", "state": "open"}, - {"port": 22, "service": "ssh", "state": "open"}, + {"port": 80, "service": "http", "state": "open"}, + {"port": 22, "service": "ssh", "state": "open"}, ] } return fresh_session + + +# --------------------------------------------------------------------------- +# Mocked external services +# 
--------------------------------------------------------------------------- + +@pytest.fixture +def mock_llm_client() -> MagicMock: + """ + A MagicMock that mimics the LLMClient interface. + + Returns a deterministic response for `call()` and awaitable `acall()`, + so tests don't need real API keys and don't hit the network. + + Usage: + def test_something(mock_llm_client): + mock_llm_client.call.return_value = "custom response" + agent = SomeAgent(llm=mock_llm_client, ...) + ... + """ + client = MagicMock() + client.call.return_value = "stub LLM response" + client.acall = AsyncMock(return_value="stub async LLM response") + client.model = "stub-model" + client.provider = "stub-provider" + return client + + +@pytest.fixture +def mock_nmap_result() -> dict[str, Any]: + """ + Realistic-ish nmap output structure for tests that need recon data + without actually running nmap. + """ + return { + "target": "testhost.local", + "ports": [ + { + "port": 22, + "protocol": "tcp", + "state": "open", + "service": "ssh", + "version": "OpenSSH 8.9p1 Ubuntu", + }, + { + "port": 80, + "protocol": "tcp", + "state": "open", + "service": "http", + "version": "Apache 2.4.52", + }, + { + "port": 443, + "protocol": "tcp", + "state": "open", + "service": "https", + "version": "Apache 2.4.52", + }, + ], + "scan_time": "12.3s", + } + + +@pytest.fixture +def mock_nvd_response() -> dict[str, Any]: + """Minimal NVD API 2.0 response shape for one CVE.""" + return { + "resultsPerPage": 1, + "startIndex": 0, + "totalResults": 1, + "vulnerabilities": [ + { + "cve": { + "id": "CVE-2024-9999", + "published": "2024-01-15T00:00:00.000", + "metrics": { + "cvssMetricV31": [ + { + "cvssData": { + "baseScore": 9.8, + "baseSeverity": "CRITICAL", + "vectorString": ( + "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/" + "S:U/C:H/I:H/A:H" + ), + } + } + ] + }, + "descriptions": [ + {"lang": "en", "value": "Synthetic test CVE for fixtures."} + ], + } + } + ], + } diff --git a/tests/integration/test_cli_smoke.py 
b/tests/integration/test_cli_smoke.py 
new file mode 100644 index 0000000..80a682a --- /dev/null +++ b/tests/integration/test_cli_smoke.py @@ -0,0 +1,67 @@ +""" +End-to-end smoke tests for the cyberai CLI. + +These tests verify that the entire pipeline runs without crashing, +even in dry-run mode where no real network calls are made. + +Currently most are marked xfail because of known API mismatches between +__main__.py, Orchestrator, and the agents — see docs/architecture/known-issues.md. +They will be un-xfailed in day 7 of the STANDOFF plan. +""" +from __future__ import annotations + +import pytest +from click.testing import CliRunner + +from cyberai.__main__ import cli + + +pytestmark = pytest.mark.smoke + + +@pytest.mark.xfail( + reason="Orchestrator/CLI API mismatch — see known-issues.md (fixed in W1)", + strict=False, +) +def test_cli_scan_dry_run_exits_cleanly(): + """ + `cyberai scan --dry-run` should complete with exit code 0 + without making any real network calls. + + Currently fails because __main__.py calls Orchestrator(config) but + Orchestrator.__init__ does not accept `config` as positional arg, + and calls orchestrator.run_pipeline(session) which does not exist + (the method is named `run(target)`). + """ + runner = CliRunner() + result = runner.invoke(cli, ["scan", "127.0.0.1", "--dry-run"]) + + assert result.exit_code == 0, ( + f"CLI exited with code {result.exit_code}\n" + f"Output:\n{result.output}\n" + f"Exception:\n{result.exception!r}" + ) + + +@pytest.mark.xfail( + reason="Same root cause — Orchestrator API mismatch", + strict=False, +) +def test_cli_scan_dry_run_produces_output(): + """The scan should produce some textual output, even in dry-run mode.""" + runner = CliRunner() + result = runner.invoke(cli, ["scan", "example.com", "--dry-run"]) + + assert result.output, "CLI produced no output at all" + + +def test_cli_help_works(): + """ + Sanity check: `cyberai --help` must always work. + If this breaks, something is very wrong with imports. 
+ """ + runner = CliRunner() + result = runner.invoke(cli, ["--help"]) + + assert result.exit_code == 0 + assert "scan" in result.output.lower()