From 858e455455dcf4de6ca120de380ffac77769b938 Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Wed, 20 May 2026 14:32:52 +0300 Subject: [PATCH 1/4] test(e2e): add failing smoke test for 'cyberai scan' command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two xfail tests reproduce the current broken state: - test_cli_scan_dry_run_exits_cleanly: CLI calls Orchestrator(config) and orchestrator.run_pipeline(session) — neither matches the actual API - test_cli_scan_dry_run_produces_output: same root cause One always-passing sanity check: - test_cli_help_works: ensures the CLI module at least imports cleanly xfail(strict=False) — when day 7 fixes the API mismatch, these tests will XPASS without failing CI. When we un-xfail them in day 7, they will provide actual regression protection. Refs: STANDOFF.md day 2/30 --- tests/integration/test_cli_smoke.py | 67 +++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 tests/integration/test_cli_smoke.py diff --git a/tests/integration/test_cli_smoke.py b/tests/integration/test_cli_smoke.py new file mode 100644 index 0000000..80a682a --- /dev/null +++ b/tests/integration/test_cli_smoke.py @@ -0,0 +1,67 @@ +""" +End-to-end smoke tests for the cyberai CLI. + +These tests verify that the entire pipeline runs without crashing, +even in dry-run mode where no real network calls are made. + +Currently most are marked xfail because of known API mismatches between +__main__.py, Orchestrator, and the agents — see docs/architecture/known-issues.md. +They will be un-xfailed in day 7 of the STANDOFF plan. 
+""" +from __future__ import annotations + +import pytest +from click.testing import CliRunner + +from cyberai.__main__ import cli + + +pytestmark = pytest.mark.smoke + + +@pytest.mark.xfail( + reason="Orchestrator/CLI API mismatch — see known-issues.md (fixed in W1)", + strict=False, +) +def test_cli_scan_dry_run_exits_cleanly(): + """ + `cyberai scan --dry-run` should complete with exit code 0 + without making any real network calls. + + Currently fails because __main__.py calls Orchestrator(config) but + Orchestrator.__init__ does not accept `config` as positional arg, + and calls orchestrator.run_pipeline(session) which does not exist + (the method is named `run(target)`). + """ + runner = CliRunner() + result = runner.invoke(cli, ["scan", "127.0.0.1", "--dry-run"]) + + assert result.exit_code == 0, ( + f"CLI exited with code {result.exit_code}\n" + f"Output:\n{result.output}\n" + f"Exception:\n{result.exception!r}" + ) + + +@pytest.mark.xfail( + reason="Same root cause — Orchestrator API mismatch", + strict=False, +) +def test_cli_scan_dry_run_produces_output(): + """The scan should produce some textual output, even in dry-run mode.""" + runner = CliRunner() + result = runner.invoke(cli, ["scan", "example.com", "--dry-run"]) + + assert result.output, "CLI produced no output at all" + + +def test_cli_help_works(): + """ + Sanity check: `cyberai --help` must always work. + If this breaks, something is very wrong with imports. 
+ """ + runner = CliRunner() + result = runner.invoke(cli, ["--help"]) + + assert result.exit_code == 0 + assert "scan" in result.output.lower() From b40c07bf6513c9888d1945ae826fa11102c95d9b Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Wed, 20 May 2026 14:36:49 +0300 Subject: [PATCH 2/4] test(fixtures): add mocked LLM and NVD fixtures, fix broken kb access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - mock_llm_client: MagicMock with .call() / .acall() returning stub strings, for tests that exercise agents without real API keys - mock_nmap_result: realistic nmap output dict for recon-dependent tests - mock_nvd_response: NVD API 2.0 response shape for intel tests - session_with_recon: fix broken 'knowledge_base[...]' access — the actual PentestSession field is 'recon_data' (this fixture was silently broken) All fixtures are typed and documented for IDE autocomplete. Refs: STANDOFF.md day 2/30 --- tests/conftest.py | 134 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 125 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index f02f26b..812150f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,24 +1,140 @@ +""" +Shared pytest fixtures for CyberAI test suite. + +Note: The `fresh_session` fixture currently uses PentestSession. +This will change to ScanSession in day 3 of the STANDOFF plan, +when the two competing session types are unified. 
+""" +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock + import pytest + from cyberai.core.config import CyberAIConfig from cyberai.core.session import PentestSession + +# --------------------------------------------------------------------------- +# Config & sessions +# --------------------------------------------------------------------------- + @pytest.fixture(scope="session") -def base_config(): - """Shared config for all tests — no real API keys needed""" +def base_config() -> CyberAIConfig: + """Shared config for all tests — no real API keys needed.""" return CyberAIConfig() + @pytest.fixture -def fresh_session(): - """Fresh session for each test""" +def fresh_session() -> PentestSession: + """A clean session for each test that needs one.""" return PentestSession(target="testhost.local") + @pytest.fixture -def session_with_recon(fresh_session): - """Session pre-loaded with recon data""" - fresh_session.knowledge_base["recon.nmap"] = { +def session_with_recon(fresh_session: PentestSession) -> PentestSession: + """Session pre-loaded with synthetic recon data.""" + fresh_session.recon_data["nmap"] = { "ports": [ - {"port": 80, "service": "http", "state": "open"}, - {"port": 22, "service": "ssh", "state": "open"}, + {"port": 80, "service": "http", "state": "open"}, + {"port": 22, "service": "ssh", "state": "open"}, ] } return fresh_session + + +# --------------------------------------------------------------------------- +# Mocked external services +# --------------------------------------------------------------------------- + +@pytest.fixture +def mock_llm_client() -> MagicMock: + """ + A MagicMock that mimics the LLMClient interface. + + Returns a deterministic response for `call()` and, when awaited, `acall()`, + so tests don't need real API keys and don't hit the network. + + Usage: + def test_something(mock_llm_client): + mock_llm_client.call.return_value = "custom response" + agent = SomeAgent(llm=mock_llm_client, ...)
+ ... + """ + client = MagicMock() + client.call.return_value = "stub LLM response" + client.acall = AsyncMock(return_value="stub async LLM response") # awaitable; a plain MagicMock child returns a bare str + client.model = "stub-model" + client.provider = "stub-provider" + return client + + +@pytest.fixture +def mock_nmap_result() -> dict[str, Any]: + """ + Realistic-ish nmap output structure for tests that need recon data + without actually running nmap. + """ + return { + "target": "testhost.local", + "ports": [ + { + "port": 22, + "protocol": "tcp", + "state": "open", + "service": "ssh", + "version": "OpenSSH 8.9p1 Ubuntu", + }, + { + "port": 80, + "protocol": "tcp", + "state": "open", + "service": "http", + "version": "Apache 2.4.52", + }, + { + "port": 443, + "protocol": "tcp", + "state": "open", + "service": "https", + "version": "Apache 2.4.52", + }, + ], + "scan_time": "12.3s", + } + + +@pytest.fixture +def mock_nvd_response() -> dict[str, Any]: + """Minimal NVD API 2.0 response shape for one CVE.""" + return { + "resultsPerPage": 1, + "startIndex": 0, + "totalResults": 1, + "vulnerabilities": [ + { + "cve": { + "id": "CVE-2024-9999", + "published": "2024-01-15T00:00:00.000", + "metrics": { + "cvssMetricV31": [ + { + "cvssData": { + "baseScore": 9.8, + "baseSeverity": "CRITICAL", + "vectorString": ( + "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/" + "S:U/C:H/I:H/A:H" + ), + } + } + ] + }, + "descriptions": [ + {"lang": "en", "value": "Synthetic test CVE for fixtures."} + ], + } + } + ], + } From 20cede7741d50ce27390738044d78b2ce558772a Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Wed, 20 May 2026 14:39:11 +0300 Subject: [PATCH 3/4] ci: separate smoke tests from unit/integration runs Changes: - pytest.ini: add 'smoke' and 'network' markers, enable --strict-markers - ci.yml: add dedicated 'smoke' job with continue-on-error=true (smoke tests are xfail until day 7, so we don't want them blocking PRs yet) - ci.yml: exclude smoke tests from main integration run via '-m not smoke' Rationale:
smoke tests reproduce known broken state; they should be visible in CI but not block merges. Once the API mismatch is fixed in day 7, we'll flip continue-on-error to false. Refs: STANDOFF.md day 2/30 --- .github/workflows/ci.yml | 26 +++++++++++++++++++++++--- pytest.ini | 6 ++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7f964b2..cecca53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,13 +43,13 @@ jobs: run: | pytest tests/unit/ -v --tb=short - - name: Run integration tests + - name: Run integration tests (excluding smoke) run: | - pytest tests/integration/ -v --tb=short + pytest tests/integration/ -v --tb=short -m "not smoke" - name: Generate coverage report run: | - pytest tests/ --cov=cyberai --cov-report=term-missing --cov-report=xml + pytest tests/ --cov=cyberai --cov-report=term-missing --cov-report=xml -m "not smoke" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -57,6 +57,26 @@ jobs: file: ./coverage.xml fail_ci_if_error: false + smoke: + name: Smoke Tests (end-to-end) + runs-on: ubuntu-latest + continue-on-error: true # smoke tests are xfail until day 7; don't block PRs yet + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + pip install -e . 
+ - name: Run smoke tests + run: | + pytest tests/ -v --tb=short -m smoke + lint: name: Lint runs-on: ubuntu-latest diff --git a/pytest.ini b/pytest.ini index 636004c..2797fb5 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,8 +3,10 @@ testpaths = tests python_files = test_*.py python_classes = Test* python_functions = test_* -addopts = -v --tb=short +addopts = -v --tb=short --strict-markers markers = unit: Unit tests (fast, no external calls) integration: Integration tests (may use mocks) - slow: Slow tests (real network calls) + smoke: End-to-end smoke tests for CLI and pipeline + slow: Slow tests (real network calls, NVD/etc.) + network: Tests that require live network access From f9d37c5fa48bdbd19e0444461d291ebe6e8fa244 Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Wed, 20 May 2026 14:39:30 +0300 Subject: [PATCH 4/4] docs: document 8 known API issues in pre-W1 baseline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lists every broken API contract between CLI, Orchestrator, BaseAgent, agents, and tests, with: - concrete symptom (what error / what doesn't work) - which day of STANDOFF.md fixes it - progress tracker table for visual closure KI-8 (conftest.knowledge_base access) is fixed by this very PR — the fixture now uses .recon_data which is the actual PentestSession field. The other 7 issues will be checked off across days 3-7. This doc serves as a public 'before' snapshot — when day 7 is done, all rows turn green and the file gets archived. 
Refs: STANDOFF.md day 2/30 --- docs/architecture/known-issues.md | 99 +++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/architecture/known-issues.md diff --git a/docs/architecture/known-issues.md b/docs/architecture/known-issues.md new file mode 100644 index 0000000..1d2479a --- /dev/null +++ b/docs/architecture/known-issues.md @@ -0,0 +1,99 @@ +# Known Issues — Pre-W1 Baseline + +This document captures the broken state of CyberAI **as of the start of +the 30-day STANDOFF rewrite**. Each item is fixed by a specific day in +the plan; see `STANDOFF.md` for the schedule. + +When all items are checked off, days 1–7 (Reanimation week) are done +and `cyberai scan --dry-run` will work end-to-end. + +## How this was verified + +Smoke tests in `tests/integration/test_cli_smoke.py` reproduce the broken +state via `CliRunner().invoke(cli, ["scan", ..., "--dry-run"])`. They are +marked `@pytest.mark.xfail` until day 7, then un-xfailed to provide +regression protection. + +## The Issues + +### 🔴 KI-1 — CLI ↔ Orchestrator API mismatch +- **What's broken:** `__main__.py` calls `Orchestrator(config)` and + `orchestrator.run_pipeline(session)`. Neither matches the actual API: + `Orchestrator.__init__(phases, authorized_scope, dry_run)` does not + accept `config`, and the method is named `run(target)`. +- **Symptom:** `TypeError` on any `cyberai scan` invocation. +- **Fixed by:** Day 5 (`refactor/orchestrator-v2`) +- **Status:** ❌ broken + +### 🔴 KI-2 — Two competing session classes +- **What's broken:** `PentestSession` (in `core/session.py`) and + `ScanSession` (in `core/scan_session.py`) coexist with different + fields and methods. `__main__.py` uses `PentestSession`; `Orchestrator` + creates `ScanSession`. 
+- **Fixed by:** Day 3 (`refactor/unify-session`) +- **Status:** ❌ broken + +### 🔴 KI-3 — BaseAgent doesn't match what agents use +- **What's broken:** `BaseAgent.__init__(config, audit, session_id)` is + what's declared, but agents access `self.session`, `self.kb`, + `self.memory`, `self.llm` — none of which exist on `BaseAgent`. The + Orchestrator constructs agents as `ReconAgent(kb=session.kb)`, which + also doesn't match. +- **Fixed by:** Day 4 (`refactor/base-agent-contract`) +- **Status:** ❌ broken + +### 🔴 KI-4 — Agents call non-existent methods +- **What's broken:** Several agents call `self._check_iteration_limit()`, + `self._log(...)`, `self.llm.chat(...)` — none of these exist. +- **Fixed by:** Day 4 + Day 6 +- **Status:** ❌ broken + +### 🔴 KI-5 — `Finding` signature mismatch +- **What's broken:** `ReconAgent` builds `Finding(title=..., target=..., + evidence=[...])`, but the `Finding` dataclass has no `target` or + `evidence` fields. +- **Fixed by:** Day 3 +- **Status:** ❌ broken + +### 🔴 KI-6 — `Tool` param name mismatch +- **What's broken:** `Tool` dataclass field is `params`, but every + `_register_tools()` call uses `parameters=...`. +- **Fixed by:** Day 4 +- **Status:** ❌ broken + +### 🔴 KI-7 — `LLMClient.chat()` doesn't exist +- **What's broken:** `ExploitAgent` calls `self.llm.chat(messages=..., + system=...)`. The actual `LLMClient` method is `call()`. +- **Fixed by:** Day 6 +- **Status:** ❌ broken + +### 🔴 KI-8 — `conftest.session_with_recon` accesses non-existent field +- **What's broken:** The original `session_with_recon` fixture in `conftest.py` did + `fresh_session.knowledge_base["recon.nmap"] = ...` but `PentestSession` + has no `knowledge_base` field — only `recon_data` / `intel_data` / + `exploit_data`.
+- **Fixed by:** Day 2 (this PR) — temporarily redirected to `recon_data` +- **Status:** ✅ patched (full unification in day 3) + +## Reproduction + +```bash +# Will raise TypeError before any real work happens: +python -m cyberai scan 127.0.0.1 --dry-run + +# Smoke tests reproduce this state: +pytest tests/integration/test_cli_smoke.py -v +# Expected: 2 xfailed, 1 passed +``` + +## Progress tracker + +| Day | Issue(s) addressed | Status | +|-----|-------------------|--------| +| 1 | (rebrand only) | ✅ | +| 2 | KI-8 | ✅ | +| 3 | KI-2, KI-5 | ⏳ | +| 4 | KI-3, KI-4, KI-6 | ⏳ | +| 5 | KI-1 | ⏳ | +| 6 | KI-7, KI-4 | ⏳ | +| 7 | All checked | ⏳ |