From 858e455455dcf4de6ca120de380ffac77769b938 Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Wed, 20 May 2026 14:32:52 +0300
Subject: [PATCH 1/4] test(e2e): add failing smoke test for 'cyberai scan'
 command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two xfail tests reproduce the current broken state:
- test_cli_scan_dry_run_exits_cleanly: CLI calls Orchestrator(config) and
  orchestrator.run_pipeline(session) — neither matches the actual API
- test_cli_scan_dry_run_produces_output: same root cause

One always-passing sanity check:
- test_cli_help_works: ensures the CLI module at least imports cleanly

xfail(strict=False) — when day 7 fixes the API mismatch, these tests
will XPASS without failing CI. When we un-xfail them in day 7, they
will provide actual regression protection.

Refs: STANDOFF.md day 2/30
---
 tests/integration/test_cli_smoke.py | 67 +++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 tests/integration/test_cli_smoke.py
diff --git a/tests/integration/test_cli_smoke.py b/tests/integration/test_cli_smoke.py
new file mode 100644
index 0000000..80a682a
--- /dev/null
+++ b/tests/integration/test_cli_smoke.py
@@ -0,0 +1,67 @@
+"""
+End-to-end smoke tests for the cyberai CLI.
+
+These tests verify that the entire pipeline runs without crashing,
+even in dry-run mode where no real network calls are made.
+
+Currently most are marked xfail because of known API mismatches between
+__main__.py, Orchestrator, and the agents — see docs/architecture/known-issues.md.
+They will be un-xfailed in day 7 of the STANDOFF plan.
+"""
+from __future__ import annotations
+
+import pytest
+from click.testing import CliRunner
+
+from cyberai.__main__ import cli
+
+
+pytestmark = pytest.mark.smoke
+
+
+@pytest.mark.xfail(
+    reason="Orchestrator/CLI API mismatch — see known-issues.md (fixed in W1)",
+    strict=False,
+)
+def test_cli_scan_dry_run_exits_cleanly():
+    """
+    `cyberai scan <target> --dry-run` should exit with code 0. Only the
+    exit code is asserted; the absence of network calls is not checked.
+
+    Currently fails because __main__.py calls Orchestrator(config) but
+    Orchestrator.__init__ does not accept `config` as positional arg,
+    and calls orchestrator.run_pipeline(session) which does not exist
+    (the method is named `run(target)`).
+    """
+    runner = CliRunner()
+    result = runner.invoke(cli, ["scan", "127.0.0.1", "--dry-run"])
+
+    assert result.exit_code == 0, (
+        f"CLI exited with code {result.exit_code}\n"
+        f"Output:\n{result.output}\n"
+        f"Exception:\n{result.exception!r}"
+    )
+
+
+@pytest.mark.xfail(
+    reason="Same root cause — Orchestrator API mismatch",
+    strict=False,
+)
+def test_cli_scan_dry_run_produces_output():
+    """A successful dry-run scan must print something; a crash must not pass."""
+    result = CliRunner().invoke(cli, ["scan", "example.com", "--dry-run"])
+    # Text printed before a crash (e.g. a banner) must not count as success.
+    assert result.exit_code == 0, f"CLI crashed: {result.exception!r}"
+    assert result.output, "CLI produced no output at all"
+
+
+def test_cli_help_works():
+    """
+    `cyberai --help` must exit with code 0 and mention the `scan` command.
+    A failure here points at import-time breakage in the CLI module.
+    """
+    help_result = CliRunner().invoke(cli, ["--help"])
+    help_text = help_result.output.lower()
+
+    assert help_result.exit_code == 0
+    assert "scan" in help_text

From b40c07bf6513c9888d1945ae826fa11102c95d9b Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Wed, 20 May 2026 14:36:49 +0300
Subject: [PATCH 2/4] test(fixtures): add mocked LLM and NVD fixtures, fix
 broken kb access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- mock_llm_client: MagicMock with .call() / .acall() returning stub strings,
  for tests that exercise agents without real API keys
- mock_nmap_result: realistic nmap output dict for recon-dependent tests
- mock_nvd_response: NVD API 2.0 response shape for intel tests
- session_with_recon: fix broken 'knowledge_base[...]' access — the actual
  PentestSession field is 'recon_data' (this fixture was silently broken)

All fixtures are typed and documented for IDE autocomplete.

Refs: STANDOFF.md day 2/30
---
 tests/conftest.py | 134 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 125 insertions(+), 9 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index f02f26b..812150f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,24 +1,140 @@
+"""
+Shared pytest fixtures for CyberAI test suite.
+
+Note: The `fresh_session` fixture currently uses PentestSession.
+This will change to ScanSession in day 3 of the STANDOFF plan,
+when the two competing session types are unified.
+"""
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
 import pytest
+
 from cyberai.core.config import CyberAIConfig
 from cyberai.core.session import PentestSession
 
+
+# ---------------------------------------------------------------------------
+# Config & sessions
+# ---------------------------------------------------------------------------
+
 @pytest.fixture(scope="session")
-def base_config():
-    """Shared config for all tests — no real API keys needed"""
+def base_config() -> CyberAIConfig:
+    """Shared config for all tests — no real API keys needed."""
     return CyberAIConfig()
 
+
 @pytest.fixture
-def fresh_session():
-    """Fresh session for each test"""
+def fresh_session() -> PentestSession:
+    """A clean session for each test that needs one."""
     return PentestSession(target="testhost.local")
 
+
 @pytest.fixture
-def session_with_recon(fresh_session):
-    """Session pre-loaded with recon data"""
-    fresh_session.knowledge_base["recon.nmap"] = {
+def session_with_recon(fresh_session: PentestSession) -> PentestSession:
+    """Session pre-loaded with synthetic recon data."""
+    fresh_session.recon_data["nmap"] = {
         "ports": [
-            {"port": 80,  "service": "http", "state": "open"},
-            {"port": 22,  "service": "ssh",  "state": "open"},
+            {"port": 80, "service": "http", "state": "open"},
+            {"port": 22, "service": "ssh", "state": "open"},
         ]
     }
     return fresh_session
+
+
+# ---------------------------------------------------------------------------
+# Mocked external services
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def mock_llm_client() -> MagicMock:
+    """
+    A MagicMock that mimics the LLMClient interface.
+
+    `call()` returns a stub string and `acall()` is an AsyncMock that resolves
+    to one, so tests need no real API keys and never hit the network.
+
+    Usage:
+        def test_something(mock_llm_client):
+            mock_llm_client.call.return_value = "custom response"
+            agent = SomeAgent(llm=mock_llm_client, ...)
+    """
+    client = MagicMock()
+    client.call.return_value = "stub LLM response"
+    # Assumes LLMClient.acall is a coroutine: a plain str cannot be awaited.
+    client.acall = AsyncMock(return_value="stub async LLM response")
+    client.model = "stub-model"
+    client.provider = "stub-provider"
+    return client
+
+
+@pytest.fixture
+def mock_nmap_result() -> dict[str, Any]:
+    """
+    Synthetic nmap-style result for "testhost.local": three open TCP ports
+    (22/ssh, 80/http, 443/https) with version banners; nmap is never run.
+    """
+    return {
+        "target": "testhost.local",
+        "ports": [
+            {
+                "port": 22,
+                "protocol": "tcp",
+                "state": "open",
+                "service": "ssh",
+                "version": "OpenSSH 8.9p1 Ubuntu",
+            },
+            {
+                "port": 80,
+                "protocol": "tcp",
+                "state": "open",
+                "service": "http",
+                "version": "Apache 2.4.52",
+            },
+            {
+                "port": 443,
+                "protocol": "tcp",
+                "state": "open",
+                "service": "https",
+                "version": "Apache 2.4.52",
+            },
+        ],
+        "scan_time": "12.3s",
+    }
+
+
+@pytest.fixture
+def mock_nvd_response() -> dict[str, Any]:
+    """NVD API 2.0-shaped response: one synthetic CRITICAL (9.8) CVSS v3.1 CVE."""
+    return {
+        "resultsPerPage": 1,
+        "startIndex": 0,
+        "totalResults": 1,
+        "vulnerabilities": [
+            {
+                "cve": {
+                    "id": "CVE-2024-9999",
+                    "published": "2024-01-15T00:00:00.000",
+                    "metrics": {
+                        "cvssMetricV31": [
+                            {
+                                "cvssData": {
+                                    "baseScore": 9.8,
+                                    "baseSeverity": "CRITICAL",
+                                    "vectorString": (
+                                        "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/"
+                                        "S:U/C:H/I:H/A:H"
+                                    ),
+                                }
+                            }
+                        ]
+                    },
+                    "descriptions": [
+                        {"lang": "en", "value": "Synthetic test CVE for fixtures."}
+                    ],
+                }
+            }
+        ],
+    }

From 20cede7741d50ce27390738044d78b2ce558772a Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Wed, 20 May 2026 14:39:11 +0300
Subject: [PATCH 3/4] ci: separate smoke tests from unit/integration runs

Changes:
- pytest.ini: add 'smoke' and 'network' markers, enable --strict-markers
- ci.yml: add dedicated 'smoke' job with continue-on-error=true (smoke
  tests are xfail until day 7, so we don't want them blocking PRs yet)
- ci.yml: exclude smoke tests from main integration run via '-m not smoke'

Rationale: smoke tests reproduce known broken state; they should be
visible in CI but not block merges. Once the API mismatch is fixed
in day 7, we'll flip continue-on-error to false.

Refs: STANDOFF.md day 2/30
---
 .github/workflows/ci.yml | 26 +++++++++++++++++++++++---
 pytest.ini               |  6 ++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7f964b2..cecca53 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -43,13 +43,13 @@ jobs:
         run: |
           pytest tests/unit/ -v --tb=short
 
-      - name: Run integration tests
+      - name: Run integration tests (excluding smoke)
         run: |
-          pytest tests/integration/ -v --tb=short
+          pytest tests/integration/ -v --tb=short -m "not smoke"
 
       - name: Generate coverage report
         run: |
-          pytest tests/ --cov=cyberai --cov-report=term-missing --cov-report=xml
+          pytest tests/ --cov=cyberai --cov-report=term-missing --cov-report=xml -m "not smoke"
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
@@ -57,6 +57,26 @@ jobs:
           file: ./coverage.xml
           fail_ci_if_error: false
 
+  smoke:
+    name: Smoke Tests (end-to-end)
+    runs-on: ubuntu-latest
+    continue-on-error: true  # smoke tests are xfail until day 7; don't block PRs yet
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest
+          pip install -e .
+      - name: Run smoke tests
+        run: |
+          pytest tests/ -v --tb=short -m smoke
+
   lint:
     name: Lint
     runs-on: ubuntu-latest
diff --git a/pytest.ini b/pytest.ini
index 636004c..2797fb5 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,8 +3,10 @@ testpaths = tests
 python_files = test_*.py
 python_classes = Test*
 python_functions = test_*
-addopts = -v --tb=short
+addopts = -v --tb=short --strict-markers
 markers =
     unit: Unit tests (fast, no external calls)
     integration: Integration tests (may use mocks)
-    slow: Slow tests (real network calls)
+    smoke: End-to-end smoke tests for CLI and pipeline
+    slow: Slow tests (real network calls, NVD/etc.)
+    network: Tests that require live network access

From f9d37c5fa48bdbd19e0444461d291ebe6e8fa244 Mon Sep 17 00:00:00 2001
From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com>
Date: Wed, 20 May 2026 14:39:30 +0300
Subject: [PATCH 4/4] docs: document 8 known API issues in pre-W1 baseline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lists every broken API contract between CLI, Orchestrator, BaseAgent,
agents, and tests, with:
- concrete symptom (what error / what doesn't work)
- which day of STANDOFF.md fixes it
- progress tracker table for visual closure

KI-8 (conftest.knowledge_base access) is fixed by this very PR — the
fixture now uses .recon_data which is the actual PentestSession field.
The other 7 issues will be checked off across days 3-7.

This doc serves as a public 'before' snapshot — when day 7 is done,
all rows turn green and the file gets archived.

Refs: STANDOFF.md day 2/30
---
 docs/architecture/known-issues.md | 99 +++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 docs/architecture/known-issues.md

diff --git a/docs/architecture/known-issues.md b/docs/architecture/known-issues.md
new file mode 100644
index 0000000..1d2479a
--- /dev/null
+++ b/docs/architecture/known-issues.md
@@ -0,0 +1,99 @@
+# Known Issues — Pre-W1 Baseline
+
+This document captures the broken state of CyberAI **as of the start of
+the 30-day STANDOFF rewrite**. Each item is fixed by a specific day in
+the plan; see `STANDOFF.md` for the schedule.
+
+When all items are checked off, days 1–7 (Reanimation week) are done
+and `cyberai scan <target> --dry-run` will work end-to-end.
+
+## How this was verified
+
+Smoke tests in `tests/integration/test_cli_smoke.py` reproduce the broken
+state via `CliRunner().invoke(cli, ["scan", ..., "--dry-run"])`. They are
+marked `@pytest.mark.xfail` until day 7, then un-xfailed to provide
+regression protection.
+
+## The Issues
+
+### 🔴 KI-1 — CLI ↔ Orchestrator API mismatch
+- **What's broken:** `__main__.py` calls `Orchestrator(config)` and
+  `orchestrator.run_pipeline(session)`. Neither matches the actual API:
+  `Orchestrator.__init__(phases, authorized_scope, dry_run)` does not
+  accept `config`, and the method is named `run(target)`.
+- **Symptom:** `TypeError` on any `cyberai scan` invocation.
+- **Fixed by:** Day 5 (`refactor/orchestrator-v2`)
+- **Status:** ❌ broken
+
+### 🔴 KI-2 — Two competing session classes
+- **What's broken:** `PentestSession` (in `core/session.py`) and
+  `ScanSession` (in `core/scan_session.py`) coexist with different
+  fields and methods. `__main__.py` uses `PentestSession`; `Orchestrator`
+  creates `ScanSession`.
+- **Fixed by:** Day 3 (`refactor/unify-session`)
+- **Status:** ❌ broken
+
+### 🔴 KI-3 — BaseAgent doesn't match what agents use
+- **What's broken:** `BaseAgent.__init__(config, audit, session_id)` is
+  what's declared, but agents access `self.session`, `self.kb`,
+  `self.memory`, `self.llm` — none of which exist on `BaseAgent`. The
+  Orchestrator constructs agents as `ReconAgent(kb=session.kb)`, which
+  also doesn't match.
+- **Fixed by:** Day 4 (`refactor/base-agent-contract`)
+- **Status:** ❌ broken
+
+### 🔴 KI-4 — Agents call non-existent methods
+- **What's broken:** Several agents call `self._check_iteration_limit()`,
+  `self._log(...)`, `self.llm.chat(...)` — none of these exist.
+- **Fixed by:** Day 4 + Day 6
+- **Status:** ❌ broken
+
+### 🔴 KI-5 — `Finding` signature mismatch
+- **What's broken:** `ReconAgent` builds `Finding(title=..., target=...,
+  evidence=[...])`, but the `Finding` dataclass has no `target` or
+  `evidence` fields.
+- **Fixed by:** Day 3
+- **Status:** ❌ broken
+
+### 🔴 KI-6 — `Tool` param name mismatch
+- **What's broken:** `Tool` dataclass field is `params`, but every
+  `_register_tools()` call uses `parameters=...`.
+- **Fixed by:** Day 4
+- **Status:** ❌ broken
+
+### 🔴 KI-7 — `LLMClient.chat()` doesn't exist
+- **What's broken:** `ExploitAgent` calls `self.llm.chat(messages=...,
+  system=...)`. The actual `LLMClient` method is `call()`.
+- **Fixed by:** Day 6
+- **Status:** ❌ broken
+
+### 🟢 KI-8 — `conftest.session_with_recon` accesses non-existent field
+- **What's broken:** Original `conftest.py` did
+  `fresh_session.knowledge_base["recon.nmap"] = ...` but `PentestSession`
+  has no `knowledge_base` field — only `recon_data` / `intel_data` /
+  `exploit_data`.
+- **Fixed by:** Day 2 (this PR) — temporarily redirected to `recon_data`
+- **Status:** ✅ patched (full unification in day 3)
+
+## Reproduction
+
+```bash
+# Will raise TypeError before any real work happens:
+python -m cyberai scan 127.0.0.1 --dry-run
+
+# Smoke tests reproduce this state:
+pytest tests/integration/test_cli_smoke.py -v
+# Expected: 2 xfailed, 1 passed
+```
+
+## Progress tracker
+
+| Day | Issue(s) addressed | Status |
+|-----|-------------------|--------|
+| 1   | (rebrand only)    | ✅     |
+| 2   | KI-8              | ✅     |
+| 3   | KI-2, KI-5        | ⏳     |
+| 4   | KI-3, KI-4, KI-6  | ⏳     |
+| 5   | KI-1              | ⏳     |
+| 6   | KI-7, KI-4        | ⏳     |
+| 7   | All checked       | ⏳     |