diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cecca53..96f9c94 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,6 @@ jobs: smoke: name: Smoke Tests (end-to-end) runs-on: ubuntu-latest - continue-on-error: true # smoke tests are xfail until day 7; don't block PRs yet steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index 6f1aaec..5f52450 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,6 @@ config.yml *.json.bak .pytest_cache/ htmlcov/ + +# audit logs from scan runs +reports/*.jsonl diff --git a/CHANGELOG.md b/CHANGELOG.md index 24d9d38..9380a03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,54 +1,23 @@ # Changelog -## [1.0.0] — 2026-05-17 +All notable changes to CyberAI are documented here. -### Added - -**Core** -- AsyncPipeline — parallel recon, sequential intel/exploit/report -- AsyncBaseAgent — run_tool() + run_tools_parallel() via asyncio -- PipelineRecovery — HARD STOP recon, SOFT FAIL intel/exploit/report -- SessionSigning — HMAC-SHA256 tamper-evident audit trail -- AgentTimeoutManager — per-agent configurable timeouts -- Safety decorators — @sanitize_input, @require_scope, @enforce_trust_boundary - -**Agents** -- AsyncReconAgent — parallel nmap + DNS + TLS via asyncio.gather() -- AsyncIntelAgent — CVE enrichment via NVD API 2.0 -- AsyncExploitAgent — SSRF + blind XXE + attack chain builder -- ReportAgent — Markdown, HTML, JSON output +## [0.2.0] - 2026-05-25 -**Integrations** -- phantom-grid poller — OOB DNS/HTTP callback confirmation -- reality-probe client — TLS score, cert expiry, weak cipher detection -- TLS CVE mapper — issue → CVE context for IntelAgent +### Reanimation — Week 1 complete -**Safety** -- InputSanitizer — prompt injection detection + length limiting -- AgentTrustBoundary — per-agent KB write permissions -- ScopeValidator — CIDR + domain scope enforcement +Skeleton-to-working pipeline. CyberAI runs end-to-end: `cyberai scan + --dry-run` walks all 4 phases and completes cleanly. -**CLI** -- cyberai scan — --scope, --dry-run, --output, --verbose -- Rich progress bars and spinners -- Dry-run plan table - -**Web API** -- Flask REST API — POST /api/session, GET /api/session/ -- Report serving — GET /api/report/ -- HTML dashboard — dark theme, auto-refresh 5s - -**Hardening** -- Exponential backoff with jitter for NVD API rate limiting -- Graceful nmap timeout — partial results, pipeline continues -- Type aliases centralised in cyberai/core/types.py +### Added +- Unified `ScanSession` state object shared across all components. +- `BaseAgent` contract — consistent agent lifecycle and API. +- End-to-end smoke tests for the `scan` CLI covering all 4 phases. -**Tests** -- 160+ tests across unit and integration suites -- Python 3.11 + 3.12 matrix CI -- ruff lint on every push +### Changed +- Orchestrator rewritten against the new agent contract. +- All 4 agents (recon, intel, exploit, report) migrated to `BaseAgent`. +- `--dry-run` walks the full pipeline with no network calls or API key. -### Stats -- 128 commits -- 30 days -- CI green throughout +### Fixed +- All 8 known issues resolved (KI-1 through KI-8). diff --git a/README.md b/README.md index a11f09d..8f2e2d3 100644 --- a/README.md +++ b/README.md @@ -108,23 +108,31 @@ CyberAI/ ## Quick start **1. Clone and install** + ```bash git clone https://github.com/evkir/CyberAI.git cd CyberAI -python -m venv venv && source venv/bin/activate -pip install -r requirements.txt +pip install -e . ``` +> Prefer isolation? Run `python -m venv venv && source venv/bin/activate` first. + **2. Configure** + ```bash cp config.example.yml config.yml cp .env.example .env -# Edit .env — add your OPENAI_API_KEY or ANTHROPIC_API_KEY +# Edit .env -- add your OPENAI_API_KEY or ANTHROPIC_API_KEY ``` -**3. Run** +**3. Run a scan** + ```bash -python -m cyberai --help +# Dry-run: walks all 4 phases, no network calls, no API key needed +python -m cyberai scan example.com --dry-run + +# Real scan +python -m cyberai scan target.htb ``` --- diff --git a/cyberai/version.py b/cyberai/version.py index 2fd38bb..41e151b 100644 --- a/cyberai/version.py +++ b/cyberai/version.py @@ -1,3 +1,3 @@ -__version__ = "1.0.0" +__version__ = "0.2.0" __author__ = "evkir" __description__ = "CyberAI — AI-native multi-agent pentest platform" diff --git a/tests/integration/test_cli_smoke.py b/tests/integration/test_cli_smoke.py index 80a682a..64b3fbb 100644 --- a/tests/integration/test_cli_smoke.py +++ b/tests/integration/test_cli_smoke.py @@ -4,9 +4,9 @@ These tests verify that the entire pipeline runs without crashing, even in dry-run mode where no real network calls are made. -Currently most are marked xfail because of known API mismatches between -__main__.py, Orchestrator, and the agents — see docs/architecture/known-issues.md. -They will be un-xfailed in day 7 of the STANDOFF plan. +These tests pass end-to-end as of week 1 of the STANDOFF plan: +the CLI, Orchestrator, and agents share a consistent API. +See docs/architecture/known-issues.md for the issues that were resolved. """ from __future__ import annotations @@ -19,10 +19,6 @@ pytestmark = pytest.mark.smoke -@pytest.mark.xfail( - reason="Orchestrator/CLI API mismatch — see known-issues.md (fixed in W1)", - strict=False, -) def test_cli_scan_dry_run_exits_cleanly(): """ `cyberai scan --dry-run` should complete with exit code 0 @@ -43,10 +39,6 @@ def test_cli_scan_dry_run_exits_cleanly(): ) -@pytest.mark.xfail( - reason="Same root cause — Orchestrator API mismatch", - strict=False, -) def test_cli_scan_dry_run_produces_output(): """The scan should produce some textual output, even in dry-run mode.""" runner = CliRunner() @@ -65,3 +57,14 @@ def test_cli_help_works(): assert result.exit_code == 0 assert "scan" in result.output.lower() + + +def test_cli_scan_dry_run_completes_all_phases(): + """Dry-run must reach all 4 phases and finish in `completed` state.""" + runner = CliRunner() + result = runner.invoke(cli, ["scan", "example.com", "--dry-run"]) + assert result.exit_code == 0 + out = result.output.lower() + assert "completed" in out + for phase in ("recon", "intel", "exploit", "report"): + assert phase in out, f"phase {phase} missing from dry-run output"