From 68de91b7f82aa73a9f0780c4c718389e8a8a395c Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Fri, 22 May 2026 09:04:05 +0300 Subject: [PATCH 1/4] refactor(core): redesign BaseAgent with explicit deps (KI-3, KI-4, KI-6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BaseAgent.__init__ now takes (config, session, llm=None, audit=None) and exposes everything agents actually reference: self.config, self.session, self.kb, self.llm, self.audit, self.memory New on the base class: - self.kb — KnowledgeBase, taken from session or created fresh - self.memory — AgentMemory for multi-turn LLM context (ExploitAgent) - _check_iteration_limit() — raises AgentIterationLimitError past config.max_agent_iterations; agents call it before major steps (KI-4) - _log(msg, data) — alias for log(); several agents call _log (KI-4) Tool dataclass: accepts both 'params' and 'parameters' kwargs, kept in sync via __post_init__. Every agent registers tools with parameters=... so this closes KI-6 without touching a single agent file. New helpers: - AgentMemory — add()/to_messages()/system/clear() - AgentIterationLimitError — raised on runaway loops Agents themselves are migrated to this contract in day 6; this commit only widens/fixes the base class. Refs: STANDOFF.md day 4/30 --- cyberai/core/base_agent.py | 179 +++++++++++++++++++++++++++++++------ 1 file changed, 154 insertions(+), 25 deletions(-) diff --git a/cyberai/core/base_agent.py b/cyberai/core/base_agent.py index db3184c..97746fb 100644 --- a/cyberai/core/base_agent.py +++ b/cyberai/core/base_agent.py @@ -1,61 +1,190 @@ +""" +BaseAgent — abstract base for all CyberAI agents. + +Day 4 of STANDOFF rewrite: this redesign closes KI-3, KI-4, KI-6. 
+ +Every agent now receives explicit dependencies (config, session, llm, +audit) and exposes the attributes agents actually use: + self.config, self.session, self.kb, self.llm, self.audit, self.memory +plus helper methods _check_iteration_limit() and _log(). +""" +from __future__ import annotations + from abc import ABC, abstractmethod -from typing import Any, Dict, Callable from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional + +from rich.console import Console + from .config import CyberAIConfig +from .knowledge_base import KnowledgeBase from .logger import AuditLogger -from rich.console import Console + +if TYPE_CHECKING: + from .llm_client import LLMClient + from .scan_session import ScanSession console = Console() + +# ── Tool ────────────────────────────────────────────────────────────── + + @dataclass class Tool: - name: str + """ + A callable capability an agent can invoke. + + `params` is the canonical field. `parameters` is accepted as an + alias for backward compatibility — all existing agents register + tools with `parameters=...` (KI-6). Pass either; they are kept + in sync. + """ + name: str description: str - func: Callable - params: Dict[str, str] = field(default_factory=dict) + func: Callable + params: Dict[str, str] = field(default_factory=dict) + parameters: Optional[Dict[str, str]] = None + + def __post_init__(self) -> None: + # KI-6: agents pass parameters=...; mirror it into params. + if self.parameters is not None and not self.params: + self.params = self.parameters + # Keep parameters readable as an alias too. + self.parameters = self.params + + +# ── AgentMemory ─────────────────────────────────────────────────────── + + +class AgentMemory: + """ + Minimal multi-turn conversation memory for agents that talk to an LLM + across several steps (ExploitAgent in particular — KI-4). 
+ """ + + def __init__(self) -> None: + self._messages: List[Dict[str, str]] = [] + self._system: Optional[str] = None + + def add(self, role: str, content: str) -> None: + """Add a message. role='system' is stored separately.""" + if role == "system": + self._system = content + else: + self._messages.append({"role": role, "content": content}) + + def to_messages(self) -> List[Dict[str, str]]: + """Return the message list (excluding system) for an LLM call.""" + return list(self._messages) + + @property + def system(self) -> Optional[str]: + return self._system + + def clear(self) -> None: + self._messages.clear() + self._system = None + + +# ── AgentIterationLimitError ────────────────────────────────────────── + + +class AgentIterationLimitError(RuntimeError): + """Raised when an agent exceeds config.max_agent_iterations.""" + + +# ── BaseAgent ───────────────────────────────────────────────────────── + class BaseAgent(ABC): """ Abstract base class for all CyberAI agents. - Each agent: has a role, a tool registry, and access to shared KB. + + Agents are constructed with explicit dependencies so they are easy + to test (everything is injectable / mockable): + + agent = ReconAgent(config, session, llm, audit) + result = agent.run(target) """ + AGENT_NAME: str = "base" ROLE: str = "Generic Agent" def __init__( self, - config: CyberAIConfig, - audit: AuditLogger, - session_id: str = "unknown" - ): - self.config = config - self.audit = audit - self.session_id = session_id + config: CyberAIConfig, + session: "ScanSession", + llm: Optional["LLMClient"] = None, + audit: Optional[AuditLogger] = None, + ) -> None: + self.config = config + self.session = session + self.llm = llm + # KB is taken from the session if present, else a fresh one. 
+ self.kb: KnowledgeBase = getattr(session, "kb", None) + if not isinstance(self.kb, KnowledgeBase): + # no session kb, or a legacy plain-dict kb: start a fresh one (no `or` fallback: an empty KnowledgeBase is falsy via __len__) + self.kb = KnowledgeBase() + self.audit = audit or AuditLogger( + session_id=getattr(session, "session_id", "unknown") + ) + self.memory = AgentMemory() + self.tools: Dict[str, Tool] = {} + self._iterations: int = 0 + self._register_tools() - def register_tool(self, tool: Tool): + # ── tool registry ───────────────────────────────────────────────── + + def register_tool(self, tool: Tool) -> None: self.tools[tool.name] = tool @abstractmethod - def _register_tools(self): - """Register agent-specific tools""" - pass + def _register_tools(self) -> None: + """Register agent-specific tools.""" @abstractmethod - def run(self, target: str, context: Dict[str, Any] = None) -> Dict[str, Any]: - """Main agent execution — returns findings dict""" - pass + def run(self, target: str, context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """Main agent execution — returns a result dict.""" - def call_tool(self, tool_name: str, **kwargs) -> Any: + def call_tool(self, tool_name: str, **kwargs: Any) -> Any: if tool_name not in self.tools: - raise ValueError(f"Tool '{tool_name}' not registered in {self.AGENT_NAME}") + raise ValueError( + f"Tool '{tool_name}' not registered in {self.AGENT_NAME}" + ) tool = self.tools[tool_name] self.audit.agent_action(self.AGENT_NAME, f"calling tool: {tool_name}", kwargs) console.print(f"[dim cyan][{self.AGENT_NAME}] → {tool_name}[/dim cyan]") - result = tool.func(**kwargs) - return result + return tool.func(**kwargs) + + # ── iteration safety ────────────────────────────────────────────── + + def _check_iteration_limit(self) -> None: + """ + Increment the step counter and raise if the agent has exceeded + config.max_agent_iterations. Called by agents before each major + step to prevent runaway loops (KI-4). 
+ """ + self._iterations += 1 + limit = getattr(self.config, "max_agent_iterations", 10) + if self._iterations > limit: + raise AgentIterationLimitError( + f"{self.AGENT_NAME} exceeded {limit} iterations" + ) - def log(self, msg: str, data: Any = None): + # ── logging ─────────────────────────────────────────────────────── + + def log(self, msg: str, data: Any = None) -> None: + """Structured log + console echo.""" self.audit.agent_action(self.AGENT_NAME, msg, data) console.print(f"[cyan][{self.AGENT_NAME}][/cyan] {msg}") + + def _log(self, msg: str, data: Any = None) -> None: + """ + Alias for log(). Several agents call self._log(...) (KI-4). + Some legacy call sites pass (event, data) — both forms work + since the first arg is just the message string. + """ + self.log(msg, data) From f78e2eb5d9efc0fc5c7b386b0f3c035fbdedaeb6 Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Fri, 22 May 2026 13:02:33 +0300 Subject: [PATCH 2/4] refactor(core): make KnowledgeBase agent-optional, add dict access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four changes needed before agents migrate to the new BaseAgent (day 6): - set() agent param now defaults to 'unknown' — agents like ExploitAgent call self.kb.set('exploit', result) without naming themselves - get() gained a default param: kb.get(key, default) - mutable default tags=[] bug fixed (was shared across all entries) - datetime.utcnow() replaced with timezone-aware datetime.now(timezone.utc) Added dict-like access (__contains__, __getitem__, __setitem__, __len__) since some agents treat the KB like a dict (kb['recon.nmap']). 
Refs: STANDOFF.md day 4/30 --- cyberai/core/knowledge_base.py | 63 +++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/cyberai/core/knowledge_base.py b/cyberai/core/knowledge_base.py index 69a56e1..26ba472 100644 --- a/cyberai/core/knowledge_base.py +++ b/cyberai/core/knowledge_base.py @@ -1,32 +1,54 @@ -from typing import Any, Dict, List, Optional +""" +KnowledgeBase — shared memory store for all agents in a session. + +Day 4 of STANDOFF: `agent` is now optional (defaults to "unknown") so +agents can write quick entries without always naming themselves; the +mutable default `tags=[]` bug is fixed; datetime is timezone-aware. +""" +from __future__ import annotations + from dataclasses import dataclass, field -from datetime import datetime +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional + + +def _now() -> str: + return datetime.now(timezone.utc).isoformat() + @dataclass class KBEntry: - key: str - value: Any - agent: str - timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat()) - tags: List[str] = field(default_factory=list) + key: str + value: Any + agent: str = "unknown" + timestamp: str = field(default_factory=_now) + tags: List[str] = field(default_factory=list) + class KnowledgeBase: """ Shared memory store for all agents in a session. Agents read/write through trust-validated keys. 
""" - def __init__(self): + + def __init__(self) -> None: self._store: Dict[str, KBEntry] = {} self._history: List[KBEntry] = [] - def set(self, key: str, value: Any, agent: str, tags: List[str] = []): - entry = KBEntry(key=key, value=value, agent=agent, tags=tags) + def set( + self, + key: str, + value: Any, + agent: str = "unknown", + tags: Optional[List[str]] = None, + ) -> None: + entry = KBEntry(key=key, value=value, agent=agent, tags=tags or []) self._store[key] = entry self._history.append(entry) - def get(self, key: str) -> Optional[Any]: + def get(self, key: str, default: Any = None) -> Optional[Any]: entry = self._store.get(key) - return entry.value if entry else None + return entry.value if entry else default def get_by_tag(self, tag: str) -> Dict[str, Any]: return { @@ -45,3 +67,20 @@ def history(self) -> List[Dict]: {"key": e.key, "agent": e.agent, "timestamp": e.timestamp} for e in self._history ] + + # ── dict-like access ────────────────────────────────────────────── + # Some agents treat the KB like a dict (kb["recon.nmap"]). 
+ + def __contains__(self, key: str) -> bool: + return key in self._store + + def __getitem__(self, key: str) -> Any: + if key not in self._store: + raise KeyError(key) + return self._store[key].value + + def __setitem__(self, key: str, value: Any) -> None: + self.set(key, value) + + def __len__(self) -> int: + return len(self._store) From 2eb3a05e54622012a3911aadcb169dbca3c69052 Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Fri, 22 May 2026 13:03:39 +0300 Subject: [PATCH 3/4] test(core): cover redesigned BaseAgent, Tool, AgentMemory (12 tests) --- tests/unit/test_base_agent.py | 130 ++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 tests/unit/test_base_agent.py diff --git a/tests/unit/test_base_agent.py b/tests/unit/test_base_agent.py new file mode 100644 index 0000000..25fecf6 --- /dev/null +++ b/tests/unit/test_base_agent.py @@ -0,0 +1,130 @@ +"""Tests for the redesigned BaseAgent contract — day 4 of STANDOFF.""" +from __future__ import annotations + +import pytest + +from cyberai.core.base_agent import ( + AgentIterationLimitError, + AgentMemory, + BaseAgent, + Tool, +) +from cyberai.core.config import CyberAIConfig +from cyberai.core.scan_session import ScanSession + + +# ── a minimal concrete agent for testing ────────────────────────────── + + +class DummyAgent(BaseAgent): + AGENT_NAME = "dummy" + ROLE = "Test Agent" + + def _register_tools(self) -> None: + self.register_tool(Tool( + name="echo", + description="returns its input", + func=lambda value: value, + parameters={"value": "str"}, + )) + + def run(self, target, context=None): + return {"target": target, "ok": True} + + +@pytest.fixture +def dummy_agent(): + config = CyberAIConfig() + session = ScanSession(target="testhost.local") + return DummyAgent(config, session) + + +# ── Tool ────────────────────────────────────────────────────────────── + + +def test_tool_parameters_alias_synced(): + t = Tool(name="x", 
description="d", func=lambda: 1, parameters={"a": "str"}) + assert t.params == {"a": "str"} + assert t.parameters == {"a": "str"} + + +def test_tool_params_directly(): + t = Tool(name="x", description="d", func=lambda: 1, params={"b": "int"}) + assert t.parameters == {"b": "int"} + + +# ── AgentMemory ─────────────────────────────────────────────────────── + + +def test_memory_stores_messages(): + m = AgentMemory() + m.add("user", "hello") + m.add("assistant", "hi") + assert m.to_messages() == [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ] + + +def test_memory_system_kept_separate(): + m = AgentMemory() + m.add("system", "you are a pentester") + m.add("user", "scan this") + assert m.system == "you are a pentester" + assert m.to_messages() == [{"role": "user", "content": "scan this"}] + + +def test_memory_clear(): + m = AgentMemory() + m.add("user", "x") + m.add("system", "s") + m.clear() + assert m.to_messages() == [] + assert m.system is None + + +# ── BaseAgent construction ──────────────────────────────────────────── + + +def test_agent_exposes_expected_attrs(dummy_agent): + for attr in ("config", "session", "kb", "llm", "audit", "memory", "tools"): + assert hasattr(dummy_agent, attr), f"missing {attr}" + + +def test_agent_registers_tools(dummy_agent): + assert "echo" in dummy_agent.tools + + +def test_agent_call_tool(dummy_agent): + assert dummy_agent.call_tool("echo", value="ping") == "ping" + + +def test_agent_call_unknown_tool_raises(dummy_agent): + with pytest.raises(ValueError, match="not registered"): + dummy_agent.call_tool("nope") + + +def test_agent_run_returns_dict(dummy_agent): + result = dummy_agent.run("example.com") + assert result == {"target": "example.com", "ok": True} + + +# ── iteration limit (KI-4) ──────────────────────────────────────────── + + +def test_iteration_limit_raises_past_max(): + config = CyberAIConfig() + config.max_agent_iterations = 3 + agent = DummyAgent(config, 
ScanSession(target="x")) + + # 3 allowed + for _ in range(3): + agent._check_iteration_limit() + # 4th trips + with pytest.raises(AgentIterationLimitError, match="exceeded 3"): + agent._check_iteration_limit() + + +def test_log_and_alias_do_not_crash(dummy_agent): + dummy_agent.log("a message") + dummy_agent._log("aliased message", data={"k": "v"}) From c92755c5bc13cb5fcc32b03d4a3f5d7a14d2bab1 Mon Sep 17 00:00:00 2001 From: Evgeny Kiriyak <224408464+evkir@users.noreply.github.com> Date: Fri, 22 May 2026 13:04:20 +0300 Subject: [PATCH 4/4] docs: mark KI-3, KI-4, KI-6 as fixed (6/8 closed) --- docs/architecture/known-issues.md | 51 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/docs/architecture/known-issues.md b/docs/architecture/known-issues.md index 05c112f..7df2d24 100644 --- a/docs/architecture/known-issues.md +++ b/docs/architecture/known-issues.md @@ -11,27 +11,26 @@ neither matches the actual API. **Fixed by:** Day 5. ### 🟢 KI-2 — Two competing session classes ✅ FIXED IN DAY 3 `scan_session.py` is now the single source of truth. `session.py` is a -backward-compat shim where `PentestSession` is a subclass of -`ScanSession` preserving legacy attributes. All 8 import sites work -unchanged. Verified by `tests/unit/test_session_shim.py`. +backward-compat shim. Verified by `tests/unit/test_session_shim.py`. -### 🔴 KI-3 — BaseAgent doesn't match what agents use -Agents access `self.session`, `self.kb`, `self.memory`, `self.llm` — -none exist on `BaseAgent`. **Fixed by:** Day 4. +### 🟢 KI-3 — BaseAgent didn't match what agents use ✅ FIXED IN DAY 4 +`BaseAgent.__init__` now takes `(config, session, llm, audit)` and +exposes `self.session`, `self.kb`, `self.llm`, `self.memory`. Agents are +migrated to actually use this contract in day 6. Verified by +`tests/unit/test_base_agent.py`. -### 🔴 KI-4 — Agents call non-existent methods -`self._check_iteration_limit()`, `self._log()`, `self.llm.chat()` — -none exist. 
**Fixed by:** Day 4 + Day 6. +### 🟢 KI-4 — Agents called non-existent methods ✅ FIXED IN DAY 4 +`_check_iteration_limit()` and `_log()` now exist on `BaseAgent`. +`AgentMemory` (with `add()`/`to_messages()`) backs `self.memory`. +`self.llm.chat()` is addressed in day 6 when ExploitAgent is migrated to +`self.llm.call()`. Verified by `tests/unit/test_base_agent.py`. -### 🟢 KI-5 — Finding signature mismatch ✅ FIXED IN DAY 3 -`Finding` now has `target`, `evidence`, `cve_ids` fields with -backward-compat `cve` ↔ `cve_ids` syncing. `ScanSession.add_finding()` -auto-fills `target` from `session.target`. Verified by -`tests/unit/test_finding_model.py`. +### 🟢 KI-5 — Finding signature mismatch ✅ FIXED IN DAY 3 -### 🔴 KI-6 — `Tool` param name mismatch -`Tool` field is `params`, agents register with `parameters=`. -**Fixed by:** Day 4. +### 🟢 KI-6 — Tool param name mismatch ✅ FIXED IN DAY 4 +`Tool` accepts both `params` and `parameters`, synced via +`__post_init__`. All agents register tools with `parameters=...` so this +was closed without touching any agent file. ### 🔴 KI-7 — `LLMClient.chat()` doesn't exist Actual method is `call()`. **Fixed by:** Day 6. @@ -40,12 +39,12 @@ Actual method is `call()`. **Fixed by:** Day 6. ## Progress tracker -| Day | Issue(s) addressed | Status | -|-----|-------------------|--------| -| 1 | (rebrand only) | ✅ | -| 2 | KI-8 | ✅ | -| 3 | KI-2, KI-5 | ✅ | -| 4 | KI-3, KI-4, KI-6 | ⏳ | -| 5 | KI-1 | ⏳ | -| 6 | KI-7, KI-4 | ⏳ | -| 7 | All checked | ⏳ | +| Day | Issue(s) addressed | Status | +|-----|----------------------|--------| +| 1 | (rebrand only) | ✅ | +| 2 | KI-8 | ✅ | +| 3 | KI-2, KI-5 | ✅ | +| 4 | KI-3, KI-4, KI-6 | ✅ | +| 5 | KI-1 | ⏳ | +| 6 | KI-7, KI-4 (llm.chat)| ⏳ | +| 7 | All checked | ⏳ |