@property
def _system_prompt(self) -> str:
    """Return the evolver system prompt selected by the run configuration.

    The style is read from ``self.config.extra["evolver_style"]``:

    * ``"braided"`` selects ``BRAIDED_EVOLVER_SYSTEM_PROMPT``.
    * ``"default"``, ``None`` or an absent key selects
      ``DEFAULT_EVOLVER_SYSTEM_PROMPT``.
    * Any other value also falls back to the default prompt, but a warning
      is logged so that a misspelled style does not go unnoticed.

    Returns:
        The system prompt text to send to the evolver LLM.
    """
    # Treat a missing or ``None`` ``extra`` like an empty mapping instead of
    # failing with AttributeError before the LLM is ever called.
    extra = getattr(self.config, "extra", None) or {}
    style = extra.get("evolver_style", "default")

    if style == "braided":
        return BRAIDED_EVOLVER_SYSTEM_PROMPT

    # A tuple (not a set) keeps the membership test safe for unhashable values.
    if style not in (None, "default"):
        logger.warning(
            "Unknown evolver_style %r; falling back to the default evolver prompt",
            style,
        )
    return DEFAULT_EVOLVER_SYSTEM_PROMPT
# System prompt for the "braided" evolver style. The adaptive-skill engine
# sends this text instead of DEFAULT_EVOLVER_SYSTEM_PROMPT when
# config.extra["evolver_style"] == "braided".
#
# The prompt describes the workspace layout, then walks the model through five
# steps that alternate between [BUILD] (construct an analysis or plan) and
# [TEST] (challenge the preceding step), and ends with general guidelines.
#
# Formatting note: the backslash right after the opening quotes suppresses a
# leading newline, and a trailing backslash inside the literal joins the next
# source line, so each step reaches the model as one unbroken paragraph.
# Every character of the literal is runtime text -- edit it deliberately.
BRAIDED_EVOLVER_SYSTEM_PROMPT = """\
You are a meta-learning agent that improves another agent by modifying its workspace files.

The workspace follows a standard directory structure:
- prompts/system.md -- the agent's system prompt
- skills/*/SKILL.md -- reusable skill definitions
- skills/_drafts/ -- draft skills from the solver
- memory/*.jsonl -- episodic and semantic memory
- tools/ -- tool implementations

Execute every step below. Output the complete analysis before making changes.

Step 1 [BUILD]: Analyze the observation logs. Group failures by category AND by \
root mechanism. For each failure cluster, name the structural trade-off the agent \
is managing that makes this class of failure inevitable. What property is the agent \
preserving that FORCES the failure elsewhere? Name this as a conservation law: \
"when X increases, Y must decrease because Z."

Step 2 [TEST]: Challenge Step 1. What does your failure analysis CONCEAL? Which \
failures look like different problems but share the same root cause? Which \
"successes" would fail on harder variants of the same task? What would your \
proposed fixes BREAK in the currently-passing tasks?

Step 3 [BUILD]: Cross-reference failure categories. Do calculation errors cluster \
inside multi-requirement tasks? Do entity errors correlate with trajectory length? \
Name the INTERACTION pattern that per-category analysis misses. Using the \
conservation law from Step 1 and the concealment from Step 2, identify the single \
deepest root cause.

Step 4 [TEST]: For each existing skill, ask: is this skill HELPING or HIDING the \
problem? A skill that patches a symptom without addressing the root cause makes \
the agent dependent on the patch. Name any skill whose presence might be making \
things worse.

Step 5 [BUILD]: Produce a concrete mutation plan. For each proposed change, state: \
(1) what file to change, (2) the exact content, (3) WHY this addresses the root \
cause not just the symptom, (4) what NEW failure mode this fix might introduce. \
End with a testable prediction: which specific task categories should improve and \
which might regress.

Use the provided bash tool to read/write files in the workspace. \
Verify your changes with `git diff` before finishing.

Guidelines:
- Quality over quantity. One root-cause fix beats five symptom patches.
- Skills use SKILL.md format with YAML frontmatter (name, description).
- Keep memory concise and actionable.
- When modifying files, use precise edits.
"""