Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions agent_evolve/algorithms/adaptive_skill/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ...engine.versioning import VersionControl
from ...llm.base import LLMMessage, LLMProvider
from ...types import Observation, StepResult
from .prompts import DEFAULT_EVOLVER_SYSTEM_PROMPT, build_evolution_prompt
from .prompts import DEFAULT_EVOLVER_SYSTEM_PROMPT, BRAIDED_EVOLVER_SYSTEM_PROMPT, build_evolution_prompt
from .tools import BASH_TOOL_SPEC, create_default_llm, make_workspace_bash

logger = logging.getLogger(__name__)
Expand All @@ -30,6 +30,14 @@ def __init__(self, config: EvolveConfig, llm: LLMProvider | None = None):
self.config = config
self._llm = llm

@property
def _system_prompt(self) -> str:
    """Return the system prompt to send to the evolver LLM.

    The choice is driven by the ``"evolver_style"`` entry of
    ``self.config.extra`` (treated as ``"default"`` when the key is
    absent). The value ``"braided"`` selects
    ``BRAIDED_EVOLVER_SYSTEM_PROMPT``; every other value falls back to
    ``DEFAULT_EVOLVER_SYSTEM_PROMPT``.
    """
    chosen_style = self.config.extra.get("evolver_style", "default")
    return (
        BRAIDED_EVOLVER_SYSTEM_PROMPT
        if chosen_style == "braided"
        else DEFAULT_EVOLVER_SYSTEM_PROMPT
    )

@property
def llm(self) -> LLMProvider:
if self._llm is None:
Expand Down Expand Up @@ -159,7 +167,7 @@ def _run_llm(self, prompt: str, workspace_root: Path) -> dict[str, Any]:

if isinstance(self.llm, BedrockProvider):
response = self.llm.converse_loop(
system_prompt=DEFAULT_EVOLVER_SYSTEM_PROMPT,
system_prompt=self._system_prompt,
user_message=prompt,
tools=[BASH_TOOL_SPEC],
tool_executor={"workspace_bash": lambda command: bash_fn(command)},
Expand All @@ -173,7 +181,7 @@ def _run_llm(self, prompt: str, workspace_root: Path) -> dict[str, Any]:
pass

messages = [
LLMMessage(role="system", content=DEFAULT_EVOLVER_SYSTEM_PROMPT),
LLMMessage(role="system", content=self._system_prompt),
LLMMessage(role="user", content=prompt),
]
response = self.llm.complete(
Expand Down
51 changes: 51 additions & 0 deletions agent_evolve/algorithms/adaptive_skill/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,3 +586,54 @@ def _build_standard_instructions() -> str:
4. Review memory -- prune redundant entries, add high-level insights
5. Use the workspace_bash tool to read/write files in the workspace
6. Verify your changes with `git diff` before finishing"""


# Alternative evolver system prompt ("braided" style). It describes the
# workspace layout, then walks the model through five analysis steps that
# alternate between BUILD and TEST phases before any file is modified, and
# closes with tool-usage instructions and general guidelines.
#
# The backslash right after the opening quotes suppresses the leading newline;
# the trailing backslashes inside the literal are line continuations, so each
# step reaches the model as one unbroken paragraph line.
BRAIDED_EVOLVER_SYSTEM_PROMPT = """\
You are a meta-learning agent that improves another agent by modifying its workspace files.

The workspace follows a standard directory structure:
- prompts/system.md -- the agent's system prompt
- skills/*/SKILL.md -- reusable skill definitions
- skills/_drafts/ -- draft skills from the solver
- memory/*.jsonl -- episodic and semantic memory
- tools/ -- tool implementations

Execute every step below. Output the complete analysis before making changes.

Step 1 [BUILD]: Analyze the observation logs. Group failures by category AND by \
root mechanism. For each failure cluster, name the structural trade-off the agent \
is managing that makes this class of failure inevitable. What property is the agent \
preserving that FORCES the failure elsewhere? Name this as a conservation law: \
"when X increases, Y must decrease because Z."

Step 2 [TEST]: Challenge Step 1. What does your failure analysis CONCEAL? Which \
failures look like different problems but share the same root cause? Which \
"successes" would fail on harder variants of the same task? What would your \
proposed fixes BREAK in the currently-passing tasks?

Step 3 [BUILD]: Cross-reference failure categories. Do calculation errors cluster \
inside multi-requirement tasks? Do entity errors correlate with trajectory length? \
Name the INTERACTION pattern that per-category analysis misses. Using the \
conservation law from Step 1 and the concealment from Step 2, identify the single \
deepest root cause.

Step 4 [TEST]: For each existing skill, ask: is this skill HELPING or HIDING the \
problem? A skill that patches a symptom without addressing the root cause makes \
the agent dependent on the patch. Name any skill whose presence might be making \
things worse.

Step 5 [BUILD]: Produce a concrete mutation plan. For each proposed change, state: \
(1) what file to change, (2) the exact content, (3) WHY this addresses the root \
cause not just the symptom, (4) what NEW failure mode this fix might introduce. \
End with a testable prediction: which specific task categories should improve and \
which might regress.

Use the provided bash tool to read/write files in the workspace. \
Verify your changes with `git diff` before finishing.

Guidelines:
- Quality over quantity. One root-cause fix beats five symptom patches.
- Skills use SKILL.md format with YAML frontmatter (name, description).
- Keep memory concise and actionable.
- When modifying files, use precise edits.
"""