Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
ci:
autoupdate_commit_msg: "chore: update pre-commit hooks"

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v6.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
Expand All @@ -9,14 +12,24 @@ repos:
- id: check-merge-conflict
- id: debug-statements

- repo: https://github.com/psf/black
rev: 23.3.0
- repo: https://github.com/crate-ci/typos
rev: v1
hooks:
- id: typos
files: \.(py|md|rst|yaml|toml)
exclude: pyproject.toml

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.10
hooks:
- id: black
language_version: python3
# Run the linter.
- id: ruff
args: ["--fix"]
# Run the formatter.
- id: ruff-format

- repo: https://github.com/pycqa/isort
rev: 5.12.0
- repo: https://github.com/pre-commit/mirrors-prettier
rev: "v4.0.0-alpha.8" # Use the sha or tag you want to point at
hooks:
- id: isort
args: ["--profile", "black"]
- id: prettier
types_or: ["javascript", "css"]
4 changes: 1 addition & 3 deletions codeclash/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
from codeclash.agents.utils import GameContext


def get_agent(
config: dict, game_context: GameContext, environment: DockerEnvironment
) -> Player:
def get_agent(config: dict, game_context: GameContext, environment: DockerEnvironment) -> Player:
agents = {
"dummy": Dummy,
"mini": MiniSWEAgent,
Expand Down
38 changes: 11 additions & 27 deletions codeclash/agents/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from codeclash.agents.utils import GameContext
from codeclash.constants import GH_ORG
from codeclash.tournaments.utils.git_utils import filter_git_diff
from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container
from codeclash.utils.environment import assert_zero_exit_code, create_file_in_container
from codeclash.utils.log import get_logger

load_dotenv()
Expand All @@ -25,7 +25,7 @@ def __init__(
self.config = config
self.name = config["name"]
self._player_unique_id = uuid.uuid4()
"""Unique ID that doesn't clash even accross multiple games. Used for git tags."""
"""Unique ID that doesn't clash even across multiple games. Used for git tags."""
self.environment = environment
self.game_context = game_context
self.logger = get_logger(
Expand Down Expand Up @@ -54,9 +54,7 @@ def post_run_hook(self, *, round: int) -> None:
"""Should be called after we called the run method."""
self._commit()
self._metadata["diff"][round] = self._get_round_diff(round)
self._metadata["incremental_diff"][round] = self._get_round_diff(
round, incremental=True
)
self._metadata["incremental_diff"][round] = self._get_round_diff(round, incremental=True)

@abstractmethod
def run(self) -> None:
Expand All @@ -79,23 +77,15 @@ def push(self) -> None:
"git push origin --tags",
]:
assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger)
self.logger.info(
f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})"
)
self.logger.info(f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})")

def reset_and_apply_patch(
self, patch: str, *, base_commit: str = "", filter_patch: bool = True
) -> None:
"""Clean all uncommited changes. If base_commit is provided, reset to that commit.
def reset_and_apply_patch(self, patch: str, *, base_commit: str = "", filter_patch: bool = True) -> None:
"""Clean all uncommitted changes. If base_commit is provided, reset to that commit.
Then apply the patch to the codebase.
"""
# Need to clean before we copy over the patch (else it's gonna be removed by git clean)
self.logger.debug(
assert_zero_exit_code(
self.environment.execute(
f"git reset --hard {base_commit} && git clean -fd"
)
)
assert_zero_exit_code(self.environment.execute(f"git reset --hard {base_commit} && git clean -fd"))
)

patch = filter_git_diff(patch) if filter_patch else patch
Expand All @@ -104,7 +94,7 @@ def reset_and_apply_patch(
self.logger.debug("No patch to apply, skipping")
return

create_file_on_container(
create_file_in_container(
container=self.environment, # type: ignore
content=patch,
dest_path="tmp_patch.txt",
Expand All @@ -115,19 +105,15 @@ def reset_and_apply_patch(
commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"]
for cmd in commands:
self.logger.debug(f"Executing command: {cmd}")
out = assert_zero_exit_code(
self.environment.execute(cmd), logger=self.logger
)
out = assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger)
self.logger.debug(out)

# --- Helper methods ---

def _tag_round(self, round: int) -> None:
    """Create an annotated git tag for the codebase state at ``round``.

    The tag name is produced by ``_get_round_tag_name`` and the command is
    executed inside ``self.environment``. The execution result is handed to
    ``assert_zero_exit_code`` together with this player's logger
    (presumably failing loudly on a non-zero exit code -- confirm in
    ``codeclash.utils.environment``).
    """
    tag_name = self._get_round_tag_name(round)
    tag_command = f"git tag -a {tag_name} -m 'Round {round} Update'"
    execution_result = self.environment.execute(tag_command)
    assert_zero_exit_code(execution_result, logger=self.logger)

Expand Down Expand Up @@ -164,9 +150,7 @@ def _get_round_diff(self, round: int, *, incremental: bool = False) -> str:
previous_round_tag = self._get_round_tag_name(0)
current_round_tag = self._get_round_tag_name(round)
out = assert_zero_exit_code(
self.environment.execute(
f"git diff {previous_round_tag}..{current_round_tag}"
),
self.environment.execute(f"git diff {previous_round_tag}..{current_round_tag}"),
logger=self.logger,
)
return out["output"]
17 changes: 5 additions & 12 deletions codeclash/agents/minisweagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from codeclash.agents.abstract import Player
from codeclash.agents.utils import GameContext, resolve_api_key
from codeclash.utils.environment import copy_file_to_container
from codeclash.utils.environment import copy_to_container


class ClashAgent(DefaultAgent):
Expand Down Expand Up @@ -47,9 +47,7 @@ def add_message(self, role: str, content: str, **kwargs):
super().add_message(role, content, **kwargs)
self.logger.debug(f"[{role}] {content}", extra={"highlighter": None})
if role == "assistant":
self.logger.info(
f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})"
)
self.logger.info(f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})")

def render_template(self, template: str, **kwargs) -> str:
cs = (
Expand All @@ -69,9 +67,7 @@ def run(self) -> tuple[str, str]:
class MiniSWEAgent(Player):
"""Player with agentic code editing capabilities"""

def __init__(self, config: dict, environment: DockerEnvironment, game_context: GameContext):
    """Set up the player by delegating entirely to the parent initializer.

    ``config`` is passed positionally; ``environment`` and ``game_context``
    are forwarded by keyword, so their order in this signature does not
    need to match the parent's.
    """
    super().__init__(
        config,
        game_context=game_context,
        environment=environment,
    )

def run(self):
Expand All @@ -96,18 +92,15 @@ def run(self):
result = exc_message
print(exc_message)
finally:
traj_path = (
self.game_context.log_local
/ f"{self.name}_r{self.game_context.round}.traj.json"
)
traj_path = self.game_context.log_local / f"{self.name}_r{self.game_context.round}.traj.json"
save_traj(
self.agent, # type: ignore
traj_path,
exit_status=exit_status,
result=result,
print_fct=self.logger.debug,
)
copy_file_to_container(
copy_to_container(
self.environment,
traj_path,
self.game_context.log_env / traj_path.name,
Expand Down
6 changes: 2 additions & 4 deletions codeclash/agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def resolve_api_key(model: str) -> str:
return os.getenv("ANTHROPIC_API_KEY")
if "gpt" in model:
return os.getenv("OPENAI_API_KEY")
return ""


@dataclass
Expand All @@ -38,10 +39,7 @@ class GameContext:

def _render_prompt_templates(self) -> dict:
    """Render every entry of ``self.prompts`` against this context's fields.

    The dataclass is converted to a plain dict once via ``asdict`` and that
    dict is supplied as keyword arguments to each template's ``render``.

    Returns:
        A dict with the same keys as ``self.prompts`` whose values are the
        rendered template strings.
    """
    template_vars = asdict(self)
    rendered: dict = {}
    for key, template_str in self.prompts.items():
        rendered[key] = Template(template_str).render(**template_vars)
    return rendered

def to_template_vars(self) -> dict[str, str]:
"""Convert the GameContext to a dictionary for rendering prompts in the agent"""
Expand Down
61 changes: 35 additions & 26 deletions codeclash/games/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import subprocess
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from minisweagent.environments.docker import DockerEnvironment

Expand All @@ -12,6 +14,28 @@
from codeclash.utils.log import get_logger


@dataclass
class RoundStats:
winner: str
scores: dict[str, float] # Map of player to game metric (e.g. # of wins, assets accumulated)
details: dict[str, Any] = None # Optional, for game-specific info

def __str__(self) -> str:
return "\n".join([f"- Winner: {self.winner}", f"- Scores: {self.scores}"])


@dataclass
class RoundData:
    """Raw outputs collected from executing one round of a game."""

    # Log output(s) produced while the round ran.
    logs: list[str]
    # Result output(s) of the round; `run_round` passes these to `get_stats`.
    results: list[str]


@dataclass
class RoundRecord:
    """Pairs a round's raw outputs with the statistics derived from them."""

    # Raw logs and results of the round.
    data: RoundData
    # Winner and scores computed for the round.
    stats: RoundStats


class CodeGame(ABC):
name: str

Expand Down Expand Up @@ -41,9 +65,7 @@ def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path):
}
self.log_env: Path = (DIR_WORK / DIR_LOGS / self.game_id).resolve()
self.log_local: Path = local_output_dir
self.logger = get_logger(
self.name, log_path=self.log_local / "game.log", emoji="🏓"
)
self.logger = get_logger(self.name, log_path=self.log_local / "game.log", emoji="🏓")
self.environment: DockerEnvironment = self.get_environment()
"""The running docker environment for executing the game"""

Expand Down Expand Up @@ -87,9 +109,7 @@ def build_image(self):
if result.returncode == 0:
self.logger.info(f"✅ Built Docker image {self.image_name}")
else:
self.logger.error(
f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}"
)
self.logger.error(f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}")
raise RuntimeError(f"Failed to build Docker image: {result.stderr}")

def get_metadata(self) -> dict:
Expand Down Expand Up @@ -146,48 +166,37 @@ def _pre_round_setup(self, agents: list[Player]):
)

@abstractmethod
def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats:
    """Derive the round's statistics (including the winner) from its result output.

    Args:
        result_outputs: The specific output(s) containing winning information
        agents: List of agents participating in the round

    Returns:
        RoundStats object
    """
    ...

@abstractmethod
def execute_round(self, agents: list[Player]) -> RoundData:
    """Run the game-specific logic for one round; implemented by subclasses.

    This is the low-level hook. Callers usually want ``run_round`` instead,
    which performs the pre-round setup first and computes the round's
    statistics afterwards.

    Returns:
        RoundData object
    """
    ...

def run_round(self, agents: list[Player]) -> RoundRecord:
    """
    Run a single round of the game with the given agents.

    Performs the pre-round setup, executes the round, and computes its
    statistics from the round's result outputs. The raw data and the stats
    are returned together as a RoundRecord. All bookkeeping should be
    handled by the tournament class.
    """
    self._pre_round_setup(agents)
    round_data = self.execute_round(agents)
    return RoundRecord(
        data=round_data,
        stats=self.get_stats(round_data.results, agents),
    )
Loading