diff --git a/codeclash/agents/__init__.py b/codeclash/agents/__init__.py index 15c8b6f9..5dd9d0a6 100644 --- a/codeclash/agents/__init__.py +++ b/codeclash/agents/__init__.py @@ -1,33 +1,18 @@ +from minisweagent.environments.docker import DockerEnvironment + from codeclash.agents.abstract import Player from codeclash.agents.dummy import Dummy from codeclash.agents.minisweagent import MiniSWEAgent from codeclash.agents.utils import GameContext -from codeclash.constants import DIR_WORK -from codeclash.games.abstract import CodeGame -def get_agent(config: dict, prompts: dict, game: CodeGame) -> Player: +def get_agent( + config: dict, game_context: GameContext, environment: DockerEnvironment +) -> Player: agents = { "dummy": Dummy, "mini": MiniSWEAgent, }.get(config["agent"]) if agents is None: raise ValueError(f"Unknown agent type: {config['agent']}") - environment = game.get_environment( - f"{game.game_id}.{config['name']}" - ) # NOTE: MUST be branch_name (defined in agents/abstract.py) - return agents( - config, - environment, - GameContext( - id=game.game_id, - log_env=game.log_env, - log_local=game.log_local, - name=game.name, - player_id=config["name"], - prompts=prompts, - round=1, - rounds=game.rounds, - working_dir=str(DIR_WORK), - ), - ) + return agents(config, environment, game_context) diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index ec2c1a72..ed47b8da 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -1,12 +1,14 @@ import os +import uuid from abc import ABC, abstractmethod from dotenv import load_dotenv -from minisweagent import Environment +from minisweagent.environments.docker import DockerEnvironment from codeclash.agents.utils import GameContext from codeclash.constants import GH_ORG -from codeclash.utils.environment import assert_zero_exit_code +from codeclash.tournaments.utils.git_utils import filter_git_diff +from codeclash.utils.environment import assert_zero_exit_code, 
create_file_on_container from codeclash.utils.log import get_logger load_dotenv() @@ -16,41 +18,52 @@ class Player(ABC): def __init__( self, config: dict, - environment: Environment, + environment: DockerEnvironment, game_context: GameContext, - ): + ) -> None: self.config = config self.name = config["name"] + self._player_unique_id = uuid.uuid4() + """Unique ID that doesn't clash even accross multiple games. Used for git tags.""" self.environment = environment self.game_context = game_context - self.game_context.render_and_set_prompts() self.logger = get_logger( self.name, log_path=self.game_context.log_local / f"{self.name}.log", emoji="👤", ) + self._metadata = { + "name": self.name, + "player_unique_id": self._player_unique_id, + "diff": {0: ""}, # mapping round -> diff + "incremental_diff": {0: ""}, # mapping round -> diff + } - @property - def branch_name(self): - """Get the branch name for the agent's codebase.""" - return f"{self.game_context.id}.{self.name}" - - def commit(self): - """Commit changes to the agent's codebase.""" - r, rounds = self.game_context.round, self.game_context.rounds - for cmd in [ - "git add -A", - f"git commit --allow-empty -m 'Round {r}/{rounds} Update'", - ]: - assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) - self.logger.info(f"Committed changes for {self.name} for round {r}/{rounds}") + # --- Main methods --- - def on_round_update(self, new_round: int): - """Update the agent's round to match the game round.""" + def pre_run_hook(self, *, new_round: int) -> None: + """Should be called before we call the run method.""" + if new_round == 1: + self._tag_round(0) self.game_context.round = new_round - self.game_context.render_and_set_prompts() - def push(self): + def post_run_hook(self, *, round: int) -> None: + """Should be called after we called the run method.""" + self._commit() + self._metadata["diff"][round] = self._get_round_diff(round) + self._metadata["incremental_diff"][round] = self._get_round_diff( 
+ round, incremental=True + ) + + @abstractmethod + def run(self) -> None: + """Given the observation / recap, update the codebase""" + + def get_metadata(self) -> dict: + """Get metadata for the agent.""" + return self._metadata + + def push(self) -> None: """Push codebase to a branch on the game's remote repository.""" token = os.getenv("GITHUB_TOKEN") if not token: @@ -59,13 +72,98 @@ def push(self): for cmd in [ "git remote remove origin", f"git remote add origin https://x-access-token:{token}@github.com/{GH_ORG}/{self.game_context.name}.git", - f"git push origin {self.branch_name}", + f"git push origin {self._branch_name}", + "git push origin --tags", ]: assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) self.logger.info( - f"Pushed {self.name} commit history to remote repository (branch {self.branch_name})" + f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})" ) - @abstractmethod - def run(self): - """Given the observation / recap, update the codebase""" + def reset_and_apply_patch( + self, patch: str, *, base_commit: str = "", filter_patch: bool = True + ) -> None: + """Clean all uncommited changes. If base_commit is provided, reset to that commit. + Then apply the patch to the codebase. 
+ """ + # Need to clean before we copy over the patch (else it's gonna be removed by git clean) + self.logger.debug( + assert_zero_exit_code( + self.environment.execute( + f"git reset --hard {base_commit} && git clean -fd" + ) + ) + ) + + patch = filter_git_diff(patch) if filter_patch else patch + + if not patch.strip(): + self.logger.debug("No patch to apply, skipping") + return + + create_file_on_container( + container=self.environment, # type: ignore + content=patch, + dest_path="tmp_patch.txt", + ) + + self.logger.debug(f"Applying patch to agent's codebase: {patch}") + + commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] + for cmd in commands: + self.logger.debug(f"Executing command: {cmd}") + out = assert_zero_exit_code( + self.environment.execute(cmd), logger=self.logger + ) + self.logger.debug(out) + + # --- Helper methods --- + + def _tag_round(self, round: int) -> None: + """Git tag the codebase at the given round.""" + assert_zero_exit_code( + self.environment.execute( + f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'" + ), + logger=self.logger, + ) + + @property + def _branch_name(self) -> str: + """Get the branch name for the agent's codebase.""" + return f"{self.game_context.id}.{self.name}" + + def _get_round_tag_name(self, round: int) -> str: + """Get git tag name for the version of the codebase at the given round.""" + return f"{self._player_unique_id}-round-{round}" + + def _commit(self) -> None: + """Commit changes to the agent's codebase.""" + r = self.game_context.round + for cmd in [ + "git add -A", + f"git commit --allow-empty -m 'Round {r} Update'", + ]: + assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) + self._tag_round(r) + self.logger.info(f"Committed changes for {self.name} for round {r}") + + def _get_round_diff(self, round: int, *, incremental: bool = False) -> str: + """Get the diff between the round and initial version (round 0). 
+ If incremental is True, get the diff between the round and the previous round. + Returns empty string if round is 0. + """ + if round == 0: + return "" + if incremental: + previous_round_tag = self._get_round_tag_name(round - 1) + else: + previous_round_tag = self._get_round_tag_name(0) + current_round_tag = self._get_round_tag_name(round) + out = assert_zero_exit_code( + self.environment.execute( + f"git diff {previous_round_tag}..{current_round_tag}" + ), + logger=self.logger, + ) + return out["output"] diff --git a/codeclash/agents/dummy.py b/codeclash/agents/dummy.py index 514341c0..ffa882fc 100644 --- a/codeclash/agents/dummy.py +++ b/codeclash/agents/dummy.py @@ -5,4 +5,5 @@ class Dummy(Player): """A dummy player that does nothing. Mainly for testing purposes.""" def run(self): - self.commit() + pass + # self.commit() # now called in post_round_hook diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py index 645131f2..f3fec5d4 100644 --- a/codeclash/agents/minisweagent.py +++ b/codeclash/agents/minisweagent.py @@ -8,8 +8,9 @@ import yaml from jinja2 import Template -from minisweagent import Environment, Model +from minisweagent import Model from minisweagent.agents.default import AgentConfig, DefaultAgent +from minisweagent.environments.docker import DockerEnvironment from minisweagent.models.litellm_model import LitellmModel from minisweagent.run.utils.save import save_traj from rich.console import Console @@ -28,7 +29,7 @@ class ClashAgent(DefaultAgent): def __init__( self, model: Model, - env: Environment, + env: DockerEnvironment, name: str, game_context: GameContext, *, @@ -56,7 +57,7 @@ def render_template(self, template: str, **kwargs) -> str: | asdict(self.env.config) | asdict(self.model.config) | platform.uname()._asdict() - | self.game_context.to_dict() + | self.game_context.to_template_vars() ) return Template(template).render(**kwargs, **cs, **os.environ) @@ -69,7 +70,7 @@ class MiniSWEAgent(Player): """Player with 
agentic code editing capabilities""" def __init__( - self, config: dict, environment: Environment, game_context: GameContext + self, config: dict, environment: DockerEnvironment, game_context: GameContext ): super().__init__(config, environment=environment, game_context=game_context) @@ -104,10 +105,11 @@ def run(self): traj_path, exit_status=exit_status, result=result, + print_fct=self.logger.debug, ) copy_file_to_container( self.environment, traj_path, self.game_context.log_env / traj_path.name, ) - self.commit() + # self.commit() # now called in post_round_hook diff --git a/codeclash/agents/utils.py b/codeclash/agents/utils.py index a5de6856..16ecd13a 100644 --- a/codeclash/agents/utils.py +++ b/codeclash/agents/utils.py @@ -36,24 +36,15 @@ class GameContext: rounds: int working_dir: str - def render_and_set_prompts(self): - """Render and set prompts using the current game context.""" + def _render_prompt_templates(self) -> dict: context = asdict(self) - del context["prompts"] - for key, template_str in self.prompts.items(): - rendered = Template(template_str).render(**context) - setattr(self, key, rendered) - - def to_dict(self): - """Convert the GameContext to a dictionary, including dynamically added attributes.""" - result = asdict(self) - declared = set(self.__dataclass_fields__) - for attr in dir(self): - if ( - not attr.startswith("_") - and attr not in declared - and not callable(getattr(self, attr)) - ): - result[attr] = getattr(self, attr) - del result["prompts"] - return result + return { + key: Template(template_str).render(**context) + for key, template_str in self.prompts.items() + } + + def to_template_vars(self) -> dict[str, str]: + """Convert the GameContext to a dictionary for rendering prompts in the agent""" + out = asdict(self) | self._render_prompt_templates() + out.pop("prompts") + return out diff --git a/codeclash/games/__init__.py b/codeclash/games/__init__.py index d996b8bf..781c3557 100644 --- a/codeclash/games/__init__.py +++ 
b/codeclash/games/__init__.py @@ -1,3 +1,5 @@ +from pathlib import Path + from codeclash.games.abstract import CodeGame from codeclash.games.battlecode.main import BattleCodeGame from codeclash.games.battlesnake.main import BattleSnakeGame @@ -7,16 +9,17 @@ # might consider postponing imports to avoid loading things we don't need -def get_game(config: dict) -> CodeGame: +def get_game(config: dict, *, tournament_id: str, local_output_dir: Path) -> CodeGame: game = { - x.name: x for x in [ + x.name: x + for x in [ BattleCodeGame, BattleSnakeGame, CoreWarGame, RoboCodeGame, - RobotRumbleGame + RobotRumbleGame, ] }.get(config["game"]["name"]) if game is None: raise ValueError(f"Unknown game: {config['game']['name']}") - return game(config) + return game(config, tournament_id=tournament_id, local_output_dir=local_output_dir) diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index ac20956a..ddbb316f 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -1,47 +1,52 @@ -import getpass import json import os import subprocess -import time -import traceback from abc import ABC, abstractmethod -from collections import Counter from pathlib import Path -from typing import Any from minisweagent.environments.docker import DockerEnvironment from codeclash.agents.abstract import Player from codeclash.constants import DIR_LOGS, DIR_WORK, GH_ORG -from codeclash.utils.environment import ( - assert_zero_exit_code, - copy_between_containers, - copy_file_from_container, -) +from codeclash.utils.environment import assert_zero_exit_code, copy_between_containers from codeclash.utils.log import get_logger class CodeGame(ABC): name: str - def __init__(self, config: dict): + def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path): + """The CodeGame class is responsible for running games, i.e., taking a list of code + from different agents/players and running them against each other. 
+ It also provides the environments for the game and agents to run in. + + The central method is `run_round`, which takes a list of agents and returns the winner of the round. + + At the end of the the tournament, run the `end` method to clean up the game and agents and write the metadata. + + Args: + config: The overall config for the tournament. + tournament_id: The id of the tournament. + local_output_dir: The host/local directory to write logs to. + """ self.url_gh: str = f"git@github.com:{GH_ORG}/{self.name}.git" self.artifacts: list[Path] = [] """Artifact objects that we might want to clean up after the game.""" - self.scoreboard: list[tuple[int, str]] = [] - """List of (round number, winner (player id))""" self.game_config: dict = config["game"] self.config: dict = config - self.rounds: int = self.game_config.get("rounds", 1) - self.round: int = 0 - self.game_id: str = f"{self.name}{time.strftime('%y%m%d%H%M%S')}" + self.game_id: str = tournament_id self.log_env: Path = (DIR_WORK / DIR_LOGS / self.game_id).resolve() - self.log_local: Path = (DIR_LOGS / getpass.getuser() / self.game_id).resolve() + self.log_local: Path = local_output_dir self.logger = get_logger( self.name, log_path=self.log_local / "game.log", emoji="🏓" ) self.environment: DockerEnvironment = self.get_environment() - assert len(config["players"]) >= 2, "At least two players are required" + """The running docker environment for executing the game""" + self._metadata: dict = { + "name": self.name, + "config": self.config, + "game_id": self.game_id, + } @property def image_name(self) -> str: @@ -84,15 +89,9 @@ def get_metadata(self) -> dict: """This is what we write to metadata.json. You can subclass extend this to add more details for specific games. 
""" - return { - "name": self.name, - "scoreboard": self.scoreboard, - "config": self.config, - "game_id": self.game_id, - } + return self._metadata def end(self, cleanup: bool = False): - self.logger.info("Overall score: %s", Counter([x[1] for x in self.scoreboard])) (self.log_local / "metadata.json").write_text(json.dumps(self.get_metadata())) if cleanup: for artifact in self.artifacts: @@ -121,14 +120,7 @@ def get_environment(self, branch_name: str | None = None) -> DockerEnvironment: return environment def _pre_round_setup(self, agents: list[Player]): - """Copy agent codebases into game's container and make round log file""" - self.round += 1 - # Notify agents of round update - for agent in agents: - if hasattr(agent, "on_round_update"): - agent.on_round_update(self.round) - self.logger.info(f"▶️ Running {self.name} round {self.round}...") - + """Copy agent codebases into game's container""" # Copy agent codebases into game's container for agent in agents: self.logger.debug(f"Copying {agent.name}'s codebase") @@ -139,78 +131,55 @@ def _pre_round_setup(self, agents: list[Player]): dest_path=f"/{agent.name}", ) - # Ensure the log path + file exists + # Ensure the log directory exists assert_zero_exit_code( self.environment.execute(f"mkdir -p {self.log_env}"), logger=self.logger, ) - assert_zero_exit_code( - self.environment.execute(f"touch {self.round_log_path}"), logger=self.logger - ) @abstractmethod - def determine_winner(self, agents: list[Player]) -> Any: - """Determine the winner of the game based on the round results, - Should update self.scoreboard + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + """Determine the winner of the game based on the result output. 
+ + Args: + result_output: The specific output containing winning information + agents: List of agents participating in the round + + Returns: + Dictionary with key "winner" containing the winner's name """ pass @abstractmethod - def execute_round(self, agents: list[Player]): - """Subclasses implement their game-specific logic here, must write results to round_log_path. + def execute_round(self, agents: list[Player]) -> dict[str, str]: + """Subclasses implement their game-specific logic here. This is the low level implementation, you probably want to use run_round instead, which includes the pre-round setup, post-round setup, and winner determination. + + Returns: + Dictionary with keys "log_output" and "result_output" """ pass - def _post_round_setup(self, agents: list[Player]): - for agent in agents: - try: - copy_between_containers( - self.environment, - agent.environment, - self.round_log_path, - f"{agent.environment.config.cwd}/logs/round_{self.round}.log", - ) - except Exception: - self.logger.error( - f"Error copying round log to {agent.name}'s container: {traceback.format_exc()}" - ) - else: - self.logger.info(f"Copied round log to {agent.name}'s container.") - - try: - copy_file_from_container( - self.environment, - self.round_log_path, - self.log_local / self.round_log_path.name, - ) - except Exception: - self.logger.error( - f"Error copying round log to {agent.name}'s container: {traceback.format_exc()}" - ) - else: - self.logger.info( - f"Copied round log from {agent.name}'s container to local log dir." - ) - self.logger.info(f"Round {self.round} completed.") - - def run_round(self, agents: list[Player]): + def run_round(self, agents: list[Player]) -> dict[str, str]: """ Run a single round of the game with the given agents. - Writes to directory containing logs and results of the round(s). + Returns the log output, result output, and winner name. All bookkeeping should be + handled by the tournament class. 
""" self._pre_round_setup(agents) - self.execute_round(agents) - self.determine_winner(agents) - last_winner = self.scoreboard[-1][1] - self.logger.info(f"Round {self.round} winner: {last_winner}") - self._post_round_setup(agents) + result = self.execute_round(agents) + log_output = result["log_output"] + result_output = result["result_output"] - @property - def round_log_path(self) -> Path: - """ - Get the path to the current round's log file. - """ - return self.log_env / f"round_{self.round}.log" + winner_result = self.determine_winner(result_output, agents) + winner_name = winner_result["winner"] + + return { + "log_output": log_output, + "result_output": result_output, + "winner": winner_name, + } diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py index 07df6252..43c26295 100644 --- a/codeclash/games/battlecode/main.py +++ b/codeclash/games/battlecode/main.py @@ -1,4 +1,6 @@ import re +import shlex +from pathlib import Path from typing import Any from codeclash.constants import DIR_WORK, RESULT_TIE @@ -8,8 +10,10 @@ class BattleCodeGame(CodeGame): name: str = "BattleCode" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) assert len(config["players"]) == 2, "BattleCode is a two-player game" self.run_cmd_round: str = "python run.py run" for arg, val in self.game_config.get("args", {}).items(): @@ -19,13 +23,27 @@ def __init__(self, config): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner(self, agents: list[Any]): - response = self.environment.execute(f"tail -3 {self.round_log_path} | head -1") - winner = re.search(r"\s\((.*)\)\swins\s\(", response["output"]).group(1) - winner = {"A": agents[0].name, "B": agents[1].name}.get(winner, RESULT_TIE) - self.scoreboard.append((self.round, winner)) + def determine_winner(self, 
result_output: str, agents: list[Any]) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") + lines = result_output.strip().split("\n") + # Get the third-to-last line which contains the winner info + winner_line = lines[-3] if len(lines) >= 3 else "" + self.logger.debug(f"Winner line: {winner_line}") + match = re.search(r"\s\((.*)\)\swins\s\(", winner_line) + if match: + winner_key = match.group(1) + self.logger.debug(f"Winner key from match: {winner_key}") + # Map A/B to actual agent names (much closer to original code) + winner = {"A": agents[0].name, "B": agents[1].name}.get( + winner_key, RESULT_TIE + ) + self.logger.debug(f"Concluding winner: {winner}") + return {"winner": winner} + else: + self.logger.debug("No winner match found, returning tie") + return {"winner": RESULT_TIE} - def execute_round(self, agents: list[Any]): + def execute_round(self, agents: list[Any]) -> dict[str, str]: for agent in agents: src, dest = f"/{agent.name}/src/mysubmission/", str( DIR_WORK / "src" / agent.name @@ -35,7 +53,10 @@ def execute_round(self, agents: list[Any]): f"--p{idx+1}-dir src --p{idx+1} {agent.name}" for idx, agent in enumerate(agents) ] - cmd = f"{self.run_cmd_round} {' '.join(args)} > {self.round_log_path}" + cmd = f"{self.run_cmd_round} {shlex.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response + # For BattleCode, log_output and result_output are the same + output = response["output"] + return {"log_output": output, "result_output": output} diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py index 206f6fed..d3a8187e 100644 --- a/codeclash/games/battlesnake/main.py +++ b/codeclash/games/battlesnake/main.py @@ -1,5 +1,6 @@ import json import time +from pathlib import Path from codeclash.agents.abstract import Player from codeclash.games.abstract import CodeGame @@ -9,8 +10,10 @@ class 
BattleSnakeGame(CodeGame): name: str = "BattleSnake" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) self.run_cmd_round: str = "./battlesnake play" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -19,14 +22,19 @@ def __init__(self, config): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner(self, agents: list[Player]): - response = assert_zero_exit_code( - self.environment.execute(f"tail -1 {self.round_log_path}") - ) - winner = json.loads(response["output"].strip("\n"))["winnerName"] - self.scoreboard.append((self.round, winner)) + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") + lines = result_output.strip().split("\n") + # Get the last line which contains the game result + last_line = lines[-1] if lines else "" + self.logger.debug(f"Last line: {last_line}") + winner = json.loads(last_line)["winnerName"] + self.logger.debug(f"Concluding winner: {winner}") + return {"winner": winner} - def execute_round(self, agents: list[Player]): + def execute_round(self, agents: list[Player]) -> dict[str, str]: cmd = [] for idx, agent in enumerate(agents): port = 8001 + idx @@ -38,18 +46,27 @@ def execute_round(self, agents: list[Player]): time.sleep(3) # Give servers time to start - cmd.append(f"-o {self.round_log_path}") - cmd = " ".join(cmd) - self.logger.info(f"Running command: {cmd}") + # Create temporary output file for results + output_file = f"battlesnake_output_{int(time.time())}.json" + cmd_str = " ".join(cmd) + f" -o {output_file}" + self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}") - # todo: should probably keep output somewhere? 
try: - assert_zero_exit_code( + response = assert_zero_exit_code( self.environment.execute( - f"{self.run_cmd_round} {cmd}", + f"{self.run_cmd_round} {cmd_str}", cwd=f"{self.environment.config.cwd}/game", ) ) + + # Read the output file for result information + result_response = self.environment.execute(f"cat game/{output_file}") + result_output = result_response["output"] + + # Clean up the output file + self.environment.execute(f"rm -f game/{output_file}") + + return {"log_output": response["output"], "result_output": result_output} finally: # Kill all python servers when done self.environment.execute("pkill -f 'python main.py' || true") diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py index 46d5eafc..7e6e5617 100644 --- a/codeclash/games/corewar/main.py +++ b/codeclash/games/corewar/main.py @@ -1,4 +1,6 @@ import re +import shlex +from pathlib import Path from codeclash.agents.abstract import Player from codeclash.games.abstract import CodeGame @@ -7,8 +9,10 @@ class CoreWarGame(CodeGame): name: str = "CoreWar" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) self.run_cmd_round: str = "./src/pmars" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -17,20 +21,42 @@ def __init__(self, config): else: self.run_cmd_round += f" -{arg} {val}" - def determine_winner(self, agents: list[Player]): + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") scores = [] n = len(agents) * 2 - response = self.environment.execute(f"tail -{n} {self.round_log_path}") - for line in response["output"].splitlines(): + lines = result_output.strip().split("\n") + # Get the last n lines which contain the scores (closer to 
original) + relevant_lines = lines[-n:] if len(lines) >= n else lines + self.logger.debug(f"Relevant lines for scoring: {relevant_lines}") + + for line in relevant_lines: match = re.search(r".*\sby\s.*\sscores\s(\d+)", line) if match: - scores.append(int(match.group(1))) - winner = agents[scores.index(max(scores))].name - self.scoreboard.append((self.round, winner)) + score = int(match.group(1)) + scores.append(score) + self.logger.debug(f"Found score: {score} from line: {line}") + + self.logger.debug(f"All scores: {scores}") + if scores: + max_score_index = scores.index(max(scores)) + winner = agents[max_score_index].name + self.logger.debug( + f"Concluding winner: {winner} with index {max_score_index}" + ) + return {"winner": winner} + else: + self.logger.debug("No scores found, returning unknown") + return {"winner": "unknown"} - def execute_round(self, agents: list[Player]): + def execute_round(self, agents: list[Player]) -> dict[str, str]: args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] - cmd = f"{self.run_cmd_round} {' '.join(args)} > {self.round_log_path}" + cmd = f"{self.run_cmd_round} {shlex.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response + # For CoreWar, log_output and result_output are the same + output = response["output"] + return {"log_output": output, "result_output": output} diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py index fdcf5435..514e7ab2 100644 --- a/codeclash/games/robocode/main.py +++ b/codeclash/games/robocode/main.py @@ -1,4 +1,6 @@ import subprocess +import time +from pathlib import Path from codeclash.agents.abstract import Player from codeclash.games.abstract import CodeGame @@ -8,8 +10,10 @@ class RoboCodeGame(CodeGame): name: str = "RoboCode" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + 
super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) self.run_cmd_round: str = "./robocode.sh" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -51,12 +55,23 @@ def dict_to_lines(d, prefix=""): dict_to_lines(default_battle_config) return "\n".join(battle_lines) - def determine_winner(self, agents: list[Player]): - response = self.environment.execute(f"head -3 {self.round_log_path} | tail -1") - winner = response["output"].split()[1].rsplit(".", 1)[0] - self.scoreboard.append((self.round, winner)) + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") + lines = result_output.strip().split("\n") + # Get the second line which contains the winner info (closer to original) + winner_line = lines[1] if len(lines) >= 2 else "" + self.logger.debug(f"Winner line: {winner_line}") + if winner_line: + winner = winner_line.split()[1].rsplit(".", 1)[0] + self.logger.debug(f"Concluding winner: {winner}") + return {"winner": winner} + else: + self.logger.debug("No winner line found, returning unknown") + return {"winner": "unknown"} - def execute_round(self, agents: list[Player]): + def execute_round(self, agents: list[Player]) -> dict[str, str]: for agent in agents: # Copy the agent codebase into the game codebase and compile it for cmd in [ @@ -69,7 +84,8 @@ def execute_round(self, agents: list[Player]): # Create .battle file selected_robots = ",".join([f"{agent.name}.MyTank*" for agent in agents]) - battle_file = f"{self.game_id}-round{self.round}.battle" + # Use timestamp for unique battle file name since rounds are managed by tournament + battle_file = f"{self.game_id}-battle{int(time.time())}.battle" with open(battle_file, "w") as f: f.write( f"""#Battle Properties @@ -80,10 +96,18 @@ def execute_round(self, agents: list[Player]): copy_file_to_container(self.environment, 
class RobotRumbleGame(CodeGame):
    """Two-player RobotRumble game driven through the ``rumblebot`` CLI."""

    name: str = "RobotRumble"

    def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
        """Set up the base game and verify that exactly two players are configured.

        Args:
            config: Game configuration; ``config["players"]`` must have two entries.
            tournament_id: Forwarded unchanged to the base class.
            local_output_dir: Forwarded unchanged to the base class.
        """
        super().__init__(
            config, tournament_id=tournament_id, local_output_dir=local_output_dir
        )
        assert len(config["players"]) == 2, "RobotRumble is a two-player game"
        self.run_cmd_round: str = "./rumblebot run term"

    def determine_winner(
        self, result_output: str, agents: list[Player]
    ) -> dict[str, str]:
        """Derive the round winner from the last two lines of the game output.

        "Blue won" maps to the first agent and "Red won" to the second. An
        explicit tie, or output with no recognisable verdict, yields
        ``RESULT_TIE``.

        Returns:
            A dict with the single key ``"winner"``.
        """
        self.logger.debug(f"Determining winner from result output: {result_output}")
        # Slicing with [-2:] already copes with outputs shorter than two lines.
        tail = result_output.strip().split("\n")[-2:]
        log_text = "\n".join(tail)
        self.logger.debug(f"Relevant lines: {log_text}")

        # Checked in this order: Blue first, then Red.
        for marker, position in (("Blue won", 0), ("Red won", 1)):
            if marker in log_text:
                winner = agents[position].name
                self.logger.debug(f"{marker} - Concluding winner: {winner}")
                return {"winner": winner}

        if "it was a tie" in log_text:
            self.logger.debug("Game was a tie")
        else:
            self.logger.debug("No clear result found, treating as tie")
        return {"winner": RESULT_TIE}

    def execute_round(self, agents: list[Player]) -> dict[str, str]:
        """Run one match between the agents' robots and return its output.

        Each agent's robot is expected at ``/<agent name>/robot.py`` inside the
        game environment. The command must exit with status 0.

        Returns:
            A dict whose ``"log_output"`` and ``"result_output"`` entries both
            hold the command output.
        """
        robot_paths = shlex.join(f"/{agent.name}/robot.py" for agent in agents)
        cmd = f"{self.run_cmd_round} {robot_paths}"
        self.logger.info(f"Running command: {cmd}")
        response = self.environment.execute(cmd)
        assert response["returncode"] == 0, response
        # For RobotRumble the full log and the result text are the same output.
        output = response["output"]
        return dict.fromkeys(("log_output", "result_output"), output)
class AbstractTournament:
    """Base class holding the state shared by all tournament types."""

    def __init__(self, config: dict, *, name: str, **kwargs):
        """Store the config and derive the tournament id, output dir and logger.

        Args:
            config: Tournament configuration dictionary.
            name: Tournament name; used as the id prefix and the logger name.
            **kwargs: Accepted but not used by this base class.
        """
        self.config: dict = config
        self.name: str = name
        # The id is the name followed by a second-resolution timestamp.
        timestamp = time.strftime("%y%m%d%H%M%S")
        self.tournament_id: str = f"{self.name}{timestamp}"
        user_log_dir = DIR_LOGS / getpass.getuser()
        self.local_output_dir: Path = (user_log_dir / self.tournament_id).resolve()
        self._metadata: dict = {
            "name": self.name,
            "tournament_id": self.tournament_id,
        }
        self.logger = get_logger(
            self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆"
        )

    def get_metadata(self) -> dict:
        """Return the tournament metadata dictionary (the live object, not a copy)."""
        return self._metadata

    def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str) -> None:
        """Write the round log into ``logs/round_<n>.log`` in the agent's container.

        This is best effort: any failure is logged with its traceback and then
        swallowed, so it never aborts the tournament.
        """
        target = f"logs/round_{round_num}.log"
        try:
            create_file_on_container(
                container=agent.environment,
                content=log_output,
                dest_path=target,
            )
        except Exception:
            self.logger.error(
                f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}"
            )
            return
        self.logger.info(f"Created round log in {agent.name}'s container.")
class PvpTraining(AbstractTournament):
    """PvP tournament: all configured players compete, then each one runs."""

    def __init__(
        self, config: dict, *, cleanup: bool = False, push_agent: bool = False
    ):
        """Create the game and one agent per entry in ``config["players"]``.

        Args:
            config: Tournament configuration; reads ``players``, ``prompts``
                and ``tournament.rounds``.
            cleanup: Passed to ``game.end`` when the tournament finishes.
            push_agent: When true, every agent is pushed during cleanup.
        """
        super().__init__(config, name="PvpTraining")
        self.cleanup_on_end = cleanup
        self.push_agent = push_agent
        self.game: CodeGame = get_game(
            self.config,
            tournament_id=self.tournament_id,
            local_output_dir=self.local_output_dir,
        )
        self.agents: list[Player] = [
            self.get_agent(agent_conf, self.config["prompts"])
            for agent_conf in self.config["players"]
        ]
        # NOTE(review): this replaces the logger AbstractTournament created with
        # a tournament.log path; SinglePlayerTraining keeps the base logger.
        # Confirm the override is intentional.
        self.logger = get_logger(self.game.name)
        self.scoreboard: list[tuple[int, str]] = []

    @property
    def rounds(self) -> int:
        """Number of rounds, read from ``config["tournament"]["rounds"]``."""
        return self.config["tournament"]["rounds"]

    def get_agent(self, agent_config: dict, prompts: dict) -> Player:
        """Create an agent with its own environment and game context.

        The environment is requested under ``<game_id>.<agent name>``.
        """
        environment = self.game.get_environment(
            f"{self.game.game_id}.{agent_config['name']}"
        )

        game_context = GameContext(
            id=self.game.game_id,
            log_env=self.game.log_env,
            log_local=self.game.log_local,
            name=self.game.name,
            player_id=agent_config["name"],
            prompts=prompts,
            round=1,
            rounds=self.rounds,
            working_dir=str(DIR_WORK),
        )

        return get_agent(agent_config, game_context, environment)

    def run(self) -> None:
        """Run all rounds; cleanup happens even if a round raises."""
        try:
            for round_num in range(1, self.rounds + 1):
                self.run_training_round(round_num)
        finally:
            self.cleanup()

    def run_training_round(self, round_num: int) -> None:
        """Play one game round, record the result, then let every agent run."""
        result = self.game.run_round(self.agents)
        log_output = result["log_output"]
        winner = result["winner"]

        # Bookkeeping that was previously done inside the game.
        self.scoreboard.append((round_num, winner))
        self.logger.info(f"Round {round_num} winner: {winner}")

        # Persist the round log locally ...
        round_log_path = self.game.log_local / f"round_{round_num}.log"
        round_log_path.write_text(log_output)

        # ... and make it available inside every agent's environment.
        for agent in self.agents:
            self._copy_game_log_to_agent(agent, round_num, log_output)

        for agent in self.agents:
            self.run_agent(agent, round_num)

        self.logger.info("Round completed.")

    def run_agent(self, agent: Player, round_num: int) -> None:
        """Run a single agent for the round, wrapped in its pre/post hooks."""
        agent.pre_run_hook(new_round=round_num)
        agent.run()
        agent.post_run_hook(round=round_num)

    def cleanup(self) -> None:
        """End the game and, if requested, push every agent."""
        self.game.end(self.cleanup_on_end)
        if self.push_agent:
            for agent in self.agents:
                agent.push()
class SinglePlayerTraining(AbstractTournament):
    """Single-player tournament: the agent always plays its previous version.

    A second "mirror" agent serves as the opponent. After each round it is
    reset to the main agent's state from the round before.
    """

    def __init__(self, config: dict, cleanup: bool = False):
        """Create the game, the main agent and its mirror opponent.

        Args:
            config: Tournament configuration; reads ``player``, ``prompts``
                and ``tournament``.
            cleanup: Passed to ``game.end`` when the tournament finishes.
        """
        super().__init__(config, name="SinglePlayerTraining")
        self.cleanup_on_end = cleanup
        self.game: CodeGame = get_game(
            self.config,
            tournament_id=self.tournament_id,
            local_output_dir=self.local_output_dir,
        )
        self.agent: Player = self.get_agent(self.config["player"], round=1)
        mirror_agent_config = copy.deepcopy(self.config["player"])
        mirror_agent_config["name"] = "mirror"
        self.mirror_agent: Player = self.get_agent(mirror_agent_config, round=0)
        self.scoreboard: list[tuple[int, str]] = []

    @property
    def rounds(self) -> int:
        """Number of rounds, read from ``config["tournament"]["rounds"]``."""
        return self.config["tournament"]["rounds"]

    def get_game_context(self, agent_config: dict, *, round: int) -> GameContext:
        """Create a game context for an agent."""
        return GameContext(
            id=self.game.game_id,
            log_env=self.game.log_env,
            log_local=self.game.log_local,
            name=self.game.name,
            player_id=agent_config["name"],
            prompts=self.config["prompts"],
            round=round,
            rounds=self.rounds,
            working_dir=str(DIR_WORK),
        )

    def get_agent(self, agent_config: dict, round: int) -> Player:
        """Create an agent with environment and game context.

        The environment is requested under ``<game_id>.<agent name>``.
        """
        environment = self.game.get_environment(
            f"{self.game.game_id}.{agent_config['name']}"
        )
        game_context = self.get_game_context(agent_config, round=round)
        return get_agent(agent_config, game_context, environment)

    def get_dummy_agent(self, agent_config: dict | None = None) -> Player:
        """Create a dummy agent that does nothing.

        Args:
            agent_config: Optional config for the dummy. When given, its
                ``name`` also selects the environment, so dummies with
                different names stay distinguishable to the game. When
                omitted, the main player's config and the ``dummy``
                environment are used.
        """
        if agent_config is None:
            agent_config = self.config["player"]
            env_suffix = "dummy"
        else:
            env_suffix = agent_config["name"]
        return Dummy(
            agent_config,
            environment=self.game.get_environment(
                f"{self.game.game_id}.{env_suffix}"
            ),
            game_context=self.get_game_context(agent_config, round=0),
        )

    def run(self):
        """Run all rounds, then optionally the evaluation matrix; always clean up."""
        try:
            for round_num in range(1, self.rounds + 1):
                self.run_training_round(round_num)
            # A missing key means "no evaluation" rather than a KeyError after
            # all training rounds have already completed.
            if self.config["tournament"].get("evaluate_matrix", False):
                self.evaluate()
        finally:
            self.cleanup()

    def run_training_round(self, round_num: int) -> None:
        """Execute a single training round, i.e., run the game, then run the agent."""
        result = self.game.run_round([self.agent, self.mirror_agent])
        log_output = result["log_output"]
        winner = result["winner"]

        # Bookkeeping that was previously done inside the game.
        self.scoreboard.append((round_num, winner))
        self.logger.info(f"Round {round_num} winner: {winner}")

        # Persist the round log locally.
        round_log_path = self.game.log_local / f"round_{round_num}.log"
        round_log_path.write_text(log_output)

        # Only the main agent receives the log; the mirror never runs.
        self._copy_game_log_to_agent(self.agent, round_num, log_output)

        self.run_main_agent(round_num)
        # The mirror trails the main agent by one round (round 0 = no changes).
        self.set_mirror_state_to_round(max(round_num - 1, 0))

        self.logger.info("Round completed.")

    def run_main_agent(self, round_num: int):
        """Run the main agent for the current round, wrapped in its hooks."""
        self.agent.pre_run_hook(new_round=round_num)
        self.agent.run()
        self.agent.post_run_hook(round=round_num)

    def _get_patch_for_round(self, round_num: int) -> str:
        """Return the main agent's diff for a round with binary sections removed.

        Round 0 is the unmodified codebase and yields an empty patch.
        """
        if round_num == 0:
            return ""
        return filter_git_diff(self.agent.get_metadata()["diff"][round_num])

    def set_mirror_state_to_round(self, round_num: int):
        """Update mirror agent's codebase with the main agent's changes."""
        self.mirror_agent.reset_and_apply_patch(self._get_patch_for_round(round_num))

    def cleanup(self):
        """Clean up game resources."""
        self.game.end(self.cleanup_on_end)

    def evaluate(self, n_repetitions: int = 3):
        """Evaluate the agent by playing every round state against every other.

        Two dummy players, ``p1`` and ``p2``, are patched to each pair of round
        states (0 = initial codebase) and play ``n_repetitions`` games per pair.

        Returns:
            ``matrix[p1_round][p2_round]``: the list of winners reported by the
            game for that pairing.
        """
        # Distinct names and environments are required: games address players
        # by name, so two dummies sharing the main player's name would be
        # indistinguishable.
        p1_config = copy.deepcopy(self.config["player"])
        p1_config["name"] = "p1"
        p1 = self.get_dummy_agent(p1_config)

        p2_config = copy.deepcopy(self.config["player"])
        p2_config["name"] = "p2"
        p2 = self.get_dummy_agent(p2_config)

        round_states = range(self.rounds + 1)
        matrix = {
            p1_round: {p2_round: [] for p2_round in round_states}
            for p1_round in round_states
        }
        for p1_round in round_states:
            for p2_round in round_states:
                self.logger.info(
                    f"Evaluating agent at round {p1_round} against agent at round {p2_round}"
                )
                # Same binary-section filtering as the mirror agent gets.
                p1.reset_and_apply_patch(self._get_patch_for_round(p1_round))
                p2.reset_and_apply_patch(self._get_patch_for_round(p2_round))
                for i_repetition in range(n_repetitions):
                    result = self.game.run_round([p1, p2])
                    winner = result["winner"]
                    self.logger.info(
                        f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}"
                    )
                    matrix[p1_round][p2_round].append(winner)
        self.logger.info(f"Evaluation matrix: {matrix}")
        return matrix
def get_round_count_from_metadata(log_dir: Path) -> Optional[int]:
    """Extract the round count from ``metadata.json`` in *log_dir*.

    The value is read from ``config -> game -> rounds`` and returned as stored.

    Returns:
        The stored round count, or ``None`` when the file is missing, is
        unreadable, is not valid JSON, or does not have the expected nested
        object structure.
    """
    # NOTE(review): the tournament classes read the round count from
    # config["tournament"]["rounds"]; confirm metadata.json still stores it
    # under config.game.rounds.
    metadata_file = log_dir / "metadata.json"
    try:
        metadata = json.loads(metadata_file.read_text())
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: invalid JSON
        # (JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return None

    # Walk the path defensively: a non-object at any level (e.g. a top-level
    # list or "config": null) means "unknown", not an AttributeError.
    node = metadata
    for key in ("config", "game", "rounds"):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node