From bdd79f4bdec376175a0b2593402773bff64861b8 Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Sat, 23 Aug 2025 21:40:38 -0400 Subject: [PATCH 1/8] Single player mode initial version --- codeclash/agents/abstract.py | 108 +++++++++++++---- codeclash/agents/dummy.py | 3 +- codeclash/agents/minisweagent.py | 2 +- codeclash/games/abstract.py | 5 +- codeclash/utils/environment.py | 20 ++++ configs/battlesnake_single_player.yaml | 23 ++++ configs/mini/default.yaml | 11 +- main.py | 4 +- main_single_player.py | 158 +++++++++++++++++++++++++ 9 files changed, 301 insertions(+), 33 deletions(-) create mode 100644 configs/battlesnake_single_player.yaml create mode 100644 main_single_player.py diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index ec2c1a72..f54f461e 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -1,4 +1,5 @@ import os +import uuid from abc import ABC, abstractmethod from dotenv import load_dotenv @@ -18,9 +19,11 @@ def __init__( config: dict, environment: Environment, game_context: GameContext, - ): + ) -> None: self.config = config self.name = config["name"] + self._player_unique_id = uuid.uuid4() + """Unique ID that doesn't clash even accross multiple games. Used for git tags.""" self.environment = environment self.game_context = game_context self.game_context.render_and_set_prompts() @@ -29,28 +32,39 @@ def __init__( log_path=self.game_context.log_local / f"{self.name}.log", emoji="👤", ) + self._metadata = { + "name": self.name, + "player_unique_id": self._player_unique_id, + "diff": {}, # mapping round -> diff + "incremental_diff": {}, # mapping round -> diff + } - @property - def branch_name(self): - """Get the branch name for the agent's codebase.""" - return f"{self.game_context.id}.{self.name}" - - def commit(self): - """Commit changes to the agent's codebase.""" - r, rounds = self.game_context.round, self.game_context.rounds - for cmd in [ - "git add -A", - f"git commit --allow-empty -m 'Round {r}/{rounds} Update'", - ]: - assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) - self.logger.info(f"Committed changes for {self.name} for round {r}/{rounds}") + # --- Main methods --- - def on_round_update(self, new_round: int): - """Update the agent's round to match the game round.""" + def pre_run_hook(self, *, new_round: int) -> None: + """Should be called before we call the run method.""" + if new_round == 1: + self._tag_round(0) self.game_context.round = new_round self.game_context.render_and_set_prompts() - def push(self): + def post_run_hook(self, *, round: int) -> None: + """Should be called after we called the run method.""" + self._commit() + self._metadata["diff"][round] = self._get_round_diff(round) + self._metadata["incremental_diff"][round] = self._get_round_diff( + round, incremental=True + ) + + @abstractmethod + def run(self) -> None: + """Given the observation / recap, update the codebase""" + + def get_metadata(self) -> dict: + """Get metadata for the agent.""" + return self._metadata + + def push(self) -> None: """Push codebase to a branch on the game's remote repository.""" token = os.getenv("GITHUB_TOKEN") if not token: @@ -59,13 +73,61 @@ def push(self): for cmd in [ "git remote remove origin", f"git remote add origin https://x-access-token:{token}@github.com/{GH_ORG}/{self.game_context.name}.git", - f"git push origin {self.branch_name}", + f"git push origin {self._branch_name}", + "git push origin --tags", ]: assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) self.logger.info( - f"Pushed {self.name} commit history to remote repository (branch {self.branch_name})" + f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})" ) - @abstractmethod - def run(self): - """Given the observation / recap, update the codebase""" + # --- Helper methods --- + + def _tag_round(self, round: int) -> None: + """Git tag the codebase at the given round.""" + assert_zero_exit_code( + self.environment.execute( + f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'" + ), + logger=self.logger, + ) + + @property + def _branch_name(self) -> str: + """Get the branch name for the agent's codebase.""" + return f"{self.game_context.id}.{self.name}" + + def _get_round_tag_name(self, round: int) -> str: + """Get git tag name for the version of the codebase at the given round.""" + return f"{self._player_unique_id}-round-{round}" + + def _commit(self) -> None: + """Commit changes to the agent's codebase.""" + r = self.game_context.round + for cmd in [ + "git add -A", + f"git commit --allow-empty -m 'Round {r} Update'", + ]: + assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) + self._tag_round(r) + self.logger.info(f"Committed changes for {self.name} for round {r}") + + def _get_round_diff(self, round: int, *, incremental: bool = False) -> str: + """Get the diff between the round and initial version (round 0). + If incremental is True, get the diff between the round and the previous round. + Returns empty string if round is 0. + """ + if round == 0: + return "" + if incremental: + previous_round_tag = self._get_round_tag_name(round - 1) + else: + previous_round_tag = self._get_round_tag_name(0) + current_round_tag = self._get_round_tag_name(round) + out = assert_zero_exit_code( + self.environment.execute( + f"git diff {previous_round_tag}..{current_round_tag}" + ), + logger=self.logger, + ) + return out["output"] diff --git a/codeclash/agents/dummy.py b/codeclash/agents/dummy.py index 514341c0..ffa882fc 100644 --- a/codeclash/agents/dummy.py +++ b/codeclash/agents/dummy.py @@ -5,4 +5,5 @@ class Dummy(Player): """A dummy player that does nothing. Mainly for testing purposes.""" def run(self): - self.commit() + pass + # self.commit() # now called in post_round_hook diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py index 645131f2..ec5d7cb8 100644 --- a/codeclash/agents/minisweagent.py +++ b/codeclash/agents/minisweagent.py @@ -110,4 +110,4 @@ def run(self): traj_path, self.game_context.log_env / traj_path.name, ) - self.commit() + # self.commit() # now called in post_round_hook diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index ac20956a..3b9a7c2c 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -41,7 +41,7 @@ def __init__(self, config: dict): self.name, log_path=self.log_local / "game.log", emoji="🏓" ) self.environment: DockerEnvironment = self.get_environment() - assert len(config["players"]) >= 2, "At least two players are required" + # assert len(config["players"]) >= 2, "At least two players are required" @property def image_name(self) -> str: @@ -124,9 +124,6 @@ def _pre_round_setup(self, agents: list[Player]): """Copy agent codebases into game's container and make round log file""" self.round += 1 # Notify agents of round update - for agent in agents: - if hasattr(agent, "on_round_update"): - agent.on_round_update(self.round) self.logger.info(f"▶️ Running {self.name} round {self.round}...") # Copy agent codebases into game's container diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py index 3525547f..1d6f6aa1 100644 --- a/codeclash/utils/environment.py +++ b/codeclash/utils/environment.py @@ -113,3 +113,23 @@ def copy_file_from_container( f"Failed to copy {container.container_id}:{src_path} to {dest_path}: {result.stdout}{result.stderr}" ) return result + + +def create_file_on_container( + container: DockerEnvironment, + *, + content: str, + dest_path: str | Path, +): + """ + Create a file with given content on a Docker container. + Uses a temporary file on the local filesystem for the transfer. + """ + with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_file: + tmp_file.write(content) + tmp_file_path = Path(tmp_file.name) + + try: + copy_file_to_container(container, tmp_file_path, dest_path) + finally: + tmp_file_path.unlink() # Clean up the temporary file diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml new file mode 100644 index 00000000..64985c40 --- /dev/null +++ b/configs/battlesnake_single_player.yaml @@ -0,0 +1,23 @@ +game: + name: BattleSnake + rounds: 5 + args: + width: 11 + height: 11 + browser: false +player: + agent: mini + config: configs/mini/default.yaml + model: openai/gpt-5-mini + name: main +prompts: + game_description: | + You are a software developer ({{player_id}}) competing in a coding game called BattleSnake. + Your bot (`main.py`) controls a snake on a grid-based board. + Snakes collect food, avoid collisions, and try to outlast their opponents. + + The game is played in {{rounds}} rounds. For every round, you (and your competitor) edit program code that controls your bot. This is round {{round}}. + After you and your competitor finish editing your codebases, the game is run automatically. + + Your task: improve the bot in `main.py`, located in {{working_dir}}. + {{working_dir}} is your codebase, which contains both your bot and supporting assets. diff --git a/configs/mini/default.yaml b/configs/mini/default.yaml index 7450d86f..c6c762e0 100644 --- a/configs/mini/default.yaml +++ b/configs/mini/default.yaml @@ -1,6 +1,11 @@ agent: system_template: | - You are a helpful assistant editing a codebase to play a programming game. + You are a helpful assistant interacting continuously with a computer by submitting commands. + You'll be editing a codebase to play a programming game. + + + This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command. + Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||). Include a THOUGHT section before your command where you explain your reasoning process. @@ -142,8 +147,8 @@ agent: Note: In rare cases, if you need to reference a similar format in your command, you might have to proceed in two steps, first writing TRIPLEBACKTICKSBASH, then replacing them with ```bash. - step_limit: 0. - cost_limit: 0. + step_limit: 30 + cost_limit: 1. environment: env: PAGER: cat diff --git a/main.py b/main.py index c9678719..7c965c0c 100644 --- a/main.py +++ b/main.py @@ -17,10 +17,12 @@ def main(config_path: str, cleanup: bool = False, push_agent: bool = False): agents.append(get_agent(agent_conf, config["prompts"], game)) try: - for _ in range(game.rounds): + for round in range(1, game.rounds + 1): game.run_round(agents) for agent in agents: + agent.pre_run_hook(new_round=round) agent.run() + agent.post_run_hook(round=round) finally: game.end(cleanup) if push_agent: diff --git a/main_single_player.py b/main_single_player.py new file mode 100644 index 00000000..e5b6087c --- /dev/null +++ b/main_single_player.py @@ -0,0 +1,158 @@ +""" +In single player mode, the agent runs always against its previous version. +""" + +import argparse +import copy + +import yaml + +from codeclash.agents import get_agent +from codeclash.agents.abstract import Player +from codeclash.games import get_game +from codeclash.games.abstract import CodeGame +from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container +from codeclash.utils.log import get_logger + + +def filter_git_diff(text: str) -> str: + """Return a git diff with any file sections mentioning binary content removed.""" + lines = text.splitlines(keepends=True) + out: list[str] = [] + block: list[str] = [] + in_block = False + prelude_copied = False + + def is_binary_block(bl: list[str]) -> bool: + for ln in bl: + s = ln.strip() + if ln.startswith("Binary files "): + return True + if s == "GIT binary patch": + return True + return False + + for ln in lines: + if ln.startswith("diff --git "): + if in_block: + if not is_binary_block(block): + out.extend(block) + block = [] + else: + if not prelude_copied: + prelude_copied = True + in_block = True + if in_block: + block.append(ln) + else: + out.append(ln) + + if in_block and block: + if not is_binary_block(block): + out.extend(block) + + return "".join(out) + + +class SinglePlayerTraining: + def __init__(self, config: dict, cleanup: bool = False): + self.config = config + self.cleanup_on_end = cleanup + self.game: CodeGame = get_game(self.config) + self.agent: Player = get_agent( + self.config["player"], self.config["prompts"], self.game + ) + mirror_agent_config = copy.deepcopy(self.config["player"]) + mirror_agent_config["name"] = "mirror" + self.mirror_agent: Player = get_agent( + mirror_agent_config, self.config["prompts"], self.game + ) + self.logger = get_logger(self.game.name) + + def run(self): + """Main execution function that runs all rounds.""" + try: + for round_num in range(1, self.game.rounds + 1): + self.run_round(round_num) + finally: + self.cleanup() + + def run_round(self, round_num: int): + """Execute a single training round.""" + self.game.run_round([self.agent, self.mirror_agent]) + self.run_main_agent(round_num) + self.run_mirror_agent(round_num) + + def run_main_agent(self, round_num: int): + """Run the main agent for the current round.""" + self.agent.pre_run_hook(new_round=round_num) + self.agent.run() + self.agent.post_run_hook(round=round_num) + + def run_mirror_agent(self, round_num: int): + """Update mirror agent's codebase with the main agent's changes.""" + if round_num == 1: + self.logger.info("Skipping updating mirror agent for round 1") + return + + # Set mirror agent's codebase to the main agent's codebase of the previous round + full_diff = self.agent.get_metadata()["diff"][round_num - 1] + + full_diff = filter_git_diff(full_diff) + + if full_diff.strip(): + self.logger.debug( + assert_zero_exit_code( + self.mirror_agent.environment.execute( + "git reset --hard && git clean -fd" + ) + ) + ) + + create_file_on_container( + container=self.mirror_agent.environment, # type: ignore + content=full_diff, + dest_path="tmp_patch.txt", + ) + + self.logger.info("Applying patch to mirror agent's codebase") + self.logger.debug(f"Full diff: {full_diff}") + + commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] + for cmd in commands: + self.logger.debug(f"Executing command: {cmd}") + out = assert_zero_exit_code( + self.mirror_agent.environment.execute(cmd), logger=self.logger + ) + self.logger.debug(out) + else: + self.logger.info("No diff found for mirror agent, skipping update") + + def cleanup(self): + """Clean up game resources.""" + self.game.end(self.cleanup_on_end) + + +def main(config_path: str, cleanup: bool = False): + with open(config_path, "r") as f: + config = yaml.safe_load(f) + training = SinglePlayerTraining(config, cleanup) + training.run() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="CodeClash") + parser.add_argument( + "config_path", + type=str, + default="configs/battlesnake.yaml", + help="Path to the config file.", + ) + parser.add_argument( + "-c", + "--cleanup", + action="store_true", + help="If set, do not clean up the game environment after running.", + ) + args = parser.parse_args() + main(**vars(args)) From b8427d410d4f675a722cdf2717e6e04280dc8d1f Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Sun, 24 Aug 2025 11:25:12 -0400 Subject: [PATCH 2/8] Ref: Create new Tournament class. Move main.py code --- codeclash/tournaments/abstract.py | 2 + codeclash/tournaments/pvp_training.py | 51 +++++++ .../tournaments/single_player_training.py | 93 +++++++++++++ codeclash/tournaments/utils/git_utils.py | 37 +++++ main.py | 26 +--- main_single_player.py | 130 +----------------- 6 files changed, 188 insertions(+), 151 deletions(-) create mode 100644 codeclash/tournaments/abstract.py create mode 100644 codeclash/tournaments/pvp_training.py create mode 100644 codeclash/tournaments/single_player_training.py create mode 100644 codeclash/tournaments/utils/git_utils.py diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py new file mode 100644 index 00000000..59b0e753 --- /dev/null +++ b/codeclash/tournaments/abstract.py @@ -0,0 +1,2 @@ +class AbstractTournament: + pass diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp_training.py new file mode 100644 index 00000000..07ab57c6 --- /dev/null +++ b/codeclash/tournaments/pvp_training.py @@ -0,0 +1,51 @@ +""" +PvP training mode where multiple agents compete against each other. +""" + +from codeclash.agents import get_agent +from codeclash.agents.abstract import Player +from codeclash.games import get_game +from codeclash.games.abstract import CodeGame +from codeclash.tournaments.abstract import AbstractTournament +from codeclash.utils.log import get_logger + + +class PvpTraining(AbstractTournament): + def __init__( + self, config: dict, *, cleanup: bool = False, push_agent: bool = False + ): + self.config = config + self.cleanup_on_end = cleanup + self.push_agent = push_agent + self.game: CodeGame = get_game(self.config) + self.agents: list[Player] = [] + for agent_conf in self.config["players"]: + self.agents.append(get_agent(agent_conf, self.config["prompts"], self.game)) + self.logger = get_logger(self.game.name) + + def run(self) -> None: + """Main execution function that runs all rounds.""" + try: + for round_num in range(1, self.game.rounds + 1): + self.run_training_round(round_num) + finally: + self.cleanup() + + def run_training_round(self, round_num: int) -> None: + """Execute a single training round.""" + self.game.run_round(self.agents) + for agent in self.agents: + self.run_agent(agent, round_num) + + def run_agent(self, agent: Player, round_num: int) -> None: + """Run a single agent for the current round.""" + agent.pre_run_hook(new_round=round_num) + agent.run() + agent.post_run_hook(round=round_num) + + def cleanup(self) -> None: + """Clean up game resources and push agents if requested.""" + self.game.end(self.cleanup_on_end) + if self.push_agent: + for agent in self.agents: + agent.push() diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py new file mode 100644 index 00000000..f2325640 --- /dev/null +++ b/codeclash/tournaments/single_player_training.py @@ -0,0 +1,93 @@ +""" +In single player mode, the agent runs always against its previous version. +""" + +import copy + +from codeclash.agents import get_agent +from codeclash.agents.abstract import Player +from codeclash.games import get_game +from codeclash.games.abstract import CodeGame +from codeclash.tournaments.abstract import AbstractTournament +from codeclash.tournaments.utils.git_utils import filter_git_diff +from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container +from codeclash.utils.log import get_logger + + +class SinglePlayerTraining(AbstractTournament): + def __init__(self, config: dict, cleanup: bool = False): + self.config = config + self.cleanup_on_end = cleanup + self.game: CodeGame = get_game(self.config) + self.agent: Player = get_agent( + self.config["player"], self.config["prompts"], self.game + ) + mirror_agent_config = copy.deepcopy(self.config["player"]) + mirror_agent_config["name"] = "mirror" + self.mirror_agent: Player = get_agent( + mirror_agent_config, self.config["prompts"], self.game + ) + self.logger = get_logger(self.game.name) + + def run(self) -> None: + """Main execution function that runs all rounds.""" + try: + for round_num in range(1, self.game.rounds + 1): + self.run_training_round(round_num) + finally: + self.cleanup() + + def run_training_round(self, round_num: int) -> None: + """Execute a single training round.""" + self.game.run_round([self.agent, self.mirror_agent]) + self.run_main_agent(round_num) + self.run_mirror_agent(round_num) + + def run_main_agent(self, round_num: int) -> None: + """Run the main agent for the current round.""" + self.agent.pre_run_hook(new_round=round_num) + self.agent.run() + self.agent.post_run_hook(round=round_num) + + def run_mirror_agent(self, round_num: int) -> None: + """Update mirror agent's codebase with the main agent's changes.""" + if round_num == 1: + self.logger.info("Skipping updating mirror agent for round 1") + return + + # Set mirror agent's codebase to the main agent's codebase of the previous round + full_diff = self.agent.get_metadata()["diff"][round_num - 1] + + full_diff = filter_git_diff(full_diff) + + if full_diff.strip(): + self.logger.debug( + assert_zero_exit_code( + self.mirror_agent.environment.execute( + "git reset --hard && git clean -fd" + ) + ) + ) + + create_file_on_container( + container=self.mirror_agent.environment, # type: ignore + content=full_diff, + dest_path="tmp_patch.txt", + ) + + self.logger.info("Applying patch to mirror agent's codebase") + self.logger.debug(f"Full diff: {full_diff}") + + commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] + for cmd in commands: + self.logger.debug(f"Executing command: {cmd}") + out = assert_zero_exit_code( + self.mirror_agent.environment.execute(cmd), logger=self.logger + ) + self.logger.debug(out) + else: + self.logger.info("No diff found for mirror agent, skipping update") + + def cleanup(self) -> None: + """Clean up game resources.""" + self.game.end(self.cleanup_on_end) diff --git a/codeclash/tournaments/utils/git_utils.py b/codeclash/tournaments/utils/git_utils.py new file mode 100644 index 00000000..22bef669 --- /dev/null +++ b/codeclash/tournaments/utils/git_utils.py @@ -0,0 +1,37 @@ +def filter_git_diff(text: str) -> str: + """Return a git diff with any file sections mentioning binary content removed.""" + lines = text.splitlines(keepends=True) + out: list[str] = [] + block: list[str] = [] + in_block = False + prelude_copied = False + + def is_binary_block(bl: list[str]) -> bool: + for ln in bl: + s = ln.strip() + if ln.startswith("Binary files "): + return True + if s == "GIT binary patch": + return True + return False + + for ln in lines: + if ln.startswith("diff --git "): + if in_block: + if not is_binary_block(block): + out.extend(block) + block = [] + else: + if not prelude_copied: + prelude_copied = True + in_block = True + if in_block: + block.append(ln) + else: + out.append(ln) + + if in_block and block: + if not is_binary_block(block): + out.extend(block) + + return "".join(out) diff --git a/main.py b/main.py index 7c965c0c..14245d38 100644 --- a/main.py +++ b/main.py @@ -2,32 +2,14 @@ import yaml -from codeclash.agents import get_agent -from codeclash.agents.abstract import Player -from codeclash.games import get_game -from codeclash.games.abstract import CodeGame +from codeclash.tournaments.pvp_training import PvpTraining -def main(config_path: str, cleanup: bool = False, push_agent: bool = False): +def main(config_path: str, *, cleanup: bool = False, push_agent: bool = False): with open(config_path, "r") as f: config = yaml.safe_load(f) - game: CodeGame = get_game(config) - agents: list[Player] = [] - for agent_conf in config["players"]: - agents.append(get_agent(agent_conf, config["prompts"], game)) - - try: - for round in range(1, game.rounds + 1): - game.run_round(agents) - for agent in agents: - agent.pre_run_hook(new_round=round) - agent.run() - agent.post_run_hook(round=round) - finally: - game.end(cleanup) - if push_agent: - for agent in agents: - agent.push() + training = PvpTraining(config, cleanup=cleanup, push_agent=push_agent) + training.run() if __name__ == "__main__": diff --git a/main_single_player.py b/main_single_player.py index e5b6087c..6c7ff720 100644 --- a/main_single_player.py +++ b/main_single_player.py @@ -1,136 +1,8 @@ -""" -In single player mode, the agent runs always against its previous version. -""" - import argparse -import copy import yaml -from codeclash.agents import get_agent -from codeclash.agents.abstract import Player -from codeclash.games import get_game -from codeclash.games.abstract import CodeGame -from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container -from codeclash.utils.log import get_logger - - -def filter_git_diff(text: str) -> str: - """Return a git diff with any file sections mentioning binary content removed.""" - lines = text.splitlines(keepends=True) - out: list[str] = [] - block: list[str] = [] - in_block = False - prelude_copied = False - - def is_binary_block(bl: list[str]) -> bool: - for ln in bl: - s = ln.strip() - if ln.startswith("Binary files "): - return True - if s == "GIT binary patch": - return True - return False - - for ln in lines: - if ln.startswith("diff --git "): - if in_block: - if not is_binary_block(block): - out.extend(block) - block = [] - else: - if not prelude_copied: - prelude_copied = True - in_block = True - if in_block: - block.append(ln) - else: - out.append(ln) - - if in_block and block: - if not is_binary_block(block): - out.extend(block) - - return "".join(out) - - -class SinglePlayerTraining: - def __init__(self, config: dict, cleanup: bool = False): - self.config = config - self.cleanup_on_end = cleanup - self.game: CodeGame = get_game(self.config) - self.agent: Player = get_agent( - self.config["player"], self.config["prompts"], self.game - ) - mirror_agent_config = copy.deepcopy(self.config["player"]) - mirror_agent_config["name"] = "mirror" - self.mirror_agent: Player = get_agent( - mirror_agent_config, self.config["prompts"], self.game - ) - self.logger = get_logger(self.game.name) - - def run(self): - """Main execution function that runs all rounds.""" - try: - for round_num in range(1, self.game.rounds + 1): - self.run_round(round_num) - finally: - self.cleanup() - - def run_round(self, round_num: int): - """Execute a single training round.""" - self.game.run_round([self.agent, self.mirror_agent]) - self.run_main_agent(round_num) - self.run_mirror_agent(round_num) - - def run_main_agent(self, round_num: int): - """Run the main agent for the current round.""" - self.agent.pre_run_hook(new_round=round_num) - self.agent.run() - self.agent.post_run_hook(round=round_num) - - def run_mirror_agent(self, round_num: int): - """Update mirror agent's codebase with the main agent's changes.""" - if round_num == 1: - self.logger.info("Skipping updating mirror agent for round 1") - return - - # Set mirror agent's codebase to the main agent's codebase of the previous round - full_diff = self.agent.get_metadata()["diff"][round_num - 1] - - full_diff = filter_git_diff(full_diff) - - if full_diff.strip(): - self.logger.debug( - assert_zero_exit_code( - self.mirror_agent.environment.execute( - "git reset --hard && git clean -fd" - ) - ) - ) - - create_file_on_container( - container=self.mirror_agent.environment, # type: ignore - content=full_diff, - dest_path="tmp_patch.txt", - ) - - self.logger.info("Applying patch to mirror agent's codebase") - self.logger.debug(f"Full diff: {full_diff}") - - commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] - for cmd in commands: - self.logger.debug(f"Executing command: {cmd}") - out = assert_zero_exit_code( - self.mirror_agent.environment.execute(cmd), logger=self.logger - ) - self.logger.debug(out) - else: - self.logger.info("No diff found for mirror agent, skipping update") - - def cleanup(self): - """Clean up game resources.""" - self.game.end(self.cleanup_on_end) +from codeclash.tournaments.single_player_training import SinglePlayerTraining def main(config_path: str, cleanup: bool = False): From 20f8cb25f8f4076ebfa78aeae4b2644b4f07e43d Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Sun, 24 Aug 2025 13:33:11 -0400 Subject: [PATCH 3/8] WIP: Factored out tournament class from game class --- codeclash/agents/abstract.py | 4 +- codeclash/games/__init__.py | 11 +- codeclash/games/abstract.py | 131 +++++++----------- codeclash/games/battlecode/main.py | 38 +++-- codeclash/games/battlesnake/main.py | 47 +++++-- codeclash/games/corewar/main.py | 45 ++++-- codeclash/games/robocode/main.py | 48 +++++-- codeclash/games/robotrumble/main.py | 47 +++++-- codeclash/tournaments/abstract.py | 26 +++- codeclash/tournaments/pvp_training.py | 47 ++++++- .../tournaments/single_player_training.py | 63 +++++++-- 11 files changed, 352 insertions(+), 155 deletions(-) diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index f54f461e..1cfc39c3 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from dotenv import load_dotenv -from minisweagent import Environment +from minisweagent.environments.docker import DockerEnvironment from codeclash.agents.utils import GameContext from codeclash.constants import GH_ORG @@ -17,7 +17,7 @@ class Player(ABC): def __init__( self, config: dict, - environment: Environment, + environment: DockerEnvironment, game_context: GameContext, ) -> None: self.config = config diff --git a/codeclash/games/__init__.py b/codeclash/games/__init__.py index d996b8bf..781c3557 100644 --- a/codeclash/games/__init__.py +++ b/codeclash/games/__init__.py @@ -1,3 +1,5 @@ +from pathlib import Path + from codeclash.games.abstract import CodeGame from codeclash.games.battlecode.main import BattleCodeGame from codeclash.games.battlesnake.main import BattleSnakeGame @@ -7,16 +9,17 @@ # might consider postponing imports to avoid loading things we don't need -def get_game(config: dict) -> CodeGame: +def get_game(config: dict, *, tournament_id: str, local_output_dir: Path) -> CodeGame: game = { - x.name: x for x in [ + x.name: x + for x in [ BattleCodeGame, BattleSnakeGame, CoreWarGame, RoboCodeGame, - RobotRumbleGame + RobotRumbleGame, ] }.get(config["game"]["name"]) if game is None: raise ValueError(f"Unknown game: {config['game']['name']}") - return game(config) + return game(config, tournament_id=tournament_id, local_output_dir=local_output_dir) diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index 3b9a7c2c..9c45e205 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -1,30 +1,30 @@ -import getpass import json import os import subprocess -import time -import traceback from abc import ABC, abstractmethod from collections import Counter from pathlib import Path -from typing import Any from minisweagent.environments.docker import DockerEnvironment from codeclash.agents.abstract import Player from codeclash.constants import DIR_LOGS, DIR_WORK, GH_ORG -from codeclash.utils.environment import ( - assert_zero_exit_code, - copy_between_containers, - copy_file_from_container, -) +from codeclash.utils.environment import assert_zero_exit_code, copy_between_containers from codeclash.utils.log import get_logger class CodeGame(ABC): name: str - def __init__(self, config: dict): + def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path): + """The CodeGame class is responsible for running games, i.e., taking a list of code + from different agents/players and running them against each other. + It also provides the environments for the game and agents to run in. + + The central method is `run_round`, which takes a list of agents and returns the winner of the round. + + At the end of the the tournament, run the `end` method to clean up the game and agents and write the metadata. + """ self.url_gh: str = f"git@github.com:{GH_ORG}/{self.name}.git" self.artifacts: list[Path] = [] """Artifact objects that we might want to clean up after the game.""" @@ -32,16 +32,21 @@ def __init__(self, config: dict): """List of (round number, winner (player id))""" self.game_config: dict = config["game"] self.config: dict = config - self.rounds: int = self.game_config.get("rounds", 1) - self.round: int = 0 - self.game_id: str = f"{self.name}{time.strftime('%y%m%d%H%M%S')}" + self.game_id: str = tournament_id self.log_env: Path = (DIR_WORK / DIR_LOGS / self.game_id).resolve() - self.log_local: Path = (DIR_LOGS / getpass.getuser() / self.game_id).resolve() + self.log_local: Path = local_output_dir self.logger = get_logger( self.name, log_path=self.log_local / "game.log", emoji="🏓" ) self.environment: DockerEnvironment = self.get_environment() + """The running docker environment for executing the game""" # assert len(config["players"]) >= 2, "At least two players are required" + """Total number of rounds to play""" + self._metadata: dict = { + "name": self.name, + "config": self.config, + "game_id": self.game_id, + } @property def image_name(self) -> str: @@ -84,12 +89,7 @@ def get_metadata(self) -> dict: """This is what we write to metadata.json. You can subclass extend this to add more details for specific games. """ - return { - "name": self.name, - "scoreboard": self.scoreboard, - "config": self.config, - "game_id": self.game_id, - } + return self._metadata def end(self, cleanup: bool = False): self.logger.info("Overall score: %s", Counter([x[1] for x in self.scoreboard])) @@ -121,11 +121,7 @@ def get_environment(self, branch_name: str | None = None) -> DockerEnvironment: return environment def _pre_round_setup(self, agents: list[Player]): - """Copy agent codebases into game's container and make round log file""" - self.round += 1 - # Notify agents of round update - self.logger.info(f"▶️ Running {self.name} round {self.round}...") - + """Copy agent codebases into game's container""" # Copy agent codebases into game's container for agent in agents: self.logger.debug(f"Copying {agent.name}'s codebase") @@ -136,78 +132,55 @@ def _pre_round_setup(self, agents: list[Player]): dest_path=f"/{agent.name}", ) - # Ensure the log path + file exists + # Ensure the log directory exists assert_zero_exit_code( self.environment.execute(f"mkdir -p {self.log_env}"), logger=self.logger, ) - assert_zero_exit_code( - self.environment.execute(f"touch {self.round_log_path}"), logger=self.logger - ) @abstractmethod - def determine_winner(self, agents: list[Player]) -> Any: - """Determine the winner of the game based on the round results, - Should update self.scoreboard + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + """Determine the winner of the game based on the result output. + + Args: + result_output: The specific output containing winning information + agents: List of agents participating in the round + + Returns: + Dictionary with key "winner" containing the winner's name """ pass @abstractmethod - def execute_round(self, agents: list[Player]): - """Subclasses implement their game-specific logic here, must write results to round_log_path. + def execute_round(self, agents: list[Player]) -> dict[str, str]: + """Subclasses implement their game-specific logic here. This is the low level implementation, you probably want to use run_round instead, which includes the pre-round setup, post-round setup, and winner determination. + + Returns: + Dictionary with keys "log_output" and "result_output" """ pass - def _post_round_setup(self, agents: list[Player]): - for agent in agents: - try: - copy_between_containers( - self.environment, - agent.environment, - self.round_log_path, - f"{agent.environment.config.cwd}/logs/round_{self.round}.log", - ) - except Exception: - self.logger.error( - f"Error copying round log to {agent.name}'s container: {traceback.format_exc()}" - ) - else: - self.logger.info(f"Copied round log to {agent.name}'s container.") - - try: - copy_file_from_container( - self.environment, - self.round_log_path, - self.log_local / self.round_log_path.name, - ) - except Exception: - self.logger.error( - f"Error copying round log to {agent.name}'s container: {traceback.format_exc()}" - ) - else: - self.logger.info( - f"Copied round log from {agent.name}'s container to local log dir." - ) - self.logger.info(f"Round {self.round} completed.") - - def run_round(self, agents: list[Player]): + def run_round(self, agents: list[Player]) -> dict[str, str]: """ Run a single round of the game with the given agents. - Writes to directory containing logs and results of the round(s). + Returns the log output, result output, and winner name. All bookkeeping should be + handled by the tournament class. """ self._pre_round_setup(agents) - self.execute_round(agents) - self.determine_winner(agents) - last_winner = self.scoreboard[-1][1] - self.logger.info(f"Round {self.round} winner: {last_winner}") - self._post_round_setup(agents) + result = self.execute_round(agents) + log_output = result["log_output"] + result_output = result["result_output"] - @property - def round_log_path(self) -> Path: - """ - Get the path to the current round's log file. - """ - return self.log_env / f"round_{self.round}.log" + winner_result = self.determine_winner(result_output, agents) + winner_name = winner_result["winner"] + + return { + "log_output": log_output, + "result_output": result_output, + "winner": winner_name, + } diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py index 07df6252..59faa13f 100644 --- a/codeclash/games/battlecode/main.py +++ b/codeclash/games/battlecode/main.py @@ -1,4 +1,5 @@ import re +from pathlib import Path from typing import Any from codeclash.constants import DIR_WORK, RESULT_TIE @@ -8,8 +9,10 @@ class BattleCodeGame(CodeGame): name: str = "BattleCode" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) assert len(config["players"]) == 2, "BattleCode is a two-player game" self.run_cmd_round: str = "python run.py run" for arg, val in self.game_config.get("args", {}).items(): @@ -19,13 +22,27 @@ def __init__(self, config): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner(self, agents: list[Any]): - response = self.environment.execute(f"tail -3 {self.round_log_path} | head -1") - winner = re.search(r"\s\((.*)\)\swins\s\(", response["output"]).group(1) - winner = {"A": agents[0].name, "B": agents[1].name}.get(winner, RESULT_TIE) - self.scoreboard.append((self.round, winner)) + def determine_winner(self, result_output: str, agents: list[Any]) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") + lines = result_output.strip().split("\n") + # Get the third-to-last line which contains the winner info + winner_line = lines[-3] if len(lines) >= 3 else "" + self.logger.debug(f"Winner line: {winner_line}") + match = re.search(r"\s\((.*)\)\swins\s\(", winner_line) + if match: + winner_key = match.group(1) + self.logger.debug(f"Winner key from match: {winner_key}") + # Map A/B to actual agent names (much closer to original code) + winner = {"A": agents[0].name, "B": agents[1].name}.get( + winner_key, RESULT_TIE + ) + self.logger.debug(f"Concluding winner: {winner}") + return {"winner": winner} + else: + self.logger.debug("No winner match found, returning tie") + return {"winner": RESULT_TIE} - def execute_round(self, agents: list[Any]): + def execute_round(self, agents: list[Any]) -> dict[str, str]: for agent in agents: src, dest = f"/{agent.name}/src/mysubmission/", str( DIR_WORK / "src" / agent.name @@ -35,7 +52,10 @@ def execute_round(self, agents: list[Any]): f"--p{idx+1}-dir src --p{idx+1} {agent.name}" for idx, agent in enumerate(agents) ] - cmd = f"{self.run_cmd_round} {' '.join(args)} > {self.round_log_path}" + cmd = f"{self.run_cmd_round} {' '.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response + # For BattleCode, log_output and result_output are the same + output = response["output"] + return {"log_output": output, "result_output": output} diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py index 206f6fed..d3a8187e 100644 --- a/codeclash/games/battlesnake/main.py +++ b/codeclash/games/battlesnake/main.py @@ -1,5 +1,6 @@ import json import time +from pathlib import Path from codeclash.agents.abstract import Player from codeclash.games.abstract import CodeGame @@ -9,8 +10,10 @@ class BattleSnakeGame(CodeGame): name: str = "BattleSnake" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) self.run_cmd_round: str = "./battlesnake play" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -19,14 +22,19 @@ def __init__(self, config): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner(self, agents: list[Player]): - response = assert_zero_exit_code( - self.environment.execute(f"tail -1 {self.round_log_path}") - ) - winner = json.loads(response["output"].strip("\n"))["winnerName"] - self.scoreboard.append((self.round, winner)) + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") + lines = result_output.strip().split("\n") + # Get the last line which contains the game result + last_line = lines[-1] if lines else "" + self.logger.debug(f"Last line: {last_line}") + winner = json.loads(last_line)["winnerName"] + self.logger.debug(f"Concluding winner: {winner}") + return {"winner": winner} - def execute_round(self, agents: list[Player]): + def execute_round(self, agents: list[Player]) -> dict[str, str]: cmd = [] for idx, agent in enumerate(agents): port = 8001 + idx @@ -38,18 +46,27 @@ def execute_round(self, agents: list[Player]): time.sleep(3) # Give servers time to start - cmd.append(f"-o {self.round_log_path}") - cmd = " ".join(cmd) - self.logger.info(f"Running command: {cmd}") + # Create temporary output file for results + output_file = f"battlesnake_output_{int(time.time())}.json" + cmd_str = " ".join(cmd) + f" -o {output_file}" + self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}") - # todo: should probably keep output somewhere? try: - assert_zero_exit_code( + response = assert_zero_exit_code( self.environment.execute( - f"{self.run_cmd_round} {cmd}", + f"{self.run_cmd_round} {cmd_str}", cwd=f"{self.environment.config.cwd}/game", ) ) + + # Read the output file for result information + result_response = self.environment.execute(f"cat game/{output_file}") + result_output = result_response["output"] + + # Clean up the output file + self.environment.execute(f"rm -f game/{output_file}") + + return {"log_output": response["output"], "result_output": result_output} finally: # Kill all python servers when done self.environment.execute("pkill -f 'python main.py' || true") diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py index 46d5eafc..55e2960a 100644 --- a/codeclash/games/corewar/main.py +++ b/codeclash/games/corewar/main.py @@ -1,4 +1,5 @@ import re +from pathlib import Path from codeclash.agents.abstract import Player from codeclash.games.abstract import CodeGame @@ -7,8 +8,10 @@ class CoreWarGame(CodeGame): name: str = "CoreWar" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) self.run_cmd_round: str = "./src/pmars" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -17,20 +20,42 @@ def __init__(self, config): else: self.run_cmd_round += f" -{arg} {val}" - def determine_winner(self, agents: list[Player]): + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") scores = [] n = len(agents) * 2 - response = self.environment.execute(f"tail -{n} {self.round_log_path}") - for line in response["output"].splitlines(): + lines = result_output.strip().split("\n") + # Get the last n lines which contain the scores (closer to original) + relevant_lines = lines[-n:] if len(lines) >= n else lines + self.logger.debug(f"Relevant lines for scoring: {relevant_lines}") + + for line in relevant_lines: match = re.search(r".*\sby\s.*\sscores\s(\d+)", line) if match: - scores.append(int(match.group(1))) - winner = agents[scores.index(max(scores))].name - self.scoreboard.append((self.round, winner)) + score = int(match.group(1)) + scores.append(score) + self.logger.debug(f"Found score: {score} from line: {line}") + + self.logger.debug(f"All scores: {scores}") + if scores: + max_score_index = scores.index(max(scores)) + winner = agents[max_score_index].name + self.logger.debug( + f"Concluding winner: {winner} with index {max_score_index}" + ) + return {"winner": winner} + else: + self.logger.debug("No scores found, returning unknown") + return {"winner": "unknown"} - def execute_round(self, agents: list[Player]): + def execute_round(self, agents: list[Player]) -> dict[str, str]: args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] - cmd = f"{self.run_cmd_round} {' '.join(args)} > {self.round_log_path}" + cmd = f"{self.run_cmd_round} {' '.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response + # For CoreWar, log_output and result_output are the same + output = response["output"] + return {"log_output": output, "result_output": output} diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py index fdcf5435..514e7ab2 100644 --- a/codeclash/games/robocode/main.py +++ b/codeclash/games/robocode/main.py @@ -1,4 +1,6 @@ import subprocess +import time +from pathlib import Path from codeclash.agents.abstract import Player from codeclash.games.abstract import CodeGame @@ -8,8 +10,10 @@ class RoboCodeGame(CodeGame): name: str = "RoboCode" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) self.run_cmd_round: str = "./robocode.sh" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -51,12 +55,23 @@ def dict_to_lines(d, prefix=""): dict_to_lines(default_battle_config) return "\n".join(battle_lines) - def determine_winner(self, agents: list[Player]): - response = self.environment.execute(f"head -3 {self.round_log_path} | tail -1") - winner = response["output"].split()[1].rsplit(".", 1)[0] - self.scoreboard.append((self.round, winner)) + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") + lines = result_output.strip().split("\n") + # Get the second line which contains the winner info (closer to original) + winner_line = lines[1] if len(lines) >= 2 else "" + self.logger.debug(f"Winner line: {winner_line}") + if winner_line: + winner = winner_line.split()[1].rsplit(".", 1)[0] + self.logger.debug(f"Concluding winner: {winner}") + return {"winner": winner} + else: + self.logger.debug("No winner line found, returning unknown") + return {"winner": "unknown"} - def execute_round(self, agents: list[Player]): + def execute_round(self, agents: list[Player]) -> dict[str, str]: for agent in agents: # Copy the agent codebase into the game codebase and compile it for cmd in [ @@ -69,7 +84,8 @@ def execute_round(self, agents: list[Player]): # Create .battle file selected_robots = ",".join([f"{agent.name}.MyTank*" for agent in agents]) - battle_file = f"{self.game_id}-round{self.round}.battle" + # Use timestamp for unique battle file name since rounds are managed by tournament + battle_file = f"{self.game_id}-battle{int(time.time())}.battle" with open(battle_file, "w") as f: f.write( f"""#Battle Properties @@ -80,10 +96,18 @@ def execute_round(self, agents: list[Player]): copy_file_to_container(self.environment, battle_file, f"battles/{battle_file}") subprocess.run(f"rm -f {battle_file}", shell=True) - # Run battle - cmd = ( - f"{self.run_cmd_round} -battle {battle_file} -results {self.round_log_path}" - ) + # Run battle with results output to file + results_file = f"results_{int(time.time())}.txt" + cmd = f"{self.run_cmd_round} -battle {battle_file} -results {results_file}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response + + # Read the results file to get result output + cat_response = self.environment.execute(f"cat {results_file}") + result_output = cat_response["output"] + + # Clean up the results file + self.environment.execute(f"rm -f {results_file}") + + return {"log_output": response["output"], "result_output": result_output} diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py index 495d10e9..e66d3a93 100644 --- a/codeclash/games/robotrumble/main.py +++ b/codeclash/games/robotrumble/main.py @@ -1,3 +1,5 @@ +from pathlib import Path + from codeclash.agents.abstract import Player from codeclash.constants import RESULT_TIE from codeclash.games.abstract import CodeGame @@ -6,23 +8,44 @@ class RobotRumbleGame(CodeGame): name: str = "RobotRumble" - def __init__(self, config): - super().__init__(config) + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__( + config, tournament_id=tournament_id, local_output_dir=local_output_dir + ) assert len(config["players"]) == 2, "RobotRumble is a two-player game" self.run_cmd_round: str = "./rumblebot run term" - def determine_winner(self, agents: list[Player]): - response = self.environment.execute(f"tail -2 {self.round_log_path}") - if "Blue won" in response["output"]: - self.scoreboard.append((self.round, agents[0].name)) - elif "Red won" in response["output"]: - self.scoreboard.append((self.round, agents[1].name)) - elif "it was a tie" in response["output"]: - self.scoreboard.append((self.round, RESULT_TIE)) + def determine_winner( + self, result_output: str, agents: list[Player] + ) -> dict[str, str]: + self.logger.debug(f"Determining winner from result output: {result_output}") + lines = result_output.strip().split("\n") + # Get the last 2 lines which contain the game result (same as original) + relevant_lines = lines[-2:] if len(lines) >= 2 else lines + log_text = "\n".join(relevant_lines) + self.logger.debug(f"Relevant lines: {log_text}") + + if "Blue won" in log_text: + winner = agents[0].name + self.logger.debug(f"Blue won - Concluding winner: {winner}") + return {"winner": winner} + elif "Red won" in log_text: + winner = agents[1].name + self.logger.debug(f"Red won - Concluding winner: {winner}") + return {"winner": winner} + elif "it was a tie" in log_text: + self.logger.debug("Game was a tie") + return {"winner": RESULT_TIE} + else: + self.logger.debug("No clear result found, treating as tie") + return {"winner": RESULT_TIE} - def execute_round(self, agents: list[Player]): + def execute_round(self, agents: list[Player]) -> dict[str, str]: args = [f"/{agent.name}/robot.py" for agent in agents] - cmd = f"{self.run_cmd_round} {' '.join(args)} > {self.round_log_path}" + cmd = f"{self.run_cmd_round} {' '.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response + # For RobotRumble, log_output and result_output are the same + output = response["output"] + return {"log_output": output, "result_output": output} diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py index 59b0e753..4a865181 100644 --- a/codeclash/tournaments/abstract.py +++ b/codeclash/tournaments/abstract.py @@ -1,2 +1,26 @@ +import getpass +import time +from pathlib import Path + +from codeclash.constants import DIR_LOGS +from codeclash.utils.log import get_logger + + class AbstractTournament: - pass + def __init__(self, config: dict, *, name: str, **kwargs): + self.config: dict = config + self.name: str = name + self.tournament_id: str = f"{self.name}{time.strftime('%y%m%d%H%M%S')}" + self.local_output_dir: Path = ( + DIR_LOGS / getpass.getuser() / self.tournament_id + ).resolve() + self._metadata: dict = { + "name": self.name, + "tournament_id": self.tournament_id, + } + self.logger = get_logger( + self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆" + ) + + def get_metadata(self) -> dict: + return self._metadata diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp_training.py index 07ab57c6..9528fb51 100644 --- a/codeclash/tournaments/pvp_training.py +++ b/codeclash/tournaments/pvp_training.py @@ -2,11 +2,14 @@ PvP training mode where multiple agents compete against each other. """ +import traceback + from codeclash.agents import get_agent from codeclash.agents.abstract import Player from codeclash.games import get_game from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament +from codeclash.utils.environment import create_file_on_container from codeclash.utils.log import get_logger @@ -14,10 +17,14 @@ class PvpTraining(AbstractTournament): def __init__( self, config: dict, *, cleanup: bool = False, push_agent: bool = False ): - self.config = config + super().__init__(config, name="PvpTraining") self.cleanup_on_end = cleanup self.push_agent = push_agent - self.game: CodeGame = get_game(self.config) + self.game: CodeGame = get_game( + self.config, + tournament_id=self.tournament_id, + local_output_dir=self.local_output_dir, + ) self.agents: list[Player] = [] for agent_conf in self.config["players"]: self.agents.append(get_agent(agent_conf, self.config["prompts"], self.game)) @@ -33,7 +40,23 @@ def run(self) -> None: def run_training_round(self, round_num: int) -> None: """Execute a single training round.""" - self.game.run_round(self.agents) + # Run the game round and get results + result = self.game.run_round(self.agents) + log_output = result["log_output"] + result_output = result["result_output"] + winner = result["winner"] + + # Handle bookkeeping that was previously in the game + self.game.scoreboard.append((round_num, winner)) + self.logger.info(f"Round {round_num} winner: {winner}") + + # Write log to file + round_log_path = self.game.log_local / f"round_{round_num}.log" + round_log_path.write_text(log_output) + + # Copy log to agent environments + self._post_round_setup(self.agents, round_num, log_output) + for agent in self.agents: self.run_agent(agent, round_num) @@ -43,6 +66,24 @@ def run_agent(self, agent: Player, round_num: int) -> None: agent.run() agent.post_run_hook(round=round_num) + def _post_round_setup(self, agents: list, round_num: int, log_output: str) -> None: + """Copy round logs to agent environments and local directory.""" + for agent in agents: + try: + create_file_on_container( + container=agent.environment, + content=log_output, + dest_path=f"logs/round_{round_num}.log", + ) + except Exception: + self.logger.error( + f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" + ) + else: + self.logger.info(f"Created round log in {agent.name}'s container.") + + self.logger.info("Round completed.") + def cleanup(self) -> None: """Clean up game resources and push agents if requested.""" self.game.end(self.cleanup_on_end) diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py index f2325640..784a7749 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player_training.py @@ -3,6 +3,7 @@ """ import copy +import traceback from codeclash.agents import get_agent from codeclash.agents.abstract import Player @@ -16,9 +17,15 @@ class SinglePlayerTraining(AbstractTournament): def __init__(self, config: dict, cleanup: bool = False): - self.config = config + super().__init__(config, name="SinglePlayerTraining") self.cleanup_on_end = cleanup - self.game: CodeGame = get_game(self.config) + self.game: CodeGame = get_game( + self.config, + tournament_id=self.tournament_id, + local_output_dir=self.local_output_dir, + ) + # fixme: hack + self.game.rounds = self.config["game"]["rounds"] self.agent: Player = get_agent( self.config["player"], self.config["prompts"], self.game ) @@ -29,27 +36,47 @@ def __init__(self, config: dict, cleanup: bool = False): ) self.logger = get_logger(self.game.name) - def run(self) -> None: + @property + def rounds(self) -> int: + return self.config["game"]["rounds"] + + def run(self): """Main execution function that runs all rounds.""" try: - for round_num in range(1, self.game.rounds + 1): + for round_num in range(1, self.rounds + 1): self.run_training_round(round_num) finally: self.cleanup() def run_training_round(self, round_num: int) -> None: """Execute a single training round.""" - self.game.run_round([self.agent, self.mirror_agent]) + # Run the game round and get results + result = self.game.run_round([self.agent, self.mirror_agent]) + log_output = result["log_output"] + result_output = result["result_output"] + winner = result["winner"] + + # Handle bookkeeping that was previously in the game + self.game.scoreboard.append((round_num, winner)) + self.logger.info(f"Round {round_num} winner: {winner}") + + # Write log to file + round_log_path = self.game.log_local / f"round_{round_num}.log" + round_log_path.write_text(log_output) + + # Copy log to main agent environment only + self._copy_game_log_to_agent([self.agent], round_num, log_output) + self.run_main_agent(round_num) self.run_mirror_agent(round_num) - def run_main_agent(self, round_num: int) -> None: + def run_main_agent(self, round_num: int): """Run the main agent for the current round.""" self.agent.pre_run_hook(new_round=round_num) self.agent.run() self.agent.post_run_hook(round=round_num) - def run_mirror_agent(self, round_num: int) -> None: + def run_mirror_agent(self, round_num: int): """Update mirror agent's codebase with the main agent's changes.""" if round_num == 1: self.logger.info("Skipping updating mirror agent for round 1") @@ -88,6 +115,26 @@ def run_mirror_agent(self, round_num: int) -> None: else: self.logger.info("No diff found for mirror agent, skipping update") - def cleanup(self) -> None: + def _copy_game_log_to_agent( + self, agents: list, round_num: int, log_output: str + ) -> None: + """Copy round logs to agent environments and local directory.""" + for agent in agents: + try: + create_file_on_container( + container=agent.environment, + content=log_output, + dest_path=f"logs/round_{round_num}.log", + ) + except Exception: + self.logger.error( + f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" + ) + else: + self.logger.info(f"Created round log in {agent.name}'s container.") + + self.logger.info("Round completed.") + + def cleanup(self): """Clean up game resources.""" self.game.end(self.cleanup_on_end) From 548b6b424c843091e52a8bf6520dace1cd88def1 Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Sun, 24 Aug 2025 16:46:43 -0400 Subject: [PATCH 4/8] Working evaluation for single player mode --- codeclash/agents/abstract.py | 40 ++++++- codeclash/tournaments/abstract.py | 17 +++ codeclash/tournaments/pvp_training.py | 26 +---- .../tournaments/single_player_training.py | 106 ++++++++---------- configs/battlesnake_single_player.yaml | 2 +- 5 files changed, 108 insertions(+), 83 deletions(-) diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index 1cfc39c3..b6f6c2ff 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -7,7 +7,8 @@ from codeclash.agents.utils import GameContext from codeclash.constants import GH_ORG -from codeclash.utils.environment import assert_zero_exit_code +from codeclash.tournaments.utils.git_utils import filter_git_diff +from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container from codeclash.utils.log import get_logger load_dotenv() @@ -81,6 +82,43 @@ def push(self) -> None: f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})" ) + def reset_and_apply_patch( + self, patch: str, *, base_commit: str = "", filter_patch: bool = True + ) -> None: + """Clean all uncommited changes. If base_commit is provided, reset to that commit. + Then apply the patch to the codebase. + """ + # Need to clean before we copy over the patch (else it's gonna be removed by git clean) + self.logger.debug( + assert_zero_exit_code( + self.environment.execute( + f"git reset --hard {base_commit} && git clean -fd" + ) + ) + ) + + patch = filter_git_diff(patch) if filter_patch else patch + + if not patch.strip(): + self.logger.debug("No patch to apply, skipping") + return + + create_file_on_container( + container=self.environment, # type: ignore + content=patch, + dest_path="tmp_patch.txt", + ) + + self.logger.debug(f"Applying patch to agent's codebase: {patch}") + + commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] + for cmd in commands: + self.logger.debug(f"Executing command: {cmd}") + out = assert_zero_exit_code( + self.environment.execute(cmd), logger=self.logger + ) + self.logger.debug(out) + # --- Helper methods --- def _tag_round(self, round: int) -> None: diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py index 4a865181..fe1a7fd0 100644 --- a/codeclash/tournaments/abstract.py +++ b/codeclash/tournaments/abstract.py @@ -1,8 +1,10 @@ import getpass import time +import traceback from pathlib import Path from codeclash.constants import DIR_LOGS +from codeclash.utils.environment import create_file_on_container from codeclash.utils.log import get_logger @@ -24,3 +26,18 @@ def __init__(self, config: dict, *, name: str, **kwargs): def get_metadata(self) -> dict: return self._metadata + + def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str) -> None: + """Copy round log to agent environment.""" + try: + create_file_on_container( + container=agent.environment, + content=log_output, + dest_path=f"logs/round_{round_num}.log", + ) + except Exception: + self.logger.error( + f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" + ) + else: + self.logger.info(f"Created round log in {agent.name}'s container.") diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp_training.py index 9528fb51..0e910606 100644 --- a/codeclash/tournaments/pvp_training.py +++ b/codeclash/tournaments/pvp_training.py @@ -2,14 +2,11 @@ PvP training mode where multiple agents compete against each other. """ -import traceback - from codeclash.agents import get_agent from codeclash.agents.abstract import Player from codeclash.games import get_game from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament -from codeclash.utils.environment import create_file_on_container from codeclash.utils.log import get_logger @@ -55,35 +52,20 @@ def run_training_round(self, round_num: int) -> None: round_log_path.write_text(log_output) # Copy log to agent environments - self._post_round_setup(self.agents, round_num, log_output) + for agent in self.agents: + self._copy_game_log_to_agent(agent, round_num, log_output) for agent in self.agents: self.run_agent(agent, round_num) + self.logger.info("Round completed.") + def run_agent(self, agent: Player, round_num: int) -> None: """Run a single agent for the current round.""" agent.pre_run_hook(new_round=round_num) agent.run() agent.post_run_hook(round=round_num) - def _post_round_setup(self, agents: list, round_num: int, log_output: str) -> None: - """Copy round logs to agent environments and local directory.""" - for agent in agents: - try: - create_file_on_container( - container=agent.environment, - content=log_output, - dest_path=f"logs/round_{round_num}.log", - ) - except Exception: - self.logger.error( - f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" - ) - else: - self.logger.info(f"Created round log in {agent.name}'s container.") - - self.logger.info("Round completed.") - def cleanup(self) -> None: """Clean up game resources and push agents if requested.""" self.game.end(self.cleanup_on_end) diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py index 784a7749..693ea915 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player_training.py @@ -3,7 +3,6 @@ """ import copy -import traceback from codeclash.agents import get_agent from codeclash.agents.abstract import Player @@ -11,7 +10,6 @@ from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament from codeclash.tournaments.utils.git_utils import filter_git_diff -from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container from codeclash.utils.log import get_logger @@ -45,6 +43,7 @@ def run(self): try: for round_num in range(1, self.rounds + 1): self.run_training_round(round_num) + self.evaluate() finally: self.cleanup() @@ -65,10 +64,13 @@ def run_training_round(self, round_num: int) -> None: round_log_path.write_text(log_output) # Copy log to main agent environment only - self._copy_game_log_to_agent([self.agent], round_num, log_output) + self._copy_game_log_to_agent(self.agent, round_num, log_output) self.run_main_agent(round_num) - self.run_mirror_agent(round_num) + mirror_agent_state = round_num - 1 if round_num > 1 else 0 + self.set_mirror_state_to_round(mirror_agent_state) + + self.logger.info("Round completed.") def run_main_agent(self, round_num: int): """Run the main agent for the current round.""" @@ -76,65 +78,51 @@ def run_main_agent(self, round_num: int): self.agent.run() self.agent.post_run_hook(round=round_num) - def run_mirror_agent(self, round_num: int): + def set_mirror_state_to_round(self, round_num: int): """Update mirror agent's codebase with the main agent's changes.""" - if round_num == 1: - self.logger.info("Skipping updating mirror agent for round 1") - return - - # Set mirror agent's codebase to the main agent's codebase of the previous round - full_diff = self.agent.get_metadata()["diff"][round_num - 1] - - full_diff = filter_git_diff(full_diff) - - if full_diff.strip(): - self.logger.debug( - assert_zero_exit_code( - self.mirror_agent.environment.execute( - "git reset --hard && git clean -fd" - ) - ) - ) - - create_file_on_container( - container=self.mirror_agent.environment, # type: ignore - content=full_diff, - dest_path="tmp_patch.txt", - ) - - self.logger.info("Applying patch to mirror agent's codebase") - self.logger.debug(f"Full diff: {full_diff}") - - commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] - for cmd in commands: - self.logger.debug(f"Executing command: {cmd}") - out = assert_zero_exit_code( - self.mirror_agent.environment.execute(cmd), logger=self.logger - ) - self.logger.debug(out) + if round_num == 0: + full_diff = "" else: - self.logger.info("No diff found for mirror agent, skipping update") - - def _copy_game_log_to_agent( - self, agents: list, round_num: int, log_output: str - ) -> None: - """Copy round logs to agent environments and local directory.""" - for agent in agents: - try: - create_file_on_container( - container=agent.environment, - content=log_output, - dest_path=f"logs/round_{round_num}.log", - ) - except Exception: - self.logger.error( - f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" - ) - else: - self.logger.info(f"Created round log in {agent.name}'s container.") + full_diff = self.agent.get_metadata()["diff"][round_num] + full_diff = filter_git_diff(full_diff) - self.logger.info("Round completed.") + self.mirror_agent.reset_and_apply_patch(full_diff) def cleanup(self): """Clean up game resources.""" self.game.end(self.cleanup_on_end) + + def evaluate(self, n_repetitions: int = 3): + """Evaluate the agent's performance by + calculating the matrix of every round against each other. + """ + p1 = get_agent(self.config["player"], self.config["prompts"], self.game) + p1.name = "p1" + p2 = get_agent(self.config["player"], self.config["prompts"], self.game) + p2.name = "p2" + matrix = { + p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)} + for p1_round in range(0, self.rounds + 1) + } + for p1_round in range(0, self.rounds + 1): + for p2_round in range(0, self.rounds + 1): + self.logger.info( + f"Evaluating agent at round {p1_round} against agent at round {p2_round}" + ) + p1_patch = ( + self.agent.get_metadata()["diff"][p1_round] if p1_round > 0 else "" + ) + p2_patch = ( + self.agent.get_metadata()["diff"][p2_round] if p2_round > 0 else "" + ) + p1.reset_and_apply_patch(p1_patch) + p2.reset_and_apply_patch(p2_patch) + for i_repetition in range(n_repetitions): + result = self.game.run_round([p1, p2]) + winner = result["winner"] + self.logger.info( + f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}" + ) + matrix[p1_round][p2_round].append(winner) + self.logger.info(f"Evaluation matrix: {matrix}") + return matrix diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml index 64985c40..984e7890 100644 --- a/configs/battlesnake_single_player.yaml +++ b/configs/battlesnake_single_player.yaml @@ -1,6 +1,6 @@ game: name: BattleSnake - rounds: 5 + rounds: 4 args: width: 11 height: 11 From cbeaf1fb66928029c66c1255a181e5e9ee3eafa2 Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Mon, 25 Aug 2025 12:57:48 -0400 Subject: [PATCH 5/8] Ref/simplify: Add empty patch for round 0 --- codeclash/agents/abstract.py | 4 ++-- codeclash/tournaments/single_player_training.py | 8 ++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index b6f6c2ff..3e7e85bb 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -36,8 +36,8 @@ def __init__( self._metadata = { "name": self.name, "player_unique_id": self._player_unique_id, - "diff": {}, # mapping round -> diff - "incremental_diff": {}, # mapping round -> diff + "diff": {0: ""}, # mapping round -> diff + "incremental_diff": {0: ""}, # mapping round -> diff } # --- Main methods --- diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py index 693ea915..1ea81d86 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player_training.py @@ -80,12 +80,8 @@ def run_main_agent(self, round_num: int): def set_mirror_state_to_round(self, round_num: int): """Update mirror agent's codebase with the main agent's changes.""" - if round_num == 0: - full_diff = "" - else: - full_diff = self.agent.get_metadata()["diff"][round_num] - full_diff = filter_git_diff(full_diff) - + full_diff = self.agent.get_metadata()["diff"][round_num] + full_diff = filter_git_diff(full_diff) self.mirror_agent.reset_and_apply_patch(full_diff) def cleanup(self): From 16f811789202fdc253b6b51c2dd399604ff57483 Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Mon, 25 Aug 2025 12:58:02 -0400 Subject: [PATCH 6/8] Enh(viewer): Improve game session dropdown Add warning sign next to probably failed; add # rounds; alphabetical sort --- codeclash/viewer/app.py | 37 +++++++++++++++++++++++++-- codeclash/viewer/templates/index.html | 8 +++--- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/codeclash/viewer/app.py b/codeclash/viewer/app.py index 25dcd0ee..a8e62920 100644 --- a/codeclash/viewer/app.py +++ b/codeclash/viewer/app.py @@ -22,6 +22,25 @@ def set_log_base_directory(directory: str | Path): LOG_BASE_DIR = Path(directory).resolve() +def is_probably_failed_run(log_dir: Path) -> bool: + """Check if a run probably failed by checking if metadata.json is missing""" + metadata_file = log_dir / "metadata.json" + return not metadata_file.exists() + + +def get_round_count_from_metadata(log_dir: Path) -> Optional[int]: + """Extract round count from metadata.json if it exists""" + metadata_file = log_dir / "metadata.json" + if not metadata_file.exists(): + return None + + try: + metadata = json.loads(metadata_file.read_text()) + return metadata.get("config", {}).get("game", {}).get("rounds") + except (json.JSONDecodeError, KeyError): + return None + + @dataclass class GameMetadata: """Metadata about a game session""" @@ -150,9 +169,22 @@ def index(): """Main viewer page""" # Get available log directories logs_dir = LOG_BASE_DIR - log_folders = [] + log_folders_info = [] if logs_dir.exists(): - log_folders = [d.name for d in logs_dir.iterdir() if d.is_dir()] + for d in logs_dir.iterdir(): + if d.is_dir(): + folder_info = { + "name": d.name, + "is_failed": is_probably_failed_run(d), + "round_count": get_round_count_from_metadata(d), + } + log_folders_info.append(folder_info) + + # Sort folders alphabetically by name + log_folders_info.sort(key=lambda x: x["name"]) + + # Extract just the names for backwards compatibility + log_folders = [folder["name"] for folder in log_folders_info] selected_folder = request.args.get( "folder", log_folders[0] if log_folders else None @@ -178,6 +210,7 @@ def index(): return render_template( "index.html", log_folders=log_folders, + log_folders_info=log_folders_info, selected_folder=selected_folder, metadata=metadata, trajectories_by_round=trajectories_by_round, diff --git a/codeclash/viewer/templates/index.html b/codeclash/viewer/templates/index.html index c59c7382..40641d3d 100644 --- a/codeclash/viewer/templates/index.html +++ b/codeclash/viewer/templates/index.html @@ -16,10 +16,10 @@

🎮 CodeClash Trajectory Viewer

From 7f4cbb439dc941da77de4d0f1bb13796763f205e Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Mon, 25 Aug 2025 14:00:05 -0400 Subject: [PATCH 7/8] Ref: Change get_agent and prompt rendering (needs to use tournament) --- codeclash/agents/__init__.py | 27 ++------- codeclash/agents/abstract.py | 2 - codeclash/agents/minisweagent.py | 9 +-- codeclash/agents/utils.py | 31 ++++------ codeclash/games/abstract.py | 11 ++-- codeclash/games/battlecode/main.py | 3 +- codeclash/games/corewar/main.py | 3 +- codeclash/games/robotrumble/main.py | 3 +- codeclash/tournaments/abstract.py | 5 +- codeclash/tournaments/pvp_training.py | 33 +++++++++- .../tournaments/single_player_training.py | 60 ++++++++++++++----- configs/battlesnake_single_player.yaml | 2 +- tests/test_integration.py | 11 ++-- 13 files changed, 120 insertions(+), 80 deletions(-) diff --git a/codeclash/agents/__init__.py b/codeclash/agents/__init__.py index 15c8b6f9..5dd9d0a6 100644 --- a/codeclash/agents/__init__.py +++ b/codeclash/agents/__init__.py @@ -1,33 +1,18 @@ +from minisweagent.environments.docker import DockerEnvironment + from codeclash.agents.abstract import Player from codeclash.agents.dummy import Dummy from codeclash.agents.minisweagent import MiniSWEAgent from codeclash.agents.utils import GameContext -from codeclash.constants import DIR_WORK -from codeclash.games.abstract import CodeGame -def get_agent(config: dict, prompts: dict, game: CodeGame) -> Player: +def get_agent( + config: dict, game_context: GameContext, environment: DockerEnvironment +) -> Player: agents = { "dummy": Dummy, "mini": MiniSWEAgent, }.get(config["agent"]) if agents is None: raise ValueError(f"Unknown agent type: {config['agent']}") - environment = game.get_environment( - f"{game.game_id}.{config['name']}" - ) # NOTE: MUST be branch_name (defined in agents/abstract.py) - return agents( - config, - environment, - GameContext( - id=game.game_id, - log_env=game.log_env, - log_local=game.log_local, - name=game.name, - player_id=config["name"], - prompts=prompts, - round=1, - rounds=game.rounds, - working_dir=str(DIR_WORK), - ), - ) + return agents(config, environment, game_context) diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index 3e7e85bb..ed47b8da 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -27,7 +27,6 @@ def __init__( """Unique ID that doesn't clash even accross multiple games. Used for git tags.""" self.environment = environment self.game_context = game_context - self.game_context.render_and_set_prompts() self.logger = get_logger( self.name, log_path=self.game_context.log_local / f"{self.name}.log", @@ -47,7 +46,6 @@ def pre_run_hook(self, *, new_round: int) -> None: if new_round == 1: self._tag_round(0) self.game_context.round = new_round - self.game_context.render_and_set_prompts() def post_run_hook(self, *, round: int) -> None: """Should be called after we called the run method.""" diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py index ec5d7cb8..34665433 100644 --- a/codeclash/agents/minisweagent.py +++ b/codeclash/agents/minisweagent.py @@ -8,8 +8,9 @@ import yaml from jinja2 import Template -from minisweagent import Environment, Model +from minisweagent import Model from minisweagent.agents.default import AgentConfig, DefaultAgent +from minisweagent.environments.docker import DockerEnvironment from minisweagent.models.litellm_model import LitellmModel from minisweagent.run.utils.save import save_traj from rich.console import Console @@ -28,7 +29,7 @@ class ClashAgent(DefaultAgent): def __init__( self, model: Model, - env: Environment, + env: DockerEnvironment, name: str, game_context: GameContext, *, @@ -56,7 +57,7 @@ def render_template(self, template: str, **kwargs) -> str: | asdict(self.env.config) | asdict(self.model.config) | platform.uname()._asdict() - | self.game_context.to_dict() + | self.game_context.to_template_vars() ) return Template(template).render(**kwargs, **cs, **os.environ) @@ -69,7 +70,7 @@ class MiniSWEAgent(Player): """Player with agentic code editing capabilities""" def __init__( - self, config: dict, environment: Environment, game_context: GameContext + self, config: dict, environment: DockerEnvironment, game_context: GameContext ): super().__init__(config, environment=environment, game_context=game_context) diff --git a/codeclash/agents/utils.py b/codeclash/agents/utils.py index a5de6856..16ecd13a 100644 --- a/codeclash/agents/utils.py +++ b/codeclash/agents/utils.py @@ -36,24 +36,15 @@ class GameContext: rounds: int working_dir: str - def render_and_set_prompts(self): - """Render and set prompts using the current game context.""" + def _render_prompt_templates(self) -> dict: context = asdict(self) - del context["prompts"] - for key, template_str in self.prompts.items(): - rendered = Template(template_str).render(**context) - setattr(self, key, rendered) - - def to_dict(self): - """Convert the GameContext to a dictionary, including dynamically added attributes.""" - result = asdict(self) - declared = set(self.__dataclass_fields__) - for attr in dir(self): - if ( - not attr.startswith("_") - and attr not in declared - and not callable(getattr(self, attr)) - ): - result[attr] = getattr(self, attr) - del result["prompts"] - return result + return { + key: Template(template_str).render(**context) + for key, template_str in self.prompts.items() + } + + def to_template_vars(self) -> dict[str, str]: + """Convert the GameContext to a dictionary for rendering prompts in the agent""" + out = asdict(self) | self._render_prompt_templates() + out.pop("prompts") + return out diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index 9c45e205..ddbb316f 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -2,7 +2,6 @@ import os import subprocess from abc import ABC, abstractmethod -from collections import Counter from pathlib import Path from minisweagent.environments.docker import DockerEnvironment @@ -24,12 +23,15 @@ def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path): The central method is `run_round`, which takes a list of agents and returns the winner of the round. At the end of the the tournament, run the `end` method to clean up the game and agents and write the metadata. + + Args: + config: The overall config for the tournament. + tournament_id: The id of the tournament. + local_output_dir: The host/local directory to write logs to. """ self.url_gh: str = f"git@github.com:{GH_ORG}/{self.name}.git" self.artifacts: list[Path] = [] """Artifact objects that we might want to clean up after the game.""" - self.scoreboard: list[tuple[int, str]] = [] - """List of (round number, winner (player id))""" self.game_config: dict = config["game"] self.config: dict = config self.game_id: str = tournament_id @@ -40,8 +42,6 @@ def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path): ) self.environment: DockerEnvironment = self.get_environment() """The running docker environment for executing the game""" - # assert len(config["players"]) >= 2, "At least two players are required" - """Total number of rounds to play""" self._metadata: dict = { "name": self.name, "config": self.config, @@ -92,7 +92,6 @@ def get_metadata(self) -> dict: return self._metadata def end(self, cleanup: bool = False): - self.logger.info("Overall score: %s", Counter([x[1] for x in self.scoreboard])) (self.log_local / "metadata.json").write_text(json.dumps(self.get_metadata())) if cleanup: for artifact in self.artifacts: diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py index 59faa13f..43c26295 100644 --- a/codeclash/games/battlecode/main.py +++ b/codeclash/games/battlecode/main.py @@ -1,4 +1,5 @@ import re +import shlex from pathlib import Path from typing import Any @@ -52,7 +53,7 @@ def execute_round(self, agents: list[Any]) -> dict[str, str]: f"--p{idx+1}-dir src --p{idx+1} {agent.name}" for idx, agent in enumerate(agents) ] - cmd = f"{self.run_cmd_round} {' '.join(args)}" + cmd = f"{self.run_cmd_round} {shlex.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py index 55e2960a..7e6e5617 100644 --- a/codeclash/games/corewar/main.py +++ b/codeclash/games/corewar/main.py @@ -1,4 +1,5 @@ import re +import shlex from pathlib import Path from codeclash.agents.abstract import Player @@ -52,7 +53,7 @@ def determine_winner( def execute_round(self, agents: list[Player]) -> dict[str, str]: args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] - cmd = f"{self.run_cmd_round} {' '.join(args)}" + cmd = f"{self.run_cmd_round} {shlex.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py index e66d3a93..8f893ee4 100644 --- a/codeclash/games/robotrumble/main.py +++ b/codeclash/games/robotrumble/main.py @@ -1,3 +1,4 @@ +import shlex from pathlib import Path from codeclash.agents.abstract import Player @@ -42,7 +43,7 @@ def determine_winner( def execute_round(self, agents: list[Player]) -> dict[str, str]: args = [f"/{agent.name}/robot.py" for agent in agents] - cmd = f"{self.run_cmd_round} {' '.join(args)}" + cmd = f"{self.run_cmd_round} {shlex.join(args)}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py index fe1a7fd0..d98a9d6c 100644 --- a/codeclash/tournaments/abstract.py +++ b/codeclash/tournaments/abstract.py @@ -3,7 +3,10 @@ import traceback from pathlib import Path -from codeclash.constants import DIR_LOGS +from codeclash.agents import get_agent +from codeclash.agents.abstract import Player +from codeclash.agents.utils import GameContext +from codeclash.constants import DIR_LOGS, DIR_WORK from codeclash.utils.environment import create_file_on_container from codeclash.utils.log import get_logger diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp_training.py index 0e910606..80b74b81 100644 --- a/codeclash/tournaments/pvp_training.py +++ b/codeclash/tournaments/pvp_training.py @@ -4,6 +4,8 @@ from codeclash.agents import get_agent from codeclash.agents.abstract import Player +from codeclash.agents.utils import GameContext +from codeclash.constants import DIR_WORK from codeclash.games import get_game from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament @@ -24,13 +26,38 @@ def __init__( ) self.agents: list[Player] = [] for agent_conf in self.config["players"]: - self.agents.append(get_agent(agent_conf, self.config["prompts"], self.game)) + self.agents.append(self.get_agent(agent_conf, self.config["prompts"])) self.logger = get_logger(self.game.name) + self.scoreboard: list[tuple[int, str]] = [] + + @property + def rounds(self) -> int: + return self.config["game"]["rounds"] + + def get_agent(self, agent_config: dict, prompts: dict) -> Player: + """Create an agent with environment and game context.""" + environment = self.game.get_environment( + f"{self.game.game_id}.{agent_config['name']}" + ) + + game_context = GameContext( + id=self.game.game_id, + log_env=self.game.log_env, + log_local=self.game.log_local, + name=self.game.name, + player_id=agent_config["name"], + prompts=prompts, + round=1, + rounds=self.rounds, + working_dir=str(DIR_WORK), + ) + + return get_agent(agent_config, game_context, environment) def run(self) -> None: """Main execution function that runs all rounds.""" try: - for round_num in range(1, self.game.rounds + 1): + for round_num in range(1, self.rounds + 1): self.run_training_round(round_num) finally: self.cleanup() @@ -44,7 +71,7 @@ def run_training_round(self, round_num: int) -> None: winner = result["winner"] # Handle bookkeeping that was previously in the game - self.game.scoreboard.append((round_num, winner)) + self.scoreboard.append((round_num, winner)) self.logger.info(f"Round {round_num} winner: {winner}") # Write log to file diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py index 1ea81d86..42c02b33 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player_training.py @@ -6,6 +6,9 @@ from codeclash.agents import get_agent from codeclash.agents.abstract import Player +from codeclash.agents.dummy import Dummy +from codeclash.agents.utils import GameContext +from codeclash.constants import DIR_WORK from codeclash.games import get_game from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament @@ -22,22 +25,47 @@ def __init__(self, config: dict, cleanup: bool = False): tournament_id=self.tournament_id, local_output_dir=self.local_output_dir, ) - # fixme: hack - self.game.rounds = self.config["game"]["rounds"] - self.agent: Player = get_agent( - self.config["player"], self.config["prompts"], self.game - ) + self.agent: Player = self.get_agent(self.config["player"], round=1) mirror_agent_config = copy.deepcopy(self.config["player"]) mirror_agent_config["name"] = "mirror" - self.mirror_agent: Player = get_agent( - mirror_agent_config, self.config["prompts"], self.game - ) + self.mirror_agent: Player = self.get_agent(mirror_agent_config, round=0) self.logger = get_logger(self.game.name) + self.scoreboard: list[tuple[int, str]] = [] @property def rounds(self) -> int: return self.config["game"]["rounds"] + def get_game_context(self, agent_config: dict, *, round: int) -> GameContext: + """Create a game context for an agent.""" + return GameContext( + id=self.game.game_id, + log_env=self.game.log_env, + log_local=self.game.log_local, + name=self.game.name, + player_id=agent_config["name"], + prompts=self.config["prompts"], + round=round, + rounds=self.rounds, + working_dir=str(DIR_WORK), + ) + + def get_agent(self, agent_config: dict, round: int) -> Player: + """Create an agent with environment and game context.""" + environment = self.game.get_environment( + f"{self.game.game_id}.{agent_config['name']}" + ) + game_context = self.get_game_context(agent_config, round=round) + return get_agent(agent_config, game_context, environment) + + def get_dummy_agent(self) -> Player: + """Create a dummy agent that does nothing.""" + return Dummy( + self.config["player"], + environment=self.game.get_environment(f"{self.game.game_id}.dummy"), + game_context=self.get_game_context(self.config["player"], round=0), + ) + def run(self): """Main execution function that runs all rounds.""" try: @@ -48,15 +76,14 @@ def run(self): self.cleanup() def run_training_round(self, round_num: int) -> None: - """Execute a single training round.""" + """Execute a single training round, i.e., run the game, then run the agent.""" # Run the game round and get results result = self.game.run_round([self.agent, self.mirror_agent]) log_output = result["log_output"] - result_output = result["result_output"] winner = result["winner"] # Handle bookkeeping that was previously in the game - self.game.scoreboard.append((round_num, winner)) + self.scoreboard.append((round_num, winner)) self.logger.info(f"Round {round_num} winner: {winner}") # Write log to file @@ -92,10 +119,13 @@ def evaluate(self, n_repetitions: int = 3): """Evaluate the agent's performance by calculating the matrix of every round against each other. """ - p1 = get_agent(self.config["player"], self.config["prompts"], self.game) - p1.name = "p1" - p2 = get_agent(self.config["player"], self.config["prompts"], self.game) - p2.name = "p2" + p1_config = self.config["player"].copy() + p1_config["name"] = "p1" + p1 = self.get_dummy_agent() + + p2_config = self.config["player"].copy() + p2_config["name"] = "p2" + p2 = self.get_dummy_agent() matrix = { p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)} for p1_round in range(0, self.rounds + 1) diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml index 984e7890..af74b703 100644 --- a/configs/battlesnake_single_player.yaml +++ b/configs/battlesnake_single_player.yaml @@ -1,6 +1,6 @@ game: name: BattleSnake - rounds: 4 + rounds: 1 args: width: 11 height: 11 diff --git a/tests/test_integration.py b/tests/test_integration.py index 4ef70c26..1278571e 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -42,8 +42,8 @@ def test_main_battlesnake_integration(): def mock_get_agent(original_get_agent): """Wrapper to replace agent models with DeterministicModel""" - def wrapper(config, prompts, game): - agent = original_get_agent(config, prompts, game) + def wrapper(config, game_context, environment): + agent = original_get_agent(config, game_context, environment) print("In wrapper, got agent of type ", type(agent)) # Replace model if the agent has one (specifically for MiniSWEAgent) @@ -61,10 +61,13 @@ def wrapper(config, prompts, game): return wrapper - # Import the get_agent function and patch it where it's used in main + # Import the get_agent function and patch it where it's used in the tournaments from codeclash.agents import get_agent # Run the main function with cleanup enabled - with patch("main.get_agent", side_effect=mock_get_agent(get_agent)): + with patch( + "codeclash.tournaments.pvp_training.get_agent", + side_effect=mock_get_agent(get_agent), + ): # This should complete without raising any exceptions main(temp_config_path, cleanup=True, push_agent=False) From 73f64f2c5e4f282ef71859bee557d19699869968 Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Mon, 25 Aug 2025 15:42:10 -0400 Subject: [PATCH 8/8] Change: Move round setting to tournament section of config --- codeclash/agents/minisweagent.py | 1 + codeclash/tournaments/pvp_training.py | 2 +- codeclash/tournaments/single_player_training.py | 6 +++--- configs/battlecode.yaml | 3 ++- configs/battlesnake.yaml | 5 +++-- configs/battlesnake_dummy.yaml | 3 ++- configs/battlesnake_single_player.yaml | 4 +++- configs/corewar.yaml | 3 ++- configs/robocode.yaml | 3 ++- configs/robotrumble.yaml | 1 + tests/test_integration.py | 2 +- 11 files changed, 21 insertions(+), 12 deletions(-) diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py index 34665433..f3fec5d4 100644 --- a/codeclash/agents/minisweagent.py +++ b/codeclash/agents/minisweagent.py @@ -105,6 +105,7 @@ def run(self): traj_path, exit_status=exit_status, result=result, + print_fct=self.logger.debug, ) copy_file_to_container( self.environment, diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp_training.py index 80b74b81..ffc223cc 100644 --- a/codeclash/tournaments/pvp_training.py +++ b/codeclash/tournaments/pvp_training.py @@ -32,7 +32,7 @@ def __init__( @property def rounds(self) -> int: - return self.config["game"]["rounds"] + return self.config["tournament"]["rounds"] def get_agent(self, agent_config: dict, prompts: dict) -> Player: """Create an agent with environment and game context.""" diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py index 42c02b33..832d36b4 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player_training.py @@ -29,12 +29,11 @@ def __init__(self, config: dict, cleanup: bool = False): mirror_agent_config = copy.deepcopy(self.config["player"]) mirror_agent_config["name"] = "mirror" self.mirror_agent: Player = self.get_agent(mirror_agent_config, round=0) - self.logger = get_logger(self.game.name) self.scoreboard: list[tuple[int, str]] = [] @property def rounds(self) -> int: - return self.config["game"]["rounds"] + return self.config["tournament"]["rounds"] def get_game_context(self, agent_config: dict, *, round: int) -> GameContext: """Create a game context for an agent.""" @@ -71,7 +70,8 @@ def run(self): try: for round_num in range(1, self.rounds + 1): self.run_training_round(round_num) - self.evaluate() + if self.config["tournament"]["evaluate_matrix"]: + self.evaluate() finally: self.cleanup() diff --git a/configs/battlecode.yaml b/configs/battlecode.yaml index ec692e5f..ed6386cf 100644 --- a/configs/battlecode.yaml +++ b/configs/battlecode.yaml @@ -1,8 +1,9 @@ game: name: BattleCode - rounds: 2 args: maps: quack +tournament: + rounds: 2 players: - agent: dummy name: p1 diff --git a/configs/battlesnake.yaml b/configs/battlesnake.yaml index 8e57c630..9016df1e 100644 --- a/configs/battlesnake.yaml +++ b/configs/battlesnake.yaml @@ -1,15 +1,16 @@ game: name: BattleSnake - rounds: 2 args: width: 11 height: 11 browser: false +tournament: + rounds: 2 players: - agent: mini name: p1 config: configs/mini/default.yaml - model: claude-sonnet-4-20250514 + model: openai/gpt-5-mini - agent: dummy name: p2 prompts: diff --git a/configs/battlesnake_dummy.yaml b/configs/battlesnake_dummy.yaml index 1610ae33..05a8d4b4 100644 --- a/configs/battlesnake_dummy.yaml +++ b/configs/battlesnake_dummy.yaml @@ -1,10 +1,11 @@ game: name: BattleSnake - rounds: 2 args: width: 11 height: 11 browser: false +tournament: + rounds: 2 players: - agent: dummy name: p1 diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml index af74b703..e5f6f370 100644 --- a/configs/battlesnake_single_player.yaml +++ b/configs/battlesnake_single_player.yaml @@ -1,10 +1,12 @@ game: name: BattleSnake - rounds: 1 args: width: 11 height: 11 browser: false +tournament: + rounds: 1 + evaluate_matrix: true player: agent: mini config: configs/mini/default.yaml diff --git a/configs/corewar.yaml b/configs/corewar.yaml index a244cbe4..daaf3e2e 100644 --- a/configs/corewar.yaml +++ b/configs/corewar.yaml @@ -1,8 +1,9 @@ game: name: CoreWar - rounds: 3 args: r: 100 +tournament: + rounds: 3 players: - agent: dummy name: p1 diff --git a/configs/robocode.yaml b/configs/robocode.yaml index 77e05313..c401cdfd 100644 --- a/configs/robocode.yaml +++ b/configs/robocode.yaml @@ -1,6 +1,5 @@ game: name: RoboCode - rounds: 3 battle: battle: numRounds: 10 @@ -14,6 +13,8 @@ game: args: nodisplay: true nosound: true +tournament: + rounds: 3 players: - agent: dummy name: p1 diff --git a/configs/robotrumble.yaml b/configs/robotrumble.yaml index 2f94d7d6..84e9e06f 100644 --- a/configs/robotrumble.yaml +++ b/configs/robotrumble.yaml @@ -1,5 +1,6 @@ game: name: RobotRumble +tournament: rounds: 3 players: - agent: dummy diff --git a/tests/test_integration.py b/tests/test_integration.py index 1278571e..52fa4676 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -33,7 +33,7 @@ def test_main_battlesnake_integration(): temp_config_path = os.path.join(temp_dir, "test_battlesnake.yaml") # Reduce rounds to 1 for faster testing - config["game"]["rounds"] = 1 + config["tournament"]["rounds"] = 1 # Write the modified config with open(temp_config_path, "w") as f: