diff --git a/codeclash/agents/player.py b/codeclash/agents/player.py index db757c2a..1ad25861 100644 --- a/codeclash/agents/player.py +++ b/codeclash/agents/player.py @@ -24,7 +24,7 @@ def __init__( ) -> None: self.config = config self.name = config["name"] - self._player_unique_id = uuid.uuid4() + self._player_unique_id = str(uuid.uuid4()) """Unique ID that doesn't clash even across multiple games. Used for git tags.""" self.environment = environment self.game_context = game_context diff --git a/codeclash/games/__init__.py b/codeclash/games/__init__.py index cfcfa3e3..e05eb56d 100644 --- a/codeclash/games/__init__.py +++ b/codeclash/games/__init__.py @@ -5,6 +5,7 @@ from codeclash.games.corewar.corewar import CoreWarGame from codeclash.games.dummy.dummy_game import DummyGame from codeclash.games.game import CodeGame +from codeclash.games.huskybench.huskybench import HuskyBenchGame from codeclash.games.robocode.robocode import RoboCodeGame from codeclash.games.robotrumble.robotrumble import RobotRumbleGame @@ -18,6 +19,7 @@ def get_game(config: dict, *, tournament_id: str, local_output_dir: Path) -> Cod BattleSnakeGame, CoreWarGame, DummyGame, + HuskyBenchGame, RoboCodeGame, RobotRumbleGame, ] diff --git a/codeclash/games/battlecode/battlecode.py b/codeclash/games/battlecode/battlecode.py index a41b476e..91103794 100644 --- a/codeclash/games/battlecode/battlecode.py +++ b/codeclash/games/battlecode/battlecode.py @@ -5,7 +5,8 @@ from tqdm.auto import tqdm from codeclash.constants import DIR_WORK, RESULT_TIE -from codeclash.games.game import CodeGame, RoundData, RoundStats +from codeclash.games.game import CodeGame, RoundStats +from codeclash.utils.environment import copy_from_container class BattleCodeGame(CodeGame): @@ -22,9 +23,18 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" --{arg} {val}" - def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats: + def copy_logs_from_env(self, 
round_num): + super().copy_logs_from_env(round_num) + copy_from_container( + container=self.environment, + src_path="/testbed/logs", + dest_path=self.log_local / "rounds" / str(round_num), + ) + + def get_stats(self, agents: list[Any]) -> RoundStats: winners = [] - for ro in result_outputs: + for sim_file in [f"logs/sim_{idx}.log" for idx in range(self.game_config["sims_per_round"])]: + ro = self.environment.execute(f"cat {sim_file}")["output"] lines = ro.strip().split("\n") # Get the third-to-last line which contains the winner info winner_line = lines[-3] if len(lines) >= 3 else "" @@ -43,17 +53,15 @@ def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats: scores={agent.name: winners.count(agent.name) for agent in agents}, ) - def execute_round(self, agents: list[Any]) -> RoundData: + def execute_round(self, agents: list[Any]): for agent in agents: src, dest = f"/{agent.name}/src/mysubmission/", str(DIR_WORK / "src" / agent.name) self.environment.execute(f"cp -r {src} {dest}") args = [f"--p{idx + 1}-dir src --p{idx + 1} {agent.name}" for idx, agent in enumerate(agents)] cmd = f"{self.run_cmd_round} {' '.join(args)}" self.logger.info(f"Running game: {cmd}") - outputs = [] - for _ in tqdm(range(self.game_config["sims_per_round"])): - response = self.environment.execute(cmd) + + self.environment.execute("rm -rf logs; mkdir logs") + for idx in tqdm(range(self.game_config["sims_per_round"])): + response = self.environment.execute(cmd + f" > logs/sim_{idx}.log") assert response["returncode"] == 0, response - # For BattleCode, log_outputs and result_outputs are the same - outputs.append(response["output"]) - return RoundData(logs=outputs, results=outputs) diff --git a/codeclash/games/battlesnake/battlesnake.py b/codeclash/games/battlesnake/battlesnake.py index 0001ca58..7743126d 100644 --- a/codeclash/games/battlesnake/battlesnake.py +++ b/codeclash/games/battlesnake/battlesnake.py @@ -1,6 +1,5 @@ import json import time -import uuid from 
concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path @@ -8,8 +7,8 @@ from codeclash.agents.player import Player from codeclash.constants import RESULT_TIE -from codeclash.games.game import CodeGame, RoundData, RoundStats -from codeclash.utils.environment import assert_zero_exit_code +from codeclash.games.game import CodeGame, RoundStats +from codeclash.utils.environment import assert_zero_exit_code, copy_from_container class BattleSnakeGame(CodeGame): @@ -39,9 +38,18 @@ def _wait_for_ports(self, ports: list[int], timeout: float = 3.0) -> None: time.sleep(0.1) - def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: + def copy_logs_from_env(self, round_num): + super().copy_logs_from_env(round_num) + copy_from_container( + container=self.environment, + src_path=f"{self.environment.config.cwd}/game/logs", + dest_path=self.log_local / "rounds" / str(round_num), + ) + + def get_stats(self, agents: list[Player]) -> RoundStats: scores = {} - for ro in result_outputs: + for idx in range(self.game_config["sims_per_round"]): + ro = self.environment.execute(f"cat game/logs/sim_out_{idx}.json")["output"] lines = ro.strip().split("\n") results = json.loads(lines[-1]) if lines else {} # Get the last line which contains the game result winner = RESULT_TIE if results["isDraw"] else results["winnerName"] @@ -51,7 +59,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta winner = RESULT_TIE if list(scores.values()).count(scores[winner]) > 1 else winner return RoundStats(winner=winner, scores=scores) - def execute_round(self, agents: list[Player]) -> RoundData: + def execute_round(self, agents: list[Player]): self.logger.debug("Starting game servers") cmd = [] ports = [] @@ -68,46 +76,30 @@ def execute_round(self, agents: list[Player]) -> RoundData: self.logger.debug("All ports are ready") try: - log_outputs, result_outputs = [], [] cmd = self.run_cmd_round + " " + " ".join(cmd) 
self.logger.info(f"Running game: {cmd}") + self.environment.execute("rm -rf logs; mkdir logs", cwd=f"{self.environment.config.cwd}/game") # Use ThreadPoolExecutor for parallel execution with ThreadPoolExecutor(20) as executor: # Submit all simulations to the thread pool futures = [ - executor.submit(self._run_single_simulation, cmd) for _ in range(self.game_config["sims_per_round"]) + executor.submit(self._run_single_simulation, cmd, idx) + for idx in range(self.game_config["sims_per_round"]) ] # Collect results as they complete for future in tqdm(as_completed(futures), total=len(futures)): - log_output, result_output = future.result() - log_outputs.append(log_output) - result_outputs.append(result_output) - - return RoundData(logs=log_outputs, results=result_outputs) + future.result() finally: # Kill all python servers when done self.environment.execute("pkill -f 'python main.py' || true") - def _run_single_simulation(self, cmd: str) -> tuple[str, str]: + def _run_single_simulation(self, cmd: str, idx: int) -> None: - """Run a single battlesnake simulation and return log and result outputs.""" + """Run a single battlesnake simulation and write its result to game/logs/sim_out_<idx>.json.""" - # Create temporary output file for results - output_file = f"battlesnake_output_{uuid.uuid4().hex}.json" - - # Run game - response = assert_zero_exit_code( + assert_zero_exit_code( self.environment.execute( - cmd + f" -o {output_file}", + cmd + f" -o logs/sim_out_{idx}.json", cwd=f"{self.environment.config.cwd}/game", ) ) - - # Read the output file for result information - result_response = self.environment.execute(f"cat game/{output_file}") - result_output = result_response["output"] - - # Clean up the output file - self.environment.execute(f"rm -f game/{output_file}") - - return response["output"], result_output diff --git a/codeclash/games/corewar/corewar.py b/codeclash/games/corewar/corewar.py index 994f28a7..c00c4ec2 100644 --- a/codeclash/games/corewar/corewar.py +++ b/codeclash/games/corewar/corewar.py @@ -3,7 +3,8 @@ from pathlib import Path from 
codeclash.agents.player import Player -from codeclash.games.game import CodeGame, RoundData, RoundStats +from codeclash.games.game import CodeGame, RoundStats +from codeclash.utils.environment import copy_from_container class CoreWarGame(CodeGame): @@ -19,8 +20,16 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" -{arg} {val}" - def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: - result_output = result_outputs[0] # Get the first (and only) element + def copy_logs_from_env(self, round_num: int) -> None: + super().copy_logs_from_env(round_num) + copy_from_container( + container=self.environment, + src_path="/testbed/output.log", + dest_path=self.log_local / "rounds" / str(round_num) / "output.log", + ) + + def get_stats(self, agents: list[Player]) -> RoundStats: + result_output = self.environment.execute("cat output.log")["output"] self.logger.debug(f"Determining winner from result output: {result_output}") scores = [] n = len(agents) * 2 @@ -50,10 +59,9 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta self.logger.debug("No scores found, returning unknown") return RoundStats(winner="unknown", scores={agent.name: 0 for agent in agents}) - def execute_round(self, agents: list[Player]) -> RoundData: + def execute_round(self, agents: list[Player]): args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] - cmd = f"{self.run_cmd_round} {shlex.join(args)} -r {self.game_config['sims_per_round']}" + cmd = f"{self.run_cmd_round} {shlex.join(args)} -r {self.game_config['sims_per_round']} > output.log;" self.logger.info(f"Running game: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response - return RoundData(logs=[response["output"]], results=[response["output"]]) diff --git a/codeclash/games/dummy/dummy_game.py b/codeclash/games/dummy/dummy_game.py index e9ef9ba9..bcdd9ddd 100644 --- 
a/codeclash/games/dummy/dummy_game.py +++ b/codeclash/games/dummy/dummy_game.py @@ -1,14 +1,23 @@ import re from codeclash.agents.player import Player -from codeclash.games.game import CodeGame, RoundData, RoundStats +from codeclash.games.game import CodeGame, RoundStats +from codeclash.utils.environment import assert_zero_exit_code, copy_from_container class DummyGame(CodeGame): name: str = "DummyGame" - def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: - result_output = result_outputs[0] # Get the first (and only) element + def copy_logs_from_env(self, round_num): + super().copy_logs_from_env(round_num) + copy_from_container( + container=self.environment, + src_path="/testbed/result.log", + dest_path=self.log_local / "rounds" / str(round_num) / "result.log", + ) + + def get_stats(self, agents: list[Player]) -> RoundStats: + result_output = self.environment.execute("cat result.log")["output"] lines = result_output.split("FINAL_RESULTS")[-1].splitlines() scores = {} @@ -25,10 +34,8 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta details={"dummy": True}, ) - def execute_round(self, agents: list[Player]) -> RoundData: + def execute_round(self, agents: list[Player]) -> None: args = [f"/{agent.name}/main.py" for agent in agents] - cmd = f"python engine.py {' '.join(args)} -r {self.game_config['sims_per_round']}" + cmd = f"python engine.py {' '.join(args)} -r {self.game_config['sims_per_round']} > result.log;" self.logger.info(f"Running game: {cmd}") - response = self.environment.execute(cmd) - assert response["returncode"] == 0, response - return RoundData(logs=[response["output"]], results=[response["output"]]) + assert_zero_exit_code(self.environment.execute(cmd)) diff --git a/codeclash/games/dummy/main.py b/codeclash/games/dummy/main.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclash/games/game.py b/codeclash/games/game.py index 8666f89b..5472a32e 100644 --- 
a/codeclash/games/game.py +++ b/codeclash/games/game.py @@ -23,16 +23,6 @@ def __str__(self) -> str: return "\n".join([f"- Winner: {self.winner}", f"- Scores: {self.scores}"]) -class RoundData(BaseModel): - logs: list[str] - results: list[str] - - -class RoundRecord(BaseModel): - data: RoundData - stats: RoundStats - - class CodeGame(ABC): name: str @@ -97,7 +87,7 @@ def build_image(self): result = subprocess.run( ( "export $(cat .env | xargs);" - f"docker build --build-arg GITHUB_TOKEN=$GITHUB_TOKEN -t {self.image_name} -f docker/{self.name}.Dockerfile ." + f"docker build --no-cache --build-arg GITHUB_TOKEN=$GITHUB_TOKEN -t {self.image_name} -f docker/{self.name}.Dockerfile ." ), shell=True, capture_output=True, @@ -168,12 +158,15 @@ def _pre_round_setup(self, agents: list[Player]): logger=self.logger, ) + def copy_logs_from_env(self, round_num: int) -> None: + """Copy logs from the game's environment to the local machine.""" + (self.log_local / "rounds" / str(round_num)).mkdir(parents=True, exist_ok=True) + @abstractmethod - def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: + def get_stats(self, agents: list[Player]) -> RoundStats: """Determine the winner of the game based on the result output. Args: - result_outputs: The specific output(s) containing winning information agents: List of agents participating in the round Returns: @@ -182,17 +175,14 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta pass @abstractmethod - def execute_round(self, agents: list[Player]) -> RoundData: + def execute_round(self, agents: list[Player]): """Subclasses implement their game-specific logic here. This is the low level implementation, you probably want to use run_round instead, which includes the pre-round setup, post-round setup, and winner determination. 
- - Returns: - RoundData object """ pass - def run_round(self, agents: list[Player]) -> RoundRecord: + def run_round(self, agents: list[Player], round_num: int) -> RoundStats: """ Run a single round of the game with the given agents. @@ -200,6 +190,7 @@ def run_round(self, agents: list[Player]) -> RoundRecord: handled by the tournament class. """ self._pre_round_setup(agents) - data = self.execute_round(agents) - stats = self.get_stats(data.results, agents) - return RoundRecord(data=data, stats=stats) + self.execute_round(agents) + stats = self.get_stats(agents) + self.copy_logs_from_env(round_num) + return stats diff --git a/codeclash/games/huskybench/huskybench.py b/codeclash/games/huskybench/huskybench.py new file mode 100644 index 00000000..ab847850 --- /dev/null +++ b/codeclash/games/huskybench/huskybench.py @@ -0,0 +1,34 @@ +from pathlib import Path + +from codeclash.agents.player import Player +from codeclash.games.game import CodeGame, RoundStats + + +class HuskyBenchGame(CodeGame): + name: str = "HuskyBench" + + def __init__(self, config, *, tournament_id: str, local_output_dir: Path): + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) + self.run_cmd_round: str = ( + f"python engine/main.py --port 8000 --sim --sim-rounds {self.game_config['sims_per_round']}" + ) + for arg, val in self.game_config.get("args", {}).items(): + if isinstance(val, bool): + if val: + self.run_cmd_round += f" --{arg}" + else: + self.run_cmd_round += f" --{arg} {val}" + + def get_stats(self, agents: list[Player]) -> RoundStats: + return RoundStats(winner="N/A", scores={}) + + def execute_round(self, agents: list[Player]): + try: + self.logger.debug("Starting game servers") + self.environment.execute(self.run_cmd_round + " > output.log &") + for agent in agents: + self.environment.execute("python client/main.py --port 8000 &", cwd=f"/{agent.name}") + finally: + # Kill all python servers when done + 
self.environment.execute("pkill -f 'python client/main.py' || true") + self.environment.execute("pkill -f 'python engine/main.py' || true") diff --git a/codeclash/games/robocode/robocode.py b/codeclash/games/robocode/robocode.py index b69c1c2c..58a9f70a 100644 --- a/codeclash/games/robocode/robocode.py +++ b/codeclash/games/robocode/robocode.py @@ -3,8 +3,8 @@ from pathlib import Path from codeclash.agents.player import Player -from codeclash.games.game import CodeGame, RoundData, RoundStats -from codeclash.utils.environment import create_file_in_container +from codeclash.games.game import CodeGame, RoundStats +from codeclash.utils.environment import assert_zero_exit_code, copy_from_container, create_file_in_container class RoboCodeGame(CodeGame): @@ -53,9 +53,17 @@ def dict_to_lines(d, prefix=""): dict_to_lines(default_battle_config) return "\n".join(battle_lines) - def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: - result_output = result_outputs[0] # Get the first (and only) element - self.logger.debug(f"Determining winner from result output: {result_output}") + def copy_logs_from_env(self, round_num: int) -> None: + super().copy_logs_from_env(round_num) + copy_from_container( + container=self.environment, + src_path="/testbed/logs", + dest_path=self.log_local / "rounds" / str(round_num), + ) + + def get_stats(self, agents: list[Player]) -> RoundStats: + result_output = self.environment.execute("cat logs/results.txt")["output"] + self.logger.debug(f"Determining winner from result output: {result_output}") lines = result_output.strip().split("\n") scores = {} @@ -73,7 +81,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta return RoundStats(winner=winner, scores=scores, details={"stdout": "\n".join(lines)}) - def execute_round(self, agents: list[Player]) -> RoundData: + def execute_round(self, agents: list[Player]): for agent in agents: # Copy the agent codebase into the game codebase and compile it for cmd in [ @@ -95,17 +103,6 @@ def 
execute_round(self, agents: list[Player]) -> RoundData: create_file_in_container(self.environment, content=battle_content, dest_path=f"battles/{battle_file}") # Run battle with results output to file - results_file = f"results_{int(time.time())}.txt" - cmd = f"{self.run_cmd_round} -battle {battle_file} -results {results_file}" + cmd = f"mkdir -p logs; {self.run_cmd_round} -battle {battle_file} -results logs/results.txt" self.logger.info(f"Running game: {cmd}") - response = self.environment.execute(cmd) - assert response["returncode"] == 0, response - - # Read the results file to get result output - cat_response = self.environment.execute(f"cat {results_file}") - result_output = cat_response["output"] - - # Clean up the results file - self.environment.execute(f"rm -f {results_file}") - - return RoundData(logs=[response["output"]], results=[result_output]) + assert_zero_exit_code(self.environment.execute(cmd)) diff --git a/codeclash/games/robotrumble/robotrumble.py b/codeclash/games/robotrumble/robotrumble.py index adb394e5..c3f63316 100644 --- a/codeclash/games/robotrumble/robotrumble.py +++ b/codeclash/games/robotrumble/robotrumble.py @@ -4,7 +4,8 @@ from codeclash.agents.player import Player from codeclash.constants import RESULT_TIE -from codeclash.games.game import CodeGame, RoundData, RoundStats +from codeclash.games.game import CodeGame, RoundStats +from codeclash.utils.environment import assert_zero_exit_code, copy_from_container class RobotRumbleGame(CodeGame): @@ -15,9 +16,18 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): assert len(config["players"]) == 2, "RobotRumble is a two-player game" self.run_cmd_round: str = "./rumblebot run term" - def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: + def copy_logs_from_env(self, round_num: int) -> None: + super().copy_logs_from_env(round_num) + copy_from_container( + container=self.environment, + src_path="/testbed/logs", + dest_path=self.log_local / 
"rounds" / str(round_num), + ) + + def get_stats(self, agents: list[Player]) -> RoundStats: winners = [] - for ro in result_outputs: + for idx in range(self.game_config.get("sims_per_round", 100)): + ro = self.environment.execute(f"cat logs/sim_{idx}.txt")["output"] lines = ro.strip().split("\n") # Get the last 2 lines which contain the game result (same as original) @@ -47,14 +57,10 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta return RoundStats(winner=final_winner, scores=dict(counts)) - def execute_round(self, agents: list[Player]) -> RoundData: - outputs = [] + def execute_round(self, agents: list[Player]): + self.environment.execute("rm -rf logs; mkdir -p logs") args = [f"/{agent.name}/robot.py" for agent in agents] cmd = f"{self.run_cmd_round} {shlex.join(args)}" self.logger.info(f"Running game: {cmd}") - for _ in range(self.game_config.get("sims_per_round", 100)): - response = self.environment.execute(cmd) - assert response["returncode"] == 0, response - outputs.append(response["output"]) - # For RobotRumble, log_outputs and result_outputs are the same - return RoundData(logs=outputs, results=outputs) + for idx in range(self.game_config.get("sims_per_round", 100)): + assert_zero_exit_code(self.environment.execute(cmd + f" > logs/sim_{idx}.txt")) diff --git a/codeclash/tournaments/pvp.py b/codeclash/tournaments/pvp.py index 660c4d0c..07f49a49 100644 --- a/codeclash/tournaments/pvp.py +++ b/codeclash/tournaments/pvp.py @@ -16,10 +16,10 @@ class PvpTournament(AbstractTournament): - def __init__(self, config: dict, *, cleanup: bool = False, push_agent: bool = False): + def __init__(self, config: dict, *, cleanup: bool = False, push: bool = False): super().__init__(config, name="PvpTournament") self.cleanup_on_end = cleanup - self.push_agent = push_agent + self.push = push self.game: CodeGame = get_game( self.config, tournament_id=self.tournament_id, @@ -41,7 +41,7 @@ def get_metadata(self) -> dict: # will be saved in end() 
return { **super().get_metadata(), - "scoreboard": self.scoreboard, + "scoreboard": [s.model_dump() for s in self.scoreboard], "game": self.game.get_metadata(), "agents": [agent.get_metadata() for agent in self.agents], } @@ -67,40 +67,37 @@ def get_agent(self, agent_config: dict, prompts: dict) -> Player: def run(self) -> None: """Main execution function that runs all rounds.""" try: + self.run_competition_phase(0) # Warm up (doesn't count towards scoreboard) for round_num in range(1, self.rounds + 1): - self.run_evaluation(round_num) - self.run_training_round(round_num) - self.run_evaluation(self.rounds + 1) + self.run_edit_phase(round_num) + self.run_competition_phase(round_num) finally: self.end() - def run_evaluation(self, round_num: int) -> None: + def run_competition_phase(self, round_num: int) -> None: # Run the game round and get results - record = self.game.run_round(self.agents) + stats = self.game.run_round(self.agents, round_num) # Handle bookkeeping that was previously in the game - self.scoreboard.append(record.stats) - self.logger.info(f"Round {round_num}:\n{record.stats}") + self.scoreboard.append(stats) + self.logger.info(f"Round {round_num}:\n{stats}") # Create directory for round logs (self.game.log_local / "rounds" / str(round_num)).mkdir(parents=True, exist_ok=True) # Write logs to file - for idx, lo in enumerate(record.data.logs): - round_log_path = self.game.log_local / "rounds" / str(round_num) / f"sim_{idx}.log" - round_log_path.write_text(lo) results_file = self.game.log_local / "rounds" / str(round_num) / "results.json" - results_file.write_text(json.dumps(record.stats.model_dump(), indent=2)) + results_file.write_text(json.dumps(stats.model_dump(), indent=2)) - def run_training_round(self, round_num: int) -> None: + def run_edit_phase(self, round_num: int) -> None: """Execute a single training round.""" # Copy log to agent environments for agent in self.agents: - self.logger.info(f"Copying round {round_num} log(s) to {agent.name}'s 
container...") + self.logger.info(f"Copying round {round_num - 1} log(s) to {agent.name}'s container...") copy_to_container( agent.environment, - self.game.log_local / "rounds" / str(round_num), - f"logs/rounds/{round_num}/", + self.game.log_local / "rounds" / str(round_num - 1), + f"logs/rounds/{round_num - 1}/", ) with ThreadPoolExecutor() as executor: @@ -124,6 +121,6 @@ def end(self) -> None: """Save output files, clean up game resources and push agents if requested.""" (self.local_output_dir / "metadata.json").write_text(json.dumps(self.get_metadata(), indent=2)) self.game.end(self.cleanup_on_end) - if self.push_agent: + if self.push: for agent in self.agents: agent.push() diff --git a/codeclash/tournaments/single_player.py b/codeclash/tournaments/single_player.py index 2142a501..14515d78 100644 --- a/codeclash/tournaments/single_player.py +++ b/codeclash/tournaments/single_player.py @@ -11,7 +11,7 @@ from codeclash.agents.utils import GameContext from codeclash.constants import DIR_WORK from codeclash.games import get_game -from codeclash.games.game import CodeGame, RoundRecord +from codeclash.games.game import CodeGame, RoundStats from codeclash.tournaments.tournament import AbstractTournament from codeclash.tournaments.utils.git_utils import filter_git_diff from codeclash.utils.environment import copy_to_container @@ -32,7 +32,7 @@ def __init__(self, config: dict, *, cleanup: bool = False): self.mirror_agent: Player = self.get_agent(mirror_agent_config, round=0) @property - def scoreboard(self) -> list[tuple[int, RoundRecord]]: + def scoreboard(self) -> list[tuple[int, RoundStats]]: return self._metadata.setdefault("scoreboard", []) @property @@ -88,20 +88,15 @@ def run(self): def run_training_round(self, round_num: int) -> None: """Execute a single training round, i.e., run the game, then run the agent.""" # Run the game round and get results - record = self.game.run_round([self.agent, self.mirror_agent]) + stats = self.game.run_round([self.agent, 
self.mirror_agent], round_num) # Handle bookkeeping that was previously in the game - self.scoreboard.append((round_num, record)) - self.logger.info(f"Round {round_num}:\n{record.stats}") + self.scoreboard.append((round_num, stats)) + self.logger.info(f"Round {round_num}:\n{stats}") # Write log to file - for idx, lo in enumerate(record.data.logs): - round_log_path = self.game.log_local / "rounds" / str(round_num) / f"sim_{idx}.log" - round_log_path.parent.mkdir(parents=True, exist_ok=True) - round_log_path.write_text(lo) results_file = self.game.log_local / "rounds" / str(round_num) / "results.json" - with open(results_file, "w") as f: - json.dump(record.stats.model_dump(), fp=f, indent=2) + results_file.write_text(json.dumps(stats.model_dump(), indent=2)) # Copy log to main agent environment only self.logger.info(f"Copying round {round_num} log(s) to {self.agent.name}'s container...") @@ -157,9 +152,8 @@ def evaluate(self, n_repetitions: int = 3) -> None: p1.reset_and_apply_patch(p1_patch) p2.reset_and_apply_patch(p2_patch) for i_repetition in range(n_repetitions): - record = self.game.run_round([p1, p2]) - winner = record.stats.winner - self.logger.info(f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}") - matrix[p1_round][p2_round].append(winner) + stats = self.game.run_round([p1, p2], round_num=int(f"{p1_round}{p2_round}{i_repetition}")) + self.logger.info(f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {stats.winner}") + matrix[p1_round][p2_round].append(stats.winner) self.logger.info(f"Evaluation matrix: {matrix}") self._metadata.setdefault("evaluation", {})["matrix"] = matrix diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py index 64f8dfb6..9c7468e2 100644 --- a/codeclash/utils/environment.py +++ b/codeclash/utils/environment.py @@ -86,13 +86,15 @@ def copy_to_container( return result -def copy_file_from_container( +def copy_from_container( container: DockerEnvironment, src_path: str | 
Path, dest_path: str | Path, ): """ - Copy a file from a Docker container to the local filesystem. + Copy a file or directory from a Docker container to the local filesystem. + + The copy operation is recursive for directories. """ cmd = [ "docker", diff --git a/configs/test/dummy_huskybench.yaml b/configs/test/dummy_huskybench.yaml new file mode 100644 index 00000000..b77c30fd --- /dev/null +++ b/configs/test/dummy_huskybench.yaml @@ -0,0 +1,21 @@ +tournament: + rounds: 3 +game: + name: HuskyBench + sims_per_round: 1 +players: +- agent: dummy + name: p1 +- agent: dummy + name: p2 +prompts: + game_description: | + You are a software developer ({{player_id}}) competing in a coding game called CoreWar. + CoreWar is a programming battle where you write "warriors" in an assembly-like language called Redcode to compete within a virtual machine (MARS), aiming to eliminate your rivals by making their code self-terminate. + Victory comes from crafting clever tactics—replicators, scanners, bombers—that exploit memory layout and instruction timing to control the core. + + The game is played in {{rounds}} rounds. For every round, you (and your competitor) edit program code that controls your bot. This is round {{round}}. + After you and your competitor finish editing your codebases, the game is run automatically. + + Your task: improve the bot in `warriors/warrior.red`, located in {{working_dir}}. + {{working_dir}} is your codebase, which contains both your bot and supporting assets. 
diff --git a/docker/HuskyBench.Dockerfile b/docker/HuskyBench.Dockerfile new file mode 100644 index 00000000..d3c7fe5d --- /dev/null +++ b/docker/HuskyBench.Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.10-slim + +ARG DEBIAN_FRONTEND=noninteractive +ENV TZ=Etc/UTC + +RUN apt update && apt install -y \ +wget \ +git \ +build-essential \ +unzip \ +&& rm -rf /var/lib/apt/lists/* + +ARG GITHUB_TOKEN +RUN git clone https://${GITHUB_TOKEN}@github.com/emagedoc/HuskyBench.git /testbed \ + && cd /testbed \ + && git remote set-url origin https://github.com/emagedoc/HuskyBench.git \ + && unset GITHUB_TOKEN + +WORKDIR /testbed + +RUN pip install -r engine/requirements.txt +RUN mkdir -p /testbed/engine/output diff --git a/main.py b/main.py index d68af4b5..7a3e0f00 100644 --- a/main.py +++ b/main.py @@ -8,12 +8,12 @@ from codeclash.utils.yaml_utils import resolve_includes -def main(config_path: Path, *, cleanup: bool = False, push_agent: bool = False): +def main(config_path: Path, *, cleanup: bool = False, push: bool = False): yaml_content = config_path.read_text() preprocessed_yaml = resolve_includes(yaml_content, base_dir=CONFIG_DIR) config = yaml.safe_load(preprocessed_yaml) - training = PvpTournament(config, cleanup=cleanup, push_agent=push_agent) - training.run() + tournament = PvpTournament(config, cleanup=cleanup, push=push) + tournament.run() def main_cli(argv: list[str] | None = None): @@ -31,7 +31,7 @@ def main_cli(argv: list[str] | None = None): ) parser.add_argument( "-p", - "--push_agent", + "--push", action="store_true", help="If set, push each agent's codebase to a new repository after running.", )