From fd1f33938c44e17ad249e27a293cf67f924e71c2 Mon Sep 17 00:00:00 2001 From: John Yang Date: Tue, 26 Aug 2025 23:07:39 +0000 Subject: [PATCH 1/4] Add `sims_per_round` flag --- codeclash/agents/minisweagent.py | 4 +- codeclash/constants.py | 2 + codeclash/games/abstract.py | 26 +++++--- codeclash/games/battlecode/main.py | 62 ++++++++++-------- codeclash/games/battlesnake/main.py | 59 +++++++++-------- codeclash/games/corewar/main.py | 13 ++-- codeclash/games/robocode/main.py | 16 +++-- codeclash/games/robotrumble/main.py | 64 ++++++++++--------- codeclash/tournaments/abstract.py | 13 ++-- codeclash/tournaments/pvp_training.py | 26 ++++++-- .../tournaments/single_player_training.py | 22 +++++-- codeclash/utils/environment.py | 8 ++- configs/battlecode.yaml | 1 + configs/battlesnake.yaml | 1 + configs/battlesnake_dummy.yaml | 1 + configs/battlesnake_single_player.yaml | 1 + configs/corewar.yaml | 3 +- configs/robocode.yaml | 3 +- configs/robotrumble.yaml | 1 + 19 files changed, 192 insertions(+), 134 deletions(-) diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py index f3fec5d4..d1729628 100644 --- a/codeclash/agents/minisweagent.py +++ b/codeclash/agents/minisweagent.py @@ -17,7 +17,7 @@ from codeclash.agents.abstract import Player from codeclash.agents.utils import GameContext, resolve_api_key -from codeclash.utils.environment import copy_file_to_container +from codeclash.utils.environment import copy_to_container class ClashAgent(DefaultAgent): @@ -107,7 +107,7 @@ def run(self): result=result, print_fct=self.logger.debug, ) - copy_file_to_container( + copy_to_container( self.environment, traj_path, self.game_context.log_env / traj_path.name, diff --git a/codeclash/constants.py b/codeclash/constants.py index 19036cc8..db5d9df7 100644 --- a/codeclash/constants.py +++ b/codeclash/constants.py @@ -3,4 +3,6 @@ DIR_LOGS = Path("logs") DIR_WORK = Path("/testbed") GH_ORG = "emagedoc" +OUTPUTS_LOGS = "log_outputs" +OUTPUTS_RESULTS = "result_outputs" RESULT_TIE = "Tie" diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index ddbb316f..2daf7f70 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -7,7 +7,13 @@ from minisweagent.environments.docker import DockerEnvironment from codeclash.agents.abstract import Player -from codeclash.constants import DIR_LOGS, DIR_WORK, GH_ORG +from codeclash.constants import ( + DIR_LOGS, + DIR_WORK, + GH_ORG, + OUTPUTS_LOGS, + OUTPUTS_RESULTS, +) from codeclash.utils.environment import assert_zero_exit_code, copy_between_containers from codeclash.utils.log import get_logger @@ -139,12 +145,12 @@ def _pre_round_setup(self, agents: list[Player]): @abstractmethod def determine_winner( - self, result_output: str, agents: list[Player] + self, result_outputs: list[str], agents: list[Player] ) -> dict[str, str]: """Determine the winner of the game based on the result output. Args: - result_output: The specific output containing winning information + result_outputs: The specific output(s) containing winning information agents: List of agents participating in the round Returns: @@ -153,13 +159,13 @@ def determine_winner( pass @abstractmethod - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: """Subclasses implement their game-specific logic here. This is the low level implementation, you probably want to use run_round instead, which includes the pre-round setup, post-round setup, and winner determination. Returns: - Dictionary with keys "log_output" and "result_output" + Dictionary with keys "log_outputs" and "result_outputs" """ pass @@ -172,14 +178,14 @@ def run_round(self, agents: list[Player]) -> dict[str, str]: """ self._pre_round_setup(agents) result = self.execute_round(agents) - log_output = result["log_output"] - result_output = result["result_output"] + log_outputs = result[OUTPUTS_LOGS] + result_outputs = result[OUTPUTS_RESULTS] - winner_result = self.determine_winner(result_output, agents) + winner_result = self.determine_winner(result_outputs, agents) winner_name = winner_result["winner"] return { - "log_output": log_output, - "result_output": result_output, + OUTPUTS_LOGS: log_outputs, + OUTPUTS_RESULTS: result_outputs, "winner": winner_name, } diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py index 43c26295..9c4b3ac4 100644 --- a/codeclash/games/battlecode/main.py +++ b/codeclash/games/battlecode/main.py @@ -1,9 +1,10 @@ import re -import shlex from pathlib import Path from typing import Any -from codeclash.constants import DIR_WORK, RESULT_TIE +from tqdm.auto import tqdm + +from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE from codeclash.games.abstract import CodeGame @@ -23,27 +24,30 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner(self, result_output: str, agents: list[Any]) -> dict[str, str]: - self.logger.debug(f"Determining winner from result output: {result_output}") - lines = result_output.strip().split("\n") - # Get the third-to-last line which contains the winner info - winner_line = lines[-3] if len(lines) >= 3 else "" - self.logger.debug(f"Winner line: {winner_line}") - match = re.search(r"\s\((.*)\)\swins\s\(", winner_line) - if match: - winner_key = match.group(1) - self.logger.debug(f"Winner key from match: {winner_key}") - # Map A/B to actual agent names (much closer to original code) - winner = {"A": agents[0].name, "B": agents[1].name}.get( - winner_key, RESULT_TIE - ) - self.logger.debug(f"Concluding winner: {winner}") - return {"winner": winner} - else: - self.logger.debug("No winner match found, returning tie") - return {"winner": RESULT_TIE} + def determine_winner( + self, result_outputs: list[str], agents: list[Any] + ) -> dict[str, str]: + winners = [] + for ro in result_outputs: + lines = ro.strip().split("\n") + # Get the third-to-last line which contains the winner info + winner_line = lines[-3] if len(lines) >= 3 else "" + self.logger.debug(f"Winner line: {winner_line}") + match = re.search(r"\s\((.*)\)\swins\s\(", winner_line) + if match: + winner_key = match.group(1) + self.logger.debug(f"Winner key from match: {winner_key}") + # Map A/B to actual agent names (much closer to original code) + winner = {"A": agents[0].name, "B": agents[1].name}.get( + winner_key, RESULT_TIE + ) + winners.append(winner) + else: + winners.append(RESULT_TIE) + winner = max(set(winners), key=winners.count) + return {"winner": winner} - def execute_round(self, agents: list[Any]) -> dict[str, str]: + def execute_round(self, agents: list[Any]) -> dict[str, list[str]]: for agent in agents: src, dest = f"/{agent.name}/src/mysubmission/", str( DIR_WORK / "src" / agent.name @@ -53,10 +57,12 @@ def execute_round(self, agents: list[Any]) -> dict[str, str]: f"--p{idx+1}-dir src --p{idx+1} {agent.name}" for idx, agent in enumerate(agents) ] - cmd = f"{self.run_cmd_round} {shlex.join(args)}" + cmd = f"{self.run_cmd_round} {' '.join(args)}" self.logger.info(f"Running command: {cmd}") - response = self.environment.execute(cmd) - assert response["returncode"] == 0, response - # For BattleCode, log_output and result_output are the same - output = response["output"] - return {"log_output": output, "result_output": output} + outputs = [] + for _ in tqdm(range(self.game_config["sims_per_round"])): + response = self.environment.execute(cmd) + assert response["returncode"] == 0, response + # For BattleCode, log_outputs and result_outputs are the same + outputs.append(response["output"]) + return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs} diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py index d3a8187e..aec8b30a 100644 --- a/codeclash/games/battlesnake/main.py +++ b/codeclash/games/battlesnake/main.py @@ -3,6 +3,7 @@ from pathlib import Path from codeclash.agents.abstract import Player +from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS from codeclash.games.abstract import CodeGame from codeclash.utils.environment import assert_zero_exit_code @@ -23,18 +24,20 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): self.run_cmd_round += f" --{arg} {val}" def determine_winner( - self, result_output: str, agents: list[Player] + self, result_outputs: list[str], agents: list[Player] ) -> dict[str, str]: - self.logger.debug(f"Determining winner from result output: {result_output}") - lines = result_output.strip().split("\n") - # Get the last line which contains the game result - last_line = lines[-1] if lines else "" - self.logger.debug(f"Last line: {last_line}") - winner = json.loads(last_line)["winnerName"] - self.logger.debug(f"Concluding winner: {winner}") + winners = [] + for ro in result_outputs: + lines = ro.strip().split("\n") + # Get the last line which contains the game result + last_line = lines[-1] if lines else "" + self.logger.debug(f"Last line: {last_line}") + winner = json.loads(last_line)["winnerName"] + winners.append(winner) + winner = max(set(winners), key=winners.count) return {"winner": winner} - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: cmd = [] for idx, agent in enumerate(agents): port = 8001 + idx @@ -46,27 +49,33 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]: time.sleep(3) # Give servers time to start - # Create temporary output file for results - output_file = f"battlesnake_output_{int(time.time())}.json" - cmd_str = " ".join(cmd) + f" -o {output_file}" - self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}") - try: - response = assert_zero_exit_code( - self.environment.execute( - f"{self.run_cmd_round} {cmd_str}", - cwd=f"{self.environment.config.cwd}/game", + log_outputs, result_outputs = [], [] + for idx in range(self.game_config["sims_per_round"]): + # Create temporary output file for results + output_file = f"battlesnake_output_{idx}_{int(time.time())}.json" + cmd_str = " ".join(cmd) + f" -o {output_file}" + self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}") + + response = assert_zero_exit_code( + self.environment.execute( + f"{self.run_cmd_round} {cmd_str}", + cwd=f"{self.environment.config.cwd}/game", + ) ) - ) - # Read the output file for result information - result_response = self.environment.execute(f"cat game/{output_file}") - result_output = result_response["output"] + # Read the output file for result information + result_response = self.environment.execute(f"cat game/{output_file}") + result_output = result_response["output"] + log_outputs.append(response["output"]) + result_outputs.append(result_output) + + # Clean up the output file + self.environment.execute(f"rm -f game/{output_file}") - # Clean up the output file - self.environment.execute(f"rm -f game/{output_file}") + time.sleep(0.1) - return {"log_output": response["output"], "result_output": result_output} + return {OUTPUTS_LOGS: log_outputs, OUTPUTS_RESULTS: result_outputs} finally: # Kill all python servers when done self.environment.execute("pkill -f 'python main.py' || true") diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py index 7e6e5617..59e1f989 100644 --- a/codeclash/games/corewar/main.py +++ b/codeclash/games/corewar/main.py @@ -3,6 +3,7 @@ from pathlib import Path from codeclash.agents.abstract import Player +from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS from codeclash.games.abstract import CodeGame @@ -22,8 +23,9 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): self.run_cmd_round += f" -{arg} {val}" def determine_winner( - self, result_output: str, agents: list[Player] + self, result_outputs: list[str], agents: list[Player] ) -> dict[str, str]: + result_output = result_outputs[0] # Get the first (and only) element self.logger.debug(f"Determining winner from result output: {result_output}") scores = [] n = len(agents) * 2 @@ -51,12 +53,13 @@ def determine_winner( self.logger.debug("No scores found, returning unknown") return {"winner": "unknown"} - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] cmd = f"{self.run_cmd_round} {shlex.join(args)}" + cmd += f" -r {self.game_config['sims_per_round']}" self.logger.info(f"Running command: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response - # For CoreWar, log_output and result_output are the same - output = response["output"] - return {"log_output": output, "result_output": output} + # For CoreWar, log_outputs and result_outputs are the same + output = [response["output"]] + return {OUTPUTS_LOGS: output, OUTPUTS_RESULTS: output} diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py index 514e7ab2..ffb69694 100644 --- a/codeclash/games/robocode/main.py +++ b/codeclash/games/robocode/main.py @@ -3,8 +3,9 @@ from pathlib import Path from codeclash.agents.abstract import Player +from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS from codeclash.games.abstract import CodeGame -from codeclash.utils.environment import copy_file_to_container +from codeclash.utils.environment import copy_to_container class RoboCodeGame(CodeGame): @@ -25,7 +26,7 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): def _get_battle_config(self) -> str: default_battle_config = { "battle": { - "numRounds": 10, + "numRounds": self.game_config.get("sims_per_round", 100), "gunCoolingRate": 0.1, "rules": {"inactivityTime": 450, "hideEnemyNames": True}, }, @@ -56,12 +57,13 @@ def dict_to_lines(d, prefix=""): return "\n".join(battle_lines) def determine_winner( - self, result_output: str, agents: list[Player] + self, result_outputs: list[str], agents: list[Player] ) -> dict[str, str]: + result_output = result_outputs[0] # Get the first (and only) element self.logger.debug(f"Determining winner from result output: {result_output}") lines = result_output.strip().split("\n") # Get the second line which contains the winner info (closer to original) - winner_line = lines[1] if len(lines) >= 2 else "" + winner_line = lines[2] if len(lines) >= 3 else "" self.logger.debug(f"Winner line: {winner_line}") if winner_line: winner = winner_line.split()[1].rsplit(".", 1)[0] @@ -71,7 +73,7 @@ def determine_winner( self.logger.debug("No winner line found, returning unknown") return {"winner": "unknown"} - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: for agent in agents: # Copy the agent codebase into the game codebase and compile it for cmd in [ @@ -93,7 +95,7 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]: robocode.battle.selectedRobots={selected_robots} """ ) - copy_file_to_container(self.environment, battle_file, f"battles/{battle_file}") + copy_to_container(self.environment, battle_file, f"battles/{battle_file}") subprocess.run(f"rm -f {battle_file}", shell=True) # Run battle with results output to file @@ -110,4 +112,4 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]: # Clean up the results file self.environment.execute(f"rm -f {results_file}") - return {"log_output": response["output"], "result_output": result_output} + return {OUTPUTS_LOGS: [response["output"]], OUTPUTS_RESULTS: [result_output]} diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py index 8f893ee4..7f174281 100644 --- a/codeclash/games/robotrumble/main.py +++ b/codeclash/games/robotrumble/main.py @@ -1,8 +1,9 @@ import shlex +from collections import Counter from pathlib import Path from codeclash.agents.abstract import Player -from codeclash.constants import RESULT_TIE +from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE from codeclash.games.abstract import CodeGame @@ -17,36 +18,37 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): self.run_cmd_round: str = "./rumblebot run term" def determine_winner( - self, result_output: str, agents: list[Player] + self, result_outputs: list[str], agents: list[Player] ) -> dict[str, str]: - self.logger.debug(f"Determining winner from result output: {result_output}") - lines = result_output.strip().split("\n") - # Get the last 2 lines which contain the game result (same as original) - relevant_lines = lines[-2:] if len(lines) >= 2 else lines - log_text = "\n".join(relevant_lines) - self.logger.debug(f"Relevant lines: {log_text}") - - if "Blue won" in log_text: - winner = agents[0].name - self.logger.debug(f"Blue won - Concluding winner: {winner}") - return {"winner": winner} - elif "Red won" in log_text: - winner = agents[1].name - self.logger.debug(f"Red won - Concluding winner: {winner}") - return {"winner": winner} - elif "it was a tie" in log_text: - self.logger.debug("Game was a tie") - return {"winner": RESULT_TIE} - else: - self.logger.debug("No clear result found, treating as tie") - return {"winner": RESULT_TIE} + winners = [] + for ro in result_outputs: + lines = ro.strip().split("\n") + + # Get the last 2 lines which contain the game result (same as original) + relevant_lines = lines[-2:] if len(lines) >= 2 else lines + log_text = "\n".join(relevant_lines) + + if "Blue won" in log_text: + winner = agents[0].name + winners.append(winner) + elif "Red won" in log_text: + winner = agents[1].name + winners.append(winner) + elif "it was a tie" in log_text: + winners.append(RESULT_TIE) + else: + winners.append(RESULT_TIE) + print(Counter(winners)) + winner = max(set(winners), key=winners.count) + return {"winner": winner} def execute_round(self, agents: list[Player]) -> dict[str, str]: - args = [f"/{agent.name}/robot.py" for agent in agents] - cmd = f"{self.run_cmd_round} {shlex.join(args)}" - self.logger.info(f"Running command: {cmd}") - response = self.environment.execute(cmd) - assert response["returncode"] == 0, response - # For RobotRumble, log_output and result_output are the same - output = response["output"] - return {"log_output": output, "result_output": output} + outputs = [] + for _ in range(self.game_config.get("sims_per_round", 100)): + args = [f"/{agent.name}/robot.py" for agent in agents] + cmd = f"{self.run_cmd_round} {shlex.join(args)}" + response = self.environment.execute(cmd) + assert response["returncode"] == 0, response + outputs.append(response["output"]) + # For RobotRumble, log_outputs and result_outputs are the same + return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs} diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py index d98a9d6c..74e54a7b 100644 --- a/codeclash/tournaments/abstract.py +++ b/codeclash/tournaments/abstract.py @@ -3,10 +3,7 @@ import traceback from pathlib import Path -from codeclash.agents import get_agent -from codeclash.agents.abstract import Player -from codeclash.agents.utils import GameContext -from codeclash.constants import DIR_LOGS, DIR_WORK +from codeclash.constants import DIR_LOGS from codeclash.utils.environment import create_file_on_container from codeclash.utils.log import get_logger @@ -30,17 +27,17 @@ def __init__(self, config: dict, *, name: str, **kwargs): def get_metadata(self) -> dict: return self._metadata - def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str) -> None: + def _copy_game_log_to_agent( + self, agent, round_num: int, log_output: str, dest_path: str = None + ) -> None: """Copy round log to agent environment.""" try: create_file_on_container( container=agent.environment, content=log_output, - dest_path=f"logs/round_{round_num}.log", + dest_path=dest_path if dest_path else f"logs/round_{round_num}.log", ) except Exception: self.logger.error( f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" ) - else: - self.logger.info(f"Created round log in {agent.name}'s container.") diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp_training.py index ffc223cc..53a8ef8e 100644 --- a/codeclash/tournaments/pvp_training.py +++ b/codeclash/tournaments/pvp_training.py @@ -5,10 +5,11 @@ from codeclash.agents import get_agent from codeclash.agents.abstract import Player from codeclash.agents.utils import GameContext -from codeclash.constants import DIR_WORK +from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS from codeclash.games import get_game from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament +from codeclash.utils.environment import copy_to_container from codeclash.utils.log import get_logger @@ -66,21 +67,34 @@ def run_training_round(self, round_num: int) -> None: """Execute a single training round.""" # Run the game round and get results result = self.game.run_round(self.agents) - log_output = result["log_output"] - result_output = result["result_output"] + log_outputs = result[OUTPUTS_LOGS] + result_outputs = result[OUTPUTS_RESULTS] winner = result["winner"] # Handle bookkeeping that was previously in the game self.scoreboard.append((round_num, winner)) self.logger.info(f"Round {round_num} winner: {winner}") + # Create directory for round logs + (self.game.log_local / f"round_{round_num}").mkdir(parents=True, exist_ok=True) + # Write log to file - round_log_path = self.game.log_local / f"round_{round_num}.log" - round_log_path.write_text(log_output) + for idx, lo in enumerate(log_outputs): + round_log_path = ( + self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" + ) + round_log_path.write_text(lo) # Copy log to agent environments for agent in self.agents: - self._copy_game_log_to_agent(agent, round_num, log_output) + self.logger.info( + f"Copying round {round_num} log(s) to {agent.name}'s container..." + ) + copy_to_container( + agent.environment, + self.game.log_local / f"round_{round_num}", + f"logs/round_{round_num}/", + ) for agent in self.agents: self.run_agent(agent, round_num) diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py index 832d36b4..9d5fbe6f 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player_training.py @@ -8,12 +8,12 @@ from codeclash.agents.abstract import Player from codeclash.agents.dummy import Dummy from codeclash.agents.utils import GameContext -from codeclash.constants import DIR_WORK +from codeclash.constants import DIR_WORK, OUTPUTS_LOGS from codeclash.games import get_game from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament from codeclash.tournaments.utils.git_utils import filter_git_diff -from codeclash.utils.log import get_logger +from codeclash.utils.environment import copy_to_container class SinglePlayerTraining(AbstractTournament): @@ -79,7 +79,7 @@ def run_training_round(self, round_num: int) -> None: """Execute a single training round, i.e., run the game, then run the agent.""" # Run the game round and get results result = self.game.run_round([self.agent, self.mirror_agent]) - log_output = result["log_output"] + log_outputs = result[OUTPUTS_LOGS] winner = result["winner"] # Handle bookkeeping that was previously in the game @@ -87,11 +87,21 @@ def run_training_round(self, round_num: int) -> None: self.logger.info(f"Round {round_num} winner: {winner}") # Write log to file - round_log_path = self.game.log_local / f"round_{round_num}.log" - round_log_path.write_text(log_output) + for idx, lo in enumerate(log_outputs): + round_log_path = ( + self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" + ) + round_log_path.write_text(lo) # Copy log to main agent environment only - self._copy_game_log_to_agent(self.agent, round_num, log_output) + self.logger.info( + f"Copying round {round_num} log(s) to {self.agent.name}'s container..." + ) + copy_to_container( + self.agent, + self.game.log_local / f"round_{round_num}", + f"logs/round_{round_num}/", + ) self.run_main_agent(round_num) mirror_agent_state = round_num - 1 if round_num > 1 else 0 diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py index 1d6f6aa1..19165661 100644 --- a/codeclash/utils/environment.py +++ b/codeclash/utils/environment.py @@ -65,13 +65,15 @@ def copy_between_containers( ) -def copy_file_to_container( +def copy_to_container( container: DockerEnvironment, src_path: str | Path, dest_path: str | Path, ): """ - Copy a file from the local filesystem to a Docker container. + Copy a file or directory from the local filesystem to a Docker container. + + The copy operation is recursive for directories. """ if not str(dest_path).startswith("/"): # If not an absolute path, assume relative to container's cwd @@ -130,6 +132,6 @@ def create_file_on_container( tmp_file_path = Path(tmp_file.name) try: - copy_file_to_container(container, tmp_file_path, dest_path) + copy_to_container(container, tmp_file_path, dest_path) finally: tmp_file_path.unlink() # Clean up the temporary file diff --git a/configs/battlecode.yaml b/configs/battlecode.yaml index ed6386cf..74f1c1c5 100644 --- a/configs/battlecode.yaml +++ b/configs/battlecode.yaml @@ -1,5 +1,6 @@ game: name: BattleCode + sims_per_round: 2 args: maps: quack tournament: diff --git a/configs/battlesnake.yaml b/configs/battlesnake.yaml index 9016df1e..1d14af29 100644 --- a/configs/battlesnake.yaml +++ b/configs/battlesnake.yaml @@ -6,6 +6,7 @@ game: browser: false tournament: rounds: 2 + sims_per_round: 10 players: - agent: mini name: p1 diff --git a/configs/battlesnake_dummy.yaml b/configs/battlesnake_dummy.yaml index 05a8d4b4..65c6da12 100644 --- a/configs/battlesnake_dummy.yaml +++ b/configs/battlesnake_dummy.yaml @@ -1,5 +1,6 @@ game: name: BattleSnake + sims_per_round: 10 args: width: 11 height: 11 diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml index e5f6f370..e1d0dea7 100644 --- a/configs/battlesnake_single_player.yaml +++ b/configs/battlesnake_single_player.yaml @@ -7,6 +7,7 @@ game: tournament: rounds: 1 evaluate_matrix: true + sims_per_round: 10 player: agent: mini config: configs/mini/default.yaml diff --git a/configs/corewar.yaml b/configs/corewar.yaml index daaf3e2e..4fe68ad7 100644 --- a/configs/corewar.yaml +++ b/configs/corewar.yaml @@ -1,7 +1,6 @@ game: name: CoreWar - args: - r: 100 + sims_per_round: 10 tournament: rounds: 3 players: diff --git a/configs/robocode.yaml b/configs/robocode.yaml index c401cdfd..c1e1c90c 100644 --- a/configs/robocode.yaml +++ b/configs/robocode.yaml @@ -1,8 +1,8 @@ game: name: RoboCode + sims_per_round: 10 battle: battle: - numRounds: 10 gunCoolingRate: 0.1 rules: inactivityTime: 450 @@ -15,6 +15,7 @@ game: nosound: true tournament: rounds: 3 + sims_per_round: 10 players: - agent: dummy name: p1 diff --git a/configs/robotrumble.yaml b/configs/robotrumble.yaml index 84e9e06f..a6fac21b 100644 --- a/configs/robotrumble.yaml +++ b/configs/robotrumble.yaml @@ -1,5 +1,6 @@ game: name: RobotRumble + sims_per_round: 10 tournament: rounds: 3 players: From c70fa5c139b3f511b70deae7900cc5c46415b312 Mon Sep 17 00:00:00 2001 From: John Yang Date: Tue, 26 Aug 2025 23:28:21 +0000 Subject: [PATCH 2/4] Minor fix --- configs/battlesnake.yaml | 2 +- configs/battlesnake_single_player.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/battlesnake.yaml b/configs/battlesnake.yaml index 1d14af29..daee02ce 100644 --- a/configs/battlesnake.yaml +++ b/configs/battlesnake.yaml @@ -1,12 +1,12 @@ game: name: BattleSnake + sims_per_round: 10 args: width: 11 height: 11 browser: false tournament: rounds: 2 - sims_per_round: 10 players: - agent: mini name: p1 diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml index e1d0dea7..66d91f1a 100644 --- a/configs/battlesnake_single_player.yaml +++ b/configs/battlesnake_single_player.yaml @@ -1,5 +1,6 @@ game: name: BattleSnake + sims_per_round: 10 args: width: 11 height: 11 @@ -7,7 +8,6 @@ game: tournament: rounds: 1 evaluate_matrix: true - sims_per_round: 10 player: agent: mini config: configs/mini/default.yaml From fb84f4f65c3bb6bd404b765089b62da6e386bebf Mon Sep 17 00:00:00 2001 From: John Yang Date: Wed, 27 Aug 2025 20:06:28 +0000 Subject: [PATCH 3/4] Added dataclasses for return type; added score tracking for games --- codeclash/agents/abstract.py | 4 +- codeclash/constants.py | 2 - codeclash/games/abstract.py | 61 +++++++++++-------- codeclash/games/battlecode/main.py | 20 +++--- codeclash/games/battlesnake/main.py | 42 +++++++------ codeclash/games/corewar/main.py | 38 ++++++------ codeclash/games/robocode/main.py | 56 +++++++++-------- codeclash/games/robotrumble/main.py | 32 ++++++---- codeclash/tournaments/abstract.py | 8 ++- .../tournaments/{pvp_training.py => pvp.py} | 21 +++---- ...le_player_training.py => single_player.py} | 20 +++--- codeclash/utils/environment.py | 2 +- main.py | 4 +- tests/test_integration.py | 2 +- 14 files changed, 165 insertions(+), 147 deletions(-) rename codeclash/tournaments/{pvp_training.py => pvp.py} (85%) rename codeclash/tournaments/{single_player_training.py => single_player.py} (91%) diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index ed47b8da..5d30b652 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -8,7 +8,7 @@ from codeclash.agents.utils import GameContext from codeclash.constants import GH_ORG from codeclash.tournaments.utils.git_utils import filter_git_diff -from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container +from codeclash.utils.environment import assert_zero_exit_code, create_file_in_container from codeclash.utils.log import get_logger load_dotenv() @@ -101,7 +101,7 @@ def reset_and_apply_patch( self.logger.debug("No patch to apply, skipping") return - create_file_on_container( + create_file_in_container( container=self.environment, # type: ignore content=patch, dest_path="tmp_patch.txt", diff --git a/codeclash/constants.py b/codeclash/constants.py index db5d9df7..19036cc8 100644 --- a/codeclash/constants.py +++ b/codeclash/constants.py @@ -3,6 +3,4 @@ DIR_LOGS = Path("logs") DIR_WORK = Path("/testbed") GH_ORG = "emagedoc" -OUTPUTS_LOGS = "log_outputs" -OUTPUTS_RESULTS = "result_outputs" RESULT_TIE = "Tie" diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index 2daf7f70..fbbfefdf 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -2,22 +2,42 @@ import os import subprocess from abc import ABC, abstractmethod +from dataclasses import dataclass from pathlib import Path +from typing import Any from minisweagent.environments.docker import DockerEnvironment from codeclash.agents.abstract import Player -from codeclash.constants import ( - DIR_LOGS, - DIR_WORK, - GH_ORG, - OUTPUTS_LOGS, - OUTPUTS_RESULTS, -) +from codeclash.constants import DIR_LOGS, DIR_WORK, GH_ORG from codeclash.utils.environment import assert_zero_exit_code, copy_between_containers from codeclash.utils.log import get_logger +@dataclass +class RoundStats: + winner: str + scores: dict[ + str, float + ] # Map of player to game metric (e.g. # of wins, assets accumulated) + details: dict[str, Any] = None # Optional, for game-specific info + + def __str__(self) -> str: + return "\n".join([f"- Winner: {self.winner}", f"- Scores: {self.scores}"]) + + +@dataclass +class RoundData: + logs: list[str] + results: list[str] + + +@dataclass +class RoundRecord: + data: RoundData + stats: RoundStats + + class CodeGame(ABC): name: str @@ -144,9 +164,7 @@ def _pre_round_setup(self, agents: list[Player]): ) @abstractmethod - def determine_winner( - self, result_outputs: list[str], agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: """Determine the winner of the game based on the result output. Args: @@ -154,22 +172,22 @@ def determine_winner( agents: List of agents participating in the round Returns: - Dictionary with key "winner" containing the winner's name + RoundStats object """ pass @abstractmethod - def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: + def execute_round(self, agents: list[Player]) -> RoundData: """Subclasses implement their game-specific logic here. This is the low level implementation, you probably want to use run_round instead, which includes the pre-round setup, post-round setup, and winner determination. Returns: - Dictionary with keys "log_outputs" and "result_outputs" + RoundData object """ pass - def run_round(self, agents: list[Player]) -> dict[str, str]: + def run_round(self, agents: list[Player]) -> RoundRecord: """ Run a single round of the game with the given agents. @@ -177,15 +195,6 @@ def run_round(self, agents: list[Player]) -> dict[str, str]: handled by the tournament class. """ self._pre_round_setup(agents) - result = self.execute_round(agents) - log_outputs = result[OUTPUTS_LOGS] - result_outputs = result[OUTPUTS_RESULTS] - - winner_result = self.determine_winner(result_outputs, agents) - winner_name = winner_result["winner"] - - return { - OUTPUTS_LOGS: log_outputs, - OUTPUTS_RESULTS: result_outputs, - "winner": winner_name, - } + data = self.execute_round(agents) + stats = self.get_stats(data.results, agents) + return RoundRecord(data=data, stats=stats) diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py index 9c4b3ac4..f2cf4ca2 100644 --- a/codeclash/games/battlecode/main.py +++ b/codeclash/games/battlecode/main.py @@ -4,8 +4,8 @@ from tqdm.auto import tqdm -from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE -from codeclash.games.abstract import CodeGame +from codeclash.constants import DIR_WORK, RESULT_TIE +from codeclash.games.abstract import CodeGame, RoundData, RoundStats class BattleCodeGame(CodeGame): @@ -24,9 +24,7 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner( - self, result_outputs: list[str], agents: list[Any] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats: winners = [] for ro in result_outputs: lines = ro.strip().split("\n") @@ -44,10 +42,12 @@ def determine_winner( winners.append(winner) else: winners.append(RESULT_TIE) - winner = max(set(winners), key=winners.count) - return {"winner": winner} + return RoundStats( + winner=max(set(winners), key=winners.count), + scores={agent.name: winners.count(agent.name) for agent in agents}, + ) - def execute_round(self, agents: list[Any]) -> dict[str, list[str]]: + def execute_round(self, agents: list[Any]) -> RoundData: for agent in agents: src, dest = f"/{agent.name}/src/mysubmission/", str( DIR_WORK / "src" / agent.name @@ -58,11 +58,11 @@ def execute_round(self, agents: list[Any]) -> dict[str, list[str]]: for idx, agent in enumerate(agents) ] cmd = f"{self.run_cmd_round} {' '.join(args)}" - self.logger.info(f"Running command: {cmd}") + self.logger.info(f"Running game: {cmd}") outputs = [] for _ in tqdm(range(self.game_config["sims_per_round"])): response = self.environment.execute(cmd) assert response["returncode"] == 0, response # For BattleCode, log_outputs and result_outputs are the same outputs.append(response["output"]) - return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs} + return RoundData(logs=outputs, results=outputs) diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py index aec8b30a..c0c93c5b 100644 --- a/codeclash/games/battlesnake/main.py +++ b/codeclash/games/battlesnake/main.py @@ -2,9 +2,11 @@ import time from pathlib import Path +from tqdm.auto import tqdm + from codeclash.agents.abstract import Player -from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS -from codeclash.games.abstract import CodeGame +from codeclash.constants import RESULT_TIE +from codeclash.games.abstract import CodeGame, RoundData, RoundStats from codeclash.utils.environment import assert_zero_exit_code @@ -23,21 +25,25 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner( - self, result_outputs: list[str], agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: winners = [] for ro in result_outputs: lines = ro.strip().split("\n") - # Get the last line which contains the game result - last_line = lines[-1] if lines else "" - self.logger.debug(f"Last line: {last_line}") + last_line = ( + lines[-1] if lines else "" + ) # Get the last line which contains the game result winner = json.loads(last_line)["winnerName"] winners.append(winner) - winner = max(set(winners), key=winners.count) - return {"winner": winner} - def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: + win_counts = {agent.name: winners.count(agent.name) for agent in agents} + max_wins = max(win_counts.values()) + winners = [name for name, wins in win_counts.items() if wins == max_wins] + return RoundStats( + winner=RESULT_TIE if len(winners) > 1 else winners[0], + scores=win_counts, + ) + + def execute_round(self, agents: list[Player]) -> RoundData: cmd = [] for idx, agent in enumerate(agents): port = 8001 + idx @@ -51,15 +57,16 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: try: log_outputs, result_outputs = [], [] - for idx in range(self.game_config["sims_per_round"]): + cmd = self.run_cmd_round + " " + " ".join(cmd) + self.logger.info(f"Running game: {cmd}") + for idx in tqdm(range(self.game_config["sims_per_round"])): # Create temporary output file for results output_file = f"battlesnake_output_{idx}_{int(time.time())}.json" - cmd_str = " ".join(cmd) + f" -o {output_file}" - self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}") + # Run game response = assert_zero_exit_code( self.environment.execute( - f"{self.run_cmd_round} {cmd_str}", + cmd + f" -o {output_file}", cwd=f"{self.environment.config.cwd}/game", ) ) @@ -72,10 +79,9 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: # Clean up the output file self.environment.execute(f"rm -f game/{output_file}") + time.sleep(0.05) - time.sleep(0.1) - - return {OUTPUTS_LOGS: log_outputs, OUTPUTS_RESULTS: result_outputs} + return RoundData(log_outputs, result_outputs) finally: # Kill all python servers when done self.environment.execute("pkill -f 'python main.py' || true") diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py index 59e1f989..a082a02c 100644 --- a/codeclash/games/corewar/main.py +++ b/codeclash/games/corewar/main.py @@ -3,8 +3,7 @@ from pathlib import Path from codeclash.agents.abstract import Player -from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS -from codeclash.games.abstract import CodeGame +from codeclash.games.abstract import CodeGame, RoundData, RoundStats class CoreWarGame(CodeGame): @@ -22,44 +21,43 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" -{arg} {val}" - def determine_winner( - self, result_outputs: list[str], agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: result_output = result_outputs[0] # Get the first (and only) element self.logger.debug(f"Determining winner from result output: {result_output}") scores = [] n = len(agents) * 2 lines = result_output.strip().split("\n") + # Get the last n lines which contain the scores (closer to original) relevant_lines = lines[-n:] if len(lines) >= n else lines + relevant_lines = [l for l in relevant_lines if len(l.strip()) > 0] self.logger.debug(f"Relevant lines for scoring: {relevant_lines}") + # Go through each line; we assume score position is correlated with agent index for line in relevant_lines: match = re.search(r".*\sby\s.*\sscores\s(\d+)", line) if match: score = int(match.group(1)) scores.append(score) - self.logger.debug(f"Found score: {score} from line: {line}") - self.logger.debug(f"All scores: {scores}") if scores: - max_score_index = scores.index(max(scores)) - winner = agents[max_score_index].name - self.logger.debug( - f"Concluding winner: {winner} with index {max_score_index}" + if len(scores) != len(agents): + self.logger.error(f"Have {len(scores)} scores but {len(agents)} agents") + return RoundStats( + winner=agents[scores.index(max(scores))].name, + scores={agent.name: score for agent, score in zip(agents, scores)}, + details={"stdout": "\n".join(relevant_lines)}, ) - return {"winner": winner} else: self.logger.debug("No scores found, returning unknown") - return {"winner": "unknown"} + return RoundStats( + winner="unknown", scores={agent.name: 0 for agent in agents} + ) - def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: + def execute_round(self, agents: list[Player]) -> RoundData: args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] - cmd = f"{self.run_cmd_round} {shlex.join(args)}" - cmd += f" -r {self.game_config['sims_per_round']}" - self.logger.info(f"Running command: {cmd}") + cmd = f"{self.run_cmd_round} {shlex.join(args)} -r {self.game_config['sims_per_round']}" + self.logger.info(f"Running game: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response - # For CoreWar, log_outputs and result_outputs are the same - output = [response["output"]] - return {OUTPUTS_LOGS: output, OUTPUTS_RESULTS: output} + return RoundData([response["output"]], [response["output"]]) diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py index ffb69694..880bde96 100644 --- a/codeclash/games/robocode/main.py +++ b/codeclash/games/robocode/main.py @@ -1,11 +1,10 @@ -import subprocess +import re import time from pathlib import Path from codeclash.agents.abstract import Player -from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS -from codeclash.games.abstract import CodeGame -from codeclash.utils.environment import copy_to_container +from codeclash.games.abstract import CodeGame, RoundData, RoundStats +from codeclash.utils.environment import create_file_in_container class RoboCodeGame(CodeGame): @@ -56,24 +55,29 @@ def dict_to_lines(d, prefix=""): dict_to_lines(default_battle_config) return "\n".join(battle_lines) - def determine_winner( - self, result_outputs: list[str], agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: result_output = result_outputs[0] # Get the first (and only) element self.logger.debug(f"Determining winner from result output: {result_output}") lines = result_output.strip().split("\n") - # Get the second line which contains the winner info (closer to original) - winner_line = lines[2] if len(lines) >= 3 else "" - self.logger.debug(f"Winner line: {winner_line}") - if winner_line: - winner = winner_line.split()[1].rsplit(".", 1)[0] - self.logger.debug(f"Concluding winner: {winner}") - return {"winner": winner} - else: - self.logger.debug("No winner line found, returning unknown") - return {"winner": "unknown"} - - def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: + + scores = {} + for line in lines: + line = line.strip() + if not re.match(r"^\d", line): + continue + match = re.search(r"(\d+)\S+\:\s(\S+)\s+(\d+)", line) + if match: + player = match.group(2).rsplit(".", 1)[0] + score = int(match.group(3)) + scores[player] = score + if int(match.group(1)) == 1: + winner = player + + return RoundStats( + winner=winner, scores=scores, details={"stdout": "\n".join(lines)} + ) + + def execute_round(self, agents: list[Player]) -> RoundData: for agent in agents: # Copy the agent codebase into the game codebase and compile it for cmd in [ @@ -88,20 +92,18 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: selected_robots = ",".join([f"{agent.name}.MyTank*" for agent in agents]) # Use timestamp for unique battle file name since rounds are managed by tournament battle_file = f"{self.game_id}-battle{int(time.time())}.battle" - with open(battle_file, "w") as f: - f.write( - f"""#Battle Properties + battle_content = f"""#Battle Properties {self._get_battle_config()} robocode.battle.selectedRobots={selected_robots} """ - ) - copy_to_container(self.environment, battle_file, f"battles/{battle_file}") - subprocess.run(f"rm -f {battle_file}", shell=True) + create_file_in_container( + self.environment, content=battle_content, dest_path=f"battles/{battle_file}" + ) # Run battle with results output to file results_file = f"results_{int(time.time())}.txt" cmd = f"{self.run_cmd_round} -battle {battle_file} -results {results_file}" - self.logger.info(f"Running command: {cmd}") + self.logger.info(f"Running game: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response @@ -112,4 +114,4 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]: # Clean up the results file self.environment.execute(f"rm -f {results_file}") - return {OUTPUTS_LOGS: [response["output"]], OUTPUTS_RESULTS: [result_output]} + return RoundData([response["output"]], [result_output]) diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py index 7f174281..56a4febb 100644 --- a/codeclash/games/robotrumble/main.py +++ b/codeclash/games/robotrumble/main.py @@ -3,8 +3,8 @@ from pathlib import Path from codeclash.agents.abstract import Player -from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE -from codeclash.games.abstract import CodeGame +from codeclash.constants import RESULT_TIE +from codeclash.games.abstract import CodeGame, RoundData, RoundStats class RobotRumbleGame(CodeGame): @@ -17,9 +17,7 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): assert len(config["players"]) == 2, "RobotRumble is a two-player game" self.run_cmd_round: str = "./rumblebot run term" - def determine_winner( - self, result_outputs: list[str], agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: winners = [] for ro in result_outputs: lines = ro.strip().split("\n") @@ -38,17 +36,27 @@ def determine_winner( winners.append(RESULT_TIE) else: winners.append(RESULT_TIE) - print(Counter(winners)) - winner = max(set(winners), key=winners.count) - return {"winner": winner} - def execute_round(self, agents: list[Player]) -> dict[str, str]: + # Count occurrences of each winner + counts = Counter(winners) + + # Find all winners with the maximum count + max_count = max(counts.values()) + top_winners = [w for w, c in counts.items() if c == max_count] + + # If multiple winners have the same count, return RESULT_TIE + final_winner = RESULT_TIE if len(top_winners) > 1 else top_winners[0] + + return RoundStats(winner=final_winner, scores=dict(counts)) + + def execute_round(self, agents: list[Player]) -> RoundData: outputs = [] + args = [f"/{agent.name}/robot.py" for agent in agents] + cmd = f"{self.run_cmd_round} {shlex.join(args)}" + self.logger.info(f"Running game: {cmd}") for _ in range(self.game_config.get("sims_per_round", 100)): - args = [f"/{agent.name}/robot.py" for agent in agents] - cmd = f"{self.run_cmd_round} {shlex.join(args)}" response = self.environment.execute(cmd) assert response["returncode"] == 0, response outputs.append(response["output"]) # For RobotRumble, log_outputs and result_outputs are the same - return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs} + return RoundData(logs=outputs, results=outputs) diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py index 74e54a7b..0c5784be 100644 --- a/codeclash/tournaments/abstract.py +++ b/codeclash/tournaments/abstract.py @@ -4,7 +4,7 @@ from pathlib import Path from codeclash.constants import DIR_LOGS -from codeclash.utils.environment import create_file_on_container +from codeclash.utils.environment import create_file_in_container from codeclash.utils.log import get_logger @@ -12,7 +12,9 @@ class AbstractTournament: def __init__(self, config: dict, *, name: str, **kwargs): self.config: dict = config self.name: str = name - self.tournament_id: str = f"{self.name}{time.strftime('%y%m%d%H%M%S')}" + self.tournament_id: str = ( + f"{self.name}.{config['game']['name']}.{time.strftime('%y%m%d%H%M%S')}" + ) self.local_output_dir: Path = ( DIR_LOGS / getpass.getuser() / self.tournament_id ).resolve() @@ -32,7 +34,7 @@ def _copy_game_log_to_agent( ) -> None: """Copy round log to agent environment.""" try: - create_file_on_container( + create_file_in_container( container=agent.environment, content=log_output, dest_path=dest_path if dest_path else f"logs/round_{round_num}.log", diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp.py similarity index 85% rename from codeclash/tournaments/pvp_training.py rename to codeclash/tournaments/pvp.py index 53a8ef8e..dd670de1 100644 --- a/codeclash/tournaments/pvp_training.py +++ b/codeclash/tournaments/pvp.py @@ -5,19 +5,19 @@ from codeclash.agents import get_agent from codeclash.agents.abstract import Player from codeclash.agents.utils import GameContext -from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS +from codeclash.constants import DIR_WORK from codeclash.games import get_game -from codeclash.games.abstract import CodeGame +from codeclash.games.abstract import CodeGame, RoundStats from codeclash.tournaments.abstract import AbstractTournament from codeclash.utils.environment import copy_to_container from codeclash.utils.log import get_logger -class PvpTraining(AbstractTournament): +class PvpTournament(AbstractTournament): def __init__( self, config: dict, *, cleanup: bool = False, push_agent: bool = False ): - super().__init__(config, name="PvpTraining") + super().__init__(config, name="PvpTournament") self.cleanup_on_end = cleanup self.push_agent = push_agent self.game: CodeGame = get_game( @@ -29,7 +29,7 @@ def __init__( for agent_conf in self.config["players"]: self.agents.append(self.get_agent(agent_conf, self.config["prompts"])) self.logger = get_logger(self.game.name) - self.scoreboard: list[tuple[int, str]] = [] + self.scoreboard: list[RoundStats] = [] @property def rounds(self) -> int: @@ -66,20 +66,17 @@ def run(self) -> None: def run_training_round(self, round_num: int) -> None: """Execute a single training round.""" # Run the game round and get results - result = self.game.run_round(self.agents) - log_outputs = result[OUTPUTS_LOGS] - result_outputs = result[OUTPUTS_RESULTS] - winner = result["winner"] + record = self.game.run_round(self.agents) # Handle bookkeeping that was previously in the game - self.scoreboard.append((round_num, winner)) - self.logger.info(f"Round {round_num} winner: {winner}") + self.scoreboard.append(record.stats) + self.logger.info(f"Round {round_num}:\n{record.stats}") # Create directory for round logs (self.game.log_local / f"round_{round_num}").mkdir(parents=True, exist_ok=True) # Write log to file - for idx, lo in enumerate(log_outputs): + for idx, lo in enumerate(record.data.logs): round_log_path = ( self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" ) diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player.py similarity index 91% rename from codeclash/tournaments/single_player_training.py rename to codeclash/tournaments/single_player.py index 9d5fbe6f..651f559c 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player.py @@ -8,9 +8,9 @@ from codeclash.agents.abstract import Player from codeclash.agents.dummy import Dummy from codeclash.agents.utils import GameContext -from codeclash.constants import DIR_WORK, OUTPUTS_LOGS +from codeclash.constants import DIR_WORK from codeclash.games import get_game -from codeclash.games.abstract import CodeGame +from codeclash.games.abstract import CodeGame, RoundStats from codeclash.tournaments.abstract import AbstractTournament from codeclash.tournaments.utils.git_utils import filter_git_diff from codeclash.utils.environment import copy_to_container @@ -29,7 +29,7 @@ def __init__(self, config: dict, cleanup: bool = False): mirror_agent_config = copy.deepcopy(self.config["player"]) mirror_agent_config["name"] = "mirror" self.mirror_agent: Player = self.get_agent(mirror_agent_config, round=0) - self.scoreboard: list[tuple[int, str]] = [] + self.scoreboard: list[RoundStats] = [] @property def rounds(self) -> int: @@ -78,16 +78,14 @@ def run(self): def run_training_round(self, round_num: int) -> None: """Execute a single training round, i.e., run the game, then run the agent.""" # Run the game round and get results - result = self.game.run_round([self.agent, self.mirror_agent]) - log_outputs = result[OUTPUTS_LOGS] - winner = result["winner"] + record = self.game.run_round([self.agent, self.mirror_agent]) # Handle bookkeeping that was previously in the game - self.scoreboard.append((round_num, winner)) - self.logger.info(f"Round {round_num} winner: {winner}") + self.scoreboard.append(record.stats) + self.logger.info(f"Round {round_num}:\n{record.stats}") # Write log to file - for idx, lo in enumerate(log_outputs): + for idx, lo in enumerate(record.logs): round_log_path = ( self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" ) @@ -154,8 +152,8 @@ def evaluate(self, n_repetitions: int = 3): p1.reset_and_apply_patch(p1_patch) p2.reset_and_apply_patch(p2_patch) for i_repetition in range(n_repetitions): - result = self.game.run_round([p1, p2]) - winner = result["winner"] + record = self.game.run_round([p1, p2]) + winner = record.stats.winner self.logger.info( f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}" ) diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py index 19165661..b6b84c81 100644 --- a/codeclash/utils/environment.py +++ b/codeclash/utils/environment.py @@ -117,7 +117,7 @@ def copy_file_from_container( return result -def create_file_on_container( +def create_file_in_container( container: DockerEnvironment, *, content: str, diff --git a/main.py b/main.py index 14245d38..4984870a 100644 --- a/main.py +++ b/main.py @@ -2,13 +2,13 @@ import yaml -from codeclash.tournaments.pvp_training import PvpTraining +from codeclash.tournaments.pvp import PvpTournament def main(config_path: str, *, cleanup: bool = False, push_agent: bool = False): with open(config_path, "r") as f: config = yaml.safe_load(f) - training = PvpTraining(config, cleanup=cleanup, push_agent=push_agent) + training = PvpTournament(config, cleanup=cleanup, push_agent=push_agent) training.run() diff --git a/tests/test_integration.py b/tests/test_integration.py index 52fa4676..feddc06d 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -66,7 +66,7 @@ def wrapper(config, game_context, environment): # Run the main function with cleanup enabled with patch( - "codeclash.tournaments.pvp_training.get_agent", + "codeclash.tournaments.pvp.get_agent", side_effect=mock_get_agent(get_agent), ): # This should complete without raising any exceptions From 94556826255846f84c7832e3128b03d65ca2f3c3 Mon Sep 17 00:00:00 2001 From: John Yang Date: Wed, 27 Aug 2025 20:42:36 +0000 Subject: [PATCH 4/4] Move from black/isort to ruff linting --- .pre-commit-config.yaml | 31 +- codeclash/agents/__init__.py | 4 +- codeclash/agents/abstract.py | 34 +-- codeclash/agents/minisweagent.py | 13 +- codeclash/agents/utils.py | 6 +- codeclash/games/abstract.py | 12 +- codeclash/games/battlecode/main.py | 17 +- codeclash/games/battlesnake/main.py | 12 +- codeclash/games/corewar/main.py | 8 +- codeclash/games/robocode/main.py | 12 +- codeclash/games/robotrumble/main.py | 4 +- codeclash/tournaments/abstract.py | 20 +- codeclash/tournaments/pvp.py | 16 +- codeclash/tournaments/single_player.py | 31 +- codeclash/utils/environment.py | 16 +- codeclash/utils/log.py | 4 +- codeclash/viewer/app.py | 30 +- codeclash/viewer/static/css/style.css | 21 +- codeclash/viewer/static/js/app.js | 403 +++++++++++++------------ main.py | 2 +- main_single_player.py | 2 +- pyproject.toml | 145 +++++++++ tests/test_integration.py | 11 +- 23 files changed, 464 insertions(+), 390 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3e4cc231..8cafb6df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,9 @@ +ci: + autoupdate_commit_msg: "chore: update pre-commit hooks" + repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -9,14 +12,24 @@ repos: - id: check-merge-conflict - id: debug-statements - - repo: https://github.com/psf/black - rev: 23.3.0 + - repo: https://github.com/crate-ci/typos + rev: v1 + hooks: + - id: typos + files: \.(py|md|rst|yaml|toml) + exclude: pyproject.toml + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.10 hooks: - - id: black - language_version: python3 + # Run the linter. + - id: ruff + args: ["--fix"] + # Run the formatter. + - id: ruff-format - - repo: https://github.com/pycqa/isort - rev: 5.12.0 + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v4.0.0-alpha.8" # Use the sha or tag you want to point at hooks: - - id: isort - args: ["--profile", "black"] + - id: prettier + types_or: ["javascript", "css"] diff --git a/codeclash/agents/__init__.py b/codeclash/agents/__init__.py index 5dd9d0a6..a76b3569 100644 --- a/codeclash/agents/__init__.py +++ b/codeclash/agents/__init__.py @@ -6,9 +6,7 @@ from codeclash.agents.utils import GameContext -def get_agent( - config: dict, game_context: GameContext, environment: DockerEnvironment -) -> Player: +def get_agent(config: dict, game_context: GameContext, environment: DockerEnvironment) -> Player: agents = { "dummy": Dummy, "mini": MiniSWEAgent, diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index 5d30b652..6dff865f 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -24,7 +24,7 @@ def __init__( self.config = config self.name = config["name"] self._player_unique_id = uuid.uuid4() - """Unique ID that doesn't clash even accross multiple games. Used for git tags.""" + """Unique ID that doesn't clash even across multiple games. Used for git tags.""" self.environment = environment self.game_context = game_context self.logger = get_logger( @@ -51,9 +51,7 @@ def post_run_hook(self, *, round: int) -> None: """Should be called after we called the run method.""" self._commit() self._metadata["diff"][round] = self._get_round_diff(round) - self._metadata["incremental_diff"][round] = self._get_round_diff( - round, incremental=True - ) + self._metadata["incremental_diff"][round] = self._get_round_diff(round, incremental=True) @abstractmethod def run(self) -> None: @@ -76,23 +74,15 @@ def push(self) -> None: "git push origin --tags", ]: assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) - self.logger.info( - f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})" - ) + self.logger.info(f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})") - def reset_and_apply_patch( - self, patch: str, *, base_commit: str = "", filter_patch: bool = True - ) -> None: - """Clean all uncommited changes. If base_commit is provided, reset to that commit. + def reset_and_apply_patch(self, patch: str, *, base_commit: str = "", filter_patch: bool = True) -> None: + """Clean all uncommitted changes. If base_commit is provided, reset to that commit. Then apply the patch to the codebase. """ # Need to clean before we copy over the patch (else it's gonna be removed by git clean) self.logger.debug( - assert_zero_exit_code( - self.environment.execute( - f"git reset --hard {base_commit} && git clean -fd" - ) - ) + assert_zero_exit_code(self.environment.execute(f"git reset --hard {base_commit} && git clean -fd")) ) patch = filter_git_diff(patch) if filter_patch else patch @@ -112,9 +102,7 @@ def reset_and_apply_patch( commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] for cmd in commands: self.logger.debug(f"Executing command: {cmd}") - out = assert_zero_exit_code( - self.environment.execute(cmd), logger=self.logger - ) + out = assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) self.logger.debug(out) # --- Helper methods --- @@ -122,9 +110,7 @@ def reset_and_apply_patch( def _tag_round(self, round: int) -> None: """Git tag the codebase at the given round.""" assert_zero_exit_code( - self.environment.execute( - f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'" - ), + self.environment.execute(f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'"), logger=self.logger, ) @@ -161,9 +147,7 @@ def _get_round_diff(self, round: int, *, incremental: bool = False) -> str: previous_round_tag = self._get_round_tag_name(0) current_round_tag = self._get_round_tag_name(round) out = assert_zero_exit_code( - self.environment.execute( - f"git diff {previous_round_tag}..{current_round_tag}" - ), + self.environment.execute(f"git diff {previous_round_tag}..{current_round_tag}"), logger=self.logger, ) return out["output"] diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py index d1729628..fed2b2b6 100644 --- a/codeclash/agents/minisweagent.py +++ b/codeclash/agents/minisweagent.py @@ -47,9 +47,7 @@ def add_message(self, role: str, content: str, **kwargs): super().add_message(role, content, **kwargs) self.logger.debug(f"[{role}] {content}", extra={"highlighter": None}) if role == "assistant": - self.logger.info( - f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})" - ) + self.logger.info(f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})") def render_template(self, template: str, **kwargs) -> str: cs = ( @@ -69,9 +67,7 @@ def run(self) -> tuple[str, str]: class MiniSWEAgent(Player): """Player with agentic code editing capabilities""" - def __init__( - self, config: dict, environment: DockerEnvironment, game_context: GameContext - ): + def __init__(self, config: dict, environment: DockerEnvironment, game_context: GameContext): super().__init__(config, environment=environment, game_context=game_context) def run(self): @@ -96,10 +92,7 @@ def run(self): result = exc_message print(exc_message) finally: - traj_path = ( - self.game_context.log_local - / f"{self.name}_r{self.game_context.round}.traj.json" - ) + traj_path = self.game_context.log_local / f"{self.name}_r{self.game_context.round}.traj.json" save_traj( self.agent, # type: ignore traj_path, diff --git a/codeclash/agents/utils.py b/codeclash/agents/utils.py index 16ecd13a..13fe3aff 100644 --- a/codeclash/agents/utils.py +++ b/codeclash/agents/utils.py @@ -13,6 +13,7 @@ def resolve_api_key(model: str) -> str: return os.getenv("ANTHROPIC_API_KEY") if "gpt" in model: return os.getenv("OPENAI_API_KEY") + return "" @dataclass @@ -38,10 +39,7 @@ class GameContext: def _render_prompt_templates(self) -> dict: context = asdict(self) - return { - key: Template(template_str).render(**context) - for key, template_str in self.prompts.items() - } + return {key: Template(template_str).render(**context) for key, template_str in self.prompts.items()} def to_template_vars(self) -> dict[str, str]: """Convert the GameContext to a dictionary for rendering prompts in the agent""" diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index fbbfefdf..82f80a32 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -17,9 +17,7 @@ @dataclass class RoundStats: winner: str - scores: dict[ - str, float - ] # Map of player to game metric (e.g. # of wins, assets accumulated) + scores: dict[str, float] # Map of player to game metric (e.g. # of wins, assets accumulated) details: dict[str, Any] = None # Optional, for game-specific info def __str__(self) -> str: @@ -63,9 +61,7 @@ def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path): self.game_id: str = tournament_id self.log_env: Path = (DIR_WORK / DIR_LOGS / self.game_id).resolve() self.log_local: Path = local_output_dir - self.logger = get_logger( - self.name, log_path=self.log_local / "game.log", emoji="🏓" - ) + self.logger = get_logger(self.name, log_path=self.log_local / "game.log", emoji="🏓") self.environment: DockerEnvironment = self.get_environment() """The running docker environment for executing the game""" self._metadata: dict = { @@ -106,9 +102,7 @@ def build_image(self): if result.returncode == 0: self.logger.info(f"✅ Built Docker image {self.image_name}") else: - self.logger.error( - f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}" - ) + self.logger.error(f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}") raise RuntimeError(f"Failed to build Docker image: {result.stderr}") def get_metadata(self) -> dict: diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py index f2cf4ca2..743e0788 100644 --- a/codeclash/games/battlecode/main.py +++ b/codeclash/games/battlecode/main.py @@ -12,9 +12,7 @@ class BattleCodeGame(CodeGame): name: str = "BattleCode" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) assert len(config["players"]) == 2, "BattleCode is a two-player game" self.run_cmd_round: str = "python run.py run" for arg, val in self.game_config.get("args", {}).items(): @@ -36,9 +34,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats: winner_key = match.group(1) self.logger.debug(f"Winner key from match: {winner_key}") # Map A/B to actual agent names (much closer to original code) - winner = {"A": agents[0].name, "B": agents[1].name}.get( - winner_key, RESULT_TIE - ) + winner = {"A": agents[0].name, "B": agents[1].name}.get(winner_key, RESULT_TIE) winners.append(winner) else: winners.append(RESULT_TIE) @@ -49,14 +45,9 @@ def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats: def execute_round(self, agents: list[Any]) -> RoundData: for agent in agents: - src, dest = f"/{agent.name}/src/mysubmission/", str( - DIR_WORK / "src" / agent.name - ) + src, dest = f"/{agent.name}/src/mysubmission/", str(DIR_WORK / "src" / agent.name) self.environment.execute(f"cp -r {src} {dest}") - args = [ - f"--p{idx+1}-dir src --p{idx+1} {agent.name}" - for idx, agent in enumerate(agents) - ] + args = [f"--p{idx + 1}-dir src --p{idx + 1} {agent.name}" for idx, agent in enumerate(agents)] cmd = f"{self.run_cmd_round} {' '.join(args)}" self.logger.info(f"Running game: {cmd}") outputs = [] diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py index c0c93c5b..e5cf4e69 100644 --- a/codeclash/games/battlesnake/main.py +++ b/codeclash/games/battlesnake/main.py @@ -14,9 +14,7 @@ class BattleSnakeGame(CodeGame): name: str = "BattleSnake" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) self.run_cmd_round: str = "./battlesnake play" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -29,9 +27,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta winners = [] for ro in result_outputs: lines = ro.strip().split("\n") - last_line = ( - lines[-1] if lines else "" - ) # Get the last line which contains the game result + last_line = lines[-1] if lines else "" # Get the last line which contains the game result winner = json.loads(last_line)["winnerName"] winners.append(winner) @@ -48,9 +44,7 @@ def execute_round(self, agents: list[Player]) -> RoundData: for idx, agent in enumerate(agents): port = 8001 + idx # Start server in background - just add & to run in background! - self.environment.execute( - f"PORT={port} python main.py &", cwd=f"/{agent.name}" - ) + self.environment.execute(f"PORT={port} python main.py &", cwd=f"/{agent.name}") cmd.append(f"--url http://0.0.0.0:{port} -n {agent.name}") time.sleep(3) # Give servers time to start diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py index a082a02c..a1193954 100644 --- a/codeclash/games/corewar/main.py +++ b/codeclash/games/corewar/main.py @@ -10,9 +10,7 @@ class CoreWarGame(CodeGame): name: str = "CoreWar" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) self.run_cmd_round: str = "./src/pmars" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -50,9 +48,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta ) else: self.logger.debug("No scores found, returning unknown") - return RoundStats( - winner="unknown", scores={agent.name: 0 for agent in agents} - ) + return RoundStats(winner="unknown", scores={agent.name: 0 for agent in agents}) def execute_round(self, agents: list[Player]) -> RoundData: args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py index 880bde96..821eccbd 100644 --- a/codeclash/games/robocode/main.py +++ b/codeclash/games/robocode/main.py @@ -11,9 +11,7 @@ class RoboCodeGame(CodeGame): name: str = "RoboCode" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) self.run_cmd_round: str = "./robocode.sh" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -73,9 +71,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta if int(match.group(1)) == 1: winner = player - return RoundStats( - winner=winner, scores=scores, details={"stdout": "\n".join(lines)} - ) + return RoundStats(winner=winner, scores=scores, details={"stdout": "\n".join(lines)}) def execute_round(self, agents: list[Player]) -> RoundData: for agent in agents: @@ -96,9 +92,7 @@ def execute_round(self, agents: list[Player]) -> RoundData: {self._get_battle_config()} robocode.battle.selectedRobots={selected_robots} """ - create_file_in_container( - self.environment, content=battle_content, dest_path=f"battles/{battle_file}" - ) + create_file_in_container(self.environment, content=battle_content, dest_path=f"battles/{battle_file}") # Run battle with results output to file results_file = f"results_{int(time.time())}.txt" diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py index 56a4febb..a70be082 100644 --- a/codeclash/games/robotrumble/main.py +++ b/codeclash/games/robotrumble/main.py @@ -11,9 +11,7 @@ class RobotRumbleGame(CodeGame): name: str = "RobotRumble" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) assert len(config["players"]) == 2, "RobotRumble is a two-player game" self.run_cmd_round: str = "./rumblebot run term" diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py index 0c5784be..f581de6d 100644 --- a/codeclash/tournaments/abstract.py +++ b/codeclash/tournaments/abstract.py @@ -12,26 +12,18 @@ class AbstractTournament: def __init__(self, config: dict, *, name: str, **kwargs): self.config: dict = config self.name: str = name - self.tournament_id: str = ( - f"{self.name}.{config['game']['name']}.{time.strftime('%y%m%d%H%M%S')}" - ) - self.local_output_dir: Path = ( - DIR_LOGS / getpass.getuser() / self.tournament_id - ).resolve() + self.tournament_id: str = f"{self.name}.{config['game']['name']}.{time.strftime('%y%m%d%H%M%S')}" + self.local_output_dir: Path = (DIR_LOGS / getpass.getuser() / self.tournament_id).resolve() self._metadata: dict = { "name": self.name, "tournament_id": self.tournament_id, } - self.logger = get_logger( - self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆" - ) + self.logger = get_logger(self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆") def get_metadata(self) -> dict: return self._metadata - def _copy_game_log_to_agent( - self, agent, round_num: int, log_output: str, dest_path: str = None - ) -> None: + def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str, dest_path: str = None) -> None: """Copy round log to agent environment.""" try: create_file_in_container( @@ -40,6 +32,4 @@ def _copy_game_log_to_agent( dest_path=dest_path if dest_path else f"logs/round_{round_num}.log", ) except Exception: - self.logger.error( - f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" - ) + self.logger.error(f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}") diff --git a/codeclash/tournaments/pvp.py b/codeclash/tournaments/pvp.py index dd670de1..562bae48 100644 --- a/codeclash/tournaments/pvp.py +++ b/codeclash/tournaments/pvp.py @@ -14,9 +14,7 @@ class PvpTournament(AbstractTournament): - def __init__( - self, config: dict, *, cleanup: bool = False, push_agent: bool = False - ): + def __init__(self, config: dict, *, cleanup: bool = False, push_agent: bool = False): super().__init__(config, name="PvpTournament") self.cleanup_on_end = cleanup self.push_agent = push_agent @@ -37,9 +35,7 @@ def rounds(self) -> int: def get_agent(self, agent_config: dict, prompts: dict) -> Player: """Create an agent with environment and game context.""" - environment = self.game.get_environment( - f"{self.game.game_id}.{agent_config['name']}" - ) + environment = self.game.get_environment(f"{self.game.game_id}.{agent_config['name']}") game_context = GameContext( id=self.game.game_id, @@ -77,16 +73,12 @@ def run_training_round(self, round_num: int) -> None: # Write log to file for idx, lo in enumerate(record.data.logs): - round_log_path = ( - self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" - ) + round_log_path = self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" round_log_path.write_text(lo) # Copy log to agent environments for agent in self.agents: - self.logger.info( - f"Copying round {round_num} log(s) to {agent.name}'s container..." - ) + self.logger.info(f"Copying round {round_num} log(s) to {agent.name}'s container...") copy_to_container( agent.environment, self.game.log_local / f"round_{round_num}", diff --git a/codeclash/tournaments/single_player.py b/codeclash/tournaments/single_player.py index 651f559c..127309f1 100644 --- a/codeclash/tournaments/single_player.py +++ b/codeclash/tournaments/single_player.py @@ -51,9 +51,7 @@ def get_game_context(self, agent_config: dict, *, round: int) -> GameContext: def get_agent(self, agent_config: dict, round: int) -> Player: """Create an agent with environment and game context.""" - environment = self.game.get_environment( - f"{self.game.game_id}.{agent_config['name']}" - ) + environment = self.game.get_environment(f"{self.game.game_id}.{agent_config['name']}") game_context = self.get_game_context(agent_config, round=round) return get_agent(agent_config, game_context, environment) @@ -86,15 +84,11 @@ def run_training_round(self, round_num: int) -> None: # Write log to file for idx, lo in enumerate(record.logs): - round_log_path = ( - self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" - ) + round_log_path = self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" round_log_path.write_text(lo) # Copy log to main agent environment only - self.logger.info( - f"Copying round {round_num} log(s) to {self.agent.name}'s container..." - ) + self.logger.info(f"Copying round {round_num} log(s) to {self.agent.name}'s container...") copy_to_container( self.agent, self.game.log_local / f"round_{round_num}", @@ -135,28 +129,19 @@ def evaluate(self, n_repetitions: int = 3): p2_config["name"] = "p2" p2 = self.get_dummy_agent() matrix = { - p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)} - for p1_round in range(0, self.rounds + 1) + p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)} for p1_round in range(0, self.rounds + 1) } for p1_round in range(0, self.rounds + 1): for p2_round in range(0, self.rounds + 1): - self.logger.info( - f"Evaluating agent at round {p1_round} against agent at round {p2_round}" - ) - p1_patch = ( - self.agent.get_metadata()["diff"][p1_round] if p1_round > 0 else "" - ) - p2_patch = ( - self.agent.get_metadata()["diff"][p2_round] if p2_round > 0 else "" - ) + self.logger.info(f"Evaluating agent at round {p1_round} against agent at round {p2_round}") + p1_patch = self.agent.get_metadata()["diff"][p1_round] if p1_round > 0 else "" + p2_patch = self.agent.get_metadata()["diff"][p2_round] if p2_round > 0 else "" p1.reset_and_apply_patch(p1_patch) p2.reset_and_apply_patch(p2_patch) for i_repetition in range(n_repetitions): record = self.game.run_round([p1, p2]) winner = record.stats.winner - self.logger.info( - f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}" - ) + self.logger.info(f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}") matrix[p1_round][p2_round].append(winner) self.logger.info(f"Evaluation matrix: {matrix}") return matrix diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py index b6b84c81..64f8dfb6 100644 --- a/codeclash/utils/environment.py +++ b/codeclash/utils/environment.py @@ -6,9 +6,7 @@ from minisweagent.environments.docker import DockerEnvironment -def assert_zero_exit_code( - result: dict, *, logger: logging.Logger | None = None -) -> dict: +def assert_zero_exit_code(result: dict, *, logger: logging.Logger | None = None) -> dict: if result.get("returncode", 0) != 0: msg = f"Command failed with exit code {result.get('returncode')}:\n{result.get('output')}" if logger is not None: @@ -36,18 +34,14 @@ def copy_between_containers( f"{src_container.container_id}:{src_path}", str(temp_path), ] - result_src = subprocess.run( - cmd_src, check=False, capture_output=True, text=True - ) + result_src = subprocess.run(cmd_src, check=False, capture_output=True, text=True) if result_src.returncode != 0: raise RuntimeError( f"Failed to copy from {src_container.container_id} to local temp: {result_src.stdout}{result_src.stderr}" ) # Ensure destination folder exists - assert_zero_exit_code( - dest_container.execute(f"mkdir -p {Path(dest_path).parent}") - ) + assert_zero_exit_code(dest_container.execute(f"mkdir -p {Path(dest_path).parent}")) # Copy from temporary local directory to destination container cmd_dest = [ @@ -56,9 +50,7 @@ def copy_between_containers( str(temp_path), f"{dest_container.container_id}:{dest_path}", ] - result_dest = subprocess.run( - cmd_dest, check=False, capture_output=True, text=True - ) + result_dest = subprocess.run(cmd_dest, check=False, capture_output=True, text=True) if result_dest.returncode != 0: raise RuntimeError( f"Failed to copy from local temp to {dest_container.container_id}: {result_dest.stdout}{result_dest.stderr}" diff --git a/codeclash/utils/log.py b/codeclash/utils/log.py index 001a0e11..bd5cacca 100644 --- a/codeclash/utils/log.py +++ b/codeclash/utils/log.py @@ -64,9 +64,7 @@ def format(self, record: logging.LogRecord) -> str: return capture.get().rstrip() -def get_logger( - name: str, *, emoji: str = "", log_path: Path | None = None -) -> logging.Logger: +def get_logger(name: str, *, emoji: str = "", log_path: Path | None = None) -> logging.Logger: """Get logger. Use this instead of `logging.getLogger` to ensure that the logger is set up with the correct handlers. """ diff --git a/codeclash/viewer/app.py b/codeclash/viewer/app.py index a8e62920..addfc2aa 100644 --- a/codeclash/viewer/app.py +++ b/codeclash/viewer/app.py @@ -8,7 +8,7 @@ import json from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any from flask import Flask, jsonify, render_template, request @@ -28,7 +28,7 @@ def is_probably_failed_run(log_dir: Path) -> bool: return not metadata_file.exists() -def get_round_count_from_metadata(log_dir: Path) -> Optional[int]: +def get_round_count_from_metadata(log_dir: Path) -> int | None: """Extract round count from metadata.json if it exists""" metadata_file = log_dir / "metadata.json" if not metadata_file.exists(): @@ -45,9 +45,9 @@ def get_round_count_from_metadata(log_dir: Path) -> Optional[int]: class GameMetadata: """Metadata about a game session""" - results: Dict[str, Any] + results: dict[str, Any] main_log: str - rounds: List[Dict[str, Any]] + rounds: list[dict[str, Any]] @dataclass @@ -58,10 +58,10 @@ class TrajectoryInfo: round_num: int api_calls: int cost: float - exit_status: Optional[str] - submission: Optional[str] - memory: Optional[str] - messages: List[Dict[str, Any]] + exit_status: str | None + submission: str | None + memory: str | None + messages: list[dict[str, Any]] class LogParser: @@ -86,9 +86,7 @@ def parse_game_metadata(self) -> GameMetadata: # Parse main.log if it exists main_log_file = self.log_dir / "game.log" - main_log = ( - main_log_file.read_text() if main_log_file.exists() else "No main log found" - ) + main_log = main_log_file.read_text() if main_log_file.exists() else "No main log found" # Parse round logs rounds = [] @@ -99,9 +97,7 @@ def parse_game_metadata(self) -> GameMetadata: return GameMetadata(results=results, main_log=main_log, rounds=rounds) - def parse_trajectory( - self, player_id: int, round_num: int - ) -> Optional[TrajectoryInfo]: + def parse_trajectory(self, player_id: int, round_num: int) -> TrajectoryInfo | None: """Parse a specific trajectory file""" # Try both .json and .log extensions for ext in [".json", ".log"]: @@ -128,7 +124,7 @@ def parse_trajectory( return None - def get_available_trajectories(self) -> List[tuple]: + def get_available_trajectories(self) -> list[tuple]: """Get list of available trajectory files as (player_id, round_num) tuples""" trajectories = [] for traj_file in self.log_dir.glob("p*_r*.traj.*"): @@ -186,9 +182,7 @@ def index(): # Extract just the names for backwards compatibility log_folders = [folder["name"] for folder in log_folders_info] - selected_folder = request.args.get( - "folder", log_folders[0] if log_folders else None - ) + selected_folder = request.args.get("folder", log_folders[0] if log_folders else None) if not selected_folder or not (logs_dir / selected_folder).exists(): return render_template("no_logs.html", log_folders=log_folders) diff --git a/codeclash/viewer/static/css/style.css b/codeclash/viewer/static/css/style.css index 6228241b..315483a0 100644 --- a/codeclash/viewer/static/css/style.css +++ b/codeclash/viewer/static/css/style.css @@ -46,7 +46,8 @@ } body { - font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', sans-serif; + font-family: + -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", sans-serif; line-height: 1.6; color: var(--text-primary); background-color: var(--bg-primary); @@ -159,7 +160,8 @@ body { .metadata-display pre { margin: 0; - font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace; + font-family: + "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, monospace; font-size: 0.875rem; line-height: 1.5; } @@ -226,7 +228,12 @@ details summary { /* Round separator */ .round-separator { height: 2px; - background: linear-gradient(90deg, transparent, var(--border-color), transparent); + background: linear-gradient( + 90deg, + transparent, + var(--border-color), + transparent + ); margin: 2rem 0; border-radius: 1px; } @@ -444,7 +451,8 @@ details summary { .log-content pre { white-space: pre-wrap; word-wrap: break-word; - font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace; + font-family: + "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, monospace; font-size: 0.875rem; line-height: 1.5; margin: 0; @@ -517,7 +525,10 @@ details summary { /* Smooth transitions */ * { - transition: color 0.3s ease, background-color 0.3s ease, border-color 0.3s ease; + transition: + color 0.3s ease, + background-color 0.3s ease, + border-color 0.3s ease; } /* Focus styles for accessibility */ diff --git a/codeclash/viewer/static/js/app.js b/codeclash/viewer/static/js/app.js index a503b542..b65ff030 100644 --- a/codeclash/viewer/static/js/app.js +++ b/codeclash/viewer/static/js/app.js @@ -2,257 +2,274 @@ // Theme management function initializeTheme() { - // Check for saved theme preference or default to 'light' - const savedTheme = localStorage.getItem('theme') || 'light'; - setTheme(savedTheme); + // Check for saved theme preference or default to 'light' + const savedTheme = localStorage.getItem("theme") || "light"; + setTheme(savedTheme); } function setTheme(theme) { - document.documentElement.setAttribute('data-theme', theme); - localStorage.setItem('theme', theme); - - // Update theme toggle button - const themeToggle = document.getElementById('theme-toggle'); - const themeIcon = themeToggle.querySelector('.theme-icon'); - - if (theme === 'dark') { - themeIcon.textContent = '☀️'; - themeToggle.setAttribute('aria-label', 'Switch to light mode'); - } else { - themeIcon.textContent = '🌙'; - themeToggle.setAttribute('aria-label', 'Switch to dark mode'); - } + document.documentElement.setAttribute("data-theme", theme); + localStorage.setItem("theme", theme); + + // Update theme toggle button + const themeToggle = document.getElementById("theme-toggle"); + const themeIcon = themeToggle.querySelector(".theme-icon"); + + if (theme === "dark") { + themeIcon.textContent = "☀️"; + themeToggle.setAttribute("aria-label", "Switch to light mode"); + } else { + themeIcon.textContent = "🌙"; + themeToggle.setAttribute("aria-label", "Switch to dark mode"); + } } function toggleTheme() { - const currentTheme = document.documentElement.getAttribute('data-theme'); - const newTheme = currentTheme === 'dark' ? 'light' : 'dark'; - setTheme(newTheme); + const currentTheme = document.documentElement.getAttribute("data-theme"); + const newTheme = currentTheme === "dark" ? "light" : "dark"; + setTheme(newTheme); } // Folder selection function changeFolder() { - const select = document.getElementById('folder-select'); - const selectedFolder = select.value; - - if (selectedFolder) { - // Reload page with new folder parameter - const url = new URL(window.location); - url.searchParams.set('folder', selectedFolder); - window.location.href = url.toString(); - } + const select = document.getElementById("folder-select"); + const selectedFolder = select.value; + + if (selectedFolder) { + // Reload page with new folder parameter + const url = new URL(window.location); + url.searchParams.set("folder", selectedFolder); + window.location.href = url.toString(); + } } // Enhanced foldout behavior function initializeFoldouts() { - // Add smooth animations to details elements - const detailsElements = document.querySelectorAll('details'); - - detailsElements.forEach(details => { - const summary = details.querySelector('summary'); - - // Add click analytics/feedback - summary.addEventListener('click', function(e) { - // Small delay to allow default behavior - setTimeout(() => { - // Scroll into view if needed - if (details.open) { - const rect = details.getBoundingClientRect(); - const isInViewport = rect.top >= 0 && rect.bottom <= window.innerHeight; - - if (!isInViewport) { - details.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' - }); - } - } - }, 100); - }); + // Add smooth animations to details elements + const detailsElements = document.querySelectorAll("details"); + + detailsElements.forEach((details) => { + const summary = details.querySelector("summary"); + + // Add click analytics/feedback + summary.addEventListener("click", function (e) { + // Small delay to allow default behavior + setTimeout(() => { + // Scroll into view if needed + if (details.open) { + const rect = details.getBoundingClientRect(); + const isInViewport = + rect.top >= 0 && rect.bottom <= window.innerHeight; + + if (!isInViewport) { + details.scrollIntoView({ + behavior: "smooth", + block: "nearest", + }); + } + } + }, 100); }); + }); } // Keyboard shortcuts function initializeKeyboardShortcuts() { - document.addEventListener('keydown', function(e) { - // Ctrl/Cmd + D: Toggle dark mode - if ((e.ctrlKey || e.metaKey) && e.key === 'd') { - e.preventDefault(); - toggleTheme(); - } - - // Escape: Close all open details - if (e.key === 'Escape') { - const openDetails = document.querySelectorAll('details[open]'); - openDetails.forEach(details => { - details.removeAttribute('open'); - }); - } - - // Ctrl/Cmd + E: Expand all details - if ((e.ctrlKey || e.metaKey) && e.key === 'e') { - e.preventDefault(); - const allDetails = document.querySelectorAll('details'); - allDetails.forEach(details => { - details.setAttribute('open', ''); - }); - } + document.addEventListener("keydown", function (e) { + // Ctrl/Cmd + D: Toggle dark mode + if ((e.ctrlKey || e.metaKey) && e.key === "d") { + e.preventDefault(); + toggleTheme(); + } - // Ctrl/Cmd + Shift + E: Collapse all details - if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === 'E') { - e.preventDefault(); - const allDetails = document.querySelectorAll('details'); - allDetails.forEach(details => { - details.removeAttribute('open'); - }); - } + // Escape: Close all open details + if (e.key === "Escape") { + const openDetails = document.querySelectorAll("details[open]"); + openDetails.forEach((details) => { + details.removeAttribute("open"); + }); + } + // Ctrl/Cmd + E: Expand all details + if ((e.ctrlKey || e.metaKey) && e.key === "e") { + e.preventDefault(); + const allDetails = document.querySelectorAll("details"); + allDetails.forEach((details) => { + details.setAttribute("open", ""); + }); + } - }); + // Ctrl/Cmd + Shift + E: Collapse all details + if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === "E") { + e.preventDefault(); + const allDetails = document.querySelectorAll("details"); + allDetails.forEach((details) => { + details.removeAttribute("open"); + }); + } + }); } - - // Code highlighting (basic syntax highlighting) function initializeCodeHighlighting() { - const codeBlocks = document.querySelectorAll('.code-block code, .message-text pre'); + const codeBlocks = document.querySelectorAll( + ".code-block code, .message-text pre", + ); - codeBlocks.forEach(block => { - const text = block.textContent; + codeBlocks.forEach((block) => { + const text = block.textContent; - // Simple bash highlighting - if (text.includes('#!/bin/bash') || text.includes('```bash')) { - block.classList.add('language-bash'); - highlightBash(block); - } + // Simple bash highlighting + if (text.includes("#!/bin/bash") || text.includes("```bash")) { + block.classList.add("language-bash"); + highlightBash(block); + } - // Simple Python highlighting - if (text.includes('def ') || text.includes('import ') || text.includes('python')) { - block.classList.add('language-python'); - highlightPython(block); - } - }); + // Simple Python highlighting + if ( + text.includes("def ") || + text.includes("import ") || + text.includes("python") + ) { + block.classList.add("language-python"); + highlightPython(block); + } + }); } function highlightBash(block) { - let html = block.innerHTML; + let html = block.innerHTML; - // Commands - html = html.replace(/\b(ls|cd|cat|grep|sed|awk|find|mkdir|rm|cp|mv|chmod|echo|export)\b/g, - '$1'); + // Commands + html = html.replace( + /\b(ls|cd|cat|grep|sed|awk|find|mkdir|rm|cp|mv|chmod|echo|export)\b/g, + '$1', + ); - // Flags - html = html.replace(/\s(-[a-zA-Z]+)/g, - ' $1'); + // Flags + html = html.replace( + /\s(-[a-zA-Z]+)/g, + ' $1', + ); - block.innerHTML = html; + block.innerHTML = html; } function highlightPython(block) { - let html = block.innerHTML; + let html = block.innerHTML; - // Keywords - html = html.replace(/\b(def|class|import|from|if|else|elif|for|while|try|except|finally|return|yield|with|as|pass|break|continue|lambda|global|nonlocal)\b/g, - '$1'); + // Keywords + html = html.replace( + /\b(def|class|import|from|if|else|elif|for|while|try|except|finally|return|yield|with|as|pass|break|continue|lambda|global|nonlocal)\b/g, + '$1', + ); - // Strings - html = html.replace(/(["'])((?:\\.|(?!\1)[^\\])*?)\1/g, - '$1$2$1'); + // Strings + html = html.replace( + /(["'])((?:\\.|(?!\1)[^\\])*?)\1/g, + '$1$2$1', + ); - block.innerHTML = html; + block.innerHTML = html; } // Performance monitoring function initializePerformanceMonitoring() { - // Log page load time - window.addEventListener('load', function() { - const loadTime = performance.now(); - console.log(`Page loaded in ${loadTime.toFixed(2)}ms`); - - // Count elements for performance insight - const messageCount = document.querySelectorAll('.message-block').length; - const foldoutCount = document.querySelectorAll('details').length; - - console.log(`Rendered ${messageCount} messages and ${foldoutCount} foldouts`); - }); + // Log page load time + window.addEventListener("load", function () { + const loadTime = performance.now(); + console.log(`Page loaded in ${loadTime.toFixed(2)}ms`); + + // Count elements for performance insight + const messageCount = document.querySelectorAll(".message-block").length; + const foldoutCount = document.querySelectorAll("details").length; + + console.log( + `Rendered ${messageCount} messages and ${foldoutCount} foldouts`, + ); + }); } // Message expand/collapse functionality function expandMessage(clickedElement) { - const messageContent = clickedElement.closest('.message-content'); - const previewShort = messageContent.querySelector('.message-preview-short'); - const contentFull = messageContent.querySelector('.message-content-full'); - const contentExpanded = messageContent.querySelector('.message-content-expanded'); - - // Expanding - hide preview, show full content - if (previewShort) previewShort.style.display = 'none'; - if (contentFull) contentFull.style.display = 'block'; - if (contentExpanded) contentExpanded.style.display = 'block'; - - // Smooth scroll to keep the content in view - setTimeout(() => { - messageContent.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' - }); - }, 100); + const messageContent = clickedElement.closest(".message-content"); + const previewShort = messageContent.querySelector(".message-preview-short"); + const contentFull = messageContent.querySelector(".message-content-full"); + const contentExpanded = messageContent.querySelector( + ".message-content-expanded", + ); + + // Expanding - hide preview, show full content + if (previewShort) previewShort.style.display = "none"; + if (contentFull) contentFull.style.display = "block"; + if (contentExpanded) contentExpanded.style.display = "block"; + + // Smooth scroll to keep the content in view + setTimeout(() => { + messageContent.scrollIntoView({ + behavior: "smooth", + block: "nearest", + }); + }, 100); } function collapseMessage(clickedElement) { - const messageContent = clickedElement.closest('.message-content'); - const previewShort = messageContent.querySelector('.message-preview-short'); - const contentFull = messageContent.querySelector('.message-content-full'); - const contentExpanded = messageContent.querySelector('.message-content-expanded'); + const messageContent = clickedElement.closest(".message-content"); + const previewShort = messageContent.querySelector(".message-preview-short"); + const contentFull = messageContent.querySelector(".message-content-full"); + const contentExpanded = messageContent.querySelector( + ".message-content-expanded", + ); + + // Collapsing - show preview, hide full content + if (contentFull) contentFull.style.display = "none"; + if (contentExpanded) contentExpanded.style.display = "none"; + if (previewShort) previewShort.style.display = "block"; + + // Smooth scroll to keep the content in view + setTimeout(() => { + messageContent.scrollIntoView({ + behavior: "smooth", + block: "nearest", + }); + }, 100); +} - // Collapsing - show preview, hide full content - if (contentFull) contentFull.style.display = 'none'; - if (contentExpanded) contentExpanded.style.display = 'none'; - if (previewShort) previewShort.style.display = 'block'; +function collapseTrajectoryMessages(clickedElement) { + // Find the parent trajectory messages foldout + const trajectoryFoldout = clickedElement.closest( + ".trajectory-messages-foldout", + ); + + if (trajectoryFoldout) { + // Close the details element + trajectoryFoldout.removeAttribute("open"); - // Smooth scroll to keep the content in view + // Smooth scroll to the trajectory header setTimeout(() => { - messageContent.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' + const trajectoryHeader = trajectoryFoldout.closest(".trajectory-header"); + if (trajectoryHeader) { + trajectoryHeader.scrollIntoView({ + behavior: "smooth", + block: "nearest", }); + } }, 100); + } } -function collapseTrajectoryMessages(clickedElement) { - // Find the parent trajectory messages foldout - const trajectoryFoldout = clickedElement.closest('.trajectory-messages-foldout'); - - if (trajectoryFoldout) { - // Close the details element - trajectoryFoldout.removeAttribute('open'); - - // Smooth scroll to the trajectory header - setTimeout(() => { - const trajectoryHeader = trajectoryFoldout.closest('.trajectory-header'); - if (trajectoryHeader) { - trajectoryHeader.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' - }); - } - }, 100); - } -} - - - // Initialize everything when DOM is loaded -document.addEventListener('DOMContentLoaded', function() { - initializeTheme(); - initializeFoldouts(); - initializeKeyboardShortcuts(); - initializeCodeHighlighting(); - initializePerformanceMonitoring(); - - console.log('CodeClash Trajectory Viewer initialized'); - console.log('Keyboard shortcuts:'); - console.log(' Ctrl/Cmd + D: Toggle dark mode'); - console.log(' Ctrl/Cmd + E: Expand all sections'); - console.log(' Ctrl/Cmd + Shift + E: Collapse all sections'); - console.log(' Escape: Close all sections'); +document.addEventListener("DOMContentLoaded", function () { + initializeTheme(); + initializeFoldouts(); + initializeKeyboardShortcuts(); + initializeCodeHighlighting(); + initializePerformanceMonitoring(); + + console.log("CodeClash Trajectory Viewer initialized"); + console.log("Keyboard shortcuts:"); + console.log(" Ctrl/Cmd + D: Toggle dark mode"); + console.log(" Ctrl/Cmd + E: Expand all sections"); + console.log(" Ctrl/Cmd + Shift + E: Collapse all sections"); + console.log(" Escape: Close all sections"); }); diff --git a/main.py b/main.py index 4984870a..9d70146d 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ def main(config_path: str, *, cleanup: bool = False, push_agent: bool = False): - with open(config_path, "r") as f: + with open(config_path) as f: config = yaml.safe_load(f) training = PvpTournament(config, cleanup=cleanup, push_agent=push_agent) training.run() diff --git a/main_single_player.py b/main_single_player.py index 6c7ff720..d02f3a41 100644 --- a/main_single_player.py +++ b/main_single_player.py @@ -6,7 +6,7 @@ def main(config_path: str, cleanup: bool = False): - with open(config_path, "r") as f: + with open(config_path) as f: config = yaml.safe_load(f) training = SinglePlayerTraining(config, cleanup) training.run() diff --git a/pyproject.toml b/pyproject.toml index 8bc097e8..66848c3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,152 @@ dev = [ "pytest", "pytest-cov", "pytest-xdist", + "ruff", ] [tool.setuptools.packages.find] include = ["codeclash*"] + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + # ---- project specific ---- + "tests/test_data", + # Exclude commands so they don't get the __future__ imports + "config/commands", +] + +line-length = 120 +indent-width = 4 + +target-version = "py310" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +# I001: Isort, I002: required import +select = [ + # Error (E) + "E", + # Error (PLE) + "PLE", + # pycodestyle + "E713", # not in + "E714", # is not + "E711", # comparison with None + # pyflakes + "F821", + "F822", + "F401", # unused-import + "F841", # unused var + "F541", # f-string without args + "F901", # raise NotImplemented should be raise NotImplementedError + # isort + "I001", # isort + "I002", # required import + # pyupgrade and related + "UP", # pyupgrade + "C401", # flake8-comprehensions: unnecessary-generator-set + "C402", # flake8-comprehensions: unnecessary-generator-dict + "C403", # flake8-comprehensions: unnecessary-list-comprehension-set + "C404", # flake8-comprehensions: unnecessary-list-comprehension-dict + "C405", # flake8-comprehensions: unnecessary-literal-set + "F632", # pyflakes: is-literal + "W605", # pycodestyle: invalid-escape-sequence + # bugbear + "B006", # mutable default + "B007", # unused loop var + "B009", # getattr with constant + # flake8-errmsg + "EM", + # flake8-return + "RET", + # RUF + "RUF019", # unneded key in dict check + # pytest + "PT", + # flake8-simplify (SIM) + "SIM201", + # flake8-use-pathlib + "PTH100", + "PTH110", + "PTH111", + "PTH112", + "PTH113", + "PTH114", + "PTH117", + "PTH118", + "PTH119", + "PTH120", + "PTH121", + "PTH122", + "PTH202", + "PTH203", + "PTH204", + "PTH205", +] +ignore = [ + # flake8-return + "RET505", # can't autofix + "RET506", # can't autofix + "RET507", # can't autofix + # error (E) + "E501", # line too long + "E402", # import not on top of file + "E722", # bare except + "E741", # ambiguous symbol + # pytest + "PT011", + "PT018", + # flake8-errmsg + "EM101", # exception must not use a string literal + "EM102", # exception must not use an f-string literal + "EM103", # exception must not use a .format(...) string directly +] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" diff --git a/tests/test_integration.py b/tests/test_integration.py index feddc06d..f6ff8d17 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -5,11 +5,10 @@ using DeterministicModel instead of real LLM models. """ -import os import tempfile +from pathlib import Path from unittest.mock import patch -import pytest import yaml from minisweagent.models.test_models import DeterministicModel @@ -25,12 +24,12 @@ def test_main_battlesnake_integration(): config_path = "configs/battlesnake.yaml" # Read the original config - with open(config_path, "r") as f: + with open(config_path) as f: config = yaml.safe_load(f) # Create a temporary directory for test artifacts with tempfile.TemporaryDirectory() as temp_dir: - temp_config_path = os.path.join(temp_dir, "test_battlesnake.yaml") + temp_config_path = Path(temp_dir) / "test_battlesnake.yaml" # Reduce rounds to 1 for faster testing config["tournament"]["rounds"] = 1 @@ -51,9 +50,7 @@ def wrapper(config, game_context, environment): print(f"Replacing model for agent {agent.name}") # Create DeterministicModel with the specified command deterministic_model = DeterministicModel( - outputs=[ - "```bash\necho 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\n```" - ] + outputs=["```bash\necho 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\n```"] ) agent.agent.model = deterministic_model