diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3e4cc231..8cafb6df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,9 @@ +ci: + autoupdate_commit_msg: "chore: update pre-commit hooks" + repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -9,14 +12,24 @@ repos: - id: check-merge-conflict - id: debug-statements - - repo: https://github.com/psf/black - rev: 23.3.0 + - repo: https://github.com/crate-ci/typos + rev: v1 + hooks: + - id: typos + files: \.(py|md|rst|yaml|toml) + exclude: pyproject.toml + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.10 hooks: - - id: black - language_version: python3 + # Run the linter. + - id: ruff + args: ["--fix"] + # Run the formatter. + - id: ruff-format - - repo: https://github.com/pycqa/isort - rev: 5.12.0 + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v4.0.0-alpha.8" # Use the sha or tag you want to point at hooks: - - id: isort - args: ["--profile", "black"] + - id: prettier + types_or: ["javascript", "css"] diff --git a/codeclash/agents/__init__.py b/codeclash/agents/__init__.py index 5dd9d0a6..a76b3569 100644 --- a/codeclash/agents/__init__.py +++ b/codeclash/agents/__init__.py @@ -6,9 +6,7 @@ from codeclash.agents.utils import GameContext -def get_agent( - config: dict, game_context: GameContext, environment: DockerEnvironment -) -> Player: +def get_agent(config: dict, game_context: GameContext, environment: DockerEnvironment) -> Player: agents = { "dummy": Dummy, "mini": MiniSWEAgent, diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py index 9e385d00..fdf92bc8 100644 --- a/codeclash/agents/abstract.py +++ b/codeclash/agents/abstract.py @@ -9,7 +9,7 @@ from codeclash.agents.utils import GameContext from codeclash.constants import GH_ORG from codeclash.tournaments.utils.git_utils import filter_git_diff -from 
codeclash.utils.environment import assert_zero_exit_code, create_file_on_container +from codeclash.utils.environment import assert_zero_exit_code, create_file_in_container from codeclash.utils.log import get_logger load_dotenv() @@ -25,7 +25,7 @@ def __init__( self.config = config self.name = config["name"] self._player_unique_id = uuid.uuid4() - """Unique ID that doesn't clash even accross multiple games. Used for git tags.""" + """Unique ID that doesn't clash even across multiple games. Used for git tags.""" self.environment = environment self.game_context = game_context self.logger = get_logger( @@ -54,9 +54,7 @@ def post_run_hook(self, *, round: int) -> None: """Should be called after we called the run method.""" self._commit() self._metadata["diff"][round] = self._get_round_diff(round) - self._metadata["incremental_diff"][round] = self._get_round_diff( - round, incremental=True - ) + self._metadata["incremental_diff"][round] = self._get_round_diff(round, incremental=True) @abstractmethod def run(self) -> None: @@ -79,23 +77,15 @@ def push(self) -> None: "git push origin --tags", ]: assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) - self.logger.info( - f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})" - ) + self.logger.info(f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})") - def reset_and_apply_patch( - self, patch: str, *, base_commit: str = "", filter_patch: bool = True - ) -> None: - """Clean all uncommited changes. If base_commit is provided, reset to that commit. + def reset_and_apply_patch(self, patch: str, *, base_commit: str = "", filter_patch: bool = True) -> None: + """Clean all uncommitted changes. If base_commit is provided, reset to that commit. Then apply the patch to the codebase. 
""" # Need to clean before we copy over the patch (else it's gonna be removed by git clean) self.logger.debug( - assert_zero_exit_code( - self.environment.execute( - f"git reset --hard {base_commit} && git clean -fd" - ) - ) + assert_zero_exit_code(self.environment.execute(f"git reset --hard {base_commit} && git clean -fd")) ) patch = filter_git_diff(patch) if filter_patch else patch @@ -104,7 +94,7 @@ def reset_and_apply_patch( self.logger.debug("No patch to apply, skipping") return - create_file_on_container( + create_file_in_container( container=self.environment, # type: ignore content=patch, dest_path="tmp_patch.txt", @@ -115,9 +105,7 @@ def reset_and_apply_patch( commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"] for cmd in commands: self.logger.debug(f"Executing command: {cmd}") - out = assert_zero_exit_code( - self.environment.execute(cmd), logger=self.logger - ) + out = assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger) self.logger.debug(out) # --- Helper methods --- @@ -125,9 +113,7 @@ def reset_and_apply_patch( def _tag_round(self, round: int) -> None: """Git tag the codebase at the given round.""" assert_zero_exit_code( - self.environment.execute( - f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'" - ), + self.environment.execute(f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'"), logger=self.logger, ) @@ -164,9 +150,7 @@ def _get_round_diff(self, round: int, *, incremental: bool = False) -> str: previous_round_tag = self._get_round_tag_name(0) current_round_tag = self._get_round_tag_name(round) out = assert_zero_exit_code( - self.environment.execute( - f"git diff {previous_round_tag}..{current_round_tag}" - ), + self.environment.execute(f"git diff {previous_round_tag}..{current_round_tag}"), logger=self.logger, ) return out["output"] diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py index f3fec5d4..fed2b2b6 100644 --- 
a/codeclash/agents/minisweagent.py +++ b/codeclash/agents/minisweagent.py @@ -17,7 +17,7 @@ from codeclash.agents.abstract import Player from codeclash.agents.utils import GameContext, resolve_api_key -from codeclash.utils.environment import copy_file_to_container +from codeclash.utils.environment import copy_to_container class ClashAgent(DefaultAgent): @@ -47,9 +47,7 @@ def add_message(self, role: str, content: str, **kwargs): super().add_message(role, content, **kwargs) self.logger.debug(f"[{role}] {content}", extra={"highlighter": None}) if role == "assistant": - self.logger.info( - f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})" - ) + self.logger.info(f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})") def render_template(self, template: str, **kwargs) -> str: cs = ( @@ -69,9 +67,7 @@ def run(self) -> tuple[str, str]: class MiniSWEAgent(Player): """Player with agentic code editing capabilities""" - def __init__( - self, config: dict, environment: DockerEnvironment, game_context: GameContext - ): + def __init__(self, config: dict, environment: DockerEnvironment, game_context: GameContext): super().__init__(config, environment=environment, game_context=game_context) def run(self): @@ -96,10 +92,7 @@ def run(self): result = exc_message print(exc_message) finally: - traj_path = ( - self.game_context.log_local - / f"{self.name}_r{self.game_context.round}.traj.json" - ) + traj_path = self.game_context.log_local / f"{self.name}_r{self.game_context.round}.traj.json" save_traj( self.agent, # type: ignore traj_path, @@ -107,7 +100,7 @@ def run(self): result=result, print_fct=self.logger.debug, ) - copy_file_to_container( + copy_to_container( self.environment, traj_path, self.game_context.log_env / traj_path.name, diff --git a/codeclash/agents/utils.py b/codeclash/agents/utils.py index 16ecd13a..13fe3aff 100644 --- a/codeclash/agents/utils.py +++ b/codeclash/agents/utils.py @@ -13,6 +13,7 @@ def resolve_api_key(model: str) -> str: 
return os.getenv("ANTHROPIC_API_KEY") if "gpt" in model: return os.getenv("OPENAI_API_KEY") + return "" @dataclass @@ -38,10 +39,7 @@ class GameContext: def _render_prompt_templates(self) -> dict: context = asdict(self) - return { - key: Template(template_str).render(**context) - for key, template_str in self.prompts.items() - } + return {key: Template(template_str).render(**context) for key, template_str in self.prompts.items()} def to_template_vars(self) -> dict[str, str]: """Convert the GameContext to a dictionary for rendering prompts in the agent""" diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py index 9cc1a870..82d54696 100644 --- a/codeclash/games/abstract.py +++ b/codeclash/games/abstract.py @@ -2,7 +2,9 @@ import subprocess import time from abc import ABC, abstractmethod +from dataclasses import dataclass from pathlib import Path +from typing import Any from minisweagent.environments.docker import DockerEnvironment @@ -12,6 +14,28 @@ from codeclash.utils.log import get_logger +@dataclass +class RoundStats: + winner: str + scores: dict[str, float] # Map of player to game metric (e.g. 
# of wins, assets accumulated) + details: dict[str, Any] = None # Optional, for game-specific info + + def __str__(self) -> str: + return "\n".join([f"- Winner: {self.winner}", f"- Scores: {self.scores}"]) + + +@dataclass +class RoundData: + logs: list[str] + results: list[str] + + +@dataclass +class RoundRecord: + data: RoundData + stats: RoundStats + + class CodeGame(ABC): name: str @@ -41,9 +65,7 @@ def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path): } self.log_env: Path = (DIR_WORK / DIR_LOGS / self.game_id).resolve() self.log_local: Path = local_output_dir - self.logger = get_logger( - self.name, log_path=self.log_local / "game.log", emoji="🏓" - ) + self.logger = get_logger(self.name, log_path=self.log_local / "game.log", emoji="🏓") self.environment: DockerEnvironment = self.get_environment() """The running docker environment for executing the game""" @@ -87,9 +109,7 @@ def build_image(self): if result.returncode == 0: self.logger.info(f"✅ Built Docker image {self.image_name}") else: - self.logger.error( - f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}" - ) + self.logger.error(f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}") raise RuntimeError(f"Failed to build Docker image: {result.stderr}") def get_metadata(self) -> dict: @@ -146,32 +166,30 @@ def _pre_round_setup(self, agents: list[Player]): ) @abstractmethod - def determine_winner( - self, result_output: str, agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: """Determine the winner of the game based on the result output. 
Args: - result_output: The specific output containing winning information + result_outputs: The specific output(s) containing winning information agents: List of agents participating in the round Returns: - Dictionary with key "winner" containing the winner's name + RoundStats object """ pass @abstractmethod - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def execute_round(self, agents: list[Player]) -> RoundData: """Subclasses implement their game-specific logic here. This is the low level implementation, you probably want to use run_round instead, which includes the pre-round setup, post-round setup, and winner determination. Returns: - Dictionary with keys "log_output" and "result_output" + RoundData object """ pass - def run_round(self, agents: list[Player]) -> dict[str, str]: + def run_round(self, agents: list[Player]) -> RoundRecord: """ Run a single round of the game with the given agents. @@ -179,15 +197,6 @@ def run_round(self, agents: list[Player]) -> dict[str, str]: handled by the tournament class. 
""" self._pre_round_setup(agents) - result = self.execute_round(agents) - log_output = result["log_output"] - result_output = result["result_output"] - - winner_result = self.determine_winner(result_output, agents) - winner_name = winner_result["winner"] - - return { - "log_output": log_output, - "result_output": result_output, - "winner": winner_name, - } + data = self.execute_round(agents) + stats = self.get_stats(data.results, agents) + return RoundRecord(data=data, stats=stats) diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py index 43c26295..743e0788 100644 --- a/codeclash/games/battlecode/main.py +++ b/codeclash/games/battlecode/main.py @@ -1,19 +1,18 @@ import re -import shlex from pathlib import Path from typing import Any +from tqdm.auto import tqdm + from codeclash.constants import DIR_WORK, RESULT_TIE -from codeclash.games.abstract import CodeGame +from codeclash.games.abstract import CodeGame, RoundData, RoundStats class BattleCodeGame(CodeGame): name: str = "BattleCode" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) assert len(config["players"]) == 2, "BattleCode is a two-player game" self.run_cmd_round: str = "python run.py run" for arg, val in self.game_config.get("args", {}).items(): @@ -23,40 +22,38 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner(self, result_output: str, agents: list[Any]) -> dict[str, str]: - self.logger.debug(f"Determining winner from result output: {result_output}") - lines = result_output.strip().split("\n") - # Get the third-to-last line which contains the winner info - winner_line = lines[-3] if len(lines) >= 3 else "" - self.logger.debug(f"Winner line: {winner_line}") - match = 
re.search(r"\s\((.*)\)\swins\s\(", winner_line) - if match: - winner_key = match.group(1) - self.logger.debug(f"Winner key from match: {winner_key}") - # Map A/B to actual agent names (much closer to original code) - winner = {"A": agents[0].name, "B": agents[1].name}.get( - winner_key, RESULT_TIE - ) - self.logger.debug(f"Concluding winner: {winner}") - return {"winner": winner} - else: - self.logger.debug("No winner match found, returning tie") - return {"winner": RESULT_TIE} + def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats: + winners = [] + for ro in result_outputs: + lines = ro.strip().split("\n") + # Get the third-to-last line which contains the winner info + winner_line = lines[-3] if len(lines) >= 3 else "" + self.logger.debug(f"Winner line: {winner_line}") + match = re.search(r"\s\((.*)\)\swins\s\(", winner_line) + if match: + winner_key = match.group(1) + self.logger.debug(f"Winner key from match: {winner_key}") + # Map A/B to actual agent names (much closer to original code) + winner = {"A": agents[0].name, "B": agents[1].name}.get(winner_key, RESULT_TIE) + winners.append(winner) + else: + winners.append(RESULT_TIE) + return RoundStats( + winner=max(set(winners), key=winners.count), + scores={agent.name: winners.count(agent.name) for agent in agents}, + ) - def execute_round(self, agents: list[Any]) -> dict[str, str]: + def execute_round(self, agents: list[Any]) -> RoundData: for agent in agents: - src, dest = f"/{agent.name}/src/mysubmission/", str( - DIR_WORK / "src" / agent.name - ) + src, dest = f"/{agent.name}/src/mysubmission/", str(DIR_WORK / "src" / agent.name) self.environment.execute(f"cp -r {src} {dest}") - args = [ - f"--p{idx+1}-dir src --p{idx+1} {agent.name}" - for idx, agent in enumerate(agents) - ] - cmd = f"{self.run_cmd_round} {shlex.join(args)}" - self.logger.info(f"Running command: {cmd}") - response = self.environment.execute(cmd) - assert response["returncode"] == 0, response - # For BattleCode, 
log_output and result_output are the same - output = response["output"] - return {"log_output": output, "result_output": output} + args = [f"--p{idx + 1}-dir src --p{idx + 1} {agent.name}" for idx, agent in enumerate(agents)] + cmd = f"{self.run_cmd_round} {' '.join(args)}" + self.logger.info(f"Running game: {cmd}") + outputs = [] + for _ in tqdm(range(self.game_config["sims_per_round"])): + response = self.environment.execute(cmd) + assert response["returncode"] == 0, response + # For BattleCode, log_outputs and result_outputs are the same + outputs.append(response["output"]) + return RoundData(logs=outputs, results=outputs) diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py index d3a8187e..e5cf4e69 100644 --- a/codeclash/games/battlesnake/main.py +++ b/codeclash/games/battlesnake/main.py @@ -2,8 +2,11 @@ import time from pathlib import Path +from tqdm.auto import tqdm + from codeclash.agents.abstract import Player -from codeclash.games.abstract import CodeGame +from codeclash.constants import RESULT_TIE +from codeclash.games.abstract import CodeGame, RoundData, RoundStats from codeclash.utils.environment import assert_zero_exit_code @@ -11,9 +14,7 @@ class BattleSnakeGame(CodeGame): name: str = "BattleSnake" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) self.run_cmd_round: str = "./battlesnake play" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -22,51 +23,59 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" --{arg} {val}" - def determine_winner( - self, result_output: str, agents: list[Player] - ) -> dict[str, str]: - self.logger.debug(f"Determining winner from result output: {result_output}") - lines = 
result_output.strip().split("\n") - # Get the last line which contains the game result - last_line = lines[-1] if lines else "" - self.logger.debug(f"Last line: {last_line}") - winner = json.loads(last_line)["winnerName"] - self.logger.debug(f"Concluding winner: {winner}") - return {"winner": winner} + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: + winners = [] + for ro in result_outputs: + lines = ro.strip().split("\n") + last_line = lines[-1] if lines else "" # Get the last line which contains the game result + winner = json.loads(last_line)["winnerName"] + winners.append(winner) + + win_counts = {agent.name: winners.count(agent.name) for agent in agents} + max_wins = max(win_counts.values()) + winners = [name for name, wins in win_counts.items() if wins == max_wins] + return RoundStats( + winner=RESULT_TIE if len(winners) > 1 else winners[0], + scores=win_counts, + ) - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def execute_round(self, agents: list[Player]) -> RoundData: cmd = [] for idx, agent in enumerate(agents): port = 8001 + idx # Start server in background - just add & to run in background! 
- self.environment.execute( - f"PORT={port} python main.py &", cwd=f"/{agent.name}" - ) + self.environment.execute(f"PORT={port} python main.py &", cwd=f"/{agent.name}") cmd.append(f"--url http://0.0.0.0:{port} -n {agent.name}") time.sleep(3) # Give servers time to start - # Create temporary output file for results - output_file = f"battlesnake_output_{int(time.time())}.json" - cmd_str = " ".join(cmd) + f" -o {output_file}" - self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}") - try: - response = assert_zero_exit_code( - self.environment.execute( - f"{self.run_cmd_round} {cmd_str}", - cwd=f"{self.environment.config.cwd}/game", + log_outputs, result_outputs = [], [] + cmd = self.run_cmd_round + " " + " ".join(cmd) + self.logger.info(f"Running game: {cmd}") + for idx in tqdm(range(self.game_config["sims_per_round"])): + # Create temporary output file for results + output_file = f"battlesnake_output_{idx}_{int(time.time())}.json" + + # Run game + response = assert_zero_exit_code( + self.environment.execute( + cmd + f" -o {output_file}", + cwd=f"{self.environment.config.cwd}/game", + ) ) - ) - # Read the output file for result information - result_response = self.environment.execute(f"cat game/{output_file}") - result_output = result_response["output"] + # Read the output file for result information + result_response = self.environment.execute(f"cat game/{output_file}") + result_output = result_response["output"] + log_outputs.append(response["output"]) + result_outputs.append(result_output) - # Clean up the output file - self.environment.execute(f"rm -f game/{output_file}") + # Clean up the output file + self.environment.execute(f"rm -f game/{output_file}") + time.sleep(0.05) - return {"log_output": response["output"], "result_output": result_output} + return RoundData(log_outputs, result_outputs) finally: # Kill all python servers when done self.environment.execute("pkill -f 'python main.py' || true") diff --git a/codeclash/games/corewar/main.py 
b/codeclash/games/corewar/main.py index 7e6e5617..a1193954 100644 --- a/codeclash/games/corewar/main.py +++ b/codeclash/games/corewar/main.py @@ -3,16 +3,14 @@ from pathlib import Path from codeclash.agents.abstract import Player -from codeclash.games.abstract import CodeGame +from codeclash.games.abstract import CodeGame, RoundData, RoundStats class CoreWarGame(CodeGame): name: str = "CoreWar" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) self.run_cmd_round: str = "./src/pmars" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -21,42 +19,41 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): else: self.run_cmd_round += f" -{arg} {val}" - def determine_winner( - self, result_output: str, agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: + result_output = result_outputs[0] # Get the first (and only) element self.logger.debug(f"Determining winner from result output: {result_output}") scores = [] n = len(agents) * 2 lines = result_output.strip().split("\n") + # Get the last n lines which contain the scores (closer to original) relevant_lines = lines[-n:] if len(lines) >= n else lines + relevant_lines = [l for l in relevant_lines if len(l.strip()) > 0] self.logger.debug(f"Relevant lines for scoring: {relevant_lines}") + # Go through each line; we assume score position is correlated with agent index for line in relevant_lines: match = re.search(r".*\sby\s.*\sscores\s(\d+)", line) if match: score = int(match.group(1)) scores.append(score) - self.logger.debug(f"Found score: {score} from line: {line}") - self.logger.debug(f"All scores: {scores}") if scores: - max_score_index = scores.index(max(scores)) - winner = 
agents[max_score_index].name - self.logger.debug( - f"Concluding winner: {winner} with index {max_score_index}" + if len(scores) != len(agents): + self.logger.error(f"Have {len(scores)} scores but {len(agents)} agents") + return RoundStats( + winner=agents[scores.index(max(scores))].name, + scores={agent.name: score for agent, score in zip(agents, scores)}, + details={"stdout": "\n".join(relevant_lines)}, ) - return {"winner": winner} else: self.logger.debug("No scores found, returning unknown") - return {"winner": "unknown"} + return RoundStats(winner="unknown", scores={agent.name: 0 for agent in agents}) - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def execute_round(self, agents: list[Player]) -> RoundData: args = [f"/{agent.name}/warriors/warrior.red" for agent in agents] - cmd = f"{self.run_cmd_round} {shlex.join(args)}" - self.logger.info(f"Running command: {cmd}") + cmd = f"{self.run_cmd_round} {shlex.join(args)} -r {self.game_config['sims_per_round']}" + self.logger.info(f"Running game: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response - # For CoreWar, log_output and result_output are the same - output = response["output"] - return {"log_output": output, "result_output": output} + return RoundData([response["output"]], [response["output"]]) diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py index 514e7ab2..821eccbd 100644 --- a/codeclash/games/robocode/main.py +++ b/codeclash/games/robocode/main.py @@ -1,19 +1,17 @@ -import subprocess +import re import time from pathlib import Path from codeclash.agents.abstract import Player -from codeclash.games.abstract import CodeGame -from codeclash.utils.environment import copy_file_to_container +from codeclash.games.abstract import CodeGame, RoundData, RoundStats +from codeclash.utils.environment import create_file_in_container class RoboCodeGame(CodeGame): name: str = "RoboCode" def __init__(self, config, *, 
tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) self.run_cmd_round: str = "./robocode.sh" for arg, val in self.game_config.get("args", {}).items(): if isinstance(val, bool): @@ -25,7 +23,7 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path): def _get_battle_config(self) -> str: default_battle_config = { "battle": { - "numRounds": 10, + "numRounds": self.game_config.get("sims_per_round", 100), "gunCoolingRate": 0.1, "rules": {"inactivityTime": 450, "hideEnemyNames": True}, }, @@ -55,23 +53,27 @@ def dict_to_lines(d, prefix=""): dict_to_lines(default_battle_config) return "\n".join(battle_lines) - def determine_winner( - self, result_output: str, agents: list[Player] - ) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: + result_output = result_outputs[0] # Get the first (and only) element self.logger.debug(f"Determining winner from result output: {result_output}") lines = result_output.strip().split("\n") - # Get the second line which contains the winner info (closer to original) - winner_line = lines[1] if len(lines) >= 2 else "" - self.logger.debug(f"Winner line: {winner_line}") - if winner_line: - winner = winner_line.split()[1].rsplit(".", 1)[0] - self.logger.debug(f"Concluding winner: {winner}") - return {"winner": winner} - else: - self.logger.debug("No winner line found, returning unknown") - return {"winner": "unknown"} - - def execute_round(self, agents: list[Player]) -> dict[str, str]: + + scores = {} + for line in lines: + line = line.strip() + if not re.match(r"^\d", line): + continue + match = re.search(r"(\d+)\S+\:\s(\S+)\s+(\d+)", line) + if match: + player = match.group(2).rsplit(".", 1)[0] + score = int(match.group(3)) + scores[player] = score + if int(match.group(1)) == 1: + winner = 
player + + return RoundStats(winner=winner, scores=scores, details={"stdout": "\n".join(lines)}) + + def execute_round(self, agents: list[Player]) -> RoundData: for agent in agents: # Copy the agent codebase into the game codebase and compile it for cmd in [ @@ -86,20 +88,16 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]: selected_robots = ",".join([f"{agent.name}.MyTank*" for agent in agents]) # Use timestamp for unique battle file name since rounds are managed by tournament battle_file = f"{self.game_id}-battle{int(time.time())}.battle" - with open(battle_file, "w") as f: - f.write( - f"""#Battle Properties + battle_content = f"""#Battle Properties {self._get_battle_config()} robocode.battle.selectedRobots={selected_robots} """ - ) - copy_file_to_container(self.environment, battle_file, f"battles/{battle_file}") - subprocess.run(f"rm -f {battle_file}", shell=True) + create_file_in_container(self.environment, content=battle_content, dest_path=f"battles/{battle_file}") # Run battle with results output to file results_file = f"results_{int(time.time())}.txt" cmd = f"{self.run_cmd_round} -battle {battle_file} -results {results_file}" - self.logger.info(f"Running command: {cmd}") + self.logger.info(f"Running game: {cmd}") response = self.environment.execute(cmd) assert response["returncode"] == 0, response @@ -110,4 +108,4 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]: # Clean up the results file self.environment.execute(f"rm -f {results_file}") - return {"log_output": response["output"], "result_output": result_output} + return RoundData([response["output"]], [result_output]) diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py index 8f893ee4..a70be082 100644 --- a/codeclash/games/robotrumble/main.py +++ b/codeclash/games/robotrumble/main.py @@ -1,52 +1,60 @@ import shlex +from collections import Counter from pathlib import Path from codeclash.agents.abstract import Player from 
codeclash.constants import RESULT_TIE -from codeclash.games.abstract import CodeGame +from codeclash.games.abstract import CodeGame, RoundData, RoundStats class RobotRumbleGame(CodeGame): name: str = "RobotRumble" def __init__(self, config, *, tournament_id: str, local_output_dir: Path): - super().__init__( - config, tournament_id=tournament_id, local_output_dir=local_output_dir - ) + super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir) assert len(config["players"]) == 2, "RobotRumble is a two-player game" self.run_cmd_round: str = "./rumblebot run term" - def determine_winner( - self, result_output: str, agents: list[Player] - ) -> dict[str, str]: - self.logger.debug(f"Determining winner from result output: {result_output}") - lines = result_output.strip().split("\n") - # Get the last 2 lines which contain the game result (same as original) - relevant_lines = lines[-2:] if len(lines) >= 2 else lines - log_text = "\n".join(relevant_lines) - self.logger.debug(f"Relevant lines: {log_text}") - - if "Blue won" in log_text: - winner = agents[0].name - self.logger.debug(f"Blue won - Concluding winner: {winner}") - return {"winner": winner} - elif "Red won" in log_text: - winner = agents[1].name - self.logger.debug(f"Red won - Concluding winner: {winner}") - return {"winner": winner} - elif "it was a tie" in log_text: - self.logger.debug("Game was a tie") - return {"winner": RESULT_TIE} - else: - self.logger.debug("No clear result found, treating as tie") - return {"winner": RESULT_TIE} - - def execute_round(self, agents: list[Player]) -> dict[str, str]: + def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats: + winners = [] + for ro in result_outputs: + lines = ro.strip().split("\n") + + # Get the last 2 lines which contain the game result (same as original) + relevant_lines = lines[-2:] if len(lines) >= 2 else lines + log_text = "\n".join(relevant_lines) + + if "Blue won" in log_text: + winner = 
agents[0].name + winners.append(winner) + elif "Red won" in log_text: + winner = agents[1].name + winners.append(winner) + elif "it was a tie" in log_text: + winners.append(RESULT_TIE) + else: + winners.append(RESULT_TIE) + + # Count occurrences of each winner + counts = Counter(winners) + + # Find all winners with the maximum count + max_count = max(counts.values()) + top_winners = [w for w, c in counts.items() if c == max_count] + + # If multiple winners have the same count, return RESULT_TIE + final_winner = RESULT_TIE if len(top_winners) > 1 else top_winners[0] + + return RoundStats(winner=final_winner, scores=dict(counts)) + + def execute_round(self, agents: list[Player]) -> RoundData: + outputs = [] args = [f"/{agent.name}/robot.py" for agent in agents] cmd = f"{self.run_cmd_round} {shlex.join(args)}" - self.logger.info(f"Running command: {cmd}") - response = self.environment.execute(cmd) - assert response["returncode"] == 0, response - # For RobotRumble, log_output and result_output are the same - output = response["output"] - return {"log_output": output, "result_output": output} + self.logger.info(f"Running game: {cmd}") + for _ in range(self.game_config.get("sims_per_round", 100)): + response = self.environment.execute(cmd) + assert response["returncode"] == 0, response + outputs.append(response["output"]) + # For RobotRumble, log_outputs and result_outputs are the same + return RoundData(logs=outputs, results=outputs) diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py index 2a5e3043..e95ef3dc 100644 --- a/codeclash/tournaments/abstract.py +++ b/codeclash/tournaments/abstract.py @@ -4,7 +4,7 @@ from pathlib import Path from codeclash.constants import DIR_LOGS -from codeclash.utils.environment import create_file_on_container +from codeclash.utils.environment import create_file_in_container from codeclash.utils.log import get_logger @@ -12,34 +12,26 @@ class AbstractTournament: def __init__(self, config: dict, *, name: str, 
**kwargs): self.config: dict = config self.name: str = name - self.tournament_id: str = f"{self.name}{time.strftime('%y%m%d%H%M%S')}" - self.local_output_dir: Path = ( - DIR_LOGS / getpass.getuser() / self.tournament_id - ).resolve() + self.tournament_id: str = f"{self.name}.{config['game']['name']}.{time.strftime('%y%m%d%H%M%S')}" + self.local_output_dir: Path = (DIR_LOGS / getpass.getuser() / self.tournament_id).resolve() self._metadata: dict = { "name": self.name, "tournament_id": self.tournament_id, "config": self.config, "created_timestamp": int(time.time()), } - self.logger = get_logger( - self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆" - ) + self.logger = get_logger(self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆") def get_metadata(self) -> dict: return self._metadata - def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str) -> None: + def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str, dest_path: str = None) -> None: """Copy round log to agent environment.""" try: - create_file_on_container( + create_file_in_container( container=agent.environment, content=log_output, - dest_path=f"logs/round_{round_num}.log", + dest_path=dest_path if dest_path else f"logs/round_{round_num}.log", ) except Exception: - self.logger.error( - f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}" - ) - else: - self.logger.info(f"Created round log in {agent.name}'s container.") + self.logger.error(f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}") diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp.py similarity index 72% rename from codeclash/tournaments/pvp_training.py rename to codeclash/tournaments/pvp.py index 67027c98..f7c4e73e 100644 --- a/codeclash/tournaments/pvp_training.py +++ b/codeclash/tournaments/pvp.py @@ -11,14 +11,13 @@ from codeclash.games import get_game from codeclash.games.abstract import 
CodeGame from codeclash.tournaments.abstract import AbstractTournament +from codeclash.utils.environment import copy_to_container from codeclash.utils.log import get_logger -class PvpTraining(AbstractTournament): - def __init__( - self, config: dict, *, cleanup: bool = False, push_agent: bool = False - ): - super().__init__(config, name="PvpTraining") +class PvpTournament(AbstractTournament): + def __init__(self, config: dict, *, cleanup: bool = False, push_agent: bool = False): + super().__init__(config, name="PvpTournament") self.cleanup_on_end = cleanup self.push_agent = push_agent self.game: CodeGame = get_game( @@ -50,9 +49,7 @@ def get_metadata(self) -> dict: def get_agent(self, agent_config: dict, prompts: dict) -> Player: """Create an agent with environment and game context.""" - environment = self.game.get_environment( - f"{self.game.game_id}.{agent_config['name']}" - ) + environment = self.game.get_environment(f"{self.game.game_id}.{agent_config['name']}") game_context = GameContext( id=self.game.game_id, @@ -79,22 +76,28 @@ def run(self) -> None: def run_training_round(self, round_num: int) -> None: """Execute a single training round.""" # Run the game round and get results - result = self.game.run_round(self.agents) - log_output = result["log_output"] - result_output = result["result_output"] - winner = result["winner"] + record = self.game.run_round(self.agents) # Handle bookkeeping that was previously in the game - self.scoreboard.append((round_num, winner)) - self.logger.info(f"Round {round_num} winner: {winner}") + self.scoreboard.append(record.stats) + self.logger.info(f"Round {round_num}:\n{record.stats}") + + # Create directory for round logs + (self.game.log_local / f"round_{round_num}").mkdir(parents=True, exist_ok=True) # Write log to file - round_log_path = self.game.log_local / f"round_{round_num}.log" - round_log_path.write_text(log_output) + for idx, lo in enumerate(record.data.logs): + round_log_path = self.game.log_local / 
f"round_{round_num}" / f"sim_{idx}.log" + round_log_path.write_text(lo) # Copy log to agent environments for agent in self.agents: - self._copy_game_log_to_agent(agent, round_num, log_output) + self.logger.info(f"Copying round {round_num} log(s) to {agent.name}'s container...") + copy_to_container( + agent.environment, + self.game.log_local / f"round_{round_num}", + f"logs/round_{round_num}/", + ) for agent in self.agents: self.run_agent(agent, round_num) @@ -109,9 +112,7 @@ def run_agent(self, agent: Player, round_num: int) -> None: def end(self) -> None: """Save output files, clean up game resources and push agents if requested.""" - (self.local_output_dir / "metadata.json").write_text( - json.dumps(self.game.get_metadata()) - ) + (self.local_output_dir / "metadata.json").write_text(json.dumps(self.game.get_metadata())) self.game.end(self.cleanup_on_end) if self.push_agent: for agent in self.agents: diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player.py similarity index 79% rename from codeclash/tournaments/single_player_training.py rename to codeclash/tournaments/single_player.py index e16391c6..c0e74542 100644 --- a/codeclash/tournaments/single_player_training.py +++ b/codeclash/tournaments/single_player.py @@ -13,6 +13,7 @@ from codeclash.games.abstract import CodeGame from codeclash.tournaments.abstract import AbstractTournament from codeclash.tournaments.utils.git_utils import filter_git_diff +from codeclash.utils.environment import copy_to_container class SinglePlayerTraining(AbstractTournament): @@ -61,9 +62,7 @@ def get_game_context(self, agent_config: dict, *, round: int) -> GameContext: def get_agent(self, agent_config: dict, round: int) -> Player: """Create an agent with environment and game context.""" - environment = self.game.get_environment( - f"{self.game.game_id}.{agent_config['name']}" - ) + environment = self.game.get_environment(f"{self.game.game_id}.{agent_config['name']}") game_context = 
self.get_game_context(agent_config, round=round) return get_agent(agent_config, game_context, environment) @@ -88,20 +87,24 @@ def run(self): def run_training_round(self, round_num: int) -> None: """Execute a single training round, i.e., run the game, then run the agent.""" # Run the game round and get results - result = self.game.run_round([self.agent, self.mirror_agent]) - log_output = result["log_output"] - winner = result["winner"] + record = self.game.run_round([self.agent, self.mirror_agent]) # Handle bookkeeping that was previously in the game - self.scoreboard.append((round_num, winner)) - self.logger.info(f"Round {round_num} winner: {winner}") + self.scoreboard.append(record.stats) + self.logger.info(f"Round {round_num}:\n{record.stats}") # Write log to file - round_log_path = self.game.log_local / f"round_{round_num}.log" - round_log_path.write_text(log_output) + for idx, lo in enumerate(record.logs): + round_log_path = self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log" + round_log_path.write_text(lo) # Copy log to main agent environment only - self._copy_game_log_to_agent(self.agent, round_num, log_output) + self.logger.info(f"Copying round {round_num} log(s) to {self.agent.name}'s container...") + copy_to_container( + self.agent, + self.game.log_local / f"round_{round_num}", + f"logs/round_{round_num}/", + ) self.run_main_agent(round_num) mirror_agent_state = round_num - 1 if round_num > 1 else 0 @@ -137,28 +140,19 @@ def evaluate(self, n_repetitions: int = 3) -> None: p2_config["name"] = "p2" p2 = self.get_dummy_agent() matrix = { - p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)} - for p1_round in range(0, self.rounds + 1) + p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)} for p1_round in range(0, self.rounds + 1) } for p1_round in range(0, self.rounds + 1): for p2_round in range(0, self.rounds + 1): - self.logger.info( - f"Evaluating agent at round {p1_round} against agent at round {p2_round}" - ) - 
p1_patch = ( - self.agent.get_metadata()["diff"][p1_round] if p1_round > 0 else "" - ) - p2_patch = ( - self.agent.get_metadata()["diff"][p2_round] if p2_round > 0 else "" - ) + self.logger.info(f"Evaluating agent at round {p1_round} against agent at round {p2_round}") + p1_patch = self.agent.get_metadata()["diff"][p1_round] if p1_round > 0 else "" + p2_patch = self.agent.get_metadata()["diff"][p2_round] if p2_round > 0 else "" p1.reset_and_apply_patch(p1_patch) p2.reset_and_apply_patch(p2_patch) for i_repetition in range(n_repetitions): - result = self.game.run_round([p1, p2]) - winner = result["winner"] - self.logger.info( - f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}" - ) + record = self.game.run_round([p1, p2]) + winner = record.stats.winner + self.logger.info(f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}") matrix[p1_round][p2_round].append(winner) self.logger.info(f"Evaluation matrix: {matrix}") self._metadata.setdefault("evaluation", {})["matrix"] = matrix diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py index 1d6f6aa1..64f8dfb6 100644 --- a/codeclash/utils/environment.py +++ b/codeclash/utils/environment.py @@ -6,9 +6,7 @@ from minisweagent.environments.docker import DockerEnvironment -def assert_zero_exit_code( - result: dict, *, logger: logging.Logger | None = None -) -> dict: +def assert_zero_exit_code(result: dict, *, logger: logging.Logger | None = None) -> dict: if result.get("returncode", 0) != 0: msg = f"Command failed with exit code {result.get('returncode')}:\n{result.get('output')}" if logger is not None: @@ -36,18 +34,14 @@ def copy_between_containers( f"{src_container.container_id}:{src_path}", str(temp_path), ] - result_src = subprocess.run( - cmd_src, check=False, capture_output=True, text=True - ) + result_src = subprocess.run(cmd_src, check=False, capture_output=True, text=True) if result_src.returncode != 0: raise RuntimeError( f"Failed to copy from 
{src_container.container_id} to local temp: {result_src.stdout}{result_src.stderr}" ) # Ensure destination folder exists - assert_zero_exit_code( - dest_container.execute(f"mkdir -p {Path(dest_path).parent}") - ) + assert_zero_exit_code(dest_container.execute(f"mkdir -p {Path(dest_path).parent}")) # Copy from temporary local directory to destination container cmd_dest = [ @@ -56,22 +50,22 @@ def copy_between_containers( str(temp_path), f"{dest_container.container_id}:{dest_path}", ] - result_dest = subprocess.run( - cmd_dest, check=False, capture_output=True, text=True - ) + result_dest = subprocess.run(cmd_dest, check=False, capture_output=True, text=True) if result_dest.returncode != 0: raise RuntimeError( f"Failed to copy from local temp to {dest_container.container_id}: {result_dest.stdout}{result_dest.stderr}" ) -def copy_file_to_container( +def copy_to_container( container: DockerEnvironment, src_path: str | Path, dest_path: str | Path, ): """ - Copy a file from the local filesystem to a Docker container. + Copy a file or directory from the local filesystem to a Docker container. + + The copy operation is recursive for directories. 
""" if not str(dest_path).startswith("/"): # If not an absolute path, assume relative to container's cwd @@ -115,7 +109,7 @@ def copy_file_from_container( return result -def create_file_on_container( +def create_file_in_container( container: DockerEnvironment, *, content: str, @@ -130,6 +124,6 @@ def create_file_on_container( tmp_file_path = Path(tmp_file.name) try: - copy_file_to_container(container, tmp_file_path, dest_path) + copy_to_container(container, tmp_file_path, dest_path) finally: tmp_file_path.unlink() # Clean up the temporary file diff --git a/codeclash/utils/log.py b/codeclash/utils/log.py index 001a0e11..bd5cacca 100644 --- a/codeclash/utils/log.py +++ b/codeclash/utils/log.py @@ -64,9 +64,7 @@ def format(self, record: logging.LogRecord) -> str: return capture.get().rstrip() -def get_logger( - name: str, *, emoji: str = "", log_path: Path | None = None -) -> logging.Logger: +def get_logger(name: str, *, emoji: str = "", log_path: Path | None = None) -> logging.Logger: """Get logger. Use this instead of `logging.getLogger` to ensure that the logger is set up with the correct handlers. 
""" diff --git a/codeclash/viewer/app.py b/codeclash/viewer/app.py index a8e62920..addfc2aa 100644 --- a/codeclash/viewer/app.py +++ b/codeclash/viewer/app.py @@ -8,7 +8,7 @@ import json from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any from flask import Flask, jsonify, render_template, request @@ -28,7 +28,7 @@ def is_probably_failed_run(log_dir: Path) -> bool: return not metadata_file.exists() -def get_round_count_from_metadata(log_dir: Path) -> Optional[int]: +def get_round_count_from_metadata(log_dir: Path) -> int | None: """Extract round count from metadata.json if it exists""" metadata_file = log_dir / "metadata.json" if not metadata_file.exists(): @@ -45,9 +45,9 @@ def get_round_count_from_metadata(log_dir: Path) -> Optional[int]: class GameMetadata: """Metadata about a game session""" - results: Dict[str, Any] + results: dict[str, Any] main_log: str - rounds: List[Dict[str, Any]] + rounds: list[dict[str, Any]] @dataclass @@ -58,10 +58,10 @@ class TrajectoryInfo: round_num: int api_calls: int cost: float - exit_status: Optional[str] - submission: Optional[str] - memory: Optional[str] - messages: List[Dict[str, Any]] + exit_status: str | None + submission: str | None + memory: str | None + messages: list[dict[str, Any]] class LogParser: @@ -86,9 +86,7 @@ def parse_game_metadata(self) -> GameMetadata: # Parse main.log if it exists main_log_file = self.log_dir / "game.log" - main_log = ( - main_log_file.read_text() if main_log_file.exists() else "No main log found" - ) + main_log = main_log_file.read_text() if main_log_file.exists() else "No main log found" # Parse round logs rounds = [] @@ -99,9 +97,7 @@ def parse_game_metadata(self) -> GameMetadata: return GameMetadata(results=results, main_log=main_log, rounds=rounds) - def parse_trajectory( - self, player_id: int, round_num: int - ) -> Optional[TrajectoryInfo]: + def parse_trajectory(self, player_id: int, round_num: int) -> 
TrajectoryInfo | None: """Parse a specific trajectory file""" # Try both .json and .log extensions for ext in [".json", ".log"]: @@ -128,7 +124,7 @@ def parse_trajectory( return None - def get_available_trajectories(self) -> List[tuple]: + def get_available_trajectories(self) -> list[tuple]: """Get list of available trajectory files as (player_id, round_num) tuples""" trajectories = [] for traj_file in self.log_dir.glob("p*_r*.traj.*"): @@ -186,9 +182,7 @@ def index(): # Extract just the names for backwards compatibility log_folders = [folder["name"] for folder in log_folders_info] - selected_folder = request.args.get( - "folder", log_folders[0] if log_folders else None - ) + selected_folder = request.args.get("folder", log_folders[0] if log_folders else None) if not selected_folder or not (logs_dir / selected_folder).exists(): return render_template("no_logs.html", log_folders=log_folders) diff --git a/codeclash/viewer/static/css/style.css b/codeclash/viewer/static/css/style.css index 6228241b..315483a0 100644 --- a/codeclash/viewer/static/css/style.css +++ b/codeclash/viewer/static/css/style.css @@ -46,7 +46,8 @@ } body { - font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', sans-serif; + font-family: + -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", sans-serif; line-height: 1.6; color: var(--text-primary); background-color: var(--bg-primary); @@ -159,7 +160,8 @@ body { .metadata-display pre { margin: 0; - font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace; + font-family: + "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, monospace; font-size: 0.875rem; line-height: 1.5; } @@ -226,7 +228,12 @@ details summary { /* Round separator */ .round-separator { height: 2px; - background: linear-gradient(90deg, transparent, var(--border-color), transparent); + background: linear-gradient( + 90deg, + transparent, + var(--border-color), + transparent + ); margin: 2rem 0; border-radius: 1px; } @@ -444,7 
+451,8 @@ details summary { .log-content pre { white-space: pre-wrap; word-wrap: break-word; - font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace; + font-family: + "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, monospace; font-size: 0.875rem; line-height: 1.5; margin: 0; @@ -517,7 +525,10 @@ details summary { /* Smooth transitions */ * { - transition: color 0.3s ease, background-color 0.3s ease, border-color 0.3s ease; + transition: + color 0.3s ease, + background-color 0.3s ease, + border-color 0.3s ease; } /* Focus styles for accessibility */ diff --git a/codeclash/viewer/static/js/app.js b/codeclash/viewer/static/js/app.js index a503b542..b65ff030 100644 --- a/codeclash/viewer/static/js/app.js +++ b/codeclash/viewer/static/js/app.js @@ -2,257 +2,274 @@ // Theme management function initializeTheme() { - // Check for saved theme preference or default to 'light' - const savedTheme = localStorage.getItem('theme') || 'light'; - setTheme(savedTheme); + // Check for saved theme preference or default to 'light' + const savedTheme = localStorage.getItem("theme") || "light"; + setTheme(savedTheme); } function setTheme(theme) { - document.documentElement.setAttribute('data-theme', theme); - localStorage.setItem('theme', theme); - - // Update theme toggle button - const themeToggle = document.getElementById('theme-toggle'); - const themeIcon = themeToggle.querySelector('.theme-icon'); - - if (theme === 'dark') { - themeIcon.textContent = '☀️'; - themeToggle.setAttribute('aria-label', 'Switch to light mode'); - } else { - themeIcon.textContent = '🌙'; - themeToggle.setAttribute('aria-label', 'Switch to dark mode'); - } + document.documentElement.setAttribute("data-theme", theme); + localStorage.setItem("theme", theme); + + // Update theme toggle button + const themeToggle = document.getElementById("theme-toggle"); + const themeIcon = themeToggle.querySelector(".theme-icon"); + + if (theme === "dark") { + themeIcon.textContent = 
"☀️"; + themeToggle.setAttribute("aria-label", "Switch to light mode"); + } else { + themeIcon.textContent = "🌙"; + themeToggle.setAttribute("aria-label", "Switch to dark mode"); + } } function toggleTheme() { - const currentTheme = document.documentElement.getAttribute('data-theme'); - const newTheme = currentTheme === 'dark' ? 'light' : 'dark'; - setTheme(newTheme); + const currentTheme = document.documentElement.getAttribute("data-theme"); + const newTheme = currentTheme === "dark" ? "light" : "dark"; + setTheme(newTheme); } // Folder selection function changeFolder() { - const select = document.getElementById('folder-select'); - const selectedFolder = select.value; - - if (selectedFolder) { - // Reload page with new folder parameter - const url = new URL(window.location); - url.searchParams.set('folder', selectedFolder); - window.location.href = url.toString(); - } + const select = document.getElementById("folder-select"); + const selectedFolder = select.value; + + if (selectedFolder) { + // Reload page with new folder parameter + const url = new URL(window.location); + url.searchParams.set("folder", selectedFolder); + window.location.href = url.toString(); + } } // Enhanced foldout behavior function initializeFoldouts() { - // Add smooth animations to details elements - const detailsElements = document.querySelectorAll('details'); - - detailsElements.forEach(details => { - const summary = details.querySelector('summary'); - - // Add click analytics/feedback - summary.addEventListener('click', function(e) { - // Small delay to allow default behavior - setTimeout(() => { - // Scroll into view if needed - if (details.open) { - const rect = details.getBoundingClientRect(); - const isInViewport = rect.top >= 0 && rect.bottom <= window.innerHeight; - - if (!isInViewport) { - details.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' - }); - } - } - }, 100); - }); + // Add smooth animations to details elements + const detailsElements = 
document.querySelectorAll("details"); + + detailsElements.forEach((details) => { + const summary = details.querySelector("summary"); + + // Add click analytics/feedback + summary.addEventListener("click", function (e) { + // Small delay to allow default behavior + setTimeout(() => { + // Scroll into view if needed + if (details.open) { + const rect = details.getBoundingClientRect(); + const isInViewport = + rect.top >= 0 && rect.bottom <= window.innerHeight; + + if (!isInViewport) { + details.scrollIntoView({ + behavior: "smooth", + block: "nearest", + }); + } + } + }, 100); }); + }); } // Keyboard shortcuts function initializeKeyboardShortcuts() { - document.addEventListener('keydown', function(e) { - // Ctrl/Cmd + D: Toggle dark mode - if ((e.ctrlKey || e.metaKey) && e.key === 'd') { - e.preventDefault(); - toggleTheme(); - } - - // Escape: Close all open details - if (e.key === 'Escape') { - const openDetails = document.querySelectorAll('details[open]'); - openDetails.forEach(details => { - details.removeAttribute('open'); - }); - } - - // Ctrl/Cmd + E: Expand all details - if ((e.ctrlKey || e.metaKey) && e.key === 'e') { - e.preventDefault(); - const allDetails = document.querySelectorAll('details'); - allDetails.forEach(details => { - details.setAttribute('open', ''); - }); - } + document.addEventListener("keydown", function (e) { + // Ctrl/Cmd + D: Toggle dark mode + if ((e.ctrlKey || e.metaKey) && e.key === "d") { + e.preventDefault(); + toggleTheme(); + } - // Ctrl/Cmd + Shift + E: Collapse all details - if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === 'E') { - e.preventDefault(); - const allDetails = document.querySelectorAll('details'); - allDetails.forEach(details => { - details.removeAttribute('open'); - }); - } + // Escape: Close all open details + if (e.key === "Escape") { + const openDetails = document.querySelectorAll("details[open]"); + openDetails.forEach((details) => { + details.removeAttribute("open"); + }); + } + // Ctrl/Cmd + E: Expand 
all details + if ((e.ctrlKey || e.metaKey) && e.key === "e") { + e.preventDefault(); + const allDetails = document.querySelectorAll("details"); + allDetails.forEach((details) => { + details.setAttribute("open", ""); + }); + } - }); + // Ctrl/Cmd + Shift + E: Collapse all details + if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === "E") { + e.preventDefault(); + const allDetails = document.querySelectorAll("details"); + allDetails.forEach((details) => { + details.removeAttribute("open"); + }); + } + }); } - - // Code highlighting (basic syntax highlighting) function initializeCodeHighlighting() { - const codeBlocks = document.querySelectorAll('.code-block code, .message-text pre'); + const codeBlocks = document.querySelectorAll( + ".code-block code, .message-text pre", + ); - codeBlocks.forEach(block => { - const text = block.textContent; + codeBlocks.forEach((block) => { + const text = block.textContent; - // Simple bash highlighting - if (text.includes('#!/bin/bash') || text.includes('```bash')) { - block.classList.add('language-bash'); - highlightBash(block); - } + // Simple bash highlighting + if (text.includes("#!/bin/bash") || text.includes("```bash")) { + block.classList.add("language-bash"); + highlightBash(block); + } - // Simple Python highlighting - if (text.includes('def ') || text.includes('import ') || text.includes('python')) { - block.classList.add('language-python'); - highlightPython(block); - } - }); + // Simple Python highlighting + if ( + text.includes("def ") || + text.includes("import ") || + text.includes("python") + ) { + block.classList.add("language-python"); + highlightPython(block); + } + }); } function highlightBash(block) { - let html = block.innerHTML; + let html = block.innerHTML; - // Commands - html = html.replace(/\b(ls|cd|cat|grep|sed|awk|find|mkdir|rm|cp|mv|chmod|echo|export)\b/g, - '$1'); + // Commands + html = html.replace( + /\b(ls|cd|cat|grep|sed|awk|find|mkdir|rm|cp|mv|chmod|echo|export)\b/g, + '$1', + ); - // Flags - 
html = html.replace(/\s(-[a-zA-Z]+)/g, - ' $1'); + // Flags + html = html.replace( + /\s(-[a-zA-Z]+)/g, + ' $1', + ); - block.innerHTML = html; + block.innerHTML = html; } function highlightPython(block) { - let html = block.innerHTML; + let html = block.innerHTML; - // Keywords - html = html.replace(/\b(def|class|import|from|if|else|elif|for|while|try|except|finally|return|yield|with|as|pass|break|continue|lambda|global|nonlocal)\b/g, - '$1'); + // Keywords + html = html.replace( + /\b(def|class|import|from|if|else|elif|for|while|try|except|finally|return|yield|with|as|pass|break|continue|lambda|global|nonlocal)\b/g, + '$1', + ); - // Strings - html = html.replace(/(["'])((?:\\.|(?!\1)[^\\])*?)\1/g, - '$1$2$1'); + // Strings + html = html.replace( + /(["'])((?:\\.|(?!\1)[^\\])*?)\1/g, + '$1$2$1', + ); - block.innerHTML = html; + block.innerHTML = html; } // Performance monitoring function initializePerformanceMonitoring() { - // Log page load time - window.addEventListener('load', function() { - const loadTime = performance.now(); - console.log(`Page loaded in ${loadTime.toFixed(2)}ms`); - - // Count elements for performance insight - const messageCount = document.querySelectorAll('.message-block').length; - const foldoutCount = document.querySelectorAll('details').length; - - console.log(`Rendered ${messageCount} messages and ${foldoutCount} foldouts`); - }); + // Log page load time + window.addEventListener("load", function () { + const loadTime = performance.now(); + console.log(`Page loaded in ${loadTime.toFixed(2)}ms`); + + // Count elements for performance insight + const messageCount = document.querySelectorAll(".message-block").length; + const foldoutCount = document.querySelectorAll("details").length; + + console.log( + `Rendered ${messageCount} messages and ${foldoutCount} foldouts`, + ); + }); } // Message expand/collapse functionality function expandMessage(clickedElement) { - const messageContent = clickedElement.closest('.message-content'); - const 
previewShort = messageContent.querySelector('.message-preview-short'); - const contentFull = messageContent.querySelector('.message-content-full'); - const contentExpanded = messageContent.querySelector('.message-content-expanded'); - - // Expanding - hide preview, show full content - if (previewShort) previewShort.style.display = 'none'; - if (contentFull) contentFull.style.display = 'block'; - if (contentExpanded) contentExpanded.style.display = 'block'; - - // Smooth scroll to keep the content in view - setTimeout(() => { - messageContent.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' - }); - }, 100); + const messageContent = clickedElement.closest(".message-content"); + const previewShort = messageContent.querySelector(".message-preview-short"); + const contentFull = messageContent.querySelector(".message-content-full"); + const contentExpanded = messageContent.querySelector( + ".message-content-expanded", + ); + + // Expanding - hide preview, show full content + if (previewShort) previewShort.style.display = "none"; + if (contentFull) contentFull.style.display = "block"; + if (contentExpanded) contentExpanded.style.display = "block"; + + // Smooth scroll to keep the content in view + setTimeout(() => { + messageContent.scrollIntoView({ + behavior: "smooth", + block: "nearest", + }); + }, 100); } function collapseMessage(clickedElement) { - const messageContent = clickedElement.closest('.message-content'); - const previewShort = messageContent.querySelector('.message-preview-short'); - const contentFull = messageContent.querySelector('.message-content-full'); - const contentExpanded = messageContent.querySelector('.message-content-expanded'); + const messageContent = clickedElement.closest(".message-content"); + const previewShort = messageContent.querySelector(".message-preview-short"); + const contentFull = messageContent.querySelector(".message-content-full"); + const contentExpanded = messageContent.querySelector( + ".message-content-expanded", + 
); + + // Collapsing - show preview, hide full content + if (contentFull) contentFull.style.display = "none"; + if (contentExpanded) contentExpanded.style.display = "none"; + if (previewShort) previewShort.style.display = "block"; + + // Smooth scroll to keep the content in view + setTimeout(() => { + messageContent.scrollIntoView({ + behavior: "smooth", + block: "nearest", + }); + }, 100); +} - // Collapsing - show preview, hide full content - if (contentFull) contentFull.style.display = 'none'; - if (contentExpanded) contentExpanded.style.display = 'none'; - if (previewShort) previewShort.style.display = 'block'; +function collapseTrajectoryMessages(clickedElement) { + // Find the parent trajectory messages foldout + const trajectoryFoldout = clickedElement.closest( + ".trajectory-messages-foldout", + ); + + if (trajectoryFoldout) { + // Close the details element + trajectoryFoldout.removeAttribute("open"); - // Smooth scroll to keep the content in view + // Smooth scroll to the trajectory header setTimeout(() => { - messageContent.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' + const trajectoryHeader = trajectoryFoldout.closest(".trajectory-header"); + if (trajectoryHeader) { + trajectoryHeader.scrollIntoView({ + behavior: "smooth", + block: "nearest", }); + } }, 100); + } } -function collapseTrajectoryMessages(clickedElement) { - // Find the parent trajectory messages foldout - const trajectoryFoldout = clickedElement.closest('.trajectory-messages-foldout'); - - if (trajectoryFoldout) { - // Close the details element - trajectoryFoldout.removeAttribute('open'); - - // Smooth scroll to the trajectory header - setTimeout(() => { - const trajectoryHeader = trajectoryFoldout.closest('.trajectory-header'); - if (trajectoryHeader) { - trajectoryHeader.scrollIntoView({ - behavior: 'smooth', - block: 'nearest' - }); - } - }, 100); - } -} - - - // Initialize everything when DOM is loaded -document.addEventListener('DOMContentLoaded', function() { - 
initializeTheme(); - initializeFoldouts(); - initializeKeyboardShortcuts(); - initializeCodeHighlighting(); - initializePerformanceMonitoring(); - - console.log('CodeClash Trajectory Viewer initialized'); - console.log('Keyboard shortcuts:'); - console.log(' Ctrl/Cmd + D: Toggle dark mode'); - console.log(' Ctrl/Cmd + E: Expand all sections'); - console.log(' Ctrl/Cmd + Shift + E: Collapse all sections'); - console.log(' Escape: Close all sections'); +document.addEventListener("DOMContentLoaded", function () { + initializeTheme(); + initializeFoldouts(); + initializeKeyboardShortcuts(); + initializeCodeHighlighting(); + initializePerformanceMonitoring(); + + console.log("CodeClash Trajectory Viewer initialized"); + console.log("Keyboard shortcuts:"); + console.log(" Ctrl/Cmd + D: Toggle dark mode"); + console.log(" Ctrl/Cmd + E: Expand all sections"); + console.log(" Ctrl/Cmd + Shift + E: Collapse all sections"); + console.log(" Escape: Close all sections"); }); diff --git a/configs/battlecode.yaml b/configs/battlecode.yaml index ed6386cf..74f1c1c5 100644 --- a/configs/battlecode.yaml +++ b/configs/battlecode.yaml @@ -1,5 +1,6 @@ game: name: BattleCode + sims_per_round: 2 args: maps: quack tournament: diff --git a/configs/battlesnake.yaml b/configs/battlesnake.yaml index 9016df1e..daee02ce 100644 --- a/configs/battlesnake.yaml +++ b/configs/battlesnake.yaml @@ -1,5 +1,6 @@ game: name: BattleSnake + sims_per_round: 10 args: width: 11 height: 11 diff --git a/configs/battlesnake_dummy.yaml b/configs/battlesnake_dummy.yaml index 05a8d4b4..65c6da12 100644 --- a/configs/battlesnake_dummy.yaml +++ b/configs/battlesnake_dummy.yaml @@ -1,5 +1,6 @@ game: name: BattleSnake + sims_per_round: 10 args: width: 11 height: 11 diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml index e5f6f370..66d91f1a 100644 --- a/configs/battlesnake_single_player.yaml +++ b/configs/battlesnake_single_player.yaml @@ -1,5 +1,6 @@ game: name: BattleSnake + 
sims_per_round: 10 args: width: 11 height: 11 diff --git a/configs/corewar.yaml b/configs/corewar.yaml index daaf3e2e..4fe68ad7 100644 --- a/configs/corewar.yaml +++ b/configs/corewar.yaml @@ -1,7 +1,6 @@ game: name: CoreWar - args: - r: 100 + sims_per_round: 10 tournament: rounds: 3 players: diff --git a/configs/robocode.yaml b/configs/robocode.yaml index c401cdfd..c1e1c90c 100644 --- a/configs/robocode.yaml +++ b/configs/robocode.yaml @@ -1,8 +1,8 @@ game: name: RoboCode + sims_per_round: 10 battle: battle: - numRounds: 10 gunCoolingRate: 0.1 rules: inactivityTime: 450 @@ -15,6 +15,7 @@ game: nosound: true tournament: rounds: 3 + sims_per_round: 10 players: - agent: dummy name: p1 diff --git a/configs/robotrumble.yaml b/configs/robotrumble.yaml index 84e9e06f..a6fac21b 100644 --- a/configs/robotrumble.yaml +++ b/configs/robotrumble.yaml @@ -1,5 +1,6 @@ game: name: RobotRumble + sims_per_round: 10 tournament: rounds: 3 players: diff --git a/main.py b/main.py index 14245d38..9d70146d 100644 --- a/main.py +++ b/main.py @@ -2,13 +2,13 @@ import yaml -from codeclash.tournaments.pvp_training import PvpTraining +from codeclash.tournaments.pvp import PvpTournament def main(config_path: str, *, cleanup: bool = False, push_agent: bool = False): - with open(config_path, "r") as f: + with open(config_path) as f: config = yaml.safe_load(f) - training = PvpTraining(config, cleanup=cleanup, push_agent=push_agent) + training = PvpTournament(config, cleanup=cleanup, push_agent=push_agent) training.run() diff --git a/main_single_player.py b/main_single_player.py index 6c7ff720..d02f3a41 100644 --- a/main_single_player.py +++ b/main_single_player.py @@ -6,7 +6,7 @@ def main(config_path: str, cleanup: bool = False): - with open(config_path, "r") as f: + with open(config_path) as f: config = yaml.safe_load(f) training = SinglePlayerTraining(config, cleanup) training.run() diff --git a/pyproject.toml b/pyproject.toml index 8bc097e8..66848c3e 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -36,7 +36,152 @@ dev = [ "pytest", "pytest-cov", "pytest-xdist", + "ruff", ] [tool.setuptools.packages.find] include = ["codeclash*"] + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + # ---- project specific ---- + "tests/test_data", + # Exclude commands so they don't get the __future__ imports + "config/commands", +] + +line-length = 120 +indent-width = 4 + +target-version = "py310" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. 
+# I001: Isort, I002: required import +select = [ + # Error (E) + "E", + # Error (PLE) + "PLE", + # pycodestyle + "E713", # not in + "E714", # is not + "E711", # comparison with None + # pyflakes + "F821", + "F822", + "F401", # unused-import + "F841", # unused var + "F541", # f-string without args + "F901", # raise NotImplemented should be raise NotImplementedError + # isort + "I001", # isort + "I002", # required import + # pyupgrade and related + "UP", # pyupgrade + "C401", # flake8-comprehensions: unnecessary-generator-set + "C402", # flake8-comprehensions: unnecessary-generator-dict + "C403", # flake8-comprehensions: unnecessary-list-comprehension-set + "C404", # flake8-comprehensions: unnecessary-list-comprehension-dict + "C405", # flake8-comprehensions: unnecessary-literal-set + "F632", # pyflakes: is-literal + "W605", # pycodestyle: invalid-escape-sequence + # bugbear + "B006", # mutable default + "B007", # unused loop var + "B009", # getattr with constant + # flake8-errmsg + "EM", + # flake8-return + "RET", + # RUF + "RUF019", # unneeded key in dict check + # pytest + "PT", + # flake8-simplify (SIM) + "SIM201", + # flake8-use-pathlib + "PTH100", + "PTH110", + "PTH111", + "PTH112", + "PTH113", + "PTH114", + "PTH117", + "PTH118", + "PTH119", + "PTH120", + "PTH121", + "PTH122", + "PTH202", + "PTH203", + "PTH204", + "PTH205", +] +ignore = [ + # flake8-return + "RET505", # can't autofix + "RET506", # can't autofix + "RET507", # can't autofix + # error (E) + "E501", # line too long + "E402", # import not on top of file + "E722", # bare except + "E741", # ambiguous symbol + # pytest + "PT011", + "PT018", + # flake8-errmsg + "EM101", # exception must not use a string literal + "EM102", # exception must not use an f-string literal + "EM103", # exception must not use a .format(...) string directly +] + +# Allow fix for all enabled rules (when `--fix` is provided). +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. 
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" diff --git a/tests/test_integration.py b/tests/test_integration.py index 52fa4676..f6ff8d17 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -5,11 +5,10 @@ using DeterministicModel instead of real LLM models. """ -import os import tempfile +from pathlib import Path from unittest.mock import patch -import pytest import yaml from minisweagent.models.test_models import DeterministicModel @@ -25,12 +24,12 @@ def test_main_battlesnake_integration(): config_path = "configs/battlesnake.yaml" # Read the original config - with open(config_path, "r") as f: + with open(config_path) as f: config = yaml.safe_load(f) # Create a temporary directory for test artifacts with tempfile.TemporaryDirectory() as temp_dir: - temp_config_path = os.path.join(temp_dir, "test_battlesnake.yaml") + temp_config_path = Path(temp_dir) / "test_battlesnake.yaml" # Reduce rounds to 1 for faster testing config["tournament"]["rounds"] = 1 @@ -51,9 +50,7 @@ def wrapper(config, game_context, environment): print(f"Replacing model for agent {agent.name}") # Create DeterministicModel with the specified command deterministic_model = DeterministicModel( - outputs=[ - "```bash\necho 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\n```" - ] + outputs=["```bash\necho 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\n```"] ) agent.agent.model = deterministic_model @@ -66,7 +63,7 @@ def wrapper(config, game_context, environment): # Run the main function with cleanup enabled with patch( - "codeclash.tournaments.pvp_training.get_agent", + "codeclash.tournaments.pvp.get_agent", 
side_effect=mock_get_agent(get_agent), ): # This should complete without raising any exceptions