From fd1f33938c44e17ad249e27a293cf67f924e71c2 Mon Sep 17 00:00:00 2001
From: John Yang <byjohnyang@gmail.com>
Date: Tue, 26 Aug 2025 23:07:39 +0000
Subject: [PATCH 1/4] Add `sims_per_round` flag

---
 codeclash/agents/minisweagent.py              |  4 +-
 codeclash/constants.py                        |  2 +
 codeclash/games/abstract.py                   | 26 +++++---
 codeclash/games/battlecode/main.py            | 62 ++++++++++--------
 codeclash/games/battlesnake/main.py           | 59 +++++++++--------
 codeclash/games/corewar/main.py               | 13 ++--
 codeclash/games/robocode/main.py              | 16 +++--
 codeclash/games/robotrumble/main.py           | 64 ++++++++++---------
 codeclash/tournaments/abstract.py             | 13 ++--
 codeclash/tournaments/pvp_training.py         | 26 ++++++--
 .../tournaments/single_player_training.py     | 22 +++++--
 codeclash/utils/environment.py                |  8 ++-
 configs/battlecode.yaml                       |  1 +
 configs/battlesnake.yaml                      |  1 +
 configs/battlesnake_dummy.yaml                |  1 +
 configs/battlesnake_single_player.yaml        |  1 +
 configs/corewar.yaml                          |  3 +-
 configs/robocode.yaml                         |  3 +-
 configs/robotrumble.yaml                      |  1 +
 19 files changed, 192 insertions(+), 134 deletions(-)

diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py
index f3fec5d4..d1729628 100644
--- a/codeclash/agents/minisweagent.py
+++ b/codeclash/agents/minisweagent.py
@@ -17,7 +17,7 @@
 
 from codeclash.agents.abstract import Player
 from codeclash.agents.utils import GameContext, resolve_api_key
-from codeclash.utils.environment import copy_file_to_container
+from codeclash.utils.environment import copy_to_container
 
 
 class ClashAgent(DefaultAgent):
@@ -107,7 +107,7 @@ def run(self):
                 result=result,
                 print_fct=self.logger.debug,
             )
-            copy_file_to_container(
+            copy_to_container(
                 self.environment,
                 traj_path,
                 self.game_context.log_env / traj_path.name,
diff --git a/codeclash/constants.py b/codeclash/constants.py
index 19036cc8..db5d9df7 100644
--- a/codeclash/constants.py
+++ b/codeclash/constants.py
@@ -3,4 +3,6 @@
 DIR_LOGS = Path("logs")
 DIR_WORK = Path("/testbed")
 GH_ORG = "emagedoc"
+OUTPUTS_LOGS = "log_outputs"
+OUTPUTS_RESULTS = "result_outputs"
 RESULT_TIE = "Tie"
diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py
index ddbb316f..2daf7f70 100644
--- a/codeclash/games/abstract.py
+++ b/codeclash/games/abstract.py
@@ -7,7 +7,13 @@
 from minisweagent.environments.docker import DockerEnvironment
 
 from codeclash.agents.abstract import Player
-from codeclash.constants import DIR_LOGS, DIR_WORK, GH_ORG
+from codeclash.constants import (
+    DIR_LOGS,
+    DIR_WORK,
+    GH_ORG,
+    OUTPUTS_LOGS,
+    OUTPUTS_RESULTS,
+)
 from codeclash.utils.environment import assert_zero_exit_code, copy_between_containers
 from codeclash.utils.log import get_logger
 
@@ -139,12 +145,12 @@ def _pre_round_setup(self, agents: list[Player]):
 
     @abstractmethod
     def determine_winner(
-        self, result_output: str, agents: list[Player]
+        self, result_outputs: list[str], agents: list[Player]
     ) -> dict[str, str]:
         """Determine the winner of the game based on the result output.
 
         Args:
-            result_output: The specific output containing winning information
+            result_outputs: The specific output(s) containing winning information
             agents: List of agents participating in the round
 
         Returns:
@@ -153,13 +159,13 @@ def determine_winner(
         pass
 
     @abstractmethod
-    def execute_round(self, agents: list[Player]) -> dict[str, str]:
+    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
         """Subclasses implement their game-specific logic here.
         This is the low level implementation, you probably want to use run_round instead, which
         includes the pre-round setup, post-round setup, and winner determination.
 
         Returns:
-            Dictionary with keys "log_output" and "result_output"
+            Dictionary with keys "log_outputs" and "result_outputs"
         """
         pass
 
@@ -172,14 +178,14 @@ def run_round(self, agents: list[Player]) -> dict[str, str]:
         """
         self._pre_round_setup(agents)
         result = self.execute_round(agents)
-        log_output = result["log_output"]
-        result_output = result["result_output"]
+        log_outputs = result[OUTPUTS_LOGS]
+        result_outputs = result[OUTPUTS_RESULTS]
 
-        winner_result = self.determine_winner(result_output, agents)
+        winner_result = self.determine_winner(result_outputs, agents)
         winner_name = winner_result["winner"]
 
         return {
-            "log_output": log_output,
-            "result_output": result_output,
+            OUTPUTS_LOGS: log_outputs,
+            OUTPUTS_RESULTS: result_outputs,
             "winner": winner_name,
         }
diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py
index 43c26295..9c4b3ac4 100644
--- a/codeclash/games/battlecode/main.py
+++ b/codeclash/games/battlecode/main.py
@@ -1,9 +1,10 @@
 import re
-import shlex
 from pathlib import Path
 from typing import Any
 
-from codeclash.constants import DIR_WORK, RESULT_TIE
+from tqdm.auto import tqdm
+
+from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE
 from codeclash.games.abstract import CodeGame
 
 
@@ -23,27 +24,30 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
             else:
                 self.run_cmd_round += f" --{arg} {val}"
 
-    def determine_winner(self, result_output: str, agents: list[Any]) -> dict[str, str]:
-        self.logger.debug(f"Determining winner from result output: {result_output}")
-        lines = result_output.strip().split("\n")
-        # Get the third-to-last line which contains the winner info
-        winner_line = lines[-3] if len(lines) >= 3 else ""
-        self.logger.debug(f"Winner line: {winner_line}")
-        match = re.search(r"\s\((.*)\)\swins\s\(", winner_line)
-        if match:
-            winner_key = match.group(1)
-            self.logger.debug(f"Winner key from match: {winner_key}")
-            # Map A/B to actual agent names (much closer to original code)
-            winner = {"A": agents[0].name, "B": agents[1].name}.get(
-                winner_key, RESULT_TIE
-            )
-            self.logger.debug(f"Concluding winner: {winner}")
-            return {"winner": winner}
-        else:
-            self.logger.debug("No winner match found, returning tie")
-            return {"winner": RESULT_TIE}
+    def determine_winner(
+        self, result_outputs: list[str], agents: list[Any]
+    ) -> dict[str, str]:
+        winners = []
+        for ro in result_outputs:
+            lines = ro.strip().split("\n")
+            # Get the third-to-last line which contains the winner info
+            winner_line = lines[-3] if len(lines) >= 3 else ""
+            self.logger.debug(f"Winner line: {winner_line}")
+            match = re.search(r"\s\((.*)\)\swins\s\(", winner_line)
+            if match:
+                winner_key = match.group(1)
+                self.logger.debug(f"Winner key from match: {winner_key}")
+                # Map A/B to actual agent names (much closer to original code)
+                winner = {"A": agents[0].name, "B": agents[1].name}.get(
+                    winner_key, RESULT_TIE
+                )
+                winners.append(winner)
+            else:
+                winners.append(RESULT_TIE)
+        winner = max(set(winners), key=winners.count)
+        return {"winner": winner}
 
-    def execute_round(self, agents: list[Any]) -> dict[str, str]:
+    def execute_round(self, agents: list[Any]) -> dict[str, list[str]]:
         for agent in agents:
             src, dest = f"/{agent.name}/src/mysubmission/", str(
                 DIR_WORK / "src" / agent.name
@@ -53,10 +57,12 @@ def execute_round(self, agents: list[Any]) -> dict[str, str]:
             f"--p{idx+1}-dir src --p{idx+1} {agent.name}"
             for idx, agent in enumerate(agents)
         ]
-        cmd = f"{self.run_cmd_round} {shlex.join(args)}"
+        cmd = f"{self.run_cmd_round} {' '.join(args)}"
         self.logger.info(f"Running command: {cmd}")
-        response = self.environment.execute(cmd)
-        assert response["returncode"] == 0, response
-        # For BattleCode, log_output and result_output are the same
-        output = response["output"]
-        return {"log_output": output, "result_output": output}
+        outputs = []
+        for _ in tqdm(range(self.game_config["sims_per_round"])):
+            response = self.environment.execute(cmd)
+            assert response["returncode"] == 0, response
+            # For BattleCode, log_outputs and result_outputs are the same
+            outputs.append(response["output"])
+        return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs}
diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py
index d3a8187e..aec8b30a 100644
--- a/codeclash/games/battlesnake/main.py
+++ b/codeclash/games/battlesnake/main.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 from codeclash.agents.abstract import Player
+from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS
 from codeclash.games.abstract import CodeGame
 from codeclash.utils.environment import assert_zero_exit_code
 
@@ -23,18 +24,20 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
                 self.run_cmd_round += f" --{arg} {val}"
 
     def determine_winner(
-        self, result_output: str, agents: list[Player]
+        self, result_outputs: list[str], agents: list[Player]
     ) -> dict[str, str]:
-        self.logger.debug(f"Determining winner from result output: {result_output}")
-        lines = result_output.strip().split("\n")
-        # Get the last line which contains the game result
-        last_line = lines[-1] if lines else ""
-        self.logger.debug(f"Last line: {last_line}")
-        winner = json.loads(last_line)["winnerName"]
-        self.logger.debug(f"Concluding winner: {winner}")
+        winners = []
+        for ro in result_outputs:
+            lines = ro.strip().split("\n")
+            # Get the last line which contains the game result
+            last_line = lines[-1] if lines else ""
+            self.logger.debug(f"Last line: {last_line}")
+            winner = json.loads(last_line)["winnerName"]
+            winners.append(winner)
+        winner = max(set(winners), key=winners.count)
         return {"winner": winner}
 
-    def execute_round(self, agents: list[Player]) -> dict[str, str]:
+    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
         cmd = []
         for idx, agent in enumerate(agents):
             port = 8001 + idx
@@ -46,27 +49,33 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]:
 
         time.sleep(3)  # Give servers time to start
 
-        # Create temporary output file for results
-        output_file = f"battlesnake_output_{int(time.time())}.json"
-        cmd_str = " ".join(cmd) + f" -o {output_file}"
-        self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}")
-
         try:
-            response = assert_zero_exit_code(
-                self.environment.execute(
-                    f"{self.run_cmd_round} {cmd_str}",
-                    cwd=f"{self.environment.config.cwd}/game",
+            log_outputs, result_outputs = [], []
+            for idx in range(self.game_config["sims_per_round"]):
+                # Create temporary output file for results
+                output_file = f"battlesnake_output_{idx}_{int(time.time())}.json"
+                cmd_str = " ".join(cmd) + f" -o {output_file}"
+                self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}")
+
+                response = assert_zero_exit_code(
+                    self.environment.execute(
+                        f"{self.run_cmd_round} {cmd_str}",
+                        cwd=f"{self.environment.config.cwd}/game",
+                    )
                 )
-            )
 
-            # Read the output file for result information
-            result_response = self.environment.execute(f"cat game/{output_file}")
-            result_output = result_response["output"]
+                # Read the output file for result information
+                result_response = self.environment.execute(f"cat game/{output_file}")
+                result_output = result_response["output"]
+                log_outputs.append(response["output"])
+                result_outputs.append(result_output)
+
+                # Clean up the output file
+                self.environment.execute(f"rm -f game/{output_file}")
 
-            # Clean up the output file
-            self.environment.execute(f"rm -f game/{output_file}")
+                time.sleep(0.1)
 
-            return {"log_output": response["output"], "result_output": result_output}
+            return {OUTPUTS_LOGS: log_outputs, OUTPUTS_RESULTS: result_outputs}
         finally:
             # Kill all python servers when done
             self.environment.execute("pkill -f 'python main.py' || true")
diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py
index 7e6e5617..59e1f989 100644
--- a/codeclash/games/corewar/main.py
+++ b/codeclash/games/corewar/main.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 from codeclash.agents.abstract import Player
+from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS
 from codeclash.games.abstract import CodeGame
 
 
@@ -22,8 +23,9 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
                 self.run_cmd_round += f" -{arg} {val}"
 
     def determine_winner(
-        self, result_output: str, agents: list[Player]
+        self, result_outputs: list[str], agents: list[Player]
     ) -> dict[str, str]:
+        result_output = result_outputs[0]  # Get the first (and only) element
         self.logger.debug(f"Determining winner from result output: {result_output}")
         scores = []
         n = len(agents) * 2
@@ -51,12 +53,13 @@ def determine_winner(
             self.logger.debug("No scores found, returning unknown")
             return {"winner": "unknown"}
 
-    def execute_round(self, agents: list[Player]) -> dict[str, str]:
+    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
         args = [f"/{agent.name}/warriors/warrior.red" for agent in agents]
         cmd = f"{self.run_cmd_round} {shlex.join(args)}"
+        cmd += f" -r {self.game_config['sims_per_round']}"
         self.logger.info(f"Running command: {cmd}")
         response = self.environment.execute(cmd)
         assert response["returncode"] == 0, response
-        # For CoreWar, log_output and result_output are the same
-        output = response["output"]
-        return {"log_output": output, "result_output": output}
+        # For CoreWar, log_outputs and result_outputs are the same
+        output = [response["output"]]
+        return {OUTPUTS_LOGS: output, OUTPUTS_RESULTS: output}
diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py
index 514e7ab2..ffb69694 100644
--- a/codeclash/games/robocode/main.py
+++ b/codeclash/games/robocode/main.py
@@ -3,8 +3,9 @@
 from pathlib import Path
 
 from codeclash.agents.abstract import Player
+from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS
 from codeclash.games.abstract import CodeGame
-from codeclash.utils.environment import copy_file_to_container
+from codeclash.utils.environment import copy_to_container
 
 
 class RoboCodeGame(CodeGame):
@@ -25,7 +26,7 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
     def _get_battle_config(self) -> str:
         default_battle_config = {
             "battle": {
-                "numRounds": 10,
+                "numRounds": self.game_config.get("sims_per_round", 100),
                 "gunCoolingRate": 0.1,
                 "rules": {"inactivityTime": 450, "hideEnemyNames": True},
             },
@@ -56,12 +57,13 @@ def dict_to_lines(d, prefix=""):
         return "\n".join(battle_lines)
 
     def determine_winner(
-        self, result_output: str, agents: list[Player]
+        self, result_outputs: list[str], agents: list[Player]
     ) -> dict[str, str]:
+        result_output = result_outputs[0]  # Get the first (and only) element
         self.logger.debug(f"Determining winner from result output: {result_output}")
         lines = result_output.strip().split("\n")
         # Get the second line which contains the winner info (closer to original)
-        winner_line = lines[1] if len(lines) >= 2 else ""
+        winner_line = lines[2] if len(lines) >= 3 else ""
         self.logger.debug(f"Winner line: {winner_line}")
         if winner_line:
             winner = winner_line.split()[1].rsplit(".", 1)[0]
@@ -71,7 +73,7 @@ def determine_winner(
             self.logger.debug("No winner line found, returning unknown")
             return {"winner": "unknown"}
 
-    def execute_round(self, agents: list[Player]) -> dict[str, str]:
+    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
         for agent in agents:
             # Copy the agent codebase into the game codebase and compile it
             for cmd in [
@@ -93,7 +95,7 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]:
 robocode.battle.selectedRobots={selected_robots}
 """
             )
-        copy_file_to_container(self.environment, battle_file, f"battles/{battle_file}")
+        copy_to_container(self.environment, battle_file, f"battles/{battle_file}")
         subprocess.run(f"rm -f {battle_file}", shell=True)
 
         # Run battle with results output to file
@@ -110,4 +112,4 @@ def execute_round(self, agents: list[Player]) -> dict[str, str]:
         # Clean up the results file
         self.environment.execute(f"rm -f {results_file}")
 
-        return {"log_output": response["output"], "result_output": result_output}
+        return {OUTPUTS_LOGS: [response["output"]], OUTPUTS_RESULTS: [result_output]}
diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py
index 8f893ee4..7f174281 100644
--- a/codeclash/games/robotrumble/main.py
+++ b/codeclash/games/robotrumble/main.py
@@ -1,8 +1,9 @@
 import shlex
+from collections import Counter
 from pathlib import Path
 
 from codeclash.agents.abstract import Player
-from codeclash.constants import RESULT_TIE
+from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE
 from codeclash.games.abstract import CodeGame
 
 
@@ -17,36 +18,37 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
         self.run_cmd_round: str = "./rumblebot run term"
 
     def determine_winner(
-        self, result_output: str, agents: list[Player]
+        self, result_outputs: list[str], agents: list[Player]
     ) -> dict[str, str]:
-        self.logger.debug(f"Determining winner from result output: {result_output}")
-        lines = result_output.strip().split("\n")
-        # Get the last 2 lines which contain the game result (same as original)
-        relevant_lines = lines[-2:] if len(lines) >= 2 else lines
-        log_text = "\n".join(relevant_lines)
-        self.logger.debug(f"Relevant lines: {log_text}")
-
-        if "Blue won" in log_text:
-            winner = agents[0].name
-            self.logger.debug(f"Blue won - Concluding winner: {winner}")
-            return {"winner": winner}
-        elif "Red won" in log_text:
-            winner = agents[1].name
-            self.logger.debug(f"Red won - Concluding winner: {winner}")
-            return {"winner": winner}
-        elif "it was a tie" in log_text:
-            self.logger.debug("Game was a tie")
-            return {"winner": RESULT_TIE}
-        else:
-            self.logger.debug("No clear result found, treating as tie")
-            return {"winner": RESULT_TIE}
+        winners = []
+        for ro in result_outputs:
+            lines = ro.strip().split("\n")
+
+            # Get the last 2 lines which contain the game result (same as original)
+            relevant_lines = lines[-2:] if len(lines) >= 2 else lines
+            log_text = "\n".join(relevant_lines)
+
+            if "Blue won" in log_text:
+                winner = agents[0].name
+                winners.append(winner)
+            elif "Red won" in log_text:
+                winner = agents[1].name
+                winners.append(winner)
+            elif "it was a tie" in log_text:
+                winners.append(RESULT_TIE)
+            else:
+                winners.append(RESULT_TIE)
+        print(Counter(winners))
+        winner = max(set(winners), key=winners.count)
+        return {"winner": winner}
 
     def execute_round(self, agents: list[Player]) -> dict[str, str]:
-        args = [f"/{agent.name}/robot.py" for agent in agents]
-        cmd = f"{self.run_cmd_round} {shlex.join(args)}"
-        self.logger.info(f"Running command: {cmd}")
-        response = self.environment.execute(cmd)
-        assert response["returncode"] == 0, response
-        # For RobotRumble, log_output and result_output are the same
-        output = response["output"]
-        return {"log_output": output, "result_output": output}
+        outputs = []
+        for _ in range(self.game_config.get("sims_per_round", 100)):
+            args = [f"/{agent.name}/robot.py" for agent in agents]
+            cmd = f"{self.run_cmd_round} {shlex.join(args)}"
+            response = self.environment.execute(cmd)
+            assert response["returncode"] == 0, response
+            outputs.append(response["output"])
+        # For RobotRumble, log_outputs and result_outputs are the same
+        return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs}
diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py
index d98a9d6c..74e54a7b 100644
--- a/codeclash/tournaments/abstract.py
+++ b/codeclash/tournaments/abstract.py
@@ -3,10 +3,7 @@
 import traceback
 from pathlib import Path
 
-from codeclash.agents import get_agent
-from codeclash.agents.abstract import Player
-from codeclash.agents.utils import GameContext
-from codeclash.constants import DIR_LOGS, DIR_WORK
+from codeclash.constants import DIR_LOGS
 from codeclash.utils.environment import create_file_on_container
 from codeclash.utils.log import get_logger
 
@@ -30,17 +27,17 @@ def __init__(self, config: dict, *, name: str, **kwargs):
     def get_metadata(self) -> dict:
         return self._metadata
 
-    def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str) -> None:
+    def _copy_game_log_to_agent(
+        self, agent, round_num: int, log_output: str, dest_path: str = None
+    ) -> None:
         """Copy round log to agent environment."""
         try:
             create_file_on_container(
                 container=agent.environment,
                 content=log_output,
-                dest_path=f"logs/round_{round_num}.log",
+                dest_path=dest_path if dest_path else f"logs/round_{round_num}.log",
             )
         except Exception:
             self.logger.error(
                 f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}"
             )
-        else:
-            self.logger.info(f"Created round log in {agent.name}'s container.")
diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp_training.py
index ffc223cc..53a8ef8e 100644
--- a/codeclash/tournaments/pvp_training.py
+++ b/codeclash/tournaments/pvp_training.py
@@ -5,10 +5,11 @@
 from codeclash.agents import get_agent
 from codeclash.agents.abstract import Player
 from codeclash.agents.utils import GameContext
-from codeclash.constants import DIR_WORK
+from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS
 from codeclash.games import get_game
 from codeclash.games.abstract import CodeGame
 from codeclash.tournaments.abstract import AbstractTournament
+from codeclash.utils.environment import copy_to_container
 from codeclash.utils.log import get_logger
 
 
@@ -66,21 +67,34 @@ def run_training_round(self, round_num: int) -> None:
         """Execute a single training round."""
         # Run the game round and get results
         result = self.game.run_round(self.agents)
-        log_output = result["log_output"]
-        result_output = result["result_output"]
+        log_outputs = result[OUTPUTS_LOGS]
+        result_outputs = result[OUTPUTS_RESULTS]
         winner = result["winner"]
 
         # Handle bookkeeping that was previously in the game
         self.scoreboard.append((round_num, winner))
         self.logger.info(f"Round {round_num} winner: {winner}")
 
+        # Create directory for round logs
+        (self.game.log_local / f"round_{round_num}").mkdir(parents=True, exist_ok=True)
+
         # Write log to file
-        round_log_path = self.game.log_local / f"round_{round_num}.log"
-        round_log_path.write_text(log_output)
+        for idx, lo in enumerate(log_outputs):
+            round_log_path = (
+                self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
+            )
+            round_log_path.write_text(lo)
 
         # Copy log to agent environments
         for agent in self.agents:
-            self._copy_game_log_to_agent(agent, round_num, log_output)
+            self.logger.info(
+                f"Copying round {round_num} log(s) to {agent.name}'s container..."
+            )
+            copy_to_container(
+                agent.environment,
+                self.game.log_local / f"round_{round_num}",
+                f"logs/round_{round_num}/",
+            )
 
         for agent in self.agents:
             self.run_agent(agent, round_num)
diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player_training.py
index 832d36b4..9d5fbe6f 100644
--- a/codeclash/tournaments/single_player_training.py
+++ b/codeclash/tournaments/single_player_training.py
@@ -8,12 +8,12 @@
 from codeclash.agents.abstract import Player
 from codeclash.agents.dummy import Dummy
 from codeclash.agents.utils import GameContext
-from codeclash.constants import DIR_WORK
+from codeclash.constants import DIR_WORK, OUTPUTS_LOGS
 from codeclash.games import get_game
 from codeclash.games.abstract import CodeGame
 from codeclash.tournaments.abstract import AbstractTournament
 from codeclash.tournaments.utils.git_utils import filter_git_diff
-from codeclash.utils.log import get_logger
+from codeclash.utils.environment import copy_to_container
 
 
 class SinglePlayerTraining(AbstractTournament):
@@ -79,7 +79,7 @@ def run_training_round(self, round_num: int) -> None:
         """Execute a single training round, i.e., run the game, then run the agent."""
         # Run the game round and get results
         result = self.game.run_round([self.agent, self.mirror_agent])
-        log_output = result["log_output"]
+        log_outputs = result[OUTPUTS_LOGS]
         winner = result["winner"]
 
         # Handle bookkeeping that was previously in the game
@@ -87,11 +87,21 @@ def run_training_round(self, round_num: int) -> None:
         self.logger.info(f"Round {round_num} winner: {winner}")
 
         # Write log to file
-        round_log_path = self.game.log_local / f"round_{round_num}.log"
-        round_log_path.write_text(log_output)
+        for idx, lo in enumerate(log_outputs):
+            round_log_path = (
+                self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
+            )
+            round_log_path.write_text(lo)
 
         # Copy log to main agent environment only
-        self._copy_game_log_to_agent(self.agent, round_num, log_output)
+        self.logger.info(
+            f"Copying round {round_num} log(s) to {self.agent.name}'s container..."
+        )
+        copy_to_container(
+            self.agent,
+            self.game.log_local / f"round_{round_num}",
+            f"logs/round_{round_num}/",
+        )
 
         self.run_main_agent(round_num)
         mirror_agent_state = round_num - 1 if round_num > 1 else 0
diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py
index 1d6f6aa1..19165661 100644
--- a/codeclash/utils/environment.py
+++ b/codeclash/utils/environment.py
@@ -65,13 +65,15 @@ def copy_between_containers(
             )
 
 
-def copy_file_to_container(
+def copy_to_container(
     container: DockerEnvironment,
     src_path: str | Path,
     dest_path: str | Path,
 ):
     """
-    Copy a file from the local filesystem to a Docker container.
+    Copy a file or directory from the local filesystem to a Docker container.
+
+    The copy operation is recursive for directories.
     """
     if not str(dest_path).startswith("/"):
         # If not an absolute path, assume relative to container's cwd
@@ -130,6 +132,6 @@ def create_file_on_container(
         tmp_file_path = Path(tmp_file.name)
 
     try:
-        copy_file_to_container(container, tmp_file_path, dest_path)
+        copy_to_container(container, tmp_file_path, dest_path)
     finally:
         tmp_file_path.unlink()  # Clean up the temporary file
diff --git a/configs/battlecode.yaml b/configs/battlecode.yaml
index ed6386cf..74f1c1c5 100644
--- a/configs/battlecode.yaml
+++ b/configs/battlecode.yaml
@@ -1,5 +1,6 @@
 game:
   name: BattleCode
+  sims_per_round: 2
   args:
     maps: quack
 tournament:
diff --git a/configs/battlesnake.yaml b/configs/battlesnake.yaml
index 9016df1e..1d14af29 100644
--- a/configs/battlesnake.yaml
+++ b/configs/battlesnake.yaml
@@ -6,6 +6,7 @@ game:
     browser: false
 tournament:
   rounds: 2
+  sims_per_round: 10
 players:
 - agent: mini
   name: p1
diff --git a/configs/battlesnake_dummy.yaml b/configs/battlesnake_dummy.yaml
index 05a8d4b4..65c6da12 100644
--- a/configs/battlesnake_dummy.yaml
+++ b/configs/battlesnake_dummy.yaml
@@ -1,5 +1,6 @@
 game:
   name: BattleSnake
+  sims_per_round: 10
   args:
     width: 11
     height: 11
diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml
index e5f6f370..e1d0dea7 100644
--- a/configs/battlesnake_single_player.yaml
+++ b/configs/battlesnake_single_player.yaml
@@ -7,6 +7,7 @@ game:
 tournament:
   rounds: 1
   evaluate_matrix: true
+  sims_per_round: 10
 player:
   agent: mini
   config: configs/mini/default.yaml
diff --git a/configs/corewar.yaml b/configs/corewar.yaml
index daaf3e2e..4fe68ad7 100644
--- a/configs/corewar.yaml
+++ b/configs/corewar.yaml
@@ -1,7 +1,6 @@
 game:
   name: CoreWar
-  args:
-    r: 100
+  sims_per_round: 10
 tournament:
   rounds: 3
 players:
diff --git a/configs/robocode.yaml b/configs/robocode.yaml
index c401cdfd..c1e1c90c 100644
--- a/configs/robocode.yaml
+++ b/configs/robocode.yaml
@@ -1,8 +1,8 @@
 game:
   name: RoboCode
+  sims_per_round: 10
   battle:
     battle:
-      numRounds: 10
       gunCoolingRate: 0.1
       rules:
         inactivityTime: 450
@@ -15,6 +15,7 @@ game:
     nosound: true
 tournament:
   rounds: 3
+  sims_per_round: 10
 players:
 - agent: dummy
   name: p1
diff --git a/configs/robotrumble.yaml b/configs/robotrumble.yaml
index 84e9e06f..a6fac21b 100644
--- a/configs/robotrumble.yaml
+++ b/configs/robotrumble.yaml
@@ -1,5 +1,6 @@
 game:
   name: RobotRumble
+  sims_per_round: 10
 tournament:
   rounds: 3
 players:

From c70fa5c139b3f511b70deae7900cc5c46415b312 Mon Sep 17 00:00:00 2001
From: John Yang <byjohnyang@gmail.com>
Date: Tue, 26 Aug 2025 23:28:21 +0000
Subject: [PATCH 2/4] Minor fix: move `sims_per_round` under `game` config

---
 configs/battlesnake.yaml               | 2 +-
 configs/battlesnake_single_player.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/battlesnake.yaml b/configs/battlesnake.yaml
index 1d14af29..daee02ce 100644
--- a/configs/battlesnake.yaml
+++ b/configs/battlesnake.yaml
@@ -1,12 +1,12 @@
 game:
   name: BattleSnake
+  sims_per_round: 10
   args:
     width: 11
     height: 11
     browser: false
 tournament:
   rounds: 2
-  sims_per_round: 10
 players:
 - agent: mini
   name: p1
diff --git a/configs/battlesnake_single_player.yaml b/configs/battlesnake_single_player.yaml
index e1d0dea7..66d91f1a 100644
--- a/configs/battlesnake_single_player.yaml
+++ b/configs/battlesnake_single_player.yaml
@@ -1,5 +1,6 @@
 game:
   name: BattleSnake
+  sims_per_round: 10
   args:
     width: 11
     height: 11
@@ -7,7 +8,6 @@ game:
 tournament:
   rounds: 1
   evaluate_matrix: true
-  sims_per_round: 10
 player:
   agent: mini
   config: configs/mini/default.yaml

From fb84f4f65c3bb6bd404b765089b62da6e386bebf Mon Sep 17 00:00:00 2001
From: John Yang <byjohnyang@gmail.com>
Date: Wed, 27 Aug 2025 20:06:28 +0000
Subject: [PATCH 3/4] Added dataclasses for return type; added score tracking
 for games

---
 codeclash/agents/abstract.py                  |  4 +-
 codeclash/constants.py                        |  2 -
 codeclash/games/abstract.py                   | 61 +++++++++++--------
 codeclash/games/battlecode/main.py            | 20 +++---
 codeclash/games/battlesnake/main.py           | 42 +++++++------
 codeclash/games/corewar/main.py               | 38 ++++++------
 codeclash/games/robocode/main.py              | 56 +++++++++--------
 codeclash/games/robotrumble/main.py           | 32 ++++++----
 codeclash/tournaments/abstract.py             |  8 ++-
 .../tournaments/{pvp_training.py => pvp.py}   | 21 +++----
 ...le_player_training.py => single_player.py} | 20 +++---
 codeclash/utils/environment.py                |  2 +-
 main.py                                       |  4 +-
 tests/test_integration.py                     |  2 +-
 14 files changed, 165 insertions(+), 147 deletions(-)
 rename codeclash/tournaments/{pvp_training.py => pvp.py} (85%)
 rename codeclash/tournaments/{single_player_training.py => single_player.py} (91%)

diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py
index ed47b8da..5d30b652 100644
--- a/codeclash/agents/abstract.py
+++ b/codeclash/agents/abstract.py
@@ -8,7 +8,7 @@
 from codeclash.agents.utils import GameContext
 from codeclash.constants import GH_ORG
 from codeclash.tournaments.utils.git_utils import filter_git_diff
-from codeclash.utils.environment import assert_zero_exit_code, create_file_on_container
+from codeclash.utils.environment import assert_zero_exit_code, create_file_in_container
 from codeclash.utils.log import get_logger
 
 load_dotenv()
@@ -101,7 +101,7 @@ def reset_and_apply_patch(
             self.logger.debug("No patch to apply, skipping")
             return
 
-        create_file_on_container(
+        create_file_in_container(
             container=self.environment,  # type: ignore
             content=patch,
             dest_path="tmp_patch.txt",
diff --git a/codeclash/constants.py b/codeclash/constants.py
index db5d9df7..19036cc8 100644
--- a/codeclash/constants.py
+++ b/codeclash/constants.py
@@ -3,6 +3,4 @@
 DIR_LOGS = Path("logs")
 DIR_WORK = Path("/testbed")
 GH_ORG = "emagedoc"
-OUTPUTS_LOGS = "log_outputs"
-OUTPUTS_RESULTS = "result_outputs"
 RESULT_TIE = "Tie"
diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py
index 2daf7f70..fbbfefdf 100644
--- a/codeclash/games/abstract.py
+++ b/codeclash/games/abstract.py
@@ -2,22 +2,42 @@
 import os
 import subprocess
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from pathlib import Path
+from typing import Any
 
 from minisweagent.environments.docker import DockerEnvironment
 
 from codeclash.agents.abstract import Player
-from codeclash.constants import (
-    DIR_LOGS,
-    DIR_WORK,
-    GH_ORG,
-    OUTPUTS_LOGS,
-    OUTPUTS_RESULTS,
-)
+from codeclash.constants import DIR_LOGS, DIR_WORK, GH_ORG
 from codeclash.utils.environment import assert_zero_exit_code, copy_between_containers
 from codeclash.utils.log import get_logger
 
 
+@dataclass
+class RoundStats:
+    winner: str
+    scores: dict[
+        str, float
+    ]  # Map of player to game metric (e.g. # of wins, assets accumulated)
+    details: dict[str, Any] = None  # Optional, for game-specific info
+
+    def __str__(self) -> str:
+        return "\n".join([f"- Winner: {self.winner}", f"- Scores: {self.scores}"])
+
+
+@dataclass
+class RoundData:
+    logs: list[str]
+    results: list[str]
+
+
+@dataclass
+class RoundRecord:
+    data: RoundData
+    stats: RoundStats
+
+
 class CodeGame(ABC):
     name: str
 
@@ -144,9 +164,7 @@ def _pre_round_setup(self, agents: list[Player]):
         )
 
     @abstractmethod
-    def determine_winner(
-        self, result_outputs: list[str], agents: list[Player]
-    ) -> dict[str, str]:
+    def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats:
         """Determine the winner of the game based on the result output.
 
         Args:
@@ -154,22 +172,22 @@ def determine_winner(
             agents: List of agents participating in the round
 
         Returns:
-            Dictionary with key "winner" containing the winner's name
+            RoundStats object
         """
         pass
 
     @abstractmethod
-    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
+    def execute_round(self, agents: list[Player]) -> RoundData:
         """Subclasses implement their game-specific logic here.
         This is the low level implementation, you probably want to use run_round instead, which
         includes the pre-round setup, post-round setup, and winner determination.
 
         Returns:
-            Dictionary with keys "log_outputs" and "result_outputs"
+            RoundData object
         """
         pass
 
-    def run_round(self, agents: list[Player]) -> dict[str, str]:
+    def run_round(self, agents: list[Player]) -> RoundRecord:
         """
         Run a single round of the game with the given agents.
 
@@ -177,15 +195,6 @@ def run_round(self, agents: list[Player]) -> dict[str, str]:
         handled by the tournament class.
         """
         self._pre_round_setup(agents)
-        result = self.execute_round(agents)
-        log_outputs = result[OUTPUTS_LOGS]
-        result_outputs = result[OUTPUTS_RESULTS]
-
-        winner_result = self.determine_winner(result_outputs, agents)
-        winner_name = winner_result["winner"]
-
-        return {
-            OUTPUTS_LOGS: log_outputs,
-            OUTPUTS_RESULTS: result_outputs,
-            "winner": winner_name,
-        }
+        data = self.execute_round(agents)
+        stats = self.get_stats(data.results, agents)
+        return RoundRecord(data=data, stats=stats)
diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py
index 9c4b3ac4..f2cf4ca2 100644
--- a/codeclash/games/battlecode/main.py
+++ b/codeclash/games/battlecode/main.py
@@ -4,8 +4,8 @@
 
 from tqdm.auto import tqdm
 
-from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE
-from codeclash.games.abstract import CodeGame
+from codeclash.constants import DIR_WORK, RESULT_TIE
+from codeclash.games.abstract import CodeGame, RoundData, RoundStats
 
 
 class BattleCodeGame(CodeGame):
@@ -24,9 +24,7 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
             else:
                 self.run_cmd_round += f" --{arg} {val}"
 
-    def determine_winner(
-        self, result_outputs: list[str], agents: list[Any]
-    ) -> dict[str, str]:
+    def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats:
         winners = []
         for ro in result_outputs:
             lines = ro.strip().split("\n")
@@ -44,10 +42,12 @@ def determine_winner(
                 winners.append(winner)
             else:
                 winners.append(RESULT_TIE)
-        winner = max(set(winners), key=winners.count)
-        return {"winner": winner}
+        return RoundStats(
+            winner=max(set(winners), key=winners.count),
+            scores={agent.name: winners.count(agent.name) for agent in agents},
+        )
 
-    def execute_round(self, agents: list[Any]) -> dict[str, list[str]]:
+    def execute_round(self, agents: list[Any]) -> RoundData:
         for agent in agents:
             src, dest = f"/{agent.name}/src/mysubmission/", str(
                 DIR_WORK / "src" / agent.name
@@ -58,11 +58,11 @@ def execute_round(self, agents: list[Any]) -> dict[str, list[str]]:
             for idx, agent in enumerate(agents)
         ]
         cmd = f"{self.run_cmd_round} {' '.join(args)}"
-        self.logger.info(f"Running command: {cmd}")
+        self.logger.info(f"Running game: {cmd}")
         outputs = []
         for _ in tqdm(range(self.game_config["sims_per_round"])):
             response = self.environment.execute(cmd)
             assert response["returncode"] == 0, response
             # For BattleCode, log_outputs and result_outputs are the same
             outputs.append(response["output"])
-        return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs}
+        return RoundData(logs=outputs, results=outputs)
diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py
index aec8b30a..c0c93c5b 100644
--- a/codeclash/games/battlesnake/main.py
+++ b/codeclash/games/battlesnake/main.py
@@ -2,9 +2,11 @@
 import time
 from pathlib import Path
 
+from tqdm.auto import tqdm
+
 from codeclash.agents.abstract import Player
-from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS
-from codeclash.games.abstract import CodeGame
+from codeclash.constants import RESULT_TIE
+from codeclash.games.abstract import CodeGame, RoundData, RoundStats
 from codeclash.utils.environment import assert_zero_exit_code
 
 
@@ -23,21 +25,25 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
             else:
                 self.run_cmd_round += f" --{arg} {val}"
 
-    def determine_winner(
-        self, result_outputs: list[str], agents: list[Player]
-    ) -> dict[str, str]:
+    def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats:
         winners = []
         for ro in result_outputs:
             lines = ro.strip().split("\n")
-            # Get the last line which contains the game result
-            last_line = lines[-1] if lines else ""
-            self.logger.debug(f"Last line: {last_line}")
+            last_line = (
+                lines[-1] if lines else ""
+            )  # Get the last line which contains the game result
             winner = json.loads(last_line)["winnerName"]
             winners.append(winner)
-        winner = max(set(winners), key=winners.count)
-        return {"winner": winner}
 
-    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
+        win_counts = {agent.name: winners.count(agent.name) for agent in agents}
+        max_wins = max(win_counts.values())
+        winners = [name for name, wins in win_counts.items() if wins == max_wins]
+        return RoundStats(
+            winner=RESULT_TIE if len(winners) > 1 else winners[0],
+            scores=win_counts,
+        )
+
+    def execute_round(self, agents: list[Player]) -> RoundData:
         cmd = []
         for idx, agent in enumerate(agents):
             port = 8001 + idx
@@ -51,15 +57,16 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
 
         try:
             log_outputs, result_outputs = [], []
-            for idx in range(self.game_config["sims_per_round"]):
+            cmd = self.run_cmd_round + " " + " ".join(cmd)
+            self.logger.info(f"Running game: {cmd}")
+            for idx in tqdm(range(self.game_config["sims_per_round"])):
                 # Create temporary output file for results
                 output_file = f"battlesnake_output_{idx}_{int(time.time())}.json"
-                cmd_str = " ".join(cmd) + f" -o {output_file}"
-                self.logger.info(f"Running command: {self.run_cmd_round} {cmd_str}")
 
+                # Run game
                 response = assert_zero_exit_code(
                     self.environment.execute(
-                        f"{self.run_cmd_round} {cmd_str}",
+                        cmd + f" -o {output_file}",
                         cwd=f"{self.environment.config.cwd}/game",
                     )
                 )
@@ -72,10 +79,9 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
 
                 # Clean up the output file
                 self.environment.execute(f"rm -f game/{output_file}")
+                time.sleep(0.05)
 
-                time.sleep(0.1)
-
-            return {OUTPUTS_LOGS: log_outputs, OUTPUTS_RESULTS: result_outputs}
+            return RoundData(log_outputs, result_outputs)
         finally:
             # Kill all python servers when done
             self.environment.execute("pkill -f 'python main.py' || true")
diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py
index 59e1f989..a082a02c 100644
--- a/codeclash/games/corewar/main.py
+++ b/codeclash/games/corewar/main.py
@@ -3,8 +3,7 @@
 from pathlib import Path
 
 from codeclash.agents.abstract import Player
-from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS
-from codeclash.games.abstract import CodeGame
+from codeclash.games.abstract import CodeGame, RoundData, RoundStats
 
 
 class CoreWarGame(CodeGame):
@@ -22,44 +21,43 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
             else:
                 self.run_cmd_round += f" -{arg} {val}"
 
-    def determine_winner(
-        self, result_outputs: list[str], agents: list[Player]
-    ) -> dict[str, str]:
+    def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats:
         result_output = result_outputs[0]  # Get the first (and only) element
         self.logger.debug(f"Determining winner from result output: {result_output}")
         scores = []
         n = len(agents) * 2
         lines = result_output.strip().split("\n")
+
         # Get the last n lines which contain the scores (closer to original)
         relevant_lines = lines[-n:] if len(lines) >= n else lines
+        relevant_lines = [l for l in relevant_lines if len(l.strip()) > 0]
         self.logger.debug(f"Relevant lines for scoring: {relevant_lines}")
 
+        # Go through each line; we assume score position is correlated with agent index
         for line in relevant_lines:
             match = re.search(r".*\sby\s.*\sscores\s(\d+)", line)
             if match:
                 score = int(match.group(1))
                 scores.append(score)
-                self.logger.debug(f"Found score: {score} from line: {line}")
 
-        self.logger.debug(f"All scores: {scores}")
         if scores:
-            max_score_index = scores.index(max(scores))
-            winner = agents[max_score_index].name
-            self.logger.debug(
-                f"Concluding winner: {winner} with index {max_score_index}"
+            if len(scores) != len(agents):
+                self.logger.error(f"Have {len(scores)} scores but {len(agents)} agents")
+            return RoundStats(
+                winner=agents[scores.index(max(scores))].name,
+                scores={agent.name: score for agent, score in zip(agents, scores)},
+                details={"stdout": "\n".join(relevant_lines)},
             )
-            return {"winner": winner}
         else:
             self.logger.debug("No scores found, returning unknown")
-            return {"winner": "unknown"}
+            return RoundStats(
+                winner="unknown", scores={agent.name: 0 for agent in agents}
+            )
 
-    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
+    def execute_round(self, agents: list[Player]) -> RoundData:
         args = [f"/{agent.name}/warriors/warrior.red" for agent in agents]
-        cmd = f"{self.run_cmd_round} {shlex.join(args)}"
-        cmd += f" -r {self.game_config['sims_per_round']}"
-        self.logger.info(f"Running command: {cmd}")
+        cmd = f"{self.run_cmd_round} {shlex.join(args)} -r {self.game_config['sims_per_round']}"
+        self.logger.info(f"Running game: {cmd}")
         response = self.environment.execute(cmd)
         assert response["returncode"] == 0, response
-        # For CoreWar, log_outputs and result_outputs are the same
-        output = [response["output"]]
-        return {OUTPUTS_LOGS: output, OUTPUTS_RESULTS: output}
+        return RoundData([response["output"]], [response["output"]])
diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py
index ffb69694..880bde96 100644
--- a/codeclash/games/robocode/main.py
+++ b/codeclash/games/robocode/main.py
@@ -1,11 +1,10 @@
-import subprocess
+import re
 import time
 from pathlib import Path
 
 from codeclash.agents.abstract import Player
-from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS
-from codeclash.games.abstract import CodeGame
-from codeclash.utils.environment import copy_to_container
+from codeclash.games.abstract import CodeGame, RoundData, RoundStats
+from codeclash.utils.environment import create_file_in_container
 
 
 class RoboCodeGame(CodeGame):
@@ -56,24 +55,29 @@ def dict_to_lines(d, prefix=""):
         dict_to_lines(default_battle_config)
         return "\n".join(battle_lines)
 
-    def determine_winner(
-        self, result_outputs: list[str], agents: list[Player]
-    ) -> dict[str, str]:
+    def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats:
         result_output = result_outputs[0]  # Get the first (and only) element
         self.logger.debug(f"Determining winner from result output: {result_output}")
         lines = result_output.strip().split("\n")
-        # Get the second line which contains the winner info (closer to original)
-        winner_line = lines[2] if len(lines) >= 3 else ""
-        self.logger.debug(f"Winner line: {winner_line}")
-        if winner_line:
-            winner = winner_line.split()[1].rsplit(".", 1)[0]
-            self.logger.debug(f"Concluding winner: {winner}")
-            return {"winner": winner}
-        else:
-            self.logger.debug("No winner line found, returning unknown")
-            return {"winner": "unknown"}
-
-    def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
+
+        scores, winner = {}, "unknown"  # fallback when no rank-1 row is parsed
+        for line in lines:
+            line = line.strip()
+            if not re.match(r"^\d", line):
+                continue
+            match = re.search(r"(\d+)\S+\:\s(\S+)\s+(\d+)", line)
+            if match:
+                player = match.group(2).rsplit(".", 1)[0]
+                score = int(match.group(3))
+                scores[player] = score
+                if int(match.group(1)) == 1:
+                    winner = player
+
+        return RoundStats(
+            winner=winner, scores=scores, details={"stdout": "\n".join(lines)}
+        )
+
+    def execute_round(self, agents: list[Player]) -> RoundData:
         for agent in agents:
             # Copy the agent codebase into the game codebase and compile it
             for cmd in [
@@ -88,20 +92,18 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
         selected_robots = ",".join([f"{agent.name}.MyTank*" for agent in agents])
         # Use timestamp for unique battle file name since rounds are managed by tournament
         battle_file = f"{self.game_id}-battle{int(time.time())}.battle"
-        with open(battle_file, "w") as f:
-            f.write(
-                f"""#Battle Properties
+        battle_content = f"""#Battle Properties
 {self._get_battle_config()}
 robocode.battle.selectedRobots={selected_robots}
 """
-            )
-        copy_to_container(self.environment, battle_file, f"battles/{battle_file}")
-        subprocess.run(f"rm -f {battle_file}", shell=True)
+        create_file_in_container(
+            self.environment, content=battle_content, dest_path=f"battles/{battle_file}"
+        )
 
         # Run battle with results output to file
         results_file = f"results_{int(time.time())}.txt"
         cmd = f"{self.run_cmd_round} -battle {battle_file} -results {results_file}"
-        self.logger.info(f"Running command: {cmd}")
+        self.logger.info(f"Running game: {cmd}")
         response = self.environment.execute(cmd)
         assert response["returncode"] == 0, response
 
@@ -112,4 +114,4 @@ def execute_round(self, agents: list[Player]) -> dict[str, list[str]]:
         # Clean up the results file
         self.environment.execute(f"rm -f {results_file}")
 
-        return {OUTPUTS_LOGS: [response["output"]], OUTPUTS_RESULTS: [result_output]}
+        return RoundData([response["output"]], [result_output])
diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py
index 7f174281..56a4febb 100644
--- a/codeclash/games/robotrumble/main.py
+++ b/codeclash/games/robotrumble/main.py
@@ -3,8 +3,8 @@
 from pathlib import Path
 
 from codeclash.agents.abstract import Player
-from codeclash.constants import OUTPUTS_LOGS, OUTPUTS_RESULTS, RESULT_TIE
-from codeclash.games.abstract import CodeGame
+from codeclash.constants import RESULT_TIE
+from codeclash.games.abstract import CodeGame, RoundData, RoundStats
 
 
 class RobotRumbleGame(CodeGame):
@@ -17,9 +17,7 @@ def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
         assert len(config["players"]) == 2, "RobotRumble is a two-player game"
         self.run_cmd_round: str = "./rumblebot run term"
 
-    def determine_winner(
-        self, result_outputs: list[str], agents: list[Player]
-    ) -> dict[str, str]:
+    def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundStats:
         winners = []
         for ro in result_outputs:
             lines = ro.strip().split("\n")
@@ -38,17 +36,27 @@ def determine_winner(
                 winners.append(RESULT_TIE)
             else:
                 winners.append(RESULT_TIE)
-        print(Counter(winners))
-        winner = max(set(winners), key=winners.count)
-        return {"winner": winner}
 
-    def execute_round(self, agents: list[Player]) -> dict[str, str]:
+        # Count occurrences of each winner
+        counts = Counter(winners)
+
+        # Find all winners with the maximum count
+        max_count = max(counts.values())
+        top_winners = [w for w, c in counts.items() if c == max_count]
+
+        # If multiple winners have the same count, return RESULT_TIE
+        final_winner = RESULT_TIE if len(top_winners) > 1 else top_winners[0]
+
+        return RoundStats(winner=final_winner, scores=dict(counts))
+
+    def execute_round(self, agents: list[Player]) -> RoundData:
         outputs = []
+        args = [f"/{agent.name}/robot.py" for agent in agents]
+        cmd = f"{self.run_cmd_round} {shlex.join(args)}"
+        self.logger.info(f"Running game: {cmd}")
         for _ in range(self.game_config.get("sims_per_round", 100)):
-            args = [f"/{agent.name}/robot.py" for agent in agents]
-            cmd = f"{self.run_cmd_round} {shlex.join(args)}"
             response = self.environment.execute(cmd)
             assert response["returncode"] == 0, response
             outputs.append(response["output"])
         # For RobotRumble, log_outputs and result_outputs are the same
-        return {OUTPUTS_LOGS: outputs, OUTPUTS_RESULTS: outputs}
+        return RoundData(logs=outputs, results=outputs)
diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py
index 74e54a7b..0c5784be 100644
--- a/codeclash/tournaments/abstract.py
+++ b/codeclash/tournaments/abstract.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 
 from codeclash.constants import DIR_LOGS
-from codeclash.utils.environment import create_file_on_container
+from codeclash.utils.environment import create_file_in_container
 from codeclash.utils.log import get_logger
 
 
@@ -12,7 +12,9 @@ class AbstractTournament:
     def __init__(self, config: dict, *, name: str, **kwargs):
         self.config: dict = config
         self.name: str = name
-        self.tournament_id: str = f"{self.name}{time.strftime('%y%m%d%H%M%S')}"
+        self.tournament_id: str = (
+            f"{self.name}.{config['game']['name']}.{time.strftime('%y%m%d%H%M%S')}"
+        )
         self.local_output_dir: Path = (
             DIR_LOGS / getpass.getuser() / self.tournament_id
         ).resolve()
@@ -32,7 +34,7 @@ def _copy_game_log_to_agent(
     ) -> None:
         """Copy round log to agent environment."""
         try:
-            create_file_on_container(
+            create_file_in_container(
                 container=agent.environment,
                 content=log_output,
                 dest_path=dest_path if dest_path else f"logs/round_{round_num}.log",
diff --git a/codeclash/tournaments/pvp_training.py b/codeclash/tournaments/pvp.py
similarity index 85%
rename from codeclash/tournaments/pvp_training.py
rename to codeclash/tournaments/pvp.py
index 53a8ef8e..dd670de1 100644
--- a/codeclash/tournaments/pvp_training.py
+++ b/codeclash/tournaments/pvp.py
@@ -5,19 +5,19 @@
 from codeclash.agents import get_agent
 from codeclash.agents.abstract import Player
 from codeclash.agents.utils import GameContext
-from codeclash.constants import DIR_WORK, OUTPUTS_LOGS, OUTPUTS_RESULTS
+from codeclash.constants import DIR_WORK
 from codeclash.games import get_game
-from codeclash.games.abstract import CodeGame
+from codeclash.games.abstract import CodeGame, RoundStats
 from codeclash.tournaments.abstract import AbstractTournament
 from codeclash.utils.environment import copy_to_container
 from codeclash.utils.log import get_logger
 
 
-class PvpTraining(AbstractTournament):
+class PvpTournament(AbstractTournament):
     def __init__(
         self, config: dict, *, cleanup: bool = False, push_agent: bool = False
     ):
-        super().__init__(config, name="PvpTraining")
+        super().__init__(config, name="PvpTournament")
         self.cleanup_on_end = cleanup
         self.push_agent = push_agent
         self.game: CodeGame = get_game(
@@ -29,7 +29,7 @@ def __init__(
         for agent_conf in self.config["players"]:
             self.agents.append(self.get_agent(agent_conf, self.config["prompts"]))
         self.logger = get_logger(self.game.name)
-        self.scoreboard: list[tuple[int, str]] = []
+        self.scoreboard: list[RoundStats] = []
 
     @property
     def rounds(self) -> int:
@@ -66,20 +66,17 @@ def run(self) -> None:
     def run_training_round(self, round_num: int) -> None:
         """Execute a single training round."""
         # Run the game round and get results
-        result = self.game.run_round(self.agents)
-        log_outputs = result[OUTPUTS_LOGS]
-        result_outputs = result[OUTPUTS_RESULTS]
-        winner = result["winner"]
+        record = self.game.run_round(self.agents)
 
         # Handle bookkeeping that was previously in the game
-        self.scoreboard.append((round_num, winner))
-        self.logger.info(f"Round {round_num} winner: {winner}")
+        self.scoreboard.append(record.stats)
+        self.logger.info(f"Round {round_num}:\n{record.stats}")
 
         # Create directory for round logs
         (self.game.log_local / f"round_{round_num}").mkdir(parents=True, exist_ok=True)
 
         # Write log to file
-        for idx, lo in enumerate(log_outputs):
+        for idx, lo in enumerate(record.data.logs):
             round_log_path = (
                 self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
             )
diff --git a/codeclash/tournaments/single_player_training.py b/codeclash/tournaments/single_player.py
similarity index 91%
rename from codeclash/tournaments/single_player_training.py
rename to codeclash/tournaments/single_player.py
index 9d5fbe6f..651f559c 100644
--- a/codeclash/tournaments/single_player_training.py
+++ b/codeclash/tournaments/single_player.py
@@ -8,9 +8,9 @@
 from codeclash.agents.abstract import Player
 from codeclash.agents.dummy import Dummy
 from codeclash.agents.utils import GameContext
-from codeclash.constants import DIR_WORK, OUTPUTS_LOGS
+from codeclash.constants import DIR_WORK
 from codeclash.games import get_game
-from codeclash.games.abstract import CodeGame
+from codeclash.games.abstract import CodeGame, RoundStats
 from codeclash.tournaments.abstract import AbstractTournament
 from codeclash.tournaments.utils.git_utils import filter_git_diff
 from codeclash.utils.environment import copy_to_container
@@ -29,7 +29,7 @@ def __init__(self, config: dict, cleanup: bool = False):
         mirror_agent_config = copy.deepcopy(self.config["player"])
         mirror_agent_config["name"] = "mirror"
         self.mirror_agent: Player = self.get_agent(mirror_agent_config, round=0)
-        self.scoreboard: list[tuple[int, str]] = []
+        self.scoreboard: list[RoundStats] = []
 
     @property
     def rounds(self) -> int:
@@ -78,16 +78,14 @@ def run(self):
     def run_training_round(self, round_num: int) -> None:
         """Execute a single training round, i.e., run the game, then run the agent."""
         # Run the game round and get results
-        result = self.game.run_round([self.agent, self.mirror_agent])
-        log_outputs = result[OUTPUTS_LOGS]
-        winner = result["winner"]
+        record = self.game.run_round([self.agent, self.mirror_agent])
 
         # Handle bookkeeping that was previously in the game
-        self.scoreboard.append((round_num, winner))
-        self.logger.info(f"Round {round_num} winner: {winner}")
+        self.scoreboard.append(record.stats)
+        self.logger.info(f"Round {round_num}:\n{record.stats}")
 
         # Write log to file
-        for idx, lo in enumerate(log_outputs):
+        for idx, lo in enumerate(record.data.logs):
             round_log_path = (
                 self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
             )
@@ -154,8 +152,8 @@ def evaluate(self, n_repetitions: int = 3):
                 p1.reset_and_apply_patch(p1_patch)
                 p2.reset_and_apply_patch(p2_patch)
                 for i_repetition in range(n_repetitions):
-                    result = self.game.run_round([p1, p2])
-                    winner = result["winner"]
+                    record = self.game.run_round([p1, p2])
+                    winner = record.stats.winner
                     self.logger.info(
                         f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}"
                     )
diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py
index 19165661..b6b84c81 100644
--- a/codeclash/utils/environment.py
+++ b/codeclash/utils/environment.py
@@ -117,7 +117,7 @@ def copy_file_from_container(
     return result
 
 
-def create_file_on_container(
+def create_file_in_container(
     container: DockerEnvironment,
     *,
     content: str,
diff --git a/main.py b/main.py
index 14245d38..4984870a 100644
--- a/main.py
+++ b/main.py
@@ -2,13 +2,13 @@
 
 import yaml
 
-from codeclash.tournaments.pvp_training import PvpTraining
+from codeclash.tournaments.pvp import PvpTournament
 
 
 def main(config_path: str, *, cleanup: bool = False, push_agent: bool = False):
     with open(config_path, "r") as f:
         config = yaml.safe_load(f)
-    training = PvpTraining(config, cleanup=cleanup, push_agent=push_agent)
+    training = PvpTournament(config, cleanup=cleanup, push_agent=push_agent)
     training.run()
 
 
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 52fa4676..feddc06d 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -66,7 +66,7 @@ def wrapper(config, game_context, environment):
 
         # Run the main function with cleanup enabled
         with patch(
-            "codeclash.tournaments.pvp_training.get_agent",
+            "codeclash.tournaments.pvp.get_agent",
             side_effect=mock_get_agent(get_agent),
         ):
             # This should complete without raising any exceptions

From 94556826255846f84c7832e3128b03d65ca2f3c3 Mon Sep 17 00:00:00 2001
From: John Yang <byjohnyang@gmail.com>
Date: Wed, 27 Aug 2025 20:42:36 +0000
Subject: [PATCH 4/4] Move from black/isort to ruff linting

---
 .pre-commit-config.yaml                |  31 +-
 codeclash/agents/__init__.py           |   4 +-
 codeclash/agents/abstract.py           |  34 +--
 codeclash/agents/minisweagent.py       |  13 +-
 codeclash/agents/utils.py              |   6 +-
 codeclash/games/abstract.py            |  12 +-
 codeclash/games/battlecode/main.py     |  17 +-
 codeclash/games/battlesnake/main.py    |  12 +-
 codeclash/games/corewar/main.py        |   8 +-
 codeclash/games/robocode/main.py       |  12 +-
 codeclash/games/robotrumble/main.py    |   4 +-
 codeclash/tournaments/abstract.py      |  20 +-
 codeclash/tournaments/pvp.py           |  16 +-
 codeclash/tournaments/single_player.py |  31 +-
 codeclash/utils/environment.py         |  16 +-
 codeclash/utils/log.py                 |   4 +-
 codeclash/viewer/app.py                |  30 +-
 codeclash/viewer/static/css/style.css  |  21 +-
 codeclash/viewer/static/js/app.js      | 403 +++++++++++++------------
 main.py                                |   2 +-
 main_single_player.py                  |   2 +-
 pyproject.toml                         | 145 +++++++++
 tests/test_integration.py              |  11 +-
 23 files changed, 464 insertions(+), 390 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3e4cc231..8cafb6df 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,9 @@
+ci:
+  autoupdate_commit_msg: "chore: update pre-commit hooks"
+
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v6.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -9,14 +12,24 @@ repos:
       - id: check-merge-conflict
       - id: debug-statements
 
-  - repo: https://github.com/psf/black
-    rev: 23.3.0
+  - repo: https://github.com/crate-ci/typos
+    rev: v1
+    hooks:
+      - id: typos
+        files: \.(py|md|rst|yaml|toml)
+        exclude: pyproject.toml
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.12.10
     hooks:
-      - id: black
-        language_version: python3
+      # Run the linter.
+      - id: ruff
+        args: ["--fix"]
+      # Run the formatter.
+      - id: ruff-format
 
-  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: "v4.0.0-alpha.8" # Use the sha or tag you want to point at
     hooks:
-      - id: isort
-        args: ["--profile", "black"]
+      - id: prettier
+        types_or: ["javascript", "css"]
diff --git a/codeclash/agents/__init__.py b/codeclash/agents/__init__.py
index 5dd9d0a6..a76b3569 100644
--- a/codeclash/agents/__init__.py
+++ b/codeclash/agents/__init__.py
@@ -6,9 +6,7 @@
 from codeclash.agents.utils import GameContext
 
 
-def get_agent(
-    config: dict, game_context: GameContext, environment: DockerEnvironment
-) -> Player:
+def get_agent(config: dict, game_context: GameContext, environment: DockerEnvironment) -> Player:
     agents = {
         "dummy": Dummy,
         "mini": MiniSWEAgent,
diff --git a/codeclash/agents/abstract.py b/codeclash/agents/abstract.py
index 5d30b652..6dff865f 100644
--- a/codeclash/agents/abstract.py
+++ b/codeclash/agents/abstract.py
@@ -24,7 +24,7 @@ def __init__(
         self.config = config
         self.name = config["name"]
         self._player_unique_id = uuid.uuid4()
-        """Unique ID that doesn't clash even accross multiple games. Used for git tags."""
+        """Unique ID that doesn't clash even across multiple games. Used for git tags."""
         self.environment = environment
         self.game_context = game_context
         self.logger = get_logger(
@@ -51,9 +51,7 @@ def post_run_hook(self, *, round: int) -> None:
         """Should be called after we called the run method."""
         self._commit()
         self._metadata["diff"][round] = self._get_round_diff(round)
-        self._metadata["incremental_diff"][round] = self._get_round_diff(
-            round, incremental=True
-        )
+        self._metadata["incremental_diff"][round] = self._get_round_diff(round, incremental=True)
 
     @abstractmethod
     def run(self) -> None:
@@ -76,23 +74,15 @@ def push(self) -> None:
             "git push origin --tags",
         ]:
             assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger)
-        self.logger.info(
-            f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})"
-        )
+        self.logger.info(f"Pushed {self.name} commit history to remote repository (branch {self._branch_name})")
 
-    def reset_and_apply_patch(
-        self, patch: str, *, base_commit: str = "", filter_patch: bool = True
-    ) -> None:
-        """Clean all uncommited changes. If base_commit is provided, reset to that commit.
+    def reset_and_apply_patch(self, patch: str, *, base_commit: str = "", filter_patch: bool = True) -> None:
+        """Clean all uncommitted changes. If base_commit is provided, reset to that commit.
         Then apply the patch to the codebase.
         """
         # Need to clean before we copy over the patch (else it's gonna be removed by git clean)
         self.logger.debug(
-            assert_zero_exit_code(
-                self.environment.execute(
-                    f"git reset --hard {base_commit} && git clean -fd"
-                )
-            )
+            assert_zero_exit_code(self.environment.execute(f"git reset --hard {base_commit} && git clean -fd"))
         )
 
         patch = filter_git_diff(patch) if filter_patch else patch
@@ -112,9 +102,7 @@ def reset_and_apply_patch(
         commands = ["git status", "git apply tmp_patch.txt", "rm -f tmp_patch.txt"]
         for cmd in commands:
             self.logger.debug(f"Executing command: {cmd}")
-            out = assert_zero_exit_code(
-                self.environment.execute(cmd), logger=self.logger
-            )
+            out = assert_zero_exit_code(self.environment.execute(cmd), logger=self.logger)
             self.logger.debug(out)
 
     # --- Helper methods ---
@@ -122,9 +110,7 @@ def reset_and_apply_patch(
     def _tag_round(self, round: int) -> None:
         """Git tag the codebase at the given round."""
         assert_zero_exit_code(
-            self.environment.execute(
-                f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'"
-            ),
+            self.environment.execute(f"git tag -a {self._get_round_tag_name(round)} -m 'Round {round} Update'"),
             logger=self.logger,
         )
 
@@ -161,9 +147,7 @@ def _get_round_diff(self, round: int, *, incremental: bool = False) -> str:
             previous_round_tag = self._get_round_tag_name(0)
         current_round_tag = self._get_round_tag_name(round)
         out = assert_zero_exit_code(
-            self.environment.execute(
-                f"git diff {previous_round_tag}..{current_round_tag}"
-            ),
+            self.environment.execute(f"git diff {previous_round_tag}..{current_round_tag}"),
             logger=self.logger,
         )
         return out["output"]
diff --git a/codeclash/agents/minisweagent.py b/codeclash/agents/minisweagent.py
index d1729628..fed2b2b6 100644
--- a/codeclash/agents/minisweagent.py
+++ b/codeclash/agents/minisweagent.py
@@ -47,9 +47,7 @@ def add_message(self, role: str, content: str, **kwargs):
         super().add_message(role, content, **kwargs)
         self.logger.debug(f"[{role}] {content}", extra={"highlighter": None})
         if role == "assistant":
-            self.logger.info(
-                f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})"
-            )
+            self.logger.info(f"Step taken (step {self.model.n_calls}, cost {self.model.cost:.2f})")
 
     def render_template(self, template: str, **kwargs) -> str:
         cs = (
@@ -69,9 +67,7 @@ def run(self) -> tuple[str, str]:
 class MiniSWEAgent(Player):
     """Player with agentic code editing capabilities"""
 
-    def __init__(
-        self, config: dict, environment: DockerEnvironment, game_context: GameContext
-    ):
+    def __init__(self, config: dict, environment: DockerEnvironment, game_context: GameContext):
         super().__init__(config, environment=environment, game_context=game_context)
 
     def run(self):
@@ -96,10 +92,7 @@ def run(self):
             result = exc_message
             print(exc_message)
         finally:
-            traj_path = (
-                self.game_context.log_local
-                / f"{self.name}_r{self.game_context.round}.traj.json"
-            )
+            traj_path = self.game_context.log_local / f"{self.name}_r{self.game_context.round}.traj.json"
             save_traj(
                 self.agent,  # type: ignore
                 traj_path,
diff --git a/codeclash/agents/utils.py b/codeclash/agents/utils.py
index 16ecd13a..13fe3aff 100644
--- a/codeclash/agents/utils.py
+++ b/codeclash/agents/utils.py
@@ -13,6 +13,7 @@ def resolve_api_key(model: str) -> str:
         return os.getenv("ANTHROPIC_API_KEY")
     if "gpt" in model:
         return os.getenv("OPENAI_API_KEY")
+    return ""
 
 
 @dataclass
@@ -38,10 +39,7 @@ class GameContext:
 
     def _render_prompt_templates(self) -> dict:
         context = asdict(self)
-        return {
-            key: Template(template_str).render(**context)
-            for key, template_str in self.prompts.items()
-        }
+        return {key: Template(template_str).render(**context) for key, template_str in self.prompts.items()}
 
     def to_template_vars(self) -> dict[str, str]:
         """Convert the GameContext to a dictionary for rendering prompts in the agent"""
diff --git a/codeclash/games/abstract.py b/codeclash/games/abstract.py
index fbbfefdf..82f80a32 100644
--- a/codeclash/games/abstract.py
+++ b/codeclash/games/abstract.py
@@ -17,9 +17,7 @@
 @dataclass
 class RoundStats:
     winner: str
-    scores: dict[
-        str, float
-    ]  # Map of player to game metric (e.g. # of wins, assets accumulated)
+    scores: dict[str, float]  # Map of player to game metric (e.g. # of wins, assets accumulated)
     details: dict[str, Any] = None  # Optional, for game-specific info
 
     def __str__(self) -> str:
@@ -63,9 +61,7 @@ def __init__(self, config: dict, *, tournament_id: str, local_output_dir: Path):
         self.game_id: str = tournament_id
         self.log_env: Path = (DIR_WORK / DIR_LOGS / self.game_id).resolve()
         self.log_local: Path = local_output_dir
-        self.logger = get_logger(
-            self.name, log_path=self.log_local / "game.log", emoji="🏓"
-        )
+        self.logger = get_logger(self.name, log_path=self.log_local / "game.log", emoji="🏓")
         self.environment: DockerEnvironment = self.get_environment()
         """The running docker environment for executing the game"""
         self._metadata: dict = {
@@ -106,9 +102,7 @@ def build_image(self):
         if result.returncode == 0:
             self.logger.info(f"✅ Built Docker image {self.image_name}")
         else:
-            self.logger.error(
-                f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}"
-            )
+            self.logger.error(f"❌ Failed to build Docker image: {result.stderr}\n{result.stdout}{result.stderr}")
             raise RuntimeError(f"Failed to build Docker image: {result.stderr}")
 
     def get_metadata(self) -> dict:
diff --git a/codeclash/games/battlecode/main.py b/codeclash/games/battlecode/main.py
index f2cf4ca2..743e0788 100644
--- a/codeclash/games/battlecode/main.py
+++ b/codeclash/games/battlecode/main.py
@@ -12,9 +12,7 @@ class BattleCodeGame(CodeGame):
     name: str = "BattleCode"
 
     def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
-        super().__init__(
-            config, tournament_id=tournament_id, local_output_dir=local_output_dir
-        )
+        super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir)
         assert len(config["players"]) == 2, "BattleCode is a two-player game"
         self.run_cmd_round: str = "python run.py run"
         for arg, val in self.game_config.get("args", {}).items():
@@ -36,9 +34,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats:
                 winner_key = match.group(1)
                 self.logger.debug(f"Winner key from match: {winner_key}")
                 # Map A/B to actual agent names (much closer to original code)
-                winner = {"A": agents[0].name, "B": agents[1].name}.get(
-                    winner_key, RESULT_TIE
-                )
+                winner = {"A": agents[0].name, "B": agents[1].name}.get(winner_key, RESULT_TIE)
                 winners.append(winner)
             else:
                 winners.append(RESULT_TIE)
@@ -49,14 +45,9 @@ def get_stats(self, result_outputs: list[str], agents: list[Any]) -> RoundStats:
 
     def execute_round(self, agents: list[Any]) -> RoundData:
         for agent in agents:
-            src, dest = f"/{agent.name}/src/mysubmission/", str(
-                DIR_WORK / "src" / agent.name
-            )
+            src, dest = f"/{agent.name}/src/mysubmission/", str(DIR_WORK / "src" / agent.name)
             self.environment.execute(f"cp -r {src} {dest}")
-        args = [
-            f"--p{idx+1}-dir src --p{idx+1} {agent.name}"
-            for idx, agent in enumerate(agents)
-        ]
+        args = [f"--p{idx + 1}-dir src --p{idx + 1} {agent.name}" for idx, agent in enumerate(agents)]
         cmd = f"{self.run_cmd_round} {' '.join(args)}"
         self.logger.info(f"Running game: {cmd}")
         outputs = []
diff --git a/codeclash/games/battlesnake/main.py b/codeclash/games/battlesnake/main.py
index c0c93c5b..e5cf4e69 100644
--- a/codeclash/games/battlesnake/main.py
+++ b/codeclash/games/battlesnake/main.py
@@ -14,9 +14,7 @@ class BattleSnakeGame(CodeGame):
     name: str = "BattleSnake"
 
     def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
-        super().__init__(
-            config, tournament_id=tournament_id, local_output_dir=local_output_dir
-        )
+        super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir)
         self.run_cmd_round: str = "./battlesnake play"
         for arg, val in self.game_config.get("args", {}).items():
             if isinstance(val, bool):
@@ -29,9 +27,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta
         winners = []
         for ro in result_outputs:
             lines = ro.strip().split("\n")
-            last_line = (
-                lines[-1] if lines else ""
-            )  # Get the last line which contains the game result
+            last_line = lines[-1] if lines else ""  # Get the last line which contains the game result
             winner = json.loads(last_line)["winnerName"]
             winners.append(winner)
 
@@ -48,9 +44,7 @@ def execute_round(self, agents: list[Player]) -> RoundData:
         for idx, agent in enumerate(agents):
             port = 8001 + idx
             # Start server in background - just add & to run in background!
-            self.environment.execute(
-                f"PORT={port} python main.py &", cwd=f"/{agent.name}"
-            )
+            self.environment.execute(f"PORT={port} python main.py &", cwd=f"/{agent.name}")
             cmd.append(f"--url http://0.0.0.0:{port} -n {agent.name}")
 
         time.sleep(3)  # Give servers time to start
diff --git a/codeclash/games/corewar/main.py b/codeclash/games/corewar/main.py
index a082a02c..a1193954 100644
--- a/codeclash/games/corewar/main.py
+++ b/codeclash/games/corewar/main.py
@@ -10,9 +10,7 @@ class CoreWarGame(CodeGame):
     name: str = "CoreWar"
 
     def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
-        super().__init__(
-            config, tournament_id=tournament_id, local_output_dir=local_output_dir
-        )
+        super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir)
         self.run_cmd_round: str = "./src/pmars"
         for arg, val in self.game_config.get("args", {}).items():
             if isinstance(val, bool):
@@ -50,9 +48,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta
             )
         else:
             self.logger.debug("No scores found, returning unknown")
-            return RoundStats(
-                winner="unknown", scores={agent.name: 0 for agent in agents}
-            )
+            return RoundStats(winner="unknown", scores={agent.name: 0 for agent in agents})
 
     def execute_round(self, agents: list[Player]) -> RoundData:
         args = [f"/{agent.name}/warriors/warrior.red" for agent in agents]
diff --git a/codeclash/games/robocode/main.py b/codeclash/games/robocode/main.py
index 880bde96..821eccbd 100644
--- a/codeclash/games/robocode/main.py
+++ b/codeclash/games/robocode/main.py
@@ -11,9 +11,7 @@ class RoboCodeGame(CodeGame):
     name: str = "RoboCode"
 
     def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
-        super().__init__(
-            config, tournament_id=tournament_id, local_output_dir=local_output_dir
-        )
+        super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir)
         self.run_cmd_round: str = "./robocode.sh"
         for arg, val in self.game_config.get("args", {}).items():
             if isinstance(val, bool):
@@ -73,9 +71,7 @@ def get_stats(self, result_outputs: list[str], agents: list[Player]) -> RoundSta
                 if int(match.group(1)) == 1:
                     winner = player
 
-        return RoundStats(
-            winner=winner, scores=scores, details={"stdout": "\n".join(lines)}
-        )
+        return RoundStats(winner=winner, scores=scores, details={"stdout": "\n".join(lines)})
 
     def execute_round(self, agents: list[Player]) -> RoundData:
         for agent in agents:
@@ -96,9 +92,7 @@ def execute_round(self, agents: list[Player]) -> RoundData:
 {self._get_battle_config()}
 robocode.battle.selectedRobots={selected_robots}
 """
-        create_file_in_container(
-            self.environment, content=battle_content, dest_path=f"battles/{battle_file}"
-        )
+        create_file_in_container(self.environment, content=battle_content, dest_path=f"battles/{battle_file}")
 
         # Run battle with results output to file
         results_file = f"results_{int(time.time())}.txt"
diff --git a/codeclash/games/robotrumble/main.py b/codeclash/games/robotrumble/main.py
index 56a4febb..a70be082 100644
--- a/codeclash/games/robotrumble/main.py
+++ b/codeclash/games/robotrumble/main.py
@@ -11,9 +11,7 @@ class RobotRumbleGame(CodeGame):
     name: str = "RobotRumble"
 
     def __init__(self, config, *, tournament_id: str, local_output_dir: Path):
-        super().__init__(
-            config, tournament_id=tournament_id, local_output_dir=local_output_dir
-        )
+        super().__init__(config, tournament_id=tournament_id, local_output_dir=local_output_dir)
         assert len(config["players"]) == 2, "RobotRumble is a two-player game"
         self.run_cmd_round: str = "./rumblebot run term"
 
diff --git a/codeclash/tournaments/abstract.py b/codeclash/tournaments/abstract.py
index 0c5784be..f581de6d 100644
--- a/codeclash/tournaments/abstract.py
+++ b/codeclash/tournaments/abstract.py
@@ -12,26 +12,18 @@ class AbstractTournament:
     def __init__(self, config: dict, *, name: str, **kwargs):
         self.config: dict = config
         self.name: str = name
-        self.tournament_id: str = (
-            f"{self.name}.{config['game']['name']}.{time.strftime('%y%m%d%H%M%S')}"
-        )
-        self.local_output_dir: Path = (
-            DIR_LOGS / getpass.getuser() / self.tournament_id
-        ).resolve()
+        self.tournament_id: str = f"{self.name}.{config['game']['name']}.{time.strftime('%y%m%d%H%M%S')}"
+        self.local_output_dir: Path = (DIR_LOGS / getpass.getuser() / self.tournament_id).resolve()
         self._metadata: dict = {
             "name": self.name,
             "tournament_id": self.tournament_id,
         }
-        self.logger = get_logger(
-            self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆"
-        )
+        self.logger = get_logger(self.name, log_path=self.local_output_dir / "tournament.log", emoji="🏆")
 
     def get_metadata(self) -> dict:
         return self._metadata
 
-    def _copy_game_log_to_agent(
-        self, agent, round_num: int, log_output: str, dest_path: str = None
-    ) -> None:
+    def _copy_game_log_to_agent(self, agent, round_num: int, log_output: str, dest_path: str = None) -> None:
         """Copy round log to agent environment."""
         try:
             create_file_in_container(
@@ -40,6 +32,4 @@ def _copy_game_log_to_agent(
                 dest_path=dest_path if dest_path else f"logs/round_{round_num}.log",
             )
         except Exception:
-            self.logger.error(
-                f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}"
-            )
+            self.logger.error(f"Error creating round log in {agent.name}'s container: {traceback.format_exc()}")
diff --git a/codeclash/tournaments/pvp.py b/codeclash/tournaments/pvp.py
index dd670de1..562bae48 100644
--- a/codeclash/tournaments/pvp.py
+++ b/codeclash/tournaments/pvp.py
@@ -14,9 +14,7 @@
 
 
 class PvpTournament(AbstractTournament):
-    def __init__(
-        self, config: dict, *, cleanup: bool = False, push_agent: bool = False
-    ):
+    def __init__(self, config: dict, *, cleanup: bool = False, push_agent: bool = False):
         super().__init__(config, name="PvpTournament")
         self.cleanup_on_end = cleanup
         self.push_agent = push_agent
@@ -37,9 +35,7 @@ def rounds(self) -> int:
 
     def get_agent(self, agent_config: dict, prompts: dict) -> Player:
         """Create an agent with environment and game context."""
-        environment = self.game.get_environment(
-            f"{self.game.game_id}.{agent_config['name']}"
-        )
+        environment = self.game.get_environment(f"{self.game.game_id}.{agent_config['name']}")
 
         game_context = GameContext(
             id=self.game.game_id,
@@ -77,16 +73,12 @@ def run_training_round(self, round_num: int) -> None:
 
         # Write log to file
         for idx, lo in enumerate(record.data.logs):
-            round_log_path = (
-                self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
-            )
+            round_log_path = self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
             round_log_path.write_text(lo)
 
         # Copy log to agent environments
         for agent in self.agents:
-            self.logger.info(
-                f"Copying round {round_num} log(s) to {agent.name}'s container..."
-            )
+            self.logger.info(f"Copying round {round_num} log(s) to {agent.name}'s container...")
             copy_to_container(
                 agent.environment,
                 self.game.log_local / f"round_{round_num}",
diff --git a/codeclash/tournaments/single_player.py b/codeclash/tournaments/single_player.py
index 651f559c..127309f1 100644
--- a/codeclash/tournaments/single_player.py
+++ b/codeclash/tournaments/single_player.py
@@ -51,9 +51,7 @@ def get_game_context(self, agent_config: dict, *, round: int) -> GameContext:
 
     def get_agent(self, agent_config: dict, round: int) -> Player:
         """Create an agent with environment and game context."""
-        environment = self.game.get_environment(
-            f"{self.game.game_id}.{agent_config['name']}"
-        )
+        environment = self.game.get_environment(f"{self.game.game_id}.{agent_config['name']}")
         game_context = self.get_game_context(agent_config, round=round)
         return get_agent(agent_config, game_context, environment)
 
@@ -86,15 +84,11 @@ def run_training_round(self, round_num: int) -> None:
 
         # Write log to file
         for idx, lo in enumerate(record.logs):
-            round_log_path = (
-                self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
-            )
+            round_log_path = self.game.log_local / f"round_{round_num}" / f"sim_{idx}.log"
             round_log_path.write_text(lo)
 
         # Copy log to main agent environment only
-        self.logger.info(
-            f"Copying round {round_num} log(s) to {self.agent.name}'s container..."
-        )
+        self.logger.info(f"Copying round {round_num} log(s) to {self.agent.name}'s container...")
         copy_to_container(
             self.agent,
             self.game.log_local / f"round_{round_num}",
@@ -135,28 +129,19 @@ def evaluate(self, n_repetitions: int = 3):
         p2_config["name"] = "p2"
         p2 = self.get_dummy_agent()
         matrix = {
-            p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)}
-            for p1_round in range(0, self.rounds + 1)
+            p1_round: {p2_round: [] for p2_round in range(0, self.rounds + 1)} for p1_round in range(0, self.rounds + 1)
         }
         for p1_round in range(0, self.rounds + 1):
             for p2_round in range(0, self.rounds + 1):
-                self.logger.info(
-                    f"Evaluating agent at round {p1_round} against agent at round {p2_round}"
-                )
-                p1_patch = (
-                    self.agent.get_metadata()["diff"][p1_round] if p1_round > 0 else ""
-                )
-                p2_patch = (
-                    self.agent.get_metadata()["diff"][p2_round] if p2_round > 0 else ""
-                )
+                self.logger.info(f"Evaluating agent at round {p1_round} against agent at round {p2_round}")
+                p1_patch = self.agent.get_metadata()["diff"][p1_round] if p1_round > 0 else ""
+                p2_patch = self.agent.get_metadata()["diff"][p2_round] if p2_round > 0 else ""
                 p1.reset_and_apply_patch(p1_patch)
                 p2.reset_and_apply_patch(p2_patch)
                 for i_repetition in range(n_repetitions):
                     record = self.game.run_round([p1, p2])
                     winner = record.stats.winner
-                    self.logger.info(
-                        f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}"
-                    )
+                    self.logger.info(f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {winner}")
                     matrix[p1_round][p2_round].append(winner)
         self.logger.info(f"Evaluation matrix: {matrix}")
         return matrix
diff --git a/codeclash/utils/environment.py b/codeclash/utils/environment.py
index b6b84c81..64f8dfb6 100644
--- a/codeclash/utils/environment.py
+++ b/codeclash/utils/environment.py
@@ -6,9 +6,7 @@
 from minisweagent.environments.docker import DockerEnvironment
 
 
-def assert_zero_exit_code(
-    result: dict, *, logger: logging.Logger | None = None
-) -> dict:
+def assert_zero_exit_code(result: dict, *, logger: logging.Logger | None = None) -> dict:
     if result.get("returncode", 0) != 0:
         msg = f"Command failed with exit code {result.get('returncode')}:\n{result.get('output')}"
         if logger is not None:
@@ -36,18 +34,14 @@ def copy_between_containers(
             f"{src_container.container_id}:{src_path}",
             str(temp_path),
         ]
-        result_src = subprocess.run(
-            cmd_src, check=False, capture_output=True, text=True
-        )
+        result_src = subprocess.run(cmd_src, check=False, capture_output=True, text=True)
         if result_src.returncode != 0:
             raise RuntimeError(
                 f"Failed to copy from {src_container.container_id} to local temp: {result_src.stdout}{result_src.stderr}"
             )
 
         # Ensure destination folder exists
-        assert_zero_exit_code(
-            dest_container.execute(f"mkdir -p {Path(dest_path).parent}")
-        )
+        assert_zero_exit_code(dest_container.execute(f"mkdir -p {Path(dest_path).parent}"))
 
         # Copy from temporary local directory to destination container
         cmd_dest = [
@@ -56,9 +50,7 @@ def copy_between_containers(
             str(temp_path),
             f"{dest_container.container_id}:{dest_path}",
         ]
-        result_dest = subprocess.run(
-            cmd_dest, check=False, capture_output=True, text=True
-        )
+        result_dest = subprocess.run(cmd_dest, check=False, capture_output=True, text=True)
         if result_dest.returncode != 0:
             raise RuntimeError(
                 f"Failed to copy from local temp to {dest_container.container_id}: {result_dest.stdout}{result_dest.stderr}"
diff --git a/codeclash/utils/log.py b/codeclash/utils/log.py
index 001a0e11..bd5cacca 100644
--- a/codeclash/utils/log.py
+++ b/codeclash/utils/log.py
@@ -64,9 +64,7 @@ def format(self, record: logging.LogRecord) -> str:
         return capture.get().rstrip()
 
 
-def get_logger(
-    name: str, *, emoji: str = "", log_path: Path | None = None
-) -> logging.Logger:
+def get_logger(name: str, *, emoji: str = "", log_path: Path | None = None) -> logging.Logger:
     """Get logger. Use this instead of `logging.getLogger` to ensure
     that the logger is set up with the correct handlers.
     """
diff --git a/codeclash/viewer/app.py b/codeclash/viewer/app.py
index a8e62920..addfc2aa 100644
--- a/codeclash/viewer/app.py
+++ b/codeclash/viewer/app.py
@@ -8,7 +8,7 @@
 import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 from flask import Flask, jsonify, render_template, request
 
@@ -28,7 +28,7 @@ def is_probably_failed_run(log_dir: Path) -> bool:
     return not metadata_file.exists()
 
 
-def get_round_count_from_metadata(log_dir: Path) -> Optional[int]:
+def get_round_count_from_metadata(log_dir: Path) -> int | None:
     """Extract round count from metadata.json if it exists"""
     metadata_file = log_dir / "metadata.json"
     if not metadata_file.exists():
@@ -45,9 +45,9 @@ def get_round_count_from_metadata(log_dir: Path) -> Optional[int]:
 class GameMetadata:
     """Metadata about a game session"""
 
-    results: Dict[str, Any]
+    results: dict[str, Any]
     main_log: str
-    rounds: List[Dict[str, Any]]
+    rounds: list[dict[str, Any]]
 
 
 @dataclass
@@ -58,10 +58,10 @@ class TrajectoryInfo:
     round_num: int
     api_calls: int
     cost: float
-    exit_status: Optional[str]
-    submission: Optional[str]
-    memory: Optional[str]
-    messages: List[Dict[str, Any]]
+    exit_status: str | None
+    submission: str | None
+    memory: str | None
+    messages: list[dict[str, Any]]
 
 
 class LogParser:
@@ -86,9 +86,7 @@ def parse_game_metadata(self) -> GameMetadata:
 
         # Parse main.log if it exists
         main_log_file = self.log_dir / "game.log"
-        main_log = (
-            main_log_file.read_text() if main_log_file.exists() else "No main log found"
-        )
+        main_log = main_log_file.read_text() if main_log_file.exists() else "No main log found"
 
         # Parse round logs
         rounds = []
@@ -99,9 +97,7 @@ def parse_game_metadata(self) -> GameMetadata:
 
         return GameMetadata(results=results, main_log=main_log, rounds=rounds)
 
-    def parse_trajectory(
-        self, player_id: int, round_num: int
-    ) -> Optional[TrajectoryInfo]:
+    def parse_trajectory(self, player_id: int, round_num: int) -> TrajectoryInfo | None:
         """Parse a specific trajectory file"""
         # Try both .json and .log extensions
         for ext in [".json", ".log"]:
@@ -128,7 +124,7 @@ def parse_trajectory(
 
         return None
 
-    def get_available_trajectories(self) -> List[tuple]:
+    def get_available_trajectories(self) -> list[tuple]:
         """Get list of available trajectory files as (player_id, round_num) tuples"""
         trajectories = []
         for traj_file in self.log_dir.glob("p*_r*.traj.*"):
@@ -186,9 +182,7 @@ def index():
     # Extract just the names for backwards compatibility
     log_folders = [folder["name"] for folder in log_folders_info]
 
-    selected_folder = request.args.get(
-        "folder", log_folders[0] if log_folders else None
-    )
+    selected_folder = request.args.get("folder", log_folders[0] if log_folders else None)
 
     if not selected_folder or not (logs_dir / selected_folder).exists():
         return render_template("no_logs.html", log_folders=log_folders)
diff --git a/codeclash/viewer/static/css/style.css b/codeclash/viewer/static/css/style.css
index 6228241b..315483a0 100644
--- a/codeclash/viewer/static/css/style.css
+++ b/codeclash/viewer/static/css/style.css
@@ -46,7 +46,8 @@
 }
 
 body {
-  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', sans-serif;
+  font-family:
+    -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", sans-serif;
   line-height: 1.6;
   color: var(--text-primary);
   background-color: var(--bg-primary);
@@ -159,7 +160,8 @@ body {
 
 .metadata-display pre {
   margin: 0;
-  font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace;
+  font-family:
+    "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, monospace;
   font-size: 0.875rem;
   line-height: 1.5;
 }
@@ -226,7 +228,12 @@ details summary {
 /* Round separator */
 .round-separator {
   height: 2px;
-  background: linear-gradient(90deg, transparent, var(--border-color), transparent);
+  background: linear-gradient(
+    90deg,
+    transparent,
+    var(--border-color),
+    transparent
+  );
   margin: 2rem 0;
   border-radius: 1px;
 }
@@ -444,7 +451,8 @@ details summary {
 .log-content pre {
   white-space: pre-wrap;
   word-wrap: break-word;
-  font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace;
+  font-family:
+    "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, monospace;
   font-size: 0.875rem;
   line-height: 1.5;
   margin: 0;
@@ -517,7 +525,10 @@ details summary {
 
 /* Smooth transitions */
 * {
-  transition: color 0.3s ease, background-color 0.3s ease, border-color 0.3s ease;
+  transition:
+    color 0.3s ease,
+    background-color 0.3s ease,
+    border-color 0.3s ease;
 }
 
 /* Focus styles for accessibility */
diff --git a/codeclash/viewer/static/js/app.js b/codeclash/viewer/static/js/app.js
index a503b542..b65ff030 100644
--- a/codeclash/viewer/static/js/app.js
+++ b/codeclash/viewer/static/js/app.js
@@ -2,257 +2,274 @@
 
 // Theme management
 function initializeTheme() {
-    // Check for saved theme preference or default to 'light'
-    const savedTheme = localStorage.getItem('theme') || 'light';
-    setTheme(savedTheme);
+  // Check for saved theme preference or default to 'light'
+  const savedTheme = localStorage.getItem("theme") || "light";
+  setTheme(savedTheme);
 }
 
 function setTheme(theme) {
-    document.documentElement.setAttribute('data-theme', theme);
-    localStorage.setItem('theme', theme);
-
-    // Update theme toggle button
-    const themeToggle = document.getElementById('theme-toggle');
-    const themeIcon = themeToggle.querySelector('.theme-icon');
-
-    if (theme === 'dark') {
-        themeIcon.textContent = '☀️';
-        themeToggle.setAttribute('aria-label', 'Switch to light mode');
-    } else {
-        themeIcon.textContent = '🌙';
-        themeToggle.setAttribute('aria-label', 'Switch to dark mode');
-    }
+  document.documentElement.setAttribute("data-theme", theme);
+  localStorage.setItem("theme", theme);
+
+  // Update theme toggle button
+  const themeToggle = document.getElementById("theme-toggle");
+  const themeIcon = themeToggle.querySelector(".theme-icon");
+
+  if (theme === "dark") {
+    themeIcon.textContent = "☀️";
+    themeToggle.setAttribute("aria-label", "Switch to light mode");
+  } else {
+    themeIcon.textContent = "🌙";
+    themeToggle.setAttribute("aria-label", "Switch to dark mode");
+  }
 }
 
 function toggleTheme() {
-    const currentTheme = document.documentElement.getAttribute('data-theme');
-    const newTheme = currentTheme === 'dark' ? 'light' : 'dark';
-    setTheme(newTheme);
+  const currentTheme = document.documentElement.getAttribute("data-theme");
+  const newTheme = currentTheme === "dark" ? "light" : "dark";
+  setTheme(newTheme);
 }
 
 // Folder selection
 function changeFolder() {
-    const select = document.getElementById('folder-select');
-    const selectedFolder = select.value;
-
-    if (selectedFolder) {
-        // Reload page with new folder parameter
-        const url = new URL(window.location);
-        url.searchParams.set('folder', selectedFolder);
-        window.location.href = url.toString();
-    }
+  const select = document.getElementById("folder-select");
+  const selectedFolder = select.value;
+
+  if (selectedFolder) {
+    // Reload page with new folder parameter
+    const url = new URL(window.location);
+    url.searchParams.set("folder", selectedFolder);
+    window.location.href = url.toString();
+  }
 }
 
 // Enhanced foldout behavior
 function initializeFoldouts() {
-    // Add smooth animations to details elements
-    const detailsElements = document.querySelectorAll('details');
-
-    detailsElements.forEach(details => {
-        const summary = details.querySelector('summary');
-
-        // Add click analytics/feedback
-        summary.addEventListener('click', function(e) {
-            // Small delay to allow default behavior
-            setTimeout(() => {
-                // Scroll into view if needed
-                if (details.open) {
-                    const rect = details.getBoundingClientRect();
-                    const isInViewport = rect.top >= 0 && rect.bottom <= window.innerHeight;
-
-                    if (!isInViewport) {
-                        details.scrollIntoView({
-                            behavior: 'smooth',
-                            block: 'nearest'
-                        });
-                    }
-                }
-            }, 100);
-        });
+  // Add smooth animations to details elements
+  const detailsElements = document.querySelectorAll("details");
+
+  detailsElements.forEach((details) => {
+    const summary = details.querySelector("summary");
+
+    // Add click analytics/feedback
+    summary.addEventListener("click", function (e) {
+      // Small delay to allow default behavior
+      setTimeout(() => {
+        // Scroll into view if needed
+        if (details.open) {
+          const rect = details.getBoundingClientRect();
+          const isInViewport =
+            rect.top >= 0 && rect.bottom <= window.innerHeight;
+
+          if (!isInViewport) {
+            details.scrollIntoView({
+              behavior: "smooth",
+              block: "nearest",
+            });
+          }
+        }
+      }, 100);
     });
+  });
 }
 
 // Keyboard shortcuts
 function initializeKeyboardShortcuts() {
-    document.addEventListener('keydown', function(e) {
-        // Ctrl/Cmd + D: Toggle dark mode
-        if ((e.ctrlKey || e.metaKey) && e.key === 'd') {
-            e.preventDefault();
-            toggleTheme();
-        }
-
-        // Escape: Close all open details
-        if (e.key === 'Escape') {
-            const openDetails = document.querySelectorAll('details[open]');
-            openDetails.forEach(details => {
-                details.removeAttribute('open');
-            });
-        }
-
-        // Ctrl/Cmd + E: Expand all details
-        if ((e.ctrlKey || e.metaKey) && e.key === 'e') {
-            e.preventDefault();
-            const allDetails = document.querySelectorAll('details');
-            allDetails.forEach(details => {
-                details.setAttribute('open', '');
-            });
-        }
+  document.addEventListener("keydown", function (e) {
+    // Ctrl/Cmd + D: Toggle dark mode
+    if ((e.ctrlKey || e.metaKey) && e.key === "d") {
+      e.preventDefault();
+      toggleTheme();
+    }
 
-        // Ctrl/Cmd + Shift + E: Collapse all details
-        if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === 'E') {
-            e.preventDefault();
-            const allDetails = document.querySelectorAll('details');
-            allDetails.forEach(details => {
-                details.removeAttribute('open');
-            });
-        }
+    // Escape: Close all open details
+    if (e.key === "Escape") {
+      const openDetails = document.querySelectorAll("details[open]");
+      openDetails.forEach((details) => {
+        details.removeAttribute("open");
+      });
+    }
 
+    // Ctrl/Cmd + E: Expand all details
+    if ((e.ctrlKey || e.metaKey) && e.key === "e") {
+      e.preventDefault();
+      const allDetails = document.querySelectorAll("details");
+      allDetails.forEach((details) => {
+        details.setAttribute("open", "");
+      });
+    }
 
-    });
+    // Ctrl/Cmd + Shift + E: Collapse all details
+    if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === "E") {
+      e.preventDefault();
+      const allDetails = document.querySelectorAll("details");
+      allDetails.forEach((details) => {
+        details.removeAttribute("open");
+      });
+    }
+  });
 }
 
-
-
 // Code highlighting (basic syntax highlighting)
 function initializeCodeHighlighting() {
-    const codeBlocks = document.querySelectorAll('.code-block code, .message-text pre');
+  const codeBlocks = document.querySelectorAll(
+    ".code-block code, .message-text pre",
+  );
 
-    codeBlocks.forEach(block => {
-        const text = block.textContent;
+  codeBlocks.forEach((block) => {
+    const text = block.textContent;
 
-        // Simple bash highlighting
-        if (text.includes('#!/bin/bash') || text.includes('```bash')) {
-            block.classList.add('language-bash');
-            highlightBash(block);
-        }
+    // Simple bash highlighting
+    if (text.includes("#!/bin/bash") || text.includes("```bash")) {
+      block.classList.add("language-bash");
+      highlightBash(block);
+    }
 
-        // Simple Python highlighting
-        if (text.includes('def ') || text.includes('import ') || text.includes('python')) {
-            block.classList.add('language-python');
-            highlightPython(block);
-        }
-    });
+    // Simple Python highlighting
+    if (
+      text.includes("def ") ||
+      text.includes("import ") ||
+      text.includes("python")
+    ) {
+      block.classList.add("language-python");
+      highlightPython(block);
+    }
+  });
 }
 
 function highlightBash(block) {
-    let html = block.innerHTML;
+  let html = block.innerHTML;
 
-    // Commands
-    html = html.replace(/\b(ls|cd|cat|grep|sed|awk|find|mkdir|rm|cp|mv|chmod|echo|export)\b/g,
-        '<span style="color: var(--accent-color); font-weight: 600;">$1</span>');
+  // Commands
+  html = html.replace(
+    /\b(ls|cd|cat|grep|sed|awk|find|mkdir|rm|cp|mv|chmod|echo|export)\b/g,
+    '<span style="color: var(--accent-color); font-weight: 600;">$1</span>',
+  );
 
-    // Flags
-    html = html.replace(/\s(-[a-zA-Z]+)/g,
-        ' <span style="color: var(--warning-color);">$1</span>');
+  // Flags
+  html = html.replace(
+    /\s(-[a-zA-Z]+)/g,
+    ' <span style="color: var(--warning-color);">$1</span>',
+  );
 
-    block.innerHTML = html;
+  block.innerHTML = html;
 }
 
 function highlightPython(block) {
-    let html = block.innerHTML;
+  let html = block.innerHTML;
 
-    // Keywords
-    html = html.replace(/\b(def|class|import|from|if|else|elif|for|while|try|except|finally|return|yield|with|as|pass|break|continue|lambda|global|nonlocal)\b/g,
-        '<span style="color: var(--accent-color); font-weight: 600;">$1</span>');
+  // Keywords
+  html = html.replace(
+    /\b(def|class|import|from|if|else|elif|for|while|try|except|finally|return|yield|with|as|pass|break|continue|lambda|global|nonlocal)\b/g,
+    '<span style="color: var(--accent-color); font-weight: 600;">$1</span>',
+  );
 
-    // Strings
-    html = html.replace(/(["'])((?:\\.|(?!\1)[^\\])*?)\1/g,
-        '<span style="color: var(--success-color);">$1$2$1</span>');
+  // Strings
+  html = html.replace(
+    /(["'])((?:\\.|(?!\1)[^\\])*?)\1/g,
+    '<span style="color: var(--success-color);">$1$2$1</span>',
+  );
 
-    block.innerHTML = html;
+  block.innerHTML = html;
 }
 
 // Performance monitoring
 function initializePerformanceMonitoring() {
-    // Log page load time
-    window.addEventListener('load', function() {
-        const loadTime = performance.now();
-        console.log(`Page loaded in ${loadTime.toFixed(2)}ms`);
-
-        // Count elements for performance insight
-        const messageCount = document.querySelectorAll('.message-block').length;
-        const foldoutCount = document.querySelectorAll('details').length;
-
-        console.log(`Rendered ${messageCount} messages and ${foldoutCount} foldouts`);
-    });
+  // Log page load time
+  window.addEventListener("load", function () {
+    const loadTime = performance.now();
+    console.log(`Page loaded in ${loadTime.toFixed(2)}ms`);
+
+    // Count elements for performance insight
+    const messageCount = document.querySelectorAll(".message-block").length;
+    const foldoutCount = document.querySelectorAll("details").length;
+
+    console.log(
+      `Rendered ${messageCount} messages and ${foldoutCount} foldouts`,
+    );
+  });
 }
 
 // Message expand/collapse functionality
 function expandMessage(clickedElement) {
-    const messageContent = clickedElement.closest('.message-content');
-    const previewShort = messageContent.querySelector('.message-preview-short');
-    const contentFull = messageContent.querySelector('.message-content-full');
-    const contentExpanded = messageContent.querySelector('.message-content-expanded');
-
-    // Expanding - hide preview, show full content
-    if (previewShort) previewShort.style.display = 'none';
-    if (contentFull) contentFull.style.display = 'block';
-    if (contentExpanded) contentExpanded.style.display = 'block';
-
-    // Smooth scroll to keep the content in view
-    setTimeout(() => {
-        messageContent.scrollIntoView({
-            behavior: 'smooth',
-            block: 'nearest'
-        });
-    }, 100);
+  const messageContent = clickedElement.closest(".message-content");
+  const previewShort = messageContent.querySelector(".message-preview-short");
+  const contentFull = messageContent.querySelector(".message-content-full");
+  const contentExpanded = messageContent.querySelector(
+    ".message-content-expanded",
+  );
+
+  // Expanding - hide preview, show full content
+  if (previewShort) previewShort.style.display = "none";
+  if (contentFull) contentFull.style.display = "block";
+  if (contentExpanded) contentExpanded.style.display = "block";
+
+  // Smooth scroll to keep the content in view
+  setTimeout(() => {
+    messageContent.scrollIntoView({
+      behavior: "smooth",
+      block: "nearest",
+    });
+  }, 100);
 }
 
 function collapseMessage(clickedElement) {
-    const messageContent = clickedElement.closest('.message-content');
-    const previewShort = messageContent.querySelector('.message-preview-short');
-    const contentFull = messageContent.querySelector('.message-content-full');
-    const contentExpanded = messageContent.querySelector('.message-content-expanded');
+  const messageContent = clickedElement.closest(".message-content");
+  const previewShort = messageContent.querySelector(".message-preview-short");
+  const contentFull = messageContent.querySelector(".message-content-full");
+  const contentExpanded = messageContent.querySelector(
+    ".message-content-expanded",
+  );
+
+  // Collapsing - show preview, hide full content
+  if (contentFull) contentFull.style.display = "none";
+  if (contentExpanded) contentExpanded.style.display = "none";
+  if (previewShort) previewShort.style.display = "block";
+
+  // Smooth scroll to keep the content in view
+  setTimeout(() => {
+    messageContent.scrollIntoView({
+      behavior: "smooth",
+      block: "nearest",
+    });
+  }, 100);
+}
 
-    // Collapsing - show preview, hide full content
-    if (contentFull) contentFull.style.display = 'none';
-    if (contentExpanded) contentExpanded.style.display = 'none';
-    if (previewShort) previewShort.style.display = 'block';
+function collapseTrajectoryMessages(clickedElement) {
+  // Find the parent trajectory messages foldout
+  const trajectoryFoldout = clickedElement.closest(
+    ".trajectory-messages-foldout",
+  );
+
+  if (trajectoryFoldout) {
+    // Close the details element
+    trajectoryFoldout.removeAttribute("open");
 
-    // Smooth scroll to keep the content in view
+    // Smooth scroll to the trajectory header
     setTimeout(() => {
-        messageContent.scrollIntoView({
-            behavior: 'smooth',
-            block: 'nearest'
+      const trajectoryHeader = trajectoryFoldout.closest(".trajectory-header");
+      if (trajectoryHeader) {
+        trajectoryHeader.scrollIntoView({
+          behavior: "smooth",
+          block: "nearest",
         });
+      }
     }, 100);
+  }
 }
 
-function collapseTrajectoryMessages(clickedElement) {
-    // Find the parent trajectory messages foldout
-    const trajectoryFoldout = clickedElement.closest('.trajectory-messages-foldout');
-
-    if (trajectoryFoldout) {
-        // Close the details element
-        trajectoryFoldout.removeAttribute('open');
-
-        // Smooth scroll to the trajectory header
-        setTimeout(() => {
-            const trajectoryHeader = trajectoryFoldout.closest('.trajectory-header');
-            if (trajectoryHeader) {
-                trajectoryHeader.scrollIntoView({
-                    behavior: 'smooth',
-                    block: 'nearest'
-                });
-            }
-        }, 100);
-    }
-}
-
-
-
 // Initialize everything when DOM is loaded
-document.addEventListener('DOMContentLoaded', function() {
-    initializeTheme();
-    initializeFoldouts();
-    initializeKeyboardShortcuts();
-    initializeCodeHighlighting();
-    initializePerformanceMonitoring();
-
-    console.log('CodeClash Trajectory Viewer initialized');
-    console.log('Keyboard shortcuts:');
-    console.log('  Ctrl/Cmd + D: Toggle dark mode');
-    console.log('  Ctrl/Cmd + E: Expand all sections');
-    console.log('  Ctrl/Cmd + Shift + E: Collapse all sections');
-    console.log('  Escape: Close all sections');
+document.addEventListener("DOMContentLoaded", function () {
+  initializeTheme();
+  initializeFoldouts();
+  initializeKeyboardShortcuts();
+  initializeCodeHighlighting();
+  initializePerformanceMonitoring();
+
+  console.log("CodeClash Trajectory Viewer initialized");
+  console.log("Keyboard shortcuts:");
+  console.log("  Ctrl/Cmd + D: Toggle dark mode");
+  console.log("  Ctrl/Cmd + E: Expand all sections");
+  console.log("  Ctrl/Cmd + Shift + E: Collapse all sections");
+  console.log("  Escape: Close all sections");
 });
diff --git a/main.py b/main.py
index 4984870a..9d70146d 100644
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@
 
 
 def main(config_path: str, *, cleanup: bool = False, push_agent: bool = False):
-    with open(config_path, "r") as f:
+    with open(config_path) as f:
         config = yaml.safe_load(f)
     training = PvpTournament(config, cleanup=cleanup, push_agent=push_agent)
     training.run()
diff --git a/main_single_player.py b/main_single_player.py
index 6c7ff720..d02f3a41 100644
--- a/main_single_player.py
+++ b/main_single_player.py
@@ -6,7 +6,7 @@
 
 
 def main(config_path: str, cleanup: bool = False):
-    with open(config_path, "r") as f:
+    with open(config_path) as f:
         config = yaml.safe_load(f)
     training = SinglePlayerTraining(config, cleanup)
     training.run()
diff --git a/pyproject.toml b/pyproject.toml
index 8bc097e8..66848c3e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,152 @@ dev = [
     "pytest",
     "pytest-cov",
     "pytest-xdist",
+    "ruff",
 ]
 
 [tool.setuptools.packages.find]
 include = ["codeclash*"]
+
+[tool.ruff]
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+    # ---- project specific ----
+    "tests/test_data",
+    # Exclude commands so they don't get the __future__ imports
+    "config/commands",
+]
+
+line-length = 120
+indent-width = 4
+
+target-version = "py310"
+
+[tool.ruff.lint]
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
+# McCabe complexity (`C901`) by default.
+# I001: Isort, I002: required import
+select = [
+    # Error (E)
+    "E",
+    # Pylint errors (PLE)
+    "PLE",
+    # pycodestyle
+    "E713",  # not in
+    "E714",  # is not
+    "E711",  # comparison with None
+    # pyflakes
+    "F821",
+    "F822",
+    "F401",  # unused-import
+    "F841",  # unused var
+    "F541",  # f-string without args
+    "F901",  # raise NotImplemented should be raise NotImplementedError
+    # isort
+    "I001",  # isort
+    "I002",  # required import
+    # pyupgrade and related
+    "UP",    # pyupgrade
+    "C401",  # flake8-comprehensions: unnecessary-generator-set
+    "C402",  # flake8-comprehensions: unnecessary-generator-dict
+    "C403",  # flake8-comprehensions: unnecessary-list-comprehension-set
+    "C404",  # flake8-comprehensions: unnecessary-list-comprehension-dict
+    "C405",  # flake8-comprehensions: unnecessary-literal-set
+    "F632",  # pyflakes: is-literal
+    "W605",  # pycodestyle: invalid-escape-sequence
+    # bugbear
+    "B006",  # mutable default
+    "B007",  # unused loop var
+    "B009",  # getattr with constant
+    # flake8-errmsg
+    "EM",
+    # flake8-return
+    "RET",
+    # RUF
+    "RUF019",  # unneeded key in dict check
+    # pytest
+    "PT",
+    # flake8-simplify (SIM)
+    "SIM201",
+    # flake8-use-pathlib
+    "PTH100",
+    "PTH110",
+    "PTH111",
+    "PTH112",
+    "PTH113",
+    "PTH114",
+    "PTH117",
+    "PTH118",
+    "PTH119",
+    "PTH120",
+    "PTH121",
+    "PTH122",
+    "PTH202",
+    "PTH203",
+    "PTH204",
+    "PTH205",
+]
+ignore = [
+    # flake8-return
+    "RET505",  # can't autofix
+    "RET506",  # can't autofix
+    "RET507",  # can't autofix
+    # error (E)
+    "E501",    # line too long
+    "E402",    # import not on top of file
+    "E722",    # bare except
+    "E741",    # ambiguous symbol
+    # pytest
+    "PT011",
+    "PT018",
+    # flake8-errmsg
+    "EM101",   # exception must not use a string literal
+    "EM102",   # exception must not use an f-string literal
+    "EM103",   # exception must not use a .format(...) string directly
+]
+
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+[tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
diff --git a/tests/test_integration.py b/tests/test_integration.py
index feddc06d..f6ff8d17 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -5,11 +5,10 @@
 using DeterministicModel instead of real LLM models.
 """
 
-import os
 import tempfile
+from pathlib import Path
 from unittest.mock import patch
 
-import pytest
 import yaml
 from minisweagent.models.test_models import DeterministicModel
 
@@ -25,12 +24,12 @@ def test_main_battlesnake_integration():
     config_path = "configs/battlesnake.yaml"
 
     # Read the original config
-    with open(config_path, "r") as f:
+    with open(config_path) as f:
         config = yaml.safe_load(f)
 
     # Create a temporary directory for test artifacts
     with tempfile.TemporaryDirectory() as temp_dir:
-        temp_config_path = os.path.join(temp_dir, "test_battlesnake.yaml")
+        temp_config_path = Path(temp_dir) / "test_battlesnake.yaml"
 
         # Reduce rounds to 1 for faster testing
         config["tournament"]["rounds"] = 1
@@ -51,9 +50,7 @@ def wrapper(config, game_context, environment):
                     print(f"Replacing model for agent {agent.name}")
                     # Create DeterministicModel with the specified command
                     deterministic_model = DeterministicModel(
-                        outputs=[
-                            "```bash\necho 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\n```"
-                        ]
+                        outputs=["```bash\necho 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\n```"]
                     )
                     agent.agent.model = deterministic_model