diff --git a/codeclash/arenas/__init__.py b/codeclash/arenas/__init__.py
index 7ffbb95f..04324097 100644
--- a/codeclash/arenas/__init__.py
+++ b/codeclash/arenas/__init__.py
@@ -1,6 +1,7 @@
 from codeclash.arenas.arena import CodeArena
 from codeclash.arenas.battlecode.battlecode import BattleCodeArena
 from codeclash.arenas.battlesnake.battlesnake import BattleSnakeArena
+from codeclash.arenas.bridge.bridge import BridgeArena
 from codeclash.arenas.corewar.corewar import CoreWarArena
 from codeclash.arenas.dummy.dummy import DummyArena
 from codeclash.arenas.halite.halite import HaliteArena
@@ -13,6 +14,7 @@
 ARENAS = [
     BattleCodeArena,
     BattleSnakeArena,
+    BridgeArena,
     CoreWarArena,
     DummyArena,
     HaliteArena,
diff --git a/codeclash/arenas/bridge/Bridge.Dockerfile b/codeclash/arenas/bridge/Bridge.Dockerfile
new file mode 100644
index 00000000..7629fbc1
--- /dev/null
+++ b/codeclash/arenas/bridge/Bridge.Dockerfile
@@ -0,0 +1,18 @@
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install Python 3.10 and basic tools
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    curl ca-certificates python3.10 python3.10-venv \
+    python3-pip python-is-python3 wget git build-essential jq locales \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://github.com/CodeClash-ai/Bridge.git /workspace \
+    && cd /workspace \
+    && git remote set-url origin https://github.com/CodeClash-ai/Bridge.git
+
+WORKDIR /workspace
+
+# No additional dependencies needed - game logic is pure Python
diff --git a/codeclash/arenas/bridge/__init__.py b/codeclash/arenas/bridge/__init__.py
new file mode 100644
index 00000000..043d7d02
--- /dev/null
+++ b/codeclash/arenas/bridge/__init__.py
@@ -0,0 +1 @@
+"""Bridge arena for CodeClash."""
diff --git a/codeclash/arenas/bridge/bridge.py b/codeclash/arenas/bridge/bridge.py
new file mode 100644
index 00000000..53b6e550
--- /dev/null
+++ b/codeclash/arenas/bridge/bridge.py
@@ -0,0 +1,244 @@
+"""Bridge Arena for CodeClash."""
+
+import json
+import shlex
+import subprocess
+from collections import Counter
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from tqdm.auto import tqdm
+
+from codeclash.agents.player import Player
+from codeclash.arenas.arena import CodeArena, RoundStats
+from codeclash.constants import RESULT_TIE
+
+# Bridge is always played by exactly four seats.
+NUM_PLAYERS = 4
+# Time limit in seconds passed to the environment for one simulated game.
+SIM_TIMEOUT_SECONDS = 60
+# Number of worker threads used to run simulations in parallel.
+MAX_PARALLEL_SIMS = 8
+# Averaged team scores closer than this are reported as a tie.
+TIE_THRESHOLD = 0.01
+
+
+class BridgeArena(CodeArena):
+    """Arena that runs four submitted bots against each other at Bridge.
+
+    Players are seated in config order: seats 0 and 2 form the North/South
+    team, seats 1 and 3 form the East/West team.
+    """
+
+    name: str = "Bridge"
+    submission: str = "bridge_agent.py"
+    description: str = """Bridge is a 4-player trick-taking card game played in teams.
+
+Teams: North/South (positions 0/2) vs East/West (positions 1/3)
+
+Your bot (bridge_agent.py) must implement these functions:
+- get_bid(game_state) -> str: Make bidding decisions, return bid string like "1H", "2NT", "PASS"
+- play_card(game_state) -> str: Play a card, return card string like "AS", "7H"
+
+game_state is a dict containing:
+- position: Your position (0=North, 1=East, 2=South, 3=West)
+- hand: List of cards in your hand (e.g., ["AS", "KH", "7D"])
+- bids: List of previous bids
+- legal_bids: List of legal bids you can make (during bidding)
+- legal_cards: List of legal cards you can play (during playing)
+- current_trick: Cards played so far in current trick
+- contract: The current contract (if bidding is complete)
+"""
+    default_args: dict = {
+        "sims_per_round": 10,
+    }
+
+    def __init__(self, config, **kwargs):
+        """Check the player count, then initialize the arena.
+
+        Args:
+            config: Tournament configuration; ``players`` must list four entries.
+            **kwargs: Forwarded unchanged to ``CodeArena.__init__``.
+
+        Raises:
+            ValueError: If the config does not list exactly four players.
+        """
+        # Validate player count before initializing (to avoid Docker build on invalid config)
+        num_players = len(config.get("players") or [])
+        if num_players != NUM_PLAYERS:
+            raise ValueError(f"Bridge requires exactly {NUM_PLAYERS} players, got {num_players}")
+        super().__init__(config, **kwargs)
+        self.run_cmd = "python3 /workspace/run_game.py"
+
+    def _sims_per_round(self) -> int:
+        """Return the configured simulations per round, defaulting to ``default_args``.
+
+        ``execute_round`` writes one log per simulation and ``get_results`` reads
+        them back, so both must agree on this number.
+        """
+        return self.game_config.get("sims_per_round", self.default_args["sims_per_round"])
+
+    def validate_code(self, agent: Player) -> tuple[bool, str | None]:
+        """Check that the submission file exists and defines both entry points.
+
+        Args:
+            agent: Player whose environment is inspected.
+
+        Returns:
+            ``(True, None)`` if the submission looks valid, else ``(False, reason)``.
+        """
+        listing = agent.environment.execute("ls")["output"]
+        # Compare whole names: a substring test would also accept "bridge_agent.py.bak".
+        if self.submission not in listing.split():
+            return False, f"No {self.submission} file found in root directory"
+
+        content = agent.environment.execute(f"cat {self.submission}")["output"]
+
+        # Textual check only; the submission is not imported or executed here.
+        required_functions = ("def get_bid(", "def play_card(")
+        missing = [func for func in required_functions if func not in content]
+        if missing:
+            return False, f"Missing required functions: {', '.join(missing)}"
+
+        return True, None
+
+    def _run_single_simulation(self, agents: list[Player], idx: int, cmd: str):
+        """Run one Bridge game and return the runner's output.
+
+        Args:
+            agents: Players taking part; unused here, kept for the call signature.
+            idx: Simulation index, used to name the log file ``sim_<idx>.json``.
+            cmd: Runner command including agent paths, seed and dealer.
+
+        Returns:
+            The command output, or an empty string if the simulation timed out.
+        """
+        log_path = self.log_env / f"sim_{idx}.json"
+        # Quote the path so it stays a single argument even if it contains spaces.
+        full_cmd = f"{cmd} -o {shlex.quote(str(log_path))}"
+
+        try:
+            response = self.environment.execute(full_cmd, timeout=SIM_TIMEOUT_SECONDS)
+        except subprocess.TimeoutExpired:
+            self.logger.warning(f"Bridge simulation {idx} timed out")
+            return ""
+
+        if response["returncode"] != 0:
+            self.logger.warning(
+                f"Bridge simulation {idx} failed with exit code {response['returncode']}:\n{response['output']}"
+            )
+        return response["output"]
+
+    def execute_round(self, agents: list[Player]):
+        """Run every simulation of one round in parallel.
+
+        Simulation ``idx`` uses seed ``idx`` and dealer ``idx % 4``, so the dealer
+        seat rotates from game to game.
+
+        Args:
+            agents: The four players in seat order.
+        """
+        sims = self._sims_per_round()
+        self.logger.info(f"Running {sims} Bridge simulations with {NUM_PLAYERS} players")
+
+        agent_paths = [f"/{agent.name}/{self.submission}" for agent in agents]
+        cmd = f"{self.run_cmd} {shlex.join(agent_paths)}"
+
+        with ThreadPoolExecutor(max_workers=MAX_PARALLEL_SIMS) as executor:
+            futures = [
+                executor.submit(
+                    self._run_single_simulation,
+                    agents,
+                    idx,
+                    f"{cmd} --seed {idx} --dealer {idx % NUM_PLAYERS}",
+                )
+                for idx in range(sims)
+            ]
+            for future in tqdm(as_completed(futures), total=len(futures), desc="Bridge simulations"):
+                # Re-raise any exception raised inside a worker thread.
+                future.result()
+
+    def _read_team_scores(self, log_file, idx: int) -> dict | None:
+        """Return the ``normalized_score`` mapping of one simulation log.
+
+        Returns ``None`` (after logging a warning) when the log is missing,
+        unreadable, not a JSON object, reports an error, or carries no scores.
+        """
+        if not log_file.exists():
+            self.logger.warning(f"Log file {log_file} not found, skipping")
+            return None
+
+        try:
+            with open(log_file, encoding="utf-8") as f:
+                result = json.load(f)
+        except (OSError, json.JSONDecodeError) as e:
+            self.logger.warning(f"Error parsing {log_file}: {e}")
+            return None
+
+        if not isinstance(result, dict):
+            self.logger.warning(f"Error parsing {log_file}: expected a JSON object")
+            return None
+        if "error" in result:
+            self.logger.warning(f"Simulation {idx} had error: {result['error']}")
+            return None
+
+        vp_scores = result.get("normalized_score")
+        if not isinstance(vp_scores, dict) or not vp_scores:
+            self.logger.warning(f"Simulation {idx} has no usable normalized_score, skipping")
+            return None
+        return vp_scores
+
+    def get_results(self, agents: list[Player], round_num: int, stats: RoundStats):
+        """Aggregate the simulation logs of a round into team and player scores.
+
+        Each usable log adds its ``normalized_score`` for ``NS`` and ``EW``; the
+        totals are averaged over the usable games and every player receives
+        their team's average. With no usable game the round is a tie at 0.0.
+
+        Args:
+            agents: The four players in seat order (even seats NS, odd seats EW).
+            round_num: Round whose logs are read.
+            stats: Round statistics object, filled in place.
+        """
+        team_scores = {"NS": 0.0, "EW": 0.0}
+        games_played = 0
+
+        round_log_dir = self.log_round(round_num)
+        for idx in range(self._sims_per_round()):
+            vp_scores = self._read_team_scores(round_log_dir / f"sim_{idx}.json", idx)
+            if vp_scores is None:
+                continue
+            for team in team_scores:
+                team_scores[team] += vp_scores.get(team, 0.0)
+            games_played += 1
+
+        if games_played == 0:
+            self.logger.error("No valid game results found")
+            stats.winner = RESULT_TIE
+            for agent in agents:
+                stats.scores[agent.name] = 0.0
+                stats.player_stats[agent.name].score = 0.0
+            return
+
+        # Average the scores
+        for team in team_scores:
+            team_scores[team] /= games_played
+
+        # Determine winning team
+        if abs(team_scores["NS"] - team_scores["EW"]) < TIE_THRESHOLD:
+            stats.winner = RESULT_TIE
+        elif team_scores["NS"] > team_scores["EW"]:
+            stats.winner = f"{agents[0].name}/{agents[2].name}"
+        else:
+            stats.winner = f"{agents[1].name}/{agents[3].name}"
+
+        # Assign scores to individual players based on their team
+        for position, agent in enumerate(agents):
+            team = "NS" if position % 2 == 0 else "EW"
+            score = team_scores[team]
+            stats.scores[agent.name] = score
+            stats.player_stats[agent.name].score = score
+
+        self.logger.info(
+            f"Round {round_num} results - NS: {team_scores['NS']:.3f}, "
+            f"EW: {team_scores['EW']:.3f}, Winner: {stats.winner}"
+        )
diff --git a/configs/examples/Bridge__claude-3-5-haiku__r2__s10.yaml b/configs/examples/Bridge__claude-3-5-haiku__r2__s10.yaml
new file mode 100644
index 00000000..0853e30b
--- /dev/null
+++ b/configs/examples/Bridge__claude-3-5-haiku__r2__s10.yaml
@@ -0,0 +1,74 @@
+tournament:
+  rounds: 2
+game:
+
name: Bridge + sims_per_round: 10 +players: +- agent: mini + name: north + config: + agent: !include mini/default.yaml + model: + model_name: 'anthropic/claude-3-5-haiku-20241022' + model_kwargs: + temperature: 0.2 + max_tokens: 4096 +- agent: mini + name: east + config: + agent: !include mini/default.yaml + model: + model_name: 'anthropic/claude-3-5-haiku-20241022' + model_kwargs: + temperature: 0.2 + max_tokens: 4096 +- agent: mini + name: south + config: + agent: !include mini/default.yaml + model: + model_name: 'anthropic/claude-3-5-haiku-20241022' + model_kwargs: + temperature: 0.2 + max_tokens: 4096 +- agent: mini + name: west + config: + agent: !include mini/default.yaml + model: + model_name: 'anthropic/claude-3-5-haiku-20241022' + model_kwargs: + temperature: 0.2 + max_tokens: 4096 +prompts: + game_description: |- + You are a software developer ({{player_id}}) competing in a coding game called Bridge. + Bridge is a 4-player trick-taking card game played in partnerships: North/South vs East/West. + + Your position: {{player_id}} (North=0, East=1, South=2, West=3) + Teams: North/South (positions 0/2) vs East/West (positions 1/3) + + The game is played in {{total_rounds}} rounds. For every round, you (and your competitors) edit program code that controls your bot. This is round {{round}}. + After everyone finishes editing their codebases, the game is run automatically. + + Your task: improve the bot in `bridge_agent.py`, located in {{working_dir}}. + {{working_dir}} is your codebase, which contains both your bot and supporting assets. + All of your commands will be executed in the {{working_dir}} directory. 
+ + Your bot must implement two functions: + - get_bid(game_state) -> str: Make bidding decisions during the auction + - play_card(game_state) -> str: Play a card during the play phase + + game_state contains: + - position: Your seat (0-3) + - hand: Your cards (e.g., ["AS", "KH", "7D", "TC"]) + - legal_bids/legal_cards: Valid moves you can make + - bids: Previous bids in the auction + - current_trick: Cards played in current trick + - contract: The final contract (after bidding) + - tricks_won: Tricks won by each team + + Card notation: rank followed by suit (e.g., "AS", "TC"), where rank is A,K,Q,J,T,9,8,7,6,5,4,3,2 and suit is S,H,D,C + Bid notation: "PASS" or level(1-7) + strain(C,D,H,S,NT) like "1H", "3NT", "7S" + + Check examples/random_agent.py in the workspace for a starting template. diff --git a/configs/test/bridge.yaml b/configs/test/bridge.yaml new file mode 100644 index 00000000..c1e5e6ed --- /dev/null +++ b/configs/test/bridge.yaml @@ -0,0 +1,36 @@ +tournament: + rounds: 3 +game: + name: Bridge + sims_per_round: 10 +players: + - agent: dummy + name: north + - agent: dummy + name: east + - agent: dummy + name: south + - agent: dummy + name: west +prompts: + game_description: | + You are a software developer ({{player_id}}) competing in a Bridge coding game. + + Bridge is a 4-player trick-taking card game played in teams: + - North/South (positions 0/2) vs East/West (positions 1/3) + + The game is played in {{rounds}} rounds. For every round, you edit your bot code (bridge_agent.py). + After all players finish editing, games are run automatically. This is round {{round}}. 
+ + Your bot must implement two functions: + - get_bid(game_state) -> str: Return bid like "1H", "2NT", "PASS" + - play_card(game_state) -> str: Return card like "AS", "7H" + + The game_state dict contains: + - position: Your position (0=North, 1=East, 2=South, 3=West) + - hand: List of cards in your hand + - bids: List of previous bids + - legal_bids: Legal bids you can make (during bidding) + - legal_cards: Legal cards you can play (during playing) + - current_trick: Cards played so far in current trick + - contract: The current contract (if bidding is complete) diff --git a/docs/reference/arenas/bridge.md b/docs/reference/arenas/bridge.md new file mode 100644 index 00000000..101ac2f1 --- /dev/null +++ b/docs/reference/arenas/bridge.md @@ -0,0 +1,106 @@ +# Bridge + +4-player trick-taking card game played in teams. + +## Overview + +Bridge is a classic card game where North/South compete against East/West. Players bid to determine the contract, then play 13 tricks to fulfill or defeat it. The game combines strategic bidding with tactical card play. + +## Implementation + +::: codeclash.arenas.bridge.bridge.BridgeArena + options: + show_root_heading: true + heading_level: 2 + +## Agent Interface + +Your bot must be a Python file (`bridge_agent.py`) implementing two functions: + +### get_bid(game_state) + +Make a bidding decision during the bidding phase. + +**Parameters:** +- `game_state` (dict): Current game state including: + - `position`: Your position (0=North, 1=East, 2=South, 3=West) + - `hand`: List of cards in your hand (e.g., `["AS", "KH", "7D"]`) + - `bids`: List of previous bids + - `legal_bids`: List of legal bids you can make + +**Returns:** +- `str`: Bid string like `"1H"`, `"2NT"`, `"3S"`, or `"PASS"` + +### play_card(game_state) + +Play a card during the playing phase. 
+ +**Parameters:** +- `game_state` (dict): Current game state including: + - `position`: Your position + - `hand`: Cards currently in your hand + - `current_trick`: Cards played so far in current trick + - `legal_cards`: Legal cards you can play + - `contract`: The current contract (level, suit, declarer) + - `tricks_won`: Tricks won by each team + +**Returns:** +- `str`: Card string like `"AS"`, `"7H"`, `"KD"` + +## Example Agent + +```python +import random + +def get_bid(game_state): + """Simple strategy: PASS 80% of the time.""" + legal_bids = game_state.get("legal_bids", ["PASS"]) + + if random.random() < 0.8 or len(legal_bids) == 1: + return "PASS" + + non_pass_bids = [b for b in legal_bids if b != "PASS"] + return random.choice(non_pass_bids) if non_pass_bids else "PASS" + +def play_card(game_state): + """Play a random legal card.""" + legal_cards = game_state.get("legal_cards", game_state.get("hand", [])) + return random.choice(legal_cards) if legal_cards else "AS" +``` + +## Configuration Example + +```yaml +tournament: + rounds: 3 +game: + name: Bridge + sims_per_round: 10 +players: + - agent: dummy + name: north + - agent: dummy + name: east + - agent: dummy + name: south + - agent: dummy + name: west +``` + +## Teams + +Bridge is played in fixed partnerships: +- **North/South (NS)**: Positions 0 and 2 +- **East/West (EW)**: Positions 1 and 3 + +Scores are calculated per team using Victory Points (VP) normalized to 0-1 scale. 
+
+## Scoring
+
+The game uses standard Contract Bridge scoring:
+- Contract made: Base points + overtricks + game/slam bonuses
+- Contract failed: Undertrick penalties
+- Vulnerability affects bonuses and penalties
+- Raw scores are converted to Victory Points (VP)
+
+--8<-- "docs/_footer.md"
diff --git a/mkdocs.yml b/mkdocs.yml
index e906978c..605dc70d 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -72,6 +72,7 @@ nav: - "CodeGame (Abstract)": "reference/arenas/game.md" - "BattleCode": "reference/arenas/battlecode.md" - "BattleSnake": "reference/arenas/battlesnake.md" + - "Bridge": "reference/arenas/bridge.md" - "CoreWar": "reference/arenas/corewar.md" - "Halite": "reference/arenas/halite.md" - "Halite II": "reference/arenas/halite2.md"
diff --git a/tests/arenas/test_bridge.py b/tests/arenas/test_bridge.py
new file mode 100644
index 00000000..4938f440
--- /dev/null
+++ b/tests/arenas/test_bridge.py
@@ -0,0 +1,136 @@
+"""Unit tests for BridgeArena."""
+
+import copy
+
+import pytest
+
+from codeclash.arenas.arena import CodeArena
+from codeclash.arenas.bridge.bridge import BridgeArena
+
+VALID_BRIDGE_BOT = """
+def get_bid(game_state):
+    '''Make a bidding decision based on game state.'''
+    # Simple strategy: always pass
+    return "PASS"
+
+def play_card(game_state):
+    '''Play a card based on game state.'''
+    # Simple strategy: play first legal card
+    legal_cards = game_state.get('legal_cards', game_state.get('hand', []))
+    if legal_cards:
+        return legal_cards[0]
+    return "AS"
+"""
+
+
+def _bridge_config(minimal_config, player_names):
+    """Return a deep copy of ``minimal_config`` configured for Bridge.
+
+    A deep copy keeps the nested ``game`` dict of the shared fixture untouched.
+    """
+    config = copy.deepcopy(minimal_config)
+    config["game"]["name"] = "Bridge"
+    config["players"] = [{"name": name, "agent": "dummy"} for name in player_names]
+    return config
+
+
+class TestBridgeValidation:
+    """Tests for BridgeArena.validate_code()"""
+
+    @pytest.fixture
+    def arena(self, tmp_log_dir):
+        """Create a BridgeArena instance without running ``__init__``."""
+        arena = BridgeArena.__new__(BridgeArena)
+        arena.submission = "bridge_agent.py"
+        arena.log_local = tmp_log_dir
+        return arena
+
+    def test_valid_submission(self, arena, mock_player_factory):
+        """Test that a valid Bridge bot passes validation."""
+        player = mock_player_factory(
+            name="test_player",
+            files={"bridge_agent.py": VALID_BRIDGE_BOT},
+            command_outputs={
+                "ls": {"output": "bridge_agent.py\n", "returncode": 0},
+                "cat bridge_agent.py": {"output": VALID_BRIDGE_BOT, "returncode": 0},
+            },
+        )
+        is_valid, error = arena.validate_code(player)
+        assert is_valid is True
+        assert error is None
+
+    def test_missing_file(self, arena, mock_player_factory):
+        """Test that missing bridge_agent.py fails validation."""
+        player = mock_player_factory(
+            name="test_player",
+            files={},
+            command_outputs={
+                "ls": {"output": "other.py\n", "returncode": 0},
+            },
+        )
+        is_valid, error = arena.validate_code(player)
+        assert is_valid is False
+        assert "bridge_agent.py" in error
+
+    def test_missing_bid_function(self, arena, mock_player_factory):
+        """Test that missing get_bid function fails validation."""
+        bot_code = """
+def play_card(game_state):
+    '''Play a card.'''
+    return "AS"
+"""
+        player = mock_player_factory(
+            name="test_player",
+            files={"bridge_agent.py": bot_code},
+            command_outputs={
+                "ls": {"output": "bridge_agent.py\n", "returncode": 0},
+                "cat bridge_agent.py": {"output": bot_code, "returncode": 0},
+            },
+        )
+        is_valid, error = arena.validate_code(player)
+        assert is_valid is False
+        assert "def get_bid(" in error
+
+    def test_missing_play_function(self, arena, mock_player_factory):
+        """Test that missing play_card function fails validation."""
+        bot_code = """
+def get_bid(game_state):
+    '''Make a bid.'''
+    return "PASS"
+"""
+        player = mock_player_factory(
+            name="test_player",
+            files={"bridge_agent.py": bot_code},
+            command_outputs={
+                "ls": {"output": "bridge_agent.py\n", "returncode": 0},
+                "cat bridge_agent.py": {"output": bot_code, "returncode": 0},
+            },
+        )
+        is_valid, error = arena.validate_code(player)
+        assert is_valid is False
+        assert "def play_card(" in error
+
+
+class TestBridgeRequirements:
+    """Test Bridge-specific requirements."""
+
+    @pytest.mark.parametrize("num_players", [0, 2, 3, 5])
+    def test_requires_4_players(self, minimal_config, tmp_log_dir, num_players):
+        """Test that Bridge rejects any player count other than 4."""
+        config = _bridge_config(minimal_config, [f"p{i}" for i in range(1, num_players + 1)])
+
+        with pytest.raises(ValueError, match="Bridge requires exactly 4 players"):
+            BridgeArena(config, tournament_id="test_tournament", local_output_dir=tmp_log_dir)
+
+    def test_accepts_4_players(self, minimal_config, tmp_log_dir, monkeypatch):
+        """Test that Bridge accepts exactly 4 players."""
+        # Replace the base initializer with a no-op so construction needs no Docker;
+        # BridgeArena's own player-count check and setup still run.
+        monkeypatch.setattr(CodeArena, "__init__", lambda self, config, **kwargs: None)
+        config = _bridge_config(minimal_config, ["north", "east", "south", "west"])
+
+        arena = BridgeArena(config, tournament_id="test_tournament", local_output_dir=tmp_log_dir)
+
+        assert arena.run_cmd == "python3 /workspace/run_game.py"
+        assert BridgeArena.name == "Bridge"
+        assert BridgeArena.submission == "bridge_agent.py"