# Patch summary (text before the first "diff --git" header is ignored by git apply / patch):
#
# * Adds the composite action .github/actions/install-eval-clis, which installs
#   the Claude Code CLI (@anthropic-ai/claude-code@2.1.160) and the GitHub
#   Copilot CLI (@github/copilot@1.0.57) globally via npm, both under pwsh.
# * claude-evaluation.yml: replaces the inline Claude Code install with the
#   composite action, adds the copilot-requests: write permission, and exposes
#   github.token as COPILOT_GITHUB_TOKEN (masked in the log) for the
#   code-review semantic judge.
# * copilot-evaluation.yml: replaces the inline Copilot CLI install with the
#   same composite action, so this workflow now installs Claude Code as well.
# * Both workflows use the step name "Install Agent Harnesses", matching the
#   action's own name.
#
# NOTE(review): a local "uses: ./..." reference needs the repository checked
# out earlier in the job; the checkout step is outside these hunks - confirm.
# NOTE(review): the "index" lines are carried over unchanged; the post-image
# hash for copilot-evaluation.yml predates the step rename - regenerate the
# patch if exact blob hashes matter.

diff --git a/.github/actions/install-eval-clis/action.yml b/.github/actions/install-eval-clis/action.yml
new file mode 100644
index 000000000..e9bc0c3bb
--- /dev/null
+++ b/.github/actions/install-eval-clis/action.yml
@@ -0,0 +1,13 @@
+name: Install Agent Harnesses
+description: Install the Claude Code and GitHub Copilot CLIs used by the evaluation workflows
+
+runs:
+  using: composite
+  steps:
+    - name: Install Claude Code
+      run: npm install -g @anthropic-ai/claude-code@2.1.160
+      shell: pwsh
+
+    - name: Install GitHub Copilot CLI
+      run: npm install -g @github/copilot@1.0.57
+      shell: pwsh
diff --git a/.github/workflows/claude-evaluation.yml b/.github/workflows/claude-evaluation.yml
index d91282bf2..5952c6026 100644
--- a/.github/workflows/claude-evaluation.yml
+++ b/.github/workflows/claude-evaluation.yml
@@ -77,6 +77,7 @@ jobs:
     permissions:
       contents: read
       id-token: write
+      copilot-requests: write  # code-review semantic judge runs via Copilot CLI
     name: ${{ matrix.entry }}
     strategy:
       fail-fast: false
@@ -113,16 +114,18 @@ jobs:
           dotnet tool install -g Microsoft.Dynamics.BusinessCentral.Development.Tools --version 18.0.37.11445-beta
           echo "$HOME\.dotnet\tools" >> $env:GITHUB_PATH
 
-      - name: Install Claude Code
-        run: npm install -g @anthropic-ai/claude-code@2.1.160
+      - name: Install Agent Harnesses
+        uses: ./.github/actions/install-eval-clis
 
       - name: Run Claude Code for entry ${{ matrix.entry }}
         timeout-minutes: 120
         shell: pwsh
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          COPILOT_GITHUB_TOKEN: ${{ github.token }}  # used by the code-review semantic judge
         run: |
           Write-Output "::add-mask::$env:ANTHROPIC_API_KEY"
+          Write-Output "::add-mask::$env:COPILOT_GITHUB_TOKEN"
 
           uv run bcbench evaluate claude "${{ matrix.entry }}" `
             --model "${{ inputs.model }}" `
diff --git a/.github/workflows/copilot-evaluation.yml b/.github/workflows/copilot-evaluation.yml
index dd373536d..f0f11d787 100644
--- a/.github/workflows/copilot-evaluation.yml
+++ b/.github/workflows/copilot-evaluation.yml
@@ -121,8 +121,8 @@ jobs:
           dotnet tool install -g Microsoft.Dynamics.BusinessCentral.Development.Tools --version 18.0.37.11445-beta
           echo "$HOME\.dotnet\tools" >> $env:GITHUB_PATH
 
-      - name: Install GitHub Copilot CLI
-        run: npm install -g @github/copilot@1.0.57
+      - name: Install Agent Harnesses
+        uses: ./.github/actions/install-eval-clis
 
       - name: Run GitHub Copilot CLI for entry ${{ matrix.entry }}
         timeout-minutes: 120