From c850788969507bd99264cee7bae14a5f6a8170d4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 24 Jun 2026 20:44:21 +0000 Subject: [PATCH 1/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 28117322182 --- docs/_data/test-generation.json | 162 ++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index 9e5a7c256..02debedda 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5333,6 +5333,148 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": false } + }, + { + "total": 101, + "date": "2026-06-24", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 316.1, + "average_prompt_tokens": 1534188.1, + "average_completion_tokens": 17710.9, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.03, + "powershell": 4.46, + "view": 11.88, + "grep": 9.96, + "edit": 1.5, + "glob": 0.33, + "create": 0.29, + "lsp:documentSymbol": 0.09, + "web_fetch": 0.01, + "read_powershell": 0.03, + "lsp:hover": 0.04, + "stop_powershell": 0.03, + "task": 0.01 + }, + "github_run_id": "28117322182", + "experiment": { + "mcp_servers": null, + "al_lsp_enabled": true, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.6", + "resolved": 62, + "failed": 39, + "build": 96, + "percentage": 61.4, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": false, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": true, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": false, + "microsoftInternal__NAV-207247": false, + "microsoftInternal__NAV-227358": true, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": false, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": true, + "microsoftInternal__NAV-175765": true, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": true, + "microsoftInternal__NAV-218323": false, + "microsoftInternal__NAV-193649": false, + "microsoftInternal__NAV-216918": false, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": true, + "microsoftInternal__NAV-209737": false, + "microsoftInternal__NAV-216057": false, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": false, + "microsoftInternal__NAV-209835": true, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": true, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": true, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": true, + "microsoftInternal__NAV-222488": true, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": true, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": false, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": true, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": false, + "microsoftInternal__NAV-226875": false, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": false, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": false, + "microsoftInternal__NAV-224009": false, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": true, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": false, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": false, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": false, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": false, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": true, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": false, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": true, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": false, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5453,6 +5595,26 @@ "ci_low": 0.576, "ci_high": 0.64, "pass_hat_5": 0.317 + }, + { + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "experiment": { + "mcp_servers": null, + "al_lsp_enabled": true, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "total": 101, + "num_runs": 1, + "average_duration": 316.1, + "benchmark_version": "0.5.6", + "average": 0.614, + "ci_low": null, + "ci_high": null, + "pass_hat_5": null } ] } From de7eb93d77ffe120f6fccbbba9699ab8b4a4c1c1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 24 Jun 2026 23:47:28 +0000 Subject: [PATCH 2/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 28128415227 --- docs/_data/test-generation.json | 149 ++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 5 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index 02debedda..6be6110a4 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5475,6 +5475,145 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-24", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 305.1, + "average_prompt_tokens": 1427025.7, + "average_completion_tokens": 17321.8, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.08, + "powershell": 4.07, + "view": 11.88, + "grep": 9.47, + "edit": 1.54, + "create": 0.3, + "lsp:documentSymbol": 0.08, + "lsp:hover": 0.13, + "glob": 0.53, + "lsp:incomingCalls": 0.01 + }, + "github_run_id": "28128415227", + "experiment": { + "mcp_servers": null, + "al_lsp_enabled": true, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.6", + "resolved": 61, + "failed": 40, + "build": 95, + "percentage": 60.4, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": true, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": false, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": true, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": false, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": false, + "microsoftInternal__NAV-181900": false, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": false, + "microsoftInternal__NAV-175765": true, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": true, + "microsoftInternal__NAV-193649": false, + "microsoftInternal__NAV-216918": true, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": false, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": true, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": true, + "microsoftInternal__NAV-209835": true, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": true, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": false, + "microsoftInternal__NAV-221877": true, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": true, + "microsoftInternal__NAV-222488": true, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": false, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": false, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": false, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": false, + "microsoftInternal__NAV-174087": true, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": true, + "microsoftInternal__NAV-226875": false, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": true, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": false, + "microsoftInternal__NAV-224009": false, + "microsoftInternal__NAV-203923": false, + "microsoftInternal__NAV-176426": true, + "microsoftInternal__NAV-224447": false, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": false, + "microsoftInternal__NAV-185792": false, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": true, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": true, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": false, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": false, + "microsoftInternal__NAV-217104": false, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5608,12 +5747,12 @@ "custom_agent": null }, "total": 101, - "num_runs": 1, - "average_duration": 316.1, + "num_runs": 2, + "average_duration": 310.6, "benchmark_version": "0.5.6", - "average": 0.614, - "ci_low": null, - "ci_high": null, + "average": 0.609, + "ci_low": 0.604, + "ci_high": 0.614, "pass_hat_5": null } ] From b96906a5c11442707c03cce0519435d0b2661ce9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 25 Jun 2026 02:50:32 +0000 Subject: [PATCH 3/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 28137029396 --- docs/_data/test-generation.json | 152 ++++++++++++++++++++++++++++++-- 1 file changed, 147 insertions(+), 5 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index 6be6110a4..3ee9e2608 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5614,6 +5614,148 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-25", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 298.2, + "average_prompt_tokens": 1445247.5, + "average_completion_tokens": 16719.8, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.38, + "powershell": 4.59, + "view": 12.41, + "grep": 10.12, + "edit": 1.36, + "create": 0.35, + "glob": 0.44, + "lsp:documentSymbol": 0.1, + "lsp:hover": 0.08, + "task": 0.04, + "sql": 0.02, + "lsp:workspaceSymbol": 0.09, + "lsp:goToDefinition": 0.02 + }, + "github_run_id": "28137029396", + "experiment": { + "mcp_servers": null, + "al_lsp_enabled": true, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.6", + "resolved": 66, + "failed": 35, + "build": 95, + "percentage": 65.3, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": true, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": true, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": false, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": true, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": true, + "microsoftInternal__NAV-175765": false, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": false, + "microsoftInternal__NAV-193649": false, + "microsoftInternal__NAV-216918": true, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": true, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": true, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": false, + "microsoftInternal__NAV-209835": true, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": false, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": false, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": false, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": false, + "microsoftInternal__NAV-222488": false, + "microsoftInternal__NAV-208649": false, + "microsoftInternal__NAV-215972": false, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": true, + "microsoftInternal__NAV-212355": true, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": false, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": false, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": false, + "microsoftInternal__NAV-226875": true, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": true, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": true, + "microsoftInternal__NAV-224009": false, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": true, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": true, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": false, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": false, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": true, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5747,12 +5889,12 @@ "custom_agent": null }, "total": 101, - "num_runs": 2, - "average_duration": 310.6, + "num_runs": 3, + "average_duration": 306.4666666666667, "benchmark_version": "0.5.6", - "average": 0.609, - "ci_low": 0.604, - "ci_high": 0.614, + "average": 0.624, + "ci_low": 0.607, + "ci_high": 0.653, "pass_hat_5": null } ] From c0db0e4388e5ec94b04fc15dddf8b933bcf8dd7b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 25 Jun 2026 05:56:13 +0000 Subject: [PATCH 4/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 28143592839 --- docs/_data/test-generation.json | 149 +++++++++++++++++++++++++++++++- 1 file changed, 145 insertions(+), 4 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index 3ee9e2608..f6d32b842 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5756,6 +5756,147 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-25", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 296.3, + "average_prompt_tokens": 1396837.6, + "average_completion_tokens": 16546.5, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.07, + "powershell": 4.38, + "view": 11.42, + "grep": 9.31, + "edit": 1.5, + "create": 0.31, + "glob": 0.29, + "lsp:documentSymbol": 0.13, + "lsp:hover": 0.04, + "stop_powershell": 0.02, + "task": 0.01, + "sql": 0.01 + }, + "github_run_id": "28143592839", + "experiment": { + "mcp_servers": null, + "al_lsp_enabled": true, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.6", + "resolved": 66, + "failed": 35, + "build": 97, + "percentage": 65.3, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": true, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": true, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": true, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": true, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": true, + "microsoftInternal__NAV-175765": false, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": true, + "microsoftInternal__NAV-193649": false, + "microsoftInternal__NAV-216918": false, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": false, + "microsoftInternal__NAV-201169": true, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": true, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": true, + "microsoftInternal__NAV-209835": false, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": false, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": true, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": true, + "microsoftInternal__NAV-222488": false, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": true, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": false, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": true, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": false, + "microsoftInternal__NAV-226875": false, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": false, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": true, + "microsoftInternal__NAV-224009": false, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": true, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": false, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": true, + "microsoftInternal__NAV-205825": false, + "microsoftInternal__NAV-191624": true, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": false, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": false, + "microsoftInternal__NAV-217104": false, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5889,11 +6030,11 @@ "custom_agent": null }, "total": 101, - "num_runs": 3, - "average_duration": 306.4666666666667, + "num_runs": 4, + "average_duration": 303.925, "benchmark_version": "0.5.6", - "average": 0.624, - "ci_low": 0.607, + "average": 0.631, + "ci_low": 0.609, "ci_high": 0.653, "pass_hat_5": null } From 574f382db9a813e64b9b5d0f6c8385ee04a44e19 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 25 Jun 2026 08:55:52 +0000 Subject: [PATCH 5/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 28150057340 --- docs/_data/test-generation.json | 154 ++++++++++++++++++++++++++++++-- 1 file changed, 148 insertions(+), 6 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index f6d32b842..8f9b499be 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5897,6 +5897,148 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-25", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 299.3, + "average_prompt_tokens": 1457447.5, + "average_completion_tokens": 16625.7, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.02, + "powershell": 4.3, + "view": 11.24, + "grep": 9.26, + "edit": 1.43, + "glob": 0.49, + "create": 0.31, + "read_powershell": 0.01, + "lsp:documentSymbol": 0.08, + "web_fetch": 0.19, + "lsp:hover": 0.03, + "lsp:goToDefinition": 0.04, + "sql": 0.02 + }, + "github_run_id": "28150057340", + "experiment": { + "mcp_servers": null, + "al_lsp_enabled": true, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.6", + "resolved": 60, + "failed": 41, + "build": 95, + "percentage": 59.4, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": true, + "microsoftInternal__NAV-175577": false, + "microsoftInternal__NAV-227219": true, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": false, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": true, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": false, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": false, + "microsoftInternal__NAV-175765": false, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": true, + "microsoftInternal__NAV-193649": true, + "microsoftInternal__NAV-216918": true, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": false, + "microsoftInternal__NAV-209737": false, + "microsoftInternal__NAV-216057": false, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": false, + "microsoftInternal__NAV-209835": true, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": false, + "microsoftInternal__NAV-185488": false, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": false, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": false, + "microsoftInternal__NAV-222488": false, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": false, + "microsoftInternal__NAV-213671": true, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": true, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": false, + "microsoftInternal__NAV-220314": false, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": true, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": false, + "microsoftInternal__NAV-226875": true, + "microsoftInternal__NAV-211710": false, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": false, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": true, + "microsoftInternal__NAV-224009": true, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": false, + "microsoftInternal__NAV-224447": false, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": true, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": false, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": true, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": true, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": false, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -6030,13 +6172,13 @@ "custom_agent": null }, "total": 101, - "num_runs": 4, - "average_duration": 303.925, + "num_runs": 5, + "average_duration": 303.0, "benchmark_version": "0.5.6", - "average": 0.631, - "ci_low": 0.609, - "ci_high": 0.653, - "pass_hat_5": null + "average": 0.624, + "ci_low": 0.604, + "ci_high": 0.646, + "pass_hat_5": 0.287 } ] }