From 3c36e123e2fa8843fdf1e3cfbe53c9151b4e8786 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 18 Jun 2026 11:21:46 +0000 Subject: [PATCH 1/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 27745209401 --- docs/_data/test-generation.json | 163 ++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index 9e5a7c256..d4462d800 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5333,6 +5333,147 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": false } + }, + { + "total": 101, + "date": "2026-06-18", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 392.7, + "average_prompt_tokens": 1954073.3, + "average_completion_tokens": 21740.6, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.23, + "powershell": 9.33, + "view": 12.52, + "grep": 10.64, + "edit": 1.3, + "read_powershell": 0.05, + "create": 0.47, + "glob": 0.42, + "sql": 0.04, + "task": 0.01 + }, + "github_run_id": "27745209401", + "experiment": { + "mcp_servers": [ + "altool" + ], + "al_lsp_enabled": false, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.5", + "resolved": 73, + "failed": 28, + "build": 98, + "percentage": 72.3, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": true, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": false, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": true, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": false, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": true, + "microsoftInternal__NAV-175765": true, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": true, + "microsoftInternal__NAV-193649": true, + "microsoftInternal__NAV-216918": true, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": true, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": true, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": true, + "microsoftInternal__NAV-209835": true, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": true, + "microsoftInternal__NAV-185488": false, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": true, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": false, + "microsoftInternal__NAV-222488": false, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": true, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": false, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": false, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": true, + "microsoftInternal__NAV-226875": false, + "microsoftInternal__NAV-211710": false, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": false, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": false, + "microsoftInternal__NAV-224009": true, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": true, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": true, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": true, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": true, + "microsoftInternal__NAV-213741": true, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": true, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5453,6 +5594,28 @@ "ci_low": 0.576, "ci_high": 0.64, "pass_hat_5": 0.317 + }, + { + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "experiment": { + "mcp_servers": [ + "altool" + ], + "al_lsp_enabled": false, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "total": 101, + "num_runs": 1, + "average_duration": 392.7, + "benchmark_version": "0.5.5", + "average": 0.723, + "ci_low": null, + "ci_high": null, + "pass_hat_5": null } ] } From a90d0270fee287fe5850d4b09c0a789c7ff35a78 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 18 Jun 2026 16:48:01 +0000 Subject: [PATCH 2/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 27755986205 --- docs/_data/test-generation.json | 152 ++++++++++++++++++++++++++++++-- 1 file changed, 147 insertions(+), 5 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index d4462d800..e86a7ef63 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5474,6 +5474,148 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-18", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 415.8, + "average_prompt_tokens": 2232404.9, + "average_completion_tokens": 22995.0, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.6, + "powershell": 9.09, + "view": 13.81, + "grep": 12.43, + "edit": 1.43, + "create": 0.59, + "read_powershell": 0.07, + "glob": 0.66, + "sql": 0.06, + "task": 0.05, + "stop_powershell": 0.05 + }, + "github_run_id": "27755986205", + "experiment": { + "mcp_servers": [ + "altool" + ], + "al_lsp_enabled": false, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.5", + "resolved": 69, + "failed": 32, + "build": 98, + "percentage": 68.3, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": false, + "microsoftInternal__NAV-210200": false, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": true, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": true, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": true, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": true, + "microsoftInternal__NAV-175765": false, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": false, + "microsoftInternal__NAV-193649": false, + "microsoftInternal__NAV-216918": true, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": true, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": true, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": true, + "microsoftInternal__NAV-209835": false, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": true, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": true, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": true, + "microsoftInternal__NAV-222488": false, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": false, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": true, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": true, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": false, + "microsoftInternal__NAV-226875": false, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": true, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": true, + "microsoftInternal__NAV-224009": false, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": true, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": true, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": false, + "microsoftInternal__NAV-226448": false, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": false, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": true, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5609,12 +5751,12 @@ "custom_agent": null }, "total": 101, - "num_runs": 1, - "average_duration": 392.7, + "num_runs": 2, + "average_duration": 404.25, "benchmark_version": "0.5.5", - "average": 0.723, - "ci_low": null, - "ci_high": null, + "average": 0.703, + "ci_low": 0.683, + "ci_high": 0.723, "pass_hat_5": null } ] From 2f7fad8b7e2a618c18d829f543ae3e62a7bf25b8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 18 Jun 2026 20:04:01 +0000 Subject: [PATCH 3/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 27775201844 --- docs/_data/test-generation.json | 149 +++++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 3 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index e86a7ef63..bf002fc14 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5616,6 +5616,149 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-18", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 435.5, + "average_prompt_tokens": 2148889.0, + "average_completion_tokens": 23070.0, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.62, + "powershell": 9.31, + "view": 13.44, + "grep": 11.71, + "edit": 1.19, + "glob": 0.5, + "create": 0.58, + "task": 0.05, + "sql": 0.06, + "read_powershell": 0.06, + "stop_powershell": 0.02, + "list_powershell": 0.01 + }, + "github_run_id": "27775201844", + "experiment": { + "mcp_servers": [ + "altool" + ], + "al_lsp_enabled": false, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.5", + "resolved": 69, + "failed": 32, + "build": 92, + "percentage": 68.3, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": false, + "microsoftInternal__NAV-210200": false, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": true, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": true, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": false, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": false, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": false, + "microsoftInternal__NAV-175765": false, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": false, + "microsoftInternal__NAV-193649": false, + "microsoftInternal__NAV-216918": true, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": false, + "microsoftInternal__NAV-201169": true, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": false, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": true, + "microsoftInternal__NAV-209835": true, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": false, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": true, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": true, + "microsoftInternal__NAV-222488": true, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": false, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": true, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": true, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": true, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": true, + "microsoftInternal__NAV-226875": true, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": true, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": true, + "microsoftInternal__NAV-224009": true, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": true, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": false, + "microsoftInternal__NAV-215645": true, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": false, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": false, + "microsoftInternal__NAV-226448": false, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": true, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": true, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5751,10 +5894,10 @@ "custom_agent": null }, "total": 101, - "num_runs": 2, - "average_duration": 404.25, + "num_runs": 3, + "average_duration": 414.6666666666667, "benchmark_version": "0.5.5", - "average": 0.703, + "average": 0.696, "ci_low": 0.683, "ci_high": 0.723, "pass_hat_5": null From 84ec45fab78b5c28d0f97576d045174580a17c3e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 19 Jun 2026 08:12:22 +0000 Subject: [PATCH 4/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 27786053429 --- docs/_data/test-generation.json | 152 ++++++++++++++++++++++++++++++-- 1 file changed, 147 insertions(+), 5 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index bf002fc14..1b6c896c2 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5759,6 +5759,148 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-19", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 392.5, + "average_prompt_tokens": 2001756.4, + "average_completion_tokens": 21527.7, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.67, + "powershell": 9.18, + "view": 12.59, + "grep": 11.45, + "edit": 1.47, + "create": 0.49, + "glob": 0.55, + "read_powershell": 0.1, + "stop_powershell": 0.02, + "task": 0.04, + "sql": 0.06 + }, + "github_run_id": "27786053429", + "experiment": { + "mcp_servers": [ + "altool" + ], + "al_lsp_enabled": false, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.5", + "resolved": 68, + "failed": 33, + "build": 94, + "percentage": 67.3, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": true, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": true, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": true, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": false, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": true, + "microsoftInternal__NAV-175765": true, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": false, + "microsoftInternal__NAV-193649": true, + "microsoftInternal__NAV-216918": true, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": false, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": true, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": true, + "microsoftInternal__NAV-209835": false, + "microsoftInternal__NAV-220036": true, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": false, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": false, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": true, + "microsoftInternal__NAV-222488": true, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": true, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": false, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": true, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": false, + "microsoftInternal__NAV-226875": false, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": true, + "microsoftInternal__NAV-222092": true, + "microsoftInternal__NAV-226004": false, + "microsoftInternal__NAV-224009": true, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": false, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": false, + "microsoftInternal__NAV-222484": false, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": false, + "microsoftInternal__NAV-185792": false, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": true, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": false, + "microsoftInternal__NAV-213741": false, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": false, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -5894,12 +6036,12 @@ "custom_agent": null }, "total": 101, - "num_runs": 3, - "average_duration": 414.6666666666667, + "num_runs": 4, + "average_duration": 409.125, "benchmark_version": "0.5.5", - "average": 0.696, - "ci_low": 0.683, - "ci_high": 0.723, + "average": 0.691, + "ci_low": 0.678, + "ci_high": 0.713, "pass_hat_5": null } ] From 2e8c015805115887a1b4fd394191884d9d94e80e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 19 Jun 2026 11:27:17 +0000 Subject: [PATCH 5/5] Update leaderboard: GitHub Copilot CLI (claude-opus-4.8) - Run 27813938693 --- docs/_data/test-generation.json | 154 ++++++++++++++++++++++++++++++-- 1 file changed, 148 insertions(+), 6 deletions(-) diff --git a/docs/_data/test-generation.json b/docs/_data/test-generation.json index 1b6c896c2..d43aa40fa 100644 --- a/docs/_data/test-generation.json +++ b/docs/_data/test-generation.json @@ -5901,6 +5901,148 @@ "microsoftInternal__NAV-180484": false, "microsoftInternal__NAV-176082": true } + }, + { + "total": 101, + "date": "2026-06-19", + "model": "claude-opus-4-8", + "agent_name": "GitHub Copilot", + "category": "test-generation", + "average_duration": 404.3, + "average_prompt_tokens": 2140851.5, + "average_completion_tokens": 22422.8, + "average_llm_duration": 0.0, + "average_tool_usage": { + "report_intent": 2.3, + "powershell": 10.25, + "view": 13.24, + "grep": 11.26, + "glob": 0.64, + "edit": 1.26, + "create": 0.58, + "read_powershell": 0.07, + "sql": 0.11, + "task": 0.03, + "stop_powershell": 0.03 + }, + "github_run_id": "27813938693", + "experiment": { + "mcp_servers": [ + "altool" + ], + "al_lsp_enabled": false, + "custom_instructions": false, + "skills_enabled": false, + "custom_agent": null + }, + "benchmark_version": "0.5.5", + "resolved": 68, + "failed": 33, + "build": 94, + "percentage": 67.3, + "instance_results": { + "microsoftInternal__NAV-223493": true, + "microsoftInternal__NAV-223790": true, + "microsoftInternal__NAV-220452": true, + "microsoftInternal__NAV-210200": true, + "microsoftInternal__NAV-175577": true, + "microsoftInternal__NAV-227219": false, + "microsoftInternal__NAV-217974": true, + "microsoftInternal__NAV-206977": true, + "microsoftInternal__NAV-207247": true, + "microsoftInternal__NAV-227358": false, + "microsoftInternal__NAV-179733": true, + "microsoftInternal__NAV-177750": true, + "microsoftInternal__NAV-181900": true, + "microsoftInternal__NAV-210528": true, + "microsoftInternal__NAV-206527": true, + "microsoftInternal__NAV-192565": true, + "microsoftInternal__NAV-182354": false, + "microsoftInternal__NAV-216572": true, + "microsoftInternal__NAV-175765": false, + "microsoftInternal__NAV-218062": false, + "microsoftInternal__NAV-208748": false, + "microsoftInternal__NAV-218323": true, + "microsoftInternal__NAV-193649": false, + "microsoftInternal__NAV-216918": false, + "microsoftInternal__NAV-207177": true, + "microsoftInternal__NAV-213629": true, + "microsoftInternal__NAV-214825": true, + "microsoftInternal__NAV-201169": false, + "microsoftInternal__NAV-209737": true, + "microsoftInternal__NAV-216057": false, + "microsoftInternal__NAV-188438": false, + "microsoftInternal__NAV-206135": true, + "microsoftInternal__NAV-209835": true, + "microsoftInternal__NAV-220036": false, + "microsoftInternal__NAV-207878": false, + "microsoftInternal__NAV-220984": true, + "microsoftInternal__NAV-227240": true, + "microsoftInternal__NAV-223819": true, + "microsoft__BCApps-4699": true, + "microsoftInternal__NAV-185488": true, + "microsoftInternal__NAV-174794": true, + "microsoftInternal__NAV-213524": true, + "microsoftInternal__NAV-211548": true, + "microsoftInternal__NAV-224668": true, + "microsoftInternal__NAV-209496": true, + "microsoft__BCApps-4822": true, + "microsoftInternal__NAV-221877": false, + "microsoftInternal__NAV-214926": true, + "microsoftInternal__NAV-177493": true, + "microsoftInternal__NAV-204450": true, + "microsoftInternal__NAV-222488": false, + "microsoftInternal__NAV-208649": true, + "microsoftInternal__NAV-215972": true, + "microsoftInternal__NAV-213671": false, + "microsoftInternal__NAV-215225": false, + "microsoftInternal__NAV-212355": false, + "microsoftInternal__NAV-178045": false, + "microsoftInternal__NAV-183399": false, + "microsoftInternal__NAV-214557": true, + "microsoftInternal__NAV-220314": true, + "microsoftInternal__NAV-185696": true, + "microsoftInternal__NAV-208320": true, + "microsoftInternal__NAV-174087": false, + "microsoftInternal__NAV-223202": true, + "microsoftInternal__NAV-217797": true, + "microsoftInternal__NAV-218786": false, + "microsoftInternal__NAV-193853": false, + "microsoftInternal__NAV-226875": false, + "microsoftInternal__NAV-211710": true, + "microsoftInternal__NAV-218856": false, + "microsoftInternal__NAV-219082": true, + "microsoftInternal__NAV-222092": false, + "microsoftInternal__NAV-226004": true, + "microsoftInternal__NAV-224009": false, + "microsoftInternal__NAV-203923": true, + "microsoftInternal__NAV-176426": false, + "microsoftInternal__NAV-224447": true, + "microsoft__BCApps-4766": true, + "microsoftInternal__NAV-176194": false, + "microsoftInternal__NAV-227153": true, + "microsoftInternal__NAV-215645": true, + "microsoftInternal__NAV-222484": true, + "microsoft__BCApps-5633": true, + "microsoftInternal__NAV-208851": true, + "microsoftInternal__NAV-209450": true, + "microsoftInternal__NAV-185792": true, + "microsoftInternal__NAV-205825": true, + "microsoftInternal__NAV-191624": false, + "microsoftInternal__NAV-226448": true, + "microsoftInternal__NAV-176150": true, + "microsoftInternal__NAV-213741": true, + "microsoftInternal__NAV-213683": false, + "microsoftInternal__NAV-211521": true, + "microsoftInternal__NAV-218995": true, + "microsoftInternal__NAV-218253": false, + "microsoftInternal__NAV-207236": true, + "microsoftInternal__NAV-226223": true, + "microsoftInternal__NAV-217104": true, + "microsoftInternal__NAV-195193": true, + "microsoftInternal__NAV-180484": false, + "microsoftInternal__NAV-176082": true + } } ], "aggregate": [ @@ -6036,13 +6178,13 @@ "custom_agent": null }, "total": 101, - "num_runs": 4, - "average_duration": 409.125, + "num_runs": 5, + "average_duration": 408.15999999999997, "benchmark_version": "0.5.5", - "average": 0.691, - "ci_low": 0.678, - "ci_high": 0.713, - "pass_hat_5": null + "average": 0.687, + "ci_low": 0.677, + "ci_high": 0.715, + "pass_hat_5": 0.446 } ] }