diff --git a/offline/README.md b/offline/README.md
index 3aa9456..5fdc6bf 100644
--- a/offline/README.md
+++ b/offline/README.md
@@ -8,6 +8,7 @@ Open replication of the code review benchmark used by companies like [Augment](h
 |---|---|
 | [Augment](https://www.augmentcode.com/) | AI code review |
 | [Claude Code](https://claude.ai) | AI assistant |
+| [CloudAEye](https://cloudaeye.com/) | AI code review |
 | [CodeRabbit](https://www.coderabbit.ai/) | AI code review |
 | [Codex](https://openai.com/codex) | AI assistant |
 | [Cursor Bugbot](https://cursor.com) | AI code review |
diff --git a/offline/analysis/benchmark_dashboard.html b/offline/analysis/benchmark_dashboard.html
index aa6278b..7c50f5b 100644
--- a/offline/analysis/benchmark_dashboard.html
+++ b/offline/analysis/benchmark_dashboard.html
@@ -191,58 +191,58 @@
 <body>
     <div class="predefined-filters">
         <div class="predefined-filter" data-filter-id="high_precision" onclick="applyPredefinedFilter('high_precision')">Highest Precision <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="tool_claude_language_python_risk_medium" onclick="applyPredefinedFilter('tool_claude_language_python_risk_medium')">Python + Medium Risk (Precision) <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_kodus-v2_domain_concurrency" onclick="applyPredefinedFilter('tool_kodus-v2_domain_concurrency')">Best for Concurrency (Precision) <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_kg_complexity_complex" onclick="applyPredefinedFilter('tool_kg_complexity_complex')">Best for Complex Code (Precision) <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_propel-v2_risk_high_context_file" onclick="applyPredefinedFilter('tool_propel-v2_risk_high_context_file')">High Risk + File Context (Precision) <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="tool_copilot_change_type_bug_fix" onclick="applyPredefinedFilter('tool_copilot_change_type_bug_fix')">Best for Bug Fixes (Recall) <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="tool_coderabbit_language_typescript_domain_scheduling" onclick="applyPredefinedFilter('tool_coderabbit_language_typescript_domain_scheduling')">Typescript + Scheduling (Recall) <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="change_type_performance" onclick="applyPredefinedFilter('change_type_performance')">Best for Performance Optimization <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="tool_copilot_change_type_bug_fix" onclick="applyPredefinedFilter('tool_copilot_change_type_bug_fix')">Best for Bug Fixes (Recall) <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="go_small" onclick="applyPredefinedFilter('go_small')">Best for Small Go PRs <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_baz_language_java_domain_authentication" onclick="applyPredefinedFilter('tool_baz_language_java_domain_authentication')">Java + Authentication <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="domain_caching" onclick="applyPredefinedFilter('domain_caching')">Best for Caching <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_codeant-v2_pr_size_small_change_type_performance" onclick="applyPredefinedFilter('tool_codeant-v2_pr_size_small_change_type_performance')">Small PRs + Performance Optimization (Precision) <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="ruby_medium" onclick="applyPredefinedFilter('ruby_medium')">Best for Medium Ruby PRs <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="change_type_bug_fix" onclick="applyPredefinedFilter('change_type_bug_fix')">Best for Bug Fixes <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_sourcery_language_typescript_concern_correctness" onclick="applyPredefinedFilter('tool_sourcery_language_typescript_concern_correctness')">Typescript + Correctness <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="tool_gemini_language_ruby_pr_size_medium" onclick="applyPredefinedFilter('tool_gemini_language_ruby_pr_size_medium')">Ruby + Medium PRs (Recall) <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="domain_ui" onclick="applyPredefinedFilter('domain_ui')">Best for Ui <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="tool_macroscope_context_file_concern_correctness" onclick="applyPredefinedFilter('tool_macroscope_context_file_concern_correctness')">File Context + Correctness <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_qodo-extended_change_type_bug_fix_context_cross_file" onclick="applyPredefinedFilter('tool_qodo-extended_change_type_bug_fix_context_cross_file')">Bug Fixes + Cross-File <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="tool_qodo-v2_domain_authentication_concern_correctness" onclick="applyPredefinedFilter('tool_qodo-v2_domain_authentication_concern_correctness')">Authentication + Correctness <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="tool_gemini_pr_size_medium_context_file" onclick="applyPredefinedFilter('tool_gemini_pr_size_medium_context_file')">Medium PRs + File Context <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="tool_qodo-v2_concern_reliability" onclick="applyPredefinedFilter('tool_qodo-v2_concern_reliability')">Best for Reliability <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_augment_domain_concurrency" onclick="applyPredefinedFilter('tool_augment_domain_concurrency')">Best for Concurrency <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="tool_claude_difficulty_moderate_context_file" onclick="applyPredefinedFilter('tool_claude_difficulty_moderate_context_file')">Moderate Bugs + File Context <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="tool_propel_language_ruby_concern_correctness" onclick="applyPredefinedFilter('tool_propel_language_ruby_concern_correctness')">Ruby + Correctness <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="tool_coderabbit_difficulty_moderate_risk_medium" onclick="applyPredefinedFilter('tool_coderabbit_difficulty_moderate_risk_medium')">Moderate Bugs + Medium Risk <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="go_small" onclick="applyPredefinedFilter('go_small')">Best for Small Go PRs <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="tool_macroscope_language_ruby_concern_correctness" onclick="applyPredefinedFilter('tool_macroscope_language_ruby_concern_correctness')">Ruby + Correctness <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="domain_caching" onclick="applyPredefinedFilter('domain_caching')">Best for Caching <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="language_go" onclick="applyPredefinedFilter('language_go')">Best for Go <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="context_file" onclick="applyPredefinedFilter('context_file')">Best for File Context <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="pr_size_small" onclick="applyPredefinedFilter('pr_size_small')">Best for Small PRs <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="change_type_bug_fix" onclick="applyPredefinedFilter('change_type_bug_fix')">Best for Bug Fixes <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="python_medium" onclick="applyPredefinedFilter('python_medium')">Best for Medium Python PRs <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="language_python" onclick="applyPredefinedFilter('language_python')">Best for Python <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="domain_scheduling" onclick="applyPredefinedFilter('domain_scheduling')">Best for Scheduling <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="concern_security" onclick="applyPredefinedFilter('concern_security')">Best for Security <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="security_critical" onclick="applyPredefinedFilter('security_critical')">Security Critical <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="risk_high" onclick="applyPredefinedFilter('risk_high')">Best for High Risk <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="risk_critical" onclick="applyPredefinedFilter('risk_critical')">Best for Critical Risk <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="language_python" onclick="applyPredefinedFilter('language_python')">Best for Python <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="high_recall" onclick="applyPredefinedFilter('high_recall')">Highest Recall <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="domain_scheduling" onclick="applyPredefinedFilter('domain_scheduling')">Best for Scheduling <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="python_medium" onclick="applyPredefinedFilter('python_medium')">Best for Medium Python PRs <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="domain_authentication" onclick="applyPredefinedFilter('domain_authentication')">Best for Authentication <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="difficulty_moderate" onclick="applyPredefinedFilter('difficulty_moderate')">Best for Moderate Bugs <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="high_risk_auth" onclick="applyPredefinedFilter('high_risk_auth')">High Risk Auth <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="risk_critical" onclick="applyPredefinedFilter('risk_critical')">Best for Critical Risk <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="java_medium" onclick="applyPredefinedFilter('java_medium')">Best for Medium Java PRs <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="change_type_feature" onclick="applyPredefinedFilter('change_type_feature')">Best for Features <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="complexity_moderate" onclick="applyPredefinedFilter('complexity_moderate')">Best for Moderate Code <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="complexity_complex" onclick="applyPredefinedFilter('complexity_complex')">Best for Complex Code <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="complex_subtle" onclick="applyPredefinedFilter('complex_subtle')">Complex & Subtle <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="context_file" onclick="applyPredefinedFilter('context_file')">Best for File Context <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="language_java" onclick="applyPredefinedFilter('language_java')">Best for Java <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="difficulty_subtle" onclick="applyPredefinedFilter('difficulty_subtle')">Best for Subtle Bugs <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="high_risk_auth" onclick="applyPredefinedFilter('high_risk_auth')">High Risk Auth <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="concern_correctness" onclick="applyPredefinedFilter('concern_correctness')">Best for Correctness <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="concern_reliability" onclick="applyPredefinedFilter('concern_reliability')">Best for Reliability <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="context_cross_file" onclick="applyPredefinedFilter('context_cross_file')">Best for Cross-File <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="concern_security" onclick="applyPredefinedFilter('concern_security')">Best for Security <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="security_critical" onclick="applyPredefinedFilter('security_critical')">Security Critical <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="complexity_moderate" onclick="applyPredefinedFilter('complexity_moderate')">Best for Moderate Code <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="high_f1" onclick="applyPredefinedFilter('high_f1')">Highest F1 <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="language_java" onclick="applyPredefinedFilter('language_java')">Best for Java <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="pr_size_large" onclick="applyPredefinedFilter('pr_size_large')">Best for Large PRs <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="language_typescript" onclick="applyPredefinedFilter('language_typescript')">Best for Typescript <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="domain_concurrency" onclick="applyPredefinedFilter('domain_concurrency')">Best for Concurrency <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="java_medium" onclick="applyPredefinedFilter('java_medium')">Best for Medium Java PRs <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="language_ruby" onclick="applyPredefinedFilter('language_ruby')">Best for Ruby <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="difficulty_subtle" onclick="applyPredefinedFilter('difficulty_subtle')">Best for Subtle Bugs <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="context_cross_file" onclick="applyPredefinedFilter('context_cross_file')">Best for Cross-File <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="pr_size_medium" onclick="applyPredefinedFilter('pr_size_medium')">Best for Medium PRs <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="domain_authentication" onclick="applyPredefinedFilter('domain_authentication')">Best for Authentication <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="change_type_feature" onclick="applyPredefinedFilter('change_type_feature')">Best for Features <span class="arrow">↗</span></div>
         <div class="predefined-filter" data-filter-id="risk_medium" onclick="applyPredefinedFilter('risk_medium')">Best for Medium Risk <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="pr_size_large" onclick="applyPredefinedFilter('pr_size_large')">Best for Large PRs <span class="arrow">↗</span></div>
-        <div class="predefined-filter" data-filter-id="difficulty_moderate" onclick="applyPredefinedFilter('difficulty_moderate')">Best for Moderate Bugs <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="concern_reliability" onclick="applyPredefinedFilter('concern_reliability')">Best for Reliability <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="domain_concurrency" onclick="applyPredefinedFilter('domain_concurrency')">Best for Concurrency <span class="arrow">↗</span></div>
+        <div class="predefined-filter" data-filter-id="language_ruby" onclick="applyPredefinedFilter('language_ruby')">Best for Ruby <span class="arrow">↗</span></div>
     </div>
 
     <div class="main-container">
@@ -410,10 +410,10 @@
 
     <script>
         // All models data embedded
-        const allModelsData = {"anthropic_claude-opus-4-5-20251101": {"prs": [{"url": "https://github.com/keycloak/keycloak/pull/37429", "language": "Java", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"graphite": {"tp": 0, "fp": 0, "fn": 4}, "gemini": {"tp": 1, "fp": 4, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 3}, "augment": {"tp": 2, "fp": 3, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "copilot": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 4}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 1, "fp": 6, "fn": 3}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 3}, "macroscope": {"tp": 0, "fp": 1, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 2}, "coderabbit": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37634", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 1, "fn": 2}, "gemini": {"tp": 3, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 2, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 1, "fn": 2}, "copilot": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 0, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 2, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 3}, "propel-v2": {"tp": 2, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 4, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 0, "fn": 2}}}, {"url": "https://github.com/keycloak/keycloak/pull/38446", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 2}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 1}, "macroscope": {"tp": 0, "fp": 2, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36882", "language": "Java", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "gemini": {"tp": 0, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "augment": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 1}, "sourcery": {"tp": 0, "fp": 4, "fn": 1}, "claude-code": {"tp": 0, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 1, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/36880", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 4, "fn": 3}, "gemini": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 2}, "bugbot": {"tp": 2, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 3}, "sourcery": {"tp": 0, "fp": 6, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 6, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37038", "language": "Java", "pr_size": "small", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 2, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/33832", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 1, "fp": 5, "fn": 1}, "gemini": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 5, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 5, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/40940", "language": "Java", "pr_size": "small", "domain": "concurrency", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 2, "fp": 0, "fn": 0}, "gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 0, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/93824", "language": "Python", "pr_size": "large", "domain": "concurrency", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 5, "fp": 1, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 4}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "claude": {"tp": 0, "fp": 2, "fn": 5}, "copilot": {"tp": 1, "fp": 5, "fn": 4}, "propel": {"tp": 2, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 3, "fp": 3, "fn": 2}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 3, "fp": 3, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 5}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 4}, "cubic-v2": {"tp": 5, "fp": 1, "fn": 0}, "macroscope": {"tp": 3, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 5}, "propel-v2": {"tp": 3, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 3}, "coderabbit": {"tp": 0, "fp": 1, "fn": 5}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 4}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5", "language": "Python", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 3}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 2}, "macroscope": {"tp": 0, "fp": 5, "fn": 3}, "baz": {"tp": 1, "fp": 3, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 16, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1", "language": "Python", "pr_size": "medium", "domain": "API", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 3}, "claude": {"tp": 2, "fp": 4, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "copilot": {"tp": 3, "fp": 6, "fn": 1}, "bugbot": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 1, "fn": 1}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 3, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 2, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 4, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 3}, "propel-v2": {"tp": 2, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/97529", "language": "Go", "pr_size": "small", "domain": "concurrency", "change_type": "performance", "complexity": "complex", "difficulty": "very_subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80168", "language": "Python", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80528", "language": "Python", "pr_size": "small", "domain": "scheduling", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/getsentry/sentry/pull/77754", "language": "Python", "pr_size": "medium", "domain": "serialization", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 3}, "claude": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 0, "fn": 3}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 0, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 0, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 3}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "propel-v2": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 3}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 3}}}, {"url": "https://github.com/getsentry/sentry/pull/95633", "language": "Python", "pr_size": "medium", "domain": "concurrency", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 0, "fp": 6, "fn": 3}, "propel": {"tp": 0, "fp": 0, "fn": 3}, "augment": {"tp": 0, "fp": 3, "fn": 3}, "coderabbit": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 4, "fn": 3}, "devin": {"tp": 0, "fp": 2, "fn": 3}, "sourcery": {"tp": 2, "fp": 2, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 0, "fp": 5, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 3}, "baz": {"tp": 0, "fp": 2, "fn": 3}, "propel-v2": {"tp": 0, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 0, "fp": 6, "fn": 3}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2", "language": "Python", "pr_size": "medium", "domain": "data_processing", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 3}, "claude": {"tp": 2, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 2, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 0, "fn": 3}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 8, "fn": 1}, "claude-code": {"tp": 3, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 3, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 1}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "propel-v2": {"tp": 1, "fp": 4, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3", "language": "Python", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 3, "fn": 3}, "copilot": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 2, "fp": 5, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 3, "fn": 3}, "propel-v2": {"tp": 1, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 3, "fp": 1, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/103633", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 6, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 5, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}}}, {"url": "https://github.com/getsentry/sentry/pull/67876", "language": "Python", "pr_size": "small", "domain": "authentication", "change_type": "security_patch", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 2, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 8, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 2}}}, {"url": "https://github.com/keycloak/keycloak/pull/32918", "language": "Java", "pr_size": "small", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 3, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}}}, {"url": "https://github.com/grafana/grafana/pull/94942", "language": "Go", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 0}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/90939", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/80329", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "performance", "complexity": "simple", "difficulty": "obvious", "risk": "low", "context": "local", "concern": "maintainability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 0}, "copilot": {"tp": 1, "fp": 4, "fn": 0}, "propel": {"tp": 0, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 2, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 0}, "devin": {"tp": 1, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 2, "fn": 0}, "claude-code": {"tp": 1, "fp": 3, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90045", "language": "Go", "pr_size": "medium", "domain": "logging", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 4, "fn": 0}, "claude": {"tp": 3, "fp": 3, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 3, "fn": 0}, "copilot": {"tp": 3, "fp": 7, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 3, "fp": 5, "fn": 0}, "coderabbit": {"tp": 3, "fp": 2, "fn": 0}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 6, "fn": 1}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 3, "fp": 1, "fn": 0}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "propel-v2": {"tp": 3, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 3, "fp": 3, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/106778", "language": "Go", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 10, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "copilot": {"tp": 1, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 2, "fn": 1}, "coderabbit": {"tp": 1, "fp": 6, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 2, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 0}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/107534", "language": "Go", "pr_size": "small", "domain": "testing", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "low", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 4, "fn": 1}, "augment": {"tp": 0, "fp": 0, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 1}, "sourcery": {"tp": 0, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/79265", "language": "Go", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 4}, "claude": {"tp": 1, "fp": 2, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "copilot": {"tp": 2, "fp": 7, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 5}, "augment": {"tp": 0, "fp": 2, "fn": 5}, "propel": {"tp": 1, "fp": 1, "fn": 4}, "coderabbit": {"tp": 3, "fp": 4, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 2, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 5}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 3}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 2}, "macroscope": {"tp": 0, "fp": 1, "fn": 5}, "baz": {"tp": 0, "fp": 1, "fn": 5}, "propel-v2": {"tp": 1, "fp": 2, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 4}, "greptile-v4-1": {"tp": 3, "fp": 3, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9", "language": "Ruby", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 0, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 0, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}}}, {"url": "https://github.com/grafana/grafana/pull/76186", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 3, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 1, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10", "language": "Ruby", "pr_size": "large", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 10, "fn": 2}, "claude": {"tp": 0, "fp": 3, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 4, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 3, "fp": 0, "fn": 1}, "augment": {"tp": 3, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 9, "fn": 2}, "kg": {"tp": 0, "fp": 2, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 2}, "devin": {"tp": 0, "fp": 2, "fn": 4}, "sourcery": {"tp": 3, "fp": 9, "fn": 1}, "claude-code": {"tp": 0, "fp": 6, "fn": 4}, "kodus-v2": {"tp": 3, "fp": 8, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 3, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "propel-v2": {"tp": 3, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 4, "fp": 6, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "bug_fix", "complexity": "simple", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 2, "fn": 0}, "claude": {"tp": 3, "fp": 4, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 1, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 3, "fp": 2, "fn": 0}, "devin": {"tp": 3, "fp": 1, "fn": 0}, "sourcery": {"tp": 1, "fp": 1, "fn": 2}, "claude-code": {"tp": 3, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "propel-v2": {"tp": 0, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 9, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8", "language": "Ruby", "pr_size": "medium", "domain": "API", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 2}, "claude": {"tp": 2, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 0, "fp": 5, "fn": 3}, "claude-code": {"tp": 1, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 3, "fn": 2}, "baz": {"tp": 1, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 6, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3", "language": "Ruby", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 7, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5", "language": "Ruby", "pr_size": "small", "domain": "UI", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6", "language": "Ruby", "pr_size": "small", "domain": "serialization", "change_type": "feature", "complexity": "simple", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 0}, "copilot": {"tp": 1, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 2, "fn": 0}, "sourcery": {"tp": 1, "fp": 4, "fn": 0}, "claude-code": {"tp": 1, "fp": 0, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 1, "fn": 0}, "propel-v2": {"tp": 1, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4", "language": "Ruby", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 5, "fp": 4, "fn": 1}, "claude": {"tp": 3, "fp": 3, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 6}, "bugbot": {"tp": 1, "fp": 4, "fn": 5}, "copilot": {"tp": 1, "fp": 3, "fn": 5}, "augment": {"tp": 3, "fp": 2, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 5}, "coderabbit": {"tp": 4, "fp": 20, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 6}, "qodo-v2": {"tp": 3, "fp": 8, "fn": 3}, "devin": {"tp": 1, "fp": 3, "fn": 5}, "sourcery": {"tp": 2, "fp": 3, "fn": 4}, "claude-code": {"tp": 3, "fp": 1, "fn": 3}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 4}, "qodo-extended": {"tp": 5, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 4}, "macroscope": {"tp": 1, "fp": 5, "fn": 5}, "baz": {"tp": 1, "fp": 3, "fn": 5}, "propel-v2": {"tp": 3, "fp": 6, "fn": 3}, "codeant-v2": {"tp": 2, "fp": 16, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 4}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1", "language": "Ruby", "pr_size": "medium", "domain": "file_io", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 2, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 2, "fp": 0, "fn": 1}, "coderabbit": {"tp": 2, "fp": 6, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 2, "fp": 5, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/22532", "language": "TypeScript", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 11, "fn": 0}, "augment": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 2, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 10, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 0, "fp": 4, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/8330", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 0}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 2, "fp": 1, "fn": 0}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "kg": {"tp": 1, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 2, "fp": 0, "fn": 0}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 1, "fn": 0}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14943", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 0, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/22345", "language": "TypeScript", "pr_size": "small", "domain": "database", "change_type": "migration", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 0, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/11059", "language": "TypeScript", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 3, "fn": 2}, "claude": {"tp": 3, "fp": 3, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 4, "fp": 4, "fn": 1}, "copilot": {"tp": 4, "fp": 9, "fn": 1}, "augment": {"tp": 5, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 5, "fp": 14, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 4, "fn": 1}, "devin": {"tp": 4, "fp": 1, "fn": 1}, "sourcery": {"tp": 5, "fp": 0, "fn": 0}, "claude-code": {"tp": 3, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended": {"tp": 5, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 2}, "macroscope": {"tp": 4, "fp": 7, "fn": 1}, "baz": {"tp": 4, "fp": 1, "fn": 1}, "propel-v2": {"tp": 5, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 4, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 4, "fp": 5, "fn": 1}, "greptile-v4-1": {"tp": 4, "fp": 3, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/7232", "language": "TypeScript", "pr_size": "medium", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 3, "fn": 2}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 2, "fp": 10, "fn": 0}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 4, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/14740", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 4}, "claude": {"tp": 2, "fp": 0, "fn": 3}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 3, "fn": 2}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "augment": {"tp": 4, "fp": 3, "fn": 1}, "propel": {"tp": 4, "fp": 0, "fn": 1}, "coderabbit": {"tp": 4, "fp": 6, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 4, "fp": 3, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 3}, "sourcery": {"tp": 3, "fp": 1, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 4, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 4, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 4, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "propel-v2": {"tp": 4, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/10600", "language": "TypeScript", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 8, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 9, "fn": 3}, "augment": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "coderabbit": {"tp": 2, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/10967", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "bug_fix", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 9, "fn": 4}, "claude": {"tp": 3, "fp": 4, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "bugbot": {"tp": 2, "fp": 4, "fn": 3}, "copilot": {"tp": 4, "fp": 7, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 3, "fp": 8, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 4}, "devin": {"tp": 2, "fp": 3, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 3}, "claude-code": {"tp": 2, "fp": 5, "fn": 3}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 4}, "macroscope": {"tp": 2, "fp": 3, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "propel-v2": {"tp": 2, "fp": 4, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 7, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 4}}}, {"url": "https://github.com/calcom/cal.com/pull/8087", "language": "TypeScript", "pr_size": "medium", "domain": "concurrency", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 4, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 3, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 10, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}], "tools": ["augment", "baz", "bugbot", "claude", "claude-code", "codeant-v2", "coderabbit", "copilot", "cubic-v2", "devin", "gemini", "graphite", "greptile-v4-1", "kg", "kodus-v2", "macroscope", "propel", "propel-v2", "qodo-extended", "qodo-extended-v2", "qodo-v2", "sourcery"], "dimensions": {"language": ["Go", "Java", "Python", "Ruby", "TypeScript"], "pr_size": ["small", "medium", "large"], "domain": ["API", "UI", "authentication", "caching", "concurrency", "configuration", "data_processing", "database", "file_io", "logging", "scheduling", "serialization", "testing"], "change_type": ["bug_fix", "feature", "migration", "performance", "refactoring", "security_patch"], "complexity": ["simple", "moderate", "complex"], "difficulty": ["obvious", "moderate", "subtle", "very_subtle"], "risk": ["low", "medium", "high", "critical"], "context": ["local", "file", "cross_file", "system"], "concern": ["correctness", "maintainability", "reliability", "security"]}, "overall_metrics": {"greptile-v4-1": {"precision": 40.5, "recall": 48.2, "f1": 44.0, "tp": 66, "fp": 97, "fn": 71, "num_prs": 50}, "copilot": {"precision": 28.3, "recall": 53.3, "f1": 37.0, "tp": 73, "fp": 185, "fn": 64, "num_prs": 50}, "propel": {"precision": 55.8, "recall": 38.7, "f1": 45.7, "tp": 53, "fp": 42, "fn": 84, "num_prs": 50}, "kg": {"precision": 50.0, "recall": 16.8, "f1": 25.1, "tp": 23, "fp": 23, "fn": 114, "num_prs": 50}, "gemini": {"precision": 31.1, "recall": 37.2, "f1": 33.9, "tp": 51, "fp": 113, "fn": 86, "num_prs": 50}, "qodo-extended-v2": {"precision": 54.9, "recall": 61.3, "f1": 57.9, "tp": 84, "fp": 69, "fn": 53, "num_prs": 50}, "qodo-v2": {"precision": 42.9, "recall": 55.5, "f1": 48.4, "tp": 76, "fp": 101, "fn": 61, "num_prs": 50}, "kodus-v2": {"precision": 46.7, "recall": 35.8, "f1": 40.5, "tp": 49, "fp": 56, "fn": 88, "num_prs": 50}, "coderabbit": {"precision": 25.7, "recall": 56.2, "f1": 35.2, "tp": 77, "fp": 223, "fn": 60, "num_prs": 50}, "macroscope": {"precision": 48.4, "recall": 43.8, "f1": 46.0, "tp": 60, "fp": 64, "fn": 77, "num_prs": 50}, "sourcery": {"precision": 33.3, "recall": 51.8, "f1": 40.6, "tp": 71, "fp": 142, "fn": 66, "num_prs": 50}, "augment": {"precision": 47.5, "recall": 61.3, "f1": 53.5, "tp": 84, "fp": 93, "fn": 53, "num_prs": 50}, "codeant-v2": {"precision": 31.9, "recall": 38.0, "f1": 34.7, "tp": 52, "fp": 111, "fn": 85, "num_prs": 50}, "qodo-extended": {"precision": 37.2, "recall": 62.8, "f1": 46.7, "tp": 86, "fp": 145, "fn": 51, "num_prs": 50}, "devin": {"precision": 54.3, "recall": 37.2, "f1": 44.2, "tp": 51, "fp": 43, "fn": 86, "num_prs": 50}, "propel-v2": {"precision": 44.4, "recall": 49.6, "f1": 46.9, "tp": 68, "fp": 85, "fn": 69, "num_prs": 50}, "claude": {"precision": 34.8, "recall": 35.8, "f1": 35.3, "tp": 49, "fp": 92, "fn": 88, "num_prs": 50}, "bugbot": {"precision": 47.2, "recall": 43.8, "f1": 45.5, "tp": 60, "fp": 67, "fn": 77, "num_prs": 50}, "claude-code": {"precision": 34.8, "recall": 40.9, "f1": 37.6, "tp": 56, "fp": 105, "fn": 81, "num_prs": 50}, "graphite": {"precision": 100.0, "recall": 8.8, "f1": 16.1, "tp": 12, "fp": 0, "fn": 125, "num_prs": 50}, "cubic-v2": {"precision": 56.3, "recall": 68.6, "f1": 61.8, "tp": 94, "fp": 73, "fn": 43, "num_prs": 50}, "baz": {"precision": 49.0, "recall": 34.3, "f1": 40.3, "tp": 47, "fp": 49, "fn": 90, "num_prs": 50}}}, "anthropic_claude-sonnet-4-5-20250929": {"prs": [{"url": "https://github.com/keycloak/keycloak/pull/37429", "language": "Java", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"graphite": {"tp": 0, "fp": 0, "fn": 4}, "gemini": {"tp": 0, "fp": 4, "fn": 4}, "claude": {"tp": 1, "fp": 0, "fn": 3}, "augment": {"tp": 2, "fp": 3, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 4}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 1, "fp": 5, "fn": 3}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 3}, "macroscope": {"tp": 0, "fp": 1, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 4}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 2}, "coderabbit": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37634", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 3, "fn": 2}, "gemini": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 2, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 2, "fn": 2}, "copilot": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 0, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 2, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 2}, "qodo-extended-v2": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 4, "fp": 6, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}}}, {"url": "https://github.com/keycloak/keycloak/pull/38446", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 2}, "propel": {"tp": 0, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 1}, "macroscope": {"tp": 0, "fp": 3, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36882", "language": "Java", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "gemini": {"tp": 0, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "augment": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 1}, "sourcery": {"tp": 0, "fp": 4, "fn": 1}, "claude-code": {"tp": 0, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/36880", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 5, "fn": 3}, "gemini": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 2, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 2}, "bugbot": {"tp": 2, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "copilot": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 2}, "devin": {"tp": 0, "fp": 1, "fn": 3}, "sourcery": {"tp": 1, "fp": 5, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37038", "language": "Java", "pr_size": "small", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 2, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 8, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/33832", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 1, "fp": 5, "fn": 1}, "gemini": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 4, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 5, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/40940", "language": "Java", "pr_size": "small", "domain": "concurrency", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 2, "fp": 0, "fn": 0}, "gemini": {"tp": 2, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 3, "fn": 0}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 0, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/93824", "language": "Python", "pr_size": "large", "domain": "concurrency", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 5, "fp": 1, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 4}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "claude": {"tp": 0, "fp": 2, "fn": 5}, "copilot": {"tp": 1, "fp": 4, "fn": 4}, "propel": {"tp": 1, "fp": 2, "fn": 4}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 2, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 3, "fp": 3, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 5}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 4}, "cubic-v2": {"tp": 4, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 1, "fn": 3}, "baz": {"tp": 0, "fp": 3, "fn": 5}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 3}, "coderabbit": {"tp": 1, "fp": 3, "fn": 4}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 4}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5", "language": "Python", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 3}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 2}, "macroscope": {"tp": 0, "fp": 5, "fn": 3}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 16, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1", "language": "Python", "pr_size": "medium", "domain": "API", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 3}, "claude": {"tp": 2, "fp": 7, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "copilot": {"tp": 3, "fp": 6, "fn": 1}, "bugbot": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 1, "fn": 1}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 3, "fp": 2, "fn": 1}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 2, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/97529", "language": "Go", "pr_size": "small", "domain": "concurrency", "change_type": "performance", "complexity": "complex", "difficulty": "very_subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 4, "fn": 0}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80168", "language": "Python", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80528", "language": "Python", "pr_size": "small", "domain": "scheduling", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/getsentry/sentry/pull/77754", "language": "Python", "pr_size": "medium", "domain": "serialization", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 3}, "claude": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 0, "fn": 3}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 0, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 0, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 3}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 3}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 3}}}, {"url": "https://github.com/getsentry/sentry/pull/95633", "language": "Python", "pr_size": "medium", "domain": "concurrency", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 0, "fp": 6, "fn": 3}, "propel": {"tp": 0, "fp": 0, "fn": 3}, "augment": {"tp": 0, "fp": 6, "fn": 3}, "coderabbit": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 4, "fn": 3}, "devin": {"tp": 0, "fp": 2, "fn": 3}, "sourcery": {"tp": 2, "fp": 2, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 0, "fp": 5, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 3}, "baz": {"tp": 0, "fp": 1, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 0, "fp": 6, "fn": 3}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2", "language": "Python", "pr_size": "medium", "domain": "data_processing", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 3}, "claude": {"tp": 2, "fp": 6, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 2, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 0, "fn": 3}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 6, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 4, "fn": 1}, "qodo-extended": {"tp": 3, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 1}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3", "language": "Python", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 3, "fn": 3}, "copilot": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 2, "fp": 5, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 5, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 3, "fn": 3}, "codeant-v2": {"tp": 3, "fp": 1, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/103633", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 1, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 5, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 3, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}}}, {"url": "https://github.com/getsentry/sentry/pull/67876", "language": "Python", "pr_size": "small", "domain": "authentication", "change_type": "security_patch", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 2, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 2, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 8, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 2}}}, {"url": "https://github.com/keycloak/keycloak/pull/32918", "language": "Java", "pr_size": "small", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 3, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}}}, {"url": "https://github.com/grafana/grafana/pull/94942", "language": "Go", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 0, "fn": 0}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 0}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/90939", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/80329", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "performance", "complexity": "simple", "difficulty": "obvious", "risk": "low", "context": "local", "concern": "maintainability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 0}, "copilot": {"tp": 1, "fp": 4, "fn": 0}, "propel": {"tp": 0, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 3, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 0}, "devin": {"tp": 1, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 2, "fn": 0}, "claude-code": {"tp": 1, "fp": 6, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90045", "language": "Go", "pr_size": "medium", "domain": "logging", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 4, "fn": 0}, "claude": {"tp": 3, "fp": 3, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 3, "fn": 0}, "copilot": {"tp": 3, "fp": 8, "fn": 0}, "propel": {"tp": 2, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 5, "fn": 0}, "coderabbit": {"tp": 3, "fp": 2, "fn": 0}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 6, "fn": 1}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 3, "fp": 1, "fn": 0}, "claude-code": {"tp": 3, "fp": 3, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/106778", "language": "Go", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 12, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "copilot": {"tp": 1, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 5, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 6, "fn": 1}, "devin": {"tp": 2, "fp": 1, "fn": 0}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/107534", "language": "Go", "pr_size": "small", "domain": "testing", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "low", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 1}, "augment": {"tp": 0, "fp": 0, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 1}, "sourcery": {"tp": 0, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/79265", "language": "Go", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 4}, "claude": {"tp": 1, "fp": 2, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 5}, "augment": {"tp": 1, "fp": 1, "fn": 4}, "propel": {"tp": 1, "fp": 1, "fn": 4}, "coderabbit": {"tp": 3, "fp": 4, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 3}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 2, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 3, "fn": 5}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 3}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 2}, "macroscope": {"tp": 0, "fp": 1, "fn": 5}, "baz": {"tp": 1, "fp": 0, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 4}, "greptile-v4-1": {"tp": 2, "fp": 4, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9", "language": "Ruby", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 0, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 0, "fp": 3, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}}}, {"url": "https://github.com/grafana/grafana/pull/76186", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 0, "fp": 2, "fn": 2}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 3, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 1, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10", "language": "Ruby", "pr_size": "large", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 8, "fn": 2}, "claude": {"tp": 0, "fp": 3, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 4, "fn": 1}, "copilot": {"tp": 2, "fp": 4, "fn": 2}, "propel": {"tp": 3, "fp": 0, "fn": 1}, "augment": {"tp": 3, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 12, "fn": 2}, "kg": {"tp": 0, "fp": 2, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 2}, "devin": {"tp": 0, "fp": 2, "fn": 4}, "sourcery": {"tp": 3, "fp": 8, "fn": 1}, "claude-code": {"tp": 0, "fp": 6, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 7, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 3, "fp": 5, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 3, "fp": 6, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "bug_fix", "complexity": "simple", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 2, "fn": 0}, "claude": {"tp": 3, "fp": 4, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 1, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 2}, "coderabbit": {"tp": 0, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 3, "fp": 2, "fn": 0}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 2, "fp": 1, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 9, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 0, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8", "language": "Ruby", "pr_size": "medium", "domain": "API", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 1}, "claude": {"tp": 2, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 1, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 4, "fn": 2}, "claude-code": {"tp": 1, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 3, "fn": 2}, "baz": {"tp": 1, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 8, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3", "language": "Ruby", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 2, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5", "language": "Ruby", "pr_size": "small", "domain": "UI", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6", "language": "Ruby", "pr_size": "small", "domain": "serialization", "change_type": "feature", "complexity": "simple", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 4, "fn": 0}, "copilot": {"tp": 1, "fp": 2, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 0}, "sourcery": {"tp": 0, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 0, "fp": 4, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4", "language": "Ruby", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 4, "fp": 5, "fn": 2}, "claude": {"tp": 5, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 6}, "bugbot": {"tp": 1, "fp": 4, "fn": 5}, "copilot": {"tp": 1, "fp": 3, "fn": 5}, "augment": {"tp": 3, "fp": 2, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 5}, "coderabbit": {"tp": 4, "fp": 13, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 6}, "qodo-v2": {"tp": 4, "fp": 4, "fn": 2}, "devin": {"tp": 1, "fp": 3, "fn": 5}, "sourcery": {"tp": 2, "fp": 3, "fn": 4}, "claude-code": {"tp": 3, "fp": 1, "fn": 3}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 4}, "qodo-extended": {"tp": 6, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 4}, "macroscope": {"tp": 1, "fp": 5, "fn": 5}, "baz": {"tp": 1, "fp": 3, "fn": 5}, "codeant-v2": {"tp": 2, "fp": 16, "fn": 4}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 4}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1", "language": "Ruby", "pr_size": "medium", "domain": "file_io", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 2, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 1}, "propel": {"tp": 2, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 5, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 1}, "claude-code": {"tp": 3, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 4, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 6, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/22532", "language": "TypeScript", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 11, "fn": 0}, "augment": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 1, "fp": 4, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 5, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/8330", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 5, "fn": 0}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 2, "fp": 1, "fn": 0}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 0, "fp": 3, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 2, "fp": 0, "fn": 0}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14943", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 0, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/22345", "language": "TypeScript", "pr_size": "small", "domain": "database", "change_type": "migration", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 0, "fn": 1}, "claude-code": {"tp": 1, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/11059", "language": "TypeScript", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 3, "fn": 2}, "claude": {"tp": 3, "fp": 3, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 4, "fp": 3, "fn": 1}, "copilot": {"tp": 4, "fp": 10, "fn": 1}, "augment": {"tp": 5, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 5, "fp": 13, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 7, "fn": 1}, "devin": {"tp": 4, "fp": 1, "fn": 1}, "sourcery": {"tp": 5, "fp": 0, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended": {"tp": 5, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 2}, "macroscope": {"tp": 4, "fp": 6, "fn": 1}, "baz": {"tp": 4, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 5, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 5, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/7232", "language": "TypeScript", "pr_size": "medium", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 2}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 2, "fp": 10, "fn": 0}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 3, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/14740", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 4}, "claude": {"tp": 2, "fp": 0, "fn": 3}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 3, "fn": 2}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "augment": {"tp": 4, "fp": 3, "fn": 1}, "propel": {"tp": 4, "fp": 0, "fn": 1}, "coderabbit": {"tp": 4, "fp": 4, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 4}, "qodo-v2": {"tp": 3, "fp": 3, "fn": 2}, "devin": {"tp": 2, "fp": 0, "fn": 3}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 4, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 4, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 4, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/10600", "language": "TypeScript", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 8, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 9, "fn": 3}, "augment": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "coderabbit": {"tp": 2, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 4, "fn": 2}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 1, "fn": 4}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/10967", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "bug_fix", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 7, "fn": 4}, "claude": {"tp": 4, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "bugbot": {"tp": 2, "fp": 4, "fn": 3}, "copilot": {"tp": 3, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 4, "fp": 6, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 4}, "devin": {"tp": 3, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 5, "fn": 3}, "claude-code": {"tp": 2, "fp": 6, "fn": 3}, "kodus-v2": {"tp": 3, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 6, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 4}, "macroscope": {"tp": 2, "fp": 3, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 4}}}, {"url": "https://github.com/calcom/cal.com/pull/8087", "language": "TypeScript", "pr_size": "medium", "domain": "concurrency", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 6, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 2, "fp": 5, "fn": 0}, "kg": {"tp": 1, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 2, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 9, "fn": 0}, "macroscope": {"tp": 1, "fp": 4, "fn": 1}, "baz": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}], "tools": ["augment", "baz", "bugbot", "claude", "claude-code", "codeant-v2", "coderabbit", "copilot", "cubic-v2", "devin", "gemini", "graphite", "greptile-v4-1", "kg", "kodus-v2", "macroscope", "propel", "qodo-extended", "qodo-extended-v2", "qodo-v2", "sourcery"], "dimensions": {"language": ["Go", "Java", "Python", "Ruby", "TypeScript"], "pr_size": ["small", "medium", "large"], "domain": ["API", "UI", "authentication", "caching", "concurrency", "configuration", "data_processing", "database", "file_io", "logging", "scheduling", "serialization", "testing"], "change_type": ["bug_fix", "feature", "migration", "performance", "refactoring", "security_patch"], "complexity": ["simple", "moderate", "complex"], "difficulty": ["obvious", "moderate", "subtle", "very_subtle"], "risk": ["low", "medium", "high", "critical"], "context": ["local", "file", "cross_file", "system"], "concern": ["correctness", "maintainability", "reliability", "security"]}, "overall_metrics": {"greptile-v4-1": {"precision": 36.5, "recall": 45.3, "f1": 40.4, "tp": 62, "fp": 108, "fn": 75, "num_prs": 50}, "copilot": {"precision": 27.2, "recall": 51.1, "f1": 35.5, "tp": 70, "fp": 187, "fn": 67, "num_prs": 50}, "propel": {"precision": 52.6, "recall": 36.5, "f1": 43.1, "tp": 50, "fp": 45, "fn": 87, "num_prs": 50}, "kg": {"precision": 44.7, "recall": 15.3, "f1": 22.8, "tp": 21, "fp": 26, "fn": 116, "num_prs": 50}, "qodo-extended-v2": {"precision": 52.5, "recall": 60.6, "f1": 56.3, "tp": 83, "fp": 75, "fn": 54, "num_prs": 50}, "gemini": {"precision": 29.7, "recall": 35.8, "f1": 32.5, "tp": 49, "fp": 116, "fn": 88, "num_prs": 50}, "qodo-v2": {"precision": 40.5, "recall": 56.2, "f1": 47.1, "tp": 77, "fp": 113, "fn": 60, "num_prs": 50}, "kodus-v2": {"precision": 44.9, "recall": 35.0, "f1": 39.3, "tp": 48, "fp": 59, "fn": 89, "num_prs": 50}, "coderabbit": {"precision": 27.5, "recall": 56.9, "f1": 37.1, "tp": 78, "fp": 206, "fn": 59, "num_prs": 50}, "macroscope": {"precision": 45.8, "recall": 43.8, "f1": 44.8, "tp": 60, "fp": 71, "fn": 77, "num_prs": 50}, "sourcery": {"precision": 33.3, "recall": 51.8, "f1": 40.6, "tp": 71, "fp": 142, "fn": 66, "num_prs": 50}, "augment": {"precision": 46.0, "recall": 63.5, "f1": 53.4, "tp": 87, "fp": 102, "fn": 50, "num_prs": 50}, "codeant-v2": {"precision": 31.1, "recall": 36.5, "f1": 33.6, "tp": 50, "fp": 111, "fn": 87, "num_prs": 50}, "qodo-extended": {"precision": 35.3, "recall": 61.3, "f1": 44.8, "tp": 84, "fp": 154, "fn": 53, "num_prs": 50}, "devin": {"precision": 54.2, "recall": 38.0, "f1": 44.6, "tp": 52, "fp": 44, "fn": 85, "num_prs": 50}, "claude": {"precision": 35.7, "recall": 40.1, "f1": 37.8, "tp": 55, "fp": 99, "fn": 82, "num_prs": 50}, "bugbot": {"precision": 45.4, "recall": 43.1, "f1": 44.2, "tp": 59, "fp": 71, "fn": 78, "num_prs": 50}, "claude-code": {"precision": 30.7, "recall": 40.1, "f1": 34.8, "tp": 55, "fp": 124, "fn": 82, "num_prs": 50}, "graphite": {"precision": 100.0, "recall": 8.8, "f1": 16.1, "tp": 12, "fp": 0, "fn": 125, "num_prs": 50}, "cubic-v2": {"precision": 55.6, "recall": 68.6, "f1": 61.4, "tp": 94, "fp": 75, "fn": 43, "num_prs": 50}, "baz": {"precision": 49.0, "recall": 35.8, "f1": 41.4, "tp": 49, "fp": 51, "fn": 88, "num_prs": 50}}}, "openai_gpt-5.2": {"prs": [{"url": "https://github.com/keycloak/keycloak/pull/37429", "language": "Java", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"graphite": {"tp": 0, "fp": 0, "fn": 4}, "gemini": {"tp": 0, "fp": 5, "fn": 4}, "claude": {"tp": 1, "fp": 0, "fn": 3}, "augment": {"tp": 2, "fp": 3, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 4}, "devin": {"tp": 2, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 14, "fn": 3}, "claude-code": {"tp": 1, "fp": 5, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 3}, "macroscope": {"tp": 0, "fp": 2, "fn": 4}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 2, "fn": 4}, "propel-v2": {"tp": 1, "fp": 3, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 2}, "coderabbit": {"tp": 3, "fp": 7, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37634", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 2, "fn": 2}, "gemini": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 4, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 2, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 2, "fn": 2}, "copilot": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 2}, "claude-code": {"tp": 0, "fp": 3, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 2, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 5, "fn": 2}, "propel-v2": {"tp": 2, "fp": 2, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 2}, "coderabbit": {"tp": 3, "fp": 7, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}}}, {"url": "https://github.com/keycloak/keycloak/pull/38446", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 6, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 2}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 1}, "macroscope": {"tp": 0, "fp": 4, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/36882", "language": "Java", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "gemini": {"tp": 0, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 1}, "copilot": {"tp": 0, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "augment": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 1}, "sourcery": {"tp": 0, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 2, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/36880", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 6, "fn": 3}, "gemini": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 2}, "bugbot": {"tp": 3, "fp": 2, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 2}, "devin": {"tp": 0, "fp": 1, "fn": 3}, "sourcery": {"tp": 1, "fp": 5, "fn": 2}, "claude-code": {"tp": 0, "fp": 1, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 4, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 6, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37038", "language": "Java", "pr_size": "small", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 0}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 2, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 8, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/33832", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 4, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 1, "fp": 1, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 6, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 5, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/40940", "language": "Java", "pr_size": "small", "domain": "concurrency", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 2, "fp": 0, "fn": 0}, "gemini": {"tp": 2, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 3, "fn": 0}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 0, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 3, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 0}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/93824", "language": "Python", "pr_size": "large", "domain": "concurrency", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 5, "fp": 1, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 4}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "claude": {"tp": 0, "fp": 3, "fn": 5}, "copilot": {"tp": 1, "fp": 5, "fn": 4}, "propel": {"tp": 2, "fp": 4, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 4}, "sourcery": {"tp": 4, "fp": 2, "fn": 1}, "claude-code": {"tp": 3, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 5}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 4}, "cubic-v2": {"tp": 4, "fp": 1, "fn": 1}, "macroscope": {"tp": 3, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 2, "fn": 5}, "propel-v2": {"tp": 2, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 3}, "coderabbit": {"tp": 1, "fp": 3, "fn": 4}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 4}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5", "language": "Python", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 3}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 2}, "macroscope": {"tp": 0, "fp": 5, "fn": 3}, "baz": {"tp": 1, "fp": 3, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 16, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1", "language": "Python", "pr_size": "medium", "domain": "API", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 3}, "claude": {"tp": 2, "fp": 7, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "copilot": {"tp": 3, "fp": 13, "fn": 1}, "bugbot": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 1, "fn": 1}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 3, "fp": 2, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 6, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 3}, "propel-v2": {"tp": 2, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/97529", "language": "Go", "pr_size": "small", "domain": "concurrency", "change_type": "performance", "complexity": "complex", "difficulty": "very_subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 3, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 4, "fn": 0}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 1, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80168", "language": "Python", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80528", "language": "Python", "pr_size": "small", "domain": "scheduling", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/getsentry/sentry/pull/77754", "language": "Python", "pr_size": "medium", "domain": "serialization", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 3}, "claude": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 0, "fn": 3}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 0, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 0, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 3}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "propel-v2": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 3}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 3}}}, {"url": "https://github.com/getsentry/sentry/pull/95633", "language": "Python", "pr_size": "medium", "domain": "concurrency", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 0, "fp": 6, "fn": 3}, "propel": {"tp": 0, "fp": 0, "fn": 3}, "augment": {"tp": 0, "fp": 7, "fn": 3}, "coderabbit": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 4, "fn": 3}, "devin": {"tp": 0, "fp": 4, "fn": 3}, "sourcery": {"tp": 2, "fp": 4, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 3}, "baz": {"tp": 0, "fp": 3, "fn": 3}, "propel-v2": {"tp": 0, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 0, "fp": 8, "fn": 3}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2", "language": "Python", "pr_size": "medium", "domain": "data_processing", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 3}, "claude": {"tp": 2, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 3, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 3, "fn": 1}, "coderabbit": {"tp": 0, "fp": 0, "fn": 3}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 10, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 5, "fn": 1}, "qodo-extended": {"tp": 3, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 1}, "baz": {"tp": 3, "fp": 2, "fn": 0}, "propel-v2": {"tp": 1, "fp": 5, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 4, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3", "language": "Python", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 3, "fn": 3}, "copilot": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 2, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 6, "fn": 3}, "propel-v2": {"tp": 1, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 3, "fp": 1, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/103633", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 6, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 6, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 3, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 0, "fp": 3, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}}}, {"url": "https://github.com/getsentry/sentry/pull/67876", "language": "Python", "pr_size": "small", "domain": "authentication", "change_type": "security_patch", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 2}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 2, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 2}, "sourcery": {"tp": 1, "fp": 9, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 2}}}, {"url": "https://github.com/keycloak/keycloak/pull/32918", "language": "Java", "pr_size": "small", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 2, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}}}, {"url": "https://github.com/grafana/grafana/pull/94942", "language": "Go", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 9, "fn": 0}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 0}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/90939", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/80329", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "performance", "complexity": "simple", "difficulty": "obvious", "risk": "low", "context": "local", "concern": "maintainability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 4, "fn": 0}, "copilot": {"tp": 1, "fp": 4, "fn": 0}, "propel": {"tp": 0, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 5, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 6, "fn": 0}, "devin": {"tp": 1, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 3, "fn": 0}, "claude-code": {"tp": 1, "fp": 7, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 2, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90045", "language": "Go", "pr_size": "medium", "domain": "logging", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 5, "fn": 0}, "claude": {"tp": 3, "fp": 4, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 2, "fn": 0}, "copilot": {"tp": 3, "fp": 8, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 3, "fp": 5, "fn": 0}, "coderabbit": {"tp": 3, "fp": 2, "fn": 0}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 6, "fn": 1}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 3, "fp": 0, "fn": 0}, "claude-code": {"tp": 3, "fp": 5, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "propel-v2": {"tp": 3, "fp": 4, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/106778", "language": "Go", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 9, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 2, "fn": 1}, "coderabbit": {"tp": 1, "fp": 7, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 2, "fp": 1, "fn": 0}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/107534", "language": "Go", "pr_size": "small", "domain": "testing", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "low", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 2, "fn": 1}, "copilot": {"tp": 0, "fp": 4, "fn": 1}, "augment": {"tp": 0, "fp": 0, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 1}, "sourcery": {"tp": 0, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 7, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 3, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/79265", "language": "Go", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 4}, "claude": {"tp": 1, "fp": 2, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 5}, "augment": {"tp": 1, "fp": 1, "fn": 4}, "propel": {"tp": 1, "fp": 1, "fn": 4}, "coderabbit": {"tp": 4, "fp": 5, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 4}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 3, "fp": 3, "fn": 2}, "claude-code": {"tp": 0, "fp": 4, "fn": 5}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 4}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 5}, "baz": {"tp": 1, "fp": 0, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 4}, "greptile-v4-1": {"tp": 2, "fp": 4, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9", "language": "Ruby", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 0, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 3, "fn": 2}, "coderabbit": {"tp": 0, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}}}, {"url": "https://github.com/grafana/grafana/pull/76186", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 3, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "copilot": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 6, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 1, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10", "language": "Ruby", "pr_size": "large", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 10, "fn": 3}, "claude": {"tp": 0, "fp": 3, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 4, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 17, "fn": 2}, "kg": {"tp": 0, "fp": 2, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 8, "fn": 2}, "devin": {"tp": 0, "fp": 2, "fn": 4}, "sourcery": {"tp": 4, "fp": 11, "fn": 0}, "claude-code": {"tp": 0, "fp": 6, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 10, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 2, "fp": 5, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 2}, "propel-v2": {"tp": 3, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 7, "fn": 2}, "qodo-extended-v2": {"tp": 3, "fp": 7, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "bug_fix", "complexity": "simple", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 2, "fn": 0}, "claude": {"tp": 3, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 1, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 0}, "devin": {"tp": 3, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 2, "fn": 2}, "claude-code": {"tp": 3, "fp": 4, "fn": 0}, "kodus-v2": {"tp": 3, "fp": 0, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 2}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "propel-v2": {"tp": 0, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 9, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8", "language": "Ruby", "pr_size": "medium", "domain": "API", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 2}, "claude": {"tp": 2, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 7, "fn": 2}, "augment": {"tp": 2, "fp": 3, "fn": 1}, "propel": {"tp": 1, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 2}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 4, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 5, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 3, "fn": 2}, "baz": {"tp": 1, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 8, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3", "language": "Ruby", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 2, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 5, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 5, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 5, "fn": 2}, "propel-v2": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5", "language": "Ruby", "pr_size": "small", "domain": "UI", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6", "language": "Ruby", "pr_size": "small", "domain": "serialization", "change_type": "feature", "complexity": "simple", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 1}, "copilot": {"tp": 1, "fp": 2, "fn": 0}, "augment": {"tp": 0, "fp": 3, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 3, "fn": 1}, "sourcery": {"tp": 0, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 4, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 7, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 1}, "macroscope": {"tp": 0, "fp": 2, "fn": 1}, "baz": {"tp": 0, "fp": 5, "fn": 1}, "propel-v2": {"tp": 0, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4", "language": "Ruby", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 7, "fn": 3}, "claude": {"tp": 2, "fp": 5, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 6}, "bugbot": {"tp": 1, "fp": 4, "fn": 5}, "copilot": {"tp": 1, "fp": 5, "fn": 5}, "augment": {"tp": 2, "fp": 3, "fn": 4}, "propel": {"tp": 1, "fp": 1, "fn": 5}, "coderabbit": {"tp": 3, "fp": 24, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 6}, "qodo-v2": {"tp": 3, "fp": 14, "fn": 3}, "devin": {"tp": 1, "fp": 3, "fn": 5}, "sourcery": {"tp": 1, "fp": 4, "fn": 5}, "claude-code": {"tp": 2, "fp": 4, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 3, "fn": 4}, "qodo-extended": {"tp": 4, "fp": 7, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 4, "fn": 5}, "macroscope": {"tp": 1, "fp": 5, "fn": 5}, "baz": {"tp": 1, "fp": 4, "fn": 5}, "propel-v2": {"tp": 2, "fp": 7, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 17, "fn": 5}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 4}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1", "language": "Ruby", "pr_size": "medium", "domain": "file_io", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 2, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 7, "fn": 1}, "augment": {"tp": 2, "fp": 4, "fn": 1}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 5, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 4, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 3, "fn": 1}, "claude-code": {"tp": 3, "fp": 3, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 1}, "propel-v2": {"tp": 2, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 5, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/22532", "language": "TypeScript", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 11, "fn": 0}, "augment": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 1, "fp": 4, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 4, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/8330", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 0}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 2, "fp": 1, "fn": 0}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "kg": {"tp": 1, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 2, "fp": 0, "fn": 0}, "sourcery": {"tp": 2, "fp": 3, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 1, "fn": 0}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14943", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 3, "fn": 1}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/22345", "language": "TypeScript", "pr_size": "small", "domain": "database", "change_type": "migration", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 5, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/11059", "language": "TypeScript", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 3, "fn": 2}, "claude": {"tp": 3, "fp": 3, "fn": 2}, "graphite": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 4, "fp": 4, "fn": 1}, "copilot": {"tp": 5, "fp": 12, "fn": 0}, "augment": {"tp": 5, "fp": 8, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 4}, "coderabbit": {"tp": 5, "fp": 16, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 5, "fp": 6, "fn": 0}, "devin": {"tp": 4, "fp": 1, "fn": 1}, "sourcery": {"tp": 5, "fp": 0, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 3, "fp": 6, "fn": 2}, "qodo-extended": {"tp": 5, "fp": 8, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 2}, "macroscope": {"tp": 4, "fp": 6, "fn": 1}, "baz": {"tp": 4, "fp": 1, "fn": 1}, "propel-v2": {"tp": 5, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 3, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 5, "fn": 1}, "greptile-v4-1": {"tp": 4, "fp": 5, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/7232", "language": "TypeScript", "pr_size": "medium", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 3, "fn": 1}, "copilot": {"tp": 2, "fp": 6, "fn": 0}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 2, "fp": 12, "fn": 0}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 1, "fp": 4, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 3, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 2, "fp": 5, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/14740", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 4}, "claude": {"tp": 2, "fp": 0, "fn": 3}, "graphite": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 3, "fp": 3, "fn": 2}, "copilot": {"tp": 3, "fp": 8, "fn": 2}, "augment": {"tp": 4, "fp": 3, "fn": 1}, "propel": {"tp": 4, "fp": 0, "fn": 1}, "coderabbit": {"tp": 4, "fp": 7, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 4, "fp": 4, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 3}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 3, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 4, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 4, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 4, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "propel-v2": {"tp": 4, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/10600", "language": "TypeScript", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 12, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 9, "fn": 3}, "augment": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "coderabbit": {"tp": 2, "fp": 7, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 6, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 5, "fn": 2}, "claude-code": {"tp": 1, "fp": 5, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 6, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 3, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 4, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/10967", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "bug_fix", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 8, "fn": 4}, "claude": {"tp": 4, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "bugbot": {"tp": 2, "fp": 4, "fn": 3}, "copilot": {"tp": 3, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 4, "fp": 11, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 4}, "devin": {"tp": 3, "fp": 2, "fn": 2}, "sourcery": {"tp": 2, "fp": 2, "fn": 3}, "claude-code": {"tp": 3, "fp": 3, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 4, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 4}, "macroscope": {"tp": 1, "fp": 4, "fn": 4}, "baz": {"tp": 1, "fp": 2, "fn": 4}, "propel-v2": {"tp": 2, "fp": 5, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 3}}}, {"url": "https://github.com/calcom/cal.com/pull/8087", "language": "TypeScript", "pr_size": "medium", "domain": "concurrency", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 6, "fn": 1}, "sourcery": {"tp": 1, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 5, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 6, "fn": 1}}}], "tools": ["augment", "baz", "bugbot", "claude", "claude-code", "codeant-v2", "coderabbit", "copilot", "cubic-v2", "devin", "gemini", "graphite", "greptile-v4-1", "kg", "kodus-v2", "macroscope", "propel", "propel-v2", "qodo-extended", "qodo-extended-v2", "qodo-v2", "sourcery"], "dimensions": {"language": ["Go", "Java", "Python", "Ruby", "TypeScript"], "pr_size": ["small", "medium", "large"], "domain": ["API", "UI", "authentication", "caching", "concurrency", "configuration", "data_processing", "database", "file_io", "logging", "scheduling", "serialization", "testing"], "change_type": ["bug_fix", "feature", "migration", "performance", "refactoring", "security_patch"], "complexity": ["simple", "moderate", "complex"], "difficulty": ["obvious", "moderate", "subtle", "very_subtle"], "risk": ["low", "medium", "high", "critical"], "context": ["local", "file", "cross_file", "system"], "concern": ["correctness", "maintainability", "reliability", "security"]}, "overall_metrics": {"greptile-v4-1": {"precision": 34.6, "recall": 46.0, "f1": 39.5, "tp": 63, "fp": 119, "fn": 74, "num_prs": 50}, "copilot": {"precision": 24.7, "recall": 52.6, "f1": 33.6, "tp": 72, "fp": 220, "fn": 65, "num_prs": 50}, "propel": {"precision": 45.0, "recall": 36.5, "f1": 40.3, "tp": 50, "fp": 61, "fn": 87, "num_prs": 50}, "kg": {"precision": 51.1, "recall": 16.8, "f1": 25.3, "tp": 23, "fp": 22, "fn": 114, "num_prs": 50}, "gemini": {"precision": 26.3, "recall": 33.6, "f1": 29.5, "tp": 46, "fp": 129, "fn": 91, "num_prs": 50}, "qodo-extended-v2": {"precision": 48.1, "recall": 56.9, "f1": 52.2, "tp": 78, "fp": 84, "fn": 59, "num_prs": 50}, "qodo-v2": {"precision": 36.2, "recall": 56.2, "f1": 44.0, "tp": 77, "fp": 136, "fn": 60, "num_prs": 50}, "kodus-v2": {"precision": 35.9, "recall": 34.3, "f1": 35.1, "tp": 47, "fp": 84, "fn": 90, "num_prs": 50}, "coderabbit": {"precision": 23.4, "recall": 57.7, "f1": 33.3, "tp": 79, "fp": 259, "fn": 58, "num_prs": 50}, "macroscope": {"precision": 41.8, "recall": 40.9, "f1": 41.3, "tp": 56, "fp": 78, "fn": 81, "num_prs": 50}, "sourcery": {"precision": 29.8, "recall": 53.3, "f1": 38.2, "tp": 73, "fp": 172, "fn": 64, "num_prs": 50}, "augment": {"precision": 41.9, "recall": 60.6, "f1": 49.6, "tp": 83, "fp": 115, "fn": 54, "num_prs": 50}, "codeant-v2": {"precision": 25.7, "recall": 34.3, "f1": 29.4, "tp": 47, "fp": 136, "fn": 90, "num_prs": 50}, "qodo-extended": {"precision": 31.4, "recall": 59.9, "f1": 41.2, "tp": 82, "fp": 179, "fn": 55, "num_prs": 50}, "devin": {"precision": 46.4, "recall": 37.2, "f1": 41.3, "tp": 51, "fp": 59, "fn": 86, "num_prs": 50}, "propel-v2": {"precision": 36.6, "recall": 46.0, "f1": 40.8, "tp": 63, "fp": 109, "fn": 74, "num_prs": 50}, "claude": {"precision": 32.3, "recall": 37.2, "f1": 34.6, "tp": 51, "fp": 107, "fn": 86, "num_prs": 50}, "bugbot": {"precision": 43.2, "recall": 43.8, "f1": 43.5, "tp": 60, "fp": 79, "fn": 77, "num_prs": 50}, "claude-code": {"precision": 27.4, "recall": 41.6, "f1": 33.0, "tp": 57, "fp": 151, "fn": 80, "num_prs": 50}, "graphite": {"precision": 80.0, "recall": 8.8, "f1": 15.8, "tp": 12, "fp": 3, "fn": 125, "num_prs": 50}, "cubic-v2": {"precision": 53.6, "recall": 65.7, "f1": 59.0, "tp": 90, "fp": 78, "fn": 47, "num_prs": 50}, "baz": {"precision": 39.5, "recall": 34.3, "f1": 36.7, "tp": 47, "fp": 72, "fn": 90, "num_prs": 50}}}};
-        const toolDisplayNames = {"graphite": "Graphite", "qodo": "Qodo", "gemini": "Gemini", "claude": "Claude Code", "augment": "Augment", "bugbot": "Cursor Bugbot", "coderabbit": "CodeRabbit", "propel": "Propel", "copilot": "GitHub Copilot", "baz": "Baz", "greptile": "Greptile", "kg": "KG", "entelligence": "Entelligence", "cubic-dev": "Cubic", "sourcery": "Sourcery", "mesa": "Mesa", "codeant": "CodeAnt", "codeant-v2": "CodeAnt v2", "claude-code": "Claude Code (CLI)", "devin": "Devin", "kodus-v2": "Kodus", "greptile-v4": "Greptile v4", "qodo-v2": "Qodo v2", "qodo-extended-v2": "Qodo Extended", "macroscope": "Macroscope", "cubic-v2": "Cubic v2"};
-        const toolColors = {"graphite": "#6366f1", "qodo": "#8b5cf6", "gemini": "#06b6d4", "claude": "#f59e0b", "augment": "#10b981", "bugbot": "#3b82f6", "coderabbit": "#ec4899", "propel": "#14b8a6", "propel-v2": "#0d9488", "copilot": "#6b7280", "baz": "#f97316", "greptile": "#22c55e", "kg": "#a855f7", "entelligence": "#0ea5e9", "cubic-dev": "#d946ef", "sourcery": "#84cc16", "mesa": "#f43f5e", "codeant": "#e11d48", "codeant-v2": "#fb7185", "claude-code": "#d97706", "devin": "#7c3aed", "kodus-v2": "#059669", "greptile-v4": "#16a34a", "qodo-v2": "#7c3aed", "qodo-extended-v2": "#6d28d9", "macroscope": "#0891b2", "cubic-v2": "#c026d3"};
-        const predefinedFilters = [{"id": "high_precision", "label": "Highest Precision", "filters": {}, "sort": "precision", "description": "Tools ranked by precision - fewer false positives, more reliable findings", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "graphite", "best_score": 100.0}, {"id": "tool_kodus-v2_domain_concurrency", "label": "Best for Concurrency (Precision)", "filters": {"domain": ["concurrency"]}, "description": "Threading and async operations.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "kodus-v2", "best_score": 100.0, "sort": "precision"}, {"id": "tool_kg_complexity_complex", "label": "Best for Complex Code (Precision)", "filters": {"complexity": ["complex"]}, "description": "Deep logic and dependencies.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "kg", "best_score": 100.0, "sort": "precision"}, {"id": "tool_propel-v2_risk_high_context_file", "label": "High Risk + File Context (Precision)", "filters": {"risk": ["high"], "context": ["file"]}, "description": "Significant impact, potential data loss. Requires full file understanding.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "propel-v2", "best_score": 100.0, "sort": "precision"}, {"id": "tool_copilot_change_type_bug_fix", "label": "Best for Bug Fixes (Recall)", "filters": {"change_type": ["bug_fix"]}, "description": "Bug fixes and issue resolution", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "copilot", "best_score": 82.4, "sort": "recall"}, {"id": "change_type_performance", "label": "Best for Performance Optimization", "filters": {"change_type": ["performance"]}, "description": "Performance optimization changes", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 81.1}, {"id": "tool_baz_language_java_domain_authentication", "label": "Java + Authentication", "filters": {"language": ["Java"], "domain": ["authentication"]}, "description": "Java codebases with OOP patterns. Auth and access control.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "baz", "best_score": 76.9}, {"id": "domain_caching", "label": "Best for Caching", "filters": {"domain": ["caching"]}, "description": "Cache and memoization.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-extended-v2", "best_score": 76.2}, {"id": "tool_codeant-v2_pr_size_small_change_type_performance", "label": "Small PRs + Performance Optimization (Precision)", "filters": {"pr_size": ["small"], "change_type": ["performance"]}, "description": "Small PRs with 1-2 files, easier to review thoroughly Performance optimization changes", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "codeant-v2", "best_score": 75.0, "sort": "precision"}, {"id": "ruby_medium", "label": "Best for Medium Ruby PRs", "filters": {"language": ["Ruby"], "pr_size": ["medium"]}, "description": "Ruby codebases with Rails patterns. Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "devin", "best_score": 72.7}, {"id": "tool_sourcery_language_typescript_concern_correctness", "label": "Typescript + Correctness", "filters": {"language": ["TypeScript"], "concern": ["correctness"]}, "description": "TypeScript codebases with frontend patterns. Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "sourcery", "best_score": 71.8}, {"id": "domain_ui", "label": "Best for Ui", "filters": {"domain": ["UI"]}, "description": "User interface and frontend.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "bugbot", "best_score": 66.7}, {"id": "tool_macroscope_context_file_concern_correctness", "label": "File Context + Correctness", "filters": {"context": ["file"], "concern": ["correctness"]}, "description": "Requires full file understanding. Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "macroscope", "best_score": 66.7}, {"id": "tool_qodo-extended_change_type_bug_fix_context_cross_file", "label": "Bug Fixes + Cross-File", "filters": {"change_type": ["bug_fix"], "context": ["cross_file"]}, "description": "Bug fixes and issue resolution Spans multiple files.", "best_model": "openai_gpt-5.2", "best_tool": "qodo-extended", "best_score": 66.7}, {"id": "tool_qodo-v2_domain_authentication_concern_correctness", "label": "Authentication + Correctness", "filters": {"domain": ["authentication"], "concern": ["correctness"]}, "description": "Auth and access control. Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-v2", "best_score": 62.3}, {"id": "tool_gemini_pr_size_medium_context_file", "label": "Medium PRs + File Context", "filters": {"pr_size": ["medium"], "context": ["file"]}, "description": "Medium PRs with 3-5 files, typical feature development Requires full file understanding.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "gemini", "best_score": 61.1}, {"id": "tool_augment_domain_concurrency", "label": "Best for Concurrency", "filters": {"domain": ["concurrency"]}, "description": "Threading and async operations.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "augment", "best_score": 60.6}, {"id": "tool_claude_difficulty_moderate_context_file", "label": "Moderate Bugs + File Context", "filters": {"difficulty": ["moderate"], "context": ["file"]}, "description": "Requires careful reading. Requires full file understanding.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "claude", "best_score": 59.3}, {"id": "tool_propel_language_ruby_concern_correctness", "label": "Ruby + Correctness", "filters": {"language": ["Ruby"], "concern": ["correctness"]}, "description": "Ruby codebases with Rails patterns. Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "propel", "best_score": 57.9}, {"id": "tool_coderabbit_difficulty_moderate_risk_medium", "label": "Moderate Bugs + Medium Risk", "filters": {"difficulty": ["moderate"], "risk": ["medium"]}, "description": "Requires careful reading. Moderate user impact.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "coderabbit", "best_score": 47.5}, {"id": "go_small", "label": "Best for Small Go PRs", "filters": {"language": ["Go"], "pr_size": ["small"]}, "description": "Go codebases with concurrency patterns. Small PRs with 1-2 files, easier to review thoroughly", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 77.4}, {"id": "language_go", "label": "Best for Go", "filters": {"language": ["Go"]}, "description": "Go codebases with concurrency patterns.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 75.5}, {"id": "pr_size_small", "label": "Best for Small PRs", "filters": {"pr_size": ["small"]}, "description": "Small PRs with 1-2 files, easier to review thoroughly", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 75.3}, {"id": "change_type_bug_fix", "label": "Best for Bug Fixes", "filters": {"change_type": ["bug_fix"]}, "description": "Bug fixes and issue resolution", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-extended-v2", "best_score": 71.8}, {"id": "python_medium", "label": "Best for Medium Python PRs", "filters": {"language": ["Python"], "pr_size": ["medium"]}, "description": "Python codebases with dynamic typing. Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 71.0}, {"id": "language_python", "label": "Best for Python", "filters": {"language": ["Python"]}, "description": "Python codebases with dynamic typing.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 70.6}, {"id": "risk_high", "label": "Best for High Risk", "filters": {"risk": ["high"]}, "description": "Significant impact, potential data loss.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 70.2}, {"id": "risk_critical", "label": "Best for Critical Risk", "filters": {"risk": ["critical"]}, "description": "Critical security or data corruption risk.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "qodo-extended-v2", "best_score": 69.8}, {"id": "high_recall", "label": "Highest Recall", "filters": {}, "sort": "recall", "description": "Tools ranked by recall - catches more issues, may have more noise", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 68.6}, {"id": "domain_scheduling", "label": "Best for Scheduling", "filters": {"domain": ["scheduling"]}, "description": "Task scheduling.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 68.4}, {"id": "complexity_complex", "label": "Best for Complex Code", "filters": {"complexity": ["complex"]}, "description": "Deep logic and dependencies.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "qodo-extended-v2", "best_score": 67.7}, {"id": "complex_subtle", "label": "Complex & Subtle", "filters": {"complexity": ["complex"], "difficulty": ["subtle", "very_subtle"]}, "description": "Deep logic and dependencies. Non-obvious, needs domain knowledge.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "qodo-extended-v2", "best_score": 67.7}, {"id": "context_file", "label": "Best for File Context", "filters": {"context": ["file"]}, "description": "Requires full file understanding.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 67.6}, {"id": "language_java", "label": "Best for Java", "filters": {"language": ["Java"]}, "description": "Java codebases with OOP patterns.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-extended-v2", "best_score": 66.7}, {"id": "difficulty_subtle", "label": "Best for Subtle Bugs", "filters": {"difficulty": ["subtle"]}, "description": "Non-obvious, needs domain knowledge.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 64.1}, {"id": "high_risk_auth", "label": "High Risk Auth", "filters": {"risk": ["high", "critical"], "domain": ["authentication"]}, "description": "Significant impact, potential data loss. Auth and access control.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 63.2}, {"id": "concern_correctness", "label": "Best for Correctness", "filters": {"concern": ["correctness"]}, "description": "Logical correctness and expected behavior", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cubic-v2", "best_score": 62.8}, {"id": "concern_reliability", "label": "Best for Reliability", "filters": {"concern": ["reliability"]}, "description": "Error handling and system stability", "best_model": "openai_gpt-5.2", "best_tool": "cubic-v2", "best_score": 62.5}, {"id": "context_cross_file", "label": "Best for Cross-File", "filters": {"context": ["cross_file"]}, "description": "Spans multiple files.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-extended-v2", "best_score": 62.0}, {"id": "concern_security", "label": "Best for Security", "filters": {"concern": ["security"]}, "description": "Security vulnerabilities and attack vectors", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 61.9}, {"id": "security_critical", "label": "Security Critical", "filters": {"concern": ["security"], "risk": ["high", "critical"]}, "description": "Security vulnerabilities and attack vectors Significant impact, potential data loss.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 61.9}, {"id": "complexity_moderate", "label": "Best for Moderate Code", "filters": {"complexity": ["moderate"]}, "description": "Some abstraction.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 61.8}, {"id": "high_f1", "label": "Highest F1", "filters": {}, "sort": "f1", "description": "Tools ranked by F1 score - balanced precision and recall", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 61.8}, {"id": "language_typescript", "label": "Best for Typescript", "filters": {"language": ["TypeScript"]}, "description": "TypeScript codebases with frontend patterns.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "devin", "best_score": 61.8}, {"id": "domain_concurrency", "label": "Best for Concurrency", "filters": {"domain": ["concurrency"]}, "description": "Threading and async operations.", "best_model": "openai_gpt-5.2", "best_tool": "cubic-v2", "best_score": 61.1}, {"id": "java_medium", "label": "Best for Medium Java PRs", "filters": {"language": ["Java"], "pr_size": ["medium"]}, "description": "Java codebases with OOP patterns. Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-extended-v2", "best_score": 61.1}, {"id": "language_ruby", "label": "Best for Ruby", "filters": {"language": ["Ruby"]}, "description": "Ruby codebases with Rails patterns.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cubic-v2", "best_score": 61.0}, {"id": "pr_size_medium", "label": "Best for Medium PRs", "filters": {"pr_size": ["medium"]}, "description": "Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-extended-v2", "best_score": 60.4}, {"id": "domain_authentication", "label": "Best for Authentication", "filters": {"domain": ["authentication"]}, "description": "Auth and access control.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 60.3}, {"id": "change_type_feature", "label": "Best for Features", "filters": {"change_type": ["feature"]}, "description": "New feature implementation", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 59.9}, {"id": "risk_medium", "label": "Best for Medium Risk", "filters": {"risk": ["medium"]}, "description": "Moderate user impact.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cubic-v2", "best_score": 58.8}, {"id": "pr_size_large", "label": "Best for Large PRs", "filters": {"pr_size": ["large"]}, "description": "Large PRs with 6+ files, complex changes requiring careful review", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 58.3}, {"id": "difficulty_moderate", "label": "Best for Moderate Bugs", "filters": {"difficulty": ["moderate"]}, "description": "Requires careful reading.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 52.8}];
+        const allModelsData = {"anthropic_claude-opus-4-5-20251101": {"prs": [{"url": "https://github.com/keycloak/keycloak/pull/37429", "language": "Java", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"graphite": {"tp": 0, "fp": 0, "fn": 4}, "gemini": {"tp": 1, "fp": 4, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 3}, "augment": {"tp": 2, "fp": 3, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "copilot": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 4}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 1, "fp": 6, "fn": 3}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 3}, "macroscope": {"tp": 0, "fp": 1, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 2}, "coderabbit": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37634", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 1, "fn": 2}, "gemini": {"tp": 3, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 2, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 1, "fn": 2}, "copilot": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 0, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 2, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 3}, "propel-v2": {"tp": 2, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 4, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 0, "fn": 2}, "cloudaeye": {"tp": 3, "fp": 0, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/38446", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 2}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 1}, "macroscope": {"tp": 0, "fp": 2, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36882", "language": "Java", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "gemini": {"tp": 0, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "augment": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 1}, "sourcery": {"tp": 0, "fp": 4, "fn": 1}, "claude-code": {"tp": 0, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 1, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36880", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 4, "fn": 3}, "gemini": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 2}, "bugbot": {"tp": 2, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 3}, "sourcery": {"tp": 0, "fp": 6, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 6, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}, "cloudaeye": {"tp": 3, "fp": 2, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/37038", "language": "Java", "pr_size": "small", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 2, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 4, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/33832", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 1, "fp": 5, "fn": 1}, "gemini": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 5, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 5, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/40940", "language": "Java", "pr_size": "small", "domain": "concurrency", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 2, "fp": 0, "fn": 0}, "gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 0, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/getsentry/sentry/pull/93824", "language": "Python", "pr_size": "large", "domain": "concurrency", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 5, "fp": 1, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 4}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "claude": {"tp": 0, "fp": 2, "fn": 5}, "copilot": {"tp": 1, "fp": 5, "fn": 4}, "propel": {"tp": 2, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 3, "fp": 3, "fn": 2}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 3, "fp": 3, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 5}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 4}, "cubic-v2": {"tp": 5, "fp": 1, "fn": 0}, "macroscope": {"tp": 3, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 5}, "propel-v2": {"tp": 3, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 3}, "coderabbit": {"tp": 0, "fp": 1, "fn": 5}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 4}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5", "language": "Python", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 3}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 2}, "macroscope": {"tp": 0, "fp": 5, "fn": 3}, "baz": {"tp": 1, "fp": 3, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 16, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1", "language": "Python", "pr_size": "medium", "domain": "API", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 3}, "claude": {"tp": 2, "fp": 4, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "copilot": {"tp": 3, "fp": 6, "fn": 1}, "bugbot": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 1, "fn": 1}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 3, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 2, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 4, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 3}, "propel-v2": {"tp": 2, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/97529", "language": "Go", "pr_size": "small", "domain": "concurrency", "change_type": "performance", "complexity": "complex", "difficulty": "very_subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80168", "language": "Python", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80528", "language": "Python", "pr_size": "small", "domain": "scheduling", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/getsentry/sentry/pull/77754", "language": "Python", "pr_size": "medium", "domain": "serialization", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 3}, "claude": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 0, "fn": 3}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 0, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 0, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 3}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "propel-v2": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 3}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 3}, "cloudaeye": {"tp": 4, "fp": 0, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/95633", "language": "Python", "pr_size": "medium", "domain": "concurrency", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 0, "fp": 6, "fn": 3}, "propel": {"tp": 0, "fp": 0, "fn": 3}, "augment": {"tp": 0, "fp": 3, "fn": 3}, "coderabbit": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 4, "fn": 3}, "devin": {"tp": 0, "fp": 2, "fn": 3}, "sourcery": {"tp": 2, "fp": 2, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 0, "fp": 5, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 3}, "baz": {"tp": 0, "fp": 2, "fn": 3}, "propel-v2": {"tp": 0, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 0, "fp": 6, "fn": 3}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}, "cloudaeye": {"tp": 1, "fp": 4, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2", "language": "Python", "pr_size": "medium", "domain": "data_processing", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 3}, "claude": {"tp": 2, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 2, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 0, "fn": 3}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 8, "fn": 1}, "claude-code": {"tp": 3, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 3, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 1}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "propel-v2": {"tp": 1, "fp": 4, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3", "language": "Python", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 3, "fn": 3}, "copilot": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 2, "fp": 5, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 3, "fn": 3}, "propel-v2": {"tp": 1, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 3, "fp": 1, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/103633", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 6, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 5, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/67876", "language": "Python", "pr_size": "small", "domain": "authentication", "change_type": "security_patch", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 2, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 8, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/32918", "language": "Java", "pr_size": "small", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 3, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/94942", "language": "Go", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 0}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90939", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/80329", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "performance", "complexity": "simple", "difficulty": "obvious", "risk": "low", "context": "local", "concern": "maintainability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 0}, "copilot": {"tp": 1, "fp": 4, "fn": 0}, "propel": {"tp": 0, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 2, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 0}, "devin": {"tp": 1, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 2, "fn": 0}, "claude-code": {"tp": 1, "fp": 3, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90045", "language": "Go", "pr_size": "medium", "domain": "logging", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 4, "fn": 0}, "claude": {"tp": 3, "fp": 3, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 3, "fn": 0}, "copilot": {"tp": 3, "fp": 7, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 3, "fp": 5, "fn": 0}, "coderabbit": {"tp": 3, "fp": 2, "fn": 0}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 6, "fn": 1}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 3, "fp": 1, "fn": 0}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "propel-v2": {"tp": 3, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 3, "fp": 3, "fn": 0}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/106778", "language": "Go", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 10, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "copilot": {"tp": 1, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 2, "fn": 1}, "coderabbit": {"tp": 1, "fp": 6, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 2, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 0}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/107534", "language": "Go", "pr_size": "small", "domain": "testing", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "low", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 4, "fn": 1}, "augment": {"tp": 0, "fp": 0, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 1}, "sourcery": {"tp": 0, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 0, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/79265", "language": "Go", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 4}, "claude": {"tp": 1, "fp": 2, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "copilot": {"tp": 2, "fp": 7, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 5}, "augment": {"tp": 0, "fp": 2, "fn": 5}, "propel": {"tp": 1, "fp": 1, "fn": 4}, "coderabbit": {"tp": 3, "fp": 4, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 2, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 5}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 3}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 2}, "macroscope": {"tp": 0, "fp": 1, "fn": 5}, "baz": {"tp": 0, "fp": 1, "fn": 5}, "propel-v2": {"tp": 1, "fp": 2, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 4}, "greptile-v4-1": {"tp": 3, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9", "language": "Ruby", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 0, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 0, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/76186", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 3, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 5, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10", "language": "Ruby", "pr_size": "large", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 10, "fn": 2}, "claude": {"tp": 0, "fp": 3, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 4, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 3, "fp": 0, "fn": 1}, "augment": {"tp": 3, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 9, "fn": 2}, "kg": {"tp": 0, "fp": 2, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 2}, "devin": {"tp": 0, "fp": 2, "fn": 4}, "sourcery": {"tp": 3, "fp": 9, "fn": 1}, "claude-code": {"tp": 0, "fp": 6, "fn": 4}, "kodus-v2": {"tp": 3, "fp": 8, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 3, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "propel-v2": {"tp": 3, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 4, "fp": 6, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "bug_fix", "complexity": "simple", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 2, "fn": 0}, "claude": {"tp": 3, "fp": 4, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 1, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 3, "fp": 2, "fn": 0}, "devin": {"tp": 3, "fp": 1, "fn": 0}, "sourcery": {"tp": 1, "fp": 1, "fn": 2}, "claude-code": {"tp": 3, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "propel-v2": {"tp": 0, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 9, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}, "cloudaeye": {"tp": 0, "fp": 1, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8", "language": "Ruby", "pr_size": "medium", "domain": "API", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 2}, "claude": {"tp": 2, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 0, "fp": 5, "fn": 3}, "claude-code": {"tp": 1, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 3, "fn": 2}, "baz": {"tp": 1, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 6, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3", "language": "Ruby", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 7, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5", "language": "Ruby", "pr_size": "small", "domain": "UI", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6", "language": "Ruby", "pr_size": "small", "domain": "serialization", "change_type": "feature", "complexity": "simple", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 0}, "copilot": {"tp": 1, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 2, "fn": 0}, "sourcery": {"tp": 1, "fp": 4, "fn": 0}, "claude-code": {"tp": 1, "fp": 0, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 1, "fn": 0}, "propel-v2": {"tp": 1, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4", "language": "Ruby", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 5, "fp": 4, "fn": 1}, "claude": {"tp": 3, "fp": 3, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 6}, "bugbot": {"tp": 1, "fp": 4, "fn": 5}, "copilot": {"tp": 1, "fp": 3, "fn": 5}, "augment": {"tp": 3, "fp": 2, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 5}, "coderabbit": {"tp": 4, "fp": 20, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 6}, "qodo-v2": {"tp": 3, "fp": 8, "fn": 3}, "devin": {"tp": 1, "fp": 3, "fn": 5}, "sourcery": {"tp": 2, "fp": 3, "fn": 4}, "claude-code": {"tp": 3, "fp": 1, "fn": 3}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 4}, "qodo-extended": {"tp": 5, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 4}, "macroscope": {"tp": 1, "fp": 5, "fn": 5}, "baz": {"tp": 1, "fp": 3, "fn": 5}, "propel-v2": {"tp": 3, "fp": 6, "fn": 3}, "codeant-v2": {"tp": 2, "fp": 16, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 4}, "cloudaeye": {"tp": 4, "fp": 6, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1", "language": "Ruby", "pr_size": "medium", "domain": "file_io", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 2, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 2, "fp": 0, "fn": 1}, "coderabbit": {"tp": 2, "fp": 6, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 2, "fp": 5, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/22532", "language": "TypeScript", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 11, "fn": 0}, "augment": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 2, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 10, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 0, "fp": 4, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 2}, "cloudaeye": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/8330", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 0}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 2, "fp": 1, "fn": 0}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "kg": {"tp": 1, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 2, "fp": 0, "fn": 0}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 1, "fn": 0}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14943", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 0, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/22345", "language": "TypeScript", "pr_size": "small", "domain": "database", "change_type": "migration", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 0, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 0, "fp": 0, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/11059", "language": "TypeScript", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 3, "fn": 2}, "claude": {"tp": 3, "fp": 3, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 4, "fp": 4, "fn": 1}, "copilot": {"tp": 4, "fp": 9, "fn": 1}, "augment": {"tp": 5, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 5, "fp": 14, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 4, "fn": 1}, "devin": {"tp": 4, "fp": 1, "fn": 1}, "sourcery": {"tp": 5, "fp": 0, "fn": 0}, "claude-code": {"tp": 3, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended": {"tp": 5, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 2}, "macroscope": {"tp": 4, "fp": 7, "fn": 1}, "baz": {"tp": 4, "fp": 1, "fn": 1}, "propel-v2": {"tp": 5, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 4, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 4, "fp": 5, "fn": 1}, "greptile-v4-1": {"tp": 4, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 2, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/7232", "language": "TypeScript", "pr_size": "medium", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 3, "fn": 2}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 2, "fp": 10, "fn": 0}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 4, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 4, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14740", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 4}, "claude": {"tp": 2, "fp": 0, "fn": 3}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 3, "fn": 2}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "augment": {"tp": 4, "fp": 3, "fn": 1}, "propel": {"tp": 4, "fp": 0, "fn": 1}, "coderabbit": {"tp": 4, "fp": 6, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 4, "fp": 3, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 3}, "sourcery": {"tp": 3, "fp": 1, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 4, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 4, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 4, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "propel-v2": {"tp": 4, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 4, "fp": 0, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/10600", "language": "TypeScript", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 8, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 9, "fn": 3}, "augment": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "coderabbit": {"tp": 2, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/10967", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "bug_fix", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 9, "fn": 4}, "claude": {"tp": 3, "fp": 4, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "bugbot": {"tp": 2, "fp": 4, "fn": 3}, "copilot": {"tp": 4, "fp": 7, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 3, "fp": 8, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 4}, "devin": {"tp": 2, "fp": 3, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 3}, "claude-code": {"tp": 2, "fp": 5, "fn": 3}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 4}, "macroscope": {"tp": 2, "fp": 3, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "propel-v2": {"tp": 2, "fp": 4, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 7, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 4}, "cloudaeye": {"tp": 4, "fp": 3, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/8087", "language": "TypeScript", "pr_size": "medium", "domain": "concurrency", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 4, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 3, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 10, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 4, "fn": 1}}}], "tools": ["augment", "baz", "bugbot", "claude", "claude-code", "cloudaeye", "codeant-v2", "coderabbit", "copilot", "cubic-v2", "devin", "gemini", "graphite", "greptile-v4-1", "kg", "kodus-v2", "macroscope", "propel", "propel-v2", "qodo-extended", "qodo-extended-v2", "qodo-v2", "sourcery"], "dimensions": {"language": ["Go", "Java", "Python", "Ruby", "TypeScript"], "pr_size": ["small", "medium", "large"], "domain": ["API", "UI", "authentication", "caching", "concurrency", "configuration", "data_processing", "database", "file_io", "logging", "scheduling", "serialization", "testing"], "change_type": ["bug_fix", "feature", "migration", "performance", "refactoring", "security_patch"], "complexity": ["simple", "moderate", "complex"], "difficulty": ["obvious", "moderate", "subtle", "very_subtle"], "risk": ["low", "medium", "high", "critical"], "context": ["local", "file", "cross_file", "system"], "concern": ["correctness", "maintainability", "reliability", "security"]}, "overall_metrics": {"graphite": {"precision": 100.0, "recall": 8.8, "f1": 16.1, "tp": 12, "fp": 0, "fn": 125, "num_prs": 50}, "devin": {"precision": 54.3, "recall": 37.2, "f1": 44.2, "tp": 51, "fp": 43, "fn": 86, "num_prs": 50}, "qodo-extended": {"precision": 37.2, "recall": 62.8, "f1": 46.7, "tp": 86, "fp": 145, "fn": 51, "num_prs": 50}, "sourcery": {"precision": 33.3, "recall": 51.8, "f1": 40.6, "tp": 71, "fp": 142, "fn": 66, "num_prs": 50}, "kodus-v2": {"precision": 46.7, "recall": 35.8, "f1": 40.5, "tp": 49, "fp": 56, "fn": 88, "num_prs": 50}, "baz": {"precision": 49.0, "recall": 34.3, "f1": 40.3, "tp": 47, "fp": 49, "fn": 90, "num_prs": 50}, "cubic-v2": {"precision": 56.3, "recall": 68.6, "f1": 61.8, "tp": 94, "fp": 73, "fn": 43, "num_prs": 50}, "augment": {"precision": 47.5, "recall": 61.3, "f1": 53.5, "tp": 84, "fp": 93, "fn": 53, "num_prs": 50}, "qodo-extended-v2": {"precision": 54.9, "recall": 61.3, "f1": 57.9, "tp": 84, "fp": 69, "fn": 53, "num_prs": 50}, "propel": {"precision": 55.8, "recall": 38.7, "f1": 45.7, "tp": 53, "fp": 42, "fn": 84, "num_prs": 50}, "bugbot": {"precision": 47.2, "recall": 43.8, "f1": 45.5, "tp": 60, "fp": 67, "fn": 77, "num_prs": 50}, "greptile-v4-1": {"precision": 40.5, "recall": 48.2, "f1": 44.0, "tp": 66, "fp": 97, "fn": 71, "num_prs": 50}, "qodo-v2": {"precision": 42.9, "recall": 55.5, "f1": 48.4, "tp": 76, "fp": 101, "fn": 61, "num_prs": 50}, "claude-code": {"precision": 34.8, "recall": 40.9, "f1": 37.6, "tp": 56, "fp": 105, "fn": 81, "num_prs": 50}, "gemini": {"precision": 31.1, "recall": 37.2, "f1": 33.9, "tp": 51, "fp": 113, "fn": 86, "num_prs": 50}, "coderabbit": {"precision": 25.7, "recall": 56.2, "f1": 35.2, "tp": 77, "fp": 223, "fn": 60, "num_prs": 50}, "macroscope": {"precision": 48.4, "recall": 43.8, "f1": 46.0, "tp": 60, "fp": 64, "fn": 77, "num_prs": 50}, "propel-v2": {"precision": 44.4, "recall": 49.6, "f1": 46.9, "tp": 68, "fp": 85, "fn": 69, "num_prs": 50}, "claude": {"precision": 34.8, "recall": 35.8, "f1": 35.3, "tp": 49, "fp": 92, "fn": 88, "num_prs": 50}, "cloudaeye": {"precision": 62.3, "recall": 72.3, "f1": 66.9, "tp": 99, "fp": 60, "fn": 38, "num_prs": 50}, "codeant-v2": {"precision": 31.9, "recall": 38.0, "f1": 34.7, "tp": 52, "fp": 111, "fn": 85, "num_prs": 50}, "kg": {"precision": 50.0, "recall": 16.8, "f1": 25.1, "tp": 23, "fp": 23, "fn": 114, "num_prs": 50}, "copilot": {"precision": 28.3, "recall": 53.3, "f1": 37.0, "tp": 73, "fp": 185, "fn": 64, "num_prs": 50}}}, "anthropic_claude-sonnet-4-5-20250929": {"prs": [{"url": "https://github.com/keycloak/keycloak/pull/37429", "language": "Java", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"graphite": {"tp": 0, "fp": 0, "fn": 4}, "gemini": {"tp": 0, "fp": 4, "fn": 4}, "claude": {"tp": 1, "fp": 0, "fn": 3}, "augment": {"tp": 2, "fp": 3, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 4}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 1, "fp": 5, "fn": 3}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 3}, "macroscope": {"tp": 0, "fp": 1, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 4}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 2}, "coderabbit": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37634", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 3, "fn": 2}, "gemini": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 2, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 2, "fn": 2}, "copilot": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 0, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 2, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 2}, "qodo-extended-v2": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 4, "fp": 6, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 3, "fp": 0, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/38446", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 2}, "propel": {"tp": 0, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 1}, "macroscope": {"tp": 0, "fp": 3, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36882", "language": "Java", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "gemini": {"tp": 0, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "augment": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 1}, "sourcery": {"tp": 0, "fp": 4, "fn": 1}, "claude-code": {"tp": 0, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36880", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 5, "fn": 3}, "gemini": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 2, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 2}, "bugbot": {"tp": 2, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "copilot": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 2}, "devin": {"tp": 0, "fp": 1, "fn": 3}, "sourcery": {"tp": 1, "fp": 5, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}, "cloudaeye": {"tp": 3, "fp": 2, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/37038", "language": "Java", "pr_size": "small", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 2, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 8, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 4, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/33832", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 1, "fp": 5, "fn": 1}, "gemini": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 4, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 5, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/40940", "language": "Java", "pr_size": "small", "domain": "concurrency", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 2, "fp": 0, "fn": 0}, "gemini": {"tp": 2, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 3, "fn": 0}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 0, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 0, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/93824", "language": "Python", "pr_size": "large", "domain": "concurrency", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 5, "fp": 1, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 4}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "claude": {"tp": 0, "fp": 2, "fn": 5}, "copilot": {"tp": 1, "fp": 4, "fn": 4}, "propel": {"tp": 1, "fp": 2, "fn": 4}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 2, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 3, "fp": 3, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 5}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 4}, "cubic-v2": {"tp": 4, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 1, "fn": 3}, "baz": {"tp": 0, "fp": 3, "fn": 5}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 3}, "coderabbit": {"tp": 1, "fp": 3, "fn": 4}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 4}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5", "language": "Python", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 3}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 2}, "macroscope": {"tp": 0, "fp": 5, "fn": 3}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 16, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1", "language": "Python", "pr_size": "medium", "domain": "API", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 3}, "claude": {"tp": 2, "fp": 7, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "copilot": {"tp": 3, "fp": 6, "fn": 1}, "bugbot": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 1, "fn": 1}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 3, "fp": 2, "fn": 1}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 2, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 2, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/97529", "language": "Go", "pr_size": "small", "domain": "concurrency", "change_type": "performance", "complexity": "complex", "difficulty": "very_subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 4, "fn": 0}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80168", "language": "Python", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80528", "language": "Python", "pr_size": "small", "domain": "scheduling", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/getsentry/sentry/pull/77754", "language": "Python", "pr_size": "medium", "domain": "serialization", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 3}, "claude": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 0, "fn": 3}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 0, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 0, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 3}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 3}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 3}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/95633", "language": "Python", "pr_size": "medium", "domain": "concurrency", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 0, "fp": 6, "fn": 3}, "propel": {"tp": 0, "fp": 0, "fn": 3}, "augment": {"tp": 0, "fp": 6, "fn": 3}, "coderabbit": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 4, "fn": 3}, "devin": {"tp": 0, "fp": 2, "fn": 3}, "sourcery": {"tp": 2, "fp": 2, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 0, "fp": 5, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 3}, "baz": {"tp": 0, "fp": 1, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 0, "fp": 6, "fn": 3}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}, "cloudaeye": {"tp": 1, "fp": 4, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2", "language": "Python", "pr_size": "medium", "domain": "data_processing", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 3}, "claude": {"tp": 2, "fp": 6, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 2, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 0, "fn": 3}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 6, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 4, "fn": 1}, "qodo-extended": {"tp": 3, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 1}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3", "language": "Python", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 3, "fn": 3}, "copilot": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 2, "fp": 5, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 5, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 3, "fn": 3}, "codeant-v2": {"tp": 3, "fp": 1, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/103633", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 1, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 5, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 3, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/67876", "language": "Python", "pr_size": "small", "domain": "authentication", "change_type": "security_patch", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 2, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 2, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 8, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/32918", "language": "Java", "pr_size": "small", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 3, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/94942", "language": "Go", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 0, "fn": 0}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 0}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90939", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/80329", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "performance", "complexity": "simple", "difficulty": "obvious", "risk": "low", "context": "local", "concern": "maintainability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 0}, "copilot": {"tp": 1, "fp": 4, "fn": 0}, "propel": {"tp": 0, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 3, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 0}, "devin": {"tp": 1, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 2, "fn": 0}, "claude-code": {"tp": 1, "fp": 6, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90045", "language": "Go", "pr_size": "medium", "domain": "logging", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 4, "fn": 0}, "claude": {"tp": 3, "fp": 3, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 3, "fn": 0}, "copilot": {"tp": 3, "fp": 8, "fn": 0}, "propel": {"tp": 2, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 5, "fn": 0}, "coderabbit": {"tp": 3, "fp": 2, "fn": 0}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 6, "fn": 1}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 3, "fp": 1, "fn": 0}, "claude-code": {"tp": 3, "fp": 3, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/106778", "language": "Go", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 12, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "copilot": {"tp": 1, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 5, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 6, "fn": 1}, "devin": {"tp": 2, "fp": 1, "fn": 0}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/107534", "language": "Go", "pr_size": "small", "domain": "testing", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "low", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 1}, "augment": {"tp": 0, "fp": 0, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 1}, "sourcery": {"tp": 0, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 1}, "cloudaeye": {"tp": 0, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/79265", "language": "Go", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 4}, "claude": {"tp": 1, "fp": 2, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 5}, "augment": {"tp": 1, "fp": 1, "fn": 4}, "propel": {"tp": 1, "fp": 1, "fn": 4}, "coderabbit": {"tp": 3, "fp": 4, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 3}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 2, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 3, "fn": 5}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 3}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 2}, "macroscope": {"tp": 0, "fp": 1, "fn": 5}, "baz": {"tp": 1, "fp": 0, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 4}, "greptile-v4-1": {"tp": 2, "fp": 4, "fn": 3}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9", "language": "Ruby", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 0, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 0, "fp": 3, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/76186", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 0, "fp": 2, "fn": 2}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 3, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 1, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 6, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10", "language": "Ruby", "pr_size": "large", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 8, "fn": 2}, "claude": {"tp": 0, "fp": 3, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 4, "fn": 1}, "copilot": {"tp": 2, "fp": 4, "fn": 2}, "propel": {"tp": 3, "fp": 0, "fn": 1}, "augment": {"tp": 3, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 12, "fn": 2}, "kg": {"tp": 0, "fp": 2, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 2}, "devin": {"tp": 0, "fp": 2, "fn": 4}, "sourcery": {"tp": 3, "fp": 8, "fn": 1}, "claude-code": {"tp": 0, "fp": 6, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 7, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 3, "fp": 5, "fn": 1}, "baz": {"tp": 2, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 3, "fp": 6, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 6, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 3}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "bug_fix", "complexity": "simple", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 2, "fn": 0}, "claude": {"tp": 3, "fp": 4, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 1, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 2}, "coderabbit": {"tp": 0, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 3, "fp": 2, "fn": 0}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 2, "fp": 1, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 9, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 0, "fn": 1}, "cloudaeye": {"tp": 0, "fp": 1, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8", "language": "Ruby", "pr_size": "medium", "domain": "API", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 1}, "claude": {"tp": 2, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 1, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 4, "fn": 2}, "claude-code": {"tp": 1, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 3, "fn": 2}, "baz": {"tp": 1, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 8, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3", "language": "Ruby", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 2, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 2}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5", "language": "Ruby", "pr_size": "small", "domain": "UI", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6", "language": "Ruby", "pr_size": "small", "domain": "serialization", "change_type": "feature", "complexity": "simple", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 4, "fn": 0}, "copilot": {"tp": 1, "fp": 2, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 0}, "sourcery": {"tp": 0, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 0, "fp": 4, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4", "language": "Ruby", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 4, "fp": 5, "fn": 2}, "claude": {"tp": 5, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 6}, "bugbot": {"tp": 1, "fp": 4, "fn": 5}, "copilot": {"tp": 1, "fp": 3, "fn": 5}, "augment": {"tp": 3, "fp": 2, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 5}, "coderabbit": {"tp": 4, "fp": 13, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 6}, "qodo-v2": {"tp": 4, "fp": 4, "fn": 2}, "devin": {"tp": 1, "fp": 3, "fn": 5}, "sourcery": {"tp": 2, "fp": 3, "fn": 4}, "claude-code": {"tp": 3, "fp": 1, "fn": 3}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 4}, "qodo-extended": {"tp": 6, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 4}, "macroscope": {"tp": 1, "fp": 5, "fn": 5}, "baz": {"tp": 1, "fp": 3, "fn": 5}, "codeant-v2": {"tp": 2, "fp": 16, "fn": 4}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 4}, "cloudaeye": {"tp": 4, "fp": 7, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1", "language": "Ruby", "pr_size": "medium", "domain": "file_io", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 2, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 1}, "propel": {"tp": 2, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 5, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 1}, "claude-code": {"tp": 3, "fp": 1, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 4, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 2, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 6, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/22532", "language": "TypeScript", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 11, "fn": 0}, "augment": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 1, "fp": 4, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 5, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/8330", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 5, "fn": 0}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 2, "fp": 1, "fn": 0}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 0, "fp": 3, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 2, "fp": 0, "fn": 0}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14943", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 0, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/22345", "language": "TypeScript", "pr_size": "small", "domain": "database", "change_type": "migration", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 0, "fn": 1}, "claude-code": {"tp": 1, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 2}, "cloudaeye": {"tp": 0, "fp": 0, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/11059", "language": "TypeScript", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 3, "fn": 2}, "claude": {"tp": 3, "fp": 3, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 4, "fp": 3, "fn": 1}, "copilot": {"tp": 4, "fp": 10, "fn": 1}, "augment": {"tp": 5, "fp": 5, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 5, "fp": 13, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 7, "fn": 1}, "devin": {"tp": 4, "fp": 1, "fn": 1}, "sourcery": {"tp": 5, "fp": 0, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended": {"tp": 5, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 2}, "macroscope": {"tp": 4, "fp": 6, "fn": 1}, "baz": {"tp": 4, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 5, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 5, "fn": 2}, "cloudaeye": {"tp": 3, "fp": 2, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/7232", "language": "TypeScript", "pr_size": "medium", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 2}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 5, "fn": 0}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 2, "fp": 10, "fn": 0}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 3, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 4, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14740", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 4}, "claude": {"tp": 2, "fp": 0, "fn": 3}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 3, "fn": 2}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "augment": {"tp": 4, "fp": 3, "fn": 1}, "propel": {"tp": 4, "fp": 0, "fn": 1}, "coderabbit": {"tp": 4, "fp": 4, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 4}, "qodo-v2": {"tp": 3, "fp": 3, "fn": 2}, "devin": {"tp": 2, "fp": 0, "fn": 3}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 4, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 4, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 4, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 4, "fp": 0, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/10600", "language": "TypeScript", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 8, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 9, "fn": 3}, "augment": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "coderabbit": {"tp": 2, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 4, "fn": 2}, "claude-code": {"tp": 1, "fp": 4, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 1, "fn": 4}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/10967", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "bug_fix", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 7, "fn": 4}, "claude": {"tp": 4, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "bugbot": {"tp": 2, "fp": 4, "fn": 3}, "copilot": {"tp": 3, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 4, "fp": 6, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 4}, "devin": {"tp": 3, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 5, "fn": 3}, "claude-code": {"tp": 2, "fp": 6, "fn": 3}, "kodus-v2": {"tp": 3, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 6, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 4}, "macroscope": {"tp": 2, "fp": 3, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 4}, "cloudaeye": {"tp": 3, "fp": 5, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/8087", "language": "TypeScript", "pr_size": "medium", "domain": "concurrency", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 6, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 2, "fp": 5, "fn": 0}, "kg": {"tp": 1, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 2, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 9, "fn": 0}, "macroscope": {"tp": 1, "fp": 4, "fn": 1}, "baz": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 4, "fn": 1}}}], "tools": ["augment", "baz", "bugbot", "claude", "claude-code", "cloudaeye", "codeant-v2", "coderabbit", "copilot", "cubic-v2", "devin", "gemini", "graphite", "greptile-v4-1", "kg", "kodus-v2", "macroscope", "propel", "qodo-extended", "qodo-extended-v2", "qodo-v2", "sourcery"], "dimensions": {"language": ["Go", "Java", "Python", "Ruby", "TypeScript"], "pr_size": ["small", "medium", "large"], "domain": ["API", "UI", "authentication", "caching", "concurrency", "configuration", "data_processing", "database", "file_io", "logging", "scheduling", "serialization", "testing"], "change_type": ["bug_fix", "feature", "migration", "performance", "refactoring", "security_patch"], "complexity": ["simple", "moderate", "complex"], "difficulty": ["obvious", "moderate", "subtle", "very_subtle"], "risk": ["low", "medium", "high", "critical"], "context": ["local", "file", "cross_file", "system"], "concern": ["correctness", "maintainability", "reliability", "security"]}, "overall_metrics": {"graphite": {"precision": 100.0, "recall": 8.8, "f1": 16.1, "tp": 12, "fp": 0, "fn": 125, "num_prs": 50}, "devin": {"precision": 54.2, "recall": 38.0, "f1": 44.6, "tp": 52, "fp": 44, "fn": 85, "num_prs": 50}, "qodo-extended": {"precision": 35.3, "recall": 61.3, "f1": 44.8, "tp": 84, "fp": 154, "fn": 53, "num_prs": 50}, "sourcery": {"precision": 33.3, "recall": 51.8, "f1": 40.6, "tp": 71, "fp": 142, "fn": 66, "num_prs": 50}, "kodus-v2": {"precision": 44.9, "recall": 35.0, "f1": 39.3, "tp": 48, "fp": 59, "fn": 89, "num_prs": 50}, "baz": {"precision": 49.0, "recall": 35.8, "f1": 41.4, "tp": 49, "fp": 51, "fn": 88, "num_prs": 50}, "cubic-v2": {"precision": 55.6, "recall": 68.6, "f1": 61.4, "tp": 94, "fp": 75, "fn": 43, "num_prs": 50}, "augment": {"precision": 46.0, "recall": 63.5, "f1": 53.4, "tp": 87, "fp": 102, "fn": 50, "num_prs": 50}, "qodo-extended-v2": {"precision": 52.5, "recall": 60.6, "f1": 56.3, "tp": 83, "fp": 75, "fn": 54, "num_prs": 50}, "propel": {"precision": 52.6, "recall": 36.5, "f1": 43.1, "tp": 50, "fp": 45, "fn": 87, "num_prs": 50}, "bugbot": {"precision": 45.4, "recall": 43.1, "f1": 44.2, "tp": 59, "fp": 71, "fn": 78, "num_prs": 50}, "greptile-v4-1": {"precision": 36.5, "recall": 45.3, "f1": 40.4, "tp": 62, "fp": 108, "fn": 75, "num_prs": 50}, "qodo-v2": {"precision": 40.5, "recall": 56.2, "f1": 47.1, "tp": 77, "fp": 113, "fn": 60, "num_prs": 50}, "claude-code": {"precision": 30.7, "recall": 40.1, "f1": 34.8, "tp": 55, "fp": 124, "fn": 82, "num_prs": 50}, "gemini": {"precision": 29.7, "recall": 35.8, "f1": 32.5, "tp": 49, "fp": 116, "fn": 88, "num_prs": 50}, "coderabbit": {"precision": 27.5, "recall": 56.9, "f1": 37.1, "tp": 78, "fp": 206, "fn": 59, "num_prs": 50}, "macroscope": {"precision": 45.8, "recall": 43.8, "f1": 44.8, "tp": 60, "fp": 71, "fn": 77, "num_prs": 50}, "claude": {"precision": 35.7, "recall": 40.1, "f1": 37.8, "tp": 55, "fp": 99, "fn": 82, "num_prs": 50}, "cloudaeye": {"precision": 59.6, "recall": 72.3, "f1": 65.3, "tp": 99, "fp": 67, "fn": 38, "num_prs": 50}, "codeant-v2": {"precision": 31.1, "recall": 36.5, "f1": 33.6, "tp": 50, "fp": 111, "fn": 87, "num_prs": 50}, "kg": {"precision": 44.7, "recall": 15.3, "f1": 22.8, "tp": 21, "fp": 26, "fn": 116, "num_prs": 50}, "copilot": {"precision": 27.2, "recall": 51.1, "f1": 35.5, "tp": 70, "fp": 187, "fn": 67, "num_prs": 50}}}, "openai_gpt-5.2": {"prs": [{"url": "https://github.com/keycloak/keycloak/pull/37429", "language": "Java", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"graphite": {"tp": 0, "fp": 0, "fn": 4}, "gemini": {"tp": 0, "fp": 5, "fn": 4}, "claude": {"tp": 1, "fp": 0, "fn": 3}, "augment": {"tp": 2, "fp": 3, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 3, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 4}, "devin": {"tp": 2, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 14, "fn": 3}, "claude-code": {"tp": 1, "fp": 5, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 3}, "macroscope": {"tp": 0, "fp": 2, "fn": 4}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 2, "fn": 4}, "propel-v2": {"tp": 1, "fp": 3, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 2}, "coderabbit": {"tp": 3, "fp": 7, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 3}}}, {"url": "https://github.com/keycloak/keycloak/pull/37634", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 2, "fn": 2}, "gemini": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 4, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 2, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 2, "fn": 2}, "copilot": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 2, "fp": 1, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 2}, "claude-code": {"tp": 0, "fp": 3, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 2, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 5, "fn": 2}, "propel-v2": {"tp": 2, "fp": 2, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 2}, "coderabbit": {"tp": 3, "fp": 7, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 3, "fp": 0, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/38446", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 6, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 2}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 1}, "macroscope": {"tp": 0, "fp": 4, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 3, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36882", "language": "Java", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 1, "fn": 1}, "gemini": {"tp": 0, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 1}, "copilot": {"tp": 0, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "augment": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 1}, "sourcery": {"tp": 0, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 2, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 0, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/36880", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 0, "fp": 6, "fn": 3}, "gemini": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 2}, "bugbot": {"tp": 3, "fp": 2, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 6, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 2}, "devin": {"tp": 0, "fp": 1, "fn": 3}, "sourcery": {"tp": 1, "fp": 5, "fn": 2}, "claude-code": {"tp": 0, "fp": 1, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 4, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 2}, "macroscope": {"tp": 2, "fp": 4, "fn": 1}, "baz": {"tp": 2, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 6, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 3}, "cloudaeye": {"tp": 3, "fp": 2, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/37038", "language": "Java", "pr_size": "small", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "very_subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 0}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 2, "fn": 0}, "baz": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 8, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 4, "fn": 0}}}, {"url": "https://github.com/keycloak/keycloak/pull/33832", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"claude": {"tp": 2, "fp": 4, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 1, "fp": 1, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 6, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 5, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/40940", "language": "Java", "pr_size": "small", "domain": "concurrency", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 2, "fn": 0}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1", "language": "Java", "pr_size": "medium", "domain": "authentication", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 2, "fp": 0, "fn": 0}, "gemini": {"tp": 2, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 2, "fp": 3, "fn": 0}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 0, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 2, "fp": 3, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 0}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/93824", "language": "Python", "pr_size": "large", "domain": "concurrency", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"augment": {"tp": 5, "fp": 1, "fn": 0}, "gemini": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 4}, "graphite": {"tp": 1, "fp": 0, "fn": 4}, "claude": {"tp": 0, "fp": 3, "fn": 5}, "copilot": {"tp": 1, "fp": 5, "fn": 4}, "propel": {"tp": 2, "fp": 4, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 4, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 4}, "sourcery": {"tp": 4, "fp": 2, "fn": 1}, "claude-code": {"tp": 3, "fp": 1, "fn": 2}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 5}, "qodo-extended": {"tp": 1, "fp": 4, "fn": 4}, "cubic-v2": {"tp": 4, "fp": 1, "fn": 1}, "macroscope": {"tp": 3, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 2, "fn": 5}, "propel-v2": {"tp": 2, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 3}, "coderabbit": {"tp": 1, "fp": 3, "fn": 4}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 4}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5", "language": "Python", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 3}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 4, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 2}, "devin": {"tp": 1, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 4, "fn": 3}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 3, "fn": 2}, "macroscope": {"tp": 0, "fp": 5, "fn": 3}, "baz": {"tp": 1, "fp": 3, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 1, "fp": 16, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1", "language": "Python", "pr_size": "medium", "domain": "API", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 3}, "claude": {"tp": 2, "fp": 7, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "copilot": {"tp": 3, "fp": 13, "fn": 1}, "bugbot": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 1, "fn": 1}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 2}, "devin": {"tp": 3, "fp": 2, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 6, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 2}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 3}, "propel-v2": {"tp": 2, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 2, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/97529", "language": "Go", "pr_size": "small", "domain": "concurrency", "change_type": "performance", "complexity": "complex", "difficulty": "very_subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 3, "fn": 1}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 4, "fn": 0}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 1, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80168", "language": "Python", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 2, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/80528", "language": "Python", "pr_size": "small", "domain": "scheduling", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 1, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 0, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/getsentry/sentry/pull/77754", "language": "Python", "pr_size": "medium", "domain": "serialization", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 3}, "claude": {"tp": 2, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 1, "fn": 3}, "copilot": {"tp": 2, "fp": 0, "fn": 2}, "propel": {"tp": 2, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 0, "fn": 3}, "coderabbit": {"tp": 3, "fp": 2, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 3, "fn": 2}, "claude-code": {"tp": 1, "fp": 0, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 0, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 3}, "baz": {"tp": 1, "fp": 0, "fn": 3}, "propel-v2": {"tp": 1, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 3}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 3}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/95633", "language": "Python", "pr_size": "medium", "domain": "concurrency", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 2, "fn": 3}, "copilot": {"tp": 0, "fp": 6, "fn": 3}, "propel": {"tp": 0, "fp": 0, "fn": 3}, "augment": {"tp": 0, "fp": 7, "fn": 3}, "coderabbit": {"tp": 0, "fp": 7, "fn": 3}, "kg": {"tp": 0, "fp": 2, "fn": 3}, "qodo-v2": {"tp": 0, "fp": 4, "fn": 3}, "devin": {"tp": 0, "fp": 4, "fn": 3}, "sourcery": {"tp": 2, "fp": 4, "fn": 1}, "claude-code": {"tp": 0, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 3}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 3}, "baz": {"tp": 0, "fp": 3, "fn": 3}, "propel-v2": {"tp": 0, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 1, "fn": 3}, "qodo-extended-v2": {"tp": 0, "fp": 8, "fn": 3}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 3}, "cloudaeye": {"tp": 1, "fp": 4, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2", "language": "Python", "pr_size": "medium", "domain": "data_processing", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 3}, "claude": {"tp": 2, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 3, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 3, "fn": 1}, "coderabbit": {"tp": 0, "fp": 0, "fn": 3}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 0}, "devin": {"tp": 0, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 10, "fn": 1}, "claude-code": {"tp": 3, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 5, "fn": 1}, "qodo-extended": {"tp": 3, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 1, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 1}, "baz": {"tp": 3, "fp": 2, "fn": 0}, "propel-v2": {"tp": 1, "fp": 5, "fn": 2}, "codeant-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 4, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3", "language": "Python", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 4, "fn": 3}, "claude": {"tp": 1, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 0, "fp": 3, "fn": 3}, "copilot": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 1, "fn": 2}, "coderabbit": {"tp": 2, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 6, "fn": 1}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 6, "fn": 3}, "propel-v2": {"tp": 1, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 3, "fp": 1, "fn": 0}, "qodo-extended-v2": {"tp": 3, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/103633", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 6, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 6, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 3, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 0, "fp": 3, "fn": 2}, "baz": {"tp": 0, "fp": 3, "fn": 2}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/getsentry/sentry/pull/67876", "language": "Python", "pr_size": "small", "domain": "authentication", "change_type": "security_patch", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 2}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 2, "fn": 2}, "copilot": {"tp": 1, "fp": 4, "fn": 2}, "augment": {"tp": 1, "fp": 2, "fn": 2}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "coderabbit": {"tp": 1, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 5, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 2}, "sourcery": {"tp": 1, "fp": 9, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 2}, "propel-v2": {"tp": 1, "fp": 3, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 2, "fn": 3}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 1}}}, {"url": "https://github.com/keycloak/keycloak/pull/32918", "language": "Java", "pr_size": "small", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 0, "fp": 2, "fn": 2}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 0, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/94942", "language": "Go", "pr_size": "small", "domain": "data_processing", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 2, "fp": 9, "fn": 0}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "coderabbit": {"tp": 1, "fp": 2, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 0}, "qodo-v2": {"tp": 2, "fp": 0, "fn": 0}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 1, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 2, "fp": 1, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 0, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90939", "language": "Go", "pr_size": "small", "domain": "caching", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 0, "fn": 1}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "augment": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 1, "fp": 1, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 0, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 2, "fp": 0, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 0, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/80329", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "performance", "complexity": "simple", "difficulty": "obvious", "risk": "low", "context": "local", "concern": "maintainability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 0}, "claude": {"tp": 1, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 4, "fn": 0}, "copilot": {"tp": 1, "fp": 4, "fn": 0}, "propel": {"tp": 0, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 0}, "coderabbit": {"tp": 1, "fp": 5, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 6, "fn": 0}, "devin": {"tp": 1, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 3, "fn": 0}, "claude-code": {"tp": 1, "fp": 7, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 3, "fn": 0}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 2, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/90045", "language": "Go", "pr_size": "medium", "domain": "logging", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 5, "fn": 0}, "claude": {"tp": 3, "fp": 4, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 2, "fn": 0}, "copilot": {"tp": 3, "fp": 8, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 2}, "augment": {"tp": 3, "fp": 5, "fn": 0}, "coderabbit": {"tp": 3, "fp": 2, "fn": 0}, "kg": {"tp": 2, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 6, "fn": 1}, "devin": {"tp": 3, "fp": 2, "fn": 0}, "sourcery": {"tp": 3, "fp": 0, "fn": 0}, "claude-code": {"tp": 3, "fp": 5, "fn": 0}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 0}, "macroscope": {"tp": 3, "fp": 0, "fn": 0}, "baz": {"tp": 2, "fp": 2, "fn": 1}, "propel-v2": {"tp": 3, "fp": 4, "fn": 0}, "codeant-v2": {"tp": 2, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 3, "fp": 4, "fn": 0}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/106778", "language": "Go", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 0, "fp": 9, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 2, "fp": 0, "fn": 0}, "copilot": {"tp": 1, "fp": 1, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 1, "fp": 2, "fn": 1}, "coderabbit": {"tp": 1, "fp": 7, "fn": 1}, "kg": {"tp": 1, "fp": 1, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 1}, "devin": {"tp": 2, "fp": 1, "fn": 0}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 5, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/107534", "language": "Go", "pr_size": "small", "domain": "testing", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "low", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 2, "fn": 1}, "copilot": {"tp": 0, "fp": 4, "fn": 1}, "augment": {"tp": 0, "fp": 0, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 1, "fn": 1}, "kg": {"tp": 0, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 2, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 1}, "sourcery": {"tp": 0, "fp": 3, "fn": 1}, "claude-code": {"tp": 0, "fp": 7, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 1}, "propel-v2": {"tp": 0, "fp": 2, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 3, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 1}, "cloudaeye": {"tp": 0, "fp": 0, "fn": 1}}}, {"url": "https://github.com/grafana/grafana/pull/79265", "language": "Go", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 4}, "claude": {"tp": 1, "fp": 2, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "copilot": {"tp": 3, "fp": 6, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 5}, "augment": {"tp": 1, "fp": 1, "fn": 4}, "propel": {"tp": 1, "fp": 1, "fn": 4}, "coderabbit": {"tp": 4, "fp": 5, "fn": 1}, "kg": {"tp": 1, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 4}, "devin": {"tp": 1, "fp": 1, "fn": 4}, "sourcery": {"tp": 3, "fp": 3, "fn": 2}, "claude-code": {"tp": 0, "fp": 4, "fn": 5}, "kodus-v2": {"tp": 2, "fp": 2, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 5, "fn": 4}, "cubic-v2": {"tp": 3, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 2, "fn": 5}, "baz": {"tp": 1, "fp": 0, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 4}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 4}, "greptile-v4-1": {"tp": 2, "fp": 4, "fn": 3}, "cloudaeye": {"tp": 4, "fp": 0, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9", "language": "Ruby", "pr_size": "small", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 0, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 0, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 1}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 3, "fn": 2}, "coderabbit": {"tp": 0, "fp": 2, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 1, "fn": 2}, "sourcery": {"tp": 0, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 1, "fn": 0}}}, {"url": "https://github.com/grafana/grafana/pull/76186", "language": "Go", "pr_size": "small", "domain": "logging", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 3, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "copilot": {"tp": 0, "fp": 0, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 0, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 0, "fp": 6, "fn": 2}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 2, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 0}, "macroscope": {"tp": 0, "fp": 1, "fn": 2}, "baz": {"tp": 0, "fp": 0, "fn": 2}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 1, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 2, "fp": 5, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10", "language": "Ruby", "pr_size": "large", "domain": "configuration", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 10, "fn": 3}, "claude": {"tp": 0, "fp": 3, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 3, "fp": 4, "fn": 1}, "copilot": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 3, "fp": 1, "fn": 1}, "augment": {"tp": 3, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 17, "fn": 2}, "kg": {"tp": 0, "fp": 2, "fn": 4}, "qodo-v2": {"tp": 2, "fp": 8, "fn": 2}, "devin": {"tp": 0, "fp": 2, "fn": 4}, "sourcery": {"tp": 4, "fp": 11, "fn": 0}, "claude-code": {"tp": 0, "fp": 6, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 10, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 2, "fp": 5, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 2}, "propel-v2": {"tp": 3, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 7, "fn": 2}, "qodo-extended-v2": {"tp": 3, "fp": 7, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 4, "fn": 3}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 2}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "bug_fix", "complexity": "simple", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 2, "fn": 0}, "claude": {"tp": 3, "fp": 5, "fn": 0}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 3, "fp": 1, "fn": 0}, "copilot": {"tp": 3, "fp": 1, "fn": 0}, "augment": {"tp": 3, "fp": 2, "fn": 0}, "propel": {"tp": 2, "fp": 1, "fn": 1}, "coderabbit": {"tp": 0, "fp": 2, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 3, "fp": 1, "fn": 0}, "devin": {"tp": 3, "fp": 3, "fn": 0}, "sourcery": {"tp": 1, "fp": 2, "fn": 2}, "claude-code": {"tp": 3, "fp": 4, "fn": 0}, "kodus-v2": {"tp": 3, "fp": 0, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 1, "fn": 1}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 2}, "baz": {"tp": 3, "fp": 1, "fn": 0}, "propel-v2": {"tp": 0, "fp": 0, "fn": 3}, "codeant-v2": {"tp": 0, "fp": 9, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 2}, "greptile-v4-1": {"tp": 1, "fp": 0, "fn": 2}, "cloudaeye": {"tp": 0, "fp": 1, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8", "language": "Ruby", "pr_size": "medium", "domain": "API", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 2}, "claude": {"tp": 2, "fp": 5, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 1, "fp": 1, "fn": 2}, "copilot": {"tp": 1, "fp": 7, "fn": 2}, "augment": {"tp": 2, "fp": 3, "fn": 1}, "propel": {"tp": 1, "fp": 4, "fn": 2}, "coderabbit": {"tp": 2, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 1, "fp": 2, "fn": 2}, "devin": {"tp": 1, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 4, "fn": 2}, "claude-code": {"tp": 1, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 5, "fn": 2}, "qodo-extended": {"tp": 3, "fp": 1, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 3, "fn": 2}, "baz": {"tp": 1, "fp": 2, "fn": 2}, "propel-v2": {"tp": 2, "fp": 5, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 8, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 2, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3", "language": "Ruby", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 4, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "copilot": {"tp": 0, "fp": 2, "fn": 2}, "bugbot": {"tp": 0, "fp": 2, "fn": 2}, "augment": {"tp": 1, "fp": 5, "fn": 1}, "propel": {"tp": 1, "fp": 1, "fn": 1}, "coderabbit": {"tp": 1, "fp": 8, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 5, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 6, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 6, "fn": 0}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 0, "fn": 0}, "baz": {"tp": 0, "fp": 5, "fn": 2}, "propel-v2": {"tp": 0, "fp": 1, "fn": 2}, "codeant-v2": {"tp": 1, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 3, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5", "language": "Ruby", "pr_size": "small", "domain": "UI", "change_type": "performance", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 1, "fn": 2}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 0, "fp": 3, "fn": 2}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 1, "fn": 2}, "coderabbit": {"tp": 0, "fp": 1, "fn": 2}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 0, "fp": 2, "fn": 2}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 0}, "macroscope": {"tp": 0, "fp": 0, "fn": 2}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 0, "fp": 0, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6", "language": "Ruby", "pr_size": "small", "domain": "serialization", "change_type": "feature", "complexity": "simple", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 1}, "claude": {"tp": 0, "fp": 0, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 1}, "bugbot": {"tp": 0, "fp": 3, "fn": 1}, "copilot": {"tp": 1, "fp": 2, "fn": 0}, "augment": {"tp": 0, "fp": 3, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 1}, "coderabbit": {"tp": 0, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 0, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 3, "fn": 1}, "sourcery": {"tp": 0, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 3, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 4, "fn": 1}, "qodo-extended": {"tp": 0, "fp": 7, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 1}, "macroscope": {"tp": 0, "fp": 2, "fn": 1}, "baz": {"tp": 0, "fp": 5, "fn": 1}, "propel-v2": {"tp": 0, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 1}, "qodo-extended-v2": {"tp": 0, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 0, "fp": 4, "fn": 1}, "cloudaeye": {"tp": 0, "fp": 1, "fn": 1}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4", "language": "Ruby", "pr_size": "large", "domain": "API", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 7, "fn": 3}, "claude": {"tp": 2, "fp": 5, "fn": 4}, "graphite": {"tp": 0, "fp": 0, "fn": 6}, "bugbot": {"tp": 1, "fp": 4, "fn": 5}, "copilot": {"tp": 1, "fp": 5, "fn": 5}, "augment": {"tp": 2, "fp": 3, "fn": 4}, "propel": {"tp": 1, "fp": 1, "fn": 5}, "coderabbit": {"tp": 3, "fp": 24, "fn": 3}, "kg": {"tp": 0, "fp": 0, "fn": 6}, "qodo-v2": {"tp": 3, "fp": 14, "fn": 3}, "devin": {"tp": 1, "fp": 3, "fn": 5}, "sourcery": {"tp": 1, "fp": 4, "fn": 5}, "claude-code": {"tp": 2, "fp": 4, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 3, "fn": 4}, "qodo-extended": {"tp": 4, "fp": 7, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 4, "fn": 5}, "macroscope": {"tp": 1, "fp": 5, "fn": 5}, "baz": {"tp": 1, "fp": 4, "fn": 5}, "propel-v2": {"tp": 2, "fp": 7, "fn": 4}, "codeant-v2": {"tp": 1, "fp": 17, "fn": 5}, "qodo-extended-v2": {"tp": 3, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 3, "fn": 4}, "cloudaeye": {"tp": 3, "fp": 8, "fn": 3}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1", "language": "Ruby", "pr_size": "medium", "domain": "file_io", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 3}, "bugbot": {"tp": 2, "fp": 2, "fn": 1}, "copilot": {"tp": 2, "fp": 7, "fn": 1}, "augment": {"tp": 2, "fp": 4, "fn": 1}, "propel": {"tp": 2, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 5, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 2, "fp": 4, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 3, "fn": 1}, "claude-code": {"tp": 3, "fp": 3, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 3, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 2}, "baz": {"tp": 2, "fp": 1, "fn": 1}, "propel-v2": {"tp": 2, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 1, "fn": 0}}}, {"url": "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2", "language": "Ruby", "pr_size": "medium", "domain": "UI", "change_type": "feature", "complexity": "moderate", "difficulty": "moderate", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 0, "fn": 0}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 2, "fp": 3, "fn": 0}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 1, "fp": 0, "fn": 1}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 1, "fp": 5, "fn": 1}, "claude-code": {"tp": 0, "fp": 5, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended": {"tp": 1, "fp": 2, "fn": 1}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 0, "fn": 1}, "propel-v2": {"tp": 1, "fp": 3, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 2, "fn": 1}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 1, "fp": 1, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/22532", "language": "TypeScript", "pr_size": "medium", "domain": "caching", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 1}, "claude": {"tp": 0, "fp": 1, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 0, "fp": 1, "fn": 2}, "copilot": {"tp": 2, "fp": 11, "fn": 0}, "augment": {"tp": 2, "fp": 4, "fn": 0}, "propel": {"tp": 1, "fp": 3, "fn": 1}, "coderabbit": {"tp": 1, "fp": 4, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 1}, "devin": {"tp": 1, "fp": 1, "fn": 1}, "sourcery": {"tp": 1, "fp": 3, "fn": 1}, "claude-code": {"tp": 1, "fp": 4, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 0, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 0, "fp": 2, "fn": 2}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 2, "fp": 4, "fn": 0}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 0, "fp": 5, "fn": 2}, "cloudaeye": {"tp": 1, "fp": 2, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/8330", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "bug_fix", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 2, "fp": 4, "fn": 0}, "claude": {"tp": 2, "fp": 0, "fn": 0}, "graphite": {"tp": 2, "fp": 0, "fn": 0}, "bugbot": {"tp": 2, "fp": 1, "fn": 0}, "copilot": {"tp": 2, "fp": 4, "fn": 0}, "augment": {"tp": 2, "fp": 3, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 1}, "coderabbit": {"tp": 2, "fp": 2, "fn": 0}, "kg": {"tp": 1, "fp": 2, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 2, "fn": 0}, "devin": {"tp": 2, "fp": 0, "fn": 0}, "sourcery": {"tp": 2, "fp": 3, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 0}, "kodus-v2": {"tp": 2, "fp": 1, "fn": 0}, "qodo-extended": {"tp": 2, "fp": 4, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 2, "fp": 1, "fn": 0}, "baz": {"tp": 2, "fp": 1, "fn": 0}, "propel-v2": {"tp": 2, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended-v2": {"tp": 2, "fp": 2, "fn": 0}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 0}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14943", "language": "TypeScript", "pr_size": "small", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 2, "fn": 2}, "claude": {"tp": 1, "fp": 0, "fn": 1}, "graphite": {"tp": 1, "fp": 0, "fn": 1}, "bugbot": {"tp": 1, "fp": 2, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 2, "fp": 1, "fn": 0}, "propel": {"tp": 2, "fp": 0, "fn": 0}, "coderabbit": {"tp": 2, "fp": 1, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 2, "fp": 1, "fn": 0}, "devin": {"tp": 1, "fp": 0, "fn": 1}, "sourcery": {"tp": 2, "fp": 1, "fn": 0}, "claude-code": {"tp": 0, "fp": 2, "fn": 2}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 2, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 0, "fn": 0}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 1, "fp": 3, "fn": 1}, "propel-v2": {"tp": 1, "fp": 0, "fn": 1}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 1, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 0, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/22345", "language": "TypeScript", "pr_size": "small", "domain": "database", "change_type": "migration", "complexity": "moderate", "difficulty": "subtle", "risk": "medium", "context": "file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 0, "fp": 5, "fn": 2}, "claude": {"tp": 0, "fp": 0, "fn": 2}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 0, "fp": 5, "fn": 2}, "augment": {"tp": 1, "fp": 3, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 1, "fp": 3, "fn": 1}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 1, "fp": 3, "fn": 1}, "devin": {"tp": 0, "fp": 0, "fn": 2}, "sourcery": {"tp": 1, "fp": 1, "fn": 1}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 0, "fp": 6, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 1, "fn": 1}, "macroscope": {"tp": 1, "fp": 0, "fn": 1}, "baz": {"tp": 0, "fp": 2, "fn": 2}, "propel-v2": {"tp": 0, "fp": 2, "fn": 2}, "codeant-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended-v2": {"tp": 0, "fp": 0, "fn": 2}, "greptile-v4-1": {"tp": 0, "fp": 3, "fn": 2}, "cloudaeye": {"tp": 0, "fp": 0, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/11059", "language": "TypeScript", "pr_size": "large", "domain": "authentication", "change_type": "feature", "complexity": "complex", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 3, "fp": 3, "fn": 2}, "claude": {"tp": 3, "fp": 3, "fn": 2}, "graphite": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 4, "fp": 4, "fn": 1}, "copilot": {"tp": 5, "fp": 12, "fn": 0}, "augment": {"tp": 5, "fp": 8, "fn": 0}, "propel": {"tp": 1, "fp": 2, "fn": 4}, "coderabbit": {"tp": 5, "fp": 16, "fn": 0}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 5, "fp": 6, "fn": 0}, "devin": {"tp": 4, "fp": 1, "fn": 1}, "sourcery": {"tp": 5, "fp": 0, "fn": 0}, "claude-code": {"tp": 2, "fp": 2, "fn": 3}, "kodus-v2": {"tp": 3, "fp": 6, "fn": 2}, "qodo-extended": {"tp": 5, "fp": 8, "fn": 0}, "cubic-v2": {"tp": 3, "fp": 0, "fn": 2}, "macroscope": {"tp": 4, "fp": 6, "fn": 1}, "baz": {"tp": 4, "fp": 1, "fn": 1}, "propel-v2": {"tp": 5, "fp": 3, "fn": 0}, "codeant-v2": {"tp": 3, "fp": 6, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 5, "fn": 1}, "greptile-v4-1": {"tp": 4, "fp": 5, "fn": 1}, "cloudaeye": {"tp": 3, "fp": 3, "fn": 2}}}, {"url": "https://github.com/calcom/cal.com/pull/7232", "language": "TypeScript", "pr_size": "medium", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 2, "fn": 1}, "claude": {"tp": 1, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 3, "fn": 1}, "copilot": {"tp": 2, "fp": 6, "fn": 0}, "augment": {"tp": 1, "fp": 2, "fn": 1}, "propel": {"tp": 0, "fp": 2, "fn": 2}, "coderabbit": {"tp": 2, "fp": 12, "fn": 0}, "kg": {"tp": 0, "fp": 1, "fn": 2}, "qodo-v2": {"tp": 2, "fp": 3, "fn": 0}, "devin": {"tp": 1, "fp": 4, "fn": 1}, "sourcery": {"tp": 1, "fp": 2, "fn": 1}, "claude-code": {"tp": 1, "fp": 5, "fn": 1}, "kodus-v2": {"tp": 1, "fp": 5, "fn": 1}, "qodo-extended": {"tp": 2, "fp": 3, "fn": 0}, "cubic-v2": {"tp": 2, "fp": 3, "fn": 0}, "macroscope": {"tp": 1, "fp": 2, "fn": 1}, "baz": {"tp": 1, "fp": 1, "fn": 1}, "propel-v2": {"tp": 2, "fp": 5, "fn": 0}, "codeant-v2": {"tp": 0, "fp": 3, "fn": 2}, "qodo-extended-v2": {"tp": 2, "fp": 1, "fn": 0}, "greptile-v4-1": {"tp": 1, "fp": 2, "fn": 1}, "cloudaeye": {"tp": 2, "fp": 4, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/14740", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 3, "fn": 4}, "claude": {"tp": 2, "fp": 0, "fn": 3}, "graphite": {"tp": 1, "fp": 1, "fn": 4}, "bugbot": {"tp": 3, "fp": 3, "fn": 2}, "copilot": {"tp": 3, "fp": 8, "fn": 2}, "augment": {"tp": 4, "fp": 3, "fn": 1}, "propel": {"tp": 4, "fp": 0, "fn": 1}, "coderabbit": {"tp": 4, "fp": 7, "fn": 1}, "kg": {"tp": 2, "fp": 0, "fn": 3}, "qodo-v2": {"tp": 4, "fp": 4, "fn": 1}, "devin": {"tp": 2, "fp": 0, "fn": 3}, "sourcery": {"tp": 3, "fp": 2, "fn": 2}, "claude-code": {"tp": 1, "fp": 3, "fn": 4}, "kodus-v2": {"tp": 2, "fp": 0, "fn": 3}, "qodo-extended": {"tp": 4, "fp": 4, "fn": 1}, "cubic-v2": {"tp": 4, "fp": 2, "fn": 1}, "macroscope": {"tp": 2, "fp": 4, "fn": 3}, "baz": {"tp": 1, "fp": 1, "fn": 4}, "propel-v2": {"tp": 4, "fp": 1, "fn": 1}, "codeant-v2": {"tp": 3, "fp": 4, "fn": 2}, "qodo-extended-v2": {"tp": 4, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 3, "fp": 2, "fn": 2}, "cloudaeye": {"tp": 4, "fp": 0, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/10600", "language": "TypeScript", "pr_size": "medium", "domain": "authentication", "change_type": "feature", "complexity": "moderate", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "security", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 5, "fn": 3}, "claude": {"tp": 1, "fp": 12, "fn": 3}, "graphite": {"tp": 0, "fp": 0, "fn": 4}, "bugbot": {"tp": 1, "fp": 0, "fn": 3}, "copilot": {"tp": 1, "fp": 9, "fn": 3}, "augment": {"tp": 1, "fp": 4, "fn": 3}, "propel": {"tp": 1, "fp": 1, "fn": 3}, "coderabbit": {"tp": 2, "fp": 7, "fn": 2}, "kg": {"tp": 0, "fp": 0, "fn": 4}, "qodo-v2": {"tp": 1, "fp": 6, "fn": 3}, "devin": {"tp": 1, "fp": 0, "fn": 3}, "sourcery": {"tp": 2, "fp": 5, "fn": 2}, "claude-code": {"tp": 1, "fp": 5, "fn": 3}, "kodus-v2": {"tp": 1, "fp": 1, "fn": 3}, "qodo-extended": {"tp": 1, "fp": 6, "fn": 3}, "cubic-v2": {"tp": 2, "fp": 2, "fn": 2}, "macroscope": {"tp": 0, "fp": 3, "fn": 4}, "baz": {"tp": 0, "fp": 1, "fn": 4}, "propel-v2": {"tp": 1, "fp": 2, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 4, "fn": 3}, "qodo-extended-v2": {"tp": 1, "fp": 3, "fn": 3}, "greptile-v4-1": {"tp": 2, "fp": 1, "fn": 2}, "cloudaeye": {"tp": 4, "fp": 1, "fn": 0}}}, {"url": "https://github.com/calcom/cal.com/pull/10967", "language": "TypeScript", "pr_size": "large", "domain": "scheduling", "change_type": "bug_fix", "complexity": "complex", "difficulty": "subtle", "risk": "high", "context": "cross_file", "concern": "correctness", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 8, "fn": 4}, "claude": {"tp": 4, "fp": 3, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 5}, "bugbot": {"tp": 2, "fp": 4, "fn": 3}, "copilot": {"tp": 3, "fp": 8, "fn": 2}, "augment": {"tp": 2, "fp": 3, "fn": 3}, "propel": {"tp": 1, "fp": 0, "fn": 4}, "coderabbit": {"tp": 4, "fp": 11, "fn": 1}, "kg": {"tp": 0, "fp": 0, "fn": 5}, "qodo-v2": {"tp": 1, "fp": 4, "fn": 4}, "devin": {"tp": 3, "fp": 2, "fn": 2}, "sourcery": {"tp": 2, "fp": 2, "fn": 3}, "claude-code": {"tp": 3, "fp": 3, "fn": 2}, "kodus-v2": {"tp": 2, "fp": 4, "fn": 3}, "qodo-extended": {"tp": 3, "fp": 5, "fn": 2}, "cubic-v2": {"tp": 1, "fp": 2, "fn": 4}, "macroscope": {"tp": 1, "fp": 4, "fn": 4}, "baz": {"tp": 1, "fp": 2, "fn": 4}, "propel-v2": {"tp": 2, "fp": 5, "fn": 3}, "codeant-v2": {"tp": 1, "fp": 6, "fn": 4}, "qodo-extended-v2": {"tp": 3, "fp": 2, "fn": 2}, "greptile-v4-1": {"tp": 2, "fp": 2, "fn": 3}, "cloudaeye": {"tp": 4, "fp": 4, "fn": 1}}}, {"url": "https://github.com/calcom/cal.com/pull/8087", "language": "TypeScript", "pr_size": "medium", "domain": "concurrency", "change_type": "refactoring", "complexity": "moderate", "difficulty": "subtle", "risk": "critical", "context": "cross_file", "concern": "reliability", "summary": "", "tool_metrics": {"gemini": {"tp": 1, "fp": 1, "fn": 1}, "claude": {"tp": 1, "fp": 1, "fn": 1}, "graphite": {"tp": 0, "fp": 0, "fn": 2}, "bugbot": {"tp": 1, "fp": 1, "fn": 1}, "copilot": {"tp": 1, "fp": 3, "fn": 1}, "augment": {"tp": 1, "fp": 1, "fn": 1}, "propel": {"tp": 0, "fp": 0, "fn": 2}, "coderabbit": {"tp": 2, "fp": 7, "fn": 0}, "kg": {"tp": 1, "fp": 0, "fn": 1}, "qodo-v2": {"tp": 1, "fp": 1, "fn": 1}, "devin": {"tp": 1, "fp": 6, "fn": 1}, "sourcery": {"tp": 1, "fp": 4, "fn": 1}, "claude-code": {"tp": 1, "fp": 1, "fn": 1}, "kodus-v2": {"tp": 0, "fp": 0, "fn": 2}, "qodo-extended": {"tp": 1, "fp": 3, "fn": 1}, "cubic-v2": {"tp": 2, "fp": 5, "fn": 0}, "macroscope": {"tp": 1, "fp": 1, "fn": 1}, "baz": {"tp": 1, "fp": 2, "fn": 1}, "propel-v2": {"tp": 1, "fp": 4, "fn": 1}, "codeant-v2": {"tp": 1, "fp": 2, "fn": 1}, "qodo-extended-v2": {"tp": 1, "fp": 1, "fn": 1}, "greptile-v4-1": {"tp": 1, "fp": 6, "fn": 1}, "cloudaeye": {"tp": 1, "fp": 3, "fn": 1}}}], "tools": ["augment", "baz", "bugbot", "claude", "claude-code", "cloudaeye", "codeant-v2", "coderabbit", "copilot", "cubic-v2", "devin", "gemini", "graphite", "greptile-v4-1", "kg", "kodus-v2", "macroscope", "propel", "propel-v2", "qodo-extended", "qodo-extended-v2", "qodo-v2", "sourcery"], "dimensions": {"language": ["Go", "Java", "Python", "Ruby", "TypeScript"], "pr_size": ["small", "medium", "large"], "domain": ["API", "UI", "authentication", "caching", "concurrency", "configuration", "data_processing", "database", "file_io", "logging", "scheduling", "serialization", "testing"], "change_type": ["bug_fix", "feature", "migration", "performance", "refactoring", "security_patch"], "complexity": ["simple", "moderate", "complex"], "difficulty": ["obvious", "moderate", "subtle", "very_subtle"], "risk": ["low", "medium", "high", "critical"], "context": ["local", "file", "cross_file", "system"], "concern": ["correctness", "maintainability", "reliability", "security"]}, "overall_metrics": {"graphite": {"precision": 80.0, "recall": 8.8, "f1": 15.8, "tp": 12, "fp": 3, "fn": 125, "num_prs": 50}, "devin": {"precision": 46.4, "recall": 37.2, "f1": 41.3, "tp": 51, "fp": 59, "fn": 86, "num_prs": 50}, "qodo-extended": {"precision": 31.4, "recall": 59.9, "f1": 41.2, "tp": 82, "fp": 179, "fn": 55, "num_prs": 50}, "sourcery": {"precision": 29.8, "recall": 53.3, "f1": 38.2, "tp": 73, "fp": 172, "fn": 64, "num_prs": 50}, "kodus-v2": {"precision": 35.9, "recall": 34.3, "f1": 35.1, "tp": 47, "fp": 84, "fn": 90, "num_prs": 50}, "baz": {"precision": 39.5, "recall": 34.3, "f1": 36.7, "tp": 47, "fp": 72, "fn": 90, "num_prs": 50}, "cubic-v2": {"precision": 53.6, "recall": 65.7, "f1": 59.0, "tp": 90, "fp": 78, "fn": 47, "num_prs": 50}, "augment": {"precision": 41.9, "recall": 60.6, "f1": 49.6, "tp": 83, "fp": 115, "fn": 54, "num_prs": 50}, "qodo-extended-v2": {"precision": 48.1, "recall": 56.9, "f1": 52.2, "tp": 78, "fp": 84, "fn": 59, "num_prs": 50}, "propel": {"precision": 45.0, "recall": 36.5, "f1": 40.3, "tp": 50, "fp": 61, "fn": 87, "num_prs": 50}, "bugbot": {"precision": 43.2, "recall": 43.8, "f1": 43.5, "tp": 60, "fp": 79, "fn": 77, "num_prs": 50}, "greptile-v4-1": {"precision": 34.6, "recall": 46.0, "f1": 39.5, "tp": 63, "fp": 119, "fn": 74, "num_prs": 50}, "qodo-v2": {"precision": 36.2, "recall": 56.2, "f1": 44.0, "tp": 77, "fp": 136, "fn": 60, "num_prs": 50}, "claude-code": {"precision": 27.4, "recall": 41.6, "f1": 33.0, "tp": 57, "fp": 151, "fn": 80, "num_prs": 50}, "gemini": {"precision": 26.3, "recall": 33.6, "f1": 29.5, "tp": 46, "fp": 129, "fn": 91, "num_prs": 50}, "coderabbit": {"precision": 23.4, "recall": 57.7, "f1": 33.3, "tp": 79, "fp": 259, "fn": 58, "num_prs": 50}, "macroscope": {"precision": 41.8, "recall": 40.9, "f1": 41.3, "tp": 56, "fp": 78, "fn": 81, "num_prs": 50}, "propel-v2": {"precision": 36.6, "recall": 46.0, "f1": 40.8, "tp": 63, "fp": 109, "fn": 74, "num_prs": 50}, "claude": {"precision": 32.3, "recall": 37.2, "f1": 34.6, "tp": 51, "fp": 107, "fn": 86, "num_prs": 50}, "cloudaeye": {"precision": 57.6, "recall": 71.5, "f1": 63.8, "tp": 98, "fp": 72, "fn": 39, "num_prs": 50}, "codeant-v2": {"precision": 25.7, "recall": 34.3, "f1": 29.4, "tp": 47, "fp": 136, "fn": 90, "num_prs": 50}, "kg": {"precision": 51.1, "recall": 16.8, "f1": 25.3, "tp": 23, "fp": 22, "fn": 114, "num_prs": 50}, "copilot": {"precision": 24.7, "recall": 52.6, "f1": 33.6, "tp": 72, "fp": 220, "fn": 65, "num_prs": 50}}}};
+        const toolDisplayNames = {"graphite": "Graphite", "qodo": "Qodo", "gemini": "Gemini", "claude": "Claude Code", "augment": "Augment", "bugbot": "Cursor Bugbot", "coderabbit": "CodeRabbit", "propel": "Propel", "copilot": "GitHub Copilot", "baz": "Baz", "greptile": "Greptile", "kg": "KG", "entelligence": "Entelligence", "cubic-dev": "Cubic", "sourcery": "Sourcery", "mesa": "Mesa", "codeant": "CodeAnt", "codeant-v2": "CodeAnt v2", "claude-code": "Claude Code (CLI)", "devin": "Devin", "kodus-v2": "Kodus", "greptile-v4": "Greptile v4", "qodo-v2": "Qodo v2", "qodo-extended-v2": "Qodo Extended", "macroscope": "Macroscope", "cubic-v2": "Cubic v2", "cloudaeye": "CloudAEye"};
+        const toolColors = {"graphite": "#6366f1", "qodo": "#8b5cf6", "gemini": "#06b6d4", "claude": "#f59e0b", "augment": "#10b981", "bugbot": "#3b82f6", "coderabbit": "#ec4899", "propel": "#14b8a6", "propel-v2": "#0d9488", "copilot": "#6b7280", "baz": "#f97316", "greptile": "#22c55e", "kg": "#a855f7", "entelligence": "#0ea5e9", "cubic-dev": "#d946ef", "sourcery": "#84cc16", "mesa": "#f43f5e", "codeant": "#e11d48", "codeant-v2": "#fb7185", "claude-code": "#d97706", "devin": "#7c3aed", "kodus-v2": "#059669", "greptile-v4": "#16a34a", "qodo-v2": "#7c3aed", "qodo-extended-v2": "#6d28d9", "macroscope": "#0891b2", "cubic-v2": "#c026d3", "cloudaeye": "#38bdf8"};
+        const predefinedFilters = [{"id": "high_precision", "label": "Highest Precision", "filters": {}, "sort": "precision", "description": "Tools ranked by precision - fewer false positives, more reliable findings", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "graphite", "best_score": 100.0}, {"id": "tool_claude_language_python_risk_medium", "label": "Python + Medium Risk (Precision)", "filters": {"language": ["Python"], "risk": ["medium"]}, "description": "Python codebases with dynamic typing. Moderate user impact.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "claude", "best_score": 100.0, "sort": "precision"}, {"id": "tool_kodus-v2_domain_concurrency", "label": "Best for Concurrency (Precision)", "filters": {"domain": ["concurrency"]}, "description": "Threading and async operations.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "kodus-v2", "best_score": 100.0, "sort": "precision"}, {"id": "tool_kg_complexity_complex", "label": "Best for Complex Code (Precision)", "filters": {"complexity": ["complex"]}, "description": "Deep logic and dependencies.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "kg", "best_score": 100.0, "sort": "precision"}, {"id": "tool_propel-v2_risk_high_context_file", "label": "High Risk + File Context (Precision)", "filters": {"risk": ["high"], "context": ["file"]}, "description": "Significant impact, potential data loss. Requires full file understanding.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "propel-v2", "best_score": 100.0, "sort": "precision"}, {"id": "tool_coderabbit_language_typescript_domain_scheduling", "label": "Typescript + Scheduling (Recall)", "filters": {"language": ["TypeScript"], "domain": ["scheduling"]}, "description": "TypeScript codebases with frontend patterns. Task scheduling.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "coderabbit", "best_score": 87.5, "sort": "recall"}, {"id": "change_type_performance", "label": "Best for Performance Optimization", "filters": {"change_type": ["performance"]}, "description": "Performance optimization changes", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 82.4}, {"id": "tool_copilot_change_type_bug_fix", "label": "Best for Bug Fixes (Recall)", "filters": {"change_type": ["bug_fix"]}, "description": "Bug fixes and issue resolution", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "copilot", "best_score": 82.4, "sort": "recall"}, {"id": "go_small", "label": "Best for Small Go PRs", "filters": {"language": ["Go"], "pr_size": ["small"]}, "description": "Go codebases with concurrency patterns. Small PRs with 1-2 files, easier to review thoroughly", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 77.4}, {"id": "tool_baz_language_java_domain_authentication", "label": "Java + Authentication", "filters": {"language": ["Java"], "domain": ["authentication"]}, "description": "Java codebases with OOP patterns. Auth and access control.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "baz", "best_score": 76.9}, {"id": "tool_codeant-v2_pr_size_small_change_type_performance", "label": "Small PRs + Performance Optimization (Precision)", "filters": {"pr_size": ["small"], "change_type": ["performance"]}, "description": "Small PRs with 1-2 files, easier to review thoroughly Performance optimization changes", "best_model": "openai_gpt-5.2", "best_tool": "codeant-v2", "best_score": 75.0, "sort": "precision"}, {"id": "ruby_medium", "label": "Best for Medium Ruby PRs", "filters": {"language": ["Ruby"], "pr_size": ["medium"]}, "description": "Ruby codebases with Rails patterns. Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "devin", "best_score": 72.7}, {"id": "change_type_bug_fix", "label": "Best for Bug Fixes", "filters": {"change_type": ["bug_fix"]}, "description": "Bug fixes and issue resolution", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-extended-v2", "best_score": 71.8}, {"id": "tool_sourcery_language_typescript_concern_correctness", "label": "Typescript + Correctness", "filters": {"language": ["TypeScript"], "concern": ["correctness"]}, "description": "TypeScript codebases with frontend patterns. Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "sourcery", "best_score": 71.8}, {"id": "tool_gemini_language_ruby_pr_size_medium", "label": "Ruby + Medium PRs (Recall)", "filters": {"language": ["Ruby"], "pr_size": ["medium"]}, "description": "Ruby codebases with Rails patterns. Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "gemini", "best_score": 69.2, "sort": "recall"}, {"id": "domain_ui", "label": "Best for Ui", "filters": {"domain": ["UI"]}, "description": "User interface and frontend.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "bugbot", "best_score": 66.7}, {"id": "tool_qodo-extended_change_type_bug_fix_context_cross_file", "label": "Bug Fixes + Cross-File", "filters": {"change_type": ["bug_fix"], "context": ["cross_file"]}, "description": "Bug fixes and issue resolution Spans multiple files.", "best_model": "openai_gpt-5.2", "best_tool": "qodo-extended", "best_score": 66.7}, {"id": "tool_qodo-v2_concern_reliability", "label": "Best for Reliability", "filters": {"concern": ["reliability"]}, "description": "Error handling and system stability", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "qodo-v2", "best_score": 60.9}, {"id": "tool_augment_domain_concurrency", "label": "Best for Concurrency", "filters": {"domain": ["concurrency"]}, "description": "Threading and async operations.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "augment", "best_score": 60.6}, {"id": "tool_propel_language_ruby_concern_correctness", "label": "Ruby + Correctness", "filters": {"language": ["Ruby"], "concern": ["correctness"]}, "description": "Ruby codebases with Rails patterns. Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "propel", "best_score": 57.9}, {"id": "tool_macroscope_language_ruby_concern_correctness", "label": "Ruby + Correctness", "filters": {"language": ["Ruby"], "concern": ["correctness"]}, "description": "Ruby codebases with Rails patterns. Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "macroscope", "best_score": 57.9}, {"id": "domain_caching", "label": "Best for Caching", "filters": {"domain": ["caching"]}, "description": "Cache and memoization.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 76.2}, {"id": "language_go", "label": "Best for Go", "filters": {"language": ["Go"]}, "description": "Go codebases with concurrency patterns.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 76.0}, {"id": "context_file", "label": "Best for File Context", "filters": {"context": ["file"]}, "description": "Requires full file understanding.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 75.4}, {"id": "pr_size_small", "label": "Best for Small PRs", "filters": {"pr_size": ["small"]}, "description": "Small PRs with 1-2 files, easier to review thoroughly", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cubic-v2", "best_score": 75.3}, {"id": "domain_scheduling", "label": "Best for Scheduling", "filters": {"domain": ["scheduling"]}, "description": "Task scheduling.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 75.0}, {"id": "concern_security", "label": "Best for Security", "filters": {"concern": ["security"]}, "description": "Security vulnerabilities and attack vectors", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 74.4}, {"id": "security_critical", "label": "Security Critical", "filters": {"concern": ["security"], "risk": ["high", "critical"]}, "description": "Security vulnerabilities and attack vectors Significant impact, potential data loss.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 74.4}, {"id": "risk_high", "label": "Best for High Risk", "filters": {"risk": ["high"]}, "description": "Significant impact, potential data loss.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 72.9}, {"id": "language_python", "label": "Best for Python", "filters": {"language": ["Python"]}, "description": "Python codebases with dynamic typing.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 72.7}, {"id": "high_recall", "label": "Highest Recall", "filters": {}, "sort": "recall", "description": "Tools ranked by recall - catches more issues, may have more noise", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 72.3}, {"id": "python_medium", "label": "Best for Medium Python PRs", "filters": {"language": ["Python"], "pr_size": ["medium"]}, "description": "Python codebases with dynamic typing. Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 72.2}, {"id": "domain_authentication", "label": "Best for Authentication", "filters": {"domain": ["authentication"]}, "description": "Auth and access control.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cloudaeye", "best_score": 72.0}, {"id": "difficulty_moderate", "label": "Best for Moderate Bugs", "filters": {"difficulty": ["moderate"]}, "description": "Requires careful reading.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 72.0}, {"id": "high_risk_auth", "label": "High Risk Auth", "filters": {"risk": ["high", "critical"], "domain": ["authentication"]}, "description": "Significant impact, potential data loss. Auth and access control.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 71.0}, {"id": "risk_critical", "label": "Best for Critical Risk", "filters": {"risk": ["critical"]}, "description": "Critical security or data corruption risk.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "qodo-extended-v2", "best_score": 69.8}, {"id": "java_medium", "label": "Best for Medium Java PRs", "filters": {"language": ["Java"], "pr_size": ["medium"]}, "description": "Java codebases with OOP patterns. Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cloudaeye", "best_score": 68.6}, {"id": "change_type_feature", "label": "Best for Features", "filters": {"change_type": ["feature"]}, "description": "New feature implementation", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 68.3}, {"id": "complexity_moderate", "label": "Best for Moderate Code", "filters": {"complexity": ["moderate"]}, "description": "Some abstraction.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 68.2}, {"id": "complexity_complex", "label": "Best for Complex Code", "filters": {"complexity": ["complex"]}, "description": "Deep logic and dependencies.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "qodo-extended-v2", "best_score": 67.7}, {"id": "complex_subtle", "label": "Complex & Subtle", "filters": {"complexity": ["complex"], "difficulty": ["subtle", "very_subtle"]}, "description": "Deep logic and dependencies. Non-obvious, needs domain knowledge.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "qodo-extended-v2", "best_score": 67.7}, {"id": "concern_correctness", "label": "Best for Correctness", "filters": {"concern": ["correctness"]}, "description": "Logical correctness and expected behavior", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 67.3}, {"id": "high_f1", "label": "Highest F1", "filters": {}, "sort": "f1", "description": "Tools ranked by F1 score - balanced precision and recall", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 66.9}, {"id": "language_java", "label": "Best for Java", "filters": {"language": ["Java"]}, "description": "Java codebases with OOP patterns.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cloudaeye", "best_score": 66.7}, {"id": "pr_size_large", "label": "Best for Large PRs", "filters": {"pr_size": ["large"]}, "description": "Large PRs with 6+ files, complex changes requiring careful review", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 66.7}, {"id": "language_typescript", "label": "Best for Typescript", "filters": {"language": ["TypeScript"]}, "description": "TypeScript codebases with frontend patterns.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 65.7}, {"id": "difficulty_subtle", "label": "Best for Subtle Bugs", "filters": {"difficulty": ["subtle"]}, "description": "Non-obvious, needs domain knowledge.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 65.5}, {"id": "context_cross_file", "label": "Best for Cross-File", "filters": {"context": ["cross_file"]}, "description": "Spans multiple files.", "best_model": "anthropic_claude-opus-4-5-20251101", "best_tool": "cloudaeye", "best_score": 64.0}, {"id": "pr_size_medium", "label": "Best for Medium PRs", "filters": {"pr_size": ["medium"]}, "description": "Medium PRs with 3-5 files, typical feature development", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cloudaeye", "best_score": 63.8}, {"id": "risk_medium", "label": "Best for Medium Risk", "filters": {"risk": ["medium"]}, "description": "Moderate user impact.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cloudaeye", "best_score": 63.6}, {"id": "concern_reliability", "label": "Best for Reliability", "filters": {"concern": ["reliability"]}, "description": "Error handling and system stability", "best_model": "openai_gpt-5.2", "best_tool": "cubic-v2", "best_score": 62.5}, {"id": "domain_concurrency", "label": "Best for Concurrency", "filters": {"domain": ["concurrency"]}, "description": "Threading and async operations.", "best_model": "openai_gpt-5.2", "best_tool": "cubic-v2", "best_score": 61.1}, {"id": "language_ruby", "label": "Best for Ruby", "filters": {"language": ["Ruby"]}, "description": "Ruby codebases with Rails patterns.", "best_model": "anthropic_claude-sonnet-4-5-20250929", "best_tool": "cubic-v2", "best_score": 61.0}];
 
         let currentModel = 'anthropic_claude-opus-4-5-20251101';
         let currentFilters = {
diff --git a/offline/analysis/benchmark_dashboard.json b/offline/analysis/benchmark_dashboard.json
index 1fa9b91..ec6f76c 100644
--- a/offline/analysis/benchmark_dashboard.json
+++ b/offline/analysis/benchmark_dashboard.json
@@ -124,6 +124,11 @@
               "tp": 1,
               "fp": 4,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 3
             }
           }
         },
@@ -249,6 +254,11 @@
               "tp": 2,
               "fp": 0,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -374,6 +384,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -499,6 +514,11 @@
               "tp": 0,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -624,6 +644,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 2,
+              "fn": 0
             }
           }
         },
@@ -749,6 +774,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 4,
+              "fn": 0
             }
           }
         },
@@ -874,6 +904,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -999,6 +1034,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -1124,6 +1164,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -1249,6 +1294,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -1374,6 +1424,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -1499,6 +1554,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -1624,6 +1684,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -1749,6 +1814,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -1874,6 +1944,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -1999,6 +2074,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -2124,6 +2204,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 4,
+              "fn": 2
             }
           }
         },
@@ -2249,6 +2334,11 @@
               "tp": 2,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -2374,6 +2464,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -2499,6 +2594,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -2624,6 +2724,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -2749,6 +2854,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -2874,6 +2984,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -2999,6 +3114,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -3124,6 +3244,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -3249,6 +3374,11 @@
               "tp": 3,
               "fp": 3,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -3374,6 +3504,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -3499,6 +3634,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -3624,6 +3764,11 @@
               "tp": 3,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -3749,6 +3894,11 @@
               "tp": 0,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -3874,6 +4024,11 @@
               "tp": 0,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 5,
+              "fn": 0
             }
           }
         },
@@ -3999,6 +4154,11 @@
               "tp": 2,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 2
             }
           }
         },
@@ -4124,6 +4284,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 1,
+              "fn": 3
             }
           }
         },
@@ -4249,6 +4414,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -4374,6 +4544,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -4499,6 +4674,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -4624,6 +4804,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -4749,6 +4934,11 @@
               "tp": 2,
               "fp": 3,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 6,
+              "fn": 2
             }
           }
         },
@@ -4874,6 +5064,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -4999,6 +5194,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -5124,6 +5324,11 @@
               "tp": 0,
               "fp": 5,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -5249,6 +5454,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -5374,6 +5584,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -5499,6 +5714,11 @@
               "tp": 0,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 0,
+              "fn": 2
             }
           }
         },
@@ -5624,6 +5844,11 @@
               "tp": 4,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 2,
+              "fn": 2
             }
           }
         },
@@ -5749,6 +5974,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 4,
+              "fn": 0
             }
           }
         },
@@ -5874,6 +6104,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -5999,6 +6234,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -6124,6 +6364,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 3,
+              "fn": 1
             }
           }
         },
@@ -6249,6 +6494,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 4,
+              "fn": 1
             }
           }
         }
@@ -6259,6 +6509,7 @@
         "bugbot",
         "claude",
         "claude-code",
+        "cloudaeye",
         "codeant-v2",
         "coderabbit",
         "copilot",
@@ -6344,49 +6595,76 @@
         ]
       },
       "overall_metrics": {
-        "greptile-v4-1": {
-          "precision": 40.5,
-          "recall": 48.2,
-          "f1": 44.0,
-          "tp": 66,
-          "fp": 97,
-          "fn": 71,
+        "graphite": {
+          "precision": 100.0,
+          "recall": 8.8,
+          "f1": 16.1,
+          "tp": 12,
+          "fp": 0,
+          "fn": 125,
           "num_prs": 50
         },
-        "copilot": {
-          "precision": 28.3,
-          "recall": 53.3,
-          "f1": 37.0,
-          "tp": 73,
-          "fp": 185,
-          "fn": 64,
+        "devin": {
+          "precision": 54.3,
+          "recall": 37.2,
+          "f1": 44.2,
+          "tp": 51,
+          "fp": 43,
+          "fn": 86,
           "num_prs": 50
         },
-        "propel": {
-          "precision": 55.8,
-          "recall": 38.7,
-          "f1": 45.7,
-          "tp": 53,
-          "fp": 42,
-          "fn": 84,
+        "qodo-extended": {
+          "precision": 37.2,
+          "recall": 62.8,
+          "f1": 46.7,
+          "tp": 86,
+          "fp": 145,
+          "fn": 51,
           "num_prs": 50
         },
-        "kg": {
-          "precision": 50.0,
-          "recall": 16.8,
-          "f1": 25.1,
-          "tp": 23,
-          "fp": 23,
-          "fn": 114,
+        "sourcery": {
+          "precision": 33.3,
+          "recall": 51.8,
+          "f1": 40.6,
+          "tp": 71,
+          "fp": 142,
+          "fn": 66,
           "num_prs": 50
         },
-        "gemini": {
-          "precision": 31.1,
-          "recall": 37.2,
-          "f1": 33.9,
-          "tp": 51,
-          "fp": 113,
-          "fn": 86,
+        "kodus-v2": {
+          "precision": 46.7,
+          "recall": 35.8,
+          "f1": 40.5,
+          "tp": 49,
+          "fp": 56,
+          "fn": 88,
+          "num_prs": 50
+        },
+        "baz": {
+          "precision": 49.0,
+          "recall": 34.3,
+          "f1": 40.3,
+          "tp": 47,
+          "fp": 49,
+          "fn": 90,
+          "num_prs": 50
+        },
+        "cubic-v2": {
+          "precision": 56.3,
+          "recall": 68.6,
+          "f1": 61.8,
+          "tp": 94,
+          "fp": 73,
+          "fn": 43,
+          "num_prs": 50
+        },
+        "augment": {
+          "precision": 47.5,
+          "recall": 61.3,
+          "f1": 53.5,
+          "tp": 84,
+          "fp": 93,
+          "fn": 53,
           "num_prs": 50
         },
         "qodo-extended-v2": {
@@ -6398,6 +6676,33 @@
           "fn": 53,
           "num_prs": 50
         },
+        "propel": {
+          "precision": 55.8,
+          "recall": 38.7,
+          "f1": 45.7,
+          "tp": 53,
+          "fp": 42,
+          "fn": 84,
+          "num_prs": 50
+        },
+        "bugbot": {
+          "precision": 47.2,
+          "recall": 43.8,
+          "f1": 45.5,
+          "tp": 60,
+          "fp": 67,
+          "fn": 77,
+          "num_prs": 50
+        },
+        "greptile-v4-1": {
+          "precision": 40.5,
+          "recall": 48.2,
+          "f1": 44.0,
+          "tp": 66,
+          "fp": 97,
+          "fn": 71,
+          "num_prs": 50
+        },
         "qodo-v2": {
           "precision": 42.9,
           "recall": 55.5,
@@ -6407,13 +6712,22 @@
           "fn": 61,
           "num_prs": 50
         },
-        "kodus-v2": {
-          "precision": 46.7,
-          "recall": 35.8,
-          "f1": 40.5,
-          "tp": 49,
-          "fp": 56,
-          "fn": 88,
+        "claude-code": {
+          "precision": 34.8,
+          "recall": 40.9,
+          "f1": 37.6,
+          "tp": 56,
+          "fp": 105,
+          "fn": 81,
+          "num_prs": 50
+        },
+        "gemini": {
+          "precision": 31.1,
+          "recall": 37.2,
+          "f1": 33.9,
+          "tp": 51,
+          "fp": 113,
+          "fn": 86,
           "num_prs": 50
         },
         "coderabbit": {
@@ -6434,51 +6748,6 @@
           "fn": 77,
           "num_prs": 50
         },
-        "sourcery": {
-          "precision": 33.3,
-          "recall": 51.8,
-          "f1": 40.6,
-          "tp": 71,
-          "fp": 142,
-          "fn": 66,
-          "num_prs": 50
-        },
-        "augment": {
-          "precision": 47.5,
-          "recall": 61.3,
-          "f1": 53.5,
-          "tp": 84,
-          "fp": 93,
-          "fn": 53,
-          "num_prs": 50
-        },
-        "codeant-v2": {
-          "precision": 31.9,
-          "recall": 38.0,
-          "f1": 34.7,
-          "tp": 52,
-          "fp": 111,
-          "fn": 85,
-          "num_prs": 50
-        },
-        "qodo-extended": {
-          "precision": 37.2,
-          "recall": 62.8,
-          "f1": 46.7,
-          "tp": 86,
-          "fp": 145,
-          "fn": 51,
-          "num_prs": 50
-        },
-        "devin": {
-          "precision": 54.3,
-          "recall": 37.2,
-          "f1": 44.2,
-          "tp": 51,
-          "fp": 43,
-          "fn": 86,
-          "num_prs": 50
-        },
         "propel-v2": {
           "precision": 44.4,
           "recall": 49.6,
@@ -6497,49 +6766,40 @@
           "fn": 88,
           "num_prs": 50
         },
-        "bugbot": {
-          "precision": 47.2,
-          "recall": 43.8,
-          "f1": 45.5,
-          "tp": 60,
-          "fp": 67,
-          "fn": 77,
-          "num_prs": 50
-        },
-        "claude-code": {
-          "precision": 34.8,
-          "recall": 40.9,
-          "f1": 37.6,
-          "tp": 56,
-          "fp": 105,
-          "fn": 81,
+        "cloudaeye": {
+          "precision": 62.3,
+          "recall": 72.3,
+          "f1": 66.9,
+          "tp": 99,
+          "fp": 60,
+          "fn": 38,
           "num_prs": 50
         },
-        "graphite": {
-          "precision": 100.0,
-          "recall": 8.8,
-          "f1": 16.1,
-          "tp": 12,
-          "fp": 0,
-          "fn": 125,
+        "codeant-v2": {
+          "precision": 31.9,
+          "recall": 38.0,
+          "f1": 34.7,
+          "tp": 52,
+          "fp": 111,
+          "fn": 85,
           "num_prs": 50
         },
-        "cubic-v2": {
-          "precision": 56.3,
-          "recall": 68.6,
-          "f1": 61.8,
-          "tp": 94,
-          "fp": 73,
-          "fn": 43,
+        "kg": {
+          "precision": 50.0,
+          "recall": 16.8,
+          "f1": 25.1,
+          "tp": 23,
+          "fp": 23,
+          "fn": 114,
           "num_prs": 50
         },
-        "baz": {
-          "precision": 49.0,
-          "recall": 34.3,
-          "f1": 40.3,
-          "tp": 47,
-          "fp": 49,
-          "fn": 90,
+        "copilot": {
+          "precision": 28.3,
+          "recall": 53.3,
+          "f1": 37.0,
+          "tp": 73,
+          "fp": 185,
+          "fn": 64,
           "num_prs": 50
         }
       }
@@ -6663,6 +6923,11 @@
               "tp": 1,
               "fp": 4,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 3
             }
           }
         },
@@ -6783,6 +7048,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -6903,6 +7173,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -7023,6 +7298,11 @@
               "tp": 0,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -7143,6 +7423,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 2,
+              "fn": 0
             }
           }
         },
@@ -7263,6 +7548,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 4,
+              "fn": 0
             }
           }
         },
@@ -7383,6 +7673,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -7503,6 +7798,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -7623,6 +7923,11 @@
               "tp": 2,
               "fp": 0,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -7743,6 +8048,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -7863,6 +8173,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -7983,6 +8298,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -8103,6 +8423,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -8223,6 +8548,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -8343,6 +8673,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -8463,8 +8798,13 @@
               "tp": 1,
               "fp": 1,
               "fn": 3
-            }
-          }
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 0
+            }
+          }
         },
         {
           "url": "https://github.com/getsentry/sentry/pull/95633",
@@ -8583,6 +8923,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 4,
+              "fn": 2
             }
           }
         },
@@ -8703,6 +9048,11 @@
               "tp": 2,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -8823,6 +9173,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -8943,6 +9298,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -9063,6 +9423,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -9183,6 +9548,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -9303,6 +9673,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 0
             }
           }
         },
@@ -9423,6 +9798,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -9543,6 +9923,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -9663,6 +10048,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -9783,6 +10173,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -9903,6 +10298,11 @@
               "tp": 0,
               "fp": 4,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -10023,6 +10423,11 @@
               "tp": 2,
               "fp": 4,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -10143,6 +10548,11 @@
               "tp": 0,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -10263,6 +10673,11 @@
               "tp": 0,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 6,
+              "fn": 0
             }
           }
         },
@@ -10383,6 +10798,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 2
             }
           }
         },
@@ -10503,6 +10923,11 @@
               "tp": 2,
               "fp": 0,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 1,
+              "fn": 3
             }
           }
         },
@@ -10623,6 +11048,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -10743,6 +11173,11 @@
               "tp": 0,
               "fp": 4,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -10863,6 +11298,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -10983,6 +11423,11 @@
               "tp": 0,
               "fp": 4,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -11103,6 +11548,11 @@
               "tp": 2,
               "fp": 3,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 7,
+              "fn": 2
             }
           }
         },
@@ -11223,6 +11673,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -11343,6 +11798,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -11463,6 +11923,11 @@
               "tp": 1,
               "fp": 4,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -11583,6 +12048,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -11703,6 +12173,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -11823,6 +12298,11 @@
               "tp": 0,
               "fp": 5,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 0,
+              "fn": 2
             }
           }
         },
@@ -11943,6 +12423,11 @@
               "tp": 3,
               "fp": 5,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 2,
+              "fn": 2
             }
           }
         },
@@ -12063,6 +12548,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 4,
+              "fn": 0
             }
           }
         },
@@ -12183,6 +12673,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -12303,6 +12798,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -12423,6 +12923,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 5,
+              "fn": 2
             }
           }
         },
@@ -12543,6 +13048,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 4,
+              "fn": 1
             }
           }
         }
@@ -12553,6 +13063,7 @@
         "bugbot",
         "claude",
         "claude-code",
+        "cloudaeye",
         "codeant-v2",
         "coderabbit",
         "copilot",
@@ -12637,67 +13148,40 @@
         ]
       },
       "overall_metrics": {
-        "greptile-v4-1": {
-          "precision": 36.5,
-          "recall": 45.3,
-          "f1": 40.4,
-          "tp": 62,
-          "fp": 108,
-          "fn": 75,
-          "num_prs": 50
-        },
-        "copilot": {
-          "precision": 27.2,
-          "recall": 51.1,
-          "f1": 35.5,
-          "tp": 70,
-          "fp": 187,
-          "fn": 67,
-          "num_prs": 50
-        },
-        "propel": {
-          "precision": 52.6,
-          "recall": 36.5,
-          "f1": 43.1,
-          "tp": 50,
-          "fp": 45,
-          "fn": 87,
-          "num_prs": 50
-        },
-        "kg": {
-          "precision": 44.7,
-          "recall": 15.3,
-          "f1": 22.8,
-          "tp": 21,
-          "fp": 26,
-          "fn": 116,
+        "graphite": {
+          "precision": 100.0,
+          "recall": 8.8,
+          "f1": 16.1,
+          "tp": 12,
+          "fp": 0,
+          "fn": 125,
           "num_prs": 50
         },
-        "qodo-extended-v2": {
-          "precision": 52.5,
-          "recall": 60.6,
-          "f1": 56.3,
-          "tp": 83,
-          "fp": 75,
-          "fn": 54,
+        "devin": {
+          "precision": 54.2,
+          "recall": 38.0,
+          "f1": 44.6,
+          "tp": 52,
+          "fp": 44,
+          "fn": 85,
           "num_prs": 50
         },
-        "gemini": {
-          "precision": 29.7,
-          "recall": 35.8,
-          "f1": 32.5,
-          "tp": 49,
-          "fp": 116,
-          "fn": 88,
+        "qodo-extended": {
+          "precision": 35.3,
+          "recall": 61.3,
+          "f1": 44.8,
+          "tp": 84,
+          "fp": 154,
+          "fn": 53,
           "num_prs": 50
         },
-        "qodo-v2": {
-          "precision": 40.5,
-          "recall": 56.2,
-          "f1": 47.1,
-          "tp": 77,
-          "fp": 113,
-          "fn": 60,
+        "sourcery": {
+          "precision": 33.3,
+          "recall": 51.8,
+          "f1": 40.6,
+          "tp": 71,
+          "fp": 142,
+          "fn": 66,
           "num_prs": 50
         },
         "kodus-v2": {
@@ -12709,31 +13193,22 @@
           "fn": 89,
           "num_prs": 50
         },
-        "coderabbit": {
-          "precision": 27.5,
-          "recall": 56.9,
-          "f1": 37.1,
-          "tp": 78,
-          "fp": 206,
-          "fn": 59,
+        "baz": {
+          "precision": 49.0,
+          "recall": 35.8,
+          "f1": 41.4,
+          "tp": 49,
+          "fp": 51,
+          "fn": 88,
           "num_prs": 50
         },
-        "macroscope": {
-          "precision": 45.8,
-          "recall": 43.8,
-          "f1": 44.8,
-          "tp": 60,
-          "fp": 71,
-          "fn": 77,
-          "num_prs": 50
-        },
-        "sourcery": {
-          "precision": 33.3,
-          "recall": 51.8,
-          "f1": 40.6,
-          "tp": 71,
-          "fp": 142,
-          "fn": 66,
+        "cubic-v2": {
+          "precision": 55.6,
+          "recall": 68.6,
+          "f1": 61.4,
+          "tp": 94,
+          "fp": 75,
+          "fn": 43,
           "num_prs": 50
         },
         "augment": {
@@ -12745,42 +13220,24 @@
           "fn": 50,
           "num_prs": 50
         },
-        "codeant-v2": {
-          "precision": 31.1,
+        "qodo-extended-v2": {
+          "precision": 52.5,
+          "recall": 60.6,
+          "f1": 56.3,
+          "tp": 83,
+          "fp": 75,
+          "fn": 54,
+          "num_prs": 50
+        },
+        "propel": {
+          "precision": 52.6,
           "recall": 36.5,
-          "f1": 33.6,
+          "f1": 43.1,
           "tp": 50,
-          "fp": 111,
+          "fp": 45,
           "fn": 87,
           "num_prs": 50
         },
-        "qodo-extended": {
-          "precision": 35.3,
-          "recall": 61.3,
-          "f1": 44.8,
-          "tp": 84,
-          "fp": 154,
-          "fn": 53,
-          "num_prs": 50
-        },
-        "devin": {
-          "precision": 54.2,
-          "recall": 38.0,
-          "f1": 44.6,
-          "tp": 52,
-          "fp": 44,
-          "fn": 85,
-          "num_prs": 50
-        },
-        "claude": {
-          "precision": 35.7,
-          "recall": 40.1,
-          "f1": 37.8,
-          "tp": 55,
-          "fp": 99,
-          "fn": 82,
-          "num_prs": 50
-        },
         "bugbot": {
           "precision": 45.4,
           "recall": 43.1,
@@ -12790,6 +13247,24 @@
           "fn": 78,
           "num_prs": 50
         },
+        "greptile-v4-1": {
+          "precision": 36.5,
+          "recall": 45.3,
+          "f1": 40.4,
+          "tp": 62,
+          "fp": 108,
+          "fn": 75,
+          "num_prs": 50
+        },
+        "qodo-v2": {
+          "precision": 40.5,
+          "recall": 56.2,
+          "f1": 47.1,
+          "tp": 77,
+          "fp": 113,
+          "fn": 60,
+          "num_prs": 50
+        },
         "claude-code": {
           "precision": 30.7,
           "recall": 40.1,
@@ -12799,31 +13274,76 @@
           "fn": 82,
           "num_prs": 50
         },
-        "graphite": {
-          "precision": 100.0,
-          "recall": 8.8,
-          "f1": 16.1,
-          "tp": 12,
-          "fp": 0,
-          "fn": 125,
+        "gemini": {
+          "precision": 29.7,
+          "recall": 35.8,
+          "f1": 32.5,
+          "tp": 49,
+          "fp": 116,
+          "fn": 88,
           "num_prs": 50
         },
-        "cubic-v2": {
-          "precision": 55.6,
-          "recall": 68.6,
-          "f1": 61.4,
-          "tp": 94,
-          "fp": 75,
-          "fn": 43,
+        "coderabbit": {
+          "precision": 27.5,
+          "recall": 56.9,
+          "f1": 37.1,
+          "tp": 78,
+          "fp": 206,
+          "fn": 59,
           "num_prs": 50
         },
-        "baz": {
-          "precision": 49.0,
-          "recall": 35.8,
-          "f1": 41.4,
-          "tp": 49,
-          "fp": 51,
-          "fn": 88,
+        "macroscope": {
+          "precision": 45.8,
+          "recall": 43.8,
+          "f1": 44.8,
+          "tp": 60,
+          "fp": 71,
+          "fn": 77,
+          "num_prs": 50
+        },
+        "claude": {
+          "precision": 35.7,
+          "recall": 40.1,
+          "f1": 37.8,
+          "tp": 55,
+          "fp": 99,
+          "fn": 82,
+          "num_prs": 50
+        },
+        "cloudaeye": {
+          "precision": 59.6,
+          "recall": 72.3,
+          "f1": 65.3,
+          "tp": 99,
+          "fp": 67,
+          "fn": 38,
+          "num_prs": 50
+        },
+        "codeant-v2": {
+          "precision": 31.1,
+          "recall": 36.5,
+          "f1": 33.6,
+          "tp": 50,
+          "fp": 111,
+          "fn": 87,
+          "num_prs": 50
+        },
+        "kg": {
+          "precision": 44.7,
+          "recall": 15.3,
+          "f1": 22.8,
+          "tp": 21,
+          "fp": 26,
+          "fn": 116,
+          "num_prs": 50
+        },
+        "copilot": {
+          "precision": 27.2,
+          "recall": 51.1,
+          "f1": 35.5,
+          "tp": 70,
+          "fp": 187,
+          "fn": 67,
           "num_prs": 50
         }
       }
@@ -12952,6 +13472,11 @@
               "tp": 1,
               "fp": 4,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 3
             }
           }
         },
@@ -13077,6 +13602,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -13202,6 +13732,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -13327,6 +13862,11 @@
               "tp": 0,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -13452,6 +13992,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 2,
+              "fn": 0
             }
           }
         },
@@ -13577,6 +14122,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 4,
+              "fn": 0
             }
           }
         },
@@ -13702,6 +14252,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -13827,6 +14382,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -13952,6 +14512,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -14077,6 +14642,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -14202,6 +14772,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -14327,6 +14902,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -14452,6 +15032,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 0
             }
           }
         },
@@ -14577,6 +15162,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -14702,6 +15292,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -14827,6 +15422,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -14952,6 +15552,11 @@
               "tp": 0,
               "fp": 5,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 4,
+              "fn": 2
             }
           }
         },
@@ -15077,6 +15682,11 @@
               "tp": 2,
               "fp": 4,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -15202,6 +15812,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -15327,6 +15942,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -15452,6 +16072,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -15577,6 +16202,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -15702,6 +16332,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -15827,6 +16462,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -15952,6 +16592,11 @@
               "tp": 1,
               "fp": 4,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -16077,6 +16722,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -16202,9 +16852,14 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
-            }
-          }
-        },
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
+            }
+          }
+        },
         {
           "url": "https://github.com/grafana/grafana/pull/107534",
           "language": "Go",
@@ -16327,6 +16982,11 @@
               "tp": 0,
               "fp": 5,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -16452,6 +17112,11 @@
               "tp": 2,
               "fp": 4,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -16577,6 +17242,11 @@
               "tp": 0,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -16702,6 +17372,11 @@
               "tp": 0,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 5,
+              "fn": 0
             }
           }
         },
@@ -16827,6 +17502,11 @@
               "tp": 1,
               "fp": 4,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 2
             }
           }
         },
@@ -16952,6 +17632,11 @@
               "tp": 1,
               "fp": 0,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 1,
+              "fn": 3
             }
           }
         },
@@ -17077,6 +17762,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -17202,6 +17892,11 @@
               "tp": 1,
               "fp": 3,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -17327,6 +18022,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -17452,6 +18152,11 @@
               "tp": 0,
               "fp": 4,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -17577,6 +18282,11 @@
               "tp": 2,
               "fp": 3,
               "fn": 4
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 8,
+              "fn": 3
             }
           }
         },
@@ -17702,6 +18412,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -17827,6 +18542,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 1,
+              "fn": 1
             }
           }
         },
@@ -17952,6 +18672,11 @@
               "tp": 0,
               "fp": 5,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 2,
+              "fn": 1
             }
           }
         },
@@ -18077,6 +18802,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 0
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -18202,6 +18932,11 @@
               "tp": 1,
               "fp": 1,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 0,
+              "fn": 0
             }
           }
         },
@@ -18327,6 +19062,11 @@
               "tp": 0,
               "fp": 3,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 0,
+              "fp": 0,
+              "fn": 2
             }
           }
         },
@@ -18452,6 +19192,11 @@
               "tp": 4,
               "fp": 5,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 3,
+              "fp": 3,
+              "fn": 2
             }
           }
         },
@@ -18577,6 +19322,11 @@
               "tp": 1,
               "fp": 2,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 2,
+              "fp": 4,
+              "fn": 0
             }
           }
         },
@@ -18702,6 +19452,11 @@
               "tp": 3,
               "fp": 2,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 0,
+              "fn": 1
             }
           }
         },
@@ -18827,6 +19582,11 @@
               "tp": 2,
               "fp": 1,
               "fn": 2
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 1,
+              "fn": 0
             }
           }
         },
@@ -18952,6 +19712,11 @@
               "tp": 2,
               "fp": 2,
               "fn": 3
+            },
+            "cloudaeye": {
+              "tp": 4,
+              "fp": 4,
+              "fn": 1
             }
           }
         },
@@ -19077,6 +19842,11 @@
               "tp": 1,
               "fp": 6,
               "fn": 1
+            },
+            "cloudaeye": {
+              "tp": 1,
+              "fp": 3,
+              "fn": 1
             }
           }
         }
@@ -19087,6 +19857,7 @@
         "bugbot",
         "claude",
         "claude-code",
+        "cloudaeye",
         "codeant-v2",
         "coderabbit",
         "copilot",
@@ -19172,49 +19943,76 @@
         ]
       },
       "overall_metrics": {
-        "greptile-v4-1": {
-          "precision": 34.6,
-          "recall": 46.0,
-          "f1": 39.5,
-          "tp": 63,
-          "fp": 119,
-          "fn": 74,
+        "graphite": {
+          "precision": 80.0,
+          "recall": 8.8,
+          "f1": 15.8,
+          "tp": 12,
+          "fp": 3,
+          "fn": 125,
           "num_prs": 50
         },
-        "copilot": {
-          "precision": 24.7,
-          "recall": 52.6,
-          "f1": 33.6,
-          "tp": 72,
-          "fp": 220,
-          "fn": 65,
+        "devin": {
+          "precision": 46.4,
+          "recall": 37.2,
+          "f1": 41.3,
+          "tp": 51,
+          "fp": 59,
+          "fn": 86,
           "num_prs": 50
         },
-        "propel": {
-          "precision": 45.0,
-          "recall": 36.5,
-          "f1": 40.3,
-          "tp": 50,
-          "fp": 61,
-          "fn": 87,
+        "qodo-extended": {
+          "precision": 31.4,
+          "recall": 59.9,
+          "f1": 41.2,
+          "tp": 82,
+          "fp": 179,
+          "fn": 55,
           "num_prs": 50
         },
-        "kg": {
-          "precision": 51.1,
-          "recall": 16.8,
-          "f1": 25.3,
-          "tp": 23,
-          "fp": 22,
-          "fn": 114,
+        "sourcery": {
+          "precision": 29.8,
+          "recall": 53.3,
+          "f1": 38.2,
+          "tp": 73,
+          "fp": 172,
+          "fn": 64,
           "num_prs": 50
         },
-        "gemini": {
-          "precision": 26.3,
-          "recall": 33.6,
-          "f1": 29.5,
-          "tp": 46,
-          "fp": 129,
-          "fn": 91,
+        "kodus-v2": {
+          "precision": 35.9,
+          "recall": 34.3,
+          "f1": 35.1,
+          "tp": 47,
+          "fp": 84,
+          "fn": 90,
+          "num_prs": 50
+        },
+        "baz": {
+          "precision": 39.5,
+          "recall": 34.3,
+          "f1": 36.7,
+          "tp": 47,
+          "fp": 72,
+          "fn": 90,
+          "num_prs": 50
+        },
+        "cubic-v2": {
+          "precision": 53.6,
+          "recall": 65.7,
+          "f1": 59.0,
+          "tp": 90,
+          "fp": 78,
+          "fn": 47,
+          "num_prs": 50
+        },
+        "augment": {
+          "precision": 41.9,
+          "recall": 60.6,
+          "f1": 49.6,
+          "tp": 83,
+          "fp": 115,
+          "fn": 54,
           "num_prs": 50
         },
         "qodo-extended-v2": {
@@ -19226,6 +20024,33 @@
           "fn": 59,
           "num_prs": 50
         },
+        "propel": {
+          "precision": 45.0,
+          "recall": 36.5,
+          "f1": 40.3,
+          "tp": 50,
+          "fp": 61,
+          "fn": 87,
+          "num_prs": 50
+        },
+        "bugbot": {
+          "precision": 43.2,
+          "recall": 43.8,
+          "f1": 43.5,
+          "tp": 60,
+          "fp": 79,
+          "fn": 77,
+          "num_prs": 50
+        },
+        "greptile-v4-1": {
+          "precision": 34.6,
+          "recall": 46.0,
+          "f1": 39.5,
+          "tp": 63,
+          "fp": 119,
+          "fn": 74,
+          "num_prs": 50
+        },
         "qodo-v2": {
           "precision": 36.2,
           "recall": 56.2,
@@ -19235,13 +20060,22 @@
           "fn": 60,
           "num_prs": 50
         },
-        "kodus-v2": {
-          "precision": 35.9,
-          "recall": 34.3,
-          "f1": 35.1,
-          "tp": 47,
-          "fp": 84,
-          "fn": 90,
+        "claude-code": {
+          "precision": 27.4,
+          "recall": 41.6,
+          "f1": 33.0,
+          "tp": 57,
+          "fp": 151,
+          "fn": 80,
+          "num_prs": 50
+        },
+        "gemini": {
+          "precision": 26.3,
+          "recall": 33.6,
+          "f1": 29.5,
+          "tp": 46,
+          "fp": 129,
+          "fn": 91,
           "num_prs": 50
         },
         "coderabbit": {
@@ -19262,51 +20096,6 @@
           "fn": 81,
           "num_prs": 50
         },
-        "sourcery": {
-          "precision": 29.8,
-          "recall": 53.3,
-          "f1": 38.2,
-          "tp": 73,
-          "fp": 172,
-          "fn": 64,
-          "num_prs": 50
-        },
-        "augment": {
-          "precision": 41.9,
-          "recall": 60.6,
-          "f1": 49.6,
-          "tp": 83,
-          "fp": 115,
-          "fn": 54,
-          "num_prs": 50
-        },
-        "codeant-v2": {
-          "precision": 25.7,
-          "recall": 34.3,
-          "f1": 29.4,
-          "tp": 47,
-          "fp": 136,
-          "fn": 90,
-          "num_prs": 50
-        },
-        "qodo-extended": {
-          "precision": 31.4,
-          "recall": 59.9,
-          "f1": 41.2,
-          "tp": 82,
-          "fp": 179,
-          "fn": 55,
-          "num_prs": 50
-        },
-        "devin": {
-          "precision": 46.4,
-          "recall": 37.2,
-          "f1": 41.3,
-          "tp": 51,
-          "fp": 59,
-          "fn": 86,
-          "num_prs": 50
-        },
         "propel-v2": {
           "precision": 36.6,
           "recall": 46.0,
@@ -19325,49 +20114,40 @@
           "fn": 86,
           "num_prs": 50
         },
-        "bugbot": {
-          "precision": 43.2,
-          "recall": 43.8,
-          "f1": 43.5,
-          "tp": 60,
-          "fp": 79,
-          "fn": 77,
-          "num_prs": 50
-        },
-        "claude-code": {
-          "precision": 27.4,
-          "recall": 41.6,
-          "f1": 33.0,
-          "tp": 57,
-          "fp": 151,
-          "fn": 80,
+        "cloudaeye": {
+          "precision": 57.6,
+          "recall": 71.5,
+          "f1": 63.8,
+          "tp": 98,
+          "fp": 72,
+          "fn": 39,
           "num_prs": 50
         },
-        "graphite": {
-          "precision": 80.0,
-          "recall": 8.8,
-          "f1": 15.8,
-          "tp": 12,
-          "fp": 3,
-          "fn": 125,
+        "codeant-v2": {
+          "precision": 25.7,
+          "recall": 34.3,
+          "f1": 29.4,
+          "tp": 47,
+          "fp": 136,
+          "fn": 90,
           "num_prs": 50
         },
-        "cubic-v2": {
-          "precision": 53.6,
-          "recall": 65.7,
-          "f1": 59.0,
-          "tp": 90,
-          "fp": 78,
-          "fn": 47,
+        "kg": {
+          "precision": 51.1,
+          "recall": 16.8,
+          "f1": 25.3,
+          "tp": 23,
+          "fp": 22,
+          "fn": 114,
           "num_prs": 50
         },
-        "baz": {
-          "precision": 39.5,
-          "recall": 34.3,
-          "f1": 36.7,
-          "tp": 47,
-          "fp": 72,
-          "fn": 90,
+        "copilot": {
+          "precision": 24.7,
+          "recall": 52.6,
+          "f1": 33.6,
+          "tp": 72,
+          "fp": 220,
+          "fn": 65,
           "num_prs": 50
         }
       }
@@ -19384,6 +20164,23 @@
       "best_tool": "graphite",
       "best_score": 100.0
     },
+    {
+      "id": "tool_claude_language_python_risk_medium",
+      "label": "Python + Medium Risk (Precision)",
+      "filters": {
+        "language": [
+          "Python"
+        ],
+        "risk": [
+          "medium"
+        ]
+      },
+      "description": "Python codebases with dynamic typing. Moderate user impact.",
+      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "best_tool": "claude",
+      "best_score": 100.0,
+      "sort": "precision"
+    },
     {
       "id": "tool_kodus-v2_domain_concurrency",
       "label": "Best for Concurrency (Precision)",
@@ -19429,6 +20226,36 @@
       "best_score": 100.0,
       "sort": "precision"
     },
+    {
+      "id": "tool_coderabbit_language_typescript_domain_scheduling",
+      "label": "Typescript + Scheduling (Recall)",
+      "filters": {
+        "language": [
+          "TypeScript"
+        ],
+        "domain": [
+          "scheduling"
+        ]
+      },
+      "description": "TypeScript codebases with frontend patterns. Task scheduling.",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "coderabbit",
+      "best_score": 87.5,
+      "sort": "recall"
+    },
+    {
+      "id": "change_type_performance",
+      "label": "Best for Performance Optimization",
+      "filters": {
+        "change_type": [
+          "performance"
+        ]
+      },
+      "description": "Performance optimization changes",
+      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "best_tool": "cloudaeye",
+      "best_score": 82.4
+    },
     {
       "id": "tool_copilot_change_type_bug_fix",
       "label": "Best for Bug Fixes (Recall)",
@@ -19444,17 +20271,20 @@
       "sort": "recall"
     },
     {
-      "id": "change_type_performance",
-      "label": "Best for Performance Optimization",
+      "id": "go_small",
+      "label": "Best for Small Go PRs",
       "filters": {
-        "change_type": [
-          "performance"
+        "language": [
+          "Go"
+        ],
+        "pr_size": [
+          "small"
         ]
       },
-      "description": "Performance optimization changes",
+      "description": "Go codebases with concurrency patterns. Small PRs with 1-2 files, easier to review thoroughly",
       "best_model": "anthropic_claude-opus-4-5-20251101",
       "best_tool": "cubic-v2",
-      "best_score": 81.1
+      "best_score": 77.4
     },
     {
       "id": "tool_baz_language_java_domain_authentication",
@@ -19472,19 +20302,6 @@
       "best_tool": "baz",
       "best_score": 76.9
     },
-    {
-      "id": "domain_caching",
-      "label": "Best for Caching",
-      "filters": {
-        "domain": [
-          "caching"
-        ]
-      },
-      "description": "Cache and memoization.",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 76.2
-    },
     {
       "id": "tool_codeant-v2_pr_size_small_change_type_performance",
       "label": "Small PRs + Performance Optimization (Precision)",
@@ -19497,7 +20314,7 @@
         ]
       },
       "description": "Small PRs with 1-2 files, easier to review thoroughly Performance optimization changes",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "best_model": "openai_gpt-5.2",
       "best_tool": "codeant-v2",
       "best_score": 75.0,
       "sort": "precision"
@@ -19518,6 +20335,19 @@
       "best_tool": "devin",
       "best_score": 72.7
     },
+    {
+      "id": "change_type_bug_fix",
+      "label": "Best for Bug Fixes",
+      "filters": {
+        "change_type": [
+          "bug_fix"
+        ]
+      },
+      "description": "Bug fixes and issue resolution",
+      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "best_tool": "qodo-extended-v2",
+      "best_score": 71.8
+    },
     {
       "id": "tool_sourcery_language_typescript_concern_correctness",
       "label": "Typescript + Correctness",
@@ -19534,6 +20364,23 @@
       "best_tool": "sourcery",
       "best_score": 71.8
     },
+    {
+      "id": "tool_gemini_language_ruby_pr_size_medium",
+      "label": "Ruby + Medium PRs (Recall)",
+      "filters": {
+        "language": [
+          "Ruby"
+        ],
+        "pr_size": [
+          "medium"
+        ]
+      },
+      "description": "Ruby codebases with Rails patterns. Medium PRs with 3-5 files, typical feature development",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "gemini",
+      "best_score": 69.2,
+      "sort": "recall"
+    },
     {
       "id": "domain_ui",
       "label": "Best for Ui",
@@ -19547,22 +20394,6 @@
       "best_tool": "bugbot",
       "best_score": 66.7
     },
-    {
-      "id": "tool_macroscope_context_file_concern_correctness",
-      "label": "File Context + Correctness",
-      "filters": {
-        "context": [
-          "file"
-        ],
-        "concern": [
-          "correctness"
-        ]
-      },
-      "description": "Requires full file understanding. Logical correctness and expected behavior",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "macroscope",
-      "best_score": 66.7
-    },
     {
       "id": "tool_qodo-extended_change_type_bug_fix_context_cross_file",
       "label": "Bug Fixes + Cross-File",
@@ -19580,36 +20411,17 @@
       "best_score": 66.7
     },
     {
-      "id": "tool_qodo-v2_domain_authentication_concern_correctness",
-      "label": "Authentication + Correctness",
+      "id": "tool_qodo-v2_concern_reliability",
+      "label": "Best for Reliability",
       "filters": {
-        "domain": [
-          "authentication"
-        ],
         "concern": [
-          "correctness"
+          "reliability"
         ]
       },
-      "description": "Auth and access control. Logical correctness and expected behavior",
+      "description": "Error handling and system stability",
       "best_model": "anthropic_claude-opus-4-5-20251101",
       "best_tool": "qodo-v2",
-      "best_score": 62.3
-    },
-    {
-      "id": "tool_gemini_pr_size_medium_context_file",
-      "label": "Medium PRs + File Context",
-      "filters": {
-        "pr_size": [
-          "medium"
-        ],
-        "context": [
-          "file"
-        ]
-      },
-      "description": "Medium PRs with 3-5 files, typical feature development Requires full file understanding.",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "gemini",
-      "best_score": 61.1
+      "best_score": 60.9
     },
     {
       "id": "tool_augment_domain_concurrency",
@@ -19625,23 +20437,23 @@
       "best_score": 60.6
     },
     {
-      "id": "tool_claude_difficulty_moderate_context_file",
-      "label": "Moderate Bugs + File Context",
+      "id": "tool_propel_language_ruby_concern_correctness",
+      "label": "Ruby + Correctness",
       "filters": {
-        "difficulty": [
-          "moderate"
+        "language": [
+          "Ruby"
         ],
-        "context": [
-          "file"
+        "concern": [
+          "correctness"
         ]
       },
-      "description": "Requires careful reading. Requires full file understanding.",
-      "best_model": "anthropic_claude-sonnet-4-5-20250929",
-      "best_tool": "claude",
-      "best_score": 59.3
+      "description": "Ruby codebases with Rails patterns. Logical correctness and expected behavior",
+      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "best_tool": "propel",
+      "best_score": 57.9
     },
     {
-      "id": "tool_propel_language_ruby_concern_correctness",
+      "id": "tool_macroscope_language_ruby_concern_correctness",
       "label": "Ruby + Correctness",
       "filters": {
         "language": [
@@ -19653,53 +20465,47 @@
       },
       "description": "Ruby codebases with Rails patterns. Logical correctness and expected behavior",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "propel",
+      "best_tool": "macroscope",
       "best_score": 57.9
     },
     {
-      "id": "tool_coderabbit_difficulty_moderate_risk_medium",
-      "label": "Moderate Bugs + Medium Risk",
+      "id": "domain_caching",
+      "label": "Best for Caching",
       "filters": {
-        "difficulty": [
-          "moderate"
-        ],
-        "risk": [
-          "medium"
+        "domain": [
+          "caching"
         ]
       },
-      "description": "Requires careful reading. Moderate user impact.",
+      "description": "Cache and memoization.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "coderabbit",
-      "best_score": 47.5
+      "best_tool": "cloudaeye",
+      "best_score": 76.2
     },
     {
-      "id": "go_small",
-      "label": "Best for Small Go PRs",
+      "id": "language_go",
+      "label": "Best for Go",
       "filters": {
         "language": [
           "Go"
-        ],
-        "pr_size": [
-          "small"
         ]
       },
-      "description": "Go codebases with concurrency patterns. Small PRs with 1-2 files, easier to review thoroughly",
+      "description": "Go codebases with concurrency patterns.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 77.4
+      "best_tool": "cloudaeye",
+      "best_score": 76.0
     },
     {
-      "id": "language_go",
-      "label": "Best for Go",
+      "id": "context_file",
+      "label": "Best for File Context",
       "filters": {
-        "language": [
-          "Go"
+        "context": [
+          "file"
         ]
       },
-      "description": "Go codebases with concurrency patterns.",
+      "description": "Requires full file understanding.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 75.5
+      "best_tool": "cloudaeye",
+      "best_score": 75.4
     },
     {
       "id": "pr_size_small",
@@ -19715,46 +20521,47 @@
       "best_score": 75.3
     },
     {
-      "id": "change_type_bug_fix",
-      "label": "Best for Bug Fixes",
+      "id": "domain_scheduling",
+      "label": "Best for Scheduling",
       "filters": {
-        "change_type": [
-          "bug_fix"
+        "domain": [
+          "scheduling"
         ]
       },
-      "description": "Bug fixes and issue resolution",
+      "description": "Task scheduling.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 71.8
+      "best_tool": "cloudaeye",
+      "best_score": 75.0
     },
     {
-      "id": "python_medium",
-      "label": "Best for Medium Python PRs",
+      "id": "concern_security",
+      "label": "Best for Security",
       "filters": {
-        "language": [
-          "Python"
-        ],
-        "pr_size": [
-          "medium"
+        "concern": [
+          "security"
         ]
       },
-      "description": "Python codebases with dynamic typing. Medium PRs with 3-5 files, typical feature development",
+      "description": "Security vulnerabilities and attack vectors",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 71.0
+      "best_tool": "cloudaeye",
+      "best_score": 74.4
     },
     {
-      "id": "language_python",
-      "label": "Best for Python",
+      "id": "security_critical",
+      "label": "Security Critical",
       "filters": {
-        "language": [
-          "Python"
+        "concern": [
+          "security"
+        ],
+        "risk": [
+          "high",
+          "critical"
         ]
       },
-      "description": "Python codebases with dynamic typing.",
+      "description": "Security vulnerabilities and attack vectors Significant impact, potential data loss.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 70.6
+      "best_tool": "cloudaeye",
+      "best_score": 74.4
     },
     {
       "id": "risk_high",
@@ -19766,21 +20573,21 @@
       },
       "description": "Significant impact, potential data loss.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 70.2
+      "best_tool": "cloudaeye",
+      "best_score": 72.9
     },
     {
-      "id": "risk_critical",
-      "label": "Best for Critical Risk",
+      "id": "language_python",
+      "label": "Best for Python",
       "filters": {
-        "risk": [
-          "critical"
+        "language": [
+          "Python"
         ]
       },
-      "description": "Critical security or data corruption risk.",
-      "best_model": "anthropic_claude-sonnet-4-5-20250929",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 69.8
+      "description": "Python codebases with dynamic typing.",
+      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "best_tool": "cloudaeye",
+      "best_score": 72.7
     },
     {
       "id": "high_recall",
@@ -19789,90 +20596,50 @@
       "sort": "recall",
       "description": "Tools ranked by recall - catches more issues, may have more noise",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 68.6
+      "best_tool": "cloudaeye",
+      "best_score": 72.3
     },
     {
-      "id": "domain_scheduling",
-      "label": "Best for Scheduling",
-      "filters": {
-        "domain": [
-          "scheduling"
-        ]
-      },
-      "description": "Task scheduling.",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 68.4
-    },
-    {
-      "id": "complexity_complex",
-      "label": "Best for Complex Code",
-      "filters": {
-        "complexity": [
-          "complex"
-        ]
-      },
-      "description": "Deep logic and dependencies.",
-      "best_model": "anthropic_claude-sonnet-4-5-20250929",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 67.7
-    },
-    {
-      "id": "complex_subtle",
-      "label": "Complex & Subtle",
+      "id": "python_medium",
+      "label": "Best for Medium Python PRs",
       "filters": {
-        "complexity": [
-          "complex"
+        "language": [
+          "Python"
         ],
-        "difficulty": [
-          "subtle",
-          "very_subtle"
-        ]
-      },
-      "description": "Deep logic and dependencies. Non-obvious, needs domain knowledge.",
-      "best_model": "anthropic_claude-sonnet-4-5-20250929",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 67.7
-    },
-    {
-      "id": "context_file",
-      "label": "Best for File Context",
-      "filters": {
-        "context": [
-          "file"
+        "pr_size": [
+          "medium"
         ]
       },
-      "description": "Requires full file understanding.",
+      "description": "Python codebases with dynamic typing. Medium PRs with 3-5 files, typical feature development",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 67.6
+      "best_tool": "cloudaeye",
+      "best_score": 72.2
     },
     {
-      "id": "language_java",
-      "label": "Best for Java",
+      "id": "domain_authentication",
+      "label": "Best for Authentication",
       "filters": {
-        "language": [
-          "Java"
+        "domain": [
+          "authentication"
         ]
       },
-      "description": "Java codebases with OOP patterns.",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 66.7
+      "description": "Auth and access control.",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "cloudaeye",
+      "best_score": 72.0
     },
     {
-      "id": "difficulty_subtle",
-      "label": "Best for Subtle Bugs",
+      "id": "difficulty_moderate",
+      "label": "Best for Moderate Bugs",
       "filters": {
         "difficulty": [
-          "subtle"
+          "moderate"
         ]
       },
-      "description": "Non-obvious, needs domain knowledge.",
+      "description": "Requires careful reading.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 64.1
+      "best_tool": "cloudaeye",
+      "best_score": 72.0
     },
     {
       "id": "high_risk_auth",
@@ -19888,90 +20655,106 @@
       },
       "description": "Significant impact, potential data loss. Auth and access control.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 63.2
+      "best_tool": "cloudaeye",
+      "best_score": 71.0
     },
     {
-      "id": "concern_correctness",
-      "label": "Best for Correctness",
+      "id": "risk_critical",
+      "label": "Best for Critical Risk",
       "filters": {
-        "concern": [
-          "correctness"
+        "risk": [
+          "critical"
         ]
       },
-      "description": "Logical correctness and expected behavior",
+      "description": "Critical security or data corruption risk.",
       "best_model": "anthropic_claude-sonnet-4-5-20250929",
-      "best_tool": "cubic-v2",
-      "best_score": 62.8
+      "best_tool": "qodo-extended-v2",
+      "best_score": 69.8
     },
     {
-      "id": "concern_reliability",
-      "label": "Best for Reliability",
+      "id": "java_medium",
+      "label": "Best for Medium Java PRs",
       "filters": {
-        "concern": [
-          "reliability"
+        "language": [
+          "Java"
+        ],
+        "pr_size": [
+          "medium"
         ]
       },
-      "description": "Error handling and system stability",
-      "best_model": "openai_gpt-5.2",
-      "best_tool": "cubic-v2",
-      "best_score": 62.5
+      "description": "Java codebases with OOP patterns. Medium PRs with 3-5 files, typical feature development",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "cloudaeye",
+      "best_score": 68.6
     },
     {
-      "id": "context_cross_file",
-      "label": "Best for Cross-File",
+      "id": "change_type_feature",
+      "label": "Best for Features",
       "filters": {
-        "context": [
-          "cross_file"
+        "change_type": [
+          "feature"
         ]
       },
-      "description": "Spans multiple files.",
+      "description": "New feature implementation",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 62.0
+      "best_tool": "cloudaeye",
+      "best_score": 68.3
     },
     {
-      "id": "concern_security",
-      "label": "Best for Security",
+      "id": "complexity_moderate",
+      "label": "Best for Moderate Code",
       "filters": {
-        "concern": [
-          "security"
+        "complexity": [
+          "moderate"
         ]
       },
-      "description": "Security vulnerabilities and attack vectors",
+      "description": "Some abstraction.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 61.9
+      "best_tool": "cloudaeye",
+      "best_score": 68.2
     },
     {
-      "id": "security_critical",
-      "label": "Security Critical",
+      "id": "complexity_complex",
+      "label": "Best for Complex Code",
       "filters": {
-        "concern": [
-          "security"
-        ],
-        "risk": [
-          "high",
-          "critical"
+        "complexity": [
+          "complex"
         ]
       },
-      "description": "Security vulnerabilities and attack vectors Significant impact, potential data loss.",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 61.9
+      "description": "Deep logic and dependencies.",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "qodo-extended-v2",
+      "best_score": 67.7
     },
     {
-      "id": "complexity_moderate",
-      "label": "Best for Moderate Code",
+      "id": "complex_subtle",
+      "label": "Complex & Subtle",
       "filters": {
         "complexity": [
-          "moderate"
+          "complex"
+        ],
+        "difficulty": [
+          "subtle",
+          "very_subtle"
         ]
       },
-      "description": "Some abstraction.",
+      "description": "Deep logic and dependencies. Non-obvious, needs domain knowledge.",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "qodo-extended-v2",
+      "best_score": 67.7
+    },
+    {
+      "id": "concern_correctness",
+      "label": "Best for Correctness",
+      "filters": {
+        "concern": [
+          "correctness"
+        ]
+      },
+      "description": "Logical correctness and expected behavior",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 61.8
+      "best_tool": "cloudaeye",
+      "best_score": 67.3
     },
     {
       "id": "high_f1",
@@ -19980,102 +20763,86 @@
       "sort": "f1",
       "description": "Tools ranked by F1 score - balanced precision and recall",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 61.8
+      "best_tool": "cloudaeye",
+      "best_score": 66.9
     },
     {
-      "id": "language_typescript",
-      "label": "Best for Typescript",
+      "id": "language_java",
+      "label": "Best for Java",
       "filters": {
         "language": [
-          "TypeScript"
+          "Java"
         ]
       },
-      "description": "TypeScript codebases with frontend patterns.",
+      "description": "Java codebases with OOP patterns.",
       "best_model": "anthropic_claude-sonnet-4-5-20250929",
-      "best_tool": "devin",
-      "best_score": 61.8
-    },
-    {
-      "id": "domain_concurrency",
-      "label": "Best for Concurrency",
-      "filters": {
-        "domain": [
-          "concurrency"
-        ]
-      },
-      "description": "Threading and async operations.",
-      "best_model": "openai_gpt-5.2",
-      "best_tool": "cubic-v2",
-      "best_score": 61.1
+      "best_tool": "cloudaeye",
+      "best_score": 66.7
     },
     {
-      "id": "java_medium",
-      "label": "Best for Medium Java PRs",
+      "id": "pr_size_large",
+      "label": "Best for Large PRs",
       "filters": {
-        "language": [
-          "Java"
-        ],
         "pr_size": [
-          "medium"
+          "large"
         ]
       },
-      "description": "Java codebases with OOP patterns. Medium PRs with 3-5 files, typical feature development",
+      "description": "Large PRs with 6+ files, complex changes requiring careful review",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 61.1
+      "best_tool": "cloudaeye",
+      "best_score": 66.7
     },
     {
-      "id": "language_ruby",
-      "label": "Best for Ruby",
+      "id": "language_typescript",
+      "label": "Best for Typescript",
       "filters": {
         "language": [
-          "Ruby"
+          "TypeScript"
         ]
       },
-      "description": "Ruby codebases with Rails patterns.",
-      "best_model": "anthropic_claude-sonnet-4-5-20250929",
-      "best_tool": "cubic-v2",
-      "best_score": 61.0
+      "description": "TypeScript codebases with frontend patterns.",
+      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "best_tool": "cloudaeye",
+      "best_score": 65.7
     },
     {
-      "id": "pr_size_medium",
-      "label": "Best for Medium PRs",
+      "id": "difficulty_subtle",
+      "label": "Best for Subtle Bugs",
       "filters": {
-        "pr_size": [
-          "medium"
+        "difficulty": [
+          "subtle"
         ]
       },
-      "description": "Medium PRs with 3-5 files, typical feature development",
+      "description": "Non-obvious, needs domain knowledge.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "qodo-extended-v2",
-      "best_score": 60.4
+      "best_tool": "cloudaeye",
+      "best_score": 65.5
     },
     {
-      "id": "domain_authentication",
-      "label": "Best for Authentication",
+      "id": "context_cross_file",
+      "label": "Best for Cross-File",
       "filters": {
-        "domain": [
-          "authentication"
+        "context": [
+          "cross_file"
         ]
       },
-      "description": "Auth and access control.",
+      "description": "Spans multiple files.",
       "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 60.3
+      "best_tool": "cloudaeye",
+      "best_score": 64.0
     },
     {
-      "id": "change_type_feature",
-      "label": "Best for Features",
+      "id": "pr_size_medium",
+      "label": "Best for Medium PRs",
       "filters": {
-        "change_type": [
-          "feature"
+        "pr_size": [
+          "medium"
         ]
       },
-      "description": "New feature implementation",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
-      "best_tool": "cubic-v2",
-      "best_score": 59.9
+      "description": "Medium PRs with 3-5 files, typical feature development",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "cloudaeye",
+      "best_score": 63.8
     },
     {
       "id": "risk_medium",
@@ -20087,34 +20854,47 @@
       },
       "description": "Moderate user impact.",
       "best_model": "anthropic_claude-sonnet-4-5-20250929",
+      "best_tool": "cloudaeye",
+      "best_score": 63.6
+    },
+    {
+      "id": "concern_reliability",
+      "label": "Best for Reliability",
+      "filters": {
+        "concern": [
+          "reliability"
+        ]
+      },
+      "description": "Error handling and system stability",
+      "best_model": "openai_gpt-5.2",
       "best_tool": "cubic-v2",
-      "best_score": 58.8
+      "best_score": 62.5
     },
     {
-      "id": "pr_size_large",
-      "label": "Best for Large PRs",
+      "id": "domain_concurrency",
+      "label": "Best for Concurrency",
       "filters": {
-        "pr_size": [
-          "large"
+        "domain": [
+          "concurrency"
         ]
       },
-      "description": "Large PRs with 6+ files, complex changes requiring careful review",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "description": "Threading and async operations.",
+      "best_model": "openai_gpt-5.2",
       "best_tool": "cubic-v2",
-      "best_score": 58.3
+      "best_score": 61.1
     },
     {
-      "id": "difficulty_moderate",
-      "label": "Best for Moderate Bugs",
+      "id": "language_ruby",
+      "label": "Best for Ruby",
       "filters": {
-        "difficulty": [
-          "moderate"
+        "language": [
+          "Ruby"
         ]
       },
-      "description": "Requires careful reading.",
-      "best_model": "anthropic_claude-opus-4-5-20251101",
+      "description": "Ruby codebases with Rails patterns.",
+      "best_model": "anthropic_claude-sonnet-4-5-20250929",
       "best_tool": "cubic-v2",
-      "best_score": 52.8
+      "best_score": 61.0
     }
   ],
   "tool_display_names": {
@@ -20143,7 +20923,8 @@
     "qodo-v2": "Qodo v2",
     "qodo-extended-v2": "Qodo Extended",
     "macroscope": "Macroscope",
-    "cubic-v2": "Cubic v2"
+    "cubic-v2": "Cubic v2",
+    "cloudaeye": "CloudAEye"
   },
   "tool_colors": {
     "graphite": "#6366f1",
@@ -20172,7 +20953,8 @@
     "qodo-v2": "#7c3aed",
     "qodo-extended-v2": "#6d28d9",
     "macroscope": "#0891b2",
-    "cubic-v2": "#c026d3"
+    "cubic-v2": "#c026d3",
+    "cloudaeye": "#38bdf8"
   },
   "default_model": "anthropic_claude-opus-4-5-20251101",
   "min_prs_threshold": 5
diff --git a/offline/analysis/benchmark_dashboard.py b/offline/analysis/benchmark_dashboard.py
index 307b10b..7278220 100644
--- a/offline/analysis/benchmark_dashboard.py
+++ b/offline/analysis/benchmark_dashboard.py
@@ -69,6 +69,7 @@ def _is_hidden(tool: str) -> bool:
     "qodo-extended-v2": "Qodo Extended",
     "macroscope": "Macroscope",
     "cubic-v2": "Cubic v2",
+    "cloudaeye": "CloudAEye",
 }
 
 TOOL_COLORS = {
@@ -99,6 +100,7 @@ def _is_hidden(tool: str) -> bool:
     "qodo-extended-v2": "#6d28d9",
     "macroscope": "#0891b2",
     "cubic-v2": "#c026d3",
+    "cloudaeye": "#38bdf8",
 }
 
 
diff --git a/offline/results/anthropic_claude-opus-4-5-20251101/candidates.json b/offline/results/anthropic_claude-opus-4-5-20251101/candidates.json
index 730f434..2c84795 100644
--- a/offline/results/anthropic_claude-opus-4-5-20251101/candidates.json
+++ b/offline/results/anthropic_claude-opus-4-5-20251101/candidates.json
@@ -1020,6 +1020,44 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "GroupPermissionsV2.hasPermission() uses resourceStore.findByName(server, groupId) but getGroupIdsWithViewPermission() passes groupResource.getId() causing id-vs-name mismatch for per-group permissions",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GroupPermissions.canManage() narrowed to only check root.hasOneAdminRole(AdminRoles.MANAGE_USERS), breaking group management when default user-management grants exist without direct MANAGE_USERS role",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "RolePermissions role-mapping gated on root.hasOneAdminRole(AdminRoles.MANAGE_USERS) before checkAdminRoles(role), narrowing access compared to broader canManageDefault semantics used elsewhere",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AdminPermissions listener registration guarded by ADMIN_FINE_GRAINED_AUTHZ but V2 paths use ADMIN_FINE_GRAINED_AUTHZ_V2, causing permission cleanup to be skipped when V2 is enabled without V1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GroupResourceTypeEvaluationTest.testManageAllGroups expects 204 No Content for subgroup creation but GroupResource.addChild() returns 201 Created when creating new child groups",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GroupPermissionsV2.getGroupIdsWithViewPermission() has id/name mismatch when calling hasPermission with groupResource.getId() but hasPermission resolves with resourceStore.findByName treating argument as name",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/33832": {
@@ -2019,6 +2057,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "ASN1Decoder.readLength returns -1 for indefinite-length encoding, but callers like readInteger and readNext pass this negative length directly to read(length), causing NegativeArraySizeException when allocating new byte[length]",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ASN1Decoder.readLength() compares decoded length against total input limit rather than remaining bytes after tag/length bytes are consumed, incorrectly rejecting valid payloads whose content exactly fills remaining bytes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AuthzClientCryptoProvider creates two ASN1Encoder instances and writes r/s to them, but these instances are immediately discarded since the returned bytes are built from separate encoder instances - the standalone calls are dead code with no effect",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/40940": {
@@ -2847,6 +2905,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: reader thread is not joined before assertion - the test sets deletedAll flag and immediately asserts caughtExceptions is empty without waiting for the background thread to complete, allowing late exceptions to be appended after the assertion",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1": {
@@ -3673,6 +3739,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "isConditionalPasskeysEnabled(context.getUser()) requires user != null, causing authenticate/challenge to skip webauthnAuth.fillContextForm(context) on the initial login page before a user is selected, breaking conditional passkey UI",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In the failed-login error path, unknown-user attempts leave context.getUser() null, so the passkey UI setup via webauthnAuth.fillContextForm(context) is skipped on retry, causing passkey retry options to disappear",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/93824": {
@@ -4547,6 +4627,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: replacement process starts before prior exits - duplicate flushing can occur when killed process still runs during restart because _ensure_processes_alive kills an unhealthy worker and immediately calls _create_process_for_shards without any wait/join for prior exit",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Leaked processes when join deadline breaks loop early - remaining child processes keep running when timeout expires because the loop breaks entirely once deadline is exhausted, skipping terminate() calls for remaining entries",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Naming mismatch: shard/shards metric tags split dashboards - inconsistent metric tag keys used for same shard-set context ('shard' at line 185 vs 'shards' at line 199)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Timing race: sleep no longer waits - test_basic monkeypatches time.sleep to a no-op but then relies on time.sleep(0.1) to give flusher threads time to process, which contradicts the monkeypatch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5": {
@@ -5930,6 +6036,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "ValidationError: age=0 bypasses mutual-exclusion check - invalid mixed-spec reports pass validation when age or timestamp is zero because the check uses truthiness instead of presence",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Logic error from mispaired nodestore results - fetch_error_details pairs error_ids with events.values() by position instead of by key, but nodestore.backend.get_multi returns a dict with no positional contract, causing ids to be misassociated with wrong payloads",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "KeyError: update reads wrong validated_data key - BaseDetectorTypeValidator.update reads validated_data.get('detector_type') but the serializer declares field as 'type', causing updates to be ignored and fall back to instance.group_type",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Authorization telemetry recorded before feature check - analytics.record is called before the feature gate check, so denied requests still emit preprod_artifact.api.assemble events",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1": {
@@ -7184,6 +7316,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "AttributeError: organization_context.member.has_global_access is accessed without a None check, causing request handling to break when authenticated via API key or org auth token",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: OptimizedCursorPaginator.get_item_key applies math.floor/math.ceil to datetime values when used with order_by='-datetime', which will fail at runtime",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AssertionError: BasePaginator.get_result allows negative start_offset for previous-page cursors, causing Django QuerySet negative slicing crash",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AssertionError: OptimizedCursorPaginator.get_result explicitly allows negative cursor.offset when enable_advanced_features is true, causing Django QuerySet negative slicing crash",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/97529": {
@@ -7946,6 +8104,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition in TotalDocs: concurrent map access can panic when event watcher indexes while initialization logs total docs, as TotalDocs iterates b.cache with no lock while BuildIndex writes b.cache under b.cacheMu",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Nil pointer panic in History/Origin methods: s.search is dereferenced without nil guard, but search is optional in construction and other methods like Search have nil guards",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition causing duplicate index builds: the full-function cache lock was removed from BuildIndex, so concurrent callers can execute expensive index creation simultaneously for the same key before either stores into b.cache",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/80168": {
@@ -8689,6 +8867,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but only contains 'pass', failing to implement required abstract methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data), making it non-instantiable and causing TypeError at runtime",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "New abstract method build_occurrence_and_event_data was added to StatefulDetectorHandler but MetricAlertDetectorHandler does not implement it, causing ABC instantiation failure",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Docstring for function claims it returns a list of DetectorEvaluationResult but the actual return type annotation is dict[DetectorGroupKey, DetectorEvaluationResult]",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/80528": {
@@ -9208,6 +9406,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "In get_monitor_environment_context, the code copies and humanizes config['schedule_type'] into a local config variable, but the returned dict uses monitor_environment.monitor.config instead of the mutated local config, causing the transformation to be dropped",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/77754": {
@@ -9822,6 +10028,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Shared datetime default freezes queued timestamp - the dataclass field default `queued: datetime = timezone.now()` is evaluated once at class definition time, so omitted `queued` values reuse a frozen timestamp",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: datetime in to_dict may break JSON serialization - dataclasses.asdict(self) includes queued as a datetime which is passed directly into Celery task kwargs, causing serialization failure",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Naming typo: test_from_dict_inalid_data should be test_from_dict_invalid_data",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Naming mismatch: test_from_dict_empty_array test name says array but the body passes an empty dict {}",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError enqueueing non-JSON task kwargs - assignment_source.to_dict() contains a datetime in queued field which cannot be JSON-serialized when passed to apply_async kwargs",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/95633": {
@@ -10643,6 +10881,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "close() method sets shutdown_event and joins commit thread before queue_pool.shutdown() drains workers, causing offsets completed during queue shutdown to miss final commit pass",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "test_concurrent_processing_across_groups only checks group presence and completion count but never verifies actual concurrent/parallel execution",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "test_concurrent_processing_different_groups claims to test concurrency but only asserts 4 results were processed without any concurrency-specific observation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Thread-queue-parallel tests use fixed polling/sleep loops (max_wait=50 with time.sleep(0.1)) that will fail intermittently when background processing takes longer than expected",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "test_thread_queue_parallel_error_handling docstring claims errors don't block commits for other messages but body asserts no commits occur for any message",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2": {
@@ -12012,6 +12282,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice - Django QuerySet negative slicing is not supported and will raise AssertionError",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Cursor building crashes when paginator key is datetime or other non-numeric field - get_item_key applies math.floor/ceil and int() to datetime values which will raise TypeError",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Previous-page cursor with negative offset causes crash - new code preserves raw negative offset for cursor.is_prev which flows into queryset slicing, causing AssertionError since Django doesn't support negative slicing",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3": {
@@ -13169,6 +13459,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "hash() cache key breaks invalidation - Python hash values are process-dependent, so cache lookup/set and invalidate_upsampling_cache can compute different keys across workers, breaking cross-process cache reuse/invalidation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Zero sample_rate skipped by falsy guard - using 'if client_sample_rate:' causes valid zero values (0/0.0) to be skipped instead of being propagated to normalized_data['sample_rate']",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/103633": {
@@ -14000,6 +14304,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Test comment says cache entry should 'Allow access to the dashboard' but the inserted fixture stores map[string]bool{\"dashboards:uid:dash1\": false}, creating a contradictory test setup/comment mismatch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Revoked access persists when previously allowed permission remains cached - getCachedIdentityPermissions only checks permCache presence without freshness/revalidation, allowing stale cached grants to remain effective until TTL expiry",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/67876": {
@@ -14854,6 +15172,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "KeyError risk: integration.metadata[\"sender\"][\"login\"] access is unchecked - Integration.metadata is a JSONField defaulting to {} and doesn't guarantee sender/login keys exist, causing potential KeyError for integrations lacking that metadata",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CSRF/replay vulnerability: OAuth state is derived from pipeline.signature which is deterministically generated from pipeline view class names via md5_text, making it predictable and reusable rather than a secure per-request CSRF token",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/32918": {
@@ -15430,6 +15762,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Cleanup uses wrong IDP alias - test creates providers with aliases 'idp-alias-<i>' and 'idp-alias-20' but registers cleanup against the unrelated literal 'alias', causing created providers to leak when teardown removes non-existent alias",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/94942": {
@@ -16115,6 +16455,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "LogicError in pkg/expr/reader.go: enableSqlExpressions function returns false on every path due to inverted logic (!h.features.IsEnabledGlobally) and both branches returning false, causing SQL expressions to be unconditionally disabled",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError in pkg/expr/sql_command.go: sql.NewInMemoryDB().QueryFramesInto is called but the method unconditionally returns 'not implemented' error, causing SQL execution to deterministically fail at runtime",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/90939": {
@@ -16561,6 +16915,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: incomplete double-checked locking in pkg/api/webassets/webassets.go:GetWebAssets - duplicate fetches run when concurrent callers observe empty cache before lock acquisition because the code does not re-check entryPointAssetsCache after acquiring the write lock",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Logic error: nil cache overwrites valid entry in pkg/api/webassets/webassets.go:GetWebAssets - cached assets are lost when asset loading fails after a previous successful population because entryPointAssetsCache = result is always assigned, and result can be nil on loader failure",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/80329": {
@@ -17513,6 +17881,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Routine batch progress messages in CleanAnnotations are logged using r.log.Error instead of appropriate log levels (Info/Debug), causing error log flooding during normal control-flow operations",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/90045": {
@@ -18953,6 +19329,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Wrong metrics recorder on Update failure - storage failure metrics break because legacy recorder (recordLegacyDuration) is called instead of recordStorageDuration when Update errors",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Context loses log fields in Delete - structured logging breaks because enriched logger with name/kind/method values is created but plain d.Log is stored into context instead of the enriched log",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Wrong metrics label in Delete success - storage metrics attribution breaks because object name is passed as the third argument instead of options.Kind when recording storage duration",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Wrong metrics recorder in DeleteCollection goroutine - legacy latency/error metrics break because recordStorageDuration is used instead of recordLegacyDuration for the async legacy DeleteCollection path",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/106778": {
@@ -20098,6 +20500,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "ReferenceError: ctx is undefined in useIsGrafanaPromRuleEditable() - the function references ctx variable with no visible binding, causing editability checks to crash when processing GrafanaPromRuleDTO",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "React key prop omission in FilterView.tsx - GrafanaRuleListItem rendered in rules.map() is missing the key prop while sibling branches provide key={key}, causing potential stale row reuse when list order changes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/107534": {
@@ -21764,6 +22180,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition in device limit check: concurrent requests can pass the count check before inserting because there's no transaction or lock around the count-and-insert decision in CreateOrUpdateDevice",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Inconsistent time window basis: updateDevice builds its BETWEEN window from device.UpdatedAt while CreateOrUpdateDevice counts active devices using time.Now().UTC(), causing existing devices to fail updates when their UpdatedAt differs from current time",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Anonymous authentication fails when ErrDeviceLimitReached is returned from device tagging, whereas previously tagging errors were only logged as warnings",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Misleading error: updateDevice returns ErrDeviceLimitReached when RowsAffected()==0, but zero rows can also mean the device doesn't exist or is outside the update time window, not just that the limit was reached",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Anonymous login breaks when TagDevice returns ErrDeviceLimitReached because the new code propagates this error instead of treating it as best-effort like other tagging errors",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9": {
@@ -22451,6 +22899,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError when SiteSetting.default_locale is nil - code calls .to_sym on potentially nil value before .compact can help",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in lazy locale loading - unsynchronized access to @loaded_locales allows concurrent threads to both observe locale as unloaded and execute load path simultaneously",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "String/Symbol locale mismatch causes same locale to load twice - @loaded_locales.include?(locale) check has no normalization, so 'en' and :en are treated as different keys",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/76186": {
@@ -23011,6 +23479,50 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "TestLogger.FromContext returns a fresh logger instance instead of preserving the receiver's captured state, causing test assertions to break when code logs via FromContext logger",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "FromContext fallback to d.New() when result is not *log.ConcreteLogger drops context-derived attributes instead of preserving them",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.QueryData dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.CallResource dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.CheckHealth dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.CollectMetrics dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Explicit traceID logging was removed from logger_middleware.go and the new contextual logger middleware does not add traceID to context, breaking request correlation for debugging",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10": {
@@ -24778,6 +25290,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError in before_validation callback when host is nil because sub! is called unconditionally on self.host",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "NoMethodError in update/destroy actions when EmbeddableHost.where(id: params[:id]).first returns nil and the result is dereferenced without nil guard",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Unhandled promise rejection from destroyRecord() - delete flow silently fails when backend destroy request rejects, unlike the save path which has .catch(popupAjaxError)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7": {
@@ -25763,6 +26295,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Light-theme heading lightness changed from 20% to 50% in .topic-list-item h3, causing unexpected heading contrast changes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8": {
@@ -27154,6 +27694,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError in add_members: usernames parameter assumes String type but crashes when Array is passed since Array doesn't implement split method",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Off-by-one error in pagination: totalPages calculation using Math.floor(user_count / limit) + 1 overcounts by one when user_count is exactly divisible by limit, causing an empty extra page",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Ignored Promise rejection in admin_group_route.js: findMembers() called without return, then, or error handling, so AJAX failures propagate as unhandled rejected Promises",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in member list: removeMember() and addMembers() trigger findMembers() after async AJAX success without sequencing/cancellation, allowing overlapping reloads to resolve out of order and overwrite newer state with stale data",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3": {
@@ -28030,6 +28596,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Stale invalid email rejection: valid addresses stay blocked when server rejected same email once transiently because rejectedEmails cache is never cleared on email edits, retry, or success",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition loses match_count increments: non-atomic read/modify/write on ActiveRecord row where concurrent requests can both read the same prior value and overwrite each other",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5": {
@@ -28828,6 +29408,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Invalid -ms-align-items CSS property in mixins.scss line 121 - this is not a valid legacy IE/Edge flexbox property and will break alignment when the mixin is used",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Wrong -webkit-box-ordinal-group and -moz-box-ordinal-group mapping in mixins.scss lines 125-130 - these 2009 flexbox properties have different semantics than modern order property and are not equivalent one-to-one",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6": {
@@ -29642,6 +30236,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "The include_website_name method should be named include_website_name? to match the serializer's established convention for include-hook predicates, otherwise the guard method will be ignored and website_name will be serialized unconditionally",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4": {
@@ -31723,6 +32325,74 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError: poll_feed crashes when RSS item lacks content field - i.content is nil and .scrub raises NoMethodError with no guard or rescue present",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "RuntimeError: network and parse failures are unhandled in poll_feed - SimpleRSS.parse and open() have no exception handling, causing job failures on unreachable/malformed feeds",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "NoMethodError: nil post dereference on stale embed - TopicEmbed.import passes embed.post to PostRevisor without nil guard, crashes when associated post is missing",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "NoMethodError: topic retrieval crashes when embed_by_username setting is unset - SiteSetting.embed_by_username.downcase called without nil guard",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: comments container may be null - document.getElementById('discourse-comments') result is dereferenced without null check in embed.js",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Security: origin check accepts malicious superstrings - discourseUrl.indexOf(e.origin) === -1 is a substring check that can be bypassed by attacker-controlled origins containing the trusted string",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SyntaxError: invalid ERB block closing - app/views/embed/best.html.erb uses invalid 'end if' syntax instead of 'end'",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Test name/body mismatch - spec says 'raises an error' but only asserts response.should_not be_success, while controller actually raises Discourse::InvalidAccess",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SSRF vulnerability: open-uri fetches unvalidated feed_polling_url - no scheme/host allowlist validation before fetching admin-configurable URL",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SSRF vulnerability: open(url) fetches untrusted URLs in TopicEmbed.import_remote - URL validation only checks host equality but doesn't sanitize before open()",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "XSS vulnerability: unescaped URL interpolated into HTML - url is inserted into href and link text without escaping quotes or HTML characters",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1": {
@@ -32970,6 +33640,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Site upload limit ignored: size validation replaces dynamic per-type site settings with hardcoded 10MB, causing configured limits larger or smaller than 10MB to be ignored",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "413 error message reports wrong max size: hardcoded 10MB in 413 handler makes user-facing 'file too large' message report 10MB even when configured server/site limit differs",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Percentage resize string '80%' passed to OptimizedImage.downsize may break animated GIF downsize path that expects WxH geometry",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "downsize method arity changed by override: later 4-arg definition overrides earlier 5-arg definition, making existing width/height-style callers raise ArgumentError",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2": {
@@ -34056,6 +34752,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError on nil TopicUser unsubscribe - TopicsController#unsubscribe dereferences tu.notification_level immediately after TopicUser.find_by(...) with no nil guard, causing crash when user has no TopicUser row for the topic",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CSS selector typo breaks previous-discussion styling - class attribute contains '.previous-discussion' with a leading dot instead of 'previous-discussion', so styling/hooks targeting the class will not match",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/22532": {
@@ -35252,6 +35962,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "sed -i '' -E syntax is BSD/macOS-specific and fails on GNU sed (Linux), causing runtime error when updating GOOGLE_WEBHOOK_URL in .env file",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition with shared log file /tmp/tmole.log - concurrent script invocations can overwrite or read each other's session log and derive wrong TUNNEL_URL",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Fixed 10-second timeout (20 iterations \u00d7 0.5s sleep) for tmole startup may be insufficient for slow initialization, causing tunnel setup failure",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/8330": {
@@ -36304,6 +37034,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Dayjs === comparison always false: comparing two freshly created Dayjs objects with === checks object identity and is always false for equal instants, breaking override-day detection when start and end represent the same instant",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Working-hours check ignores slot end: both start and end are computed from slotStartTime while slotEndTime is never used, causing slots that extend past workingHour.endTime to be misclassified as available",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/14943": {
@@ -37007,6 +37751,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: retry tracking breaks when concurrent schedulers update the same reminder - retryCount is updated with reminder.retryCount + 1 based on a previously read value from findMany, not an atomic Prisma increment, causing lost updates",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Logic error: deleteMany removes non-SMS reminders when retryCount exceeds 1 - the OR filter's second branch only checks retryCount > 1 without method: WorkflowMethods.SMS, matching any WorkflowReminder row with retryCount > 1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/22345": {
@@ -39725,6 +40483,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition in app-credential.ts: concurrent requests can both miss findFirst before create, causing duplicate credential creation due to non-transactional read-then-write and missing unique constraint on (userId, appId) pair",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "parseRefreshTokenResponse.ts returns hardcoded 'refresh_token' string when provider omits refresh_token, which will be persisted and break OAuth refresh flows",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "refreshOAuthTokens.ts returns raw fetch Response in sync branch instead of parsed token payload, causing type mismatch with callers expecting OAuth data with access_token/expiry_date fields",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GoogleCalendarService.ts accesses res?.data on refreshOAuthTokens result, but the function returns a fetch Response which doesn't have a .data property, breaking token field access",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/7232": {
@@ -41033,6 +41817,44 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Unhandled promise errors from reminder deletions - deleteScheduledEmailReminder/deleteScheduledSMSReminder called without await inside forEach, promises not included in Promise.all, causing silent failures in handleCancelBooking.ts",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Unawaited reminder deletions in handleNewBooking.ts - async helpers called in forEach without await, try/catch cannot catch later promise rejections",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Error handling aborts remaining cancellations in scheduleEmailReminders.ts - single try/catch around for loop means if one API cancellation fails, remaining reminders stay scheduled",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError leaves SendGrid batch undeleted - only immediateDelete branch calls SendGrid cancellation, callers without immediateDelete flag only update DB cancelled flag without deleting external SendGrid batch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Stale reminders remain due to unawaited async deletes in bookings.tsx - fire-and-forget deleteScheduled* calls inside forEach are no longer part of awaited work after Promise.all removal",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Stale DB reminders after removed deleteMany in workflows.tsx - immediateDelete branch cancels SendGrid but returns without deleting WorkflowReminder row, cleanup job only targets cancelled=true reminders",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/14740": {
@@ -42457,6 +43279,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Authorization check in addGuestsHandler uses AND (&&) instead of OR (||) for isTeamAdmin and isTeamOwner, requiring users to have both roles instead of either one to gain access",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Duplicate guest emails within the submitted guests array are not deduplicated before bulk insert, allowing duplicate attendee rows to be created",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Guest state initialized to [''] causes validation failure when dialog opens or resets with no entered emails, as empty string fails email validation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Blacklist check is case-sensitive - submitted guest emails are not normalized to lowercase before comparison against the lowercased blacklist, allowing blocked guests to be added with mixed-case email casing",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/10600": {
@@ -43751,6 +44599,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Naming mismatch in disable endpoint: log message says 'cannot proceed with backup code login' but the code is in the TOTP disable flow, not login handling",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in backup code validation: concurrent login requests can both read the same backupCodes snapshot, pass validation, and write back without transaction or compare-and-swap, allowing backup code reuse",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Backup code case mismatch: comparison uses credentials.backupCode.replaceAll('-', '') without case normalization, but codes are generated as lowercase hex, so mixed-case user input won't match",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Exported component name 'TwoFactor' in BackupCode.tsx mismatches the file purpose and backup-code-specific UI it renders, causing confusion in stack traces and debugging",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential TypeError from undefined backupCodes.map: code calls body.backupCodes.map() without null/undefined guard after setup response, will crash if backupCodes is missing or null",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/10967": {
@@ -45465,6 +46345,56 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Potential TypeError: createEvent signature mismatch - CalendarManager.ts passes two arguments (calEvent, credential.id) but CalendarService.ts still implements createEvent with only one parameter",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: unguarded first calendar access - EventManager.ts destructures first entry from evt.destinationCalendar and dereferences mainHostDestinationCalendar.integration without optional chaining when destinationCalendar can be null or empty",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError from createEvent arity change - Calendar.d.ts interface requires createEvent(event, credentialId) but CalendarService.ts implementation still declares only one parameter",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError from destinationCalendar shape change - destinationCalendar changed from single object to DestinationCalendar[] | null but EventManager.ts consumer doesn't handle null/empty case properly",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: impossible calendar lookup fallback - GoogleCalendarService.updateEvent fallback compares cal.externalId against externalCalendarId when externalCalendarId is already falsy, making the find predicate always fail",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Team calendars dropped on booking create - handleNewBooking builds evt.destinationCalendar as array with multiple calendars but createBooking only persists evt.destinationCalendar[0], silently dropping additional calendars",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Recurring deletes skip DB-fetched credential - handleCancelBooking recurring-delete branch iterates only bookingToDelete.user.credentials and ignores the DB-recovered calendarCredential, causing external deletions to be missed",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Redundant optional chaining obscures non-null branch - editLocation.handler.ts uses optional chaining inside ternary true branches where values are already proven to be present",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/8087": {
@@ -46639,6 +47569,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "async forEach in handleCancelBooking.ts causes calendar update failures to go unhandled when async callbacks reject after handler continues",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in bookings.tsx where external calendar cleanup may be incomplete because forEach does not await async callbacks, making calendar deletion fire-and-forget",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "UnhandledPromiseRejection in packages/app-store/vital/lib/reschedule.ts where async forEach causes deletion failures to escape the surrounding try/catch block",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "UnhandledPromiseRejection in packages/app-store/wipemycalother/lib/reschedule.ts where async forEach causes promise rejections from getCalendar and deleteEvent to escape error handling",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Promise error in bookings.tsx where async forEach cleanup is unawaited, causing calendar deletions to escape error handling when delete/getCalendar rejects",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/37429": {
@@ -47835,6 +48797,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "RuntimeException crashes verification when the derived _en.properties file does not exist - verifySafeHtml() throws RuntimeException on IOException including missing English bundle files with no fallback handling",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Typo in method name 'santizeAnchors' should be 'sanitizeAnchors' - causes confusion when searching for sanitize logic",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/37634": {
@@ -48814,6 +49790,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NullPointerException: In AccessTokenContext.java constructor, line 73 validates grantType twice instead of validating rawTokenId, allowing null rawTokenId to pass validation despite the error message claiming otherwise",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Javadoc mismatch: OAuth2GrantTypeFactory.getShortcut documentation states shortcuts are 'usually like 3-letters' but actual implementations use 2-letter values (ac, cc, pg)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Overly broad exception assertion: Test testIncorrectGrantType catches generic RuntimeException instead of specific IllegalArgumentException, causing test to pass for unrelated runtime failures",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/38446": {
@@ -49721,6 +50717,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoSuchElementException: Optional.get() called without presence check in RecoveryAuthnCodeInputLoginBean constructor - login form rendering crashes when user has no recovery code credential",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Credential removal breaks when reconstructed recovery credential has a different or missing id - getCredentials reconstructs RecoveryAuthnCodesCredentialModel via createFromValues without preserving the stored id from updateCredential",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/36882": {
@@ -50238,6 +51248,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "picocli.exit calls System.exit for non-OK codes outside test launch mode, causing JVM termination when rolling-updates feature is disabled because FEATURE_DISABLED exit code is non-zero (=4)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/36880": {
@@ -51098,6 +52116,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Permission cleanup breaks when V2 is enabled without V1 - cleanup is gated by ADMIN_FINE_GRAINED_AUTHZ (V1 flag) but V2-specific permission logic is added separately, so cleanup path won't run with V2 enabled without V1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Per-client permission lookup always misses because resource lookup uses server.getId() as owner argument, but per-client resources are not owned by the resource-server id, causing lookup to miss and fall back to type-level 'Clients' resource",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Dead code: private getEvaluationContext() method is never called - all permission-evaluation paths use root.evaluatePermission() directly without this helper",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "IndexOutOfBoundsException risk from fixed client-scope index - test code uses get(1) on default client scopes but only asserts not(empty()) which guarantees size >= 1, not >= 2",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   }
-}
\ No newline at end of file
+}
diff --git a/offline/results/anthropic_claude-opus-4-5-20251101/evaluations.json b/offline/results/anthropic_claude-opus-4-5-20251101/evaluations.json
index 9e2b09c..dd9e35f 100644
--- a/offline/results/anthropic_claude-opus-4-5-20251101/evaluations.json
+++ b/offline/results/anthropic_claude-opus-4-5-20251101/evaluations.json
@@ -2018,6 +2018,49 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37429__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37429__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The method name 'santizeAnchors' should be 'sanitizeAnchors' (missing 'i').",
+          "severity": "Low",
+          "matched_candidate": "Typo in method name 'santizeAnchors' should be 'sanitizeAnchors' - causes confusion when searching for sanitize logic",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment - the typo in the method name 'santizeAnchors' which should be 'sanitizeAnchors' (missing 'i'). Both comments point to the same spelling error in the same method name."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "RuntimeException crashes verification when the derived _en.properties file does not exist - verifySafeHtml() throws RuntimeException on IOException including missing English bundle files with no fallback handling"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The translation is in Italian instead of Lithuanian. This should be translated to Lithuanian to match the file's locale (messages_lt.properties).",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The totpStep1 value uses Traditional Chinese terms in the Simplified Chinese file (zh_CN), which is likely incorrect for this locale. Please verify the locale\u2011appropriate translation.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The anchor sanitization logic has a potential issue where it consumes English matcher groups without proper validation. If the translated text has more anchor tags than the English text, this could lead to incorrect validation results.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 4,
+      "tp": 1,
+      "fp": 1,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.25,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37429__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37429__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/37634": {
@@ -3940,6 +3983,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37634__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37634__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Wrong parameter in null check (grantType vs. rawTokenId)",
+          "severity": "Critical",
+          "matched_candidate": "NullPointerException: In AccessTokenContext.java constructor, line 73 validates grantType twice instead of validating rawTokenId, allowing null rawTokenId to pass validation despite the error message claiming otherwise",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment - that the null check is using the wrong parameter (grantType is being validated twice instead of validating rawTokenId). The candidate provides more detail by specifying the file, line number, and consequence, but it's pointing to the same underlying bug."
+        },
+        {
+          "golden_comment": "Javadoc mentions \"usually like 3-letters shortcut\" but some implementations use 2-letter shortcuts (\"ac\", \"cc\", \"rt\", \"te\", \"pc\", \"ci\", \"ro\"). Consider updating documentation to reflect actual usage pattern.",
+          "severity": "Low",
+          "matched_candidate": "Javadoc mismatch: OAuth2GrantTypeFactory.getShortcut documentation states shortcuts are 'usually like 3-letters' but actual implementations use 2-letter values (ac, cc, pg)",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: the Javadoc documentation mentions '3-letters shortcut' but the actual implementations use 2-letter shortcuts. The golden comment lists examples like 'ac', 'cc', 'rt', 'te', 'pc', 'ci', 'ro' while the candidate lists 'ac', 'cc', 'pg'. Despite slightly different examples, they're pointing to the exact same documentation inconsistency problem."
+        },
+        {
+          "golden_comment": " Catching generic RuntimeException is too broad. The implementation throws IllegalArgumentException specifically - catch that instead for more precise testing.",
+          "severity": "Low",
+          "matched_candidate": "Overly broad exception assertion: Test testIncorrectGrantType catches generic RuntimeException instead of specific IllegalArgumentException, causing test to pass for unrelated runtime failures",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the test catches a generic RuntimeException when it should catch the specific IllegalArgumentException. Both comments point out that this is too broad and could cause the test to pass for unrelated runtime failures."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "In isAccessTokenId, the substring for the grant shortcut and the equality check look inverted: the grant shortcut occupies indices 4\u20135 (substring(4,6)), and a match should return true (combined with UUID check), not false.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 4,
+      "tp": 3,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.75,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37634__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37634__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/38446": {
@@ -5510,6 +5598,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR38446__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR38446__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Unsafe raw List deserialization without type safety. Calling Optional.get() directly on the Optional returned by RecoveryAuthnCodesUtils.getCredential(user) without checking isPresent() can lead to a NoSuchElementException if the Optional is empty.",
+          "severity": "Medium",
+          "matched_candidate": "NoSuchElementException: Optional.get() called without presence check in RecoveryAuthnCodeInputLoginBean constructor - login form rendering crashes when user has no recovery code credential",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same core problem as the golden comment: calling Optional.get() without checking isPresent() first on the result of RecoveryAuthnCodesUtils.getCredential(user), which can lead to a NoSuchElementException if the Optional is empty. Both comments point to the same unsafe Optional handling bug, just with slightly different wording."
+        },
+        {
+          "golden_comment": "After creating the RecoveryAuthnCodesCredentialModel, consider setting its id from the stored credential (e.g., myUser.recoveryCodes.getId()); otherwise getId() will be null and downstream removal by id (e.g., removeStoredCredentialById in the authenticator flow) may not work.",
+          "severity": "Low",
+          "matched_candidate": "Credential removal breaks when reconstructed recovery credential has a different or missing id - getCredentials reconstructs RecoveryAuthnCodesCredentialModel via createFromValues without preserving the stored id from updateCredential",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same underlying problem as the golden comment. Both point out that when RecoveryAuthnCodesCredentialModel is created/reconstructed via createFromValues, the id from the stored credential is not preserved/set, which will cause downstream removal by id (removeStoredCredentialById) to fail. The candidate mentions 'reconstructed recovery credential has a different or missing id' and 'without preserving the stored id', which is semantically equivalent to the golden comment's concern about 'setting its id from the stored credential' and 'getId() will be null'."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR38446__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR38446__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/36882": {
@@ -6708,6 +6829,32 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR36882__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR36882__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect method call for exit codes. The picocli.exit() method calls System.exit() directly, which is problematic:",
+          "severity": "Medium",
+          "matched_candidate": "picocli.exit calls System.exit for non-OK codes outside test launch mode, causing JVM termination when rolling-updates feature is disabled because FEATURE_DISABLED exit code is non-zero (=4)",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same problem as the golden comment - that picocli.exit() calls System.exit() directly, which is problematic. The candidate provides more specific context about when this occurs (when rolling-updates feature is disabled with FEATURE_DISABLED exit code), but the core issue is the same: the incorrect use of picocli.exit() method that causes System.exit() to be called."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR36882__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36882__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/36880": {
@@ -8421,6 +8568,53 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR36880__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR36880__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Inconsistent feature flag bug causing orphaned permissions. The AdminPermissions event listener, responsible for cleaning up permissions upon role, client, or group removal, is incorrectly guarded by the ADMIN_FINE_GRAINED_AUTHZ (V1) feature flag. This is inconsistent with other methods in the class that use ADMIN_FINE_GRAINED_AUTHZ_V2. Consequently, if ADMIN_FINE_GRAINED_AUTHZ_V2 is enabled but V1 is not, the permission cleanup logic will not execute, leading to orphaned permission data. Cleanup should occur regardless of which fine-grained authorization version is enabled.",
+          "severity": "High",
+          "matched_candidate": "Permission cleanup breaks when V2 is enabled without V1 - cleanup is gated by ADMIN_FINE_GRAINED_AUTHZ (V1 flag) but V2-specific permission logic is added separately, so cleanup path won't run with V2 enabled without V1",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same bug as the golden comment: the permission cleanup logic is incorrectly guarded by the ADMIN_FINE_GRAINED_AUTHZ (V1) feature flag, which means when V2 is enabled without V1, the cleanup won't run, leading to orphaned permissions. Both comments point to the same inconsistency between the V1 flag guarding cleanup and V2 being used elsewhere."
+        },
+        {
+          "golden_comment": "In hasPermission(ClientModel client, String scope), the resource lookup uses findByName(server, client.getId(), server.getId()), but AdminPermissionsSchema.getOrCreateResource creates per-client resources with the owner set to resourceServer.getClientId(), so this lookup will never find those resources and will always fall back to the 'all-clients' resource, effectively ignoring client-specific permissions.",
+          "severity": "High",
+          "matched_candidate": "Per-client permission lookup always misses because resource lookup uses server.getId() as owner argument, but per-client resources are not owned by the resource-server id, causing lookup to miss and fall back to type-level 'Clients' resource",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same underlying bug: the resource lookup in hasPermission uses server.getId() as the owner argument, but per-client resources are created with a different owner (resourceServer.getClientId()), causing the lookup to always miss and fall back to the type-level resource. Both comments point to the same mismatch between how resources are created vs how they are looked up, resulting in client-specific permissions being ignored."
+        },
+        {
+          "golden_comment": "In getClientsWithPermission(String scope), iterating resourceStore.findByType(server, AdminPermissionsSchema.CLIENTS_RESOURCE_TYPE) and returning resource.getName() will only ever consider the type-level 'Clients' resource (per-client resources have no type) and return its name, while AvailableRoleMappingResource#getRoleIdsWithPermissions expects actual client IDs to pass to realm.getClientById, which can lead to incorrect behavior or a null client and subsequent failures.",
+          "severity": "High",
+          "matched_candidate": "Per-client permission lookup always misses because resource lookup uses server.getId() as owner argument, but per-client resources are not owned by the resource-server id, causing lookup to miss and fall back to type-level 'Clients' resource",
+          "confidence": 0.85,
+          "reasoning": "The candidate issue identifies a problem with per-client permission lookup failing due to incorrect owner argument in resource lookup, causing it to fall back to the type-level 'Clients' resource. The golden comment describes the same fundamental problem - that iterating resourceStore.findByType() only considers the type-level 'Clients' resource (not per-client resources) and returns its name instead of actual client IDs. Both issues point to the same underlying bug: the method fails to properly retrieve per-client resources and instead only works with the type-level resource, leading to incorrect behavior when actual client IDs are expected. The candidate provides a specific technical reason (owner argument mismatch) for why per-client resources are missed, which aligns with the golden comment's observation that per-client resources have no type and won't be found."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Dead code: private getEvaluationContext() method is never called - all permission-evaluation paths use root.evaluatePermission() directly without this helper"
+        },
+        {
+          "candidate": "IndexOutOfBoundsException risk from fixed client-scope index - test code uses get(1) on default client scopes but only asserts not(empty()) which guarantees size >= 1, not >= 2"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 2,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR36880__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36880__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/37038": {
@@ -10017,6 +10211,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37038__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37038__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect permission check in canManage() method",
+          "severity": "High",
+          "matched_candidate": "GroupPermissions.canManage() narrowed to only check root.hasOneAdminRole(AdminRoles.MANAGE_USERS), breaking group management when default user-management grants exist without direct MANAGE_USERS role",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies a problem with the canManage() method in GroupPermissions, specifically that it incorrectly narrowed the permission check to only check root.hasOneAdminRole(AdminRoles.MANAGE_USERS). This is describing an incorrect permission check in the canManage() method, which matches the golden comment about 'Incorrect permission check in canManage() method'. Both are pointing to the same underlying issue - the permission check logic in canManage() is wrong."
+        },
+        {
+          "golden_comment": "In getGroupIdsWithViewPermission, hasPermission is called with groupResource.getId() and the same groupResource.getId() is added to granted, but hasPermission resolves resources by name (treating the argument as a group id) and the GroupPermissionEvaluator contract says this method returns group IDs that are later used as UserModel.GROUPS and in getUsersCount group filters. This mismatch means per-group VIEW_MEMBERS/MANAGE_MEMBERS permissions may not yield the expected group IDs for filtering and counts, and evaluation may effectively only look at the type-level 'all-groups' resource; consider revisiting whether this should operate on the underlying group ids (resource names) instead so it aligns with the JPA queries and the interface contract.",
+          "severity": "High",
+          "matched_candidate": "GroupPermissionsV2.hasPermission() uses resourceStore.findByName(server, groupId) but getGroupIdsWithViewPermission() passes groupResource.getId() causing id-vs-name mismatch for per-group permissions",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: in getGroupIdsWithViewPermission(), groupResource.getId() is being passed to hasPermission(), but hasPermission() uses resourceStore.findByName() which expects a group id/name, not a resource id. This is the core id-vs-name mismatch that the golden comment describes, which causes per-group VIEW_MEMBERS/MANAGE_MEMBERS permissions to not work correctly."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "RolePermissions role-mapping gated on root.hasOneAdminRole(AdminRoles.MANAGE_USERS) before checkAdminRoles(role), narrowing access compared to broader canManageDefault semantics used elsewhere"
+        },
+        {
+          "candidate": "AdminPermissions listener registration guarded by ADMIN_FINE_GRAINED_AUTHZ but V2 paths use ADMIN_FINE_GRAINED_AUTHZ_V2, causing permission cleanup to be skipped when V2 is enabled without V1"
+        },
+        {
+          "candidate": "GroupResourceTypeEvaluationTest.testManageAllGroups expects 204 No Content for subgroup creation but GroupResource.addChild() returns 201 Created when creating new child groups"
+        },
+        {
+          "candidate": "GroupPermissionsV2.getGroupIdsWithViewPermission() has id/name mismatch when calling hasPermission with groupResource.getId() but hasPermission resolves with resourceStore.findByName treating argument as name"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 6,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 4,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37038__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37038__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/33832": {
@@ -11621,6 +11861,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR33832__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR33832__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Dead code exists where ASN1Encoder instances are created and written to, but their results are immediately discarded. The actual encoding is performed by new ASN1Encoder instances created in the subsequent return statement, rendering the earlier operations useless.",
+          "severity": "Low",
+          "matched_candidate": "AuthzClientCryptoProvider creates two ASN1Encoder instances and writes r/s to them, but these instances are immediately discarded since the returned bytes are built from separate encoder instances - the standalone calls are dead code with no effect",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: ASN1Encoder instances are created and written to (with r/s values), but these instances are immediately discarded because the actual return statement creates new/separate encoder instances. Both comments identify this as dead code with no effect."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ASN1Decoder.readLength returns -1 for indefinite-length encoding, but callers like readInteger and readNext pass this negative length directly to read(length), causing NegativeArraySizeException when allocating new byte[length]"
+        },
+        {
+          "candidate": "ASN1Decoder.readLength() compares decoded length against total input limit rather than remaining bytes after tag/length bytes are consumed, incorrectly rejecting valid payloads whose content exactly fills remaining bytes"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Returns wrong provider (default keystore instead of BouncyCastle)",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR33832__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR33832__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/40940": {
@@ -13072,6 +13350,37 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR40940__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR40940__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The reader thread isn\u2019t waited for; flipping deletedAll to true and asserting immediately can race and miss exceptions added just after the flag change, making this test flaky.",
+          "severity": "Medium",
+          "matched_candidate": "Race condition: reader thread is not joined before assertion - the test sets deletedAll flag and immediately asserts caughtExceptions is empty without waiting for the background thread to complete, allowing late exceptions to be appended after the assertion",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the reader thread is not waited for (not joined), and setting the deletedAll flag to true followed by an immediate assertion creates a race condition where exceptions could be added after the flag change but before/after the assertion, making the test flaky. Both comments point to the same underlying race condition bug with the same root cause (missing thread synchronization) and same consequence (potential test flakiness due to missed exceptions)."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "Returning null from getSubGroupsCount() violates the GroupModel contract (Javadoc says it never returns null) and may lead to NPEs in callers that expect a non-null count.",
+          "severity": "Critical"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR40940__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR40940__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1": {
@@ -14574,6 +14883,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak-greptile__greptile-v4-1__PR1__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak-greptile__greptile-v4-1__PR1__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "With isConditionalPasskeysEnabled(UserModel user) requiring user != null, authenticate(...) will not call webauthnAuth.fillContextForm(context) on the initial login page where context.getUser() is still null, so conditional passkey UI will not be set up for first-time passkey login. Consider whether this should also be enabled when no user has been selected yet so ID-less passkey authentication on the initial login form continues to work.",
+          "severity": "Medium",
+          "matched_candidate": "isConditionalPasskeysEnabled(context.getUser()) requires user != null, causing authenticate/challenge to skip webauthnAuth.fillContextForm(context) on the initial login page before a user is selected, breaking conditional passkey UI",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: isConditionalPasskeysEnabled() requires user != null, which causes authenticate() to skip webauthnAuth.fillContextForm(context) on the initial login page when no user is selected yet, breaking the conditional passkey UI for first-time/ID-less passkey authentication."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "In the failed-login error path, unknown-user attempts leave context.getUser() null, so the passkey UI setup via webauthnAuth.fillContextForm(context) is skipped on retry, causing passkey retry options to disappear"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "ConditionalPasskeysEnabled() called without UserModel parameter",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak-greptile__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak-greptile__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/93824": {
@@ -16650,6 +16994,62 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR93824__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR93824__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Inconsistent metric tagging with 'shard' and 'shards'",
+          "severity": "Medium",
+          "matched_candidate": "Naming mismatch: shard/shards metric tags split dashboards - inconsistent metric tag keys used for same shard-set context ('shard' at line 185 vs 'shards' at line 199)",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue directly identifies the same problem as the golden comment - inconsistent metric tagging where 'shard' is used in one place (line 185) and 'shards' is used in another (line 199). Both comments point to the exact same naming inconsistency issue with metric tags."
+        },
+        {
+          "golden_comment": "Fixed sleep in tests can be flaky; wait on condition instead",
+          "severity": "Low",
+          "matched_candidate": "Timing race: sleep no longer waits - test_basic monkeypatches time.sleep to a no-op but then relies on time.sleep(0.1) to give flusher threads time to process, which contradicts the monkeypatch",
+          "confidence": 0.8,
+          "reasoning": "Both comments identify issues with using sleep in tests. The golden comment points out that fixed sleep can be flaky and suggests waiting on a condition instead. The candidate comment identifies a specific problem where the test monkeypatches time.sleep to a no-op but then relies on time.sleep to give threads time to process, which is a timing/race condition issue. Both are fundamentally about the same underlying problem - unreliable sleep-based synchronization in tests - though the candidate focuses on a specific manifestation (the monkeypatch contradiction) while the golden comment addresses the general flakiness concern."
+        },
+        {
+          "golden_comment": "Sleep in test_consumer.py won\u2019t actually wait because time.sleep was monkeypatched above; consider restoring sleep or using a different sync to ensure the flusher has time to process.",
+          "severity": "Medium",
+          "matched_candidate": "Timing race: sleep no longer waits - test_basic monkeypatches time.sleep to a no-op but then relies on time.sleep(0.1) to give flusher threads time to process, which contradicts the monkeypatch",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: time.sleep was monkeypatched to a no-op, but the test then relies on time.sleep(0.1) to give the flusher time to process, which won't actually wait due to the monkeypatch. Both comments point out this contradiction/timing issue."
+        },
+        {
+          "golden_comment": "Breaking out of the loop when the deadline has elapsed can skip terminating remaining flusher processes, potentially leaving them running after shutdown; consider ensuring termination is attempted even if the deadline is exceeded.",
+          "severity": "Medium",
+          "matched_candidate": "Leaked processes when join deadline breaks loop early - remaining child processes keep running when timeout expires because the loop breaks entirely once deadline is exhausted, skipping terminate() calls for remaining entries",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: when the deadline/timeout expires, the loop breaks early, which causes remaining flusher/child processes to not be terminated, leaving them running after shutdown. Both comments point to the same bug where breaking out of the loop on deadline expiration skips the terminate() calls for remaining processes."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Race condition: replacement process starts before prior exits - duplicate flushing can occur when killed process still runs during restart because _ensure_processes_alive kills an unhealthy worker and immediately calls _create_process_for_shards without any wait/join for prior exit"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Because flusher processes are created via multiprocessing.get_context('spawn').Process, they are instances of multiprocessing.context.SpawnProcess, which on POSIX is not a subclass of multiprocessing.Process, so this isinstance check will always be false and hung processes won't be killed here.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR93824__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR93824__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5": {
@@ -18630,6 +19030,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR5__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR5__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Detector validator uses wrong key when updating type",
+          "severity": "Medium",
+          "matched_candidate": "KeyError: update reads wrong validated_data key - BaseDetectorTypeValidator.update reads validated_data.get('detector_type') but the serializer declares field as 'type', causing updates to be ignored and fall back to instance.group_type",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment. Both point out that the detector validator uses the wrong key - it reads 'detector_type' from validated_data when the serializer declares the field as 'type'. The candidate provides more detail about the consequence (updates being ignored and falling back to instance.group_type), but it's fundamentally the same bug."
+        },
+        {
+          "golden_comment": "Using zip(error_ids, events.values()) assumes the get_multi result preserves the input order; dict value order is not guaranteed to match error_ids, so event data can be paired with the wrong ID (missing nodes also shift alignment).",
+          "severity": "Low",
+          "matched_candidate": "Logic error from mispaired nodestore results - fetch_error_details pairs error_ids with events.values() by position instead of by key, but nodestore.backend.get_multi returns a dict with no positional contract, causing ids to be misassociated with wrong payloads",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: using zip(error_ids, events.values()) incorrectly assumes that the dict returned by get_multi preserves the input order of error_ids, when in fact dict value order is not guaranteed to match, causing event data to be paired with wrong IDs. Both comments point to the same logic error with the same root cause (dict ordering not matching input order) and same consequence (misassociation of ids with payloads)."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ValidationError: age=0 bypasses mutual-exclusion check - invalid mixed-spec reports pass validation when age or timestamp is zero because the check uses truthiness instead of presence"
+        },
+        {
+          "candidate": "Authorization telemetry recorded before feature check - analytics.record is called before the feature gate check, so denied requests still emit preprod_artifact.api.assemble events"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Breaking changes in error response format",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR5__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR5__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1": {
@@ -20655,6 +21100,55 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR1__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR1__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Django querysets do not support negative slicing",
+          "severity": "High",
+          "matched_candidate": "AssertionError: BasePaginator.get_result allows negative start_offset for previous-page cursors, causing Django QuerySet negative slicing crash",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the exact same underlying problem as the golden comment. The golden comment states 'Django querysets do not support negative slicing', and the candidate issue describes 'BasePaginator.get_result allows negative start_offset for previous-page cursors, causing Django QuerySet negative slicing crash'. Both are pointing to the same bug: negative slicing being used on Django QuerySets, which is not supported and causes errors."
+        },
+        {
+          "golden_comment": "When requests are authenticated with API keys or org auth tokens (which have user_id=None), organization_context.member is None. Line 71 attempts to access organization_context.member.has_global_access without checking if member is None, causing an AttributeError crash when optimized_pagination=true is used, even though the request passed all permission checks with valid org:write scope.",
+          "severity": "High",
+          "matched_candidate": "AttributeError: organization_context.member.has_global_access is accessed without a None check, causing request handling to break when authenticated via API key or org auth token",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: accessing organization_context.member.has_global_access without checking if member is None, which causes an AttributeError when authenticated via API key or org auth token. Both comments point to the same bug, same cause (API key/org auth token authentication resulting in None member), and same consequence (AttributeError crash)."
+        },
+        {
+          "golden_comment": "get_item_key assumes a numeric key, but the paginator is used with order_by=-datetime in the audit logs endpoint; calling math.floor/ceil on a datetime will raise a TypeError.",
+          "severity": "High",
+          "matched_candidate": "TypeError: OptimizedCursorPaginator.get_item_key applies math.floor/math.ceil to datetime values when used with order_by='-datetime', which will fail at runtime",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the get_item_key method in OptimizedCursorPaginator applies math.floor/math.ceil to values, but when used with order_by='-datetime' (as in the audit logs endpoint), these operations will fail on datetime values with a TypeError. Both comments point to the same bug with the same root cause and same consequence."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "AssertionError: OptimizedCursorPaginator.get_result explicitly allows negative cursor.offset when enable_advanced_features is true, causing Django QuerySet negative slicing crash"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Importing non-existent OptimizedCursorPaginator",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 4,
+      "tp": 3,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 0.75,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/97529": {
@@ -22136,6 +22630,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR97529__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR97529__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "A race condition in BuildIndex allows multiple goroutines to concurrently build the same expensive index for the same key. This is caused by moving the b.cacheMu lock from protecting the entire function to only protecting the final cache assignment. ",
+          "severity": "High",
+          "matched_candidate": "Race condition causing duplicate index builds: the full-function cache lock was removed from BuildIndex, so concurrent callers can execute expensive index creation simultaneously for the same key before either stores into b.cache",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: a race condition in BuildIndex where the cache lock was moved/removed from protecting the entire function, allowing multiple goroutines to concurrently build the same expensive index for the same key before storing to cache. Both comments describe the same underlying bug with the same cause (lock scope change) and same consequence (duplicate expensive index builds)."
+        },
+        {
+          "golden_comment": "Calling s.search.TotalDocs() here may race with concurrent index creation: TotalDocs iterates b.cache without synchronization, and the event watcher goroutine started just above could trigger BuildIndex writes concurrently, potentially causing a concurrent map read/write panic.",
+          "severity": "High",
+          "matched_candidate": "Race condition in TotalDocs: concurrent map access can panic when event watcher indexes while initialization logs total docs, as TotalDocs iterates b.cache with no lock while BuildIndex writes b.cache under b.cacheMu",
+          "confidence": 0.99,
+          "reasoning": "The candidate issue identifies the exact same race condition as the golden comment: TotalDocs() iterates b.cache without synchronization while the event watcher goroutine (started just above) can trigger BuildIndex which writes to b.cache concurrently, potentially causing a concurrent map read/write panic. The candidate even provides additional detail about b.cacheMu being used by BuildIndex but not by TotalDocs."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Nil pointer panic in History/Origin methods: s.search is dereferenced without nil guard, but search is optional in construction and other methods like Search have nil guards"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR97529__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR97529__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/80168": {
@@ -23601,6 +24132,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR80168__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR80168__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but only contains pass, failing to implement its required abstract methods: counter_names (property), get_dedupe_value(), get_group_key_values(), and build_occurrence_and_event_data(). This will cause a TypeError at runtime when the class is instantiated.",
+          "severity": "High",
+          "matched_candidate": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but only contains 'pass', failing to implement required abstract methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data), making it non-instantiable and causing TypeError at runtime",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same core problem as the golden comment: MetricAlertDetectorHandler inherits from StatefulDetectorHandler but only contains 'pass', failing to implement required abstract methods, which will cause a TypeError at runtime. The candidate mentions three of the four abstract methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data) but omits counter_names property. However, the fundamental issue identified is the same - the class is non-instantiable due to missing abstract method implementations."
+        },
+        {
+          "golden_comment": "Docstring says this returns a list of DetectorEvaluationResult, but the method now returns a dict keyed by DetectorGroupKey. Consider updating the docstring to match the new return type.",
+          "severity": "Low",
+          "matched_candidate": "Docstring for function claims it returns a list of DetectorEvaluationResult but the actual return type annotation is dict[DetectorGroupKey, DetectorEvaluationResult]",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the exact same issue: the docstring claims the function returns a list of DetectorEvaluationResult, but the actual return type is a dict keyed by DetectorGroupKey. Both suggest the docstring needs to be updated to match the actual return type."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "New abstract method build_occurrence_and_event_data was added to StatefulDetectorHandler but MetricAlertDetectorHandler does not implement it, causing ABC instantiation failure"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR80168__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80168__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/80528": {
@@ -24975,6 +25543,37 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR80528__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR80528__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The function modifies the config variable to include display values but then returns the original monitor.config instead of the modified version.",
+          "severity": "High",
+          "matched_candidate": "In get_monitor_environment_context, the code copies and humanizes config['schedule_type'] into a local config variable, but the returned dict uses monitor_environment.monitor.config instead of the mutated local config, causing the transformation to be dropped",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the code creates a local 'config' variable with modifications (humanizing schedule_type), but then returns monitor.config instead of the modified local config variable, causing the transformation to be lost. Both comments point to the same bug where the modified config is not being returned."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The code fetches MonitorCheckIn objects by ID when the required data already exists in previous_checkins. This creates an unnecessary database query.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR80528__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80528__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/77754": {
@@ -26724,6 +27323,53 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR77754__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR77754__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Shared mutable default in dataclass timestamp",
+          "severity": "Medium",
+          "matched_candidate": "Shared datetime default freezes queued timestamp - the dataclass field default `queued: datetime = timezone.now()` is evaluated once at class definition time, so omitted `queued` values reuse a frozen timestamp",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same issue: the datetime default value in the dataclass is evaluated once at class definition time rather than at instance creation time. The golden comment describes it as 'shared mutable default in dataclass timestamp' and the candidate describes it as 'datetime default freezes queued timestamp' with the same explanation that it's 'evaluated once at class definition time'. They are pointing to the exact same bug."
+        },
+        {
+          "golden_comment": "The method name has a typo: test_from_dict_inalid_data should be test_from_dict_invalid_data.",
+          "severity": "Low",
+          "matched_candidate": "Naming typo: test_from_dict_inalid_data should be test_from_dict_invalid_data",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment - a typo in the method name where 'inalid' should be 'invalid' in 'test_from_dict_inalid_data'. Both comments point to the same naming typo and suggest the same correction."
+        },
+        {
+          "golden_comment": "Method name says 'empty_array' but tests empty dict - consider renaming to 'test_from_dict_empty_dict' for clarity.",
+          "severity": "Low",
+          "matched_candidate": "Naming mismatch: test_from_dict_empty_array test name says array but the body passes an empty dict {}",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the exact same issue: the test method is named 'test_from_dict_empty_array' but it actually tests an empty dict, not an array. Both suggest this is a naming mismatch/inconsistency that should be addressed. The golden comment suggests renaming to 'test_from_dict_empty_dict' while the candidate simply points out the mismatch, but they're identifying the same underlying problem."
+        },
+        {
+          "golden_comment": "to_dict() returns a datetime for queued; if this dict is passed in task kwargs (e.g., via apply_async), JSON serialization may fail depending on the serializer, which can cause enqueue errors.",
+          "severity": "Medium",
+          "matched_candidate": "TypeError enqueueing non-JSON task kwargs - assignment_source.to_dict() contains a datetime in queued field which cannot be JSON-serialized when passed to apply_async kwargs",
+          "confidence": 0.99,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the to_dict() method returns a datetime in the 'queued' field, and when this dict is passed to apply_async kwargs, it cannot be JSON-serialized, which will cause errors during task enqueueing. Both comments point to the same underlying serialization issue with the same root cause (datetime in to_dict()) and same consequence (JSON serialization failure in apply_async)."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 4,
+      "tp": 4,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.8,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR77754__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR77754__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/95633": {
@@ -28392,6 +29038,54 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR95633__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR95633__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The test test_thread_queue_parallel_error_handling has a docstring that doesn't match the test implementation.",
+          "severity": "Low",
+          "matched_candidate": "test_thread_queue_parallel_error_handling docstring claims errors don't block commits for other messages but body asserts no commits occur for any message",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies that the test_thread_queue_parallel_error_handling has a docstring that doesn't match the test implementation - specifically that the docstring claims errors don't block commits for other messages, but the test body asserts no commits occur for any message. This is exactly the same issue as the golden comment, which states the docstring doesn't match the test implementation."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "close() method sets shutdown_event and joins commit thread before queue_pool.shutdown() drains workers, causing offsets completed during queue shutdown to miss final commit pass"
+        },
+        {
+          "candidate": "test_concurrent_processing_across_groups only checks group presence and completion count but never verifies actual concurrent/parallel execution"
+        },
+        {
+          "candidate": "test_concurrent_processing_different_groups claims to test concurrency but only asserts 4 results were processed without any concurrency-specific observation"
+        },
+        {
+          "candidate": "Thread-queue-parallel tests use fixed polling/sleep loops (max_wait=50 with time.sleep(0.1)) that will fail intermittently when background processing takes longer than expected"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The queue.shutdown() method with 'immediate=False' parameter may not exist in the standard Python queue module. This could cause AttributeError at runtime. Verify the correct API or implement a custom shutdown mechanism.",
+          "severity": "High"
+        },
+        {
+          "golden_comment": "The magic number 50 for max_wait is used repeatedly throughout the tests. Consider extracting this as a named constant to improve maintainability.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 3,
+      "tp": 1,
+      "fp": 4,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.2,
+      "recall": 0.3333333333333333,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR95633__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR95633__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2": {
@@ -30387,6 +31081,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR2__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR2__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "OptimizedCursorPaginator negative-offset branch slices QuerySet with a negative start index",
+          "severity": "Critical",
+          "matched_candidate": "Pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice - Django QuerySet negative slicing is not supported and will raise AssertionError",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same issue: the OptimizedCursorPaginator has a problem with negative offset/index being passed to Django QuerySet slicing, which is not supported. The golden comment mentions 'negative-offset branch slices QuerySet with a negative start index' and the candidate mentions 'negative offset to a Django QuerySet slice - Django QuerySet negative slicing is not supported'. They are describing the exact same bug."
+        },
+        {
+          "golden_comment": "BasePaginator negative-offset branch slices QuerySet with a negative start index",
+          "severity": "High",
+          "matched_candidate": "Pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice - Django QuerySet negative slicing is not supported and will raise AssertionError",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - that BasePaginator's negative offset handling results in slicing a Django QuerySet with a negative start index, which is not supported by Django and will cause an error. Both comments point to the same bug: negative offset values being passed to QuerySet slicing operations."
+        },
+        {
+          "golden_comment": "OptimizedCursorPaginator.get_item_key uses floor/ceil on a datetime key (order_by='-datetime'), causing TypeError.",
+          "severity": "High",
+          "matched_candidate": "Cursor building crashes when paginator key is datetime or other non-numeric field - get_item_key applies math.floor/ceil and int() to datetime values which will raise TypeError",
+          "confidence": 0.99,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the get_item_key method in OptimizedCursorPaginator applies math.floor/ceil (and int()) to datetime values, which will cause a TypeError. Both comments point to the same bug where numeric operations are incorrectly applied to datetime fields."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Previous-page cursor with negative offset causes crash - new code preserves raw negative offset for cursor.is_prev which flows into queryset slicing, causing AssertionError since Django doesn't support negative slicing"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR2__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR2__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3": {
@@ -32240,6 +32978,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR3__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR3__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "sample_rate = 0.0 is falsy and skipped",
+          "severity": "Low",
+          "matched_candidate": "Zero sample_rate skipped by falsy guard - using 'if client_sample_rate:' causes valid zero values (0/0.0) to be skipped instead of being propagated to normalized_data['sample_rate']",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment - that using a falsy check ('if client_sample_rate:') causes sample_rate values of 0 or 0.0 to be incorrectly skipped because they are falsy values. Both comments point to the same bug where valid zero sample rates are not properly handled."
+        },
+        {
+          "golden_comment": "Using Python\u2019s built-in hash() to build cache keys is non-deterministic across processes (hash randomization), so keys won\u2019t match across workers and invalidate_upsampling_cache may fail to delete them. Use a deterministic serialization of project_ids for the cache key.",
+          "severity": "Low",
+          "matched_candidate": "hash() cache key breaks invalidation - Python hash values are process-dependent, so cache lookup/set and invalidate_upsampling_cache can compute different keys across workers, breaking cross-process cache reuse/invalidation",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: using Python's hash() for cache keys is non-deterministic across processes due to hash randomization, which causes cache key mismatches between workers and breaks cache invalidation (specifically mentioning invalidate_upsampling_cache). Both comments point to the same underlying bug and concern."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The upsampling eligibility check passes the outer dataset instead of the actual dataset used by scoped_dataset. In paths where the query ultimately runs against discover (e.g., dashboard split) while the original dataset is metrics, upsampling may be skipped even when all projects are allowlisted.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR3__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR3__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/103633": {
@@ -33767,6 +34543,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR103633__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR103633__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The Check operation exhibits asymmetric cache trust logic: cached permission grants are trusted and returned immediately, but cached denials from the same permission cache are ignored, leading to a fresh database lookup. This allows stale cached grants to provide access to revoked resources, posing a security risk. ",
+          "severity": "High",
+          "matched_candidate": "Revoked access persists when previously allowed permission remains cached - getCachedIdentityPermissions only checks permCache presence without freshness/revalidation, allowing stale cached grants to remain effective until TTL expiry",
+          "confidence": 0.85,
+          "reasoning": "The candidate issue identifies the same core security problem as the golden comment: stale cached permission grants can persist and allow access to revoked resources. The candidate specifically mentions that getCachedIdentityPermissions checks cache presence without freshness/revalidation, allowing stale grants to remain effective. While the golden comment emphasizes the asymmetric trust logic (cached grants trusted vs cached denials ignored), both are pointing to the same fundamental security vulnerability - that cached permission grants can provide access to revoked resources because the cache isn't properly validated."
+        },
+        {
+          "golden_comment": "The test comment says the cached permissions 'allow access', but the map stores false for dashboards:uid:dash1, so checkPermission will still treat this scope as not allowed.",
+          "severity": "Low",
+          "matched_candidate": "Test comment says cache entry should 'Allow access to the dashboard' but the inserted fixture stores map[string]bool{\"dashboards:uid:dash1\": false}, creating a contradictory test setup/comment mismatch",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the exact same issue: the test comment claims the cached permissions 'allow access' to the dashboard, but the actual map stores `false` for 'dashboards:uid:dash1', which contradicts the comment and means access would not be allowed. The candidate correctly identifies this as a contradictory test setup/comment mismatch."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR103633__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR103633__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/67876": {
@@ -35491,6 +36300,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR67876__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR67876__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "OAuth state uses pipeline.signature (static) instead of a per-request random value",
+          "severity": "Medium",
+          "matched_candidate": "CSRF/replay vulnerability: OAuth state is derived from pipeline.signature which is deterministically generated from pipeline view class names via md5_text, making it predictable and reusable rather than a secure per-request CSRF token",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - that OAuth state uses pipeline.signature (which is static/deterministic based on pipeline view class names) instead of a per-request random value. The candidate provides more detail about why it's problematic (predictable, reusable, not a secure CSRF token), but it's fundamentally the same security concern."
+        },
+        {
+          "golden_comment": "The code attempts to access integration.metadata[sender][login] without checking for the existence of the sender key. This causes a KeyError for integrations where the sender metadata was not set during creation",
+          "severity": "High",
+          "matched_candidate": "KeyError risk: integration.metadata[\"sender\"][\"login\"] access is unchecked - Integration.metadata is a JSONField defaulting to {} and doesn't guarantee sender/login keys exist, causing potential KeyError for integrations lacking that metadata",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - accessing integration.metadata['sender']['login'] without checking if the 'sender' key exists, which would cause a KeyError. Both comments point to the same bug: unchecked dictionary access that assumes the 'sender' key exists in the metadata when it may not have been set during integration creation."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "Null reference if github_authenticated_user state is missing",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR67876__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR67876__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/32918": {
@@ -36880,6 +37727,37 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR32918__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR32918__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Cleanup reference uses incorrect alias - should be 'idp-alias-' + i instead of 'alias'.",
+          "severity": "Medium",
+          "matched_candidate": "Cleanup uses wrong IDP alias - test creates providers with aliases 'idp-alias-<i>' and 'idp-alias-20' but registers cleanup against the unrelated literal 'alias', causing created providers to leak when teardown removes non-existent alias",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - that the cleanup is using the wrong alias. The golden comment says it should be 'idp-alias-' + i instead of 'alias', and the candidate explains that tests create providers with aliases 'idp-alias-<i>' but cleanup uses the literal 'alias', causing providers to leak. This is the same underlying bug."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "Recursive caching call using session instead of delegate",
+          "severity": "Critical"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR32918__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR32918__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/94942": {
@@ -38277,6 +39155,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR94942__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR94942__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The enableSqlExpressions function has flawed logic that always returns false, effectively disabling SQL expressions unconditionally:",
+          "severity": "Critical",
+          "matched_candidate": "LogicError in pkg/expr/reader.go: enableSqlExpressions function returns false on every path due to inverted logic (!h.features.IsEnabledGlobally) and both branches returning false, causing SQL expressions to be unconditionally disabled",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the enableSqlExpressions function has flawed/inverted logic that causes it to return false on every path, effectively disabling SQL expressions unconditionally. Both comments point to the same bug with the same root cause (inverted logic with !h.features.IsEnabledGlobally and both branches returning false)."
+        },
+        {
+          "golden_comment": "Several methods such as NewInMemoryDB().RunCommands and db.QueryFramesInto return 'not implemented'.",
+          "severity": "High",
+          "matched_candidate": "TypeError in pkg/expr/sql_command.go: sql.NewInMemoryDB().QueryFramesInto is called but the method unconditionally returns 'not implemented' error, causing SQL execution to deterministically fail at runtime",
+          "confidence": 0.85,
+          "reasoning": "The candidate issue identifies that QueryFramesInto returns 'not implemented' error, which is part of what the golden comment mentions. The golden comment refers to 'Several methods such as NewInMemoryDB().RunCommands and db.QueryFramesInto return not implemented'. The candidate specifically calls out QueryFramesInto returning 'not implemented', which is one of the methods mentioned in the golden comment. While the candidate doesn't mention RunCommands, it does identify the same underlying issue pattern - methods returning 'not implemented' errors."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR94942__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR94942__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/90939": {
@@ -39620,6 +40531,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR90939__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR90939__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The GetWebAssets function implements an incomplete double-checked locking pattern for caching web assets. The function first checks if the cache is populated using a read lock (RLock), and if the cache is empty, it acquires a write lock to populate it. However, it fails to re-check whether the cache was populated by another goroutine while waiting to acquire the write lock.",
+          "severity": "Medium",
+          "matched_candidate": "Race condition: incomplete double-checked locking in pkg/api/webassets/webassets.go:GetWebAssets - duplicate fetches run when concurrent callers observe empty cache before lock acquisition because the code does not re-check entryPointAssetsCache after acquiring the write lock",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the incomplete double-checked locking pattern in GetWebAssets function. Both describe the race condition where the code fails to re-check the cache after acquiring the write lock, which could lead to duplicate fetches when multiple goroutines observe an empty cache before lock acquisition. The candidate even specifies the same file location (pkg/api/webassets/webassets.go:GetWebAssets)."
+        },
+        {
+          "golden_comment": "In addition to the missing double-check, the function has a critical flaw in its error handling: it unconditionally assigns the fetch result to the cache (line 69: entryPointAssetsCache = result) regardless of whether the fetch succeeded or failed. When an error occurs during asset fetching, result is nil, and this nil value overwrites any previously valid cache entry.",
+          "severity": "High",
+          "matched_candidate": "Logic error: nil cache overwrites valid entry in pkg/api/webassets/webassets.go:GetWebAssets - cached assets are lost when asset loading fails after a previous successful population because entryPointAssetsCache = result is always assigned, and result can be nil on loader failure",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: when asset loading fails, the nil result overwrites the previously valid cache entry because `entryPointAssetsCache = result` is always assigned regardless of success or failure. Both comments point to line 69 and describe the same critical flaw in error handling where nil values can overwrite valid cached data."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR90939__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90939__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/80329": {
@@ -41010,6 +41954,32 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR80329__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR80329__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The code uses Error log level for what appears to be debugging information. This will pollute error logs in production. Consider using Debug or Info level instead.",
+          "severity": "Low",
+          "matched_candidate": "Routine batch progress messages in CleanAnnotations are logged using r.log.Error instead of appropriate log levels (Info/Debug), causing error log flooding during normal control-flow operations",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: using Error log level for routine/debugging information (specifically in CleanAnnotations batch progress messages). Both comments point out that this is inappropriate logging level usage that will cause log pollution/flooding, and both suggest using Info/Debug level instead."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR80329__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR80329__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/90045": {
@@ -42971,6 +43941,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR90045__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR90045__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The context is being created with d.Log instead of the log variable that was initialized with additional context values (name, kind, method). This means those values won't be propagated to the logging context.",
+          "severity": "Medium",
+          "matched_candidate": "Context loses log fields in Delete - structured logging breaks because enriched logger with name/kind/method values is created but plain d.Log is stored into context instead of the enriched log",
+          "confidence": 0.99,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the context is being created with d.Log (the plain logger) instead of the enriched 'log' variable that contains the additional context values (name, kind, method). Both comments point out that this causes the structured logging fields to be lost."
+        },
+        {
+          "golden_comment": "Bug: calling recordLegacyDuration when storage operation fails should be recordStorageDuration.",
+          "severity": "High",
+          "matched_candidate": "Wrong metrics recorder on Update failure - storage failure metrics break because legacy recorder (recordLegacyDuration) is called instead of recordStorageDuration when Update errors",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same bug as the golden comment: when a storage operation (specifically Update) fails, the code incorrectly calls recordLegacyDuration instead of recordStorageDuration. Both comments point to the same incorrect method being called in the error handling path."
+        },
+        {
+          "golden_comment": "Inconsistency: using name instead of options.Kind for metrics recording differs from other methods.",
+          "severity": "Medium",
+          "matched_candidate": "Wrong metrics label in Delete success - storage metrics attribution breaks because object name is passed as the third argument instead of options.Kind when recording storage duration",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: in the Delete method's metrics recording, 'name' is being used instead of 'options.Kind' as the third argument. The golden comment frames it as an inconsistency with other methods, while the candidate frames it as wrong metrics label causing attribution issues. Both point to the exact same bug - using 'name' instead of 'options.Kind' for storage duration metrics."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Wrong metrics recorder in DeleteCollection goroutine - legacy latency/error metrics break because recordStorageDuration is used instead of recordLegacyDuration for the async legacy DeleteCollection path"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR90045__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90045__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/106778": {
@@ -44654,6 +45668,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR106778__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR106778__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The rendered GrafanaRuleListItem is missing the required key prop for React list items. This can cause rendering issues when the list order changes.",
+          "severity": "Medium",
+          "matched_candidate": "React key prop omission in FilterView.tsx - GrafanaRuleListItem rendered in rules.map() is missing the key prop while sibling branches provide key={key}, causing potential stale row reuse when list order changes",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the GrafanaRuleListItem component rendered in a map() function is missing the required React key prop. The candidate even provides additional context about sibling branches having the key prop and the potential consequences (stale row reuse when list order changes), which aligns with the golden comment's concern about rendering issues when list order changes."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ReferenceError: ctx is undefined in useIsGrafanaPromRuleEditable() - the function references ctx variable with no visible binding, causing editability checks to crash when processing GrafanaPromRuleDTO"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "RuleActionsButtons is invoked with only promRule, but SilenceGrafanaRuleDrawer inside RuleActionsButtons still depends on a Grafana Ruler rule being present, so for Grafana rules coming from list views the 'Silence notifications' menu entry (now driven by Grafana Prom abilities) will toggle showSilenceDrawer without ever rendering the drawer. This means clicking 'Silence notifications' for these rules has no visible effect, even when abilities indicate silencing is allowed.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR106778__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR106778__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/107534": {
@@ -45885,6 +46934,29 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR107534__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR107534__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The applyTemplateVariables method is called with request.filters as the third parameter, but this parameter is not used in the corresponding test setup.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 0,
+      "total_golden": 1,
+      "tp": 0,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR107534__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR107534__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/79265": {
@@ -48041,6 +49113,62 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR79265__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR79265__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Race condition: Multiple concurrent requests could pass the device count check simultaneously and create devices beyond the limit. Consider using a database transaction or lock.",
+          "severity": "High",
+          "matched_candidate": "Race condition in device limit check: concurrent requests can pass the count check before inserting because there's no transaction or lock around the count-and-insert decision in CreateOrUpdateDevice",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same race condition problem as the golden comment. Both point out that concurrent requests can pass the device count check simultaneously before inserting, allowing devices to be created beyond the limit. Both suggest the same solution approach (using a transaction or lock). The candidate even specifically mentions 'CreateOrUpdateDevice' as the location of the issue, which aligns with the golden comment's concern about the device count check."
+        },
+        {
+          "golden_comment": "Anonymous authentication now fails entirely if anonDeviceService.TagDevice returns ErrDeviceLimitReached. Previously, device tagging was asynchronous and non-blocking. This change prevents anonymous users from authenticating when the device limit is reached.",
+          "severity": "Medium",
+          "matched_candidate": "Anonymous authentication fails when ErrDeviceLimitReached is returned from device tagging, whereas previously tagging errors were only logged as warnings",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same core problem as the golden comment: anonymous authentication now fails when ErrDeviceLimitReached is returned from device tagging, whereas previously tagging errors were only logged (non-blocking). Both comments point to the same behavioral change where device limit errors now block authentication instead of being handled gracefully."
+        },
+        {
+          "golden_comment": "Returning ErrDeviceLimitReached when no rows were updated is misleading; the device might not exist.",
+          "severity": "Low",
+          "matched_candidate": "Misleading error: updateDevice returns ErrDeviceLimitReached when RowsAffected()==0, but zero rows can also mean the device doesn't exist or is outside the update time window, not just that the limit was reached",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - that returning ErrDeviceLimitReached when RowsAffected()==0 is misleading because zero rows affected could mean the device doesn't exist (or other conditions), not just that the limit was reached. Both comments point to the same misleading error handling logic."
+        },
+        {
+          "golden_comment": "Time window calculation inconsistency: Using device.UpdatedAt.UTC().Add(-anonymousDeviceExpiration) as the lower bound but device.UpdatedAt as the current time may not match the intended logic. Consider using time.Now().UTC() consistently.",
+          "severity": "Low",
+          "matched_candidate": "Inconsistent time window basis: updateDevice builds its BETWEEN window from device.UpdatedAt while CreateOrUpdateDevice counts active devices using time.Now().UTC(), causing existing devices to fail updates when their UpdatedAt differs from current time",
+          "confidence": 0.85,
+          "reasoning": "Both comments identify the same core issue: there's an inconsistency in how time is being used for the device window calculation. The golden comment points out that device.UpdatedAt is used inconsistently (with UTC() in one place but not another) and suggests using time.Now().UTC() consistently. The candidate comment identifies the same problem - that updateDevice uses device.UpdatedAt for the BETWEEN window while CreateOrUpdateDevice uses time.Now().UTC(), causing a mismatch. Both are pointing to the same underlying time window calculation inconsistency between device.UpdatedAt and time.Now()."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Anonymous login breaks when TagDevice returns ErrDeviceLimitReached because the new code propagates this error instead of treating it as best-effort like other tagging errors"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "This call won\u2019t compile: dbSession.Exec(args...) is given a []interface{} where the first element is the query, but Exec\u2019s signature requires a first parameter of type string (not an interface{} splat).",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.8,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR79265__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR79265__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9": {
@@ -49494,6 +50622,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR9__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR9__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Thread-safety issue with lazy @loaded_locales",
+          "severity": "Low",
+          "matched_candidate": "Race condition in lazy locale loading - unsynchronized access to @loaded_locales allows concurrent threads to both observe locale as unloaded and execute load path simultaneously",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - a thread-safety issue with the lazy @loaded_locales variable. The candidate provides more detail by explaining it's a race condition where concurrent threads can both see the locale as unloaded and execute the load path simultaneously, but this is the same underlying thread-safety concern with lazy @loaded_locales that the golden comment identifies."
+        },
+        {
+          "golden_comment": "Consider normalizing the input locale (e.g., to a symbol) when checking/loading here to avoid double-loading if the same locale is passed as a String vs Symbol (also applies to other locations in the PR).",
+          "severity": "Low",
+          "matched_candidate": "String/Symbol locale mismatch causes same locale to load twice - @loaded_locales.include?(locale) check has no normalization, so 'en' and :en are treated as different keys",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the lack of normalization between String and Symbol locale representations (e.g., 'en' vs :en) causes the same locale to be loaded twice because the @loaded_locales.include?(locale) check treats them as different keys. Both comments point to the same underlying bug regarding String/Symbol locale mismatch in the loading check."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "NoMethodError when SiteSetting.default_locale is nil - code calls .to_sym on potentially nil value before .compact can help"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR9__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR9__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/76186": {
@@ -50887,6 +52052,55 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR76186__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR76186__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The ContextualLoggerMiddleware methods (QueryData, CallResource, CheckHealth, CollectMetrics) panic when a nil request is received. This occurs because they directly access req.PluginContext (via the instrumentContext function) without first checking if req is nil. This is a regression, as previous middleware layers gracefully handled nil requests.",
+          "severity": "High",
+          "matched_candidate": "ContextualLoggerMiddleware.QueryData dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment - that ContextualLoggerMiddleware.QueryData dereferences req.PluginContext without checking if req is nil, which causes a panic. The golden comment mentions this issue affects multiple methods (QueryData, CallResource, CheckHealth, CollectMetrics), while the candidate specifically mentions QueryData, but they are identifying the same underlying bug pattern."
+        },
+        {
+          "golden_comment": "The traceID is no longer logged for plugin requests. During a refactoring, the tracing import and the logic to extract and add traceID from the context to log parameters were removed from the LoggerMiddleware. The newly introduced ContextualLoggerMiddleware does not add this information, resulting in missing traceID in plugin request logs and impacting debugging and request tracing capabilities.",
+          "severity": "Low",
+          "matched_candidate": "Explicit traceID logging was removed from logger_middleware.go and the new contextual logger middleware does not add traceID to context, breaking request correlation for debugging",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the traceID logging was removed from logger_middleware.go during refactoring, and the new contextual logger middleware does not add traceID, which breaks request correlation/tracing for debugging. Both comments point to the same underlying issue - missing traceID in plugin request logs affecting debugging capabilities."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "TestLogger.FromContext returns a fresh logger instance instead of preserving the receiver's captured state, causing test assertions to break when code logs via FromContext logger"
+        },
+        {
+          "candidate": "FromContext fallback to d.New() when result is not *log.ConcreteLogger drops context-derived attributes instead of preserving them"
+        },
+        {
+          "candidate": "ContextualLoggerMiddleware.CallResource dereferences req.PluginContext without checking req for nil, causing panic on nil request"
+        },
+        {
+          "candidate": "ContextualLoggerMiddleware.CheckHealth dereferences req.PluginContext without checking req for nil, causing panic on nil request"
+        },
+        {
+          "candidate": "ContextualLoggerMiddleware.CollectMetrics dereferences req.PluginContext without checking req for nil, causing panic on nil request"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 7,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 5,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.2857142857142857,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR76186__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR76186__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10": {
@@ -53241,6 +54455,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR10__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR10__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "NoMethodError before_validation in EmbeddableHost",
+          "severity": "Critical",
+          "matched_candidate": "NoMethodError in before_validation callback when host is nil because sub! is called unconditionally on self.host",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - a NoMethodError occurring in the before_validation callback of EmbeddableHost. The candidate provides additional detail that the error occurs because sub! is called unconditionally on self.host when host is nil, which is the specific cause of the NoMethodError. Both are pointing to the same bug in the same location."
+        },
+        {
+          "golden_comment": "The update and destroy methods in Admin::EmbeddableHostsController do not validate the existence of the EmbeddableHost record retrieved by ID. If EmbeddableHost.where(id: params[:id]).first returns nil (i.e., the host does not exist), attempting to call methods on the nil object (e.g., save_host or destroy) will result in a NoMethodError.",
+          "severity": "Medium",
+          "matched_candidate": "NoMethodError in update/destroy actions when EmbeddableHost.where(id: params[:id]).first returns nil and the result is dereferenced without nil guard",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the update and destroy actions in the controller don't handle the case when EmbeddableHost.where(id: params[:id]).first returns nil, which would cause a NoMethodError when trying to call methods on the nil object. Both comments point to the same bug with the same root cause and same consequence."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Unhandled promise rejection from destroyRecord() - delete flow silently fails when backend destroy request rejects, unlike the save path which has .catch(popupAjaxError)"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "record_for_host compares lower(host) = ? but does not normalize the parameter\u2019s case, so mixed\u2011case referer hosts may fail to match even though comparison intends to be case\u2011insensitive.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "Because this migration inserts embeddable_hosts rows with raw SQL, any existing embeddable_hosts values that include http:// or /https:// or path segments won\u2019t go through the EmbeddableHost model\u2019s normalization, so the new host lookup (which compares only the bare host) may fail for migrated data. Consider ensuring that migrated hosts are normalized to the same format as newly created EmbeddableHost records so existing embedding configurations keep working.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 4,
+      "tp": 2,
+      "fp": 1,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR10__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR10__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7": {
@@ -55036,6 +56296,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR7__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR7__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [
+        {
+          "candidate": "Light-theme heading lightness changed from 20% to 50% in .topic-list-item h3, causing unexpected heading contrast changes"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 3,
+      "tp": 0,
+      "fp": 1,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR7__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR7__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8": {
@@ -57006,6 +58301,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR8__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR8__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": " The findMembers() call is now asynchronous and unhandled. The controller may not have member data immediately available, creating a race condition.",
+          "severity": "High",
+          "matched_candidate": "Ignored Promise rejection in admin_group_route.js: findMembers() called without return, then, or error handling, so AJAX failures propagate as unhandled rejected Promises",
+          "confidence": 0.85,
+          "reasoning": "Both comments identify the same core issue: the findMembers() call is asynchronous (returns a Promise) but is not being properly handled. The golden comment focuses on the race condition aspect (controller may not have data immediately), while the candidate focuses on the unhandled Promise/error handling aspect. Both are pointing to the same underlying problem - an async call that isn't being awaited or handled properly, which could lead to timing issues and unhandled rejections."
+        },
+        {
+          "golden_comment": "In the next action, capping the next offset at user_count can produce an empty page (e.g., total equal to limit results in offset == total, showing 2/2 with no members). This can cause confusing UX on the last page.",
+          "severity": "Medium",
+          "matched_candidate": "Off-by-one error in pagination: totalPages calculation using Math.floor(user_count / limit) + 1 overcounts by one when user_count is exactly divisible by limit, causing an empty extra page",
+          "confidence": 0.85,
+          "reasoning": "Both comments identify the same underlying issue: pagination logic that can result in an empty last page. The golden comment describes this as capping next offset at user_count producing an empty page when total equals limit. The candidate describes it as totalPages calculation overcounting when user_count is exactly divisible by limit, causing an empty extra page. Both are pointing to the same pagination bug that creates a confusing empty final page, just describing different aspects of the same root cause."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "NoMethodError in add_members: usernames parameter assumes String type but crashes when Array is passed since Array doesn't implement split method"
+        },
+        {
+          "candidate": "Race condition in member list: removeMember() and addMembers() trigger findMembers() after async AJAX success without sequencing/cancellation, allowing overlapping reloads to resolve out of order and overwrite newer state with stale data"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "HTTP method mismatch in .remove_member - test uses PUT but remove_member action expects DELETE",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR8__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR8__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3": {
@@ -58517,6 +59857,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR3__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR3__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "BlockedEmail.should_block_email? method has side effects during a read operation - it updates statistics even when just checking if an email should be blocked. This could cause race conditions in concurrent environments and makes the method name misleading.",
+          "severity": "Medium",
+          "matched_candidate": "Race condition loses match_count increments: non-atomic read/modify/write on ActiveRecord row where concurrent requests can both read the same prior value and overwrite each other",
+          "confidence": 0.9,
+          "reasoning": "The candidate issue identifies a race condition with non-atomic read/modify/write operations on match_count increments, which is the same underlying problem as the golden comment. The golden comment mentions 'race conditions in concurrent environments' due to the side effect of updating statistics during a read operation. Both are pointing to the same issue - the should_block_email? method has a race condition when updating the match_count because concurrent requests can overwrite each other's increments."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Stale invalid email rejection: valid addresses stay blocked when server rejected same email once transiently because rejectedEmails cache is never cleared on email edits, retry, or success"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Regex pattern @(#{domains}) only matches domain suffixes, not full domains. evil.example.com would match whitelist entry example.com.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR3__20260407",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR3__20260407/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5": {
@@ -60018,6 +61393,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR5__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR5__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "-ms-align-items never existed in any version of IE/Edge; the correct legacy property is -ms-flex-align.",
+          "severity": "Low",
+          "matched_candidate": "Invalid -ms-align-items CSS property in mixins.scss line 121 - this is not a valid legacy IE/Edge flexbox property and will break alignment when the mixin is used",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same problem as the golden comment - that -ms-align-items is an invalid/incorrect CSS property for legacy IE/Edge flexbox support. While the golden comment specifically mentions that the correct property should be -ms-flex-align, and the candidate doesn't provide this specific correction, both are pointing to the exact same underlying issue: the -ms-align-items property is invalid and won't work for legacy browser support."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Wrong -webkit-box-ordinal-group and -moz-box-ordinal-group mapping in mixins.scss lines 125-130 - these 2009 flexbox properties have different semantics than modern order property and are not equivalent one-to-one"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Mixing float: left with flexbox causes layout issues. Further this PR removes the float-based right alignment for .d-header .panel, which may cause the login panel in the non-Ember/noscript header (where .panel is nested inside .row and not a flex item) to stack under the title instead of remaining right-aligned.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR5__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR5__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6": {
@@ -61347,6 +62757,32 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR6__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR6__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The include_website_name method is missing the required ? suffix. Rails serializers expect include_ methods to end with ? for conditional attribute inclusion, a convention followed by other methods in this serializer. Without it, the website_name attribute may not be conditionally included as intended. Additionally, the '.' << website_host string concatenation should be replaced with '.' + website_host or '.#{website_host}' to avoid mutating string literals, which can lead to issues.",
+          "severity": "Medium",
+          "matched_candidate": "The include_website_name method should be named include_website_name? to match the serializer's established convention for include-hook predicates, otherwise the guard method will be ignored and website_name will be serialized unconditionally",
+          "confidence": 0.9,
+          "reasoning": "The candidate issue correctly identifies the same core problem as the golden comment - that the include_website_name method is missing the required '?' suffix. The candidate explains that without the '?' suffix, the guard method will be ignored and website_name will be serialized unconditionally, which matches the golden comment's concern about the attribute not being conditionally included as intended. However, the candidate does not mention the secondary issue about string concatenation ('.' << website_host), but it does identify the primary issue correctly."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR6__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR6__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4": {
@@ -64140,6 +65576,81 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR4__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR4__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "SSRF vulnerability using open(url) without validation",
+          "severity": "Critical",
+          "matched_candidate": "SSRF vulnerability: open(url) fetches untrusted URLs in TopicEmbed.import_remote - URL validation only checks host equality but doesn't sanitize before open()",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same SSRF vulnerability as the golden comment. Both point to the use of open(url) without proper validation in the TopicEmbed.import_remote method. The candidate provides additional context about the URL validation only checking host equality but not sanitizing before the open() call, which is the same underlying security concern about fetching untrusted URLs."
+        },
+        {
+          "golden_comment": "The current origin validation using indexOf is insufficient and can be bypassed. An attacker could use a malicious domain like evil-discourseUrl.com to pass this check.",
+          "severity": "Medium",
+          "matched_candidate": "Security: origin check accepts malicious superstrings - discourseUrl.indexOf(e.origin) === -1 is a substring check that can be bypassed by attacker-controlled origins containing the trusted string",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same security vulnerability as the golden comment - that the indexOf-based origin validation can be bypassed. Both point out that the substring check is insufficient and can be exploited by malicious domains. The candidate mentions 'superstrings' and 'attacker-controlled origins containing the trusted string' which is the same concept as the golden's example of 'evil-discourseUrl.com' bypassing the check."
+        },
+        {
+          "golden_comment": "The TopicEmbed.import method is susceptible to a NoMethodError if the contents parameter is nil when attempting to append a string, and an XSS vulnerability due to unescaped url interpolation in the generated HTML.",
+          "severity": "Medium",
+          "matched_candidate": "XSS vulnerability: unescaped URL interpolated into HTML - url is inserted into href and link text without escaping quotes or HTML characters",
+          "confidence": 0.85,
+          "reasoning": "The golden comment mentions two issues: (1) NoMethodError if contents is nil, and (2) XSS vulnerability due to unescaped url interpolation. The candidate issue identifies the XSS vulnerability with unescaped URL interpolation in HTML, which matches the second part of the golden comment. While the candidate doesn't mention the NoMethodError issue, it does correctly identify the XSS vulnerability which is one of the two issues mentioned in the golden comment."
+        },
+        {
+          "golden_comment": "The ERB block closes with end if, which is invalid Ruby/ERB and will raise at render; it should just be end to close the if block.",
+          "severity": "Medium",
+          "matched_candidate": "SyntaxError: invalid ERB block closing - app/views/embed/best.html.erb uses invalid 'end if' syntax instead of 'end'",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the ERB block uses invalid 'end if' syntax instead of 'end' to close the if block. Both point to the same syntax error in the same file (app/views/embed/best.html.erb) that would cause a render error."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "NoMethodError: poll_feed crashes when RSS item lacks content field - i.content is nil and .scrub raises NoMethodError with no guard or rescue present"
+        },
+        {
+          "candidate": "RuntimeError: network and parse failures are unhandled in poll_feed - SimpleRSS.parse and open() have no exception handling, causing job failures on unreachable/malformed feeds"
+        },
+        {
+          "candidate": "NoMethodError: nil post dereference on stale embed - TopicEmbed.import passes embed.post to PostRevisor without nil guard, crashes when associated post is missing"
+        },
+        {
+          "candidate": "NoMethodError: topic retrieval crashes when embed_by_username setting is unset - SiteSetting.embed_by_username.downcase called without nil guard"
+        },
+        {
+          "candidate": "TypeError: comments container may be null - document.getElementById('discourse-comments') result is dereferenced without null check in embed.js"
+        },
+        {
+          "candidate": "Test name/body mismatch - spec says 'raises an error' but only asserts response.should_not be_success, while controller actually raises Discourse::InvalidAccess"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "postMessage targetOrigin should be the origin (scheme+host+port), not the full referrer URL; using the full URL will cause the message to be dropped and prevent resizing.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The code sets X-Frame-Options: ALLOWALL which completely disables clickjacking protection. The referer validation can be bypassed (referer headers are easily spoofed), and the fallback to empty string for nil referer masks validation failures.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 11,
+      "total_golden": 6,
+      "tp": 4,
+      "fp": 6,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.36363636363636365,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR4__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR4__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1": {
@@ -65943,6 +67454,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR1__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR1__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The downsize method is defined twice. The second definition, which expects a single dimensions string parameter, overrides the first, which expected separate max_width and max_height parameters. This makes the original method unreachable and breaks existing code that calls it with separate width and height arguments.",
+          "severity": "Medium",
+          "matched_candidate": "downsize method arity changed by override: later 4-arg definition overrides earlier 5-arg definition, making existing width/height-style callers raise ArgumentError",
+          "confidence": 0.75,
+          "reasoning": "The candidate identifies the same core issue - that the downsize method is defined twice and the later definition overrides the earlier one, breaking existing callers. However, there's a discrepancy in the details: the golden comment says the second definition expects a 'single dimensions string parameter' while the candidate says it's a '4-arg definition' overriding a '5-arg definition'. Despite this detail difference, both identify the fundamental problem of method override causing existing callers to break."
+        },
+        {
+          "golden_comment": "Hardcoding maxSizeKB = 10 * 1024 ignores Discourse.SiteSettings['max_' + type + '_size_kb'], so the client-side limit can diverge from server-side and per-type settings (also applies to the 413 handler below).",
+          "severity": "Low",
+          "matched_candidate": "Site upload limit ignored: size validation replaces dynamic per-type site settings with hardcoded 10MB, causing configured limits larger or smaller than 10MB to be ignored",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same problem as the golden comment: the hardcoded 10MB (10 * 1024 KB) value replaces the dynamic per-type site settings (max_<type>_size_kb), causing the client-side limit to diverge from server-side configured limits. Both comments point to the same bug where hardcoding ignores the Discourse.SiteSettings configuration."
+        },
+        {
+          "golden_comment": "Passing 80% as the dimensions can fail for animated GIFs when allow_animated_thumbnails is true, since the animated path uses gifsicle --resize-fit which expects WxH geometry, not a percentage; downsizing would then silently fail.",
+          "severity": "Medium",
+          "matched_candidate": "Percentage resize string '80%' passed to OptimizedImage.downsize may break animated GIF downsize path that expects WxH geometry",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: passing a percentage string ('80%') to the resize/downsize operation will fail for animated GIFs because the animated GIF processing path (using gifsicle --resize-fit) expects WxH geometry format, not a percentage. Both comments point to the same underlying bug where percentage dimensions are incompatible with the animated GIF processing path."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "413 error message reports wrong max size: hardcoded 10MB in 413 handler makes user-facing 'file too large' message report 10MB even when configured server/site limit differs"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2": {
@@ -67596,6 +69151,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR2__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR2__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "logic: Potential nil pointer exception - if no TopicUser record exists, tu will be nil and calling methods on it will crash",
+          "severity": "High",
+          "matched_candidate": "NoMethodError on nil TopicUser unsubscribe - TopicsController#unsubscribe dereferences tu.notification_level immediately after TopicUser.find_by(...) with no nil guard, causing crash when user has no TopicUser row for the topic",
+          "confidence": 0.99,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the TopicUser.find_by call can return nil, and the code immediately calls methods on the result (tu.notification_level) without checking for nil first, which will cause a crash/exception. Both comments point to the same nil pointer/NoMethodError issue in the same location."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "CSS selector typo breaks previous-discussion styling - class attribute contains '.previous-discussion' with a leading dot instead of 'previous-discussion', so styling/hooks targeting the class will not match"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Typo in property name: 'stopNotificiationsText' should be 'stopNotificationsText' (missing 'n' in 'Notifications')",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR2__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR2__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/22532": {
@@ -69312,6 +70902,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR22532__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR22532__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "logic: macOS-specific sed syntax with empty string after -i flag will fail on Linux systems",
+          "severity": "Low",
+          "matched_candidate": "sed -i '' -E syntax is BSD/macOS-specific and fails on GNU sed (Linux), causing runtime error when updating GOOGLE_WEBHOOK_URL in .env file",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the exact same issue: the sed -i '' syntax is macOS/BSD-specific and will fail on Linux systems which use GNU sed. The candidate issue provides more detail about the specific context (updating GOOGLE_WEBHOOK_URL in .env file) but points to the same underlying cross-platform compatibility problem."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Race condition with shared log file /tmp/tmole.log - concurrent script invocations can overwrite or read each other's session log and derive wrong TUNNEL_URL"
+        },
+        {
+          "candidate": "Fixed 10-second timeout (20 iterations \u00d7 0.5s sleep) for tmole startup may be insufficient for slow initialization, causing tunnel setup failure"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The updateManyByCredentialId call uses an empty data object, which prevents Prisma's @updatedAt decorator from updating the updatedAt timestamp. This results in inaccurate cache status tracking, as the timestamp isn't updated when the cache is refreshed. To fix this, explicitly set the updatedAt field.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22532__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22532__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/8330": {
@@ -70941,6 +72569,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR8330__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR8330__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect end time calculation using slotStartTime instead of slotEndTime",
+          "severity": "Medium",
+          "matched_candidate": "Working-hours check ignores slot end: both start and end are computed from slotStartTime while slotEndTime is never used, causing slots that extend past workingHour.endTime to be misclassified as available",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment - that the end time calculation incorrectly uses slotStartTime instead of slotEndTime. The candidate provides more detail about the consequences (slots extending past workingHour.endTime being misclassified) but points to the identical underlying bug."
+        },
+        {
+          "golden_comment": "Using === for dayjs object comparison will always return false as it compares object references, not values. Use .isSame() method instead: dayjs(date.start).add(utcOffset, 'minutes').isSame(dayjs(date.end).add(utcOffset, minutes))",
+          "severity": "Medium",
+          "matched_candidate": "Dayjs === comparison always false: comparing two freshly created Dayjs objects with === checks object identity and is always false for equal instants, breaking override-day detection when start and end represent the same instant",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: using === to compare two dayjs objects will always return false because it compares object references/identity rather than the actual date values. Both comments point out this is a bug that affects the same functionality (override-day detection when start and end are the same instant). The candidate even mentions the same consequence - it will always be false for equal instants."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8330__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8330__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/14943": {
@@ -72393,6 +74054,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR14943__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR14943__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Using retryCount: reminder.retryCount + 1 reads a possibly stale value and can lose increments under concurrency; consider an atomic increment via Prisma (increment: 1) to avoid race conditions (also applies to the similar update in the catch block).",
+          "severity": "High",
+          "matched_candidate": "Race condition: retry tracking breaks when concurrent schedulers update the same reminder - retryCount is updated with reminder.retryCount + 1 based on a previously read value from findMany, not an atomic Prisma increment, causing lost updates",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: using `reminder.retryCount + 1` based on a previously read value instead of an atomic Prisma increment, which causes race conditions and lost updates under concurrent execution. Both comments point to the same bug with the same root cause and suggest the same solution (atomic increment)."
+        },
+        {
+          "golden_comment": "The deletion logic in scheduleSMSReminders.ts incorrectly deletes non-SMS workflow reminders (e.g., Email, WhatsApp) that have retryCount > 1. This occurs because the retryCount condition within the OR clause for deletion lacks a method: WorkflowMethods.SMS filter, causing it to apply to all reminder types instead of only SMS reminders, which is the intended scope of this function.",
+          "severity": "High",
+          "matched_candidate": "Logic error: deleteMany removes non-SMS reminders when retryCount exceeds 1 - the OR filter's second branch only checks retryCount > 1 without method: WorkflowMethods.SMS, matching any WorkflowReminder row with retryCount > 1",
+          "confidence": 0.99,
+          "reasoning": "The candidate issue identifies exactly the same bug as the golden comment: the deleteMany operation incorrectly deletes non-SMS reminders when retryCount > 1 because the second branch of the OR filter lacks a method: WorkflowMethods.SMS condition. Both comments point to the same logic error where the retryCount condition applies to all reminder types instead of only SMS reminders."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14943__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14943__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/22345": {
@@ -73804,6 +75498,33 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR22345__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR22345__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "In getBaseConditions(), the else if (filterConditions) and final else branches are unreachable. This is because getAuthorizationConditions() always returns a non-null Prisma.Sql object, making authConditions always truthy, which means only the first two if/else if conditions are ever evaluated.",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "Fetching userIdsFromOrg only when teamsFromOrg.length > 0 can exclude org-level members for orgs without child teams; consider deriving from teamIds (which includes orgId) or removing the guard so org-only orgs still include member user bookings.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 0,
+      "total_golden": 2,
+      "tp": 0,
+      "fp": 0,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22345__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22345__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/11059": {
@@ -76469,6 +78190,62 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR11059__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR11059__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The parseRefreshTokenResponse function incorrectly sets refresh_token to the hardcoded string 'refresh_token' when it's missing from the OAuth refresh token response. This invalidates the token, breaking subsequent token refreshes and causing authentication failures.",
+          "severity": "High",
+          "matched_candidate": "parseRefreshTokenResponse.ts returns hardcoded 'refresh_token' string when provider omits refresh_token, which will be persisted and break OAuth refresh flows",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the parseRefreshTokenResponse function returns a hardcoded 'refresh_token' string when the provider omits the refresh_token from the response, which will be persisted and break OAuth refresh flows. Both comments point to the same bug with the same consequences (breaking authentication/refresh flows)."
+        },
+        {
+          "golden_comment": "When APP_CREDENTIAL_SHARING_ENABLED and CALCOM_CREDENTIAL_SYNC_ENDPOINT are set, the refreshFunction helper returns the fetch Response, but several callers (for example GoogleCalendarService.refreshAccessToken expecting res.data, and HubspotCalendarService.refreshAccessToken expecting a HubspotToken) assume it returns the integration-specific token object. That mismatch will cause runtime errors in the sync-enabled path unless the return type or those call sites are adjusted.",
+          "severity": "High",
+          "matched_candidate": "refreshOAuthTokens.ts returns raw fetch Response in sync branch instead of parsed token payload, causing type mismatch with callers expecting OAuth data with access_token/expiry_date fields",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the refreshOAuthTokens.ts function returns a raw fetch Response object in the sync-enabled branch, but callers expect a parsed token object (with fields like access_token, expiry_date, etc.). Both comments point to the same type mismatch that will cause runtime errors when credential sync is enabled."
+        },
+        {
+          "golden_comment": "When the sync endpoint path is used, res is a fetch Response and has no .data; res?.data will be undefined and token.access_token will throw at runtime. This relies on a consistent return shape from refreshOAuthTokens, which isn\u2019t guaranteed currently.",
+          "severity": "High",
+          "matched_candidate": "refreshOAuthTokens.ts returns raw fetch Response in sync branch instead of parsed token payload, causing type mismatch with callers expecting OAuth data with access_token/expiry_date fields",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: when the sync endpoint path is used in refreshOAuthTokens, it returns a raw fetch Response instead of parsed token data. This causes a type mismatch because callers expect OAuth data with access_token/expiry_date fields, but they receive a Response object where .data would be undefined. Both comments point to the same underlying bug about inconsistent return types from refreshOAuthTokens."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Race condition in app-credential.ts: concurrent requests can both miss findFirst before create, causing duplicate credential creation due to non-transactional read-then-write and missing unique constraint on (userId, appId) pair"
+        },
+        {
+          "candidate": "GoogleCalendarService.ts accesses res?.data on refreshOAuthTokens result, but the function returns a fetch Response which doesn't have a .data property, breaking token field access"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Invalid Zod schema syntax. Computed property keys like [z.string().toString()] are not valid in Zod object schemas and will cause runtime errors. ",
+          "severity": "High"
+        },
+        {
+          "golden_comment": "parseRefreshTokenResponse returns a Zod safeParse result ({ success, data, error }), not the credential key object. Persisting that as key stores the wrapper instead of the token payload; we should store the parsed data or use schema parse.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 3,
+      "fp": 2,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 0.6,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR11059__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR11059__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/7232": {
@@ -78160,6 +79937,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR7232__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR7232__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Asynchronous functions deleteScheduledEmailReminder and deleteScheduledSMSReminder are called without await inside forEach loops. This occurs during booking rescheduling/cancellation, and workflow/workflow step deletion/updates. Consequently, scheduled workflow reminders may not be reliably cancelled, potentially leaving them active.",
+          "severity": "Medium",
+          "matched_candidate": "Unhandled promise errors from reminder deletions - deleteScheduledEmailReminder/deleteScheduledSMSReminder called without await inside forEach, promises not included in Promise.all, causing silent failures in handleCancelBooking.ts",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: deleteScheduledEmailReminder and deleteScheduledSMSReminder are called without await inside forEach loops. Both comments point out that this causes promises to not be properly handled, leading to unreliable cancellation of scheduled workflow reminders. The candidate mentions it occurs in handleCancelBooking.ts while the golden comment mentions multiple contexts (booking rescheduling/cancellation, workflow/workflow step deletion/updates), but they're identifying the same underlying bug pattern."
+        },
+        {
+          "golden_comment": "When immediateDelete is true, the deleteScheduledEmailReminder function cancels the SendGrid email but fails to delete the corresponding WorkflowReminder record from the database. This creates orphaned database entries and is inconsistent with the immediateDelete: false path, which marks the record as cancelled. The SendGrid DELETE API call is also omitted in this path.",
+          "severity": "High",
+          "matched_candidate": "Stale DB reminders after removed deleteMany in workflows.tsx - immediateDelete branch cancels SendGrid but returns without deleting WorkflowReminder row, cleanup job only targets cancelled=true reminders",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same core problem: when immediateDelete is true, the function cancels SendGrid but returns without deleting the WorkflowReminder database row, leaving orphaned/stale records. The candidate also notes this is inconsistent with the cleanup job behavior (which targets cancelled=true reminders). This matches the golden comment's concern about orphaned database entries and inconsistency with the immediateDelete: false path."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Unawaited reminder deletions in handleNewBooking.ts - async helpers called in forEach without await, try/catch cannot catch later promise rejections"
+        },
+        {
+          "candidate": "Error handling aborts remaining cancellations in scheduleEmailReminders.ts - single try/catch around for loop means if one API cancellation fails, remaining reminders stay scheduled"
+        },
+        {
+          "candidate": "LogicError leaves SendGrid batch undeleted - only immediateDelete branch calls SendGrid cancellation, callers without immediateDelete flag only update DB cancelled flag without deleting external SendGrid batch"
+        },
+        {
+          "candidate": "Stale reminders remain due to unawaited async deletes in bookings.tsx - fire-and-forget deleteScheduled* calls inside forEach are no longer part of awaited work after Promise.all removal"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 6,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 4,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR7232__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR7232__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/14740": {
@@ -80498,6 +82321,58 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR14740__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR14740__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Case sensitivity bypass in email blacklist",
+          "severity": "High",
+          "matched_candidate": "Blacklist check is case-sensitive - submitted guest emails are not normalized to lowercase before comparison against the lowercased blacklist, allowing blocked guests to be added with mixed-case email casing",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment - that the email blacklist check can be bypassed due to case sensitivity. The candidate provides more detail, explaining that submitted emails aren't normalized to lowercase before comparison against the lowercased blacklist, which allows blocked guests to bypass the check using mixed-case email addresses. This is the same underlying security issue."
+        },
+        {
+          "golden_comment": "The logic for checking team admin/owner permissions is incorrect. This condition uses AND (&&) which requires both isTeamAdmin AND isTeamOwner to be true, but it should use OR (||) since a user needs to be either an admin OR an owner to have permission.",
+          "severity": "Critical",
+          "matched_candidate": "Authorization check in addGuestsHandler uses AND (&&) instead of OR (||) for isTeamAdmin and isTeamOwner, requiring users to have both roles instead of either one to gain access",
+          "confidence": 1.0,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: the authorization check incorrectly uses AND (&&) instead of OR (||) for isTeamAdmin and isTeamOwner conditions. Both comments point out that this requires users to have both roles instead of either one, which is the same logical bug."
+        },
+        {
+          "golden_comment": "uniqueGuests filters out existing attendees and blacklisted emails but does not deduplicate duplicates within the input; createMany can insert duplicate attendee rows if the client submits repeated emails.",
+          "severity": "Medium",
+          "matched_candidate": "Duplicate guest emails within the submitted guests array are not deduplicated before bulk insert, allowing duplicate attendee rows to be created",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same problem as the golden comment: duplicate guest emails within the input array are not deduplicated before the bulk insert (createMany), which can result in duplicate attendee rows being created. Both comments point to the same bug - the uniqueGuests function filters out existing attendees and blacklisted emails but fails to deduplicate duplicates within the input itself."
+        },
+        {
+          "golden_comment": "Starting with an array containing an empty string may cause validation issues. Consider starting with an empty array [] and handling the empty state in the MultiEmail component instead.",
+          "severity": "Low",
+          "matched_candidate": "Guest state initialized to [''] causes validation failure when dialog opens or resets with no entered emails, as empty string fails email validation",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same problem as the golden comment - that initializing the guest state with [''] (an array containing an empty string) causes validation issues because the empty string fails email validation. Both comments point to the same underlying bug: starting with an empty string in the array rather than an empty array."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "This calls the email sender with the original guests, so existing attendees included in the input will be treated as new when sending notifications, leading to incorrect emails.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14740__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14740__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/10600": {
@@ -82597,6 +84472,57 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR10600__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR10600__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The exported function TwoFactor handles backup codes and is in BackupCode.tsx. Inconsistent naming.",
+          "severity": "Low",
+          "matched_candidate": "Exported component name 'TwoFactor' in BackupCode.tsx mismatches the file purpose and backup-code-specific UI it renders, causing confusion in stack traces and debugging",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same problem as the golden comment - that the exported component is named 'TwoFactor' but it's in a file called 'BackupCode.tsx' and handles backup code functionality. Both comments point out this naming inconsistency between the component name and the file name/purpose."
+        },
+        {
+          "golden_comment": "Error message mentions 'backup code login' but this is a disable endpoint, not login",
+          "severity": "Low",
+          "matched_candidate": "Naming mismatch in disable endpoint: log message says 'cannot proceed with backup code login' but the code is in the TOTP disable flow, not login handling",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same issue: the error message incorrectly mentions 'backup code login' when the code is actually in a disable endpoint/flow, not a login flow. The golden comment says 'this is a disable endpoint, not login' and the candidate says 'the code is in the TOTP disable flow, not login handling'. They are pointing to the exact same naming/messaging mismatch."
+        },
+        {
+          "golden_comment": "Backup code validation is case-sensitive due to the use of indexOf(). This causes validation to fail if a user enters uppercase hex characters, as backup codes should be case-insensitive for a better user experience.",
+          "severity": "Medium",
+          "matched_candidate": "Backup code case mismatch: comparison uses credentials.backupCode.replaceAll('-', '') without case normalization, but codes are generated as lowercase hex, so mixed-case user input won't match",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same underlying problem as the golden comment - that backup code validation fails when users enter uppercase characters because the comparison doesn't normalize case. The golden comment mentions indexOf() being case-sensitive, while the candidate mentions the comparison without case normalization, but both are pointing to the same bug: backup codes should be case-insensitive but the current implementation is case-sensitive, causing validation failures for mixed-case user input."
+        },
+        {
+          "golden_comment": "Because backupCodes are decrypted and mutated in memory before being written back, two concurrent login requests using the same backupCode could both pass this check and update, so a single backup code may effectively be accepted more than once if used concurrently, weakening the intended one-time-use semantics.",
+          "severity": "High",
+          "matched_candidate": "Race condition in backup code validation: concurrent login requests can both read the same backupCodes snapshot, pass validation, and write back without transaction or compare-and-swap, allowing backup code reuse",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue identifies exactly the same race condition problem as the golden comment. Both describe how concurrent login requests can read the same backup codes, both pass validation, and both write back - allowing a single backup code to be used more than once. The candidate even mentions the lack of transaction or compare-and-swap protection, which is the underlying cause of the issue described in the golden comment."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Potential TypeError from undefined backupCodes.map: code calls body.backupCodes.map() without null/undefined guard after setup response, will crash if backupCodes is missing or null"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 4,
+      "tp": 4,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.8,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10600__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10600__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/10967": {
@@ -85029,6 +86955,68 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR10967__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR10967__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Potential null reference if mainHostDestinationCalendar is undefined if evt.destinationCalendar is null or an empty array ",
+          "severity": "High",
+          "matched_candidate": "TypeError: unguarded first calendar access - EventManager.ts destructures first entry from evt.destinationCalendar and dereferences mainHostDestinationCalendar.integration without optional chaining when destinationCalendar can be null or empty",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment - both point out that there's a potential null/undefined reference when accessing mainHostDestinationCalendar because evt.destinationCalendar can be null or empty. The candidate specifically mentions the destructuring of the first entry and the unguarded access to mainHostDestinationCalendar.integration without optional chaining, which is the same underlying issue about potential null reference."
+        },
+        {
+          "golden_comment": "The optional chaining on mainHostDestinationCalendar?.integration is redundant since you already check mainHostDestinationCalendar in the ternary condition.",
+          "severity": "Low",
+          "matched_candidate": "Redundant optional chaining obscures non-null branch - editLocation.handler.ts uses optional chaining inside ternary true branches where values are already proven to be present",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same problem as the golden comment - redundant optional chaining inside ternary true branches where values are already proven to be present. The golden comment specifically mentions mainHostDestinationCalendar?.integration being redundant since mainHostDestinationCalendar is already checked in the ternary condition. The candidate describes this exact pattern in a more general way, mentioning it occurs in editLocation.handler.ts."
+        },
+        {
+          "golden_comment": "Logic error: when externalCalendarId is provided, you're searching for a calendar where externalId === externalCalendarId, but this will always fail since you're looking for a calendar that matches itself. Should likely find by credentialId or use different logic.",
+          "severity": "High",
+          "matched_candidate": "LogicError: impossible calendar lookup fallback - GoogleCalendarService.updateEvent fallback compares cal.externalId against externalCalendarId when externalCalendarId is already falsy, making the find predicate always fail",
+          "confidence": 0.6,
+          "reasoning": "Both comments identify a logic error in the calendar lookup/find operation involving externalCalendarId. However, they describe different problems: The golden comment says when externalCalendarId IS provided, comparing externalId === externalCalendarId will always fail because you're looking for a calendar that matches itself. The candidate says the fallback compares when externalCalendarId is already FALSY, making the predicate always fail. These are opposite conditions - one is about when externalCalendarId exists, the other about when it's falsy. They're pointing to different aspects or misunderstanding the same code section, but the core issue about the calendar lookup logic being flawed is similar."
+        },
+        {
+          "golden_comment": "The Calendar interface now requires createEvent(event, credentialId), but some implementations (e.g., Lark/Office365) still declare createEvent(event) only\u2014this breaks the interface contract (also applies to other locations in the PR).",
+          "severity": "Low",
+          "matched_candidate": "TypeError from createEvent arity change - Calendar.d.ts interface requires createEvent(event, credentialId) but CalendarService.ts implementation still declares only one parameter",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the exact same problem as the golden comment: the Calendar interface requires createEvent(event, credentialId) with two parameters, but implementations still only declare one parameter (event), breaking the interface contract. The candidate specifically mentions CalendarService.ts while the golden mentions Lark/Office365, but they're both pointing to the same underlying issue of interface/implementation mismatch for the createEvent method signature."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "TypeError from destinationCalendar shape change - destinationCalendar changed from single object to DestinationCalendar[] | null but EventManager.ts consumer doesn't handle null/empty case properly"
+        },
+        {
+          "candidate": "Team calendars dropped on booking create - handleNewBooking builds evt.destinationCalendar as array with multiple calendars but createBooking only persists evt.destinationCalendar[0], silently dropping additional calendars"
+        },
+        {
+          "candidate": "Recurring deletes skip DB-fetched credential - handleCancelBooking recurring-delete branch iterates only bookingToDelete.user.credentials and ignores the DB-recovered calendarCredential, causing external deletions to be missed"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Logic inversion in organization creation: The slug property is now conditionally set when IS_TEAM_BILLING_ENABLED is true, instead of when it's false as originally intended. This change, combined with requestedSlug still being set when IS_TEAM_BILLING_ENABLED is true, results in both properties being set when billing is enabled, and neither when disabled",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 8,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 3,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10967__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10967__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/8087": {
@@ -86638,6 +88626,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR8087__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR8087__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The code uses forEach with async callbacks, which causes asynchronous operations (e.g., calendar/video event deletions, payment refunds) to run concurrently without being awaited. This 'fire-and-forget' behavior leads to unhandled promise rejections, race conditions, and incomplete cleanup, as surrounding try-catch blocks cannot properly handle errors from these unawaited promises. Replace forEach with for...of loops or Promise.all() with map() to ensure proper sequential execution and error handling.",
+          "severity": "Critical",
+          "matched_candidate": "async forEach in handleCancelBooking.ts causes calendar update failures to go unhandled when async callbacks reject after handler continues",
+          "confidence": 0.95,
+          "reasoning": "The candidate issue identifies the same core problem as the golden comment: using async forEach causes asynchronous operations to not be properly awaited, leading to unhandled rejections. The candidate specifically mentions 'async forEach in handleCancelBooking.ts causes calendar update failures to go unhandled when async callbacks reject after handler continues' which is exactly the 'fire-and-forget' behavior described in the golden comment where errors from unawaited promises cannot be properly handled."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Race condition in bookings.tsx where external calendar cleanup may be incomplete because forEach does not await async callbacks, making calendar deletion fire-and-forget"
+        },
+        {
+          "candidate": "UnhandledPromiseRejection in packages/app-store/vital/lib/reschedule.ts where async forEach causes deletion failures to escape the surrounding try/catch block"
+        },
+        {
+          "candidate": "UnhandledPromiseRejection in packages/app-store/wipemycalother/lib/reschedule.ts where async forEach causes promise rejections from getCalendar and deleteEvent to escape error handling"
+        },
+        {
+          "candidate": "Promise error in bookings.tsx where async forEach cleanup is unawaited, causing calendar deletions to escape error handling when delete/getCalendar rejects"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Consider adding try-catch around the await to handle import failures gracefully",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 4,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.2,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8087__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8087__20260310/pull/1"
     }
   }
-}
\ No newline at end of file
+}
diff --git a/offline/results/anthropic_claude-sonnet-4-5-20250929/candidates.json b/offline/results/anthropic_claude-sonnet-4-5-20250929/candidates.json
index e3106da..974dfd3 100644
--- a/offline/results/anthropic_claude-sonnet-4-5-20250929/candidates.json
+++ b/offline/results/anthropic_claude-sonnet-4-5-20250929/candidates.json
@@ -1163,6 +1163,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "RuntimeException thrown when derived _en.properties file does not exist, causing verification to crash instead of gracefully handling missing bundle files",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Method name 'santizeAnchors' is misspelled - should be 'sanitizeAnchors'",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/37634": {
@@ -2250,6 +2264,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NullPointerException: wrong parameter in null check - line 73 in AccessTokenContext.java validates grantType instead of rawTokenId, allowing null rawTokenId to pass validation despite error message claiming otherwise",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Javadoc mismatch on shortcut length - OAuth2GrantTypeFactory.getShortcut documentation states shortcuts are 'usually like 3-letters' but actual implementations use 2-letter shortcuts (ac, cc, pg)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "RuntimeException: overly broad exception assertion - DefaultTokenContextEncoderProviderTest.testIncorrectGrantType catches generic RuntimeException instead of specific IllegalArgumentException, allowing unrelated runtime failures to pass the test",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/38446": {
@@ -3163,6 +3197,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoSuchElementException: Optional.get() called without presence check in RecoveryAuthnCodeInputLoginBean.java - credentialModelOpt.get() is called without verifying the Optional is present, causing crashes when user has no recovery code credential",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential null ID edge case in recovery credentials - BackwardsCompatibilityUserStorage.getCredentials() reconstructs RecoveryAuthnCodesCredentialModel without preserving the stored credential ID, causing credential removal to break",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/36882": {
@@ -3764,6 +3812,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "picocli.exit call in UpdateCompatibilityCheck.run terminates the entire JVM when rolling-updates feature is disabled, breaking embedding scenarios because FEATURE_DISABLED exit code (4) triggers System.exit in Picocli.exit for non-test modes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/36880": {
@@ -4648,6 +4704,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Permission cleanup breaks when V2 is enabled without V1 - cleanup is gated by ADMIN_FINE_GRAINED_AUTHZ flag while V2-specific client-permission logic is added separately, causing role/client/group removal cleanup to not run when V2 is enabled without V1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Client-specific grants are ignored when resource owner differs from server id - resource lookup uses server.getId() as owner argument while per-client resources are not owned by the resource-server id, causing lookup to miss and fall back to type-level 'Clients' resource",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Dead code: getEvaluationContext() method is never called - private method is defined but all permission-evaluation paths use root.evaluatePermission() directly without this helper",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "IndexOutOfBoundsException from fixed client-scope index - test code uses get(1) on default client scopes but only asserts size >= 1, not >= 2, causing potential crash when client has fewer than two default scopes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/37038": {
@@ -5707,6 +5789,44 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "LogicError: per-group checks use resource id lookup - group-specific permissions break when resource name differs from internal resource id. getGroupIdsWithViewPermission() passes groupResource.getId() but hasPermission() expects a resource name in findByName(server, groupId), causing id-vs-name mismatch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: manage permission no longer honors defaults - canManage() narrowed to hasOneAdminRole(AdminRoles.MANAGE_USERS) check, breaking group management when default user-management grants exist without direct MANAGE_USERS role",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential LogicError: manage-users check narrows default-role mapping - role mapping gated on hasOneAdminRole(AdminRoles.MANAGE_USERS) before checkAdminRoles(role), while canManageDefault() defines broader default-manage semantics, potentially breaking role mapping when canManageDefault grants access without direct MANAGE_USERS role",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: inconsistent admin fine-grained feature flag - V2 management/evaluator paths use ADMIN_FINE_GRAINED_AUTHZ_V2 flag but listener registration uses ADMIN_FINE_GRAINED_AUTHZ flag, causing permission cleanup to never run when V2 is enabled without V1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AssertionError: child group status mismatches API - test expects subgroup creation to return 204 No Content but GroupResource.addChild() returns 201 Created when creating new child groups",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: manage-members grants hidden from group filtering - getGroupIdsWithViewPermission() has id/name mismatch where it passes groupResource.getId() to hasPermission() which treats it as resource name via findByName(), causing per-group VIEW_MEMBERS/MANAGE_MEMBERS permissions to be missed",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/33832": {
@@ -6754,6 +6874,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "ASN1Decoder.readLength returns -1 for indefinite-length encoding, but callers readInteger and readNext don't handle this sentinel value, passing it directly to read(length) which causes NegativeArraySizeException when allocating new byte[length]",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ASN1Decoder.readLength() compares decoded length against total input limit instead of remaining bytes after tag/length consumption, incorrectly rejecting valid payloads where content exactly fills remaining bytes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AuthzClientCryptoProvider.concatenatedRSToASN1DER creates two ASN1Encoder instances and writes r/s to them, but immediately discards these instances as dead code since the returned bytes are built from separate encoder instances",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/40940": {
@@ -7624,6 +7764,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Reader thread not joined before assertion - test sets deletedAll flag and immediately asserts caughtExceptions is empty without waiting for the background thread to finish, causing a race condition where exceptions can be added after the assertion",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1": {
@@ -8450,6 +8598,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "isConditionalPasskeysEnabled(context.getUser()) gates passkey setup but requires user != null, causing authenticate/challenge to skip webauthnAuth.fillContextForm(context) on initial login page before user is selected",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In challenge method error path, when login fails with unknown user, context.getUser() is null so isConditionalPasskeysEnabled guard skips webauthnAuth.fillContextForm(context), removing passkey UI options on retry",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/93824": {
@@ -9390,6 +9552,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: replacement process starts before prior exits in SpanFlusher._ensure_processes_alive - kills unhealthy worker and immediately calls _create_process_for_shards without waiting for prior process to exit, allowing overlapping shard handlers",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Leaked processes when join deadline breaks loop early in SpanFlusher.join - loop breaks when deadline expires but skips terminate() calls for remaining processes, leaving child workers running",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Naming mismatch: shard/shards metric tags split dashboards - inconsistent metric tag keys where one timer uses 'shard' and another uses 'shards' for the same shard-set context",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Timing race: sleep no longer waits in test_basic - test monkeypatches time.sleep to no-op but then relies on time.sleep(0.1) to give flusher threads time to process, making the wait ineffective",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5": {
@@ -10809,6 +10997,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "ValidationError: age=0 bypasses mutual-exclusion check - invalid mixed-spec reports pass validation when age or timestamp is zero in src/sentry/issues/endpoints/browser_reporting_collector.py",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Logic error from mispaired nodestore results - error details map to wrong IDs when get_multi omits or reorders entries in src/sentry/replays/endpoints/project_replay_summarize_breadcrumbs.py",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "KeyError: update reads wrong validated_data key - detector type updates break when validated_data only contains serializer field name type in src/sentry/workflow_engine/endpoints/validators/base/detector.py",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Authorization telemetry recorded before feature check - unauthorized attempts are logged as assemble events when feature access is denied in src/sentry/preprod/api/endpoints/organization_preprod_artifact_assemble.py",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1": {
@@ -12087,6 +12301,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "AttributeError: organization_context.member accessed without None check in OrganizationAuditLogsEndpoint.get, breaks when authenticated via API key or org auth token",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: OptimizedCursorPaginator.get_item_key applies math.floor/ceil to datetime fields, causing cursor generation to crash with order_by='-datetime'",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AssertionError: BasePaginator.get_result allows negative QuerySet slicing on previous-page cursors by using offset directly instead of max(0, offset)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AssertionError: OptimizedCursorPaginator.get_result allows negative start_offset when enable_advanced_features is true, causing Django ORM to crash on negative slice",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: OptimizedCursorPaginator.get_item_key applies math.floor/ceil to non-numeric fields (datetime/string) when used with non-numeric order_by keys",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/97529": {
@@ -12885,6 +13131,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: TotalDocs() call at pkg/storage/unified/resource/search.go:216 accesses b.cache without lock while BuildIndex writes to b.cache under lock, causing potential concurrent map read/write panic",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Nil pointer dereference: History() and Origin() methods in pkg/storage/unified/resource/server.go:922-929 call s.search without nil check when search is optional",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition: BuildIndex in pkg/storage/unified/search/bleve.go allows duplicate concurrent index builds for same key due to narrowed lock scope, only protecting cache write instead of entire function",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/80168": {
@@ -13646,6 +13912,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but doesn't implement required abstract methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data), making it non-instantiable and causing TypeError at runtime",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "New abstract method build_occurrence_and_event_data added to StatefulDetectorHandler but existing subclass MetricAlertDetectorHandler doesn't implement it, causing instantiation failure",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Docstring for evaluate method claims it returns a list of DetectorEvaluationResult but the actual return type is dict[DetectorGroupKey, DetectorEvaluationResult]",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/80528": {
@@ -14177,6 +14463,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Dead code: transformed config is not used in return value. Code copies and humanizes config['schedule_type'] at lines 160-163, but line 168 returns monitor_environment.monitor.config instead of the mutated local config variable, causing the transformation to be dropped",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/77754": {
@@ -14785,6 +15079,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Shared datetime default freezes queued timestamp - the dataclass field default `queued: datetime = timezone.now()` is evaluated once at class definition time, so omitted `queued` values reuse a frozen timestamp",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: datetime in to_dict may break JSON serialization - to_dict() returns dataclasses.asdict(self) which includes queued as a datetime, breaking serialization when passed to Celery task kwargs",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Naming typo: test_from_dict_inalid_data mislabels failure - test name contains typo 'inalid' instead of 'invalid'",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Naming mismatch: empty_array test uses dict - test named test_from_dict_empty_array passes an empty dict {} instead of an array",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential: TypeError enqueueing non-JSON task kwargs - assignment_source.to_dict() is enqueued directly in task kwargs but contains non-serializable datetime objects",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/95633": {
@@ -15642,6 +15968,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "close() method drops processed-but-uncommitted offsets because shutdown_event is set and commit thread is joined before queue_pool.shutdown() drains workers, causing offsets completed during shutdown to miss final commit",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "test_concurrent_processing_across_groups only checks that all items completed and group IDs appeared, but doesn't verify actual concurrent execution through timing or overlap checks",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "test_concurrent_processing_different_groups claims to test concurrency but only asserts that 4 results were processed without any concurrency-specific verification",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Background-thread tests use fixed sleep/polling loops with hardcoded timeouts that will fail intermittently when processing takes longer than expected",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "test_thread_queue_parallel_error_handling has misleading docstring claiming errors don't block offset commits for other messages, but test body asserts no commits occur",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2": {
@@ -17047,6 +17405,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "AssertionError: negative QuerySet slicing enabled - pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice in src/sentry/api/paginator.py:877-882",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: math.floor/ceil on non-numeric keys - cursor building crashes when paginator key is datetime or other non-numeric field in src/sentry/api/paginator.py:838-840",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AssertionError: prev pagination keeps negative offset - existing paginator crashes when previous-page cursor has a negative offset in src/sentry/api/paginator.py:182-184",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3": {
@@ -18246,6 +18624,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "hash() cache key breaks invalidation: Python hash values are process-dependent, so cache lookup/set and invalidate_upsampling_cache can compute different keys across workers in src/sentry/api/helpers/error_upsampling.py lines 27 and 73, breaking cross-process cache reuse/invalidation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Zero sample_rate skipped by falsy guard: _set_sample_rate_from_error_sampling uses 'if client_sample_rate:' which skips valid zero values (0/0.0) instead of propagating them to normalized_data['sample_rate'] in src/sentry/testutils/factories.py lines 353-355",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/103633": {
@@ -19119,6 +19511,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Test comment contradicts setup in pkg/services/authz/rbac/service_test.go:981-982 - comment says 'Allow access to the dashboard' but cache stores false (denial)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Cached permissions are trusted without freshness validation in pkg/services/authz/rbac/service.go:123-135 - revoked access persists until TTL expiry",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/67876": {
@@ -20057,6 +20463,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "KeyError: sender metadata access unchecked - integration.metadata[\"sender\"][\"login\"] is dereferenced without key guard or exception handling in GitHubInstallation.dispatch, can raise KeyError for integrations lacking that metadata",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CSRF/replay risk: predictable OAuth state - OAuth state is derived from reusable pipeline.signature which is deterministically derived via md5_text rather than being a randomized per-request CSRF token, making it predictable and reusable",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/32918": {
@@ -20681,6 +21101,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "cleanup uses wrong IDP alias - testCacheIDPForLogin creates providers with aliases 'idp-alias-<i>' and 'idp-alias-20' but registers cleanup against the literal 'alias', causing created providers to leak when teardown removes non-existent alias",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/94942": {
@@ -21462,6 +21890,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "enableSqlExpressions function in pkg/expr/reader.go:194-200 returns false on every path, unconditionally disabling SQL expressions",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Caller gate at pkg/expr/reader.go:129-132 rejects QueryTypeSQL whenever !enabled due to enableSqlExpressions always returning false",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "QueryFramesInto method in pkg/expr/sql/db.go:20-21 unconditionally returns 'not implemented' error, causing deterministic runtime failure",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SQLCommand.Execute in pkg/expr/sql_command.go:96-100 calls sql.NewInMemoryDB().QueryFramesInto which always fails due to unimplemented method",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/90939": {
@@ -21962,6 +22416,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: incomplete double-checked locking in pkg/api/webassets/webassets.go - duplicate fetches run when concurrent callers observe empty cache before lock acquisition because cache is not re-checked after acquiring write lock",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Logic error: nil cache overwrites valid entry in pkg/api/webassets/webassets.go - cached assets are lost when asset loading fails after a previous successful population because result is always assigned to entryPointAssetsCache even when nil",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/80329": {
@@ -22998,6 +23466,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Normal control-flow messages in CleanAnnotations are logged with r.log.Error instead of appropriate log level (Info/Debug), flooding error logs with routine batch progress messages",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/90045": {
@@ -24468,6 +24944,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Update error path calls d.recordLegacyDuration(...) instead of d.recordStorageDuration(...) in pkg/apiserver/rest/dualwriter_mode3.go:125-132",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Delete method stores plain d.Log into context instead of enriched log variable, dropping name/kind/method fields in pkg/apiserver/rest/dualwriter_mode3.go:96-97",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Delete success path records storage duration with 'name' parameter instead of 'options.Kind' in pkg/apiserver/rest/dualwriter_mode3.go:106",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "DeleteCollection async legacy path uses d.recordStorageDuration(...) instead of d.recordLegacyDuration(...) in pkg/apiserver/rest/dualwriter_mode3.go:161-166",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/106778": {
@@ -25739,6 +26241,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "ReferenceError: ctx is undefined in useIsGrafanaPromRuleEditable() at useAbilities.ts:369-370",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Missing React key prop on GrafanaRuleListItem in FilterView.tsx rules.map(), causing potential stale row reuse when list order changes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/107534": {
@@ -27549,6 +28065,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: device limit check overshoots - concurrent requests can all observe count < limit before any write completes in CreateOrUpdateDevice (database.go:108-117)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: inconsistent time window basis - updateDevice uses device.UpdatedAt while CreateOrUpdateDevice uses time.Now().UTC() for counting active devices, causing existing devices to fail update WHERE clause (database.go:80-81, 110)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: anonymous auth can fail on limit error - ErrDeviceLimitReached is propagated as hard failure in client.go:44-46 instead of warning like other tagging errors",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "MisleadingError: no rows updated returns limit reached - updateDevice returns ErrDeviceLimitReached when RowsAffected()==0, but zero rows can also mean device doesn't exist or is outside time window (database.go:95-97)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: anonymous auth fails on device limit - Authenticate returns error when TagDevice yields ErrDeviceLimitReached, breaking previous best-effort behavior (client.go:44-46)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9": {
@@ -28296,6 +28844,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError from missing SiteSetting.default_locale - config/initializers/i18n.rb:17 unconditionally calls SiteSetting.default_locale.to_sym without nil guard",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in lazy locale loading - unsynchronized lazy initialization and check-then-act access to @loaded_locales array in lib/freedom_patches/translate_accelerator.rb:62-64 allows concurrent threads to double-load locales",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "String/Symbol locale mismatch causes duplicate loading - @loaded_locales.include?(locale) check in lib/freedom_patches/translate_accelerator.rb:62-64 lacks normalization, treating string and symbol forms of same locale as distinct",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/76186": {
@@ -28994,6 +29562,62 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "TestLogger.FromContext returns a fresh logger instance instead of preserving the receiver's captured state, causing test assertions to break when code logs via FromContext logger",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "FromContext fallback to d.New() loses the logger returned by FromContext and drops context-derived attributes when the returned logger is not *log.ConcreteLogger",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "QueryData method dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CallResource method dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CheckHealth method dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CollectMetrics method dereferences req.PluginContext without checking req for nil, causing panic on nil request",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Explicit traceID extraction removed from logger_middleware.go, and replacement context enrichment does not add traceID, breaking request correlation when debugging plugin calls across traced services",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware dereferences req.PluginContext without nil check in QueryData/CallResource/CheckHealth/CollectMetrics methods, causing panic when nil request reaches middleware",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LoggerMiddleware changed from explicitly appending traceID to using FromContext, while instrumentContext only injects endpoint/pluginId/dsName/dsUID/uname without traceID, potentially breaking request correlation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10": {
@@ -30779,6 +31403,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError: before_validation dereferences nil host in app/models/embeddable_host.rb - validation crashes when host is nil because sub! is called unconditionally",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "NoMethodError on missing EmbeddableHost in update/destroy in app/controllers/admin/embeddable_hosts_controller.rb - request crashes when params[:id] does not match any record, no nil guard present",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Unhandled rejection from destroyRecord promise in app/assets/javascripts/admin/components/embeddable-host.js.es6 - delete flow silently fails when backend destroy request rejects, missing rejection handler",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7": {
@@ -31910,6 +32554,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Light-theme heading lightness changed from 20% to 50% in .topic-list-item h3, causing unexpected contrast changes in app/assets/stylesheets/mobile/topic-post.scss",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8": {
@@ -33373,6 +34025,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError in add_members: usernames parameter assumes String type but crashes when Array is passed, as Array does not implement split method",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Off-by-one error in pagination: totalPages calculation overcounts by one when user_count is exactly divisible by limit, causing empty extra page",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Unhandled Promise rejection in admin_group_route.js: findMembers() called without error handling, hiding fetch errors",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in member list reload: overlapping async findMembers() calls can resolve out of order, overwriting newer state with stale member data",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3": {
@@ -34261,6 +34939,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Stale email rejection: valid addresses stay blocked when server rejected same email once transiently. rejectedEmails cache is never cleared on email edits, retry, or success in create_account_controller.js",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in blocked_email.rb: non-atomic read/modify/write on match_count field loses increments when concurrent requests update the same record",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5": {
@@ -35089,6 +35781,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Invalid -ms-align-items property in align-items mixin at app/assets/stylesheets/common/foundation/mixins.scss:121 breaks IE/legacy Edge alignment (line already emits correct -ms-flex-align property)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Incorrect -webkit-box-ordinal-group and -moz-box-ordinal-group mapping in order mixin at app/assets/stylesheets/common/foundation/mixins.scss:125-130 (2009 flexbox properties have different semantics than modern order property)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6": {
@@ -35975,6 +36681,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "include_website_name method is missing the '?' suffix required by the serializer's include-hook convention, causing the hook to be ignored. Should be 'include_website_name?' to match the pattern used by other hooks like include_email?, include_card_image_badge_id?, and dynamically defined hooks in staff_attributes, private_attributes, and untrusted_attributes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4": {
@@ -38050,6 +38764,74 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError: missing content crashes feed polling - job crashes when an RSS item lacks a content field in app/jobs/scheduled/poll_feed.rb:31-36, i.content is nil and .scrub raises NoMethodError with no guard or rescue present",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "RuntimeError: network and parse failures are unhandled - scheduled job fails noisily when the feed is unreachable or malformed in app/jobs/scheduled/poll_feed.rb:29, SimpleRSS.parse open() has no local rescue around network fetch or parse",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "NoMethodError: nil post dereference on stale embed - revise crashes when embed exists but associated post is missing in app/models/topic_embed.rb:32-36, embed.post can be nil but is passed to PostRevisor.new(post) with no nil guard",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "NoMethodError from nil downcase on missing setting - topic retrieval crashes when embed_by_username is unset or nil in lib/topic_retriever.rb:49, SiteSetting.embed_by_username.downcase called with no local nil guard",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: comments container may be null - script crashes when #discourse-comments is absent on the page in app/assets/javascripts/embed.js:5-12, document.getElementById returns null but comments.appendChild(iframe) dereferences with no null guard",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: origin check accepts malicious superstrings - message validation breaks when attacker origin contains discourseUrl as a substring in app/assets/javascripts/embed.js:17, discourseUrl.indexOf(e.origin) === -1 is a substring check rather than exact origin comparison",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SyntaxError: invalid ERB block closing - template rendering breaks when ERB parses invalid 'end if' syntax in app/views/embed/best.html.erb:6, uses <%- end if %> instead of <%- end %>",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Test name/body mismatch on error assertion - spec name says 'raises an error' but body only asserts response.should_not be_success in spec/controllers/embed_controller_spec.rb:13-16",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SSRF: open-uri fetches unvalidated feed URL - internal network access becomes possible when feed_polling_url points to metadata or private hosts in app/jobs/scheduled/poll_feed.rb:24-30, open(SiteSetting.feed_polling_url) has no scheme/host allowlist or destination validation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SSRF: open fetches untrusted URLs - internal network access occurs when attacker controls import_remote URL in app/models/topic_embed.rb:48, open(url).read has insufficient validation beyond URI host equality check",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "XSS: unescaped URL interpolated into HTML - script injection renders when url contains quotes or HTML in app/models/topic_embed.rb:13, raw url interpolated in href and link text without escaping",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1": {
@@ -39435,6 +40217,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Site upload limit ignored - size validation breaks when site settings allow larger or smaller uploads because hardcoded 10MB replaces dynamic per-type site settings in app/assets/javascripts/discourse/lib/utilities.js",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "413 error message reports wrong max size - user-facing 'file too large' message always reports 10MB even when configured server/site limit differs in app/assets/javascripts/discourse/lib/utilities.js",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Percentage resize string may break downsize - passing '80%' to OptimizedImage.downsize for animated GIFs may cause issues when backend expects WxH geometry in app/controllers/uploads_controller.rb",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Downsize arity changed by override - second definition of downsize method overrides first one, making 5-arg form (width, height) unreachable and breaking existing callers in app/models/optimized_image.rb",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2": {
@@ -40521,6 +41329,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "NoMethodError on nil TopicUser unsubscribe - TopicsController#unsubscribe dereferences tu.notification_level without nil guard after TopicUser.find_by at app/controllers/topics_controller.rb:105-107",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CSS selector typo breaks previous-discussion styling - class attribute contains '.previous-discussion' with leading dot instead of 'previous-discussion' at app/views/email/notification.html.erb:10",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/22532": {
@@ -41801,6 +42623,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "sed -i command uses BSD/macOS-specific syntax (sed -i '' -E) that fails on Linux with GNU sed in scripts/test-gcal-webhooks.sh:67-69",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Shared log file /tmp/tmole.log creates race condition when multiple script instances run concurrently, causing wrong webhook URL to be reused",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Fixed 10-second timeout for tmole startup polling is insufficient for slow initialization, causing premature failure in scripts/test-gcal-webhooks.sh:41-52",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/8330": {
@@ -42877,6 +43719,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Dayjs === comparison always false: comparing two freshly created Dayjs objects with === checks object identity and is always false for equal instants in packages/trpc/server/routers/viewer/slots.ts:114",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Working-hours check ignores slot end: both start and end are computed from slotStartTime while slotEndTime is never used, causing slots that extend past workingHour.endTime to be misclassified as available in packages/trpc/server/routers/viewer/slots.ts:141-143",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/14943": {
@@ -43616,6 +44472,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition: retryCount increment uses stale read value instead of atomic Prisma increment, causing lost updates when concurrent schedulers update the same reminder",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "deleteMany filter with OR condition lacks method: WorkflowMethods.SMS in second branch, causing deletion of non-SMS workflow reminders when retryCount > 1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/22345": {
@@ -46562,6 +47432,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition in apps/web/pages/api/webhook/app-credential.ts: non-transactional read-then-write allows concurrent requests to both create duplicate credentials because there's no unique constraint on (userId, appId) pair",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Logic error in packages/app-store/_utils/oauth/parseRefreshTokenResponse.ts: hardcoded placeholder 'refresh_token' string is returned when provider omits refresh_token, causing wrong value to be persisted",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Type error in packages/app-store/_utils/oauth/refreshOAuthTokens.ts: sync branch returns raw fetch Response instead of parsed token payload, creating incompatible return type with callers expecting .data.access_token fields",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Type error in packages/app-store/googlecalendar/lib/CalendarService.ts: code accesses res?.data on fetch Response object which doesn't have .data property, breaking token field access",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/7232": {
@@ -48050,6 +48946,44 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Unhandled promise errors from reminder deletions in handleCancelBooking.ts - deleteScheduledEmailReminder/deleteScheduledSMSReminder called without await inside forEach, promises not included in Promise.all",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Promise handling error in handleNewBooking.ts - unawaited reminder deletions in forEach loop, try/catch cannot reliably catch later promise rejections",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Error handling aborts remaining cancellations in scheduleEmailReminders.ts - single try/catch wrapping all reminder cancellations causes later reminders to be skipped if one request fails",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError leaves SendGrid batch undeleted in emailReminderManager.ts - callers invoke deleteScheduledEmailReminder without immediateDelete flag, only updating DB cancelled flag without calling SendGrid cancellation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Stale reminders remain due to unawaited async deletes in viewer/bookings.tsx - deleteScheduledEmailReminder and deleteScheduledSMSReminder called without await in forEach, cleanup no longer part of mutation's awaited work",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Stale DB reminders after removed deleteMany in viewer/workflows.tsx - immediateDelete branch cancels SendGrid but doesn't delete or update WorkflowReminder row, and cleanup job only targets cancelled=true reminders",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/14740": {
@@ -49504,6 +50438,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "AuthorizationError: admin permission check requires both roles - access is denied when a team admin is not also the team owner due to using && instead of || in isTeamAdminOrOwner check at packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:46-48",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ValidationError: duplicate guest emails not deduplicated - duplicate attendee rows can be created when input contains repeated emails because filtering only checks against existing attendees and blacklist, not duplicates within the submitted guests array at packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:74-78",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ValidationError: empty-string sentinel blocks add - guest submission breaks when dialog opens or resets with no entered emails because state initializes to [''] and validation fails on empty string at apps/web/components/dialog/AddGuestsDialog.tsx:32",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AuthorizationBypass: blacklist check is case-sensitive - blocked guests can be added when submitted with mixed-case email casing because blacklist is normalized with toLowerCase() but submitted guest values are compared unchanged at packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:74-77",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/10600": {
@@ -50913,6 +51873,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Naming mismatch in disable.ts: error log says 'cannot proceed with backup code login' but endpoint is for TOTP disable flow, not login",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition in next-auth-options.ts: concurrent login requests can both validate and consume the same one-time backup code due to lack of transaction/compare-and-swap",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Case sensitivity bug in backup code validation: codes generated in lowercase hex but validation doesn't normalize user input, causing mixed-case entries to fail",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Component naming mismatch in BackupCode.tsx: file exports function named 'TwoFactor' but renders backup-code-specific UI",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential TypeError in EnableTwoFactorModal.tsx: calling backupCodes.map without null/undefined guard when setup response may omit backupCodes field",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/10967": {
@@ -52759,6 +53751,56 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Potential TypeError: extra createEvent arg breaks integrations - event creation breaks when calendar adapters implement the old two-parameter contract differently. New call site passes createEvent(calEvent, credential.id), but CalendarService.ts still implements createEvent(event) with only one parameter, causing interface/signature mismatch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: unguarded first calendar access - location fallback crashes when destinationCalendar is null or empty. Code destructures first entry from evt.destinationCalendar ?? [] and immediately dereferences mainHostDestinationCalendar.integration without optional chaining or guard",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential: TypeError from createEvent arity change - implementations or callers break when they still use the old single-argument signature. Interface requires createEvent(event, credentialId) but CalendarService.ts still declares only one parameter",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential: TypeError from destinationCalendar shape change - event consumers break when they still treat destinationCalendar as a single object. destinationCalendar becomes DestinationCalendar[] | null, but EventManager.ts destructures and dereferences without optional chaining",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LogicError: impossible calendar lookup fallback - calendar selection falls back incorrectly when externalCalendarId is absent. Fallback predicate compares cal.externalId against absent value and cannot recover intended destination calendar",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TypeError: team calendars dropped on booking create - collective member calendars are ignored when multiple destination calendars were gathered. evt.destinationCalendar built as array with team member calendars appended, but createBooking persists only evt.destinationCalendar[0]",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential TypeError: recurring deletes skip DB-fetched credential - linked recurring events remain undeleted when credential exists only in DB, not user.credentials. Recurring-delete branch iterates only bookingToDelete.user.credentials and ignores DB-fetched calendarCredential",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential: redundant optional chaining obscures non-null branch - truthy-checked values are still accessed as optional. Extra ?. inside ternary true branches where values are already proven present",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/8087": {
@@ -54179,6 +55221,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "TypeError: async forEach errors escape handler in packages/features/bookings/lib/handleCancelBooking.ts - calendar update failures go unhandled when async callbacks reject after handler continues",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Race condition: deletions continue after handler returns in packages/trpc/server/routers/viewer/bookings.tsx - external calendar cleanup may be incomplete when response proceeds immediately",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "UnhandledPromiseRejection: async forEach escapes try/catch in packages/app-store/vital/lib/reschedule.ts - deletion failures go uncaught when async callbacks reject after forEach returns",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "UnhandledPromiseRejection: async forEach escapes try/catch in packages/app-store/wipemycalother/lib/reschedule.ts - deletion failures go uncaught when async callbacks reject inside forEach",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Promise error: async forEach cleanup unawaited in packages/trpc/server/routers/viewer/bookings.tsx - calendar deletions escape error handling when delete/getCalendar rejects",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   }
-}
\ No newline at end of file
+}
diff --git a/offline/results/anthropic_claude-sonnet-4-5-20250929/evaluations.json b/offline/results/anthropic_claude-sonnet-4-5-20250929/evaluations.json
index 244bef5..1611832 100644
--- a/offline/results/anthropic_claude-sonnet-4-5-20250929/evaluations.json
+++ b/offline/results/anthropic_claude-sonnet-4-5-20250929/evaluations.json
@@ -1963,6 +1963,49 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37429__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37429__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The method name 'santizeAnchors' should be 'sanitizeAnchors' (missing 'i').",
+          "severity": "Low",
+          "matched_candidate": "Method name 'santizeAnchors' is misspelled - should be 'sanitizeAnchors'",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same typo in the method name 'santizeAnchors' - a missing 'i' that should make it 'sanitizeAnchors'. The wording differs slightly but they describe the identical spelling error."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "RuntimeException thrown when derived _en.properties file does not exist, causing verification to crash instead of gracefully handling missing bundle files"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The translation is in Italian instead of Lithuanian. This should be translated to Lithuanian to match the file's locale (messages_lt.properties).",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The totpStep1 value uses Traditional Chinese terms in the Simplified Chinese file (zh_CN), which is likely incorrect for this locale. Please verify the locale\u2011appropriate translation.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The anchor sanitization logic has a potential issue where it consumes English matcher groups without proper validation. If the translated text has more anchor tags than the English text, this could lead to incorrect validation results.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 4,
+      "tp": 1,
+      "fp": 1,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.25,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37429__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37429__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/37634": {
@@ -3900,6 +3943,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37634__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37634__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Wrong parameter in null check (grantType vs. rawTokenId)",
+          "severity": "Critical",
+          "matched_candidate": "NullPointerException: wrong parameter in null check - line 73 in AccessTokenContext.java validates grantType instead of rawTokenId, allowing null rawTokenId to pass validation despite error message claiming otherwise",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same bug: on line 73 of AccessTokenContext.java, the null check validates the wrong parameter (grantType instead of rawTokenId). The candidate provides more detail about the consequences (NullPointerException risk and misleading error message), but the core issue is identical."
+        },
+        {
+          "golden_comment": "Javadoc mentions \"usually like 3-letters shortcut\" but some implementations use 2-letter shortcuts (\"ac\", \"cc\", \"rt\", \"te\", \"pc\", \"ci\", \"ro\"). Consider updating documentation to reflect actual usage pattern.",
+          "severity": "Low",
+          "matched_candidate": "Javadoc mismatch on shortcut length - OAuth2GrantTypeFactory.getShortcut documentation states shortcuts are 'usually like 3-letters' but actual implementations use 2-letter shortcuts (ac, cc, pg)",
+          "confidence": 0.95,
+          "reasoning": "Both the golden comment and candidate issue identify the same underlying problem: the Javadoc for OAuth2GrantTypeFactory.getShortcut() states shortcuts are 'usually like 3-letters' but actual implementations use 2-letter shortcuts. While the golden comment lists more examples (ac, cc, rt, te, pc, ci, ro) and the candidate lists fewer (ac, cc, pg), they both correctly identify the documentation-implementation mismatch regarding shortcut length and suggest updating the documentation to reflect actual usage."
+        },
+        {
+          "golden_comment": " Catching generic RuntimeException is too broad. The implementation throws IllegalArgumentException specifically - catch that instead for more precise testing.",
+          "severity": "Low",
+          "matched_candidate": "RuntimeException: overly broad exception assertion - DefaultTokenContextEncoderProviderTest.testIncorrectGrantType catches generic RuntimeException instead of specific IllegalArgumentException, allowing unrelated runtime failures to pass the test",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same issue: the test catches RuntimeException when it should catch the more specific IllegalArgumentException. The golden comment notes that catching generic RuntimeException is too broad and should be replaced with IllegalArgumentException. The candidate issue makes the same point, additionally explaining why this is problematic (unrelated runtime failures could pass). Both refer to the same test method and the same underlying problem."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "In isAccessTokenId, the substring for the grant shortcut and the equality check look inverted: the grant shortcut occupies indices 4\u20135 (substring(4,6)), and a match should return true (combined with UUID check), not false.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 4,
+      "tp": 3,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.75,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37634__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37634__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/38446": {
@@ -5432,6 +5520,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR38446__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR38446__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Unsafe raw List deserialization without type safety. Calling Optional.get() directly on the Optional returned by RecoveryAuthnCodesUtils.getCredential(user) without checking isPresent() can lead to a NoSuchElementException if the Optional is empty.",
+          "severity": "Medium",
+          "matched_candidate": "NoSuchElementException: Optional.get() called without presence check in RecoveryAuthnCodeInputLoginBean.java - credentialModelOpt.get() is called without verifying the Optional is present, causing crashes when user has no recovery code credential",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same core problem: calling Optional.get() without checking isPresent() first on the Optional returned by RecoveryAuthnCodesUtils.getCredential(user), which can throw NoSuchElementException when the Optional is empty. The golden comment mentions this in the context of 'credentialModelOpt' and the candidate specifically identifies the same issue in RecoveryAuthnCodeInputLoginBean.java with credentialModelOpt.get(). While the golden comment also mentions 'unsafe raw List deserialization without type safety' as an additional concern, the primary issue about the unsafe Optional.get() call is identical in both."
+        },
+        {
+          "golden_comment": "After creating the RecoveryAuthnCodesCredentialModel, consider setting its id from the stored credential (e.g., myUser.recoveryCodes.getId()); otherwise getId() will be null and downstream removal by id (e.g., removeStoredCredentialById in the authenticator flow) may not work.",
+          "severity": "Low",
+          "matched_candidate": "Potential null ID edge case in recovery credentials - BackwardsCompatibilityUserStorage.getCredentials() reconstructs RecoveryAuthnCodesCredentialModel without preserving the stored credential ID, causing credential removal to break",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: when creating RecoveryAuthnCodesCredentialModel in BackwardsCompatibilityUserStorage.getCredentials(), the credential ID is not being set/preserved from the stored credential. This causes downstream operations that rely on the ID (specifically removeStoredCredentialById) to fail. The golden comment suggests setting the ID from the stored credential, while the candidate describes the consequence of not doing so. They are describing the same bug from slightly different angles."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR38446__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR38446__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/36882": {
@@ -6649,6 +6770,32 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR36882__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR36882__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect method call for exit codes. The picocli.exit() method calls System.exit() directly, which is problematic:",
+          "severity": "Medium",
+          "matched_candidate": "picocli.exit call in UpdateCompatibilityCheck.run terminates the entire JVM when rolling-updates feature is disabled, breaking embedding scenarios because FEATURE_DISABLED exit code (4) triggers System.exit in Picocli.exit for non-test modes",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same core problem: the picocli.exit() method call results in System.exit() being invoked, which terminates the JVM. The golden comment states this is 'problematic' while the candidate provides more specific context about when it occurs (FEATURE_DISABLED exit code 4, non-test modes) and why it's problematic (breaks embedding scenarios). Despite different levels of detail, they both point to the same underlying bug - the inappropriate use of System.exit() via picocli.exit()."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR36882__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36882__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/36880": {
@@ -8337,6 +8484,53 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR36880__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR36880__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Inconsistent feature flag bug causing orphaned permissions. The AdminPermissions event listener, responsible for cleaning up permissions upon role, client, or group removal, is incorrectly guarded by the ADMIN_FINE_GRAINED_AUTHZ (V1) feature flag. This is inconsistent with other methods in the class that use ADMIN_FINE_GRAINED_AUTHZ_V2. Consequently, if ADMIN_FINE_GRAINED_AUTHZ_V2 is enabled but V1 is not, the permission cleanup logic will not execute, leading to orphaned permission data. Cleanup should occur regardless of which fine-grained authorization version is enabled.",
+          "severity": "High",
+          "matched_candidate": "Permission cleanup breaks when V2 is enabled without V1 - cleanup is gated by ADMIN_FINE_GRAINED_AUTHZ flag while V2-specific client-permission logic is added separately, causing role/client/group removal cleanup to not run when V2 is enabled without V1",
+          "confidence": 0.98,
+          "reasoning": "Both issues identify the same core problem: the AdminPermissions event listener's cleanup logic is guarded by the ADMIN_FINE_GRAINED_AUTHZ (V1) flag, but when ADMIN_FINE_GRAINED_AUTHZ_V2 is enabled without V1, the cleanup doesn't execute. This leads to orphaned permissions. The candidate describes this as 'cleanup breaks when V2 is enabled without V1' and notes the inconsistency between the V1 flag gating cleanup and V2-specific logic being added separately. The golden comment describes the same issue as 'inconsistent feature flag bug' where cleanup is 'incorrectly guarded by V1' while other methods use V2, causing cleanup to not execute when 'V2 is enabled but V1 is not.' Both identify the same root cause, same consequence (orphaned permissions/cleanup not running), and same problematic condition (V2 enabled without V1)."
+        },
+        {
+          "golden_comment": "In hasPermission(ClientModel client, String scope), the resource lookup uses findByName(server, client.getId(), server.getId()), but AdminPermissionsSchema.getOrCreateResource creates per-client resources with the owner set to resourceServer.getClientId(), so this lookup will never find those resources and will always fall back to the 'all-clients' resource, effectively ignoring client-specific permissions.",
+          "severity": "High",
+          "matched_candidate": "Client-specific grants are ignored when resource owner differs from server id - resource lookup uses server.getId() as owner argument while per-client resources are not owned by the resource-server id, causing lookup to miss and fall back to type-level 'Clients' resource",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same core issue: the resource lookup in hasPermission uses server.getId() as the owner parameter, but per-client resources created by getOrCreateResource use resourceServer.getClientId() as the owner. This mismatch causes the lookup to fail and fall back to the 'all-clients' resource, effectively ignoring client-specific permissions. The candidate uses slightly different terminology ('grants' vs 'permissions', 'type-level Clients resource' vs 'all-clients resource') but describes the exact same bug with the same root cause and consequence."
+        },
+        {
+          "golden_comment": "In getClientsWithPermission(String scope), iterating resourceStore.findByType(server, AdminPermissionsSchema.CLIENTS_RESOURCE_TYPE) and returning resource.getName() will only ever consider the type-level 'Clients' resource (per-client resources have no type) and return its name, while AvailableRoleMappingResource#getRoleIdsWithPermissions expects actual client IDs to pass to realm.getClientById, which can lead to incorrect behavior or a null client and subsequent failures.",
+          "severity": "High",
+          "matched_candidate": "Client-specific grants are ignored when resource owner differs from server id - resource lookup uses server.getId() as owner argument while per-client resources are not owned by the resource-server id, causing lookup to miss and fall back to type-level 'Clients' resource",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same core problem: the code incorrectly retrieves only the type-level 'Clients' resource instead of per-client resources. The golden comment focuses on the consequence (returning resource.getName() instead of actual client IDs), while the candidate focuses on the root cause (using server.getId() as owner when per-client resources have different owners). These are two perspectives on the same bug - the candidate explains WHY the type-level resource is returned (ownership mismatch in lookup), and the golden explains WHAT happens as a result (wrong values returned, leading to failures). They describe the same underlying issue."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Dead code: getEvaluationContext() method is never called - private method is defined but all permission-evaluation paths use root.evaluatePermission() directly without this helper"
+        },
+        {
+          "candidate": "IndexOutOfBoundsException from fixed client-scope index - test code uses get(1) on default client scopes but only asserts size >= 1, not >= 2, causing potential crash when client has fewer than two default scopes"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 2,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR36880__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36880__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/37038": {
@@ -9916,6 +10110,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37038__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37038__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect permission check in canManage() method",
+          "severity": "High",
+          "matched_candidate": "LogicError: manage permission no longer honors defaults - canManage() narrowed to hasOneAdminRole(AdminRoles.MANAGE_USERS) check, breaking group management when default user-management grants exist without direct MANAGE_USERS role",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue with the canManage() method's permission checking logic. The golden comment states there's an 'Incorrect permission check in canManage() method' and the candidate provides specific details about how it's incorrect: the method was narrowed to only check hasOneAdminRole(AdminRoles.MANAGE_USERS), which breaks functionality when default user-management grants exist without the direct MANAGE_USERS role. The candidate is describing the specific nature of the incorrect permission check mentioned in the golden comment."
+        },
+        {
+          "golden_comment": "In getGroupIdsWithViewPermission, hasPermission is called with groupResource.getId() and the same groupResource.getId() is added to granted, but hasPermission resolves resources by name (treating the argument as a group id) and the GroupPermissionEvaluator contract says this method returns group IDs that are later used as UserModel.GROUPS and in getUsersCount group filters. This mismatch means per-group VIEW_MEMBERS/MANAGE_MEMBERS permissions may not yield the expected group IDs for filtering and counts, and evaluation may effectively only look at the type-level 'all-groups' resource; consider revisiting whether this should operate on the underlying group ids (resource names) instead so it aligns with the JPA queries and the interface contract.",
+          "severity": "High",
+          "matched_candidate": "LogicError: per-group checks use resource id lookup - group-specific permissions break when resource name differs from internal resource id. getGroupIdsWithViewPermission() passes groupResource.getId() but hasPermission() expects a resource name in findByName(server, groupId), causing id-vs-name mismatch",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same core problem: getGroupIdsWithViewPermission() calls hasPermission() with groupResource.getId(), but hasPermission() treats this argument as a group name/id for lookup (via findByName), creating a mismatch between resource IDs and group identifiers. Both note this breaks the intended per-group permission evaluation and affects filtering/counting. The candidate uses slightly different terminology ('resource name differs from internal resource id', 'id-vs-name mismatch') but describes the identical bug and its consequences."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Potential LogicError: manage-users check narrows default-role mapping - role mapping gated on hasOneAdminRole(AdminRoles.MANAGE_USERS) before checkAdminRoles(role), while canManageDefault() defines broader default-manage semantics, potentially breaking role mapping when canManageDefault grants access without direct MANAGE_USERS role"
+        },
+        {
+          "candidate": "LogicError: inconsistent admin fine-grained feature flag - V2 management/evaluator paths use ADMIN_FINE_GRAINED_AUTHZ_V2 flag but listener registration uses ADMIN_FINE_GRAINED_AUTHZ flag, causing permission cleanup to never run when V2 is enabled without V1"
+        },
+        {
+          "candidate": "AssertionError: child group status mismatches API - test expects subgroup creation to return 204 No Content but GroupResource.addChild() returns 201 Created when creating new child groups"
+        },
+        {
+          "candidate": "LogicError: manage-members grants hidden from group filtering - getGroupIdsWithViewPermission() has id/name mismatch where it passes groupResource.getId() to hasPermission() which treats it as resource name via findByName(), causing per-group VIEW_MEMBERS/MANAGE_MEMBERS permissions to be missed"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 6,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 4,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37038__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37038__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/33832": {
@@ -11498,6 +11738,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR33832__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR33832__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Dead code exists where ASN1Encoder instances are created and written to, but their results are immediately discarded. The actual encoding is performed by new ASN1Encoder instances created in the subsequent return statement, rendering the earlier operations useless.",
+          "severity": "Low",
+          "matched_candidate": "AuthzClientCryptoProvider.concatenatedRSToASN1DER creates two ASN1Encoder instances and writes r/s to them, but immediately discards these instances as dead code since the returned bytes are built from separate encoder instances",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the same problem: ASN1Encoder instances are created and written to (with r/s values), but these instances are immediately discarded because the actual return value uses newly created ASN1Encoder instances. The candidate specifically names the method (AuthzClientCryptoProvider.concatenatedRSToASN1DER) and describes the exact same dead code pattern - creating encoders, writing to them, then discarding them in favor of new instances in the return statement."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ASN1Decoder.readLength returns -1 for indefinite-length encoding, but callers readInteger and readNext don't handle this sentinel value, passing it directly to read(length) which causes NegativeArraySizeException when allocating new byte[length]"
+        },
+        {
+          "candidate": "ASN1Decoder.readLength() compares decoded length against total input limit instead of remaining bytes after tag/length consumption, incorrectly rejecting valid payloads where content exactly fills remaining bytes"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Returns wrong provider (default keystore instead of BouncyCastle)",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR33832__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR33832__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/40940": {
@@ -12964,6 +13242,37 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR40940__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR40940__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The reader thread isn\u2019t waited for; flipping deletedAll to true and asserting immediately can race and miss exceptions added just after the flag change, making this test flaky.",
+          "severity": "Medium",
+          "matched_candidate": "Reader thread not joined before assertion - test sets deletedAll flag and immediately asserts caughtExceptions is empty without waiting for the background thread to finish, causing a race condition where exceptions can be added after the assertion",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same race condition issue: the test sets the deletedAll flag to true and immediately asserts that caughtExceptions is empty, without waiting for the reader thread to complete. This creates a race where exceptions can be added to the collection after the flag is set but before the assertion is checked, making the test flaky. The candidate uses 'joined' terminology while the golden uses 'waited for', but both describe the identical underlying problem."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "Returning null from getSubGroupsCount() violates the GroupModel contract (Javadoc says it never returns null) and may lead to NPEs in callers that expect a non-null count.",
+          "severity": "Critical"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR40940__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR40940__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1": {
@@ -14435,6 +14744,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak-greptile__greptile-v4-1__PR1__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak-greptile__greptile-v4-1__PR1__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "ConditionalPasskeysEnabled() called without UserModel parameter",
+          "severity": "Medium",
+          "matched_candidate": "isConditionalPasskeysEnabled(context.getUser()) gates passkey setup but requires user != null, causing authenticate/challenge to skip webauthnAuth.fillContextForm(context) on initial login page before user is selected",
+          "confidence": 0.92,
+          "reasoning": "Both issues identify the same underlying problem: a function checking for conditional passkeys is being called without proper user context. The golden comment points out 'ConditionalPasskeysEnabled() called without UserModel parameter' and the candidate describes 'isConditionalPasskeysEnabled(context.getUser())' requiring 'user != null', which causes the webauthn form filling to be skipped when user is not yet available. These are semantically the same issue - the conditional passkeys check is being performed before a user is available/selected, leading to incorrect behavior."
+        },
+        {
+          "golden_comment": "With isConditionalPasskeysEnabled(UserModel user) requiring user != null, authenticate(...) will not call webauthnAuth.fillContextForm(context) on the initial login page where context.getUser() is still null, so conditional passkey UI will not be set up for first-time passkey login. Consider whether this should also be enabled when no user has been selected yet so ID-less passkey authentication on the initial login form continues to work.",
+          "severity": "Medium",
+          "matched_candidate": "isConditionalPasskeysEnabled(context.getUser()) gates passkey setup but requires user != null, causing authenticate/challenge to skip webauthnAuth.fillContextForm(context) on initial login page before user is selected",
+          "confidence": 0.98,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same problem: the isConditionalPasskeysEnabled() method requires a non-null user, which causes authenticate() to skip calling webauthnAuth.fillContextForm(context) on the initial login page where context.getUser() is null. This prevents conditional passkey UI setup for first-time/ID-less passkey authentication. The candidate uses slightly more technical phrasing ('gates passkey setup', 'authenticate/challenge') but describes the identical issue with the same root cause, affected code paths, and consequences."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "In challenge method error path, when login fails with unknown user, context.getUser() is null so isConditionalPasskeysEnabled guard skips webauthnAuth.fillContextForm(context), removing passkey UI options on retry"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak-greptile__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak-greptile__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/93824": {
@@ -16494,6 +16840,62 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR93824__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR93824__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Inconsistent metric tagging with 'shard' and 'shards'",
+          "severity": "Medium",
+          "matched_candidate": "Naming mismatch: shard/shards metric tags split dashboards - inconsistent metric tag keys where one timer uses 'shard' and another uses 'shards' for the same shard-set context",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: inconsistent use of metric tag keys 'shard' versus 'shards'. The golden comment states this inconsistency directly, while the candidate provides more context about the impact (dashboard splitting) and specifies it occurs between timers in the same shard-set context. Despite different wording and detail levels, they point to the identical problem."
+        },
+        {
+          "golden_comment": "Fixed sleep in tests can be flaky; wait on condition instead",
+          "severity": "Low",
+          "matched_candidate": "Timing race: sleep no longer waits in test_basic - test monkeypatches time.sleep to no-op but then relies on time.sleep(0.1) to give flusher threads time to process, making the wait ineffective",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: the test uses a fixed sleep/timing mechanism that is unreliable. The golden comment notes that fixed sleeps are flaky and should be replaced with condition-based waiting. The candidate issue provides more specific detail about WHY it's flaky (the monkeypatch makes sleep a no-op), but both are pointing to the same problem - the test relies on time.sleep which is ineffective/unreliable and should use proper synchronization instead."
+        },
+        {
+          "golden_comment": "Sleep in test_consumer.py won\u2019t actually wait because time.sleep was monkeypatched above; consider restoring sleep or using a different sync to ensure the flusher has time to process.",
+          "severity": "Medium",
+          "matched_candidate": "Timing race: sleep no longer waits in test_basic - test monkeypatches time.sleep to no-op but then relies on time.sleep(0.1) to give flusher threads time to process, making the wait ineffective",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: time.sleep was monkeypatched/mocked to be a no-op, but the test code later relies on time.sleep(0.1) to actually wait for flusher threads to process. The golden comment mentions test_consumer.py while the candidate mentions test_basic, but both are pointing to the identical underlying problem - a timing race condition where sleep won't actually wait due to the monkeypatch."
+        },
+        {
+          "golden_comment": "Breaking out of the loop when the deadline has elapsed can skip terminating remaining flusher processes, potentially leaving them running after shutdown; consider ensuring termination is attempted even if the deadline is exceeded.",
+          "severity": "Medium",
+          "matched_candidate": "Leaked processes when join deadline breaks loop early in SpanFlusher.join - loop breaks when deadline expires but skips terminate() calls for remaining processes, leaving child workers running",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the same issue: when the deadline expires in SpanFlusher.join, the loop breaks early, which causes the terminate() calls for remaining processes to be skipped, potentially leaving child processes running after shutdown. The candidate uses slightly different wording ('leaked processes', 'child workers') but describes the exact same bug and its consequences."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Race condition: replacement process starts before prior exits in SpanFlusher._ensure_processes_alive - kills unhealthy worker and immediately calls _create_process_for_shards without waiting for prior process to exit, allowing overlapping shard handlers"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Because flusher processes are created via multiprocessing.get_context('spawn').Process, they are instances of multiprocessing.context.SpawnProcess, which on POSIX is not a subclass of multiprocessing.Process, so this isinstance check will always be false and hung processes won't be killed here.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR93824__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR93824__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5": {
@@ -18454,6 +18856,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR5__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR5__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Detector validator uses wrong key when updating type",
+          "severity": "Medium",
+          "matched_candidate": "KeyError: update reads wrong validated_data key - detector type updates break when validated_data only contains serializer field name type in src/sentry/workflow_engine/endpoints/validators/base/detector.py",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: the detector validator is using an incorrect key when updating. The golden comment states 'uses wrong key when updating type' and the candidate specifies this is a KeyError where 'update reads wrong validated_data key' specifically mentioning 'detector type updates break' due to using the serializer field name 'type' instead of the correct key. Both point to the same bug in the detector validator's update mechanism."
+        },
+        {
+          "golden_comment": "Using zip(error_ids, events.values()) assumes the get_multi result preserves the input order; dict value order is not guaranteed to match error_ids, so event data can be paired with the wrong ID (missing nodes also shift alignment).",
+          "severity": "Low",
+          "matched_candidate": "Logic error from mispaired nodestore results - error details map to wrong IDs when get_multi omits or reorders entries in src/sentry/replays/endpoints/project_replay_summarize_breadcrumbs.py",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: the code incorrectly assumes that `get_multi` results maintain order and alignment with `error_ids`, when in fact the dictionary values may be reordered or missing entries, causing event data to be paired with wrong IDs. The candidate describes this as 'mispaired nodestore results' and 'error details map to wrong IDs when get_multi omits or reorders entries', which is semantically identical to the golden comment's concern about zip pairing and alignment issues."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ValidationError: age=0 bypasses mutual-exclusion check - invalid mixed-spec reports pass validation when age or timestamp is zero in src/sentry/issues/endpoints/browser_reporting_collector.py"
+        },
+        {
+          "candidate": "Authorization telemetry recorded before feature check - unauthorized attempts are logged as assemble events when feature access is denied in src/sentry/preprod/api/endpoints/organization_preprod_artifact_assemble.py"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Breaking changes in error response format",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR5__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR5__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1": {
@@ -20448,6 +20895,58 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR1__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR1__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Django querysets do not support negative slicing",
+          "severity": "High",
+          "matched_candidate": "AssertionError: BasePaginator.get_result allows negative QuerySet slicing on previous-page cursors by using offset directly instead of max(0, offset)",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: negative slicing of Django QuerySets. The golden comment states that Django querysets don't support negative slicing. The candidate issue explains the specific mechanism causing this problem - the BasePaginator.get_result method allows negative QuerySet slicing by using offset directly without ensuring it's non-negative (i.e., not using max(0, offset)). The candidate provides more technical detail about how the bug manifests, but both are pointing to the same core issue of attempting negative slicing on QuerySets."
+        },
+        {
+          "golden_comment": "When requests are authenticated with API keys or org auth tokens (which have user_id=None), organization_context.member is None. Line 71 attempts to access organization_context.member.has_global_access without checking if member is None, causing an AttributeError crash when optimized_pagination=true is used, even though the request passed all permission checks with valid org:write scope.",
+          "severity": "High",
+          "matched_candidate": "AttributeError: organization_context.member accessed without None check in OrganizationAuditLogsEndpoint.get, breaks when authenticated via API key or org auth token",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same bug: accessing organization_context.member.has_global_access on line 71 without checking if member is None first, which causes an AttributeError when requests are authenticated with API keys or org auth tokens (where user_id=None and member is None). The candidate issue is more concise but captures all the essential elements: the AttributeError, the missing None check, the specific location (OrganizationAuditLogsEndpoint.get), and the authentication context (API key or org auth token) that triggers the bug."
+        },
+        {
+          "golden_comment": "get_item_key assumes a numeric key, but the paginator is used with order_by=-datetime in the audit logs endpoint; calling math.floor/ceil on a datetime will raise a TypeError.",
+          "severity": "High",
+          "matched_candidate": "TypeError: OptimizedCursorPaginator.get_item_key applies math.floor/ceil to datetime fields, causing cursor generation to crash with order_by='-datetime'",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same issue: the get_item_key method in OptimizedCursorPaginator incorrectly applies math.floor/ceil operations to datetime values when the paginator is used with datetime-based ordering (specifically order_by=-datetime in audit logs). This causes a TypeError because datetime objects cannot be passed to math.floor/ceil. The candidate issue is more explicit about the crash and mentions the specific method name, but describes the identical underlying bug."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "AssertionError: OptimizedCursorPaginator.get_result allows negative start_offset when enable_advanced_features is true, causing Django ORM to crash on negative slice"
+        },
+        {
+          "candidate": "TypeError: OptimizedCursorPaginator.get_item_key applies math.floor/ceil to non-numeric fields (datetime/string) when used with non-numeric order_by keys"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Importing non-existent OptimizedCursorPaginator",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 4,
+      "tp": 3,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.6,
+      "recall": 0.75,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/97529": {
@@ -21906,6 +22405,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR97529__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR97529__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "A race condition in BuildIndex allows multiple goroutines to concurrently build the same expensive index for the same key. This is caused by moving the b.cacheMu lock from protecting the entire function to only protecting the final cache assignment. ",
+          "severity": "High",
+          "matched_candidate": "Race condition: BuildIndex in pkg/storage/unified/search/bleve.go allows duplicate concurrent index builds for same key due to narrowed lock scope, only protecting cache write instead of entire function",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the same race condition issue: the lock scope in BuildIndex was narrowed to only protect the cache write operation instead of the entire function, allowing multiple goroutines to concurrently build the same index for the same key. The candidate provides more specific details (file path, exact problem description) but describes the identical underlying bug."
+        },
+        {
+          "golden_comment": "Calling s.search.TotalDocs() here may race with concurrent index creation: TotalDocs iterates b.cache without synchronization, and the event watcher goroutine started just above could trigger BuildIndex writes concurrently, potentially causing a concurrent map read/write panic.",
+          "severity": "High",
+          "matched_candidate": "Race condition: TotalDocs() call at pkg/storage/unified/resource/search.go:216 accesses b.cache without lock while BuildIndex writes to b.cache under lock, causing potential concurrent map read/write panic",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same race condition issue: TotalDocs() accesses b.cache without synchronization while BuildIndex (triggered by the event watcher goroutine) writes to b.cache concurrently, potentially causing a concurrent map read/write panic. The candidate specifies the exact location (pkg/storage/unified/resource/search.go:216) and mentions the lock mechanism explicitly, while the golden comment describes the same issue more generally. They are semantically identical."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Nil pointer dereference: History() and Origin() methods in pkg/storage/unified/resource/server.go:922-929 call s.search without nil check when search is optional"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR97529__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR97529__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/80168": {
@@ -23356,6 +23892,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR80168__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR80168__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but only contains pass, failing to implement its required abstract methods: counter_names (property), get_dedupe_value(), get_group_key_values(), and build_occurrence_and_event_data(). This will cause a TypeError at runtime when the class is instantiated.",
+          "severity": "High",
+          "matched_candidate": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but doesn't implement required abstract methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data), making it non-instantiable and causing TypeError at runtime",
+          "confidence": 0.92,
+          "reasoning": "Both comments identify the same core issue: MetricAlertDetectorHandler inherits from StatefulDetectorHandler but fails to implement required abstract methods, which will cause a TypeError at runtime when instantiated. The golden comment lists 4 abstract methods (counter_names property, get_dedupe_value, get_group_key_values, and build_occurrence_and_event_data), while the candidate lists 3 methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data), missing the counter_names property. Despite this minor difference in completeness, both identify the same fundamental problem and its consequence."
+        },
+        {
+          "golden_comment": "Docstring says this returns a list of DetectorEvaluationResult, but the method now returns a dict keyed by DetectorGroupKey. Consider updating the docstring to match the new return type.",
+          "severity": "Low",
+          "matched_candidate": "Docstring for evaluate method claims it returns a list of DetectorEvaluationResult but the actual return type is dict[DetectorGroupKey, DetectorEvaluationResult]",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same issue: the docstring incorrectly states the return type is a list of DetectorEvaluationResult when the actual return type is a dict keyed by DetectorGroupKey with DetectorEvaluationResult values. The candidate issue is more specific about the actual return type annotation, but both are pointing to the same docstring/implementation mismatch."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "New abstract method build_occurrence_and_event_data added to StatefulDetectorHandler but existing subclass MetricAlertDetectorHandler doesn't implement it, causing instantiation failure"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR80168__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80168__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/80528": {
@@ -24700,6 +25273,37 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR80528__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR80528__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The function modifies the config variable to include display values but then returns the original monitor.config instead of the modified version.",
+          "severity": "High",
+          "matched_candidate": "Dead code: transformed config is not used in return value. Code copies and humanizes config['schedule_type'] at lines 160-163, but line 168 returns monitor_environment.monitor.config instead of the mutated local config variable, causing the transformation to be dropped",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same issue: the function modifies a local 'config' variable (adding display values/humanizing schedule_type) but then returns the original 'monitor.config' instead of the modified version, causing the transformations to be lost. The candidate provides more specific details (line numbers, the specific transformation being 'schedule_type'), but the core problem is identical."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The code fetches MonitorCheckIn objects by ID when the required data already exists in previous_checkins. This creates an unnecessary database query.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR80528__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80528__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/77754": {
@@ -26413,6 +27017,57 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR77754__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR77754__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Shared mutable default in dataclass timestamp",
+          "severity": "Medium",
+          "matched_candidate": "Shared datetime default freezes queued timestamp - the dataclass field default `queued: datetime = timezone.now()` is evaluated once at class definition time, so omitted `queued` values reuse a frozen timestamp",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same core issue: a mutable default argument in a dataclass field. The golden comment mentions 'shared mutable default in dataclass timestamp' while the candidate explains that `queued: datetime = timezone.now()` is evaluated once at class definition time, causing all instances to share the same frozen timestamp value. This is the classic Python mutable default argument problem applied to dataclasses. The candidate provides more technical detail but describes the exact same bug."
+        },
+        {
+          "golden_comment": "The method name has a typo: test_from_dict_inalid_data should be test_from_dict_invalid_data.",
+          "severity": "Low",
+          "matched_candidate": "Naming typo: test_from_dict_inalid_data mislabels failure - test name contains typo 'inalid' instead of 'invalid'",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same typo in the method name 'test_from_dict_inalid_data' where 'inalid' should be 'invalid'. The golden comment directly states the typo, while the candidate provides more context about what the typo is ('inalid' instead of 'invalid'), but they are pointing to the identical issue."
+        },
+        {
+          "golden_comment": "Method name says 'empty_array' but tests empty dict - consider renaming to 'test_from_dict_empty_dict' for clarity.",
+          "severity": "Low",
+          "matched_candidate": "Naming mismatch: empty_array test uses dict - test named test_from_dict_empty_array passes an empty dict {} instead of an array",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: a test method named 'test_from_dict_empty_array' is using an empty dictionary {} instead of an empty array, creating a mismatch between the method name and what it actually tests. The golden comment suggests renaming to 'test_from_dict_empty_dict' while the candidate describes it as a 'naming mismatch', but both are pointing to the exact same problem."
+        },
+        {
+          "golden_comment": "to_dict() returns a datetime for queued; if this dict is passed in task kwargs (e.g., via apply_async), JSON serialization may fail depending on the serializer, which can cause enqueue errors.",
+          "severity": "Medium",
+          "matched_candidate": "TypeError: datetime in to_dict may break JSON serialization - to_dict() returns dataclasses.asdict(self) which includes queued as a datetime, breaking serialization when passed to Celery task kwargs",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same core issue: the to_dict() method returns a datetime object for the 'queued' field, which causes JSON serialization failures when passed to Celery task kwargs. The candidate provides more technical detail (mentioning dataclasses.asdict and TypeError) while the golden comment is more general, but they describe the identical underlying problem and its consequences."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Potential: TypeError enqueueing non-JSON task kwargs - assignment_source.to_dict() is enqueued directly in task kwargs but contains non-serializable datetime objects"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 4,
+      "tp": 4,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.8,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR77754__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR77754__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/95633": {
@@ -28074,6 +28729,54 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR95633__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR95633__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The test test_thread_queue_parallel_error_handling has a docstring that doesn't match the test implementation.",
+          "severity": "Low",
+          "matched_candidate": "test_thread_queue_parallel_error_handling has misleading docstring claiming errors don't block offset commits for other messages, but test body asserts no commits occur",
+          "confidence": 0.95,
+          "reasoning": "Both the golden comment and candidate issue identify the same underlying problem: the docstring for test_thread_queue_parallel_error_handling does not match what the test actually does. The golden comment states this generally, while the candidate provides specific details about the mismatch (docstring claims errors don't block commits, but test asserts no commits occur). These are describing the same docstring-implementation mismatch issue."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "close() method drops processed-but-uncommitted offsets because shutdown_event is set and commit thread is joined before queue_pool.shutdown() drains workers, causing offsets completed during shutdown to miss final commit"
+        },
+        {
+          "candidate": "test_concurrent_processing_across_groups only checks that all items completed and group IDs appeared, but doesn't verify actual concurrent execution through timing or overlap checks"
+        },
+        {
+          "candidate": "test_concurrent_processing_different_groups claims to test concurrency but only asserts that 4 results were processed without any concurrency-specific verification"
+        },
+        {
+          "candidate": "Background-thread tests use fixed sleep/polling loops with hardcoded timeouts that will fail intermittently when processing takes longer than expected"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The queue.shutdown() method with 'immediate=False' parameter may not exist in the standard Python queue module. This could cause AttributeError at runtime. Verify the correct API or implement a custom shutdown mechanism.",
+          "severity": "High"
+        },
+        {
+          "golden_comment": "The magic number 50 for max_wait is used repeatedly throughout the tests. Consider extracting this as a named constant to improve maintainability.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 3,
+      "tp": 1,
+      "fp": 4,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.2,
+      "recall": 0.3333333333333333,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR95633__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR95633__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2": {
@@ -30038,6 +30741,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR2__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR2__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "OptimizedCursorPaginator negative-offset branch slices QuerySet with a negative start index",
+          "severity": "Critical",
+          "matched_candidate": "AssertionError: negative QuerySet slicing enabled - pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice in src/sentry/api/paginator.py:877-882",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: negative offset/index being used in QuerySet slicing within the OptimizedCursorPaginator code. The golden comment mentions 'negative-offset branch slices QuerySet with a negative start index' and the candidate describes 'pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice'. Both point to the same file location (paginator.py around line 877-882) and the same root cause - negative slicing of QuerySets which causes crashes."
+        },
+        {
+          "golden_comment": "BasePaginator negative-offset branch slices QuerySet with a negative start index",
+          "severity": "High",
+          "matched_candidate": "AssertionError: negative QuerySet slicing enabled - pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice in src/sentry/api/paginator.py:877-882",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: negative offset values being used in QuerySet slicing within the BasePaginator code. The golden comment describes 'negative-offset branch slices QuerySet with a negative start index' and the candidate describes 'pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice'. Both point to the same bug in the paginator code (specifically around line 877-882 in src/sentry/api/paginator.py) where negative offsets cause problematic QuerySet slicing. The candidate provides more detail about the crash and AssertionError, but the core issue is identical."
+        },
+        {
+          "golden_comment": "OptimizedCursorPaginator.get_item_key uses floor/ceil on a datetime key (order_by='-datetime'), causing TypeError.",
+          "severity": "High",
+          "matched_candidate": "TypeError: math.floor/ceil on non-numeric keys - cursor building crashes when paginator key is datetime or other non-numeric field in src/sentry/api/paginator.py:838-840",
+          "confidence": 0.98,
+          "reasoning": "Both issues identify the same underlying problem: the OptimizedCursorPaginator is using math.floor/ceil operations on datetime keys, which causes a TypeError because these mathematical operations cannot be applied to datetime objects. The candidate issue provides more specific location information (src/sentry/api/paginator.py:838-840) and explicitly mentions the crash during cursor building, while the golden comment is more concise. However, they both describe the identical bug."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "AssertionError: prev pagination keeps negative offset - existing paginator crashes when previous-page cursor has a negative offset in src/sentry/api/paginator.py:182-184"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR2__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR2__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3": {
@@ -31857,6 +32604,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR3__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR3__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "sample_rate = 0.0 is falsy and skipped",
+          "severity": "Low",
+          "matched_candidate": "Zero sample_rate skipped by falsy guard: _set_sample_rate_from_error_sampling uses 'if client_sample_rate:' which skips valid zero values (0/0.0) instead of propagating them to normalized_data['sample_rate'] in src/sentry/testutils/factories.py lines 353-355",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: the code uses a falsy check ('if client_sample_rate:') which incorrectly skips the value 0.0 for sample_rate, even though 0.0 is a valid value that should be processed. The candidate provides more detail about the location and context, but the core bug is identical."
+        },
+        {
+          "golden_comment": "Using Python\u2019s built-in hash() to build cache keys is non-deterministic across processes (hash randomization), so keys won\u2019t match across workers and invalidate_upsampling_cache may fail to delete them. Use a deterministic serialization of project_ids for the cache key.",
+          "severity": "Low",
+          "matched_candidate": "hash() cache key breaks invalidation: Python hash values are process-dependent, so cache lookup/set and invalidate_upsampling_cache can compute different keys across workers in src/sentry/api/helpers/error_upsampling.py lines 27 and 73, breaking cross-process cache reuse/invalidation",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same core issue: Python's hash() function produces process-dependent values due to hash randomization, which causes cache key mismatches across workers. Both specifically mention that this breaks cache invalidation (invalidate_upsampling_cache) and affects the same file (error_upsampling.py). The candidate provides more specific line numbers (27 and 73) but describes the identical problem and impact as the golden comment."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The upsampling eligibility check passes the outer dataset instead of the actual dataset used by scoped_dataset. In paths where the query ultimately runs against discover (e.g., dashboard split) while the original dataset is metrics, upsampling may be skipped even when all projects are allowlisted.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR3__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR3__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/103633": {
@@ -33370,6 +34155,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR103633__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR103633__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The Check operation exhibits asymmetric cache trust logic: cached permission grants are trusted and returned immediately, but cached denials from the same permission cache are ignored, leading to a fresh database lookup. This allows stale cached grants to provide access to revoked resources, posing a security risk. ",
+          "severity": "High",
+          "matched_candidate": "Cached permissions are trusted without freshness validation in pkg/services/authz/rbac/service.go:123-135 - revoked access persists until TTL expiry",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same core security vulnerability: cached permission grants are trusted without validation, allowing revoked access to persist until cache expiry. The golden comment provides more technical detail about the asymmetric cache trust logic (grants trusted vs denials ignored), while the candidate focuses on the lack of freshness validation, but they both point to the same fundamental problem - stale cached grants can provide unauthorized access to revoked resources."
+        },
+        {
+          "golden_comment": "The test comment says the cached permissions 'allow access', but the map stores false for dashboards:uid:dash1, so checkPermission will still treat this scope as not allowed.",
+          "severity": "Low",
+          "matched_candidate": "Test comment contradicts setup in pkg/services/authz/rbac/service_test.go:981-982 - comment says 'Allow access to the dashboard' but cache stores false (denial)",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same issue: there is a contradiction between the test comment (which states permissions allow access to the dashboard) and the actual cached value (false for dashboards:uid:dash1), meaning access would be denied. The candidate provides the specific line numbers and uses slightly different wording ('denial' vs 'not allowed'), but describes the exact same problem."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR103633__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR103633__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/67876": {
@@ -35096,6 +35914,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR67876__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR67876__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "OAuth state uses pipeline.signature (static) instead of a per-request random value",
+          "severity": "Medium",
+          "matched_candidate": "CSRF/replay risk: predictable OAuth state - OAuth state is derived from reusable pipeline.signature which is deterministically derived via md5_text rather than being a randomized per-request CSRF token, making it predictable and reusable",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same core security issue: the OAuth state parameter is using a static/predictable value (pipeline.signature) instead of a random per-request value. The candidate provides more technical detail about the CSRF/replay risk and mentions md5_text, but fundamentally describes the same vulnerability - that the state is deterministic and reusable rather than being randomly generated for each request."
+        },
+        {
+          "golden_comment": "The code attempts to access integration.metadata[sender][login] without checking for the existence of the sender key. This causes a KeyError for integrations where the sender metadata was not set during creation",
+          "severity": "High",
+          "matched_candidate": "KeyError: sender metadata access unchecked - integration.metadata[\"sender\"][\"login\"] is dereferenced without key guard or exception handling in GitHubInstallation.dispatch, can raise KeyError for integrations lacking that metadata",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: accessing integration.metadata['sender']['login'] without first checking if the 'sender' key exists, which can cause a KeyError. The golden comment focuses on the missing existence check for the sender key, while the candidate mentions both the lack of key guard and exception handling. Despite slightly different wording and the candidate providing more context (mentioning the specific method GitHubInstallation.dispatch), they both point to the exact same bug."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "Null reference if github_authenticated_user state is missing",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR67876__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR67876__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/32918": {
@@ -36479,6 +37335,37 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR32918__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR32918__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Cleanup reference uses incorrect alias - should be 'idp-alias-' + i instead of 'alias'.",
+          "severity": "Medium",
+          "matched_candidate": "cleanup uses wrong IDP alias - testCacheIDPForLogin creates providers with aliases 'idp-alias-<i>' and 'idp-alias-20' but registers cleanup against the literal 'alias', causing created providers to leak when teardown removes non-existent alias",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same bug: the cleanup code uses the wrong alias value. The golden comment states it should be 'idp-alias-' + i instead of 'alias', while the candidate provides more context explaining that testCacheIDPForLogin creates providers with aliases like 'idp-alias-<i>' and 'idp-alias-20' but the cleanup incorrectly uses the literal string 'alias'. Both are describing the identical issue - the cleanup reference uses an incorrect/hardcoded alias value instead of the dynamically generated one."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "Recursive caching call using session instead of delegate",
+          "severity": "Critical"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR32918__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR32918__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/94942": {
@@ -37897,6 +38784,46 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR94942__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR94942__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The enableSqlExpressions function has flawed logic that always returns false, effectively disabling SQL expressions unconditionally:",
+          "severity": "Critical",
+          "matched_candidate": "enableSqlExpressions function in pkg/expr/reader.go:194-200 returns false on every path, unconditionally disabling SQL expressions",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same problem: the enableSqlExpressions function has logic that always returns false, which unconditionally disables SQL expressions. The candidate even provides the specific file location (pkg/expr/reader.go:194-200) and uses nearly identical language to describe the issue. This is a clear match."
+        },
+        {
+          "golden_comment": "Several methods such as NewInMemoryDB().RunCommands and db.QueryFramesInto return 'not implemented'.",
+          "severity": "High",
+          "matched_candidate": "QueryFramesInto method in pkg/expr/sql/db.go:20-21 unconditionally returns 'not implemented' error, causing deterministic runtime failure",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: methods returning 'not implemented' errors. The golden comment mentions multiple methods including QueryFramesInto, while the candidate specifically focuses on QueryFramesInto in pkg/expr/sql/db.go. The candidate provides more specific location details but identifies the same core problem that the golden comment describes."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Caller gate at pkg/expr/reader.go:129-132 rejects QueryTypeSQL whenever !enabled due to enableSqlExpressions always returning false"
+        },
+        {
+          "candidate": "SQLCommand.Execute in pkg/expr/sql_command.go:96-100 calls sql.NewInMemoryDB().QueryFramesInto which always fails due to unimplemented method"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 2,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR94942__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR94942__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/90939": {
@@ -39220,6 +40147,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR90939__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR90939__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The GetWebAssets function implements an incomplete double-checked locking pattern for caching web assets. The function first checks if the cache is populated using a read lock (RLock), and if the cache is empty, it acquires a write lock to populate it. However, it fails to re-check whether the cache was populated by another goroutine while waiting to acquire the write lock.",
+          "severity": "Medium",
+          "matched_candidate": "Race condition: incomplete double-checked locking in pkg/api/webassets/webassets.go - duplicate fetches run when concurrent callers observe empty cache before lock acquisition because cache is not re-checked after acquiring write lock",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same problem: an incomplete double-checked locking pattern in the GetWebAssets function where the cache is not re-checked after acquiring the write lock. The golden comment explains that the function checks cache with read lock, then acquires write lock if empty, but fails to re-check if another goroutine populated the cache while waiting. The candidate describes this identically as 'duplicate fetches run when concurrent callers observe empty cache before lock acquisition because cache is not re-checked after acquiring write lock.' Both identify the same race condition, same location (webassets.go), and same root cause (missing re-check after write lock acquisition)."
+        },
+        {
+          "golden_comment": "In addition to the missing double-check, the function has a critical flaw in its error handling: it unconditionally assigns the fetch result to the cache (line 69: entryPointAssetsCache = result) regardless of whether the fetch succeeded or failed. When an error occurs during asset fetching, result is nil, and this nil value overwrites any previously valid cache entry.",
+          "severity": "High",
+          "matched_candidate": "Logic error: nil cache overwrites valid entry in pkg/api/webassets/webassets.go - cached assets are lost when asset loading fails after a previous successful population because result is always assigned to entryPointAssetsCache even when nil",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same bug: the function unconditionally assigns the result to entryPointAssetsCache (line 69) even when the fetch fails and result is nil, which overwrites any previously valid cache entry. The candidate uses slightly different wording ('cached assets are lost' vs 'overwrites any previously valid cache entry') but describes the identical logic error in the same location with the same consequences."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR90939__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90939__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/80329": {
@@ -40631,6 +41591,32 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR80329__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR80329__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The code uses Error log level for what appears to be debugging information. This will pollute error logs in production. Consider using Debug or Info level instead.",
+          "severity": "Low",
+          "matched_candidate": "Normal control-flow messages in CleanAnnotations are logged with r.log.Error instead of appropriate log level (Info/Debug), flooding error logs with routine batch progress messages",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: using Error log level (r.log.Error) for routine/debugging information instead of more appropriate levels like Info or Debug. The golden comment notes this will pollute error logs in production, while the candidate specifically mentions 'flooding error logs with routine batch progress messages' in CleanAnnotations. Both are describing the same logging level misuse problem, just with slightly different wording and the candidate providing more specific context about where it occurs."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR80329__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR80329__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/90045": {
@@ -42548,6 +43534,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR90045__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR90045__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The context is being created with d.Log instead of the log variable that was initialized with additional context values (name, kind, method). This means those values won't be propagated to the logging context.",
+          "severity": "Medium",
+          "matched_candidate": "Delete method stores plain d.Log into context instead of enriched log variable, dropping name/kind/method fields in pkg/apiserver/rest/dualwriter_mode3.go:96-97",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same issue: in the Delete method, the context is being created with d.Log instead of the enriched log variable that contains the name/kind/method fields. The candidate is more specific about the location (lines 96-97 in dualwriter_mode3.go) and uses slightly different wording ('enriched log variable' vs 'log variable with additional context values'), but they describe the identical problem."
+        },
+        {
+          "golden_comment": "Bug: calling recordLegacyDuration when storage operation fails should be recordStorageDuration.",
+          "severity": "High",
+          "matched_candidate": "Update error path calls d.recordLegacyDuration(...) instead of d.recordStorageDuration(...) in pkg/apiserver/rest/dualwriter_mode3.go:125-132",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the same bug: in the error path (when storage operation fails), the code incorrectly calls `recordLegacyDuration` instead of `recordStorageDuration`. The candidate provides more specific location details (file and line numbers), but describes the identical underlying issue."
+        },
+        {
+          "golden_comment": "Inconsistency: using name instead of options.Kind for metrics recording differs from other methods.",
+          "severity": "Medium",
+          "matched_candidate": "Delete success path records storage duration with 'name' parameter instead of 'options.Kind' in pkg/apiserver/rest/dualwriter_mode3.go:106",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: in the delete success path at line 106 of dualwriter_mode3.go, the code uses 'name' parameter for recording storage duration metrics, while it should use 'options.Kind' to be consistent with other methods. The candidate is more specific about the location and context, but describes the exact same inconsistency problem."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "DeleteCollection async legacy path uses d.recordStorageDuration(...) instead of d.recordLegacyDuration(...) in pkg/apiserver/rest/dualwriter_mode3.go:161-166"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR90045__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90045__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/106778": {
@@ -44240,6 +45270,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR106778__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR106778__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The rendered GrafanaRuleListItem is missing the required key prop for React list items. This can cause rendering issues when the list order changes.",
+          "severity": "Medium",
+          "matched_candidate": "Missing React key prop on GrafanaRuleListItem in FilterView.tsx rules.map(), causing potential stale row reuse when list order changes",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same issue: the missing React key prop on GrafanaRuleListItem elements in a map() function. The golden comment mentions 'rendering issues when the list order changes' and the candidate mentions 'potential stale row reuse when list order changes' - these are semantically identical consequences of missing keys. Both point to the same file (FilterView.tsx) and the same component (GrafanaRuleListItem) in a rules.map() iteration."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ReferenceError: ctx is undefined in useIsGrafanaPromRuleEditable() at useAbilities.ts:369-370"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "RuleActionsButtons is invoked with only promRule, but SilenceGrafanaRuleDrawer inside RuleActionsButtons still depends on a Grafana Ruler rule being present, so for Grafana rules coming from list views the 'Silence notifications' menu entry (now driven by Grafana Prom abilities) will toggle showSilenceDrawer without ever rendering the drawer. This means clicking 'Silence notifications' for these rules has no visible effect, even when abilities indicate silencing is allowed.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR106778__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR106778__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/107534": {
@@ -45441,6 +46506,29 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR107534__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR107534__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The applyTemplateVariables method is called with request.filters as the third parameter, but this parameter is not used in the corresponding test setup.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 0,
+      "total_golden": 1,
+      "tp": 0,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR107534__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR107534__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/79265": {
@@ -47587,6 +48675,62 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR79265__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR79265__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Race condition: Multiple concurrent requests could pass the device count check simultaneously and create devices beyond the limit. Consider using a database transaction or lock.",
+          "severity": "High",
+          "matched_candidate": "Race condition: device limit check overshoots - concurrent requests can all observe count < limit before any write completes in CreateOrUpdateDevice (database.go:108-117)",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same race condition issue: concurrent requests can bypass the device count limit check because multiple requests can read the count simultaneously before any writes complete. The golden comment suggests using transactions or locks as a solution, while the candidate provides specific line numbers (108-117) and describes the mechanism ('all observe count < limit before any write completes'). These are semantically identical issues with the same root cause and impact."
+        },
+        {
+          "golden_comment": "Anonymous authentication now fails entirely if anonDeviceService.TagDevice returns ErrDeviceLimitReached. Previously, device tagging was asynchronous and non-blocking. This change prevents anonymous users from authenticating when the device limit is reached.",
+          "severity": "Medium",
+          "matched_candidate": "LogicError: anonymous auth can fail on limit error - ErrDeviceLimitReached is propagated as hard failure in client.go:44-46 instead of warning like other tagging errors",
+          "confidence": 0.95,
+          "reasoning": "Both the golden comment and candidate issue identify the same underlying problem: anonymous authentication now fails completely when ErrDeviceLimitReached is returned from device tagging, whereas previously this was non-blocking. The golden comment explains it was previously asynchronous and non-blocking, while the candidate describes it as being propagated as a hard failure instead of a warning. Both point to the same bug at client.go:44-46 where the error handling changed from non-blocking to blocking behavior."
+        },
+        {
+          "golden_comment": "Returning ErrDeviceLimitReached when no rows were updated is misleading; the device might not exist.",
+          "severity": "Low",
+          "matched_candidate": "MisleadingError: no rows updated returns limit reached - updateDevice returns ErrDeviceLimitReached when RowsAffected()==0, but zero rows can also mean device doesn't exist or is outside time window (database.go:95-97)",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: returning ErrDeviceLimitReached when no rows are updated is misleading because zero affected rows could indicate the device doesn't exist (or other conditions), not necessarily that a device limit was reached. The candidate provides more detail about the specific conditions (device doesn't exist or is outside time window) but the core problem is identical."
+        },
+        {
+          "golden_comment": "Time window calculation inconsistency: Using device.UpdatedAt.UTC().Add(-anonymousDeviceExpiration) as the lower bound but device.UpdatedAt as the current time may not match the intended logic. Consider using time.Now().UTC() consistently.",
+          "severity": "Low",
+          "matched_candidate": "LogicError: inconsistent time window basis - updateDevice uses device.UpdatedAt while CreateOrUpdateDevice uses time.Now().UTC() for counting active devices, causing existing devices to fail update WHERE clause (database.go:80-81, 110)",
+          "confidence": 0.92,
+          "reasoning": "Both issues identify the same underlying problem: inconsistent use of time references when calculating the time window for active devices. The golden comment points out the inconsistency between using device.UpdatedAt.UTC().Add(-anonymousDeviceExpiration) and device.UpdatedAt, while the candidate identifies the broader inconsistency between updateDevice using device.UpdatedAt and CreateOrUpdateDevice using time.Now().UTC(). Both are describing the same logical flaw - mixing different time bases (device.UpdatedAt vs time.Now().UTC()) in the time window calculation, which can cause incorrect behavior. The candidate provides more specific context about the impact (WHERE clause failures) and line numbers, but fundamentally identifies the same issue."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "LogicError: anonymous auth fails on device limit - Authenticate returns error when TagDevice yields ErrDeviceLimitReached, breaking previous best-effort behavior (client.go:44-46)"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "This call won\u2019t compile: dbSession.Exec(args...) is given a []interface{} where the first element is the query, but Exec\u2019s signature requires a first parameter of type string (not an interface{} splat).",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.8,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR79265__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR79265__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9": {
@@ -49042,6 +50186,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR9__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR9__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Thread-safety issue with lazy @loaded_locales",
+          "severity": "Low",
+          "matched_candidate": "Race condition in lazy locale loading - unsynchronized lazy initialization and check-then-act access to @loaded_locales array in lib/freedom_patches/translate_accelerator.rb:62-64 allows concurrent threads to double-load locales",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same thread-safety issue with the @loaded_locales variable. The golden comment describes it as a 'Thread-safety issue with lazy @loaded_locales' while the candidate provides more technical detail about the race condition, unsynchronized lazy initialization, and check-then-act pattern that allows concurrent threads to double-load locales. The candidate even specifies the exact file and line numbers (lib/freedom_patches/translate_accelerator.rb:62-64). These are describing the same underlying concurrency bug with different levels of detail."
+        },
+        {
+          "golden_comment": "Consider normalizing the input locale (e.g., to a symbol) when checking/loading here to avoid double-loading if the same locale is passed as a String vs Symbol (also applies to other locations in the PR).",
+          "severity": "Low",
+          "matched_candidate": "String/Symbol locale mismatch causes duplicate loading - @loaded_locales.include?(locale) check in lib/freedom_patches/translate_accelerator.rb:62-64 lacks normalization, treating string and symbol forms of same locale as distinct",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: the lack of normalization when checking locales leads to treating string and symbol forms of the same locale as distinct values, potentially causing duplicate loading. The golden comment suggests normalizing input (e.g., to symbol) to avoid double-loading, while the candidate specifically points to the @loaded_locales.include?(locale) check at lines 62-64 in translate_accelerator.rb as lacking normalization. Both are describing the same String/Symbol mismatch problem and its consequence of duplicate loading."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "NoMethodError from missing SiteSetting.default_locale - config/initializers/i18n.rb:17 unconditionally calls SiteSetting.default_locale.to_sym without nil guard"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR9__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR9__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/76186": {
@@ -50467,6 +51648,58 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR76186__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR76186__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The ContextualLoggerMiddleware methods (QueryData, CallResource, CheckHealth, CollectMetrics) panic when a nil request is received. This occurs because they directly access req.PluginContext (via the instrumentContext function) without first checking if req is nil. This is a regression, as previous middleware layers gracefully handled nil requests.",
+          "severity": "High",
+          "matched_candidate": "ContextualLoggerMiddleware dereferences req.PluginContext without nil check in QueryData/CallResource/CheckHealth/CollectMetrics methods, causing panic when nil request reaches middleware",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same problem: the ContextualLoggerMiddleware methods (QueryData, CallResource, CheckHealth, CollectMetrics) panic when receiving a nil request because they dereference req.PluginContext without first checking if req is nil. The candidate uses slightly more technical language ('dereferences req.PluginContext') while the golden comment explains it more verbosely ('directly access req.PluginContext via instrumentContext function'), but they describe the identical bug with the same root cause, affected methods, and failure mode (panic on nil request)."
+        },
+        {
+          "golden_comment": "The traceID is no longer logged for plugin requests. During a refactoring, the tracing import and the logic to extract and add traceID from the context to log parameters were removed from the LoggerMiddleware. The newly introduced ContextualLoggerMiddleware does not add this information, resulting in missing traceID in plugin request logs and impacting debugging and request tracing capabilities.",
+          "severity": "Low",
+          "matched_candidate": "Explicit traceID extraction removed from logger_middleware.go, and replacement context enrichment does not add traceID, breaking request correlation when debugging plugin calls across traced services",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same core issue: traceID is no longer being logged for plugin requests due to refactoring changes in the middleware. The golden comment explains that tracing import and extraction logic were removed from LoggerMiddleware and the new ContextualLoggerMiddleware doesn't add this information. The candidate issue states that explicit traceID extraction was removed from logger_middleware.go and the replacement context enrichment doesn't add traceID. Both describe the same problem (missing traceID in logs), same cause (refactoring/removal of extraction logic), and same impact (breaking debugging/request tracing capabilities)."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "TestLogger.FromContext returns a fresh logger instance instead of preserving the receiver's captured state, causing test assertions to break when code logs via FromContext logger"
+        },
+        {
+          "candidate": "FromContext fallback to d.New() loses the logger returned by FromContext and drops context-derived attributes when the returned logger is not *log.ConcreteLogger"
+        },
+        {
+          "candidate": "CallResource method dereferences req.PluginContext without checking req for nil, causing panic on nil request"
+        },
+        {
+          "candidate": "CheckHealth method dereferences req.PluginContext without checking req for nil, causing panic on nil request"
+        },
+        {
+          "candidate": "CollectMetrics method dereferences req.PluginContext without checking req for nil, causing panic on nil request"
+        },
+        {
+          "candidate": "LoggerMiddleware changed from explicitly appending traceID to using FromContext, while instrumentContext only injects endpoint/pluginId/dsName/dsUID/uname without traceID, potentially breaking request correlation"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 9,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 6,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.2222222222222222,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR76186__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR76186__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10": {
@@ -52744,6 +53977,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR10__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR10__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "NoMethodError before_validation in EmbeddableHost",
+          "severity": "Critical",
+          "matched_candidate": "NoMethodError: before_validation dereferences nil host in app/models/embeddable_host.rb - validation crashes when host is nil because sub! is called unconditionally",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: a NoMethodError occurring in the before_validation callback for EmbeddableHost when the host is nil. The golden comment is more concise, while the candidate provides additional technical detail about the root cause (sub! being called unconditionally on a nil host), but they are describing the same bug."
+        },
+        {
+          "golden_comment": "The update and destroy methods in Admin::EmbeddableHostsController do not validate the existence of the EmbeddableHost record retrieved by ID. If EmbeddableHost.where(id: params[:id]).first returns nil (i.e., the host does not exist), attempting to call methods on the nil object (e.g., save_host or destroy) will result in a NoMethodError.",
+          "severity": "Medium",
+          "matched_candidate": "NoMethodError on missing EmbeddableHost in update/destroy in app/controllers/admin/embeddable_hosts_controller.rb - request crashes when params[:id] does not match any record, no nil guard present",
+          "confidence": 0.98,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same underlying problem: the update and destroy methods in Admin::EmbeddableHostsController fail to validate whether the EmbeddableHost record exists before calling methods on it. Both mention that when params[:id] doesn't match a record, the code attempts to call methods (save_host/destroy) on nil, resulting in a NoMethodError. The candidate uses more concise language ('no nil guard present', 'request crashes') but describes the identical issue in the same file and methods."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Unhandled rejection from destroyRecord promise in app/assets/javascripts/admin/components/embeddable-host.js.es6 - delete flow silently fails when backend destroy request rejects, missing rejection handler"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "record_for_host compares lower(host) = ? but does not normalize the parameter\u2019s case, so mixed\u2011case referer hosts may fail to match even though comparison intends to be case\u2011insensitive.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "Because this migration inserts embeddable_hosts rows with raw SQL, any existing embeddable_hosts values that include http:// or /https:// or path segments won\u2019t go through the EmbeddableHost model\u2019s normalization, so the new host lookup (which compares only the bare host) may fail for migrated data. Consider ensuring that migrated hosts are normalized to the same format as newly created EmbeddableHost records so existing embedding configurations keep working.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 4,
+      "tp": 2,
+      "fp": 1,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR10__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR10__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7": {
@@ -54587,6 +55866,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR7__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR7__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [
+        {
+          "candidate": "Light-theme heading lightness changed from 20% to 50% in .topic-list-item h3, causing unexpected contrast changes in app/assets/stylesheets/mobile/topic-post.scss"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 3,
+      "tp": 0,
+      "fp": 1,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR7__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR7__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8": {
@@ -56529,6 +57843,48 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR8__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR8__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": " The findMembers() call is now asynchronous and unhandled. The controller may not have member data immediately available, creating a race condition.",
+          "severity": "High",
+          "matched_candidate": "Race condition in member list reload: overlapping async findMembers() calls can resolve out of order, overwriting newer state with stale member data",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same core issue: the asynchronous findMembers() call creates a race condition. The golden comment focuses on the immediate unavailability of member data, while the candidate provides more detail about overlapping calls resolving out of order. However, both are describing the same fundamental problem - the unhandled async operation causing timing/race condition issues with member data."
+        },
+        {
+          "golden_comment": "In the next action, capping the next offset at user_count can produce an empty page (e.g., total equal to limit results in offset == total, showing 2/2 with no members). This can cause confusing UX on the last page.",
+          "severity": "Medium",
+          "matched_candidate": "Off-by-one error in pagination: totalPages calculation overcounts by one when user_count is exactly divisible by limit, causing empty extra page",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same pagination bug: when user_count is exactly divisible by limit, the calculation produces an empty page. The golden comment describes this as 'capping the next offset at user_count can produce an empty page (e.g., total equal to limit results in offset == total)', while the candidate describes it as 'totalPages calculation overcounts by one when user_count is exactly divisible by limit, causing empty extra page'. Both are describing the off-by-one error that results in an empty final page when the total count is a perfect multiple of the page size."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "NoMethodError in add_members: usernames parameter assumes String type but crashes when Array is passed, as Array does not implement split method"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "HTTP method mismatch in .remove_member - test uses PUT but remove_member action expects DELETE",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR8__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR8__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3": {
@@ -58030,6 +59386,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR3__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR3__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "BlockedEmail.should_block_email? method has side effects during a read operation - it updates statistics even when just checking if an email should be blocked. This could cause race conditions in concurrent environments and makes the method name misleading.",
+          "severity": "Medium",
+          "matched_candidate": "Race condition in blocked_email.rb: non-atomic read/modify/write on match_count field loses increments when concurrent requests update the same record",
+          "confidence": 0.75,
+          "reasoning": "Both issues identify problems with the `should_block_email?` method updating `match_count`, but they focus on different aspects. The golden comment emphasizes the design flaw of having side effects in a read operation (misleading method name, unexpected behavior). The candidate focuses on the technical consequence of race conditions from non-atomic updates. While related, the golden issue is about the broader design problem while the candidate is about one specific technical consequence. However, they are pointing to the same underlying code behavior - the problematic update of match_count during what appears to be a read operation."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Stale email rejection: valid addresses stay blocked when server rejected same email once transiently. rejectedEmails cache is never cleared on email edits, retry, or success in create_account_controller.js"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Regex pattern @(#{domains}) only matches domain suffixes, not full domains. evil.example.com would match whitelist entry example.com.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR3__20260407",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR3__20260407/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5": {
@@ -59525,6 +60916,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR5__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR5__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "-ms-align-items never existed in any version of IE/Edge; the correct legacy property is -ms-flex-align.",
+          "severity": "Low",
+          "matched_candidate": "Invalid -ms-align-items property in align-items mixin at app/assets/stylesheets/common/foundation/mixins.scss:121 breaks IE/legacy Edge alignment (line already emits correct -ms-flex-align property)",
+          "confidence": 0.95,
+          "reasoning": "Both the golden comment and candidate issue identify the same underlying problem: the use of `-ms-align-items` which is an invalid/non-existent property. The golden comment states that `-ms-align-items` never existed and the correct property is `-ms-flex-align`. The candidate issue identifies the same invalid property (`-ms-align-items`) at a specific location and notes that the correct property (`-ms-flex-align`) is already being emitted. Both are pointing to the same bug - the incorrect use of a non-existent vendor prefix property."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Incorrect -webkit-box-ordinal-group and -moz-box-ordinal-group mapping in order mixin at app/assets/stylesheets/common/foundation/mixins.scss:125-130 (2009 flexbox properties have different semantics than modern order property)"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Mixing float: left with flexbox causes layout issues. Further this PR removes the float-based right alignment for .d-header .panel, which may cause the login panel in the non-Ember/noscript header (where .panel is nested inside .row and not a flex item) to stack under the title instead of remaining right-aligned.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR5__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR5__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6": {
@@ -60871,6 +62297,32 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR6__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR6__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The include_website_name method is missing the required ? suffix. Rails serializers expect include_ methods to end with ? for conditional attribute inclusion, a convention followed by other methods in this serializer. Without it, the website_name attribute may not be conditionally included as intended. Additionally, the '.' << website_host string concatenation should be replaced with '.' + website_host or '.#{website_host}' to avoid mutating string literals, which can lead to issues.",
+          "severity": "Medium",
+          "matched_candidate": "include_website_name method is missing the '?' suffix required by the serializer's include-hook convention, causing the hook to be ignored. Should be 'include_website_name?' to match the pattern used by other hooks like include_email?, include_card_image_badge_id?, and dynamically defined hooks in staff_attributes, private_attributes, and untrusted_attributes",
+          "confidence": 0.95,
+          "reasoning": "Both the golden comment and candidate issue identify the same core problem: the include_website_name method is missing the required '?' suffix that Rails serializers expect for conditional attribute inclusion methods. Both note that this convention is followed by other methods in the serializer (like include_email?, include_card_image_badge_id?). The golden comment additionally mentions a string concatenation issue, which the candidate doesn't address, but the primary issue about the missing '?' suffix is identical in both."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR6__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR6__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4": {
@@ -63582,6 +65034,84 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR4__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR4__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "SSRF vulnerability using open(url) without validation",
+          "severity": "Critical",
+          "matched_candidate": "SSRF: open-uri fetches unvalidated feed URL - internal network access becomes possible when feed_polling_url points to metadata or private hosts in app/jobs/scheduled/poll_feed.rb:24-30, open(SiteSetting.feed_polling_url) has no scheme/host allowlist or destination validation",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same SSRF (Server-Side Request Forgery) vulnerability. The golden comment points to using open(url) without validation, and the candidate issue specifically identifies the same problem in app/jobs/scheduled/poll_feed.rb where open(SiteSetting.feed_polling_url) is called without scheme/host allowlist or destination validation. The candidate provides more specific details (file location, line numbers, and attack vectors like metadata/private hosts access) but describes the identical underlying security issue."
+        },
+        {
+          "golden_comment": "The current origin validation using indexOf is insufficient and can be bypassed. An attacker could use a malicious domain like evil-discourseUrl.com to pass this check.",
+          "severity": "Medium",
+          "matched_candidate": "TypeError: origin check accepts malicious superstrings - message validation breaks when attacker origin contains discourseUrl as a substring in app/assets/javascripts/embed.js:17, discourseUrl.indexOf(e.origin) === -1 is a substring check rather than exact origin comparison",
+          "confidence": 0.98,
+          "reasoning": "Both the golden comment and candidate issue identify the same security vulnerability: the indexOf check for origin validation is insufficient because it performs substring matching rather than exact origin comparison. Both explain that a malicious domain containing the legitimate discourseUrl as a substring (e.g., 'evil-discourseUrl.com') would bypass the validation. The candidate provides more technical detail including the specific line number and code snippet, but the core issue is identical."
+        },
+        {
+          "golden_comment": "The TopicEmbed.import method is susceptible to a NoMethodError if the contents parameter is nil when attempting to append a string, and an XSS vulnerability due to unescaped url interpolation in the generated HTML.",
+          "severity": "Medium",
+          "matched_candidate": "XSS: unescaped URL interpolated into HTML - script injection renders when url contains quotes or HTML in app/models/topic_embed.rb:13, raw url interpolated in href and link text without escaping",
+          "confidence": 0.85,
+          "reasoning": "Both comments identify the same XSS vulnerability in topic_embed.rb related to unescaped URL interpolation in HTML. The golden comment mentions two issues (NoMethodError and XSS), while the candidate focuses specifically on the XSS vulnerability at line 13. The candidate provides more specific details about the XSS issue (script injection via quotes/HTML in URLs, affecting href and link text), which is the same underlying security vulnerability mentioned in the golden comment. The NoMethodError mentioned in the golden comment is a separate issue not covered by the candidate, but the XSS vulnerability itself is a match."
+        },
+        {
+          "golden_comment": "The ERB block closes with end if, which is invalid Ruby/ERB and will raise at render; it should just be end to close the if block.",
+          "severity": "Medium",
+          "matched_candidate": "SyntaxError: invalid ERB block closing - template rendering breaks when ERB parses invalid 'end if' syntax in app/views/embed/best.html.erb:6, uses <%- end if %> instead of <%- end %>",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same problem: an invalid ERB block closing syntax using 'end if' instead of just 'end'. The golden comment explains this will raise an error at render time, while the candidate provides more specific details (file path, line number, and mentions SyntaxError and template rendering breaks), but they are fundamentally describing the same underlying issue."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "NoMethodError: missing content crashes feed polling - job crashes when an RSS item lacks a content field in app/jobs/scheduled/poll_feed.rb:31-36, i.content is nil and .scrub raises NoMethodError with no guard or rescue present"
+        },
+        {
+          "candidate": "RuntimeError: network and parse failures are unhandled - scheduled job fails noisily when the feed is unreachable or malformed in app/jobs/scheduled/poll_feed.rb:29, SimpleRSS.parse open() has no local rescue around network fetch or parse"
+        },
+        {
+          "candidate": "NoMethodError: nil post dereference on stale embed - revise crashes when embed exists but associated post is missing in app/models/topic_embed.rb:32-36, embed.post can be nil but is passed to PostRevisor.new(post) with no nil guard"
+        },
+        {
+          "candidate": "NoMethodError from nil downcase on missing setting - topic retrieval crashes when embed_by_username is unset or nil in lib/topic_retriever.rb:49, SiteSetting.embed_by_username.downcase called with no local nil guard"
+        },
+        {
+          "candidate": "TypeError: comments container may be null - script crashes when #discourse-comments is absent on the page in app/assets/javascripts/embed.js:5-12, document.getElementById returns null but comments.appendChild(iframe) dereferences with no null guard"
+        },
+        {
+          "candidate": "Test name/body mismatch on error assertion - spec name says 'raises an error' but body only asserts response.should_not be_success in spec/controllers/embed_controller_spec.rb:13-16"
+        },
+        {
+          "candidate": "SSRF: open fetches untrusted URLs - internal network access occurs when attacker controls import_remote URL in app/models/topic_embed.rb:48, open(url).read has insufficient validation beyond URI host equality check"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "postMessage targetOrigin should be the origin (scheme+host+port), not the full referrer URL; using the full URL will cause the message to be dropped and prevent resizing.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The code sets X-Frame-Options: ALLOWALL which completely disables clickjacking protection. The referer validation can be bypassed (referer headers are easily spoofed), and the fallback to empty string for nil referer masks validation failures.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 11,
+      "total_golden": 6,
+      "tp": 4,
+      "fp": 7,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.36363636363636365,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR4__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR4__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1": {
@@ -65404,6 +66934,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR1__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR1__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The downsize method is defined twice. The second definition, which expects a single dimensions string parameter, overrides the first, which expected separate max_width and max_height parameters. This makes the original method unreachable and breaks existing code that calls it with separate width and height arguments.",
+          "severity": "Medium",
+          "matched_candidate": "Downsize arity changed by override - second definition of downsize method overrides first one, making 5-arg form (width, height) unreachable and breaking existing callers in app/models/optimized_image.rb",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the exact same issue: the downsize method is defined twice, with the second definition (taking a dimensions string parameter) overriding the first definition (taking separate max_width and max_height parameters). Both note this makes the original method unreachable and breaks existing code. The candidate even provides additional specific context about the 5-arg form and mentions the breaking caller in app/models/optimized_image.rb, which adds detail but describes the same core problem."
+        },
+        {
+          "golden_comment": "Hardcoding maxSizeKB = 10 * 1024 ignores Discourse.SiteSettings['max_' + type + '_size_kb'], so the client-side limit can diverge from server-side and per-type settings (also applies to the 413 handler below).",
+          "severity": "Low",
+          "matched_candidate": "Site upload limit ignored - size validation breaks when site settings allow larger or smaller uploads because hardcoded 10MB replaces dynamic per-type site settings in app/assets/javascripts/discourse/lib/utilities.js",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same problem: hardcoding maxSizeKB = 10 * 1024 (10MB) instead of using the dynamic site settings from Discourse.SiteSettings['max_' + type + '_size_kb']. Both point out that this causes the client-side limit to diverge from server-side and per-type settings. The candidate explicitly mentions the file path (app/assets/javascripts/discourse/lib/utilities.js) and describes the same core issue - hardcoded 10MB replacing dynamic per-type site settings."
+        },
+        {
+          "golden_comment": "Passing 80% as the dimensions can fail for animated GIFs when allow_animated_thumbnails is true, since the animated path uses gifsicle --resize-fit which expects WxH geometry, not a percentage; downsizing would then silently fail.",
+          "severity": "Medium",
+          "matched_candidate": "Percentage resize string may break downsize - passing '80%' to OptimizedImage.downsize for animated GIFs may cause issues when backend expects WxH geometry in app/controllers/uploads_controller.rb",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: passing '80%' as a percentage string to OptimizedImage.downsize causes problems for animated GIFs because the backend (gifsicle --resize-fit) expects WxH geometry format instead of percentage format. The golden comment provides more detail about the allow_animated_thumbnails flag and the silent failure, while the candidate is more concise, but they both point to the exact same bug - percentage dimensions breaking the animated GIF resize functionality."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "413 error message reports wrong max size - user-facing 'file too large' message always reports 10MB even when configured server/site limit differs in app/assets/javascripts/discourse/lib/utilities.js"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2": {
@@ -67019,6 +68593,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR2__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR2__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "logic: Potential nil pointer exception - if no TopicUser record exists, tu will be nil and calling methods on it will crash",
+          "severity": "High",
+          "matched_candidate": "NoMethodError on nil TopicUser unsubscribe - TopicsController#unsubscribe dereferences tu.notification_level without nil guard after TopicUser.find_by at app/controllers/topics_controller.rb:105-107",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same nil pointer/dereference issue. The golden comment warns about calling methods on a potentially nil `tu` object when no TopicUser record exists. The candidate issue specifically identifies this as a NoMethodError when dereferencing `tu.notification_level` without a nil guard after `TopicUser.find_by`. Both are describing the same underlying bug: accessing methods on a potentially nil TopicUser object."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "CSS selector typo breaks previous-discussion styling - class attribute contains '.previous-discussion' with leading dot instead of 'previous-discussion' at app/views/email/notification.html.erb:10"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Typo in property name: 'stopNotificiationsText' should be 'stopNotificationsText' (missing 'n' in 'Notifications')",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR2__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR2__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/22532": {
@@ -68720,6 +70329,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR22532__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR22532__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "logic: macOS-specific sed syntax with empty string after -i flag will fail on Linux systems",
+          "severity": "Low",
+          "matched_candidate": "sed -i command uses BSD/macOS-specific syntax (sed -i '' -E) that fails on Linux with GNU sed in scripts/test-gcal-webhooks.sh:67-69",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the same portability issue: the sed command uses macOS/BSD-specific syntax (specifically the empty string after -i flag) that is incompatible with Linux/GNU sed. The golden comment describes it as 'macOS-specific sed syntax with empty string after -i flag will fail on Linux' while the candidate specifies the exact syntax 'sed -i '' -E' and location. These are semantically identical issues."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Shared log file /tmp/tmole.log creates race condition when multiple script instances run concurrently, causing wrong webhook URL to be reused"
+        },
+        {
+          "candidate": "Fixed 10-second timeout for tmole startup polling is insufficient for slow initialization, causing premature failure in scripts/test-gcal-webhooks.sh:41-52"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The updateManyByCredentialId call uses an empty data object, which prevents Prisma's @updatedAt decorator from updating the updatedAt timestamp. This results in inaccurate cache status tracking, as the timestamp isn't updated when the cache is refreshed. To fix this, explicitly set the updatedAt field.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22532__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22532__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/8330": {
@@ -70318,6 +71965,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR8330__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR8330__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect end time calculation using slotStartTime instead of slotEndTime",
+          "severity": "Medium",
+          "matched_candidate": "Working-hours check ignores slot end: both start and end are computed from slotStartTime while slotEndTime is never used, causing slots that extend past workingHour.endTime to be misclassified as available in packages/trpc/server/routers/viewer/slots.ts:141-143",
+          "confidence": 0.98,
+          "reasoning": "Both comments identify the same bug: the code incorrectly uses slotStartTime when calculating the end time for working hours validation, while slotEndTime is never used. The golden comment states 'Incorrect end time calculation using slotStartTime instead of slotEndTime' and the candidate provides more detail about the same issue - that both start and end are computed from slotStartTime, slotEndTime is unused, and this causes slots extending past working hours to be incorrectly classified. They describe the same underlying defect with different levels of detail."
+        },
+        {
+          "golden_comment": "Using === for dayjs object comparison will always return false as it compares object references, not values. Use .isSame() method instead: dayjs(date.start).add(utcOffset, 'minutes').isSame(dayjs(date.end).add(utcOffset, minutes))",
+          "severity": "Medium",
+          "matched_candidate": "Dayjs === comparison always false: comparing two freshly created Dayjs objects with === checks object identity and is always false for equal instants in packages/trpc/server/routers/viewer/slots.ts:114",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: using === to compare dayjs objects checks object identity/references rather than value equality, which will always return false even for equal timestamps. The golden comment suggests using .isSame() method, while the candidate identifies the problem occurs at a specific location (packages/trpc/server/routers/viewer/slots.ts:114). Both are flagging the same underlying bug with dayjs object comparison."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8330__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8330__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/14943": {
@@ -71757,6 +73437,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR14943__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR14943__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Using retryCount: reminder.retryCount + 1 reads a possibly stale value and can lose increments under concurrency; consider an atomic increment via Prisma (increment: 1) to avoid race conditions (also applies to the similar update in the catch block).",
+          "severity": "High",
+          "matched_candidate": "Race condition: retryCount increment uses stale read value instead of atomic Prisma increment, causing lost updates when concurrent schedulers update the same reminder",
+          "confidence": 1.0,
+          "reasoning": "Both comments identify the same race condition issue: using `retryCount: reminder.retryCount + 1` reads a potentially stale value that can result in lost increments when multiple concurrent operations update the same reminder. Both suggest using Prisma's atomic increment operation as the solution. The candidate is more concise but captures the exact same underlying problem and cause."
+        },
+        {
+          "golden_comment": "The deletion logic in scheduleSMSReminders.ts incorrectly deletes non-SMS workflow reminders (e.g., Email, WhatsApp) that have retryCount > 1. This occurs because the retryCount condition within the OR clause for deletion lacks a method: WorkflowMethods.SMS filter, causing it to apply to all reminder types instead of only SMS reminders, which is the intended scope of this function.",
+          "severity": "High",
+          "matched_candidate": "deleteMany filter with OR condition lacks method: WorkflowMethods.SMS in second branch, causing deletion of non-SMS workflow reminders when retryCount > 1",
+          "confidence": 0.98,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same bug: the deleteMany operation's OR clause has a retryCount condition that is missing the 'method: WorkflowMethods.SMS' filter in one of its branches. This causes the deletion logic to incorrectly delete non-SMS workflow reminders (Email, WhatsApp, etc.) when retryCount > 1, instead of limiting deletions to only SMS reminders as intended. The candidate uses more concise technical language while the golden comment provides more context, but they describe the identical underlying issue."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14943__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14943__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/22345": {
@@ -73193,6 +74906,33 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR22345__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR22345__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "In getBaseConditions(), the else if (filterConditions) and final else branches are unreachable. This is because getAuthorizationConditions() always returns a non-null Prisma.Sql object, making authConditions always truthy, which means only the first two if/else if conditions are ever evaluated.",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "Fetching userIdsFromOrg only when teamsFromOrg.length > 0 can exclude org-level members for orgs without child teams; consider deriving from teamIds (which includes orgId) or removing the guard so org-only orgs still include member user bookings.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 0,
+      "total_golden": 2,
+      "tp": 0,
+      "fp": 0,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22345__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22345__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/11059": {
@@ -75854,6 +77594,62 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR11059__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR11059__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The parseRefreshTokenResponse function incorrectly sets refresh_token to the hardcoded string 'refresh_token' when it's missing from the OAuth refresh token response. This invalidates the token, breaking subsequent token refreshes and causing authentication failures.",
+          "severity": "High",
+          "matched_candidate": "Logic error in packages/app-store/_utils/oauth/parseRefreshTokenResponse.ts: hardcoded placeholder 'refresh_token' string is returned when provider omits refresh_token, causing wrong value to be persisted",
+          "confidence": 0.98,
+          "reasoning": "Both issues identify the same bug: the parseRefreshTokenResponse function sets refresh_token to the hardcoded string 'refresh_token' when it's missing from the response, which causes the wrong value to be persisted and breaks subsequent authentication. The candidate uses slightly different wording ('placeholder', 'persisted') but describes the identical problem in the same file with the same root cause and consequences."
+        },
+        {
+          "golden_comment": "When APP_CREDENTIAL_SHARING_ENABLED and CALCOM_CREDENTIAL_SYNC_ENDPOINT are set, the refreshFunction helper returns the fetch Response, but several callers (for example GoogleCalendarService.refreshAccessToken expecting res.data, and HubspotCalendarService.refreshAccessToken expecting a HubspotToken) assume it returns the integration-specific token object. That mismatch will cause runtime errors in the sync-enabled path unless the return type or those call sites are adjusted.",
+          "severity": "High",
+          "matched_candidate": "Type error in packages/app-store/_utils/oauth/refreshOAuthTokens.ts: sync branch returns raw fetch Response instead of parsed token payload, creating incompatible return type with callers expecting .data.access_token fields",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: when sync is enabled (APP_CREDENTIAL_SHARING_ENABLED and CALCOM_CREDENTIAL_SYNC_ENDPOINT are set), the refreshFunction returns a raw fetch Response object instead of the parsed token object that callers expect. The golden comment mentions specific callers like GoogleCalendarService and HubspotCalendarService expecting structured data (res.data, HubspotToken), while the candidate mentions callers expecting .data.access_token fields. Both describe the same type mismatch that will cause runtime errors in the sync-enabled code path."
+        },
+        {
+          "golden_comment": "When the sync endpoint path is used, res is a fetch Response and has no .data; res?.data will be undefined and token.access_token will throw at runtime. This relies on a consistent return shape from refreshOAuthTokens, which isn\u2019t guaranteed currently.",
+          "severity": "High",
+          "matched_candidate": "Type error in packages/app-store/_utils/oauth/refreshOAuthTokens.ts: sync branch returns raw fetch Response instead of parsed token payload, creating incompatible return type with callers expecting .data.access_token fields",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: when the sync endpoint path is used, the function returns a raw fetch Response object instead of a parsed token payload. This causes a type mismatch because callers expect a structure with `.data.access_token` fields, but the Response object doesn't have this shape. The golden comment mentions 'res?.data will be undefined and token.access_token will throw at runtime' while the candidate describes it as 'sync branch returns raw fetch Response instead of parsed token payload, creating incompatible return type'. Both are describing the same bug with slightly different wording."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Race condition in apps/web/pages/api/webhook/app-credential.ts: non-transactional read-then-write allows concurrent requests to both create duplicate credentials because there's no unique constraint on (userId, appId) pair"
+        },
+        {
+          "candidate": "Type error in packages/app-store/googlecalendar/lib/CalendarService.ts: code accesses res?.data on fetch Response object which doesn't have .data property, breaking token field access"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Invalid Zod schema syntax. Computed property keys like [z.string().toString()] are not valid in Zod object schemas and will cause runtime errors. ",
+          "severity": "High"
+        },
+        {
+          "golden_comment": "parseRefreshTokenResponse returns a Zod safeParse result ({ success, data, error }), not the credential key object. Persisting that as key stores the wrapper instead of the token payload; we should store the parsed data or use schema parse.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 3,
+      "fp": 2,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 0.6,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR11059__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR11059__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/7232": {
@@ -77589,6 +79385,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR7232__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR7232__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Asynchronous functions deleteScheduledEmailReminder and deleteScheduledSMSReminder are called without await inside forEach loops. This occurs during booking rescheduling/cancellation, and workflow/workflow step deletion/updates. Consequently, scheduled workflow reminders may not be reliably cancelled, potentially leaving them active.",
+          "severity": "Medium",
+          "matched_candidate": "Unhandled promise errors from reminder deletions in handleCancelBooking.ts - deleteScheduledEmailReminder/deleteScheduledSMSReminder called without await inside forEach, promises not included in Promise.all",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: asynchronous functions deleteScheduledEmailReminder and deleteScheduledSMSReminder are called without await inside forEach loops. The golden comment mentions this occurs in multiple contexts (booking rescheduling/cancellation, workflow operations), while the candidate specifically identifies handleCancelBooking.ts. The candidate also adds the detail about promises not being included in Promise.all, which is another way of describing the same issue. Both describe the same bug - unhandled async operations that could lead to unreliable reminder cancellation."
+        },
+        {
+          "golden_comment": "When immediateDelete is true, the deleteScheduledEmailReminder function cancels the SendGrid email but fails to delete the corresponding WorkflowReminder record from the database. This creates orphaned database entries and is inconsistent with the immediateDelete: false path, which marks the record as cancelled. The SendGrid DELETE API call is also omitted in this path.",
+          "severity": "High",
+          "matched_candidate": "Stale DB reminders after removed deleteMany in viewer/workflows.tsx - immediateDelete branch cancels SendGrid but doesn't delete or update WorkflowReminder row, and cleanup job only targets cancelled=true reminders",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same core issue: when immediateDelete is true, the code cancels the SendGrid email but fails to delete or update the WorkflowReminder database record, creating orphaned/stale entries. The candidate additionally mentions the cleanup job only targeting cancelled=true reminders and references the removed deleteMany, providing more context, but the fundamental bug being identified is identical."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Promise handling error in handleNewBooking.ts - unawaited reminder deletions in forEach loop, try/catch cannot reliably catch later promise rejections"
+        },
+        {
+          "candidate": "Error handling aborts remaining cancellations in scheduleEmailReminders.ts - single try/catch wrapping all reminder cancellations causes later reminders to be skipped if one request fails"
+        },
+        {
+          "candidate": "LogicError leaves SendGrid batch undeleted in emailReminderManager.ts - callers invoke deleteScheduledEmailReminder without immediateDelete flag, only updating DB cancelled flag without calling SendGrid cancellation"
+        },
+        {
+          "candidate": "Stale reminders remain due to unawaited async deletes in viewer/bookings.tsx - deleteScheduledEmailReminder and deleteScheduledSMSReminder called without await in forEach, cleanup no longer part of mutation's awaited work"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 6,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 4,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR7232__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR7232__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/14740": {
@@ -79885,6 +81727,58 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR14740__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR14740__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Case sensitivity bypass in email blacklist",
+          "severity": "High",
+          "matched_candidate": "AuthorizationBypass: blacklist check is case-sensitive - blocked guests can be added when submitted with mixed-case email casing because blacklist is normalized with toLowerCase() but submitted guest values are compared unchanged at packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:74-77",
+          "confidence": 1.0,
+          "reasoning": "Both issues identify the same vulnerability: a case sensitivity bypass in the email blacklist check. The golden comment describes it as 'Case sensitivity bypass in email blacklist' while the candidate provides more technical detail, explaining that the blacklist is normalized with toLowerCase() but submitted guest values are compared unchanged, allowing blocked guests to bypass the check using mixed-case emails. They are describing the exact same security flaw."
+        },
+        {
+          "golden_comment": "The logic for checking team admin/owner permissions is incorrect. This condition uses AND (&&) which requires both isTeamAdmin AND isTeamOwner to be true, but it should use OR (||) since a user needs to be either an admin OR an owner to have permission.",
+          "severity": "Critical",
+          "matched_candidate": "AuthorizationError: admin permission check requires both roles - access is denied when a team admin is not also the team owner due to using && instead of || in isTeamAdminOrOwner check at packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:46-48",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same logical error: the use of AND (&&) instead of OR (||) when checking team admin/owner permissions. The golden comment explains that the condition incorrectly requires both isTeamAdmin AND isTeamOwner to be true, when it should allow either role. The candidate issue describes the same problem - that access is denied when a user is a team admin but not also the owner due to the && operator instead of ||. Both point to the same bug in the isTeamAdminOrOwner check, just with different wording and presentation styles."
+        },
+        {
+          "golden_comment": "uniqueGuests filters out existing attendees and blacklisted emails but does not deduplicate duplicates within the input; createMany can insert duplicate attendee rows if the client submits repeated emails.",
+          "severity": "Medium",
+          "matched_candidate": "ValidationError: duplicate guest emails not deduplicated - duplicate attendee rows can be created when input contains repeated emails because filtering only checks against existing attendees and blacklist, not duplicates within the submitted guests array at packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:74-78",
+          "confidence": 1.0,
+          "reasoning": "Both the golden comment and candidate issue identify the exact same bug: the uniqueGuests filter removes existing attendees and blacklisted emails but fails to deduplicate duplicate emails within the input array itself, allowing createMany to insert duplicate attendee rows when the client submits repeated emails. The candidate provides more specific location information (file path and line numbers) but describes the identical underlying issue."
+        },
+        {
+          "golden_comment": "Starting with an array containing an empty string may cause validation issues. Consider starting with an empty array [] and handling the empty state in the MultiEmail component instead.",
+          "severity": "Low",
+          "matched_candidate": "ValidationError: empty-string sentinel blocks add - guest submission breaks when dialog opens or resets with no entered emails because state initializes to [''] and validation fails on empty string at apps/web/components/dialog/AddGuestsDialog.tsx:32",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same underlying problem: initializing state with an array containing an empty string [''] causes validation issues. The golden comment suggests starting with an empty array [] instead, while the candidate issue describes the specific validation error that occurs due to this initialization. They are pointing to the same bug - the problematic initialization of the email state with [''] rather than []."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "This calls the email sender with the original guests, so existing attendees included in the input will be treated as new when sending notifications, leading to incorrect emails.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14740__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14740__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/10600": {
@@ -81994,6 +83888,57 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR10600__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR10600__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The exported function TwoFactor handles backup codes and is in BackupCode.tsx. Inconsistent naming.",
+          "severity": "Low",
+          "matched_candidate": "Component naming mismatch in BackupCode.tsx: file exports function named 'TwoFactor' but renders backup-code-specific UI",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: the BackupCode.tsx file exports a function named 'TwoFactor' which is inconsistent with the file's purpose and content (handling backup codes). The golden comment calls this 'inconsistent naming' while the candidate describes it as a 'component naming mismatch' - these are semantically equivalent descriptions of the same problem."
+        },
+        {
+          "golden_comment": "Error message mentions 'backup code login' but this is a disable endpoint, not login",
+          "severity": "Low",
+          "matched_candidate": "Naming mismatch in disable.ts: error log says 'cannot proceed with backup code login' but endpoint is for TOTP disable flow, not login",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: an error message in the disable endpoint incorrectly mentions 'backup code login' when it should reference the disable flow instead. The candidate provides more detail about the file location (disable.ts) and the specific error log text, but the core issue is identical - a naming/messaging mismatch where login terminology is used in a disable endpoint context."
+        },
+        {
+          "golden_comment": "Backup code validation is case-sensitive due to the use of indexOf(). This causes validation to fail if a user enters uppercase hex characters, as backup codes should be case-insensitive for a better user experience.",
+          "severity": "Medium",
+          "matched_candidate": "Case sensitivity bug in backup code validation: codes generated in lowercase hex but validation doesn't normalize user input, causing mixed-case entries to fail",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: backup code validation fails when users enter codes with different casing (specifically uppercase) because the validation is case-sensitive. The golden comment mentions indexOf() as the technical cause and notes codes should be case-insensitive. The candidate issue describes the same problem - codes are generated in lowercase but validation doesn't normalize input, causing mixed-case entries to fail. Both are describing the identical bug with slightly different technical details but the same root cause and user impact."
+        },
+        {
+          "golden_comment": "Because backupCodes are decrypted and mutated in memory before being written back, two concurrent login requests using the same backupCode could both pass this check and update, so a single backup code may effectively be accepted more than once if used concurrently, weakening the intended one-time-use semantics.",
+          "severity": "High",
+          "matched_candidate": "Race condition in next-auth-options.ts: concurrent login requests can both validate and consume the same one-time backup code due to lack of transaction/compare-and-swap",
+          "confidence": 0.98,
+          "reasoning": "Both the golden comment and candidate issue identify the same race condition vulnerability: concurrent login requests can both successfully validate and use the same backup code because the check-then-update operation is not atomic. The golden comment explains it as 'decrypted and mutated in memory before being written back' allowing both requests to pass the check, while the candidate describes it as lacking 'transaction/compare-and-swap'. Both conclude this weakens the one-time-use semantics of backup codes. These are semantically identical descriptions of the same concurrency bug."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Potential TypeError in EnableTwoFactorModal.tsx: calling backupCodes.map without null/undefined guard when setup response may omit backupCodes field"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 4,
+      "tp": 4,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.8,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10600__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10600__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/10967": {
@@ -84419,6 +86364,71 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR10967__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR10967__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Potential null reference if mainHostDestinationCalendar is undefined if evt.destinationCalendar is null or an empty array ",
+          "severity": "High",
+          "matched_candidate": "TypeError: unguarded first calendar access - location fallback crashes when destinationCalendar is null or empty. Code destructures first entry from evt.destinationCalendar ?? [] and immediately dereferences mainHostDestinationCalendar.integration without optional chaining or guard",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same underlying issue: a potential null/undefined reference error when accessing mainHostDestinationCalendar. The golden comment points out the problem occurs when evt.destinationCalendar is null or empty array, leading to mainHostDestinationCalendar being undefined. The candidate issue describes the exact same problem with more technical detail - destructuring from evt.destinationCalendar ?? [] results in undefined, which is then dereferenced without guards. Both are flagging the same null reference vulnerability in the same code path."
+        },
+        {
+          "golden_comment": "The optional chaining on mainHostDestinationCalendar?.integration is redundant since you already check mainHostDestinationCalendar in the ternary condition.",
+          "severity": "Low",
+          "matched_candidate": "Potential: redundant optional chaining obscures non-null branch - truthy-checked values are still accessed as optional. Extra ?. inside ternary true branches where values are already proven present",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same issue: redundant optional chaining (?.) being used on a value that has already been verified to exist through a truthy check in the ternary condition. The golden comment specifically mentions 'mainHostDestinationCalendar?.integration' being redundant after checking 'mainHostDestinationCalendar', while the candidate describes this as 'redundant optional chaining obscures non-null branch' where 'truthy-checked values are still accessed as optional' in ternary true branches. These are semantically identical - both point to unnecessary optional chaining after a truthiness check."
+        },
+        {
+          "golden_comment": "The Calendar interface now requires createEvent(event, credentialId), but some implementations (e.g., Lark/Office365) still declare createEvent(event) only\u2014this breaks the interface contract (also applies to other locations in the PR).",
+          "severity": "Low",
+          "matched_candidate": "Potential TypeError: extra createEvent arg breaks integrations - event creation breaks when calendar adapters implement the old two-parameter contract differently. New call site passes createEvent(calEvent, credential.id), but CalendarService.ts still implements createEvent(event) with only one parameter, causing interface/signature mismatch",
+          "confidence": 0.95,
+          "reasoning": "Both comments identify the same core issue: a mismatch between the Calendar interface contract and its implementations. The golden comment notes that the interface now requires createEvent(event, credentialId) but some implementations (Lark/Office365) only declare createEvent(event). The candidate issue describes the same problem from a slightly different angle - the new call site passes two parameters (calEvent, credential.id) but CalendarService.ts implements only one parameter, causing an interface/signature mismatch. Both are pointing to the same breaking change where the interface contract has been updated but not all implementations have been updated accordingly."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Potential: TypeError from createEvent arity change - implementations or callers break when they still use the old single-argument signature. Interface requires createEvent(event, credentialId) but CalendarService.ts still declares only one parameter"
+        },
+        {
+          "candidate": "Potential: TypeError from destinationCalendar shape change - event consumers break when they still treat destinationCalendar as a single object. destinationCalendar becomes DestinationCalendar[] | null, but EventManager.ts destructures and dereferences without optional chaining"
+        },
+        {
+          "candidate": "LogicError: impossible calendar lookup fallback - calendar selection falls back incorrectly when externalCalendarId is absent. Fallback predicate compares cal.externalId against absent value and cannot recover intended destination calendar"
+        },
+        {
+          "candidate": "TypeError: team calendars dropped on booking create - collective member calendars are ignored when multiple destination calendars were gathered. evt.destinationCalendar built as array with team member calendars appended, but createBooking persists only evt.destinationCalendar[0]"
+        },
+        {
+          "candidate": "Potential TypeError: recurring deletes skip DB-fetched credential - linked recurring events remain undeleted when credential exists only in DB, not user.credentials. Recurring-delete branch iterates only bookingToDelete.user.credentials and ignores DB-fetched calendarCredential"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Logic error: when externalCalendarId is provided, you're searching for a calendar where externalId === externalCalendarId, but this will always fail since you're looking for a calendar that matches itself. Should likely find by credentialId or use different logic.",
+          "severity": "High"
+        },
+        {
+          "golden_comment": "Logic inversion in organization creation: The slug property is now conditionally set when IS_TEAM_BILLING_ENABLED is true, instead of when it's false as originally intended. This change, combined with requestedSlug still being set when IS_TEAM_BILLING_ENABLED is true, results in both properties being set when billing is enabled, and neither when disabled",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 8,
+      "total_golden": 5,
+      "tp": 3,
+      "fp": 5,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.375,
+      "recall": 0.6,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10967__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10967__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/8087": {
@@ -86133,6 +88143,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR8087__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR8087__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The code uses forEach with async callbacks, which causes asynchronous operations (e.g., calendar/video event deletions, payment refunds) to run concurrently without being awaited. This 'fire-and-forget' behavior leads to unhandled promise rejections, race conditions, and incomplete cleanup, as surrounding try-catch blocks cannot properly handle errors from these unawaited promises. Replace forEach with for...of loops or Promise.all() with map() to ensure proper sequential execution and error handling.",
+          "severity": "Critical",
+          "matched_candidate": "TypeError: async forEach errors escape handler in packages/features/bookings/lib/handleCancelBooking.ts - calendar update failures go unhandled when async callbacks reject after handler continues",
+          "confidence": 0.95,
+          "reasoning": "Both issues identify the same core problem: async callbacks in forEach loops are not properly awaited, causing errors to escape the handler and go unhandled. The golden comment describes this as 'fire-and-forget' behavior with unhandled promise rejections, while the candidate specifically mentions 'async forEach errors escape handler' and 'calendar update failures go unhandled when async callbacks reject'. Both are pointing to the identical underlying issue of unawaited async operations in forEach loops leading to improper error handling."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Race condition: deletions continue after handler returns in packages/trpc/server/routers/viewer/bookings.tsx - external calendar cleanup may be incomplete when response proceeds immediately"
+        },
+        {
+          "candidate": "UnhandledPromiseRejection: async forEach escapes try/catch in packages/app-store/vital/lib/reschedule.ts - deletion failures go uncaught when async callbacks reject after forEach returns"
+        },
+        {
+          "candidate": "UnhandledPromiseRejection: async forEach escapes try/catch in packages/app-store/wipemycalother/lib/reschedule.ts - deletion failures go uncaught when async callbacks reject inside forEach"
+        },
+        {
+          "candidate": "Promise error: async forEach cleanup unawaited in packages/trpc/server/routers/viewer/bookings.tsx - calendar deletions escape error handling when delete/getCalendar rejects"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Consider adding try-catch around the await to handle import failures gracefully",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 4,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.2,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8087__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8087__20260310/pull/1"
     }
   }
-}
\ No newline at end of file
+}
diff --git a/offline/results/benchmark_data.json b/offline/results/benchmark_data.json
index 95da859..a64d9cd 100644
--- a/offline/results/benchmark_data.json
+++ b/offline/results/benchmark_data.json
@@ -1364,6 +1364,25 @@
             "created_at": "2026-04-07T19:38:04Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR37429__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37429__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "misc/theme-verifier/src/main/java/org/keycloak/themeverifier/VerifyMessageProperties.java",
+            "line": 80,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n RuntimeException: missing English bundle aborts verification\n\n\n  ###### Where:\n\n `misc/theme-verifier/src/main/java/org/keycloak/themeverifier/VerifyMessageProperties.java`\n\n\n  ###### Description:\n\n verification crashes when the derived _en.properties file does not exist\nNew code in VerifyMessageProperties.verify now always calls verifySafeHtml() at misc/theme-verifier/src/main/java/org/keycloak/themeverifier/VerifyMessageProperties.java:46-53. Inside verifySafeHtml(), opening the derived English companion file throws RuntimeException on any IOException, including a missing _en.properties file, at :73-80. There is no local fallback for missing bundle files; only missing individual keys are handled later by getEnglishValue() at :164-170. This makes the alert a concrete new error-handling regression, not pre-existing behavior.\n\n\n\n<br/>\n\n  ```diff\n  +        try (FileInputStream fis = new FileInputStream(englishFile)) {\n+            bundleEnglish = new PropertyResourceBundle(fis);\n+        } catch (IOException e) {\n+            throw new RuntimeException(\"unable to read file \" + englishFile, e);\n+        }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a2deff54-bc43-4aba-9c08-eb223fa58c94&type=bug&issue=RuntimeException%3A+missing+English+bundle+aborts+verification&path=misc%2Ftheme-verifier%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fthemeverifier%2FVerifyMessageProperties.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:15Z"
+          },
+          {
+            "path": "misc/theme-verifier/src/main/java/org/keycloak/themeverifier/VerifyMessageProperties.java",
+            "line": 150,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: typo in santizeAnchors method name\n\n\n  ###### Where:\n\n `misc/theme-verifier/src/main/java/org/keycloak/themeverifier/VerifyMessageProperties.java`\n\n\n  ###### Description:\n\n maintenance lookup becomes confusing when searching for sanitize logic\nNew PR code introduces private method `VerifyMessageProperties.santizeAnchors` at misc/theme-verifier/src/main/java/org/keycloak/themeverifier/VerifyMessageProperties.java:150, and it is called from new code at :88. The identifier is an obvious typo of 'sanitize' and is contradicted by nearby new names/comments such as `sanitized` at :92-99 and the method\u2019s own sanitize-related purpose. This matches the naming-typo rule and is directly visible in added lines; not pre-existing or unchanged code.\n\n\n\n<br/>\n\n  ```diff\n  +    private String santizeAnchors(String key, String value, String englishValue) {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a2deff54-bc43-4aba-9c08-eb223fa58c94&type=bug&issue=Potential%3A+typo+in+santizeAnchors+method+name&path=misc%2Ftheme-verifier%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fthemeverifier%2FVerifyMessageProperties.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:18Z"
+          }
+        ]
       }
     ]
   },
@@ -2498,6 +2517,31 @@
             "created_at": "2026-04-07T19:31:41Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR37634__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37634__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "services/src/main/java/org/keycloak/protocol/oidc/encode/AccessTokenContext.java",
+            "line": 73,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NullPointerException: wrong parameter in null check\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/protocol/oidc/encode/AccessTokenContext.java`\n\n\n  ###### Description:\n\n null rawTokenId passes validation when constructor receives a null token id\nNew constructor code in services/src/main/java/org/keycloak/protocol/oidc/encode/AccessTokenContext.java:69-77 checks grantType twice: line 72 validates grantType, and line 73 again calls Objects.requireNonNull(grantType, \"Null rawTokenId not allowed\") instead of validating rawTokenId. This directly introduces the confirmed wrong-parameter null-check pattern; rawTokenId can be null despite the constructor message claiming otherwise.\n\n\n\n<br/>\n\n  ```diff\n  +        Objects.requireNonNull(grantType, \"Null rawTokenId not allowed\");\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ad67366c-ce98-4a2a-aedf-53a1fa735889&type=bug&issue=NullPointerException%3A+wrong+parameter+in+null+check&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fprotocol%2Foidc%2Fencode%2FAccessTokenContext.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:51Z"
+          },
+          {
+            "path": "server-spi-private/src/main/java/org/keycloak/protocol/oidc/grants/OAuth2GrantTypeFactory.java",
+            "line": 33,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: Javadoc mismatch on shortcut length\n\n\n  ###### Where:\n\n `server-spi-private/src/main/java/org/keycloak/protocol/oidc/grants/OAuth2GrantTypeFactory.java`\n\n\n  ###### Description:\n\n documentation misleads implementers when existing grant shortcuts are only two letters\nNew Javadoc in OAuth2GrantTypeFactory.getShortcut states shortcuts are 'usually like 3-letters', but PR-visible implementors contradict it with 2-letter values: AuthorizationCodeGrantTypeFactory.GRANT_SHORTCUT='ac' at services/.../AuthorizationCodeGrantTypeFactory.java:32 and getShortcut() at :39-41, ClientCredentialsGrantTypeFactory.getShortcut()='cc' at :38-40, and PermissionGrantTypeFactory.getShortcut()='pg' at :38-40. This is a direct documentation mismatch introduced in the + lines.\n\n\n\n<br/>\n\n  ```diff\n  +    /**\n+     * @return usually like 3-letters shortcut of specific grants. It can be useful for example in the tokens when the amount of characters should be limited and hence using full grant name\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ad67366c-ce98-4a2a-aedf-53a1fa735889&type=bug&issue=Potential%3A+Javadoc+mismatch+on+shortcut+length&path=server-spi-private%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fprotocol%2Foidc%2Fgrants%2FOAuth2GrantTypeFactory.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:54Z"
+          },
+          {
+            "path": "services/src/test/java/org/keycloak/protocol/oidc/encode/DefaultTokenContextEncoderProviderTest.java",
+            "line": 83,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n RuntimeException: overly broad exception assertion\n\n\n  ###### Where:\n\n `services/src/test/java/org/keycloak/protocol/oidc/encode/DefaultTokenContextEncoderProviderTest.java`\n\n\n  ###### Description:\n\n test passes for wrong failures when implementation throws a different RuntimeException subtype\nNew test code at DefaultTokenContextEncoderProviderTest.testIncorrectGrantType catches generic RuntimeException (services/src/test/java/org/keycloak/protocol/oidc/encode/DefaultTokenContextEncoderProviderTest.java:77-84). The exercised production method DefaultTokenContextEncoderProvider.getTokenContextFromTokenId throws specific IllegalArgumentException for malformed/unknown token-id parts, including unknown grant type (services/src/main/java/org/keycloak/protocol/oidc/encode/DefaultTokenContextEncoderProvider.java:67-89). Because the test catches any RuntimeException, unrelated runtime failures would also satisfy the test, matching the overly broad exception-handling pattern.\n\n\n\n<br/>\n\n  ```diff\n  +        } catch (RuntimeException iae) {\n+            // ignored\n+        }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ad67366c-ce98-4a2a-aedf-53a1fa735889&type=bug&issue=RuntimeException%3A+overly+broad+exception+assertion&path=services%2Fsrc%2Ftest%2Fjava%2Forg%2Fkeycloak%2Fprotocol%2Foidc%2Fencode%2FDefaultTokenContextEncoderProviderTest.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:57Z"
+          }
+        ]
       }
     ]
   },
@@ -3646,6 +3690,25 @@
             "created_at": "2026-04-07T19:31:24Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR38446__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR38446__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "services/src/main/java/org/keycloak/forms/login/freemarker/model/RecoveryAuthnCodeInputLoginBean.java",
+            "line": 19,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NoSuchElementException: Optional.get without presence check\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/forms/login/freemarker/model/RecoveryAuthnCodeInputLoginBean.java`\n\n\n  ###### Description:\n\n login form rendering crashes when user has no recovery code credential\nNew PR code in RecoveryAuthnCodeInputLoginBean constructor stores RecoveryAuthnCodesUtils.getCredential(user) into an Optional and immediately calls credentialModelOpt.get() without any presence check at services/src/main/java/org/keycloak/forms/login/freemarker/model/RecoveryAuthnCodeInputLoginBean.java:17-19. The new helper explicitly returns Optional<CredentialModel> from two findFirst() calls, so empty is a normal outcome, not an exceptional contract breach, at server-spi/src/main/java/org/keycloak/models/utils/RecoveryAuthnCodesUtils.java:56-61. A sibling caller added in the same PR handles the same Optional defensively with isPresent() before dereferencing, confirming a guard is expected: services/src/main/java/org/keycloak/authentication/authenticators/browser/RecoveryAuthnCodesFormAuthenticator.java:80-83. This exact Optional.get-without-check pattern matches the confirmed Java input_validation rule.\n\n\n\n<br/>\n\n  ```diff\n  +        Optional<CredentialModel> credentialModelOpt = RecoveryAuthnCodesUtils.getCredential(user);\n+\n+        RecoveryAuthnCodesCredentialModel recoveryCodeCredentialModel = RecoveryAuthnCodesCredentialModel.createFromCredentialModel(credentialModelOpt.get());\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=3298e6e5-40a9-4370-b54e-469e56f6ec92&type=bug&issue=NoSuchElementException%3A+Optional.get+without+presence+check&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fforms%2Flogin%2Ffreemarker%2Fmodel%2FRecoveryAuthnCodeInputLoginBean.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:27:04Z"
+          },
+          {
+            "path": "testsuite/integration-arquillian/servers/auth-server/services/testsuite-providers/src/main/java/org/keycloak/testsuite/federation/BackwardsCompatibilityUserStorage.java",
+            "line": 242,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: null ID edge case in recovery credentials\n\n\n  ###### Where:\n\n `testsuite/integration-arquillian/servers/auth-server/services/testsuite-providers/src/main/java/org/keycloak/testsuite/federation/BackwardsCompatibilityUserStorage.java`\n\n\n  ###### Description:\n\n credential removal breaks when reconstructed recovery credential has a different or missing id\nNew recovery-code support stores a generated credential id in BackwardsCompatibilityUserStorage.updateCredential at BackwardsCompatibilityUserStorage.java:194-200, but getCredentials reconstructs a fresh RecoveryAuthnCodesCredentialModel via createFromValues using only credentialData/createdDate/userLabel at BackwardsCompatibilityUserStorage.java:237-242 and does not preserve the stored id. This is a concrete new mismatch in the added code and matches the null-id/lost-id edge-case pattern for reconstructed recovery credentials. One external hop remains unverified because RecoveryAuthnCodesCredentialModel.createFromValues is outside the indexed codebase, so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +                model = RecoveryAuthnCodesCredentialModel.createFromValues(\n+                        JsonSerialization.readValue(myUser.recoveryCodes.getCredentialData(), List.class),\n+                        myUser.recoveryCodes.getCreatedDate(),\n+                        myUser.recoveryCodes.getUserLabel()\n+                );\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=3298e6e5-40a9-4370-b54e-469e56f6ec92&type=bug&issue=Potential%3A+null+ID+edge+case+in+recovery+credentials&path=testsuite%2Fintegration-arquillian%2Fservers%2Fauth-server%2Fservices%2Ftestsuite-providers%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Ftestsuite%2Ffederation%2FBackwardsCompatibilityUserStorage.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:27:07Z"
+          }
+        ]
       }
     ]
   },
@@ -4526,6 +4589,19 @@
             "created_at": "2026-04-07T19:21:12Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR36882__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36882__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityCheck.java",
+            "line": 48,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: picocli exit terminates JVM\n\n\n  ###### Where:\n\n `quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityCheck.java`\n\n\n  ###### Description:\n\n embedding breaks when feature is disabled because the whole JVM exits immediately\nNew branch in UpdateCompatibilityCheck.run calls inherited picocli.exit when rolling-updates is disabled (UpdateCompatibilityCheck.java:46-48). AbstractCommand exposes the Picocli instance used by commands (AbstractCommand.java:31-33,74-76), and Picocli.exit hard-calls System.exit for any non-OK code outside test launch mode (Picocli.java:203-208). FEATURE_DISABLED is non-zero (=4) in CompatibilityResult.java:34-35, so this new line introduces JVM termination on the disabled-feature path.\n\n\n\n<br/>\n\n  ```diff\n  +            picocli.exit(CompatibilityResult.FEATURE_DISABLED);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=bd7a0d6d-a533-4431-b456-12c6e67539d2&type=bug&issue=LogicError%3A+picocli+exit+terminates+JVM&path=quarkus%2Fruntime%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fquarkus%2Fruntime%2Fcli%2Fcommand%2FUpdateCompatibilityCheck.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:39Z"
+          }
+        ]
       }
     ]
   },
@@ -5684,6 +5760,37 @@
             "created_at": "2026-04-07T19:32:43Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR36880__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36880__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "services/src/main/java/org/keycloak/services/resources/admin/permissions/AdminPermissions.java",
+            "line": 77,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: LogicError wrong feature flag disables cleanup\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/AdminPermissions.java`\n\n\n  ###### Description:\n\n permission cleanup breaks when V2 is enabled without V1\nNew listener cleanup is gated by `Profile.Feature.ADMIN_FINE_GRAINED_AUTHZ` at `AdminPermissions.java:77`, while this PR introduces V2-specific client-permission logic in new classes `MgmtPermissionsV2.clients()` (`MgmtPermissionsV2.java:63-67`) and `ClientPermissionsV2` (`ClientPermissionsV2.java:49-287`). This is the exact confirmed mismatch pattern: cleanup remains keyed to V1 flag even though V2 permission machinery is added separately, so with V2 enabled without V1 the role/client/group removal cleanup path at `AdminPermissions.java:78-94` will not run.\n\n\n\n<br/>\n\n  ```diff\n  +                if (Profile.isFeatureEnabled(Profile.Feature.ADMIN_FINE_GRAINED_AUTHZ)) {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=713c10fb-2509-4aaf-87eb-34e95a891e09&type=bug&issue=Potential%3A+LogicError+wrong+feature+flag+disables+cleanup&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FAdminPermissions.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:11:29Z"
+          },
+          {
+            "path": "services/src/main/java/org/keycloak/services/resources/admin/permissions/ClientPermissionsV2.java",
+            "line": 214,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: per-client permission lookup always misses\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/ClientPermissionsV2.java`\n\n\n  ###### Description:\n\n client-specific grants are ignored when resource owner differs from server id\nIn new `ClientPermissionsV2.hasPermission(ClientModel,String)`, resource lookup uses `resourceStore.findByName(server, client.getId(), server.getId())` at `ClientPermissionsV2.java:214`. In the same PR, `AdminPermissionsSchema` resolves client resource names to the internal client id (`AdminPermissionsSchema.java:93-94,175-183`), so name lookup is by client id; however the owner argument remains `server.getId()`. This matches the known structural bug pattern for Keycloak admin permissions: per-client resources are not owned by the resource-server id, causing lookup to miss and fall back to the type-level \"Clients\" resource at `ClientPermissionsV2.java:215-222`, which ignores client-specific grants. The bug is newly introduced in added V2 code and directly affects all callers routing through `canManage/canConfigure/canView/canMap*` (`ClientPermissionsV2.java:57-122`).\n\n\n\n<br/>\n\n  ```diff\n  +        Resource resource =  resourceStore.findByName(server, client.getId(), server.getId());\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=713c10fb-2509-4aaf-87eb-34e95a891e09&type=bug&issue=LogicError%3A+per-client+permission+lookup+always+misses&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FClientPermissionsV2.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:11:32Z"
+          },
+          {
+            "path": "services/src/main/java/org/keycloak/services/resources/admin/permissions/ClientPermissionsV2.java",
+            "line": 272,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n CodeClarity: dead getEvaluationContext implementation\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/ClientPermissionsV2.java`\n\n\n  ###### Description:\n\n custom evaluation attributes never apply when method is never called\nNew private method `ClientPermissionsV2.getEvaluationContext()` is added at services/src/main/java/org/keycloak/services/resources/admin/permissions/ClientPermissionsV2.java:262-272, but all in-class permission-evaluation paths use `root.evaluatePermission(...)` directly at :224, :249, and :276 without this helper, and repo usage tracing found no callers for this private symbol. Because the file is new in this PR, this dead code is newly introduced rather than pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +    private EvaluationContext getEvaluationContext(ClientModel authorizedClient, AccessToken token) {\n+        ClientModelIdentity identity = new ClientModelIdentity(session, authorizedClient, token);\n+        return new DefaultEvaluationContext(identity, session) {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=713c10fb-2509-4aaf-87eb-34e95a891e09&type=bug&issue=CodeClarity%3A+dead+getEvaluationContext+implementation&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FClientPermissionsV2.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:11:35Z"
+          },
+          {
+            "path": "tests/base/src/test/java/org/keycloak/tests/admin/authz/fgap/PermissionClientTest.java",
+            "line": 149,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n IndexOutOfBoundsException from fixed client-scope index\n\n\n  ###### Where:\n\n `tests/base/src/test/java/org/keycloak/tests/admin/authz/fgap/PermissionClientTest.java`\n\n\n  ###### Description:\n\n Test crashes when a client has fewer than two default scopes\nNew test code in PermissionClientTest.testManageOnlyOneClient indexes default client scopes with get(1) at lines 148-149 and again at 174-178, but the only prior assertion is not(empty()) at line 171, which guarantees size >= 1, not >= 2. No caller/setup in the diff proves myclient always has at least two default scopes, so the new fixed-index assumption is an introduced edge-case bug.\n\n\n\n<br/>\n\n  ```diff\n  +            ClientScopeRepresentation clientScopeRep = clientResource.getDefaultClientScopes().get(1);\n...\n+        ClientScopeRepresentation clientScopeRep = defaultClientScopes.get(1);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=713c10fb-2509-4aaf-87eb-34e95a891e09&type=bug&issue=IndexOutOfBoundsException+from+fixed+client-scope+index&path=tests%2Fbase%2Fsrc%2Ftest%2Fjava%2Forg%2Fkeycloak%2Ftests%2Fadmin%2Fauthz%2Ffgap%2FPermissionClientTest.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:11:38Z"
+          }
+        ]
       }
     ]
   },
@@ -6947,6 +7054,49 @@
             "created_at": "2026-04-07T19:34:49Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR37038__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37038__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "services/src/main/java/org/keycloak/services/resources/admin/permissions/GroupPermissionsV2.java",
+            "line": 141,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: per-group checks use resource id lookup\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/GroupPermissionsV2.java`\n\n\n  ###### Description:\n\n group-specific permissions break when resource name differs from internal resource id\nNew V2 code resolves per-group permissions with `resourceStore.findByName(server, groupId)` in `GroupPermissionsV2.hasPermission()` (`GroupPermissionsV2.java:141`), but `getGroupIdsWithViewPermission()` passes `groupResource.getId()` into that helper (`GroupPermissionsV2.java:121-123`). That is an immediate id-vs-name mismatch inside the added code itself. The prior implementation evaluated the iterated `Resource` directly and returned the underlying group id from `resource.getName()` (`GroupPermissions.java:312-316`), so this is a PR-introduced logic regression, not pre-existing. `MgmtPermissionsV2.groups()` now instantiates this class (`MgmtPermissionsV2.java:58-61`).\n\n\n\n<br/>\n\n  ```diff\n  +\n+        Resource resource = groupId == null ? null : resourceStore.findByName(server, groupId);\n+\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e7b4aafb-8412-49b7-a769-c68b2ae9c39d&type=bug&issue=LogicError%3A+per-group+checks+use+resource+id+lookup&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FGroupPermissionsV2.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:21Z"
+          },
+          {
+            "path": "services/src/main/java/org/keycloak/services/resources/admin/permissions/GroupPermissions.java",
+            "line": 275,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: manage permission no longer honors defaults\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/GroupPermissions.java`\n\n\n  ###### Description:\n\n group management breaks when default user-management grants exist without direct MANAGE_USERS role\nNew code narrows `GroupPermissions.canManage()` to `root.hasOneAdminRole(AdminRoles.MANAGE_USERS)` at GroupPermissions.java:275, replacing the prior delegation to broader user-management default semantics. In the same PR, `UserPermissions.canManage()` still documents and implements broader default/permission-based behavior beyond the direct role check at UserPermissions.java:220-244. This is a concrete logic regression in the new `+` line, and it propagates into group-based user checks via UserPermissions.java:584 (`root.groups()::canManageMembers`) and the new V2 path repeats the same narrowing at GroupPermissionsV2.java:65-70.\n\n\n\n<br/>\n\n  ```diff\n  +        return root.hasOneAdminRole(AdminRoles.MANAGE_USERS);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e7b4aafb-8412-49b7-a769-c68b2ae9c39d&type=bug&issue=LogicError%3A+manage+permission+no+longer+honors+defaults&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FGroupPermissions.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:24Z"
+          },
+          {
+            "path": "services/src/main/java/org/keycloak/services/resources/admin/permissions/RolePermissions.java",
+            "line": 302,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: LogicError: manage-users check narrows default-role mapping\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/RolePermissions.java`\n\n\n  ###### Description:\n\n role mapping breaks when canManageDefault grants access without direct MANAGE_USERS role\nNew code at RolePermissions.java:302 gates role-mapping on root.hasOneAdminRole(AdminRoles.MANAGE_USERS) before checkAdminRoles(role), while the same class still defines broader default-manage semantics in canManageDefault(RoleModel) at RolePermissions.java:390-419 and uses that broader gate for analogous mapping logic in canMapComposite(RoleModel) at RolePermissions.java:384-405. requireMapRole() at RolePermissions.java:331-337 is a live enforcement entrypoint. This is a concrete narrowing introduced in the PR, but one semantic hop remains open as no explicit external contract was found proving all canManageDefault cases must continue to map roles.\n\n\n\n<br/>\n\n  ```diff\n  +        if (root.hasOneAdminRole(AdminRoles.MANAGE_USERS)) return checkAdminRoles(role);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e7b4aafb-8412-49b7-a769-c68b2ae9c39d&type=bug&issue=Potential%3A+LogicError%3A+manage-users+check+narrows+default-role+mapping&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FRolePermissions.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:33Z"
+          },
+          {
+            "path": "services/src/main/java/org/keycloak/services/resources/admin/permissions/AdminPermissions.java",
+            "line": 98,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: inconsistent admin fine-grained feature flag\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/AdminPermissions.java`\n\n\n  ###### Description:\n\n permission cleanup never runs when V2 is enabled without V1\nConfirmed feature-flag mismatch in new code: AdminPermissions.java:39-71 selects V2 management/evaluator paths under ADMIN_FINE_GRAINED_AUTHZ_V2, but the listener registration added/modified at AdminPermissions.java:74-98 is guarded by ADMIN_FINE_GRAINED_AUTHZ. The listener body calls management(...).roles/clients/groups().setPermissionsEnabled(...) for cleanup, so when V2 is enabled without V1 the V2 permission system is active but this cleanup path is skipped, matching the known valid pattern for inconsistent feature flags.\n\n\n\n<br/>\n\n  ```diff\n  +        if (Profile.isFeatureEnabled(Profile.Feature.ADMIN_FINE_GRAINED_AUTHZ)) {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e7b4aafb-8412-49b7-a769-c68b2ae9c39d&type=bug&issue=LogicError%3A+inconsistent+admin+fine-grained+feature+flag&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FAdminPermissions.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:36Z"
+          },
+          {
+            "path": "tests/base/src/test/java/org/keycloak/tests/admin/authz/fgap/GroupResourceTypeEvaluationTest.java",
+            "line": 176,
+            "body": "Review #6\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: child group status mismatches API\n\n\n  ###### Where:\n\n `tests/base/src/test/java/org/keycloak/tests/admin/authz/fgap/GroupResourceTypeEvaluationTest.java`\n\n\n  ###### Description:\n\n Test fails when subgroup creation returns 201 Created instead of 204 No Content\nNew assertion in GroupResourceTypeEvaluationTest.testManageAllGroups expects subgroup creation to return 204, but the endpoint it exercises is GroupResource.addChild(), which returns 201 Created when rep.getId() == null and a new child is created (services/src/main/java/org/keycloak/services/resources/admin/GroupResource.java:167-179, 201-218). The same test file also expects 201 for another new subgroup creation path at lines 226-228, confirming the 204 assertion is the incorrect + line.\n\n\n\n<br/>\n\n  ```diff\n  +        try (Response response = realmAdminClient.realm(realm.getName()).groups().group(topGroup.getId()).subGroup(group)) {\n+            assertEquals(Response.Status.NO_CONTENT.getStatusCode(), response.getStatus());\n+        }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e7b4aafb-8412-49b7-a769-c68b2ae9c39d&type=bug&issue=AssertionError%3A+child+group+status+mismatches+API&path=tests%2Fbase%2Fsrc%2Ftest%2Fjava%2Forg%2Fkeycloak%2Ftests%2Fadmin%2Fauthz%2Ffgap%2FGroupResourceTypeEvaluationTest.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:42Z"
+          },
+          {
+            "path": null,
+            "line": null,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: manage-members grants hidden from group filtering\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/services/resources/admin/permissions/GroupPermissions.java`\n\n\n  ###### Description:\n\n eligible groups disappear when admin has only MANAGE_MEMBERS_SCOPE on a group\nConfirmed in new V2 implementation used by MgmtPermissionsV2.groups() at services/src/main/java/org/keycloak/services/resources/admin/permissions/MgmtPermissionsV2.java:58-61. In GroupPermissionsV2.getGroupIdsWithViewPermission(), the new code iterates resources and calls hasPermission(groupResource.getId(), ...) then adds groupResource.getId() to the result set (services/src/main/java/org/keycloak/services/resources/admin/permissions/GroupPermissionsV2.java:121-123). But hasPermission resolves the resource with resourceStore.findByName(server, groupId) (services/src/main/java/org/keycloak/services/resources/admin/permissions/GroupPermissionsV2.java:141), so it treats the argument as a resource name, not an id. This id/name mismatch is a concrete new logic bug in the added lines and can cause per-group VIEW_MEMBERS/MANAGE_MEMBERS permissions to be missed.\n\n\n\n<br/>\n\n  ```diff\n  +    public Set<String> getGroupIdsWithViewPermission() {\n+        if (root.users().canView()) return Collections.emptySet();\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e7b4aafb-8412-49b7-a769-c68b2ae9c39d&type=bug&issue=LogicError%3A+manage-members+grants+hidden+from+group+filtering&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fservices%2Fresources%2Fadmin%2Fpermissions%2FGroupPermissions.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:25:30Z"
+          }
+        ]
       }
     ]
   },
@@ -8079,6 +8229,31 @@
             "created_at": "2026-04-07T19:29:46Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR33832__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR33832__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "authz/client/src/main/java/org/keycloak/authorization/client/util/crypto/ASN1Decoder.java",
+            "line": 135,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n IOException: indefinite length causes NegativeArraySizeException\n\n\n  ###### Where:\n\n `authz/client/src/main/java/org/keycloak/authorization/client/util/crypto/ASN1Decoder.java`\n\n\n  ###### Description:\n\n parsing crashes when ASN.1 uses indefinite-length encoding\nNew code in ASN1Decoder.readLength returns -1 for indefinite-length encoding (ASN1Decoder.java:133-135), but callers in the same new file do not handle that sentinel: readInteger passes length directly to read(length) (ASN1Decoder.java:71-73) and readNext computes length += reset() then calls read(length) (ASN1Decoder.java:80-82). read(int) allocates new byte[length] (ASN1Decoder.java:169-170), so a negative length causes a runtime NegativeArraySizeException rather than the declared IOException path. This decoder is exercised by new PR code via AuthzClientCryptoProvider.asn1derToConcatenatedRS calling readSequence/readInteger (AuthzClientCryptoProvider.java:128-134).\n\n\n\n<br/>\n\n  ```diff\n  +        if (length == 0x80) {\n+            return -1;      // indefinite-length encoding\n+        }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a8a826a0-bc03-478a-b9cf-eefa1cfa4abc&type=bug&issue=IOException%3A+indefinite+length+causes+NegativeArraySizeException&path=authz%2Fclient%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fauthorization%2Fclient%2Futil%2Fcrypto%2FASN1Decoder.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:26:33Z"
+          },
+          {
+            "path": "authz/client/src/main/java/org/keycloak/authorization/client/util/crypto/ASN1Decoder.java",
+            "line": 163,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n IOException: valid trailing-byte length rejected\n\n\n  ###### Where:\n\n `authz/client/src/main/java/org/keycloak/authorization/client/util/crypto/ASN1Decoder.java`\n\n\n  ###### Description:\n\n decoding rejects content that exactly consumes remaining bytes\nNew code in ASN1Decoder.readLength() compares decoded length against the total input limit (`limit`) rather than remaining bytes after tag/length bytes have already been consumed, and it rejects `length >= limit` at authz/client/src/main/java/org/keycloak/authorization/client/util/crypto/ASN1Decoder.java:160-163. In the same class, callers consume tag and length first (`readSequence()` at lines 50-58, `readInteger()` at lines 66-72), so a payload whose content exactly fills the remaining bytes can be incorrectly rejected. This is a concrete edge-case bug on the new `+` lines, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +            if (length >= limit) // after all we must have read at least 1 byte\n+            {\n+                throw new IOException(\"corrupted stream - out of bounds length found\");\n+            }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a8a826a0-bc03-478a-b9cf-eefa1cfa4abc&type=bug&issue=IOException%3A+valid+trailing-byte+length+rejected&path=authz%2Fclient%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fauthorization%2Fclient%2Futil%2Fcrypto%2FASN1Decoder.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:26:36Z"
+          },
+          {
+            "path": "authz/client/src/main/java/org/keycloak/authorization/client/util/crypto/AuthzClientCryptoProvider.java",
+            "line": 115,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Dead code: discarded ASN1Encoder results\n\n\n  ###### Where:\n\n `authz/client/src/main/java/org/keycloak/authorization/client/util/crypto/AuthzClientCryptoProvider.java`\n\n\n  ###### Description:\n\n encoding intent is obscured when created encoders are written then immediately discarded\nNew code in AuthzClientCryptoProvider.getEcdsaCryptoProvider().concatenatedRSToASN1DER creates two ASN1Encoder instances and writes r/s to them at AuthzClientCryptoProvider.java:114-115, but those instances are immediately discarded. The returned bytes are built from separate encoder instances at AuthzClientCryptoProvider.java:117-121. ASN1Encoder.write(BigInteger) mutates and returns the same encoder instance (ASN1Encoder.java:46-49), so the standalone calls have no effect and are dead code. This is introduced entirely by the new file, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +                ASN1Encoder.create().write(rBigInteger);\n+                ASN1Encoder.create().write(sBigInteger);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a8a826a0-bc03-478a-b9cf-eefa1cfa4abc&type=bug&issue=Dead+code%3A+discarded+ASN1Encoder+results&path=authz%2Fclient%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fauthorization%2Fclient%2Futil%2Fcrypto%2FAuthzClientCryptoProvider.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:26:39Z"
+          }
+        ]
       }
     ]
   },
@@ -9085,6 +9260,19 @@
             "created_at": "2026-04-07T19:29:09Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR40940__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR40940__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "tests/base/src/test/java/org/keycloak/tests/admin/group/GroupTest.java",
+            "line": 157,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n RaceCondition: reader thread not joined\n\n\n  ###### Where:\n\n `tests/base/src/test/java/org/keycloak/tests/admin/group/GroupTest.java`\n\n\n  ###### Description:\n\n test assertion misses reader exceptions when thread appends after deletedAll flips\nNew test code in GroupTest.createMultiDeleteMultiReadMulti starts a background reader thread at tests/base/src/test/java/org/keycloak/tests/admin/group/GroupTest.java:139-149, records async failures into caughtExceptions at :144-146, then sets deletedAll at :155 and immediately asserts the list is empty at :157 without joining/waiting for the thread. This directly matches the flaky race pattern for tests: a late exception can be appended after the assertion.\n\n\n\n<br/>\n\n  ```diff\n  +        new Thread(() -> {\n+            while (!deletedAll.get()) {\n+                try {\n+                    managedRealm.admin().groups().groups(null, 0, Integer.MAX_VALUE, true);\n+                } catch (Exception e) {\n+                    caughtExceptions.add(e);\n+                }\n+            }\n+        }).start();\n...\n+        deletedAll.set(true);\n+\n+        assertThat(caughtExceptions, Matchers.empty());\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1324890c-5458-47a2-9945-842300e11ffe&type=bug&issue=RaceCondition%3A+reader+thread+not+joined&path=tests%2Fbase%2Fsrc%2Ftest%2Fjava%2Forg%2Fkeycloak%2Ftests%2Fadmin%2Fgroup%2FGroupTest.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:26:10Z"
+          }
+        ]
       }
     ]
   },
@@ -10209,6 +10397,25 @@
             "created_at": "2026-04-07T19:31:17Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak-greptile__cloudaeye__PR1__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak-greptile__cloudaeye__PR1__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "services/src/main/java/org/keycloak/authentication/authenticators/browser/UsernamePasswordForm.java",
+            "line": 116,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: LogicError disables initial passkey setup\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/authentication/authenticators/browser/UsernamePasswordForm.java`\n\n\n  ###### Description:\n\n conditional passkey UI breaks when initial login page has no selected user\nNew code in UsernamePasswordForm gates passkey setup on isConditionalPasskeysEnabled(context.getUser()), and that helper now requires user != null (UsernamePasswordForm.java:115-116, 160-161). This means authenticate/challenge skip webauthnAuth.fillContextForm(context) on the initial login page before a user is selected. Downstream logic does not compensate: WebAuthnAuthenticator.shouldShowWebAuthnAuthenticators also returns context.getUser() != null (WebAuthnAuthenticator.java:128-130). This matches the confirmed Keycloak edge-case pattern and is newly introduced behavior.\n\n\n\n<br/>\n\n  ```diff\n  +        if (isConditionalPasskeysEnabled(context.getUser())) {\n+            webauthnAuth.fillContextForm(context);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=9aff3fc5-2457-4081-b4b9-0db06a38efdc&type=bug&issue=Potential%3A+LogicError+disables+initial+passkey+setup&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fauthentication%2Fauthenticators%2Fbrowser%2FUsernamePasswordForm.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:50:58Z"
+          },
+          {
+            "path": "services/src/main/java/org/keycloak/authentication/authenticators/browser/UsernamePasswordForm.java",
+            "line": 138,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: LogicError skips passkey challenge on login errors\n\n\n  ###### Where:\n\n `services/src/main/java/org/keycloak/authentication/authenticators/browser/UsernamePasswordForm.java`\n\n\n  ###### Description:\n\n passkey retry options disappear when challenge runs before any user is identified\nNew guard in UsernamePasswordForm.challenge(AuthenticationFlowContext,String,String) at services/src/main/java/org/keycloak/authentication/authenticators/browser/UsernamePasswordForm.java:137-138 only calls webauthnAuth.fillContextForm(context) when isConditionalPasskeysEnabled(context.getUser()) is true, and that helper now requires user != null at :160-162. In the failed-login error path (:135-142), unknown-user attempts leave context.getUser() null, so passkey UI setup is skipped on retry. This is newly introduced by the added null-user guard, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +        if (isConditionalPasskeysEnabled(context.getUser())) {\n+            // setup webauthn data when possible\n+            webauthnAuth.fillContextForm(context);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=9aff3fc5-2457-4081-b4b9-0db06a38efdc&type=bug&issue=Potential%3A+LogicError+skips+passkey+challenge+on+login+errors&path=services%2Fsrc%2Fmain%2Fjava%2Forg%2Fkeycloak%2Fauthentication%2Fauthenticators%2Fbrowser%2FUsernamePasswordForm.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:51:01Z"
+          }
+        ]
       }
     ]
   },
@@ -11364,6 +11571,37 @@
             "created_at": "2026-04-07T19:29:13Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry__cloudaeye__PR93824__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR93824__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/spans/consumers/process/flusher.py",
+            "line": 259,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: replacement process starts before prior exits\n\n\n  ###### Where:\n\n `src/sentry/spans/consumers/process/flusher.py`\n\n\n  ###### Description:\n\n duplicate flushing can occur when killed process still runs during restart\nNew code in `SpanFlusher._ensure_processes_alive` kills an unhealthy worker and immediately calls `_create_process_for_shards` without any wait/join for prior exit (`src/sentry/spans/consumers/process/flusher.py:253-259`). `_create_process_for_shards` starts a fresh worker right away (`src/sentry/spans/consumers/process/flusher.py:86-125`), so restart sequencing allows overlapping shard handlers if the old process has not fully exited yet. This behavior is introduced by this PR, not pre-existing. Upstream path is `ProcessSpansStrategyFactory.create_with_partitions` constructing `SpanFlusher` (`src/sentry/spans/consumers/process/factory.py:71-75`), exposed via the new `flusher_processes` consumer option (`src/sentry/consumers/__init__.py:430-437`).\n\n\n\n<br/>\n\n  ```diff\n  +            try:\n+                if isinstance(process, multiprocessing.Process):\n+                    process.kill()\n+            except (ValueError, AttributeError):\n+                pass  # Process already closed, ignore\n+\n+            self._create_process_for_shards(process_index, shards)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4fe5b51e-1bc5-48a1-8e9c-5aceb1c3dd6d&type=bug&issue=Race+condition%3A+replacement+process+starts+before+prior+exits&path=src%2Fsentry%2Fspans%2Fconsumers%2Fprocess%2Fflusher.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:10Z"
+          },
+          {
+            "path": "src/sentry/spans/consumers/process/flusher.py",
+            "line": 347,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Leaked processes when join deadline breaks loop early\n\n\n  ###### Where:\n\n `src/sentry/spans/consumers/process/flusher.py`\n\n\n  ###### Description:\n\n remaining child processes keep running when timeout expires before later entries\nNew `SpanFlusher.join` iterates processes and breaks out entirely once `deadline` is exhausted (`src/sentry/spans/consumers/process/flusher.py:337-341`). Because the later `process.terminate()` call is inside that same loop (`src/sentry/spans/consumers/process/flusher.py:346-347`), every remaining entry after the break is skipped and receives no cleanup, leaving child workers running. This is the exact early-exit cleanup bug pattern and is newly introduced in this PR. Upstream construction is through `ProcessSpansStrategyFactory.create_with_partitions` (`src/sentry/spans/consumers/process/factory.py:71-75`), with CLI exposure added in `src/sentry/consumers/__init__.py:430-437`.\n\n\n\n<br/>\n\n  ```diff\n  +        for process_index, process in self.processes.items():\n+            if deadline is not None:\n+                remaining_time = deadline - time.time()\n+                if remaining_time <= 0:\n+                    break\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4fe5b51e-1bc5-48a1-8e9c-5aceb1c3dd6d&type=bug&issue=Leaked+processes+when+join+deadline+breaks+loop+early&path=src%2Fsentry%2Fspans%2Fconsumers%2Fprocess%2Fflusher.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:12Z"
+          },
+          {
+            "path": "src/sentry/spans/consumers/process/flusher.py",
+            "line": 199,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Naming mismatch: shard/shards metric tags split dashboards\n\n\n  ###### Where:\n\n `src/sentry/spans/consumers/process/flusher.py`\n\n\n  ###### Description:\n\n metrics fragment when same dimension uses different tag keys\nNew diff lines in src/sentry/spans/consumers/process/flusher.py use inconsistent metric tag keys for the same shard-set context: `metrics.timer(... tags={\"shard\": shard_tag})` at 185 and `metrics.timer(... tags={\"shards\": shard_tag})` at 199. The surrounding new code also uses `shard` consistently at 192-196 for `spans.buffer.segment_size_bytes`, making `shards` the clear outlier. This is a structural naming contradiction in newly added code, so the alert is valid.\n\n\n\n<br/>\n\n  ```diff\n  +                with metrics.timer(\"spans.buffer.flusher.produce\", tags={\"shard\": shard_tag}):\n+                with metrics.timer(\"spans.buffer.flusher.wait_produce\", tags={\"shards\": shard_tag}):\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4fe5b51e-1bc5-48a1-8e9c-5aceb1c3dd6d&type=bug&issue=Naming+mismatch%3A+shard%2Fshards+metric+tags+split+dashboards&path=src%2Fsentry%2Fspans%2Fconsumers%2Fprocess%2Fflusher.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:15Z"
+          },
+          {
+            "path": "tests/sentry/spans/consumers/process/test_consumer.py",
+            "line": 62,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Timing race: sleep no longer waits\n\n\n  ###### Where:\n\n `tests/sentry/spans/consumers/process/test_consumer.py`\n\n\n  ###### Description:\n\n background flushing can be unobserved when time.sleep is monkeypatched to a no-op\nConfirmed in test_consumer.py itself: test_basic monkeypatches global time.sleep to a no-op at tests/sentry/spans/consumers/process/test_consumer.py:14, then newly relies on time.sleep(0.1) at :61-62 to 'Give flusher threads time to process'. Because both are in the same test body, the added sleep cannot actually wait, so the comment and behavior directly contradict each other.\n\n\n\n<br/>\n\n  ```diff\n  +    # Give flusher threads time to process after drift change\n+    time.sleep(0.1)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4fe5b51e-1bc5-48a1-8e9c-5aceb1c3dd6d&type=bug&issue=Timing+race%3A+sleep+no+longer+waits&path=tests%2Fsentry%2Fspans%2Fconsumers%2Fprocess%2Ftest_consumer.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:18Z"
+          }
+        ]
       }
     ]
   },
@@ -12907,6 +13145,37 @@
             "created_at": "2026-04-07T19:31:30Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry-greptile__cloudaeye__PR5__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR5__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/issues/endpoints/browser_reporting_collector.py",
+            "line": 59,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n ValidationError: age=0 bypasses mutual-exclusion check\n\n\n  ###### Where:\n\n `src/sentry/issues/endpoints/browser_reporting_collector.py`\n\n\n  ###### Description:\n\n invalid mixed-spec reports pass validation when age or timestamp is zero\n\n\n\n<br/>\n\n  ```diff\n  +        if self.initial_data.get(\"age\"):\n+            raise serializers.ValidationError(\"If timestamp is present, age must be absent\")\n...\n+        if self.initial_data.get(\"timestamp\"):\n+            raise serializers.ValidationError(\"If age is present, timestamp must be absent\")\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b54ba0dc-7062-4c1e-b0dd-e22e7202ff96&type=bug&issue=ValidationError%3A+age%3D0+bypasses+mutual-exclusion+check&path=src%2Fsentry%2Fissues%2Fendpoints%2Fbrowser_reporting_collector.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:55:46Z"
+          },
+          {
+            "path": "src/sentry/replays/endpoints/project_replay_summarize_breadcrumbs.py",
+            "line": 118,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Logic error from mispaired nodestore results\n\n\n  ###### Where:\n\n `src/sentry/replays/endpoints/project_replay_summarize_breadcrumbs.py`\n\n\n  ###### Description:\n\n error details map to wrong IDs when get_multi omits or reorders entries\nNew code in `fetch_error_details` pairs `error_ids` with `events.values()` by position instead of by key (`src/sentry/replays/endpoints/project_replay_summarize_breadcrumbs.py:107-118`). But `nodestore.backend.get_multi` returns a dict keyed by node id, not a positional list, and internally may merge fetched and cached items via `items.update(cache_items)` after deduplication (`src/sentry/nodestore/base.py:174-210`). That means no positional contract exists for `.values()`, so ids can be misassociated with the wrong payloads. Upstream use is concrete: `ProjectReplaySummarizeBreadcrumbsEndpoint.get` calls `fetch_error_details` and passes the resulting events into breadcrumb analysis (`src/sentry/replays/endpoints/project_replay_summarize_breadcrumbs.py:52-100`).\n\n\n\n<br/>\n\n  ```diff\n  +        return [\n+            ErrorEvent(\n+                category=\"error\",\n+                id=event_id,\n+                title=data.get(\"title\", \"\"),\n+                timestamp=data.get(\"timestamp\", 0.0),\n+                message=data.get(\"message\", \"\"),\n+            )\n+            for event_id, data in zip(error_ids, events.values())\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b54ba0dc-7062-4c1e-b0dd-e22e7202ff96&type=bug&issue=Logic+error+from+mispaired+nodestore+results&path=src%2Fsentry%2Freplays%2Fendpoints%2Fproject_replay_summarize_breadcrumbs.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:55:49Z"
+          },
+          {
+            "path": "src/sentry/workflow_engine/endpoints/validators/base/detector.py",
+            "line": 63,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n KeyError: update reads wrong validated_data key\n\n\n  ###### Where:\n\n `src/sentry/workflow_engine/endpoints/validators/base/detector.py`\n\n\n  ###### Description:\n\n detector type updates break when validated_data only contains serializer field name type\nNew logic bug in BaseDetectorTypeValidator.update: the serializer declares and validates field `type` (`detector.py:35`, `detector.py:39-52`), but update reads `validated_data.get(\"detector_type\", instance.group_type).slug` at `detector.py:63`. Because `.get()` is used, this is not a KeyError; instead updates using the validated `type` field are ignored and fall back to `instance.group_type`. This mismatch is directly visible in the new code and is not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +        instance.type = validated_data.get(\"detector_type\", instance.group_type).slug\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b54ba0dc-7062-4c1e-b0dd-e22e7202ff96&type=bug&issue=KeyError%3A+update+reads+wrong+validated_data+key&path=src%2Fsentry%2Fworkflow_engine%2Fendpoints%2Fvalidators%2Fbase%2Fdetector.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:55:52Z"
+          },
+          {
+            "path": "src/sentry/preprod/api/endpoints/organization_preprod_artifact_assemble.py",
+            "line": 86,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-SECURITY__ALERT-blue.svg) \n  ![License](https://img.shields.io/badge/Risk_Type-Authorization__telemetry__recorded__before__feature__check-purple.svg) \n  ![License](https://img.shields.io/badge/Risk_Score-4.0-red.svg)\n\n\n\n  ###### Vulnerability Details: \n unauthorized attempts are logged as assemble events when feature access is denied\nNew code in src/sentry/preprod/api/endpoints/organization_preprod_artifact_assemble.py:81-86 records analytics before the feature gate at 88-90. This means requests that fail `features.has(...)` still emit `preprod_artifact.api.assemble`, so denied attempts are counted as assemble events. The behavior is directly visible in the diff; no upstream auth check changes this ordering.\n\n\n\n  ###### Where:\n `src/sentry/preprod/api/endpoints/organization_preprod_artifact_assemble.py`\n\n\n\n<br/>\n\n  ```diff\n  +        analytics.record(\n+            \"preprod_artifact.api.assemble\",\n+            organization_id=project.organization_id,\n+            project_id=project.id,\n+            user_id=request.user.id,\n+        )\n  ```\n\n\n\n<br/>\n\n  ###### Security Justification:\n\n  unauthorized attempts are logged as assemble events when feature access is denied\n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b54ba0dc-7062-4c1e-b0dd-e22e7202ff96&type=security&vulnerability_name=Authorization+telemetry+recorded+before+feature+check&path=src%2Fsentry%2Fpreprod%2Fapi%2Fendpoints%2Forganization_preprod_artifact_assemble.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:55:55Z"
+          }
+        ]
       }
     ]
   },
@@ -14316,6 +14585,43 @@
             "created_at": "2026-04-07T19:32:48Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry-greptile__cloudaeye__PR1__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR1__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/api/endpoints/organization_auditlogs.py",
+            "line": 71,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AttributeError: member access on non-member auth\n\n\n  ###### Where:\n\n `src/sentry/api/endpoints/organization_auditlogs.py`\n\n\n  ###### Description:\n\n request handling breaks when authenticated via API key or org auth token\nNew code dereferences `organization_context.member.has_global_access` with no None check in `OrganizationAuditLogsEndpoint.get` at src/sentry/api/endpoints/organization_auditlogs.py:68-73. This matches the repository rule for input_validation: `organization_context.member` access without a guard is always flaggable because auth paths can populate organization context without a member. The access is newly introduced, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +        enable_advanced = request.user.is_superuser or organization_context.member.has_global_access\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=abf69230-e96a-4196-861f-516a1423a7b2&type=bug&issue=AttributeError%3A+member+access+on+non-member+auth&path=src%2Fsentry%2Fapi%2Fendpoints%2Forganization_auditlogs.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:36:24Z"
+          },
+          {
+            "path": "src/sentry/api/endpoints/organization_auditlogs.py",
+            "line": 80,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential TypeError: datetime ordering with OptimizedCursorPaginator\n\n\n  ###### Where:\n\n `src/sentry/api/endpoints/organization_auditlogs.py`\n\n\n  ###### Description:\n\n cursor generation breaks when paginator assumes numeric keys for -datetime ordering\nNew code routes OrganizationAuditLogsEndpoint.get through OptimizedCursorPaginator with order_by='-datetime' at src/sentry/api/endpoints/organization_auditlogs.py:76-83. That paginator's get_item_key unconditionally does math.floor/math.ceil then int(...) on getattr(item, self.key) at src/sentry/api/paginator.py:838-840. With self.key='datetime', this is the known invalid datetime floor/ceil pattern and will fail at runtime when cursor keys are computed. Upstream blast radius: OrganizationAuditLogsEndpoint.get is the traced caller/injection site (src/sentry/api/endpoints/organization_auditlogs.py:42, 76-83). Downstream dependencies: OptimizedCursorPaginator.get_item_key depends on math.floor/math.ceil and the model datetime attribute (src/sentry/api/paginator.py:838-840), and get_result passes that key function into build_cursor (src/sentry/api/paginator.py:897-906).\n\n\n\n<br/>\n\n  ```diff\n  +                paginator_cls=OptimizedCursorPaginator,\n+                order_by=\"-datetime\",\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=abf69230-e96a-4196-861f-516a1423a7b2&type=bug&issue=Potential+TypeError%3A+datetime+ordering+with+OptimizedCursorPaginator&path=src%2Fsentry%2Fapi%2Fendpoints%2Forganization_auditlogs.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:36:27Z"
+          },
+          {
+            "path": "src/sentry/api/paginator.py",
+            "line": 184,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: negative QuerySet slicing allowed\n\n\n  ###### Where:\n\n `src/sentry/api/paginator.py`\n\n\n  ###### Description:\n\n pagination crashes when previous-page cursors produce negative offsets\nThe PR changed BasePaginator.get_result from always slicing queryset[offset:stop] to preserving raw offset on prev-page paths: start_offset = max(0, offset) if not cursor.is_prev else offset, then queryset[start_offset:stop] at src/sentry/api/paginator.py:182-184. This newly allows negative start indexes whenever a previous-page cursor carries a negative offset, matching the known Django QuerySet negative-slicing AssertionError edge case. The same pattern is repeated in OptimizedCursorPaginator at src/sentry/api/paginator.py:877-886, but the proximate source alert here is the BasePaginator change. Upstream blast radius: BasePaginator.get_result underlies DateTimePaginator, which has many callers, including OrganizationAuditLogsEndpoint and other endpoints traced in usage (src/sentry/api/paginator.py::DateTimePaginator usage report). Downstream dependencies: both BasePaginator.get_result and OptimizedCursorPaginator.get_result rely on Django ORM queryset slicing semantics via list(queryset[start_offset:stop]).\n\n\n\n<br/>\n\n  ```diff\n  +        start_offset = max(0, offset) if not cursor.is_prev else offset\n+        stop = start_offset + limit + extra\n+        results = list(queryset[start_offset:stop])\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=abf69230-e96a-4196-861f-516a1423a7b2&type=bug&issue=AssertionError%3A+negative+QuerySet+slicing+allowed&path=src%2Fsentry%2Fapi%2Fpaginator.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:36:30Z"
+          },
+          {
+            "path": "src/sentry/api/paginator.py",
+            "line": 883,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: advanced negative slicing crashes Django ORM\n\n\n  ###### Where:\n\n `src/sentry/api/paginator.py`\n\n\n  ###### Description:\n\n pagination crashes when enable_advanced_features allows negative cursor.offset\nNew code in `OptimizedCursorPaginator.get_result` explicitly allows `start_offset = cursor.offset` when negative and slices `queryset[start_offset:stop]` (`src/sentry/api/paginator.py:877-883`). Django QuerySet negative slicing is unsupported, so this added branch can raise at runtime. The PR also introduces a new upstream caller that enables this paginator for audit logs (`src/sentry/api/endpoints/organization_auditlogs.py:73-83`), so this is new, reachable code rather than pre-existing behavior.\n\n\n\n<br/>\n\n  ```diff\n  +        if self.enable_advanced_features and cursor.offset < 0:\n+            start_offset = cursor.offset  # Allow negative offsets for advanced pagination\n+            stop = start_offset + limit + extra\n+            results = list(queryset[start_offset:stop])\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=abf69230-e96a-4196-861f-516a1423a7b2&type=bug&issue=AssertionError%3A+advanced+negative+slicing+crashes+Django+ORM&path=src%2Fsentry%2Fapi%2Fpaginator.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:36:33Z"
+          },
+          {
+            "path": "src/sentry/api/paginator.py",
+            "line": 840,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: math.floor/ceil on non-numeric keys\n\n\n  ###### Where:\n\n `src/sentry/api/paginator.py`\n\n\n  ###### Description:\n\n cursor building crashes when paginating datetime or string ordered fields\nNew `OptimizedCursorPaginator.get_item_key` applies `math.floor`/`math.ceil` to `getattr(item, self.key)` (`src/sentry/api/paginator.py:838-840`). The same PR newly wires this paginator into audit logs with `order_by='-datetime'` (`src/sentry/api/endpoints/organization_auditlogs.py:79-82`), making `self.key` a datetime field. `floor`/`ceil` on datetime values is a structural type mismatch that will fail when cursor keys are computed, so the alert is a real new logic error.\n\n\n\n<br/>\n\n  ```diff\n  +    def get_item_key(self, item, for_prev=False):\n+        value = getattr(item, self.key)\n+        return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=abf69230-e96a-4196-861f-516a1423a7b2&type=bug&issue=TypeError%3A+math.floor%2Fceil+on+non-numeric+keys&path=src%2Fsentry%2Fapi%2Fpaginator.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:36:36Z"
+          }
+        ]
       }
     ]
   },
@@ -15331,6 +15637,31 @@
             "created_at": "2026-04-07T19:31:16Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR97529__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR97529__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/storage/unified/resource/search.go",
+            "line": 216,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: race panic via unsynchronized TotalDocs call\n\n\n  ###### Where:\n\n `pkg/storage/unified/resource/search.go`\n\n\n  ###### Description:\n\n concurrent map access can panic when event watcher indexes while initialization logs total docs\nNew code at pkg/storage/unified/resource/search.go:216 calls s.search.TotalDocs(). The traced implementation bleveBackend.TotalDocs in pkg/storage/unified/search/bleve.go:145-155 iterates b.cache with no lock, while BuildIndex writes b.cache under b.cacheMu at pkg/storage/unified/search/bleve.go:137-139. That is a concrete concurrent map read/write hazard introduced by this new call site into the unsafe path.\n\n\n\n<br/>\n\n  ```diff\n  +\ts.log.Info(\"search index initialized\", \"duration_secs\", end-start, \"total_docs\", s.search.TotalDocs())\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d877631-8f3a-4ce3-8ef7-f7092e383d34&type=bug&issue=Potential%3A+race+panic+via+unsynchronized+TotalDocs+call&path=pkg%2Fstorage%2Funified%2Fresource%2Fsearch.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:39:33Z"
+          },
+          {
+            "path": "pkg/storage/unified/resource/server.go",
+            "line": 929,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: nil pointer panic in History/Origin\n\n\n  ###### Where:\n\n `pkg/storage/unified/resource/server.go`\n\n\n  ###### Description:\n\n request handling breaks when search is nil and History or Origin is called\npkg/storage/unified/resource/server.go:922-929 adds History/Origin methods that dereference s.search without any nil guard. The same file shows search is optional in construction (pkg/storage/unified/resource/server.go:248-253), and Search still retains the guard at pkg/storage/unified/resource/server.go:913-918. Init only calls s.search.init when s.search != nil (pkg/storage/unified/resource/server.go:300-302), so these new methods can panic when search is not configured.\n\n\n\n<br/>\n\n  ```diff\n  +func (s *server) History(ctx context.Context, req *HistoryRequest) (*HistoryResponse, error) {\n+\treturn s.search.History(ctx, req)\n+}\n+\n+func (s *server) Origin(ctx context.Context, req *OriginRequest) (*OriginResponse, error) {\n+\treturn s.search.Origin(ctx, req)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d877631-8f3a-4ce3-8ef7-f7092e383d34&type=bug&issue=Potential%3A+nil+pointer+panic+in+History%2FOrigin&path=pkg%2Fstorage%2Funified%2Fresource%2Fserver.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:39:36Z"
+          },
+          {
+            "path": "pkg/storage/unified/search/bleve.go",
+            "line": 139,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: duplicate index builds\n\n\n  ###### Where:\n\n `pkg/storage/unified/search/bleve.go`\n\n\n  ###### Description:\n\n cache coherence breaks when concurrent callers build the same key simultaneously\nValid new concurrency regression in pkg/storage/unified/search/bleve.go:BuildIndex. The diff removed the full-function cache lock at lines 85-90 and now locks only around `b.cache[key] = idx` at 137-139, so concurrent callers can execute the expensive index creation/build path simultaneously for the same key before either stores into `b.cache`. This is introduced by the PR, not pre-existing. Evidence log also confirms BuildIndex performs multi-step work before caching, and `TotalDocs` in the same file (145-155) iterates `b.cache` without locking, increasing the correctness risk from the narrowed lock scope.\n\n\n\n<br/>\n\n  ```diff\n  - \tb.cacheMu.Lock()\n- \tdefer b.cacheMu.Unlock()\n...\n+\tb.cacheMu.Lock()\nb.cache[key] = idx\n+\tb.cacheMu.Unlock()\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d877631-8f3a-4ce3-8ef7-f7092e383d34&type=bug&issue=Race+condition%3A+duplicate+index+builds&path=pkg%2Fstorage%2Funified%2Fsearch%2Fbleve.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:39:39Z"
+          }
+        ]
       }
     ]
   },
@@ -16389,6 +16720,31 @@
             "created_at": "2026-04-07T19:31:42Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry__cloudaeye__PR80168__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80168__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/incidents/grouptype.py",
+            "line": 12,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: TypeError from abstract stateful handler instantiation\n\n\n  ###### Where:\n\n `src/sentry/incidents/grouptype.py`\n\n\n  ###### Description:\n\n handler creation breaks when StatefulDetectorHandler requires abstract methods not implemented here\nNew code makes `MetricAlertDetectorHandler` a `StatefulDetectorHandler` with `pass` only (`src/sentry/incidents/grouptype.py:11-12`). `StatefulDetectorHandler` has abstract methods `get_dedupe_value`, `get_group_key_values`, and newly added `build_occurrence_and_event_data` (`src/sentry/workflow_engine/processors/detector.py:151-170`), so this subclass is non-instantiable. Upstream, `Detector.detector_handler` resolves the grouptype and calls `group_type.detector_handler(self)` (`src/sentry/workflow_engine/models/detector.py:58-65`), which would instantiate the abstract handler and raise at runtime.\n\n\n\n<br/>\n\n  ```diff\n  +class MetricAlertDetectorHandler(StatefulDetectorHandler[QuerySubscriptionUpdate]):\n+    pass\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=19a3d5c8-f6f7-4d9f-9cb6-b428d6638118&type=bug&issue=Potential%3A+TypeError+from+abstract+stateful+handler+instantiation&path=src%2Fsentry%2Fincidents%2Fgrouptype.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:21Z"
+          },
+          {
+            "path": "src/sentry/workflow_engine/processors/detector.py",
+            "line": 170,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: abstract handler instantiation fails\n\n\n  ###### Where:\n\n `src/sentry/workflow_engine/processors/detector.py`\n\n\n  ###### Description:\n\n handler creation breaks when existing subclasses lack the new abstract method\nNew abstract method `StatefulDetectorHandler.build_occurrence_and_event_data` was added in `src/sentry/workflow_engine/processors/detector.py:166-170`. In the same PR, `MetricAlertDetectorHandler` was changed to inherit `StatefulDetectorHandler` but remains `pass`, so it does not implement the abstract method and is abstract itself (`src/sentry/incidents/grouptype.py:11-12`). `Detector.detector_handler` resolves a handler via `Detector.group_type` -> grouptype registry (`src/sentry/workflow_engine/models/detector.py:58-64`), so detector type selection now reaches the changed handler class. This is new in the diff and is a concrete ABC-instantiation regression, not speculation.\n\n\n\n<br/>\n\n  ```diff\n  +    @abc.abstractmethod\n+    def build_occurrence_and_event_data(\n+        self, group_key: DetectorGroupKey, value: int, new_status: PriorityLevel\n+    ) -> tuple[IssueOccurrence, dict[str, Any]]:\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=19a3d5c8-f6f7-4d9f-9cb6-b428d6638118&type=bug&issue=TypeError%3A+abstract+handler+instantiation+fails&path=src%2Fsentry%2Fworkflow_engine%2Fprocessors%2Fdetector.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:24Z"
+          },
+          {
+            "path": "src/sentry/workflow_engine/processors/detector.py",
+            "line": 228,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Docstring returns list but function returns dict\n\n\n  ###### Where:\n\n `src/sentry/workflow_engine/processors/detector.py`\n\n\n  ###### Description:\n\n future callers are misled when relying on the documented return shape\n\n\n\n<br/>\n\n  ```diff\n  +    ) -> dict[DetectorGroupKey, DetectorEvaluationResult]:\n\"\"\"\nEvaluates a given data packet and returns a list of `DetectorEvaluationResult`.\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=19a3d5c8-f6f7-4d9f-9cb6-b428d6638118&type=bug&issue=Docstring+returns+list+but+function+returns+dict&path=src%2Fsentry%2Fworkflow_engine%2Fprocessors%2Fdetector.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:30Z"
+          }
+        ]
       }
     ]
   },
@@ -17364,6 +17720,19 @@
             "created_at": "2026-04-07T19:29:29Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry__cloudaeye__PR80528__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80528__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/monitors/logic/incident_occurrence.py",
+            "line": 168,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: dead code drops transformed config\n\n\n  ###### Where:\n\n `src/sentry/monitors/logic/incident_occurrence.py`\n\n\n  ###### Description:\n\n displayed context stays unmodified when schedule_type should be humanized\nNew code in incident_occurrence.get_monitor_environment_context copies and humanizes config['schedule_type'] at src/sentry/monitors/logic/incident_occurrence.py:160-163, but the returned dict uses monitor_environment.monitor.config at line 168 instead of the mutated local config. This drops the transformation introduced by the + lines. The bad value propagates outward because create_incident_occurrence inserts get_monitor_environment_context(monitor_env) into event_data['contexts']['monitor'] before produce_occurrence_to_kafka at src/sentry/monitors/logic/incident_occurrence.py:88-113.\n\n\n\n<br/>\n\n  ```diff\n  +    config = monitor_environment.monitor.config.copy()\n+    if \"schedule_type\" in config:\n+        config[\"schedule_type\"] = monitor_environment.monitor.get_schedule_type_display()\n...\n+        \"config\": monitor_environment.monitor.config,\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=17e60825-a6e8-4e23-afd8-7b152f59e2e5&type=bug&issue=Potential%3A+dead+code+drops+transformed+config&path=src%2Fsentry%2Fmonitors%2Flogic%2Fincident_occurrence.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:27Z"
+          }
+        ]
       }
     ]
   },
@@ -18346,6 +18715,43 @@
             "created_at": "2026-04-07T19:30:55Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry__cloudaeye__PR77754__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR77754__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/integrations/services/assignment_source.py",
+            "line": 18,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Shared datetime default freezes queued timestamp\n\n\n  ###### Where:\n\n `src/sentry/integrations/services/assignment_source.py`\n\n\n  ###### Description:\n\n new instances get the import-time timestamp when queued is omitted\nThe new dataclass field default `queued: datetime = timezone.now()` in `src/sentry/integrations/services/assignment_source.py:18` is evaluated once at class definition time, so omitted `queued` values reuse a frozen timestamp. This is concretely exercised by new code in `AssignmentSource.from_integration()` (`src/sentry/integrations/services/assignment_source.py:21-25`), which does not pass `queued`, and by new callers in `src/sentry/integrations/utils/sync.py:96-99,112-115` that construct `AssignmentSource` through `from_integration()`.\n\n\n\n<br/>\n\n  ```diff\n  +    queued: datetime = timezone.now()\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=f468f0c8-8d8e-4896-9a0f-43bd6070e55c&type=bug&issue=Shared+datetime+default+freezes+queued+timestamp&path=src%2Fsentry%2Fintegrations%2Fservices%2Fassignment_source.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:33Z"
+          },
+          {
+            "path": "src/sentry/integrations/services/assignment_source.py",
+            "line": 28,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: datetime in to_dict may break JSON serialization\n\n\n  ###### Where:\n\n `src/sentry/integrations/services/assignment_source.py`\n\n\n  ###### Description:\n\n task enqueue or JSON encoding breaks when serialized kwargs include queued\nNew code in src/sentry/integrations/services/assignment_source.py:27-28 returns dataclasses.asdict(self), which includes queued from src/sentry/integrations/services/assignment_source.py:18 as a datetime. That dict is passed directly into Celery task kwargs at src/sentry/integrations/utils/sync.py:137-144 via sync_assignee_outbound.apply_async(..., kwargs={..., 'assignment_source_dict': assignment_source.to_dict()}). This introduces a concrete serialization boundary for a datetime in task kwargs, matching the validated error_handling pattern.\n\n\n\n<br/>\n\n  ```diff\n  +    def to_dict(self) -> dict[str, Any]:\n+        return asdict(self)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=f468f0c8-8d8e-4896-9a0f-43bd6070e55c&type=bug&issue=TypeError%3A+datetime+in+to_dict+may+break+JSON+serialization&path=src%2Fsentry%2Fintegrations%2Fservices%2Fassignment_source.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:39Z"
+          },
+          {
+            "path": "tests/sentry/integrations/services/test_assignment_source.py",
+            "line": 13,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Naming typo: test_from_dict_inalid_data mislabels failure\n\n\n  ###### Where:\n\n `tests/sentry/integrations/services/test_assignment_source.py`\n\n\n  ###### Description:\n\n failure attribution misleads when the misspelled test name appears in test output\nNew code in tests/sentry/integrations/services/test_assignment_source.py:13 adds `test_from_dict_inalid_data`, an obvious typo in the test identifier (`inalid` vs `invalid`). This is a direct naming contradiction visible in the added line and will surface in test discovery/output.\n\n\n\n<br/>\n\n  ```diff\n  +    def test_from_dict_inalid_data(self):\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=f468f0c8-8d8e-4896-9a0f-43bd6070e55c&type=bug&issue=Naming+typo%3A+test_from_dict_inalid_data+mislabels+failure&path=tests%2Fsentry%2Fintegrations%2Fservices%2Ftest_assignment_source.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:51Z"
+          },
+          {
+            "path": "tests/sentry/integrations/services/test_assignment_source.py",
+            "line": 9,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Naming mismatch: empty_array test uses dict\n\n\n  ###### Where:\n\n `tests/sentry/integrations/services/test_assignment_source.py`\n\n\n  ###### Description:\n\n test intent is unclear when the name says array but body passes a dict\nNew code in tests/sentry/integrations/services/test_assignment_source.py:8-9 names the test `test_from_dict_empty_array` but the body passes an empty dict `{}`. The test name contradicts the test input in the added lines, so the alert is valid.\n\n\n\n<br/>\n\n  ```diff\n  +    def test_from_dict_empty_array(self):\n+        data: dict[str, Any] = {}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=f468f0c8-8d8e-4896-9a0f-43bd6070e55c&type=bug&issue=Naming+mismatch%3A+empty_array+test+uses+dict&path=tests%2Fsentry%2Fintegrations%2Fservices%2Ftest_assignment_source.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:54Z"
+          },
+          {
+            "path": null,
+            "line": null,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: TypeError enqueueing non-JSON task kwargs\n\n\n  ###### Where:\n\n `src/sentry/integrations/tasks/sync_assignee_outbound.py`\n\n\n  ###### Description:\n\n task enqueue breaks when assignment_source_dict contains non-serializable objects\nNew code in src/sentry/integrations/utils/sync.py:137-144 enqueues assignment_source.to_dict() directly in task kwargs. AssignmentSource.to_dict() in src/sentry/integrations/services/assignment_source.py:18,27-28 includes queued, whose default value is a datetime. Per the project rule for error_handling, passing a datetime directly in apply_async kwargs is a concrete JSON-serialization failure. The tagged task file only receives the dict later (src/sentry/integrations/tasks/sync_assignee_outbound.py:34,53-55); the proximate introduced bug is upstream at enqueue time.\n\n\n\n<br/>\n\n  ```diff\n  +    assignment_source_dict: dict[str, Any] | None = None,\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=f468f0c8-8d8e-4896-9a0f-43bd6070e55c&type=bug&issue=Potential%3A+TypeError+enqueueing+non-JSON+task+kwargs&path=src%2Fsentry%2Fintegrations%2Ftasks%2Fsync_assignee_outbound.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:48Z"
+          }
+        ]
       }
     ]
   },
@@ -19449,6 +19855,43 @@
             "created_at": "2026-04-07T19:31:36Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry__cloudaeye__PR95633__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR95633__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/remote_subscriptions/consumers/queue_consumer.py",
+            "line": 338,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: close drops processed-but-uncommitted offsets\n\n\n  ###### Where:\n\n `src/sentry/remote_subscriptions/consumers/queue_consumer.py`\n\n\n  ###### Description:\n\n completed work remains uncommitted when close stops commit loop before queue shutdown finishes\nNew code in `SimpleQueueProcessingStrategy.close` sets `shutdown_event` and joins the commit thread before `FixedQueuePool.shutdown()` drains/join workers (`src/sentry/remote_subscriptions/consumers/queue_consumer.py:335-338`). Offsets are only committed in `_commit_loop` (`:273-291`), while workers mark offsets complete later in their `finally` block during processing/shutdown (`:148-149`, `:231-243`). That ordering allows offsets finished during queue shutdown to miss any final commit pass. Upstream creation is the new `ResultsStrategyFactory.create_thread_queue_parallel_worker` path in `src/sentry/remote_subscriptions/consumers/result_consumer.py:244-259` via `create_with_partitions` (`:209-210`).\n\n\n\n<br/>\n\n  ```diff\n  +    def close(self) -> None:\n+        self.shutdown_event.set()\n+        self.commit_thread.join(timeout=5.0)\n+        self.queue_pool.shutdown()\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1b5a0bd4-c31d-49f5-80db-eb06bf2716b1&type=bug&issue=LogicError%3A+close+drops+processed-but-uncommitted+offsets&path=src%2Fsentry%2Fremote_subscriptions%2Fconsumers%2Fqueue_consumer.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:45:00Z"
+          },
+          {
+            "path": "tests/sentry/remote_subscriptions/consumers/test_queue_consumer.py",
+            "line": 173,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: concurrency test doesn't verify concurrency\n\n\n  ###### Where:\n\n `tests/sentry/remote_subscriptions/consumers/test_queue_consumer.py`\n\n\n  ###### Description:\n\n test passes even when processing is fully serial because only group presence is checked\nNew test code in TestFixedQueuePool.test_concurrent_processing_across_groups claims concurrency in both name/docstring, but the body only asserts all 6 items completed and that 3 group IDs appeared in processed output; it never checks overlap, timing, or parallel execution. This is a direct structural contradiction visible in tests/sentry/remote_subscriptions/consumers/test_queue_consumer.py:137-173, which qualifies as a code_clarity issue without further tracing.\n\n\n\n<br/>\n\n  ```diff\n  +    def test_concurrent_processing_across_groups(self):\n+        \"\"\"Test that different groups are processed concurrently.\"\"\"\n...\n+        assert len(groups_seen) == 3\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1b5a0bd4-c31d-49f5-80db-eb06bf2716b1&type=bug&issue=AssertionError%3A+concurrency+test+doesn%27t+verify+concurrency&path=tests%2Fsentry%2Fremote_subscriptions%2Fconsumers%2Ftest_queue_consumer.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:45:03Z"
+          },
+          {
+            "path": "tests/sentry/remote_subscriptions/consumers/test_queue_consumer.py",
+            "line": 326,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: strategy concurrency test doesn't test concurrency\n\n\n  ###### Where:\n\n `tests/sentry/remote_subscriptions/consumers/test_queue_consumer.py`\n\n\n  ###### Description:\n\n test name/docstring claim concurrency when body only asserts four results were processed\nNew test `TestSimpleQueueProcessingStrategy.test_concurrent_processing_different_groups` claims concurrency in both name/docstring, but its body only submits 4 messages, waits for completion, and asserts `len(self.processed_results) == 4`; there is no concurrency-specific observation or assertion. Visible contradiction in tests/sentry/remote_subscriptions/consumers/test_queue_consumer.py:314-326.\n\n\n\n<br/>\n\n  ```diff\n  +    def test_concurrent_processing_different_groups(self):\n+        \"\"\"Test that different subscriptions are processed concurrently.\"\"\"\n...\n+        assert len(self.processed_results) == 4\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1b5a0bd4-c31d-49f5-80db-eb06bf2716b1&type=bug&issue=AssertionError%3A+strategy+concurrency+test+doesn%27t+test+concurrency&path=tests%2Fsentry%2Fremote_subscriptions%2Fconsumers%2Ftest_queue_consumer.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:45:06Z"
+          },
+          {
+            "path": "tests/sentry/uptime/consumers/test_results_consumer.py",
+            "line": 1725,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Flaky timing assertion in background-thread tests\n\n\n  ###### Where:\n\n `tests/sentry/uptime/consumers/test_results_consumer.py`\n\n\n  ###### Description:\n\n Tests fail intermittently when background processing or commits take longer than fixed sleep windows\nNew thread-queue-parallel tests add fixed polling/sleep loops around background queue/Kafka work, e.g. `max_wait = 50` with repeated `time.sleep(0.1)` in tests/sentry/uptime/consumers/test_results_consumer.py:1718-1725, with the same pattern repeated at 1765-1772, 1812-1819, 1860-1867, 1912-1919, 1961-1968, and 2101-2105. These tests depend on asynchronous processing completing within fixed timing windows, which is the recognized flaky test pattern for background workers. This is all newly added code.\n\n\n\n<br/>\n\n  ```diff\n  +            max_wait = 50\n+            for _ in range(max_wait):\n+                ...\n+                time.sleep(0.1)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1b5a0bd4-c31d-49f5-80db-eb06bf2716b1&type=bug&issue=Flaky+timing+assertion+in+background-thread+tests&path=tests%2Fsentry%2Fuptime%2Fconsumers%2Ftest_results_consumer.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:45:09Z"
+          },
+          {
+            "path": "tests/sentry/uptime/consumers/test_results_consumer.py",
+            "line": 1922,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Docstring mismatch in error-handling test\n\n\n  ###### Where:\n\n `tests/sentry/uptime/consumers/test_results_consumer.py`\n\n\n  ###### Description:\n\n Test intent is misleading when body asserts no commits occur for any message\n\n\n\n<br/>\n\n  ```diff\n  +    def test_thread_queue_parallel_error_handling(self) -> None:\n+        \"\"\"\n+        Test that errors in processing don't block offset commits for other messages.\n+        \"\"\"\n...\n+            assert len(committed_offsets) == 0 or test_partition not in committed_offsets\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1b5a0bd4-c31d-49f5-80db-eb06bf2716b1&type=bug&issue=Docstring+mismatch+in+error-handling+test&path=tests%2Fsentry%2Fuptime%2Fconsumers%2Ftest_results_consumer.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:45:11Z"
+          }
+        ]
       }
     ]
   },
@@ -20822,6 +21265,31 @@
             "created_at": "2026-04-07T19:30:55Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry-greptile__cloudaeye__PR2__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR2__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/api/paginator.py",
+            "line": 882,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: negative QuerySet slicing enabled\n\n\n  ###### Where:\n\n `src/sentry/api/paginator.py`\n\n\n  ###### Description:\n\n pagination crashes when advanced mode passes a negative offset to a Django QuerySet slice\nNew code in `src/sentry/api/paginator.py:877-882` explicitly permits `queryset[start_offset:stop]` with `start_offset = cursor.offset` when negative. Django QuerySet negative slicing is an error case, so this newly introduced branch is crash-prone. The path is reachable from new caller wiring in `src/sentry/api/endpoints/organization_auditlogs.py:70-83`, which switches audit logs to `OptimizedCursorPaginator` under the new query-param/admin gate.\n\n\n\n<br/>\n\n  ```diff\n  +        if self.enable_advanced_features and cursor.offset < 0:\n+            start_offset = cursor.offset  # Allow negative offsets for advanced pagination\n+            stop = start_offset + limit + extra\n+            results = list(queryset[start_offset:stop])\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=820fa822-79cc-4e33-b37f-994d07c555b0&type=bug&issue=AssertionError%3A+negative+QuerySet+slicing+enabled&path=src%2Fsentry%2Fapi%2Fpaginator.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:31:38Z"
+          },
+          {
+            "path": "src/sentry/api/paginator.py",
+            "line": 840,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: math.floor/ceil on non-numeric keys\n\n\n  ###### Where:\n\n `src/sentry/api/paginator.py`\n\n\n  ###### Description:\n\n cursor building crashes when paginator key is datetime or other non-numeric field\nNew `OptimizedCursorPaginator.get_item_key` in `src/sentry/api/paginator.py:838-840` applies `math.floor`/`math.ceil` and `int(...)` to `getattr(item, self.key)`, which requires a numeric key. But the new caller in `src/sentry/api/endpoints/organization_auditlogs.py:76-80` invokes this paginator with `order_by='-datetime'`, making the key a datetime field. This structurally conflicts with the existing dedicated datetime handling in `DateTimePaginator.get_item_key` at `src/sentry/api/paginator.py:233-236`, so the new path can raise at runtime.\n\n\n\n<br/>\n\n  ```diff\n  +    def get_item_key(self, item, for_prev=False):\n+        value = getattr(item, self.key)\n+        return int(math.floor(value) if self._is_asc(for_prev) else math.ceil(value))\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=820fa822-79cc-4e33-b37f-994d07c555b0&type=bug&issue=TypeError%3A+math.floor%2Fceil+on+non-numeric+keys&path=src%2Fsentry%2Fapi%2Fpaginator.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:31:41Z"
+          },
+          {
+            "path": "src/sentry/api/paginator.py",
+            "line": 184,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: prev pagination keeps negative offset\n\n\n  ###### Where:\n\n `src/sentry/api/paginator.py`\n\n\n  ###### Description:\n\n existing paginator now crashes when previous-page cursor has a negative offset\nNew code in `src/sentry/api/paginator.py:182-184` preserves raw `offset` for `cursor.is_prev`, so a negative previous-page offset now flows directly into `queryset[start_offset:stop]`. This behavior was introduced by the PR (previous code sliced from `offset` without the new negative-offset intent/comments), and the same PR explicitly documents/encourages negative offsets in `src/sentry/utils/cursors.py:26-27` and `src/sentry/api/paginator.py:874-882`. Django QuerySet negative slicing is not supported, so this creates an AssertionError edge case on the new path. Reachability is concrete: `src/sentry/api/endpoints/organization_auditlogs.py:70-83` newly wires `OptimizedCursorPaginator` behind a request flag, making the negative-offset pagination path callable from new code.\n\n\n\n<br/>\n\n  ```diff\n  +        start_offset = max(0, offset) if not cursor.is_prev else offset\n+        stop = start_offset + limit + extra\n+        results = list(queryset[start_offset:stop])\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=820fa822-79cc-4e33-b37f-994d07c555b0&type=bug&issue=AssertionError%3A+prev+pagination+keeps+negative+offset&path=src%2Fsentry%2Fapi%2Fpaginator.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:31:44Z"
+          }
+        ]
       }
     ]
   },
@@ -22209,6 +22677,25 @@
             "created_at": "2026-04-07T19:31:12Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry-greptile__cloudaeye__PR3__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR3__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/api/helpers/error_upsampling.py",
+            "line": 27,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: hash() cache key breaks invalidation\n\n\n  ###### Where:\n\n `src/sentry/api/helpers/error_upsampling.py`\n\n\n  ###### Description:\n\n cache reuse and deletion break when different workers compute different hash() values\nNew code in src/sentry/api/helpers/error_upsampling.py:27 and :73 uses Python hash(tuple(...)) in cache keys. Python hash values are process-dependent, so cache lookup/set and invalidate_upsampling_cache can compute different keys across workers, breaking cross-process cache reuse/invalidation. This helper is newly used by src/sentry/api/endpoints/organization_events_stats.py:220-233, 275-277, 295-296 to drive live query rewriting, so the bug is introduced by this PR and is not speculative.\n\n\n\n<br/>\n\n  ```diff\n  +    cache_key = f\"error_upsampling_eligible:{organization.id}:{hash(tuple(sorted(snuba_params.project_ids)))}\"\n...\n+    cache_key = f\"error_upsampling_eligible:{organization_id}:{hash(tuple(sorted(project_ids)))}\"\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ad1f7afb-ea09-4c00-a454-153d02dd21c2&type=bug&issue=LogicError%3A+hash%28%29+cache+key+breaks+invalidation&path=src%2Fsentry%2Fapi%2Fhelpers%2Ferror_upsampling.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:36:42Z"
+          },
+          {
+            "path": "src/sentry/testutils/factories.py",
+            "line": 355,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Zero sample_rate skipped by falsy guard\n\n\n  ###### Where:\n\n `src/sentry/testutils/factories.py`\n\n\n  ###### Description:\n\n sample_rate propagation breaks when client_sample_rate is 0 or 0.0\nNew helper `_set_sample_rate_from_error_sampling` uses `if client_sample_rate:` before `float(client_sample_rate)`, so valid zero values (`0`/`0.0`) are skipped instead of being propagated to `normalized_data['sample_rate']` at src/sentry/testutils/factories.py:353-355. This is newly introduced code, and the helper is newly invoked from `store_event` at src/sentry/testutils/factories.py:1049, so the edge-case regression is on the added path.\n\n\n\n<br/>\n\n  ```diff\n  +    if client_sample_rate:\n+        try:\n+            normalized_data[\"sample_rate\"] = float(client_sample_rate)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ad1f7afb-ea09-4c00-a454-153d02dd21c2&type=bug&issue=Zero+sample_rate+skipped+by+falsy+guard&path=src%2Fsentry%2Ftestutils%2Ffactories.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:36:45Z"
+          }
+        ]
       }
     ]
   },
@@ -23255,6 +23742,25 @@
             "created_at": "2026-04-07T19:28:44Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR103633__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR103633__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/services/authz/rbac/service_test.go",
+            "line": 982,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AssertionError: cache deny test contradicts setup\n\n\n  ###### Where:\n\n `pkg/services/authz/rbac/service_test.go`\n\n\n  ###### Description:\n\n test intent is misleading when cached permission map stores false instead of an allow entry\nNew test code in pkg/services/authz/rbac/service_test.go:981-982 is self-contradictory: the comment says the cache entry should 'Allow access to the dashboard', but the inserted fixture stores map[string]bool{\"dashboards:uid:dash1\": false}. The subtest only asserts denial at pkg/services/authz/rbac/service_test.go:993-994, so this is a direct test comment/setup mismatch in new lines, matching a Go test clarity/naming contradiction.\n\n\n\n<br/>\n\n  ```diff\n  +\t\t// Allow access to the dashboard to prove this is not checked\n+\t\ts.permCache.Set(ctx, userPermCacheKey(\"org-12\", \"test-uid\", \"dashboards:read\"), map[string]bool{\"dashboards:uid:dash1\": false})\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=18586d78-4174-4f4d-9b53-f9979087fec2&type=bug&issue=AssertionError%3A+cache+deny+test+contradicts+setup&path=pkg%2Fservices%2Fauthz%2Frbac%2Fservice_test.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:40:19Z"
+          },
+          {
+            "path": "pkg/services/authz/rbac/service.go",
+            "line": 135,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-SECURITY__ALERT-blue.svg) \n  ![License](https://img.shields.io/badge/Risk_Type-Auth__bypass:__stale__cached__grants__trusted-purple.svg) \n  ![License](https://img.shields.io/badge/Risk_Score-4.0-red.svg)\n\n\n\n  ###### Vulnerability Details: \n revoked access persists when a previously allowed permission remains cached\nNew code in pkg/services/authz/rbac/service.go:123-135 now returns success directly from cached permission maps, while pkg/services/authz/rbac/service.go:342-367 shows getCachedIdentityPermissions only checks permCache presence and does no freshness/revalidation. This asymmetric trust of cached grants is newly introduced in this PR and matches the documented auth_and_access pattern for stale cached grants remaining effective until TTL expiry.\n\n\n\n  ###### Where:\n `pkg/services/authz/rbac/service.go`\n\n\n\n<br/>\n\n  ```diff\n  +\tcachedPerms, err := s.getCachedIdentityPermissions(ctx, checkReq.Namespace, checkReq.IdentityType, checkReq.UserUID, checkReq.Action)\n+\tif err == nil {\n+\t\tallowed, err := s.checkPermission(ctx, cachedPerms, checkReq)\n+\t\tif allowed {\n+\t\t\treturn &authzv1.CheckResponse{Allowed: allowed}, nil\n  ```\n\n\n\n<br/>\n\n  ###### Security Justification:\n\n  revoked access persists when a previously allowed permission remains cached\n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=18586d78-4174-4f4d-9b53-f9979087fec2&type=security&vulnerability_name=Auth+bypass%3A+stale+cached+grants+trusted&path=pkg%2Fservices%2Fauthz%2Frbac%2Fservice.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:40:22Z"
+          }
+        ]
       }
     ]
   },
@@ -24449,6 +24955,25 @@
             "created_at": "2026-04-07T19:30:58Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "sentry__sentry__cloudaeye__PR67876__20260310",
+        "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR67876__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "src/sentry/integrations/github/integration.py",
+            "line": 504,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n KeyError: sender metadata access unchecked\n\n\n  ###### Where:\n\n `src/sentry/integrations/github/integration.py`\n\n\n  ###### Description:\n\n installation flow crashes when existing integration metadata lacks sender.login\nNew code in GitHubInstallation.dispatch dereferences integration.metadata[\"sender\"][\"login\"] with no key guard or exception handling (src/sentry/integrations/github/integration.py:500-505). Integration.metadata is a generic JSONField defaulting to {} and does not guarantee sender/login keys (src/sentry/models/integrations/integration.py:53-55), so this added access can raise KeyError for integrations lacking that metadata.\n\n\n\n<br/>\n\n  ```diff\n  +        if (\n+            pipeline.fetch_state(\"github_authenticated_user\")\n+            != integration.metadata[\"sender\"][\"login\"]\n+        ):\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=15c88600-c698-4436-b24e-176433e307fa&type=bug&issue=KeyError%3A+sender+metadata+access+unchecked&path=src%2Fsentry%2Fintegrations%2Fgithub%2Fintegration.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:36Z"
+          },
+          {
+            "path": "src/sentry/integrations/github/integration.py",
+            "line": 413,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-SECURITY__ALERT-blue.svg) \n  ![License](https://img.shields.io/badge/Risk_Type-CSRF/replay__risk:__predictable__OAuth__state-purple.svg) \n  ![License](https://img.shields.io/badge/Risk_Score-4.0-red.svg)\n\n\n\n  ###### Vulnerability Details: \n OAuth callback validation is bypassable when state is derived from reusable pipeline.signature\nOAuthLoginView.dispatch newly sends OAuth state as pipeline.signature and later accepts callbacks by equality against the same value (src/sentry/integrations/github/integration.py:401-413). Pipeline.signature is deterministically derived from pipeline view class names via md5_text(*pipe_ids), not randomized per request (src/sentry/pipeline/base.py:100-103), so the new OAuth state is predictable/reusable rather than a per-request CSRF token.\n\n\n\n  ###### Where:\n `src/sentry/integrations/github/integration.py`\n\n\n\n<br/>\n\n  ```diff\n  +        if not request.GET.get(\"state\"):\n+            state = pipeline.signature\n  ```\n\n\n\n<br/>\n\n  ###### Security Justification:\n\n  OAuth callback validation is bypassable when state is derived from reusable pipeline.signature\n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=15c88600-c698-4436-b24e-176433e307fa&type=security&vulnerability_name=CSRF%2Freplay+risk%3A+predictable+OAuth+state&path=src%2Fsentry%2Fintegrations%2Fgithub%2Fintegration.py\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T10:24:42Z"
+          }
+        ]
       }
     ]
   },
@@ -25447,6 +25972,19 @@
             "created_at": "2026-04-07T19:33:42Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "keycloak__keycloak__cloudaeye__PR32918__20260310",
+        "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR32918__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "testsuite/integration-arquillian/tests/base/src/test/java/org/keycloak/testsuite/organization/cache/OrganizationCacheTest.java",
+            "line": 425,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: cleanup uses wrong IDP alias\n\n\n  ###### Where:\n\n `testsuite/integration-arquillian/tests/base/src/test/java/org/keycloak/testsuite/organization/cache/OrganizationCacheTest.java`\n\n\n  ###### Description:\n\n cleanup leaks created providers when teardown removes non-existent alias\nNew test method `OrganizationCacheTest.testCacheIDPForLogin` creates providers with aliases `idp-alias-<i>` and `idp-alias-20` (`OrganizationCacheTest.java:374,419`) but registers cleanup against the unrelated literal `\"alias\"` on the added lines (`OrganizationCacheTest.java:381,425`). This identifier contradiction is directly visible in the diff and matches the confirmed logic-error pattern of using the wrong alias in cleanup. Upstream blast radius: JUnit invocation of `testCacheIDPForLogin`; downstream: `getCleanup().addCleanup(...)` and `testRealm().identityProviders().get(...).remove` in the test framework/admin API.\n\n\n\n<br/>\n\n  ```diff\n  +            getCleanup().addCleanup(testRealm().identityProviders().get(\"alias\")::remove);\n...\n+        getCleanup().addCleanup(testRealm().identityProviders().get(\"alias\")::remove);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=8a2c24f2-f463-4ded-afc1-45bd907f47bf&type=bug&issue=LogicError%3A+cleanup+uses+wrong+IDP+alias&path=testsuite%2Fintegration-arquillian%2Ftests%2Fbase%2Fsrc%2Ftest%2Fjava%2Forg%2Fkeycloak%2Ftestsuite%2Forganization%2Fcache%2FOrganizationCacheTest.java\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T16:26:07Z"
+          }
+        ]
       }
     ]
   },
@@ -26384,6 +26922,25 @@
             "created_at": "2026-04-07T19:28:24Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR94942__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR94942__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/expr/reader.go",
+            "line": 200,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: SQL expressions always disabled\n\n\n  ###### Where:\n\n `pkg/expr/reader.go`\n\n\n  ###### Description:\n\n SQL query parsing breaks when QueryTypeSQL is used\nValid new logic error in pkg/expr/reader.go:194-200: enableSqlExpressions returns false on every path. The new caller gate at pkg/expr/reader.go:129-132 rejects QueryTypeSQL whenever !enabled, so SQL expressions are now unconditionally disabled by this PR.\n\n\n\n<br/>\n\n  ```diff\n  +func enableSqlExpressions(h *ExpressionQueryReader) bool {\n+\tenabled := !h.features.IsEnabledGlobally(featuremgmt.FlagSqlExpressions)\n+\tif enabled {\n+\t\treturn false\n+\t}\n+\treturn false\n+}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a93f3904-cc8b-4524-b5c3-11610e48e2ad&type=bug&issue=LogicError%3A+SQL+expressions+always+disabled&path=pkg%2Fexpr%2Freader.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:39:42Z"
+          },
+          {
+            "path": "pkg/expr/sql_command.go",
+            "line": 100,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: TypeError from incompatible in-memory DB API\n\n\n  ###### Where:\n\n `pkg/expr/sql_command.go`\n\n\n  ###### Description:\n\n SQL execution breaks when sql.NewInMemoryDB lacks duck-compatible QueryFramesInto behavior\nValid. New code in pkg/expr/sql_command.go:96-100 now instantiates `sql.NewInMemoryDB()` and immediately calls `QueryFramesInto`. That method was introduced in pkg/expr/sql/db.go:20-21 and unconditionally returns `errors.New(\"not implemented\")`, so this execution path now deterministically fails at runtime. Upstream blast radius: `(*SQLCommand).Execute` in pkg/expr/sql_command.go is affected because it depends on this DB call for query execution. Downstream blast radius: the changed call depends on `sql.NewInMemoryDB` (pkg/expr/sql/db.go:24-25) and `(*DB).QueryFramesInto` (pkg/expr/sql/db.go:20-21), plus `data.Frame` from the plugin SDK.\n\n\n\n<br/>\n\n  ```diff\n  +\tdb := sql.NewInMemoryDB()\n+\terr := db.QueryFramesInto(gr.refID, gr.query, allFrames, frame)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a93f3904-cc8b-4524-b5c3-11610e48e2ad&type=bug&issue=Potential%3A+TypeError+from+incompatible+in-memory+DB+API&path=pkg%2Fexpr%2Fsql_command.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:39:45Z"
+          }
+        ]
       }
     ]
   },
@@ -27221,6 +27778,25 @@
             "created_at": "2026-04-07T19:27:53Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR90939__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90939__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/api/webassets/webassets.go",
+            "line": 49,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: incomplete double-checked locking\n\n\n  ###### Where:\n\n `pkg/api/webassets/webassets.go`\n\n\n  ###### Description:\n\n duplicate fetches run when concurrent callers observe empty cache before lock acquisition\nValid new concurrency issue in pkg/api/webassets/webassets.go:GetWebAssets. The PR adds an RLock fast path at lines 41-46 and then acquires the write lock at 48-49, but does not re-check entryPointAssetsCache after Lock. A second goroutine can observe nil before locking, wait, then redundantly rebuild assets after the first goroutine already populated the cache. This incomplete double-checked locking pattern is introduced by the new mutex logic in this PR; it was not present before.\n\n\n\n<br/>\n\n  ```diff\n  + \tentryPointAssetsCacheMu.Lock()\n+ \tdefer entryPointAssetsCacheMu.Unlock()\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=80713d16-52bc-4f83-a408-d7b6aff2607e&type=bug&issue=Race+condition%3A+incomplete+double-checked+locking&path=pkg%2Fapi%2Fwebassets%2Fwebassets.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:39:48Z"
+          },
+          {
+            "path": null,
+            "line": null,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Logic error: nil cache overwrites valid entry\n\n\n  ###### Where:\n\n `pkg/api/webassets/webassets.go`\n\n\n  ###### Description:\n\n cached assets are lost when asset loading fails after a previous successful population\nValid new logic error in pkg/api/webassets/webassets.go:GetWebAssets. After the new fast-path cache check at lines 41-46, the function always assigns entryPointAssetsCache = result at line 70. On loader failure, result can be nil because readWebAssetsFromFile and readWebAssetsFromCDN return nil on error (lines 79-88, 90-106). That means a call that misses the pre-lock fast path can overwrite a previously valid global cache entry with nil, losing cached assets after a transient load error. This overwrite behavior is introduced by the new cache/mutex flow in this PR.\n\n\n\n<br/>\n\n  ```diff\n  + \tentryPointAssetsCacheMu.Lock()\n+ \tdefer entryPointAssetsCacheMu.Unlock()\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=80713d16-52bc-4f83-a408-d7b6aff2607e&type=bug&issue=Logic+error%3A+nil+cache+overwrites+valid+entry&path=pkg%2Fapi%2Fwebassets%2Fwebassets.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:39:54Z"
+          }
+        ]
       }
     ]
   },
@@ -28348,6 +28924,19 @@
             "created_at": "2026-04-07T19:29:11Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR80329__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR80329__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/services/annotations/annotationsimpl/xorm_store.go",
+            "line": 537,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n SQL logic error floods error logs\n\n\n  ###### Where:\n\n `pkg/services/annotations/annotationsimpl/xorm_store.go`\n\n\n  ###### Description:\n\n error monitoring breaks when routine cleanup logs every batch as Error\nNew code in xorm_store.go logs routine batch progress with r.log.Error inside CleanAnnotations after fetchIDs has already returned successfully; the same pattern appears on both the pre-delete and post-delete progress paths (pkg/services/annotations/annotationsimpl/xorm_store.go:530-538, 550-557). These are normal control-flow messages, not failure-only events, so using Error is a valid code_clarity issue under the Go rule for informational/debug events logged as errors.\n\n\n\n<br/>\n\n  ```diff\n  +\t\t\tr.log.Error(\"Annotations to clean by time\", \"count\", len(ids), \"ids\", ids, \"cond\", cond, \"err\", err)\n+\t\t\tr.log.Error(\"cleaned annotations by time\", \"count\", len(ids), \"affected\", x, \"err\", y)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=2a0c9aa4-b530-425c-9421-77d2023e6d71&type=bug&issue=SQL+logic+error+floods+error+logs&path=pkg%2Fservices%2Fannotations%2Fannotationsimpl%2Fxorm_store.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:44:52Z"
+          }
+        ]
       }
     ]
   },
@@ -29879,6 +30468,37 @@
             "created_at": "2026-04-07T19:28:57Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR90045__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90045__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/apiserver/rest/dualwriter_mode3.go",
+            "line": 130,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: wrong metrics recorder on Update failure\n\n\n  ###### Where:\n\n `pkg/apiserver/rest/dualwriter_mode3.go`\n\n\n  ###### Description:\n\n storage failure metrics break when Update errors because legacy recorder is incremented\nNew code in `pkg/apiserver/rest/dualwriter_mode3.go:125-132` is timing the Storage.Update call, but on the error path it calls `d.recordLegacyDuration(...)` instead of `d.recordStorageDuration(...)`. The surrounding method records storage success with `recordStorageDuration` and only the async legacy write uses `recordLegacyDuration`, so this is a direct wrong-recorder logic bug introduced by the `+` line.\n\n\n\n<br/>\n\n  ```diff\n  +\t\td.recordLegacyDuration(true, mode3Str, options.Kind, method, startStorage)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=79be62bf-0063-440d-a737-7a757e6da7a3&type=bug&issue=LogicError%3A+wrong+metrics+recorder+on+Update+failure&path=pkg%2Fapiserver%2Frest%2Fdualwriter_mode3.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:45:39Z"
+          },
+          {
+            "path": "pkg/apiserver/rest/dualwriter_mode3.go",
+            "line": 97,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: context loses log fields in Delete\n\n\n  ###### Where:\n\n `pkg/apiserver/rest/dualwriter_mode3.go`\n\n\n  ###### Description:\n\n structured logging breaks when Delete runs because name/kind/method values are dropped from context\nNew code in `pkg/apiserver/rest/dualwriter_mode3.go:96-97` builds an enriched logger `log := d.Log.WithValues(\"name\", name, \"kind\", options.Kind, \"method\", method)` but then stores plain `d.Log` into context via `klog.NewContext(ctx, d.Log)`. This drops the added fields from context-derived logging. Other new methods in the same file attach the enriched `log` to context (`Create` at lines 37-39, `Get` at 64-66), confirming the intended pattern and making this a direct newly introduced logic error.\n\n\n\n<br/>\n\n  ```diff\n  +\tctx = klog.NewContext(ctx, d.Log)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=79be62bf-0063-440d-a737-7a757e6da7a3&type=bug&issue=LogicError%3A+context+loses+log+fields+in+Delete&path=pkg%2Fapiserver%2Frest%2Fdualwriter_mode3.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:45:41Z"
+          },
+          {
+            "path": "pkg/apiserver/rest/dualwriter_mode3.go",
+            "line": 106,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: wrong metrics label in Delete success\n\n\n  ###### Where:\n\n `pkg/apiserver/rest/dualwriter_mode3.go`\n\n\n  ###### Description:\n\n storage metrics attribution breaks when Delete succeeds because object name is recorded as kind\nConfirmed new logic error in pkg/apiserver/rest/dualwriter_mode3.go:106: Delete success records storage duration with `name` as the third argument (`d.recordStorageDuration(false, mode3Str, name, method, startStorage)`), while the same method's error path uses `options.Kind` at lines 103 and sibling methods Get/List/Update/DeleteCollection also consistently pass `options.Kind` at lines 73, 89, 132, 159. This is a direct wrong-field metrics label regression in the added line.\n\n\n\n<br/>\n\n  ```diff\n  +\td.recordStorageDuration(false, mode3Str, name, method, startStorage)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=79be62bf-0063-440d-a737-7a757e6da7a3&type=bug&issue=LogicError%3A+wrong+metrics+label+in+Delete+success&path=pkg%2Fapiserver%2Frest%2Fdualwriter_mode3.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:45:48Z"
+          },
+          {
+            "path": "pkg/apiserver/rest/dualwriter_mode3.go",
+            "line": 166,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: wrong metrics recorder in DeleteCollection goroutine\n\n\n  ###### Where:\n\n `pkg/apiserver/rest/dualwriter_mode3.go`\n\n\n  ###### Description:\n\n legacy latency/error metrics break when DeleteCollection legacy call finishes because storage recorder is used\nValid new logic error. In pkg/apiserver/rest/dualwriter_mode3.go:161-166, the async legacy DeleteCollection path calls d.Legacy.DeleteCollection(...) but records its result with d.recordStorageDuration(...). Sibling legacy async paths in the same new code use d.recordLegacyDuration at lines 56, 113, and 140, so the recorder choice at line 166 is structurally wrong. This incorrect call is introduced on + lines and was not present before.\n\n\n\n<br/>\n\n  ```diff\n  +\t\td.recordStorageDuration(err != nil, mode3Str, options.Kind, method, startLegacy)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=79be62bf-0063-440d-a737-7a757e6da7a3&type=bug&issue=LogicError%3A+wrong+metrics+recorder+in+DeleteCollection+goroutine&path=pkg%2Fapiserver%2Frest%2Fdualwriter_mode3.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:45:54Z"
+          }
+        ]
       }
     ]
   },
@@ -31076,6 +31696,25 @@
             "created_at": "2026-04-07T19:31:17Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR106778__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR106778__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "public/app/features/alerting/unified/hooks/useAbilities.ts",
+            "line": 370,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n ReferenceError: ctx is undefined\n\n\n  ###### Where:\n\n `public/app/features/alerting/unified/hooks/useAbilities.ts`\n\n\n  ###### Description:\n\n editability checks crash when useIsGrafanaPromRuleEditable runs for a GrafanaPromRuleDTO\nNew code in public/app/features/alerting/unified/hooks/useAbilities.ts:349-377 introduces useIsGrafanaPromRuleEditable(), and the added lines at 369-370 reference `ctx` with no visible binding in that function or surrounding changed code. This is a direct structural bug in the new implementation, not pre-existing. Reachability is confirmed because useAllGrafanaPromRuleAbilities() calls useIsGrafanaPromRuleEditable() at useAbilities.ts:287-289, and both new wrappers useGrafanaPromRuleAbility()/useGrafanaPromRuleAbilities() route through that path at 386-405. New PR call sites in AlertRuleMenu.tsx:73-79 and RuleActionsButtons.V2.tsx:56-59 invoke those wrappers, so the bad identifier is on a live path.\n\n\n\n<br/>\n\n  ```diff\n  +    const canEditGrafanaRules = ctx.hasPermissionInMetadata(rulesPermissions.update, folder);\n+    const canRemoveGrafanaRules = ctx.hasPermissionInMetadata(rulesPermissions.delete, folder);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4616e479-82bf-4141-a683-5eb40631d837&type=bug&issue=ReferenceError%3A+ctx+is+undefined&path=public%2Fapp%2Ffeatures%2Falerting%2Funified%2Fhooks%2FuseAbilities.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:33:31Z"
+          },
+          {
+            "path": "public/app/features/alerting/unified/rule-list/FilterView.tsx",
+            "line": 161,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: React key omission causes stale row reuse\n\n\n  ###### Where:\n\n `public/app/features/alerting/unified/rule-list/FilterView.tsx`\n\n\n  ###### Description:\n\n row state can mismatch when list order changes because grafana items no longer have explicit keys\nConfirmed directly from new JSX in `rules.map(...)`: the `grafana` branch now returns `<GrafanaRuleListItem ... />` without a `key`, while sibling branches in the same list still provide `key={key}` (FilterView.tsx:149-166). This matches the allowed logic_error pattern 'JSX list render via .map() in + lines where rendered elements are missing the key prop'. The omission is newly introduced by replacing `GrafanaRuleLoader key={key}` with `GrafanaRuleListItem` lacking a key (FilterView.tsx diff at 154-161).\n\n\n\n<br/>\n\n  ```diff\n  +                <GrafanaRuleListItem\n+                  rule={rule}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4616e479-82bf-4141-a683-5eb40631d837&type=bug&issue=Potential%3A+React+key+omission+causes+stale+row+reuse&path=public%2Fapp%2Ffeatures%2Falerting%2Funified%2Frule-list%2FFilterView.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:33:33Z"
+          }
+        ]
       }
     ]
   },
@@ -31975,6 +32614,19 @@
             "created_at": "2026-04-07T19:33:42Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR107534__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR107534__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": null,
+            "line": null,
+            "body": "No issues found",
+            "created_at": "2026-04-30T08:50:21Z"
+          }
+        ]
       }
     ]
   },
@@ -33190,6 +33842,43 @@
             "created_at": "2026-04-07T19:29:14Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR79265__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR79265__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/services/anonymous/anonimpl/anonstore/database.go",
+            "line": 117,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: device limit check overshoots\n\n\n  ###### Where:\n\n `pkg/services/anonymous/anonimpl/anonstore/database.go`\n\n\n  ###### Description:\n\n limit enforcement breaks when concurrent requests pass the count check before inserting\nNew code in anonstore/database.go:CreateOrUpdateDevice performs a separate CountDevices check before proceeding, then only calls updateDevice when count >= deviceLimit, with no transaction or lock visible around the count-and-insert decision (database.go:108-117). This is the Go TOCTOU pattern called out by the validation rules: concurrent requests can all observe count < limit before any write completes. The branch is newly introduced in this PR, so not pre-existing. Upstream impact is confirmed because impl.go:144-147 now propagates TagDevice errors and client.go:44-46 treats ErrDeviceLimitReached specially during Authenticate.\n\n\n\n<br/>\n\n  ```diff\n  +\t\tcount, err := s.CountDevices(ctx, time.Now().UTC().Add(-anonymousDeviceExpiration), time.Now().UTC().Add(time.Minute))\n+\t\tif count >= s.deviceLimit {\n+\t\t\treturn s.updateDevice(ctx, device)\n+\t\t}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1926237f-3111-46d9-980a-def419e69678&type=bug&issue=Race+condition%3A+device+limit+check+overshoots&path=pkg%2Fservices%2Fanonymous%2Fanonimpl%2Fanonstore%2Fdatabase.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:00Z"
+          },
+          {
+            "path": "pkg/services/anonymous/anonimpl/anonstore/database.go",
+            "line": 81,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: inconsistent time window basis\n\n\n  ###### Where:\n\n `pkg/services/anonymous/anonimpl/anonstore/database.go`\n\n\n  ###### Description:\n\n existing devices stop updating when device.UpdatedAt differs from current time\nNew updateDevice code builds its BETWEEN window from device.UpdatedAt (database.go:80-81), while the device-limit path in CreateOrUpdateDevice counts active devices using time.Now().UTC() (database.go:110). That is a directly visible time-window inconsistency in the same feature flow: an existing device can be considered active for the limit check yet fail the update WHERE clause if its stored/request UpdatedAt lags wall clock. The problematic bounds are newly added in this PR, not unchanged code.\n\n\n\n<br/>\n\n  ```diff\n  +\targs := []interface{}{device.ClientIP, device.UserAgent, device.UpdatedAt.UTC(), device.DeviceID,\n+\t\tdevice.UpdatedAt.UTC().Add(-anonymousDeviceExpiration), device.UpdatedAt.UTC().Add(time.Minute),\n+\t}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1926237f-3111-46d9-980a-def419e69678&type=bug&issue=LogicError%3A+inconsistent+time+window+basis&path=pkg%2Fservices%2Fanonymous%2Fanonimpl%2Fanonstore%2Fdatabase.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:05Z"
+          },
+          {
+            "path": "pkg/services/anonymous/anonimpl/anonstore/database.go",
+            "line": 18,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: anonymous auth can fail on limit error\n\n\n  ###### Where:\n\n `pkg/services/anonymous/anonimpl/anonstore/database.go`\n\n\n  ###### Description:\n\n anonymous authentication breaks when callers propagate ErrDeviceLimitReached from device tagging\nNew PR code introduces `anonstore.ErrDeviceLimitReached` in `pkg/services/anonymous/anonimpl/anonstore/database.go:18`, returns it on the device-limit path via `CreateOrUpdateDevice` -> `updateDevice` (`database.go:115-116`, `95-97`), and new caller handling in `pkg/services/anonymous/anonimpl/client.go:44-46` now propagates that specific error as a hard failure instead of only warning like other tagging errors (`client.go:49`). This makes anonymous auth fail on this new sentinel; not pre-existing and rooted in added lines.\n\n\n\n<br/>\n\n  ```diff\n  +var ErrDeviceLimitReached = fmt.Errorf(\"device limit reached\")\n+\n+\t\tif count >= s.deviceLimit {\n+\t\t\treturn s.updateDevice(ctx, device)\n+\t\t}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1926237f-3111-46d9-980a-def419e69678&type=bug&issue=LogicError%3A+anonymous+auth+can+fail+on+limit+error&path=pkg%2Fservices%2Fanonymous%2Fanonimpl%2Fanonstore%2Fdatabase.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:12Z"
+          },
+          {
+            "path": "pkg/services/anonymous/anonimpl/anonstore/database.go",
+            "line": 97,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n MisleadingError: no rows updated returns limit reached\n\n\n  ###### Where:\n\n `pkg/services/anonymous/anonimpl/anonstore/database.go`\n\n\n  ###### Description:\n\n error diagnosis misleads when device is missing or expired\nIn `pkg/services/anonymous/anonimpl/anonstore/database.go:updateDevice`, the new code returns `ErrDeviceLimitReached` whenever `RowsAffected()==0` (`database.go:95-97`). But the SQL predicate updates only matching rows within a time window (`database.go:74-81`), so zero rows can also mean the device row does not exist or is outside the allowed update window, not strictly that the device limit was reached. This is a direct visible mismatch between condition and error name/message, matching the Go code_clarity rule for misleading errors.\n\n\n\n<br/>\n\n  ```diff\n  +\t\tif rowsAffected == 0 {\n+\t\t\treturn ErrDeviceLimitReached\n+\t\t}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1926237f-3111-46d9-980a-def419e69678&type=bug&issue=MisleadingError%3A+no+rows+updated+returns+limit+reached&path=pkg%2Fservices%2Fanonymous%2Fanonimpl%2Fanonstore%2Fdatabase.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:17Z"
+          },
+          {
+            "path": "pkg/services/anonymous/anonimpl/client.go",
+            "line": 46,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: anonymous auth fails on device limit\n\n\n  ###### Where:\n\n `pkg/services/anonymous/anonimpl/client.go`\n\n\n  ###### Description:\n\n anonymous login breaks when TagDevice returns ErrDeviceLimitReached\nNew behavior in pkg/services/anonymous/anonimpl/client.go:44-46 makes Authenticate return an error when TagDevice yields anonstore.ErrDeviceLimitReached. Before this PR, the same tagging call was best-effort in a goroutine and only logged failures (client.go prior hunk at 44-53 in diff). The new error is now reachable because pkg/services/anonymous/anonimpl/anonstore/database.go:115-116 calls updateDevice when the limit is reached, and updateDevice returns ErrDeviceLimitReached on rowsAffected==0 at database.go:95-96. This is a direct logic regression in the added lines.\n\n\n\n<br/>\n\n  ```diff\n  +\tif err := a.anonDeviceService.TagDevice(ctx, httpReqCopy, anonymous.AnonDeviceUI); err != nil {\n+\t\tif errors.Is(err, anonstore.ErrDeviceLimitReached) {\n+\t\t\treturn nil, err\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1926237f-3111-46d9-980a-def419e69678&type=bug&issue=LogicError%3A+anonymous+auth+fails+on+device+limit&path=pkg%2Fservices%2Fanonymous%2Fanonimpl%2Fclient.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:24Z"
+          }
+        ]
       }
     ]
   },
@@ -34117,6 +34806,31 @@
             "created_at": "2026-04-07T19:29:37Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR9__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR9__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "config/initializers/i18n.rb",
+            "line": 17,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: NoMethodError from missing SiteSetting.default_locale\n\n\n  ###### Where:\n\n `config/initializers/i18n.rb`\n\n\n  ###### Description:\n\n boot or translation lookup breaks when SiteSetting.default_locale is nil\nReal issue. config/initializers/i18n.rb:17 unconditionally calls `SiteSetting.default_locale.to_sym`; if `default_locale` is nil, Ruby raises `NoMethodError` before `.compact` can help. This path is newly introduced in `FallbackLocaleList#[]`, which is reached by `ensure_loaded!` at config/initializers/i18n.rb:21 and invoked from app/controllers/application_controller.rb:159. No evidence in the investigated record shows a non-nil guard or initialization guarantee for `SiteSetting.default_locale`, so the nil-deref remains plausible but one upstream guarantee hop is untraced.\n\n\n\n<br/>\n\n  ```diff\n  +    [locale, SiteSetting.default_locale.to_sym, :en].uniq.compact\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ec98414e-ccb5-4a4e-921e-d269dd14d54c&type=bug&issue=Potential%3A+NoMethodError+from+missing+SiteSetting.default_locale&path=config%2Finitializers%2Fi18n.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:11:19Z"
+          },
+          {
+            "path": "lib/freedom_patches/translate_accelerator.rb",
+            "line": 64,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: race condition in lazy locale loading\n\n\n  ###### Where:\n\n `lib/freedom_patches/translate_accelerator.rb`\n\n\n  ###### Description:\n\n locale cache can double-load or corrupt when multiple threads call concurrently\nReal issue. New code introduces unsynchronized lazy initialization and check-then-act access to shared state: `@loaded_locales ||= []` and `load_locale locale unless @loaded_locales.include?(locale)` at lib/freedom_patches/translate_accelerator.rb:62-64. `load_locale` mutates the same array with `@loaded_locales << locale` at lines 50-58, so concurrent callers can both observe locale as unloaded and both execute the load path. This new path is now invoked from request handling via `ApplicationController#set_locale` calling `I18n.fallbacks.ensure_loaded!` at app/controllers/application_controller.rb:151-159, which in turn calls `I18n.ensure_loaded!` for each fallback at config/initializers/i18n.rb:20-21. One hop remains open on whether deeper internals serialize `load_locale`, so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +    def ensure_loaded!(locale)\n+      @loaded_locales ||= []\n+      load_locale locale unless @loaded_locales.include?(locale)\n+    end\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ec98414e-ccb5-4a4e-921e-d269dd14d54c&type=bug&issue=Potential%3A+race+condition+in+lazy+locale+loading&path=lib%2Ffreedom_patches%2Ftranslate_accelerator.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:11:22Z"
+          },
+          {
+            "path": "lib/freedom_patches/translate_accelerator.rb",
+            "line": 64,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: logic error from String/Symbol locale mismatch\n\n\n  ###### Where:\n\n `lib/freedom_patches/translate_accelerator.rb`\n\n\n  ###### Description:\n\n same locale loads twice when callers mix string and symbol forms\nNew code introduces raw locale-key caching in `ensure_loaded!` at lib/freedom_patches/translate_accelerator.rb:62-64 using `@loaded_locales.include?(locale)` with no normalization. New caller path `ApplicationController#set_locale` now invokes `I18n.fallbacks.ensure_loaded!` at app/controllers/application_controller.rb:159, and `FallbackLocaleList#ensure_loaded!` forwards `I18n.locale` plus symbol fallbacks from `SiteSetting.default_locale.to_sym` and `:en` at config/initializers/i18n.rb:17,20-21. Because Ruby treats String and Symbol as distinct keys, the same locale can be considered unloaded twice when mixed forms reach this cache. This is new in the PR. Confidence capped at Medium because the final storage behavior inside `load_locale` was not directly traced.\n\n\n\n<br/>\n\n  ```diff\n  +    def ensure_loaded!(locale)\n+      @loaded_locales ||= []\n+      load_locale locale unless @loaded_locales.include?(locale)\n+    end\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=ec98414e-ccb5-4a4e-921e-d269dd14d54c&type=bug&issue=Potential%3A+logic+error+from+String%2FSymbol+locale+mismatch&path=lib%2Ffreedom_patches%2Ftranslate_accelerator.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:11:25Z"
+          }
+        ]
       }
     ]
   },
@@ -35031,6 +35745,67 @@
             "created_at": "2026-04-07T19:27:58Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "grafana__grafana__cloudaeye__PR76186__20260310",
+        "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR76186__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "pkg/plugins/log/fake.go",
+            "line": 48,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: LogicError loses captured logs\n\n\n  ###### Where:\n\n `pkg/plugins/log/fake.go`\n\n\n  ###### Description:\n\n test assertions break when code logs via FromContext logger\nNew code in pkg/plugins/log/fake.go:46-48 makes TestLogger.FromContext return NewTestLogger(), i.e. a fresh logger instance rather than preserving the receiver's captured state. That is a concrete logic change visible in the diff itself. The new interface method is exercised by production logging at pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go:58 via m.logger.FromContext(ctx).Info(...), so any test using TestLogger through this path would observe logs on the new instance instead of the original receiver.\n\n\n\n<br/>\n\n  ```diff\n  +func (f *TestLogger) FromContext(_ context.Context) Logger {\n+\treturn NewTestLogger()\n+}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=Potential%3A+LogicError+loses+captured+logs&path=pkg%2Fplugins%2Flog%2Ffake.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:45:45Z"
+          },
+          {
+            "path": "pkg/plugins/log/logger.go",
+            "line": 55,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: TypeError fallback drops contextual logger\n\n\n  ###### Where:\n\n `pkg/plugins/log/logger.go`\n\n\n  ###### Description:\n\n contextual fields break when FromContext returns a non-ConcreteLogger implementation\nNew code in pkg/plugins/log/logger.go:48-55 falls back to d.New() when d.l.FromContext(ctx) is not *log.ConcreteLogger. That loses the logger returned by FromContext and therefore drops any context-derived attributes instead of preserving them. The effect is observable by downstream use in pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go:58,68,81,95,109, which logs via m.logger.FromContext(ctx). The interface contract was newly added in pkg/plugins/log/ifaces.go:22-23, so this is new behavior, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +\tconcreteInfraLogger, ok := d.l.FromContext(ctx).(*log.ConcreteLogger)\n+\tif !ok {\n+\t\treturn d.New()\n+\t}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=Potential%3A+TypeError+fallback+drops+contextual+logger&path=pkg%2Fplugins%2Flog%2Flogger.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:45:51Z"
+          },
+          {
+            "path": "pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go",
+            "line": 55,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NilPointer panic on nil request\n\n\n  ###### Where:\n\n `pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go`\n\n\n  ###### Description:\n\n plugin calls panic when middleware receives a nil request\nNew middleware methods dereference req.PluginContext without checking req for nil in pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go:39-56. QueryData/CallResource/CheckHealth/CollectMetrics all do req.PluginContext before calling next, so a nil request panics in this new code path. The middleware is newly wired into the client chain in pkg/services/pluginsintegration/pluginsintegration.go:159-160, expanding impact to plugin client requests.\n\n\n\n<br/>\n\n  ```diff\n  +func (m *ContextualLoggerMiddleware) QueryData(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {\n+\tctx = instrumentContext(ctx, endpointQueryData, req.PluginContext)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=NilPointer+panic+on+nil+request&path=pkg%2Fservices%2Fpluginsintegration%2Fclientmiddleware%2Fcontextual_logger_middleware.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:45:57Z"
+          },
+          {
+            "path": "pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go",
+            "line": 46,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NilPointer panic on nil CallResourceRequest\n\n\n  ###### Where:\n\n `pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go`\n\n\n  ###### Description:\n\n resource calls panic when middleware receives a nil request\nNew code in pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go:44-46 dereferences req.PluginContext before any nil check. If req is nil, CallResource panics immediately in this middleware. The file is entirely new in this PR, and the middleware is newly added to the production chain in pkg/services/pluginsintegration/pluginsintegration.go:159-160.\n\n\n\n<br/>\n\n  ```diff\n  +func (m *ContextualLoggerMiddleware) CallResource(ctx context.Context, req *backend.CallResourceRequest, sender backend.CallResourceResponseSender) error {\n+\tctx = instrumentContext(ctx, endpointCallResource, req.PluginContext)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=NilPointer+panic+on+nil+CallResourceRequest&path=pkg%2Fservices%2Fpluginsintegration%2Fclientmiddleware%2Fcontextual_logger_middleware.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:03Z"
+          },
+          {
+            "path": "pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go",
+            "line": 51,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NilPointer panic on nil CheckHealthRequest\n\n\n  ###### Where:\n\n `pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go`\n\n\n  ###### Description:\n\n health checks panic when middleware receives a nil request\nNew code in pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go:49-51 dereferences req.PluginContext before any nil check. If req is nil, CheckHealth panics in the middleware before delegating. This is newly introduced behavior in a new file, and the middleware is wired into the main middleware stack at pkg/services/pluginsintegration/pluginsintegration.go:159-160.\n\n\n\n<br/>\n\n  ```diff\n  +func (m *ContextualLoggerMiddleware) CheckHealth(ctx context.Context, req *backend.CheckHealthRequest) (*backend.CheckHealthResult, error) {\n+\tctx = instrumentContext(ctx, endpointCheckHealth, req.PluginContext)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=NilPointer+panic+on+nil+CheckHealthRequest&path=pkg%2Fservices%2Fpluginsintegration%2Fclientmiddleware%2Fcontextual_logger_middleware.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:09Z"
+          },
+          {
+            "path": "pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go",
+            "line": 56,
+            "body": "Review #6\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NilPointer panic on nil CollectMetricsRequest\n\n\n  ###### Where:\n\n `pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go`\n\n\n  ###### Description:\n\n metrics collection panics when middleware receives a nil request\nNew code in pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go:54-56 dereferences req.PluginContext with no nil guard before calling m.next.CollectMetrics. This middleware was newly inserted into the chain in pkg/services/pluginsintegration/pluginsintegration.go:159-160, so the panic path is introduced by this PR rather than pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +func (m *ContextualLoggerMiddleware) CollectMetrics(ctx context.Context, req *backend.CollectMetricsRequest) (*backend.CollectMetricsResult, error) {\n+\tctx = instrumentContext(ctx, endpointCollectMetrics, req.PluginContext)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=NilPointer+panic+on+nil+CollectMetricsRequest&path=pkg%2Fservices%2Fpluginsintegration%2Fclientmiddleware%2Fcontextual_logger_middleware.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:14Z"
+          },
+          {
+            "path": "pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go",
+            "line": 58,
+            "body": "Review #7\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: traceID logging removed\n\n\n  ###### Where:\n\n `pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go`\n\n\n  ###### Description:\n\n request correlation breaks when debugging plugin calls across traced services\nThe PR removed explicit trace extraction from logger_middleware.go: prior code appended tracing.TraceIDFromContext(ctx, false) into logParams, while new code at pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go:58 only does m.logger.FromContext(ctx).Info(...). The replacement context enrichment added in pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go:27-37 restores pluginId/endpoint/ds/user fields but does not add traceID. pluginsintegration.go:159-160 inserts that middleware before LoggerMiddleware, so plugin metadata is recovered through context, but traceID is not. One downstream hop remains open\u2014whether the underlying contextual logger auto-emits trace IDs\u2014so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +\tm.logger.FromContext(ctx).Info(\"Plugin Request Completed\", logParams...)\n-\ttraceID := tracing.TraceIDFromContext(ctx, false)\n-\tif traceID != \"\" {\n-\t\tlogParams = append(logParams, \"traceID\", traceID)\n-\t}\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=TypeError%3A+traceID+logging+removed&path=pkg%2Fservices%2Fpluginsintegration%2Fclientmiddleware%2Flogger_middleware.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:21Z"
+          },
+          {
+            "path": "pkg/services/pluginsintegration/pluginsintegration.go",
+            "line": 160,
+            "body": "Review #8\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: NilPointer panic in ContextualLoggerMiddleware\n\n\n  ###### Where:\n\n `pkg/services/pluginsintegration/pluginsintegration.go`\n\n\n  ###### Description:\n\n plugin requests panic when a nil request reaches middleware\nNew middleware is inserted in pkg/services/pluginsintegration/pluginsintegration.go:159-160, and the newly added ContextualLoggerMiddleware dereferences req.PluginContext without a nil check in QueryData/CallResource/CheckHealth/CollectMetrics at pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go:39-56. This is new code and matches the Go nil-request middleware golden case.\n\n\n\n<br/>\n\n  ```diff\n  +\t\tclientmiddleware.NewContextualLoggerMiddleware(),\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=Potential%3A+NilPointer+panic+in+ContextualLoggerMiddleware&path=pkg%2Fservices%2Fpluginsintegration%2Fpluginsintegration.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:27Z"
+          },
+          {
+            "path": "pkg/services/pluginsintegration/pluginsintegration.go",
+            "line": 160,
+            "body": "Review #9\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: traceID logging regression in plugin logs\n\n\n  ###### Where:\n\n `pkg/services/pluginsintegration/pluginsintegration.go`\n\n\n  ###### Description:\n\n request correlation breaks when contextual logger omits traceID fields\nThis PR changes LoggerMiddleware from explicitly appending traceID to logParams to using m.logger.FromContext(ctx).Info(...) in pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go:49-58, while the new context enrichment added by instrumentContext only injects endpoint/pluginId/dsName/dsUID/uname and no traceID at pkg/services/pluginsintegration/clientmiddleware/contextual_logger_middleware.go:26-37. CreateMiddlewares now wires NewContextualLoggerMiddleware before NewLoggerMiddleware at pkg/services/pluginsintegration/pluginsintegration.go:159-160, so the prior explicit traceID logging was removed by new code. Confidence is Medium because the remaining open hop is whether pkg/plugins/log/logger.go:48-56 / underlying infra log automatically restores trace metadata from context.\n\n\n\n<br/>\n\n  ```diff\n  +\t\tclientmiddleware.NewContextualLoggerMiddleware(),\nclientmiddleware.NewLoggerMiddleware(cfg, log.New(\"plugin.instrumentation\")),\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7ad8821c-415c-4908-8696-5c1091ae33fb&type=bug&issue=Potential%3A+traceID+logging+regression+in+plugin+logs&path=pkg%2Fservices%2Fpluginsintegration%2Fpluginsintegration.go\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-20T15:46:30Z"
+          }
+        ]
       }
     ]
   },
@@ -36703,6 +37478,31 @@
             "created_at": "2026-04-07T19:31:16Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR10__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR10__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/models/embeddable_host.rb",
+            "line": 8,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError: before_validation dereferences nil host\n\n\n  ###### Where:\n\n `app/models/embeddable_host.rb`\n\n\n  ###### Description:\n\n validation crashes when host is nil because sub! is called unconditionally\nReal issue. New code in app/models/embeddable_host.rb:5-8 adds a before_validation callback that unconditionally calls self.host.sub! twice. If host is nil, this raises NoMethodError during save/validation before validates_format_of can run. This is directly visible in the added lines and matches the Ruby model callback nil-dereference pattern.\n\n\n\n<br/>\n\n  ```diff\n  +  before_validation do\n+    self.host.sub!(/^https?:\\/\\//, '')\n+    self.host.sub!(/\\/.*$/, '')\n+  end\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=39c73126-d4e3-49fb-a041-8bf8ebf386c0&type=bug&issue=NoMethodError%3A+before_validation+dereferences+nil+host&path=app%2Fmodels%2Fembeddable_host.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:34Z"
+          },
+          {
+            "path": "app/controllers/admin/embeddable_hosts_controller.rb",
+            "line": 16,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError on missing EmbeddableHost in update/destroy\n\n\n  ###### Where:\n\n `app/controllers/admin/embeddable_hosts_controller.rb`\n\n\n  ###### Description:\n\n request crashes when params[:id] does not match any record\nNew code introduces nil-unsafe record lookup in Admin::EmbeddableHostsController#update/#destroy: `EmbeddableHost.where(id: params[:id]).first` can return nil, then `save_host(host)` dereferences `host` at app/controllers/admin/embeddable_hosts_controller.rb:22-25 and `host.destroy` is called directly at :16. No nil guard is present. Routes newly expose these actions via config/routes.rb:153, so this is not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +    host = EmbeddableHost.where(id: params[:id]).first\n+    save_host(host)\n...\n+    host = EmbeddableHost.where(id: params[:id]).first\n+    host.destroy\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=39c73126-d4e3-49fb-a041-8bf8ebf386c0&type=bug&issue=NoMethodError+on+missing+EmbeddableHost+in+update%2Fdestroy&path=app%2Fcontrollers%2Fadmin%2Fembeddable_hosts_controller.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:39Z"
+          },
+          {
+            "path": "app/assets/javascripts/admin/components/embeddable-host.js.es6",
+            "line": 47,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: Unhandled rejection from destroyRecord promise\n\n\n  ###### Where:\n\n `app/assets/javascripts/admin/components/embeddable-host.js.es6`\n\n\n  ###### Description:\n\n delete flow silently fails when backend destroy request rejects\nNew code in app/assets/javascripts/admin/components/embeddable-host.js.es6:43-49 calls host.destroyRecord().then(...) with no rejection handler, unlike the sibling save path at :37-40 which explicitly .catch(popupAjaxError). The only upstream callers visible are the delete button in app/assets/javascripts/admin/templates/components/embeddable-host.hbs:17 and row rendering in app/assets/javascripts/admin/templates/embedding.hbs:8-9; no caller-side guard handles destroy failures. Because one remaining hop to framework/global promise handling was not fully traced, confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +          this.get('host').destroyRecord().then(() => {\n+            this.sendAction('deleteHost', this.get('host'));\n+          });\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=39c73126-d4e3-49fb-a041-8bf8ebf386c0&type=bug&issue=Potential%3A+Unhandled+rejection+from+destroyRecord+promise&path=app%2Fassets%2Fjavascripts%2Fadmin%2Fcomponents%2Fembeddable-host.js.es6\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:43Z"
+          }
+        ]
       }
     ]
   },
@@ -37867,6 +38667,19 @@
             "created_at": "2026-04-07T19:28:29Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR7__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR7__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/assets/stylesheets/mobile/topic-post.scss",
+            "line": 182,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: light-theme heading lightness changed\n\n\n  ###### Where:\n\n `app/assets/stylesheets/mobile/topic-post.scss`\n\n\n  ###### Description:\n\n heading contrast changes unexpectedly when light theme uses 50% instead of previous 20%\nThe diff shows a concrete new behavior change at app/assets/stylesheets/mobile/topic-post.scss:182 in `.topic-list-item h3`: `scale-color($primary, $lightness: 20%)` became `dark-light-choose(scale-color($primary, $lightness: 50%), scale-color($secondary, $lightness: 50%))`. This is new code, not unchanged context, and it specifically changes the light-theme branch from 20% to 50%, matching the alert. Sibling changes in the same file at :190 and elsewhere show broader theme adaptation, but they do not negate that this exact `+` line introduced a heading-style logic change.\n\n\n\n<br/>\n\n  ```diff\n  +    color: dark-light-choose(scale-color($primary, $lightness: 50%), scale-color($secondary, $lightness: 50%));\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=a8834bea-c1c4-4583-9ced-79e2f7a21aa8&type=bug&issue=LogicError%3A+light-theme+heading+lightness+changed&path=app%2Fassets%2Fstylesheets%2Fmobile%2Ftopic-post.scss\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:19:24Z"
+          }
+        ]
       }
     ]
   },
@@ -39446,6 +40259,37 @@
             "created_at": "2026-04-07T19:31:09Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR8__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR8__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/controllers/admin/groups_controller.rb",
+            "line": 71,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError: add_members assumes usernames is String\n\n\n  ###### Where:\n\n `app/controllers/admin/groups_controller.rb`\n\n\n  ###### Description:\n\n member addition crashes when usernames arrives as Array instead of comma string\nNew code in app/controllers/admin/groups_controller.rb:add_members assigns `usernames = params.require(:usernames)` and then unconditionally calls `usernames.split(\",\")`. In Ruby, an Array does not implement `split`, so a non-String payload raises `NoMethodError` at this new code site. The endpoint is newly exposed at config/routes.rb:49-50 (`PUT /admin/groups/members`). No type guard or rescue is present in the action.\n\n\n\n<br/>\n\n  ```diff\n  +    usernames = params.require(:usernames)\n...\n+    usernames.split(\",\").each do |username|\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e910ab5f-1cad-4188-a40a-e1c83092cbe0&type=bug&issue=NoMethodError%3A+add_members+assumes+usernames+is+String&path=app%2Fcontrollers%2Fadmin%2Fgroups_controller.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:19:15Z"
+          },
+          {
+            "path": "app/assets/javascripts/admin/controllers/admin-group.js.es6",
+            "line": 14,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: Off-by-one last page skips members\n\n\n  ###### Where:\n\n `app/assets/javascripts/admin/controllers/admin-group.js.es6`\n\n\n  ###### Description:\n\n pagination can mark last page too early when user_count is divisible by limit\nNew pagination code introduces a real off-by-one. admin-group.js.es6:11-14 computes `totalPages` as `Math.floor(user_count / limit) + 1`; when `user_count` is an exact multiple of `limit`, this overcounts by one. That value feeds `showingLast` at admin-group.js.es6:16-17, so `next` at lines 29-38 allows advancing one page too far. Downstream, group.js:23-29 clamps offset to `user_count` and requests `/groups/:name/members.json` with that offset, producing an empty extra page. This logic is newly introduced in the PR, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +  totalPages: function() {\n+    if (this.get(\"user_count\") == 0) { return 0; }\n+    return Math.floor(this.get(\"user_count\") / this.get(\"limit\")) + 1;\n+  }.property(\"limit\", \"user_count\"),\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e910ab5f-1cad-4188-a40a-e1c83092cbe0&type=bug&issue=Potential%3A+Off-by-one+last+page+skips+members&path=app%2Fassets%2Fjavascripts%2Fadmin%2Fcontrollers%2Fadmin-group.js.es6\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:19:18Z"
+          },
+          {
+            "path": "app/assets/javascripts/admin/routes/admin_group_route.js",
+            "line": 13,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: ignored Promise rejection hides fetch error\n\n\n  ###### Where:\n\n `app/assets/javascripts/admin/routes/admin_group_route.js`\n\n\n  ###### Description:\n\n error reporting breaks when findMembers request fails\nNew code in `app/assets/javascripts/admin/routes/admin_group_route.js:13` calls `model.findMembers()` from synchronous `setupController` without `return`, `then`, or error handling. `findMembers` performs `Discourse.ajax(...).then(...)` (`app/assets/javascripts/discourse/models/group.js:21-36`), so AJAX rejection propagates as a rejected Promise with no local handler. This issue is introduced by the PR because the prior route code returned the Promise from `afterModel` instead of fire-and-forget.\n\n\n\n<br/>\n\n  ```diff\n  +    model.findMembers();\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e910ab5f-1cad-4188-a40a-e1c83092cbe0&type=bug&issue=Potential%3A+ignored+Promise+rejection+hides+fetch+error&path=app%2Fassets%2Fjavascripts%2Fadmin%2Froutes%2Fadmin_group_route.js\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:19:21Z"
+          },
+          {
+            "path": "app/assets/javascripts/discourse/models/group.js",
+            "line": 59,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: stale members race after async reload\n\n\n  ###### Where:\n\n `app/assets/javascripts/discourse/models/group.js`\n\n\n  ###### Description:\n\n member list shows outdated data when multiple reloads finish out of order\nReal issue. In app/assets/javascripts/discourse/models/group.js:40-59, both removeMember() and addMembers() trigger self.findMembers() after async AJAX success without any sequencing/cancellation. app/assets/javascripts/discourse/models/group.js:21-35 then unconditionally applies the returned result via setProperties({ user_count, limit, offset, members }), so overlapping reloads on the same model can resolve out of order and overwrite newer state with stale members. Upstream callers include admin/controllers/admin-group.js.es6:51-65 and route setup reloads in admin/routes/admin_group_route.js:12-13 and discourse/routes/group-members.js.es6:10-11, which increase concurrent reload opportunities.\n\n\n\n<br/>\n\n  ```diff\n  +    }).then(function() {\n+      // reload member list\n+      self.findMembers();\n+    });\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e910ab5f-1cad-4188-a40a-e1c83092cbe0&type=bug&issue=Potential%3A+stale+members+race+after+async+reload&path=app%2Fassets%2Fjavascripts%2Fdiscourse%2Fmodels%2Fgroup.js\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:19:27Z"
+          }
+        ]
       }
     ]
   },
@@ -40598,6 +41442,25 @@
             "created_at": "2026-04-07T19:29:21Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR3__20260407",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR3__20260407/pull/1",
+        "review_comments": [
+          {
+            "path": "app/assets/javascripts/discourse/controllers/create_account_controller.js",
+            "line": 74,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: LogicError stale invalid email rejection\n\n\n  ###### Where:\n\n `app/assets/javascripts/discourse/controllers/create_account_controller.js`\n\n\n  ###### Description:\n\n valid addresses stay blocked when server rejected same email once transiently\nNew code adds a persistent client-side rejection cache: `emailValidation` now fails whenever `rejectedEmails` contains the current email (`app/assets/javascripts/discourse/controllers/create_account_controller.js:69-74`), and the cache is populated on account-creation failure when the server returns `result.errors.email` and `result.values.email` (`:274-276`). The computed property was also updated to react to this list (`:96`). In the examined controller, there is no code clearing `rejectedEmails` on email edits, retry, or success, so a previously rejected address remains blocked for the lifetime of the controller instance. This behavior is newly introduced on + lines, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +    if (this.get('rejectedEmails').contains(email)) {\n+      return Discourse.InputValidation.create({\n+        failed: true,\n+        reason: I18n.t('user.email.invalid')\n+      });\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4679b00f-0ad2-4873-952c-8525f62acf63&type=bug&issue=Potential%3A+LogicError+stale+invalid+email+rejection&path=app%2Fassets%2Fjavascripts%2Fdiscourse%2Fcontrollers%2Fcreate_account_controller.js\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:30:55Z"
+          },
+          {
+            "path": "app/models/blocked_email.rb",
+            "line": 16,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: race condition loses match_count increments\n\n\n  ###### Where:\n\n `app/models/blocked_email.rb`\n\n\n  ###### Description:\n\n statistics become inaccurate when concurrent checks update the same record\nNew code in app/models/blocked_email.rb:11-18 performs a non-atomic read/modify/write on the same ActiveRecord row: `match_count += 1` followed by `save`. Concurrent requests can both read the same prior value and overwrite each other, losing increments. This path is reachable from validation via lib/validators/email_validator.rb:13, which is wired into User validation at app/models/user.rb:47 and exercised during signup in app/controllers/users_controller.rb:160-210. No locking or atomic counter update is present.\n\n\n\n<br/>\n\n  ```diff\n  +      record.match_count += 1\n+      record.last_match_at = Time.zone.now\n+      record.save\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=4679b00f-0ad2-4873-952c-8525f62acf63&type=bug&issue=Potential%3A+race+condition+loses+match_count+increments&path=app%2Fmodels%2Fblocked_email.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-22T07:30:58Z"
+          }
+        ]
       }
     ]
   },
@@ -41603,6 +42466,25 @@
             "created_at": "2026-04-07T19:28:31Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR5__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR5__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/assets/stylesheets/common/foundation/mixins.scss",
+            "line": 121,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n CSS error: invalid -ms-align-items property\n\n\n  ###### Where:\n\n `app/assets/stylesheets/common/foundation/mixins.scss`\n\n\n  ###### Description:\n\n IE/legacy Edge alignment breaks when this mixin is used there\nNew code at app/assets/stylesheets/common/foundation/mixins.scss:117-123 adds align-items mixin, and line 121 introduces `-ms-align-items`, which is not a valid legacy IE/Edge flexbox property. The mixin already emits the correct MS property on line 120 (`-ms-flex-align`), so the added line is a structural vendor-prefix naming error directly visible in the diff. New PR callers are header.scss:18, topic-post.scss:265, and badges.css.scss:57.\n\n\n\n<br/>\n\n  ```diff\n  +    -ms-align-items: $alignment;\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d00d701-003b-475c-9cc8-84f22f1bd296&type=bug&issue=CSS+error%3A+invalid+-ms-align-items+property&path=app%2Fassets%2Fstylesheets%2Fcommon%2Ffoundation%2Fmixins.scss\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:04:49Z"
+          },
+          {
+            "path": "app/assets/stylesheets/common/foundation/mixins.scss",
+            "line": 127,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n CSS error: wrong -webkit-box-ordinal-group mapping\n\n\n  ###### Where:\n\n `app/assets/stylesheets/common/foundation/mixins.scss`\n\n\n  ###### Description:\n\n item order breaks when old WebKit/Firefox box flexbox uses order values\nNew code at app/assets/stylesheets/common/foundation/mixins.scss:125-130 maps modern `order: $val` directly to `-webkit-box-ordinal-group` and `-moz-box-ordinal-group`. Those 2009 flexbox properties use different semantics than modern `order` and are not equivalent one-to-one, so the added mapping is incorrect on its own. This is newly introduced in the PR and used by new call sites in header.scss:39 and topic.scss:30.\n\n\n\n<br/>\n\n  ```diff\n  +  -webkit-box-ordinal-group: $val;\n+  -moz-box-ordinal-group: $val;\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d00d701-003b-475c-9cc8-84f22f1bd296&type=bug&issue=CSS+error%3A+wrong+-webkit-box-ordinal-group+mapping&path=app%2Fassets%2Fstylesheets%2Fcommon%2Ffoundation%2Fmixins.scss\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:04:52Z"
+          }
+        ]
       }
     ]
   },
@@ -42634,6 +43516,19 @@
             "created_at": "2026-04-07T19:29:01Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR6__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR6__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/serializers/user_serializer.rb",
+            "line": 154,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError: include_website_name hook may be ignored\n\n\n  ###### Where:\n\n `app/serializers/user_serializer.rb`\n\n\n  ###### Description:\n\n serialization includes website_name unconditionally when serializer expects include_website_name? predicate naming\nNew code introduces `def include_website_name` at app/serializers/user_serializer.rb:153-154, but this serializer\u2019s established include-hook contract uses `include_<attr>?`: see dynamic definitions in `staff_attributes`, `private_attributes`, and `untrusted_attributes` at app/serializers/user_serializer.rb:7-25, plus manual hooks `include_email?` at line 106 and `include_card_image_badge_id?` / `include_card_image_badge?` at lines 161-168. Since `website_name` was newly added as an attribute at line 43, the guard method name does not match the file-local serializer convention and is therefore a real logic error in new code. Exact framework fallback behavior was not traced beyond this class, so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +  def include_website_name\n+    website.present?\n+  end\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=860ef8fa-67e0-46c9-a30e-b40e6aa39f4b&type=bug&issue=NoMethodError%3A+include_website_name+hook+may+be+ignored&path=app%2Fserializers%2Fuser_serializer.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:10:22Z"
+          }
+        ]
       }
     ]
   },
@@ -44573,6 +45468,79 @@
             "created_at": "2026-04-07T19:29:25Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR4__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR4__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/jobs/scheduled/poll_feed.rb",
+            "line": 35,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError: missing content crashes feed polling\n\n\n  ###### Where:\n\n `app/jobs/scheduled/poll_feed.rb`\n\n\n  ###### Description:\n\n job crashes when an RSS item lacks a content field\nValid new-code nil dereference. In app/jobs/scheduled/poll_feed.rb:31-36, `poll_feed` iterates feed items and directly executes `CGI.unescapeHTML(i.content.scrub)`. If an item lacks `content`, `i.content` is nil and `.scrub` raises `NoMethodError`. No guard or rescue is present in `Jobs::PollFeed.poll_feed`, and no earlier line in the method validates item content before this use.\n\n\n\n<br/>\n\n  ```diff\n  +        content = CGI.unescapeHTML(i.content.scrub)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=NoMethodError%3A+missing+content+crashes+feed+polling&path=app%2Fjobs%2Fscheduled%2Fpoll_feed.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:04Z"
+          },
+          {
+            "path": "app/jobs/scheduled/poll_feed.rb",
+            "line": 29,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n RuntimeError: network and parse failures are unhandled\n\n\n  ###### Where:\n\n `app/jobs/scheduled/poll_feed.rb`\n\n\n  ###### Description:\n\n scheduled job fails noisily when the feed is unreachable or malformed\nValid new error-handling gap. In app/jobs/scheduled/poll_feed.rb:29, Jobs::PollFeed#poll_feed newly calls `SimpleRSS.parse open(SiteSetting.feed_polling_url)` with no local rescue around either the network fetch or parse. The diff shows no enclosing exception handling in this method, so unreachable/malformed feeds raise out of the job. Reachability is concrete: Jobs::PollFeed#execute calls poll_feed at app/jobs/scheduled/poll_feed.rb:14-18, and lib/topic_retriever.rb:39-42 newly invokes `Jobs::PollFeed.new.execute({})`, expanding blast radius.\n\n\n\n<br/>\n\n  ```diff\n  +      rss = SimpleRSS.parse open(SiteSetting.feed_polling_url)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=RuntimeError%3A+network+and+parse+failures+are+unhandled&path=app%2Fjobs%2Fscheduled%2Fpoll_feed.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:07Z"
+          },
+          {
+            "path": "app/models/topic_embed.rb",
+            "line": 36,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError: nil post dereference on stale embed\n\n\n  ###### Where:\n\n `app/models/topic_embed.rb`\n\n\n  ###### Description:\n\n revise crashes when embed exists but associated post is missing\nNew code in `TopicEmbed.import` takes `post = embed.post` and immediately passes it to `PostRevisor.new(post)` / `revise!` with no nil guard (`app/models/topic_embed.rb:32-36`). Because `belongs_to :post` does not guarantee the associated row still exists, a stale `post_id` makes `embed.post` nil and this path raises at runtime. This dereference was introduced in the added update path, so it is new-code, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +      post = embed.post\n+      # Update the topic if it changed\n+      if content_sha1 != embed.content_sha1\n+        revisor = PostRevisor.new(post)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=NoMethodError%3A+nil+post+dereference+on+stale+embed&path=app%2Fmodels%2Ftopic_embed.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:09Z"
+          },
+          {
+            "path": "lib/topic_retriever.rb",
+            "line": 49,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError from nil downcase on missing setting\n\n\n  ###### Where:\n\n `lib/topic_retriever.rb`\n\n\n  ###### Description:\n\n topic retrieval crashes when embed_by_username is unset or nil\nlib/topic_retriever.rb:49 newly calls `SiteSetting.embed_by_username.downcase` with no local nil guard; if the setting is unset, Ruby raises `NoMethodError` on `nil.downcase`. The scheduled guard in app/jobs/scheduled/poll_feed.rb:15-17 does not protect this path, because direct retrieval still reaches lib/topic_retriever.rb:45-52 via app/controllers/embed_controller.rb:15 -> app/jobs/regular/retrieve_topic.rb:17 -> TopicRetriever.retrieve.\n\n\n\n<br/>\n\n  ```diff\n  +      user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=NoMethodError+from+nil+downcase+on+missing+setting&path=lib%2Ftopic_retriever.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:13Z"
+          },
+          {
+            "path": "app/assets/javascripts/embed.js",
+            "line": 12,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: comments container may be null\n\n\n  ###### Where:\n\n `app/assets/javascripts/embed.js`\n\n\n  ###### Description:\n\n script crashes when #discourse-comments is absent on the page\nNew file app/assets/javascripts/embed.js:5-12 reads `document.getElementById('discourse-comments')` into `comments` and immediately dereferences it with `comments.appendChild(iframe)` at line 12, with no null guard and no enclosing try/catch. This matches an unguarded DOM null dereference in new code. Upstream guarantee that the host page always provides `#discourse-comments` was not confirmed in the examined record, so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +  comments.appendChild(iframe);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=TypeError%3A+comments+container+may+be+null&path=app%2Fassets%2Fjavascripts%2Fembed.js\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:15Z"
+          },
+          {
+            "path": "app/assets/javascripts/embed.js",
+            "line": 17,
+            "body": "Review #6\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: origin check accepts malicious superstrings\n\n\n  ###### Where:\n\n `app/assets/javascripts/embed.js`\n\n\n  ###### Description:\n\n message validation breaks when attacker origin contains discourseUrl as a substring\nNew code in app/assets/javascripts/embed.js:17 validates message origin with `discourseUrl.indexOf(e.origin) === -1`, which is a substring check rather than an exact origin comparison. Under the JS auth/origin rule, this is a structural bypass pattern: an attacker-controlled origin containing the trusted string as a substring can pass. No stronger validation appears later in app/assets/javascripts/embed.js:15-23, and the paired sender flow is newly introduced in app/views/layouts/embed.html.erb:8-12.\n\n\n\n<br/>\n\n  ```diff\n  +    if (discourseUrl.indexOf(e.origin) === -1) { return; }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=TypeError%3A+origin+check+accepts+malicious+superstrings&path=app%2Fassets%2Fjavascripts%2Fembed.js\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:19Z"
+          },
+          {
+            "path": "app/views/embed/best.html.erb",
+            "line": 6,
+            "body": "Review #7\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n SyntaxError: invalid ERB block closing\n\n\n  ###### Where:\n\n `app/views/embed/best.html.erb`\n\n\n  ###### Description:\n\n template rendering breaks when ERB parses invalid `end if` syntax\nNew code in app/views/embed/best.html.erb:2-6 introduces `<%- end if %>` at line 6. ERB evaluates Ruby block syntax, and an `if` block must close with `end`, not `end if`; this is a structural parse/render error visible directly in the added line. Upstream impact is real because config/routes.rb:212 routes `embed/best` to EmbedController#best, and app/controllers/embed_controller.rb:8-16 renders this template on the topic-found path under the embed layout.\n\n\n\n<br/>\n\n  ```diff\n  +  <%- end if %>\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=SyntaxError%3A+invalid+ERB+block+closing&path=app%2Fviews%2Fembed%2Fbest.html.erb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:22Z"
+          },
+          {
+            "path": "spec/controllers/embed_controller_spec.rb",
+            "line": 16,
+            "body": "Review #8\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: Test name/body mismatch on error assertion\n\n\n  ###### Where:\n\n `spec/controllers/embed_controller_spec.rb`\n\n\n  ###### Description:\n\n failures are misattributed when implementation returns 404 instead of raising\nVisible naming contradiction in new lines. The example name says it \"raises an error\" (`spec/controllers/embed_controller_spec.rb:13`) but the body only asserts `response.should_not be_success` (`:15-16`). Controller behavior confirms the mismatch: `EmbedController#best` is protected by `before_filter :ensure_embeddable`, and `ensure_embeddable` actually raises `Discourse::InvalidAccess` when host is missing (`app/controllers/embed_controller.rb:3,20-24`). This is a naming_consistency issue in the spec, not a 404-only behavior.\n\n\n\n<br/>\n\n  ```diff\n  +  it \"raises an error with a missing host\" do\n+    SiteSetting.stubs(:embeddable_host).returns(nil)\n+    get :best, embed_url: embed_url\n+    response.should_not be_success\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=bug&issue=Potential%3A+Test+name%2Fbody+mismatch+on+error+assertion&path=spec%2Fcontrollers%2Fembed_controller_spec.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:24Z"
+          },
+          {
+            "path": "app/jobs/scheduled/poll_feed.rb",
+            "line": 29,
+            "body": "Review #9\n\n\n  ![License](https://img.shields.io/badge/Category-SECURITY__ALERT-blue.svg) \n  ![License](https://img.shields.io/badge/Risk_Type-SSRF:__open--uri__fetches__unvalidated__feed__URL-purple.svg) \n  ![License](https://img.shields.io/badge/Risk_Score-4.0-red.svg)\n\n\n\n  ###### Vulnerability Details: \n internal network access becomes possible when feed_polling_url points to metadata or private hosts\nNew code in app/jobs/scheduled/poll_feed.rb:24-30 fetches `SiteSetting.feed_polling_url` via `open(...)` with only presence checks in execute at lines 15-17. No scheme/host allowlist or other destination validation is visible before this sink. In Ruby, `open-uri` will fetch arbitrary URLs, and admin-configurable site settings are a valid SSRF source under the rules. Related new code in lib/topic_retriever.rb:39-45 even shows host validation was added for embed retrieval paths, but no analogous guard protects `feed_polling_url`.\n\n\n\n  ###### Where:\n `app/jobs/scheduled/poll_feed.rb`\n\n\n\n<br/>\n\n  ```diff\n  +      rss = SimpleRSS.parse open(SiteSetting.feed_polling_url)\n  ```\n\n\n\n<br/>\n\n  ###### Security Justification:\n\n  internal network access becomes possible when feed_polling_url points to metadata or private hosts\n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=security&vulnerability_name=SSRF%3A+open-uri+fetches+unvalidated+feed+URL&path=app%2Fjobs%2Fscheduled%2Fpoll_feed.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:28Z"
+          },
+          {
+            "path": "app/models/topic_embed.rb",
+            "line": 49,
+            "body": "Review #10\n\n\n  ![License](https://img.shields.io/badge/Category-SECURITY__ALERT-blue.svg) \n  ![License](https://img.shields.io/badge/Risk_Type-SSRF:__open__fetches__untrusted__URLs-purple.svg) \n  ![License](https://img.shields.io/badge/Risk_Score-4.0-red.svg)\n\n\n\n  ###### Vulnerability Details: \n internal network access occurs when attacker controls import_remote URL\nNew code introduces a network fetch sink at app/models/topic_embed.rb:48 via open(url).read. A concrete new source-to-sink path exists: lib/topic_retriever.rb:49-52 passes @embed_url into TopicEmbed.import_remote, and TopicRetriever is reached from new controller/job paths (app/controllers/embed_controller.rb:15 and app/jobs/regular/retrieve_topic.rb:17). The only guard in lib/topic_retriever.rb:14-19 checks URI host equality with SiteSetting.embeddable_host; it does not sanitize the URL before open(), so arbitrary fetches to the configured host remain possible. This is sufficient for SSRF validation under the Ruby rule for open(url) without URL validation.\n\n\n\n  ###### Where:\n `app/models/topic_embed.rb`\n\n\n\n<br/>\n\n  ```diff\n  +    doc = Readability::Document.new(open(url).read,\n  ```\n\n\n\n<br/>\n\n  ###### Security Justification:\n\n  internal network access occurs when attacker controls import_remote URL\n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=security&vulnerability_name=SSRF%3A+open%28url%29+fetches+untrusted+URLs&path=app%2Fmodels%2Ftopic_embed.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:30Z"
+          },
+          {
+            "path": "app/models/topic_embed.rb",
+            "line": 13,
+            "body": "Review #11\n\n\n  ![License](https://img.shields.io/badge/Category-SECURITY__ALERT-blue.svg) \n  ![License](https://img.shields.io/badge/Risk_Type-XSS:__unescaped__URL__interpolated__into__HTML-purple.svg) \n  ![License](https://img.shields.io/badge/Risk_Score-4.0-red.svg)\n\n\n\n  ###### Vulnerability Details: \n script injection renders when url contains quotes or HTML\nNew code in app/models/topic_embed.rb:13 builds HTML with raw `url` in both href and link text (`<a href='#{url}'>#{url}</a>`) without escaping. The only guard at app/models/topic_embed.rb:11 checks scheme (`http/https`) and does not sanitize quotes or HTML. Concrete source->sink paths are visible: app/jobs/scheduled/poll_feed.rb:31-36 passes external feed item URLs into `TopicEmbed.import`, and lib/topic_retriever.rb:48-52 -> app/models/topic_embed.rb:48-52 fetches remote content and then calls the same import path. This matches injection_protection guidance for HTML interpolation of user-controlled data.\n\n\n\n  ###### Where:\n `app/models/topic_embed.rb`\n\n\n\n<br/>\n\n  ```diff\n  +    contents << \"\\n<hr>\\n<small>#{I18n.t('embed.imported_from', link: \"<a href='#{url}'>#{url}</a>\")}</small>\\n\"\n  ```\n\n\n\n<br/>\n\n  ###### Security Justification:\n\n  script injection renders when url contains quotes or HTML\n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=b10051a0-b171-42a5-8c66-698feba23737&type=security&vulnerability_name=XSS%3A+unescaped+URL+interpolated+into+HTML&path=app%2Fmodels%2Ftopic_embed.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:20:36Z"
+          }
+        ]
       }
     ]
   },
@@ -45858,6 +46826,37 @@
             "created_at": "2026-04-07T19:29:04Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR1__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR1__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/assets/javascripts/discourse/lib/utilities.js",
+            "line": 182,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: site upload limit ignored\n\n\n  ###### Where:\n\n `app/assets/javascripts/discourse/lib/utilities.js`\n\n\n  ###### Description:\n\n size validation breaks when site settings allow larger or smaller uploads\nThe new `+` line in `app/assets/javascripts/discourse/lib/utilities.js` replaces dynamic per-type site settings (`Discourse.SiteSettings['max_' + type + '_size_kb']`) with a hardcoded `10 * 1024`. This is directly visible in the diff and changes acceptance logic for client-side upload validation, so configured limits larger or smaller than 10MB are ignored. This is new in the PR, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +    var maxSizeKB = 10 * 1024; // 10MB\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=8bde84b1-95e7-4596-8ee1-91ed090aecc8&type=bug&issue=LogicError%3A+site+upload+limit+ignored&path=app%2Fassets%2Fjavascripts%2Fdiscourse%2Flib%2Futilities.js\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:04:32Z"
+          },
+          {
+            "path": "app/assets/javascripts/discourse/lib/utilities.js",
+            "line": 246,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: 413 message reports wrong max size\n\n\n  ###### Where:\n\n `app/assets/javascripts/discourse/lib/utilities.js`\n\n\n  ###### Description:\n\n error feedback breaks when server limit differs from hardcoded 10MB\nThe new `+` line in `app/assets/javascripts/discourse/lib/utilities.js` replaces `Discourse.SiteSettings.max_image_size_kb` with a hardcoded `10 * 1024` in the 413 handler. That makes the user-facing 'file too large' message report 10MB even when the configured server/site limit differs. This is a distinct new bug from issue-1 because it affects error reporting rather than the validation gate.\n\n\n\n<br/>\n\n  ```diff\n  +          var maxSizeKB = 10 * 1024; // 10 MB\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=8bde84b1-95e7-4596-8ee1-91ed090aecc8&type=bug&issue=LogicError%3A+413+message+reports+wrong+max+size&path=app%2Fassets%2Fjavascripts%2Fdiscourse%2Flib%2Futilities.js\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:04:35Z"
+          },
+          {
+            "path": "app/controllers/uploads_controller.rb",
+            "line": 67,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n ArgumentError: percentage resize string may break downsize\n\n\n  ###### Where:\n\n `app/controllers/uploads_controller.rb`\n\n\n  ###### Description:\n\n image reduction breaks when animated-image downsize path expects WxH geometry\nNew code in `UploadsController#create_upload` passes the percentage string `\"80%\"` into `OptimizedImage.downsize(...)` (app/controllers/uploads_controller.rb:67). In `OptimizedImage`, that exact `dimensions` value is forwarded unchanged through `downsize` -> `optimize` (app/models/optimized_image.rb:149-156). When `allow_animation` is true and the source path matches `.GIF`, `optimize` switches to the animated instruction builder (`*_animated`) (app/models/optimized_image.rb:153-156), so the new controller path can reach the animated backend with a percentage geometry. This matches the known Ruby edge-case pattern for animated GIF resize handling; one backend-semantics hop remains outside the diff, so confidence is Medium.\n\n\n\n<br/>\n\n  ```diff\n  +          OptimizedImage.downsize(tempfile.path, tempfile.path, \"80%\", allow_animation: SiteSetting.allow_animated_thumbnails)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=8bde84b1-95e7-4596-8ee1-91ed090aecc8&type=bug&issue=ArgumentError%3A+percentage+resize+string+may+break+downsize&path=app%2Fcontrollers%2Fuploads_controller.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:04:38Z"
+          },
+          {
+            "path": "app/models/optimized_image.rb",
+            "line": 150,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n ArgumentError: downsize arity changed by override\n\n\n  ###### Where:\n\n `app/models/optimized_image.rb`\n\n\n  ###### Description:\n\n existing callers break when passing width and height separately\nValid new-code logic error. In app/models/optimized_image.rb:145-146 the PR adds `def self.downsize(from, to, max_width, max_height, opts={})`, then at 149-150 adds a second `def self.downsize(from, to, dimensions, opts={})`; in Ruby the later definition overrides the earlier one, making the 5-arg form unreachable. Existing width/height-style callers would now raise ArgumentError. A new caller in app/controllers/uploads_controller.rb:64-69 uses the new 4-arg dimensions form, confirming the contract was changed in this PR. One repository-wide caller hop remains untraced, so confidence is Medium.\n\n\n\n<br/>\n\n  ```diff\n  +  def self.downsize(from, to, dimensions, opts={})\n+    optimize(\"downsize\", from, to, dimensions, opts)\n+  end\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=8bde84b1-95e7-4596-8ee1-91ed090aecc8&type=bug&issue=ArgumentError%3A+downsize+arity+changed+by+override&path=app%2Fmodels%2Foptimized_image.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:04:41Z"
+          }
+        ]
       }
     ]
   },
@@ -47073,6 +48072,25 @@
             "created_at": "2026-04-07T19:29:36Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "discourse__discourse-graphite__cloudaeye__PR2__20260310",
+        "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR2__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "app/controllers/topics_controller.rb",
+            "line": 107,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n NoMethodError on nil TopicUser unsubscribe\n\n\n  ###### Where:\n\n `app/controllers/topics_controller.rb`\n\n\n  ###### Description:\n\n request crashes when user has no TopicUser row for the topic\nNew action `TopicsController#unsubscribe` dereferences `tu.notification_level` immediately after `TopicUser.find_by(...)` with no nil guard at app/controllers/topics_controller.rb:105-107. `TopicUser.get` itself returns `find_by(...)`, and model write paths explicitly handle missing rows by creating them only when needed (`TopicUser.change`, `track_visit!`, `update_last_read`), which is concrete evidence that a user/topic pair can legitimately have no `topic_users` row yet at app/models/topic_user.rb:57-60, 77-114, 145-218. No caller-side guard for `tu` is present in the action. One hop remains open on whether `TopicView.new` always implies a row exists, so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +    tu = TopicUser.find_by(user_id: current_user.id, topic_id: params[:topic_id])\n+\n+    if tu.notification_level > TopicUser.notification_levels[:regular]\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=d385a3a8-b3bd-4a2c-895f-a65897af3744&type=bug&issue=NoMethodError+on+nil+TopicUser+unsubscribe&path=app%2Fcontrollers%2Ftopics_controller.rb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:19:05Z"
+          },
+          {
+            "path": "app/views/email/notification.html.erb",
+            "line": 10,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n CSS selector typo breaks previous-discussion styling\n\n\n  ###### Where:\n\n `app/views/email/notification.html.erb`\n\n\n  ###### Description:\n\n heading styling/hooks break when class includes a literal dot\nThe new template line app/views/email/notification.html.erb:10 sets `class='.previous-discussion'`. In HTML, the class attribute should contain class tokens, not CSS selector syntax with a leading dot. This is a direct structural typo in a newly added `+` line, so styling/hooks targeting `previous-discussion` will not match the intended class name. No tracing is needed under code_clarity for an obvious typo visible in the diff.\n\n\n\n<br/>\n\n  ```diff\n  +    <h4 class='.previous-discussion'><%= t \"user_notifications.previous_discussion\" %></h4>\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=d385a3a8-b3bd-4a2c-895f-a65897af3744&type=bug&issue=CSS+selector+typo+breaks+previous-discussion+styling&path=app%2Fviews%2Femail%2Fnotification.html.erb\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T04:19:08Z"
+          }
+        ]
       }
     ]
   },
@@ -48402,6 +49420,31 @@
             "created_at": "2026-04-07T19:31:33Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22532__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22532__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "scripts/test-gcal-webhooks.sh",
+            "line": 69,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Runtime error: sed -i fails on GNU sed\n\n\n  ###### Where:\n\n `scripts/test-gcal-webhooks.sh`\n\n\n  ###### Description:\n\n env update breaks when script runs on Linux with GNU sed\nNew code at scripts/test-gcal-webhooks.sh:67-69 uses `sed -i '' -E ...`, which is BSD/macOS-specific syntax. This script is a bash entrypoint (scripts/test-gcal-webhooks.sh:1) and has no OS guard or fallback around the env-update path, so on GNU sed hosts the replacement command itself is the incompatible `+` line. Upstream blast radius: direct script invocation updates `GOOGLE_WEBHOOK_URL`; downstream dependencies: host `sed` CLI semantics and filesystem write to ../.env.\n\n\n\n<br/>\n\n  ```diff\n  +  sed -i '' -E \"s|^GOOGLE_WEBHOOK_URL=.*|GOOGLE_WEBHOOK_URL=$TUNNEL_URL|\" \"$ENV_FILE\"\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7e7aabf0-876e-4cc2-8472-6400a4b56106&type=bug&issue=Runtime+error%3A+sed+-i+fails+on+GNU+sed&path=scripts%2Ftest-gcal-webhooks.sh\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:00:50Z"
+          },
+          {
+            "path": "scripts/test-gcal-webhooks.sh",
+            "line": 35,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: shared log file mixes tmole sessions\n\n\n  ###### Where:\n\n `scripts/test-gcal-webhooks.sh`\n\n\n  ###### Description:\n\n wrong webhook URL is reused when two script instances run concurrently\nNew code hard-codes a shared log path `LOG_FILE=\"/tmp/tmole.log\"` (scripts/test-gcal-webhooks.sh:4), deletes it before startup (line 34), writes tmole output to it (line 35), and later reads the tunnel URL back from that same file (lines 23-25, 42-43), including in the reuse path for an already-running tmole process (lines 27-30). There is no locking, per-process filename, or serialization guard in the script, so concurrent invocations can overwrite or read each other's session log and derive the wrong `TUNNEL_URL`. Upstream blast radius: any parallel invocations of this script; downstream dependencies: tmole stdout/log contents, shell file operations, and subsequent ../.env mutation at lines 67-70.\n\n\n\n<br/>\n\n  ```diff\n  +LOG_FILE=\"/tmp/tmole.log\"\n...\n+  rm -f \"$LOG_FILE\"\n+  tmole $TM_PORT > \"$LOG_FILE\" 2>&1 &\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7e7aabf0-876e-4cc2-8472-6400a4b56106&type=bug&issue=Race+condition%3A+shared+log+file+mixes+tmole+sessions&path=scripts%2Ftest-gcal-webhooks.sh\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:00:53Z"
+          },
+          {
+            "path": "scripts/test-gcal-webhooks.sh",
+            "line": 58,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Edge case: fixed wait timeout misses slow startup\n\n\n  ###### Where:\n\n `scripts/test-gcal-webhooks.sh`\n\n\n  ###### Description:\n\n tunnel setup fails when tmole initializes slower than 10 seconds\nNew script code in scripts/test-gcal-webhooks.sh:41-52 hard-caps startup polling to 20 iterations with sleep 0.5 (~10s total). If no URL is found, scripts/test-gcal-webhooks.sh:55-58 immediately treats that as failure and exits. No caller-side guard in this script guarantees tmole will always initialize within 10s, so the fixed timeout is a real edge-case regression in new code.\n\n\n\n<br/>\n\n  ```diff\n  +  for i in {1..20}; do\n+    if grep -q \"$TM_KEYWORD\" \"$LOG_FILE\"; then\n+    sleep 0.5\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=7e7aabf0-876e-4cc2-8472-6400a4b56106&type=bug&issue=Edge+case%3A+fixed+wait+timeout+misses+slow+startup&path=scripts%2Ftest-gcal-webhooks.sh\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:00:56Z"
+          }
+        ]
       }
     ]
   },
@@ -49618,6 +50661,25 @@
             "created_at": "2026-04-07T19:31:31Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8330__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8330__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "packages/trpc/server/routers/viewer/slots.ts",
+            "line": 115,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n logic_error: Dayjs === comparison always false\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/slots.ts`\n\n\n  ###### Description:\n\n override-day detection breaks when start and end represent the same instant\nNew code in packages/trpc/server/routers/viewer/slots.ts:114 compares two freshly created Dayjs objects with `===`, which checks object identity and is always false for equal instants. This matches the known TS logic_error pattern for Dayjs comparison. The branch is newly introduced in `checkIfIsAvailable()` and is exercised by new callers in the same diff at viewer/slots.ts:453-456, 489-492, 514-517, and 579-585.\n\n\n\n<br/>\n\n  ```diff\n  +        if (dayjs(date.start).add(utcOffset, \"minutes\") === dayjs(date.end).add(utcOffset, \"minutes\")) {\n+          return true;\n+        }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=8b875f41-4df5-4f23-81b6-18725afcdc0c&type=bug&issue=logic_error%3A+Dayjs+%3D%3D%3D+comparison+always+false&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fslots.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T02:42:19Z"
+          },
+          {
+            "path": "packages/trpc/server/routers/viewer/slots.ts",
+            "line": 143,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n logic_error: Working-hours check ignores slot end\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/slots.ts`\n\n\n  ###### Description:\n\n availability becomes wrong when a slot starts before endTime but ends after it\nNew code in packages/trpc/server/routers/viewer/slots.ts:141-143 computes both `start` and `end` from `slotStartTime`; `slotEndTime` is never used. This is the known wrong-variable time-range logic_error pattern, so slots that extend past `workingHour.endTime` can be misclassified as available. The faulty `checkIfIsAvailable()` logic is wired into schedule filtering by new call paths at viewer/slots.ts:453-456, 489-492, 514-517, and 579-585.\n\n\n\n<br/>\n\n  ```diff\n  +        const start = slotStartTime.hour() * 60 + slotStartTime.minute();\n+        const end = slotStartTime.hour() * 60 + slotStartTime.minute();\n+        if (start < workingHour.startTime || end > workingHour.endTime) {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=8b875f41-4df5-4f23-81b6-18725afcdc0c&type=bug&issue=logic_error%3A+Working-hours+check+ignores+slot+end&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fslots.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T02:42:22Z"
+          }
+        ]
       }
     ]
   },
@@ -50648,6 +51710,25 @@
             "created_at": "2026-04-07T19:29:08Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14943__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14943__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "packages/features/ee/workflows/api/scheduleSMSReminders.ts",
+            "line": 185,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: lost retryCount increment\n\n\n  ###### Where:\n\n `packages/features/ee/workflows/api/scheduleSMSReminders.ts`\n\n\n  ###### Description:\n\n retry tracking breaks when concurrent schedulers update the same reminder\nNew code in packages/features/ee/workflows/api/scheduleSMSReminders.ts:179-185 and 190-196 updates retryCount with `reminder.retryCount + 1` based on a previously read value from findMany (53-60), not an atomic Prisma increment. That is the exact stale-read lost-update pattern allowed for TypeScript concurrency_safety findings. No transaction or atomic `increment` guard is present in the function.\n\n\n\n<br/>\n\n  ```diff\n  +            data: {\n+              retryCount: reminder.retryCount + 1,\n+            },\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=689a86b1-34d3-463c-af93-bb78a9dd1b4d&type=bug&issue=Race+condition%3A+lost+retryCount+increment&path=packages%2Ffeatures%2Fee%2Fworkflows%2Fapi%2FscheduleSMSReminders.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T02:41:07Z"
+          },
+          {
+            "path": "packages/features/ee/workflows/api/scheduleSMSReminders.ts",
+            "line": 42,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Logic error: deleteMany removes non-SMS reminders\n\n\n  ###### Where:\n\n `packages/features/ee/workflows/api/scheduleSMSReminders.ts`\n\n\n  ###### Description:\n\n other workflow reminders are deleted when retryCount exceeds 1\nThe new deleteMany filter in packages/features/ee/workflows/api/scheduleSMSReminders.ts:31-42 changed from a scoped SMS+scheduledDate condition to an `OR` where the second branch is only `retryCount > 1`. Because that branch lacks `method: WorkflowMethods.SMS`, it matches any WorkflowReminder row with retryCount > 1, not just SMS reminders. This is a new unscoped-OR logic error introduced by the PR.\n\n\n\n<br/>\n\n  ```diff\n  +        {\n+          retryCount: {\n+            gt: 1,\n+          },\n+        },\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=689a86b1-34d3-463c-af93-bb78a9dd1b4d&type=bug&issue=Logic+error%3A+deleteMany+removes+non-SMS+reminders&path=packages%2Ffeatures%2Fee%2Fworkflows%2Fapi%2FscheduleSMSReminders.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T02:41:10Z"
+          }
+        ]
       }
     ]
   },
@@ -51616,6 +52697,19 @@
             "created_at": "2026-04-07T19:32:49Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22345__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22345__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": null,
+            "line": null,
+            "body": "No issues found",
+            "created_at": "2026-04-30T08:49:56Z"
+          }
+        ]
       }
     ]
   },
@@ -53480,6 +54574,37 @@
             "created_at": "2026-04-07T19:33:06Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR11059__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR11059__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "apps/web/pages/api/webhook/app-credential.ts",
+            "line": 90,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: duplicate credential create\n\n\n  ###### Where:\n\n `apps/web/pages/api/webhook/app-credential.ts`\n\n\n  ###### Description:\n\n writes break when concurrent requests both miss findFirst before create\nNew handler code in apps/web/pages/api/webhook/app-credential.ts:62-90 performs a non-transactional read-then-write (`prisma.credential.findFirst` followed by `update` or `create`). Two concurrent requests for the same user/app can both observe no existing row and both execute `create`. This is not DB-prevented: packages/prisma/schema.prisma:111-127 defines `Credential` with only separate indexes on `userId` and `appId`, and no unique constraint on the `(userId, appId)` pair. The file is newly added in this PR, so the race is new code.\n\n\n\n<br/>\n\n  ```diff\n  +  const appCredential = await prisma.credential.findFirst({\n+  if (appCredential) {\n+  } else {\n+    await prisma.credential.create({\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d984fe1-4b5b-42fa-b8c4-275773265743&type=bug&issue=Race+condition%3A+duplicate+credential+create&path=apps%2Fweb%2Fpages%2Fapi%2Fwebhook%2Fapp-credential.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:58:31Z"
+          },
+          {
+            "path": "packages/app-store/_utils/oauth/parseRefreshTokenResponse.ts",
+            "line": 27,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: hardcoded refresh token returned\n\n\n  ###### Where:\n\n `packages/app-store/_utils/oauth/parseRefreshTokenResponse.ts`\n\n\n  ###### Description:\n\n OAuth refresh breaks when provider omits refresh_token and this placeholder is later persisted\nNew code in packages/app-store/_utils/oauth/parseRefreshTokenResponse.ts:25-27 fabricates a refresh_token value when the parsed response omits it, returning data that no longer matches the provider/sync response. This matches the allowed logic_error pattern of storing the wrong value rather than parsed token payload. One upstream check confirms the helper is used by OAuth refresh flows in googlecalendar/lib/CalendarService.ts:84, office365calendar/lib/CalendarService.ts:244, salesforce/lib/CalendarService.ts:49, and zoomvideo/lib/VideoApiAdapter.ts:75; caller-side persistence was not fully traced, so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +  if (!refreshTokenResponse.data.refresh_token) {\n+    refreshTokenResponse.data.refresh_token = \"refresh_token\";\n+  }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d984fe1-4b5b-42fa-b8c4-275773265743&type=bug&issue=LogicError%3A+hardcoded+refresh+token+returned&path=packages%2Fapp-store%2F_utils%2Foauth%2FparseRefreshTokenResponse.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:58:34Z"
+          },
+          {
+            "path": "packages/app-store/_utils/oauth/refreshOAuthTokens.ts",
+            "line": 15,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: returns fetch Response instead of token payload\n\n\n  ###### Where:\n\n `packages/app-store/_utils/oauth/refreshOAuthTokens.ts`\n\n\n  ###### Description:\n\n token field access breaks when callers expect parsed OAuth data\nNew branch in packages/app-store/_utils/oauth/refreshOAuthTokens.ts:8-15 returns a raw fetch Response, while the non-sync branch returns the integration-specific refresh result, creating a branch-dependent return-shape mismatch. This is concretely incompatible with callers: packages/app-store/googlecalendar/lib/CalendarService.ts:84-99 reads res?.data.access_token / expiry_date from refreshOAuthTokens output, and packages/app-store/hubspot/lib/CalendarService.ts:175-191 types the result as HubspotToken and immediately reads token fields. A fetch Response has no .data token payload, so this matches the TS input_validation pattern for Response-vs-data misuse.\n\n\n\n<br/>\n\n  ```diff\n  +    const response = await fetch(process.env.CALCOM_CREDENTIAL_SYNC_ENDPOINT, {\n+      method: \"POST\",\n+      body: new URLSearchParams({\n+        calcomUserId: userId.toString(),\n+        appSlug,\n+      }),\n+    });\n+    return response;\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d984fe1-4b5b-42fa-b8c4-275773265743&type=bug&issue=TypeError%3A+returns+fetch+Response+instead+of+token+payload&path=packages%2Fapp-store%2F_utils%2Foauth%2FrefreshOAuthTokens.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:58:37Z"
+          },
+          {
+            "path": "packages/app-store/googlecalendar/lib/CalendarService.ts",
+            "line": 96,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: token fields accessed on fetch Response\n\n\n  ###### Where:\n\n `packages/app-store/googlecalendar/lib/CalendarService.ts`\n\n\n  ###### Description:\n\n token update breaks when refreshOAuthTokens returns a fetch Response without parsed data\nNew Google code in packages/app-store/googlecalendar/lib/CalendarService.ts:86-96 now assigns `const res = await refreshOAuthTokens(...)` and immediately reads `res?.data`. The PR\u2019s sibling call sites show the helper returns a fetch `Response`, not an axios-style object: office365 calendar parses it via `handleErrorsJson(response)` at packages/app-store/office365calendar/lib/CalendarService.ts:246-263, office365 video does the same at packages/app-store/office365video/lib/VideoApiAdapter.ts:61-75, and zoom video parses the returned response body at packages/app-store/zoomvideo/lib/VideoApiAdapter.ts:79-104. Thus `res?.data` is concretely wrong in this new code path and token field access will fail downstream.\n\n\n\n<br/>\n\n  ```diff\n  +        const res = await refreshOAuthTokens(\n+          async () => {\n+            const fetchTokens = await myGoogleAuth.refreshToken(googleCredentials.refresh_token);\n+            return fetchTokens.res;\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=1d984fe1-4b5b-42fa-b8c4-275773265743&type=bug&issue=TypeError%3A+token+fields+accessed+on+fetch+Response&path=packages%2Fapp-store%2Fgooglecalendar%2Flib%2FCalendarService.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:58:40Z"
+          }
+        ]
       }
     ]
   },
@@ -54887,6 +56012,49 @@
             "created_at": "2026-04-07T19:35:41Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR7232__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR7232__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "packages/features/bookings/lib/handleCancelBooking.ts",
+            "line": 490,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Unhandled promise errors from reminder deletions\n\n\n  ###### Where:\n\n `packages/features/bookings/lib/handleCancelBooking.ts`\n\n\n  ###### Description:\n\n Reminder cleanup can fail silently when deleteScheduled* returns a rejected promise\nNew code in handleCancelBooking calls async deleteScheduledEmailReminder/deleteScheduledSMSReminder without await inside nested forEach, and those promises are no longer included in prismaPromises before Promise.all (packages/features/bookings/lib/handleCancelBooking.ts:484-495). The callees perform async DB/network work (emailReminderManager.ts:197-231, smsReminderManager.ts:177-186), so this matches the allowlisted TypeScript error_handling pattern of fire-and-forget async cleanup in forEach.\n\n\n\n<br/>\n\n  ```diff\n  +      if (reminder.method === WorkflowMethods.EMAIL) {\n+        deleteScheduledEmailReminder(reminder.id, reminder.referenceId);\n+      } else if (reminder.method === WorkflowMethods.SMS) {\n+        deleteScheduledSMSReminder(reminder.id, reminder.referenceId);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=083dcd78-f179-4a38-890a-ef521603a5ab&type=bug&issue=Unhandled+promise+errors+from+reminder+deletions&path=packages%2Ffeatures%2Fbookings%2Flib%2FhandleCancelBooking.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:51Z"
+          },
+          {
+            "path": "packages/features/bookings/lib/handleNewBooking.ts",
+            "line": 971,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Promise handling error: unawaited reminder deletions\n\n\n  ###### Where:\n\n `packages/features/bookings/lib/handleNewBooking.ts`\n\n\n  ###### Description:\n\n reminder cleanup breaks when delete helpers reject or finish after reschedule continues\nNew code in packages/features/bookings/lib/handleNewBooking.ts:966-971 uses forEach to call async helpers without await. Those helpers are confirmed async in packages/features/ee/workflows/lib/reminders/emailReminderManager.ts:197-231 and packages/features/ee/workflows/lib/reminders/smsReminderManager.ts:177-186, performing external API and DB work. Therefore the surrounding try/catch at handleNewBooking.ts:964-975 cannot reliably catch later promise rejections, matching the TypeScript allowlisted async-forEach error_handling pattern.\n\n\n\n<br/>\n\n  ```diff\n  +      originalRescheduledBooking.workflowReminders.forEach((reminder) => {\n+        if (reminder.method === WorkflowMethods.EMAIL) {\n+          deleteScheduledEmailReminder(reminder.id, reminder.referenceId, true);\n+        } else if (reminder.method === WorkflowMethods.SMS) {\n+          deleteScheduledSMSReminder(reminder.id, reminder.referenceId);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=083dcd78-f179-4a38-890a-ef521603a5ab&type=bug&issue=Promise+handling+error%3A+unawaited+reminder+deletions&path=packages%2Ffeatures%2Fbookings%2Flib%2FhandleNewBooking.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:53Z"
+          },
+          {
+            "path": "packages/features/ee/workflows/api/scheduleEmailReminders.ts",
+            "line": 74,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: Error handling aborts remaining cancellations\n\n\n  ###### Where:\n\n `packages/features/ee/workflows/api/scheduleEmailReminders.ts`\n\n\n  ###### Description:\n\n later reminders stay scheduled when one API cancellation request fails\nNew loop in packages/features/ee/workflows/api/scheduleEmailReminders.ts:53-77 wraps all reminder cancellations in one try/catch and does `await client.request(...)` inside the `for` loop at lines 56-64. If any one request rejects, control jumps to the catch at lines 75-76, so later reminders in `remindersToCancel` are skipped and remain scheduled. This behavior is directly visible in the added code and is not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +    for (const reminder of remindersToCancel) {\n+      await client.request({\n+        url: \"/v3/user/scheduled_sends\",\n+        method: \"POST\",\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=083dcd78-f179-4a38-890a-ef521603a5ab&type=bug&issue=Potential%3A+Error+handling+aborts+remaining+cancellations&path=packages%2Ffeatures%2Fee%2Fworkflows%2Fapi%2FscheduleEmailReminders.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:56Z"
+          },
+          {
+            "path": "packages/features/ee/workflows/lib/reminders/emailReminderManager.ts",
+            "line": 231,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: LogicError leaves SendGrid batch undeleted\n\n\n  ###### Where:\n\n `packages/features/ee/workflows/lib/reminders/emailReminderManager.ts`\n\n\n  ###### Description:\n\n scheduled sends persist when callers expect deletion without passing immediateDelete\nIn packages/features/ee/workflows/lib/reminders/emailReminderManager.ts:213-231, only the `immediateDelete` branch calls SendGrid cancellation, while the default path merely updates `workflowReminder.cancelled = true` in the DB. New callers in packages/features/bookings/lib/handleCancelBooking.ts:487-488, packages/trpc/server/routers/viewer/bookings.tsx:489-490, and packages/trpc/server/routers/viewer/workflows.tsx:377-378 and 575-576 invoke `deleteScheduledEmailReminder(reminder.id, reminder.referenceId)` without `immediateDelete`, so those paths no longer delete the external SendGrid batch directly. This matches the allowed logic_error pattern where external cancellation is omitted in one branch while another branch performs it.\n\n\n\n<br/>\n\n  ```diff\n  +    if (immediateDelete) {\n+      await client.request({\n+        url: \"/v3/user/scheduled_sends\",\n+        method: \"POST\",\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=083dcd78-f179-4a38-890a-ef521603a5ab&type=bug&issue=Potential%3A+LogicError+leaves+SendGrid+batch+undeleted&path=packages%2Ffeatures%2Fee%2Fworkflows%2Flib%2Freminders%2FemailReminderManager.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:59Z"
+          },
+          {
+            "path": "packages/trpc/server/routers/viewer/bookings.tsx",
+            "line": 492,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: stale reminders remain due to unawaited async deletes\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/bookings.tsx`\n\n\n  ###### Description:\n\n reminder cleanup can be skipped when deleteScheduled* returns a Promise that rejects or resolves later\nNew PR code in viewer/bookings.tsx now calls async helpers without await: deleteScheduledEmailReminder(reminder.id, reminder.referenceId) and deleteScheduledSMSReminder(reminder.id, reminder.referenceId). Those helpers were changed in this PR to perform awaited SendGrid/Twilio/Prisma work internally in emailReminderManager.ts:197-231 and smsReminderManager.ts:177-185. Because the calls are fire-and-forget inside forEach, cleanup is no longer part of the enclosing mutation's awaited work, matching the TS allowlisted unawaited async cleanup pattern. This was not pre-existing here: the old code separately awaited Prisma reminder deletions via Promise.all, but that await was removed in this PR.\n\n\n\n<br/>\n\n  ```diff\n  +          if (reminder.method === WorkflowMethods.EMAIL) {\n+            deleteScheduledEmailReminder(reminder.id, reminder.referenceId);\n+          } else if (reminder.method === WorkflowMethods.SMS) {\n+            deleteScheduledSMSReminder(reminder.id, reminder.referenceId);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=083dcd78-f179-4a38-890a-ef521603a5ab&type=bug&issue=Potential%3A+stale+reminders+remain+due+to+unawaited+async+deletes&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fbookings.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:02Z"
+          },
+          {
+            "path": "packages/trpc/server/routers/viewer/workflows.tsx",
+            "line": 215,
+            "body": "Review #6\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: stale DB reminders after removed deleteMany\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/workflows.tsx`\n\n\n  ###### Description:\n\n edited or disabled reminders can remain in database when helper only unschedules external jobs\nNew immediate-delete callers in packages/trpc/server/routers/viewer/workflows.tsx:213-215 and similarly 520-521 invoke deleteScheduledEmailReminder(..., true). In packages/features/ee/workflows/lib/reminders/emailReminderManager.ts:213-223, the immediateDelete branch cancels SendGrid but returns without deleting or updating the WorkflowReminder row, unlike the null-reference path at 203-210 and non-immediate path at 225-231. This is a new logic regression in the helper/caller combination introduced by the PR; the new cleanup job in packages/features/ee/workflows/api/scheduleEmailReminders.ts:43-75 only targets cancelled=true reminders, so it does not clean up these immediate-delete rows.\n\n\n\n<br/>\n\n  ```diff\n  +      remindersToDelete.flat().forEach((reminder) => {\n+        if (reminder.method === WorkflowMethods.EMAIL) {\n+          deleteScheduledEmailReminder(reminder.id, reminder.referenceId);\n+        } else if (reminder.method === WorkflowMethods.SMS) {\n...\n-      await Promise.all(deleteReminderPromise);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=083dcd78-f179-4a38-890a-ef521603a5ab&type=bug&issue=Potential%3A+stale+DB+reminders+after+removed+deleteMany&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fworkflows.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:08Z"
+          }
+        ]
       }
     ]
   },
@@ -56457,6 +57625,37 @@
             "created_at": "2026-04-07T19:31:18Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14740__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14740__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts",
+            "line": 48,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n AuthorizationError: admin permission check requires both roles\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts`\n\n\n  ###### Description:\n\n access is denied when a team admin is not also the team owner\nNew code in addGuestsHandler uses `(await isTeamAdmin(...)) && (await isTeamOwner(...))` at packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:46-48, then denies access unless that combined flag or organizer/attendee is true at :54-55. This structurally matches the known invalid permission pattern: a team user must be both admin and owner instead of either one. The route is newly introduced in packages/trpc/server/routers/viewer/bookings/_router.tsx:79-95, so this is not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +  const isTeamAdminOrOwner =\n+    (await isTeamAdmin(user.id, booking.eventType?.teamId ?? 0)) &&\n+    (await isTeamOwner(user.id, booking.eventType?.teamId ?? 0));\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=47bf891d-6bfb-40da-b880-eb2986c367f7&type=bug&issue=AuthorizationError%3A+admin+permission+check+requires+both+roles&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fbookings%2FaddGuests.handler.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:16Z"
+          },
+          {
+            "path": "packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts",
+            "line": 78,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n ValidationError: duplicate guest emails not deduplicated\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts`\n\n\n  ###### Description:\n\n duplicate attendee rows can be created when input contains repeated emails\nNew code in addGuestsHandler filters guests only against existing booking attendees and blacklist, not against duplicates within the submitted guests array itself (packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:74-78). The surviving values are then passed directly into createMany (packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:100-103), so repeated emails in one request survive to bulk insert. Router input validation only enforces array-of-email shape, not uniqueness (packages/trpc/server/routers/viewer/bookings/_router.tsx:79-95; packages/trpc/server/routers/viewer/bookings/addGuests.schema.ts:3-6). This matches the allowed TS input_validation pattern.\n\n\n\n<br/>\n\n  ```diff\n  +  const uniqueGuests = guests.filter(\n+    (guest) =>\n+      !booking.attendees.some((attendee) => guest === attendee.email) &&\n+      !blacklistedGuestEmails.includes(guest)\n+  );\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=47bf891d-6bfb-40da-b880-eb2986c367f7&type=bug&issue=ValidationError%3A+duplicate+guest+emails+not+deduplicated&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fbookings%2FaddGuests.handler.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:19Z"
+          },
+          {
+            "path": "apps/web/components/dialog/AddGuestsDialog.tsx",
+            "line": 32,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n ValidationError: empty-string sentinel blocks add\n\n\n  ###### Where:\n\n `apps/web/components/dialog/AddGuestsDialog.tsx`\n\n\n  ###### Description:\n\n guest submission breaks when dialog opens or resets with no entered emails\nNew code initializes guest state to [\"\"] at apps/web/components/dialog/AddGuestsDialog.tsx:32 and resets to the same sentinel at :39 and :90. handleAdd only returns early for length===0 at :48-50, so the untouched/reset state reaches z.array(z.string().email()) at :52, fails validation, and sets isInvalidEmail at :55-56. MultiEmail also treats any non-empty array as active inputs and itself appends \"\" entries at packages/ui/form/MultiEmail.tsx:19-25, :64-67, and :83-86. Upstream caller BookingListItem only opens the dialog and does not guard this state at apps/web/components/booking/BookingListItem.tsx:357-360.\n\n\n\n<br/>\n\n  ```diff\n  +  const [multiEmailValue, setMultiEmailValue] = useState<string[]>([\"\"]);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=47bf891d-6bfb-40da-b880-eb2986c367f7&type=bug&issue=ValidationError%3A+empty-string+sentinel+blocks+add&path=apps%2Fweb%2Fcomponents%2Fdialog%2FAddGuestsDialog.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:22Z"
+          },
+          {
+            "path": "packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts",
+            "line": 77,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-SECURITY__ALERT-blue.svg) \n  ![License](https://img.shields.io/badge/Risk_Type-AuthorizationBypass:__blacklist__check__is__case--sensitive-purple.svg) \n  ![License](https://img.shields.io/badge/Risk_Score-4.0-red.svg)\n\n\n\n  ###### Vulnerability Details: \n blocked guests can be added when submitted with mixed-case email casing\nAt packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts:70-72 the env blacklist is normalized with `toLowerCase()`, but the submitted `guest` values are compared unchanged in `!blacklistedGuestEmails.includes(guest)` at :74-77. The only upstream validation in packages/trpc/server/routers/viewer/bookings/_router.tsx:79-95 and addGuests.schema.ts:3-6 enforces authenticated access and email syntax, not canonical casing, so mixed-case input can bypass the blacklist. This handler is entirely new in the PR, so the issue is new code.\n\n\n\n  ###### Where:\n `packages/trpc/server/routers/viewer/bookings/addGuests.handler.ts`\n\n\n\n<br/>\n\n  ```diff\n  +  const uniqueGuests = guests.filter(\n+    (guest) =>\n+      !booking.attendees.some((attendee) => guest === attendee.email) &&\n+      !blacklistedGuestEmails.includes(guest)\n  ```\n\n\n\n<br/>\n\n  ###### Security Justification:\n\n  blocked guests can be added when submitted with mixed-case email casing\n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=47bf891d-6bfb-40da-b880-eb2986c367f7&type=security&vulnerability_name=AuthorizationBypass%3A+blacklist+check+is+case-sensitive&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fbookings%2FaddGuests.handler.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:40:25Z"
+          }
+        ]
       }
     ]
   },
@@ -57824,6 +59023,43 @@
             "created_at": "2026-04-07T19:31:37Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10600__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10600__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "apps/web/pages/api/auth/two-factor/totp/disable.ts",
+            "line": 51,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Naming mismatch: login error in disable endpoint\n\n\n  ###### Where:\n\n `apps/web/pages/api/auth/two-factor/totp/disable.ts`\n\n\n  ###### Description:\n\n Operational debugging misleads when missing encryption key occurs during disable flow\nThe new disable-endpoint branch in apps/web/pages/api/auth/two-factor/totp/disable.ts:48-50 is explicitly part of the TOTP disable flow, but the added log text says 'cannot proceed with backup code login.' That wording contradicts the endpoint purpose and is a direct naming/message mismatch on a newly added line. The same message was introduced separately for actual login handling in packages/features/auth/lib/next-auth-options.ts:132-145, confirming the disable path reused login wording rather than describing disable behavior.\n\n\n\n<br/>\n\n  ```diff\n  +      console.error(\"Missing encryption key; cannot proceed with backup code login.\");\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=fce08f6c-1b6a-44ae-953c-1bbbd0d06542&type=bug&issue=Naming+mismatch%3A+login+error+in+disable+endpoint&path=apps%2Fweb%2Fpages%2Fapi%2Fauth%2Ftwo-factor%2Ftotp%2Fdisable.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:06Z"
+          },
+          {
+            "path": "packages/features/auth/lib/next-auth-options.ts",
+            "line": 155,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: backup code reused concurrently\n\n\n  ###### Where:\n\n `packages/features/auth/lib/next-auth-options.ts`\n\n\n  ###### Description:\n\n one-time backup codes break when two login requests validate before either update commits\nNew code in packages/features/auth/lib/next-auth-options.ts:139-155 consumes backup codes via a read-decrypt-JSON.parse/indexOf check followed by in-memory mutation and prisma.user.update. There is no transaction, compare-and-swap predicate, or unique constraint involved; both concurrent authorize() requests can read the same user.backupCodes snapshot, both pass the check at :144-145, and both write back at :149-155. This matches the confirmed TypeScript concurrency pattern for one-time backup codes being reusable under concurrent requests.\n\n\n\n<br/>\n\n  ```diff\n  +        const index = backupCodes.indexOf(credentials.backupCode.replaceAll(\"-\", \"\"));\n+        if (index === -1) throw new Error(ErrorCode.IncorrectBackupCode);\n...\n+            backupCodes: symmetricEncrypt(JSON.stringify(backupCodes), process.env.CALENDSO_ENCRYPTION_KEY),\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=fce08f6c-1b6a-44ae-953c-1bbbd0d06542&type=bug&issue=Race+condition%3A+backup+code+reused+concurrently&path=packages%2Ffeatures%2Fauth%2Flib%2Fnext-auth-options.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:11Z"
+          },
+          {
+            "path": "packages/features/auth/lib/next-auth-options.ts",
+            "line": 145,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential TypeError: backup code case mismatch\n\n\n  ###### Where:\n\n `packages/features/auth/lib/next-auth-options.ts`\n\n\n  ###### Description:\n\n valid backup code login breaks when user enters mixed-case code\nNew code compares stored backup codes against `credentials.backupCode.replaceAll(\"-\", \"\")` without case normalization in `packages/features/auth/lib/next-auth-options.ts:144-145`. Setup generates codes via `crypto.randomBytes(5).toString(\"hex\")` in lowercase hex in `apps/web/pages/api/auth/two-factor/totp/setup.ts:60-62`, so mixed-case user input will not match. This is the same structural pattern as the allowed TypeScript input_validation case for case-sensitive backup code validation. No upstream normalization is visible in the changed authorize path.\n\n\n\n<br/>\n\n  ```diff\n  +        const index = backupCodes.indexOf(credentials.backupCode.replaceAll(\"-\", \"\"));\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=fce08f6c-1b6a-44ae-953c-1bbbd0d06542&type=bug&issue=Potential+TypeError%3A+backup+code+case+mismatch&path=packages%2Ffeatures%2Fauth%2Flib%2Fnext-auth-options.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:17Z"
+          },
+          {
+            "path": "apps/web/components/auth/BackupCode.tsx",
+            "line": 7,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: exported component name mismatches file purpose\n\n\n  ###### Where:\n\n `apps/web/components/auth/BackupCode.tsx`\n\n\n  ###### Description:\n\n imports/debugging break when stack traces and component names show TwoFactor for backup code UI\nThe new file `apps/web/components/auth/BackupCode.tsx` renders backup-code-specific UI (`backup_code`, `backupCode`) at lines 13-25, but the default-exported function is named `TwoFactor` at line 7. This is a direct naming contradiction in the added code. Upstream usage imports the default export as `BackupCode` in `apps/web/components/settings/DisableTwoFactorModal.tsx:8,104` and `apps/web/pages/auth/login.tsx:32,221`, so imports do not fail, but the internal exported component name is still inconsistent with the file/API purpose.\n\n\n\n<br/>\n\n  ```diff\n  +export default function TwoFactor({ center = true }) {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=fce08f6c-1b6a-44ae-953c-1bbbd0d06542&type=bug&issue=TypeError%3A+exported+component+name+mismatches+file+purpose&path=apps%2Fweb%2Fcomponents%2Fauth%2FBackupCode.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:23Z"
+          },
+          {
+            "path": "apps/web/components/settings/EnableTwoFactorModal.tsx",
+            "line": 95,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: TypeError from undefined backupCodes.map\n\n\n  ###### Where:\n\n `apps/web/components/settings/EnableTwoFactorModal.tsx`\n\n\n  ###### Description:\n\n modal crashes when setup response omits or nulls backupCodes\nNew code in `apps/web/components/settings/EnableTwoFactorModal.tsx:92-99` immediately calls `body.backupCodes.map(...)` with no local guard after setup. One upstream caller check shows `apps/web/components/settings/TwoFactorAuthAPI.ts:2-9` returns a raw `fetch` Response from `/api/auth/two-factor/totp/setup` and does not validate or normalize a `backupCodes` field before this component consumes it. If the parsed success body is missing or null for `backupCodes`, this new `.map` call will throw. Server response contract was not traced, so confidence is capped at Medium.\n\n\n\n<br/>\n\n  ```diff\n  +        const textBlob = new Blob([body.backupCodes.map(formatBackupCode).join(\"\\n\")], {\n+          type: \"text/plain\",\n+        });\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=fce08f6c-1b6a-44ae-953c-1bbbd0d06542&type=bug&issue=Potential%3A+TypeError+from+undefined+backupCodes.map&path=apps%2Fweb%2Fcomponents%2Fsettings%2FEnableTwoFactorModal.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:29Z"
+          }
+        ]
       }
     ]
   },
@@ -59525,6 +60761,61 @@
             "created_at": "2026-04-07T19:29:36Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10967__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10967__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "packages/core/CalendarManager.ts",
+            "line": 236,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential TypeError: extra createEvent arg breaks integrations\n\n\n  ###### Where:\n\n `packages/core/CalendarManager.ts`\n\n\n  ###### Description:\n\n event creation breaks when calendar adapters implement the old two-parameter contract differently\nNew call site in packages/core/CalendarManager.ts:236 now passes createEvent(calEvent, credential.id), and the PR also changes the Calendar contract in packages/types/Calendar.d.ts:221 to require createEvent(event, credentialId). But packages/lib/CalendarService.ts:125-198 still implements createEvent(event) with only one parameter, so the interface/signature mismatch is concrete in changed code and can break callers/adapters following the old contract.\n\n\n\n<br/>\n\n  ```diff\n  +        .createEvent(calEvent, credential.id)\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=Potential+TypeError%3A+extra+createEvent+arg+breaks+integrations&path=packages%2Fcore%2FCalendarManager.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:16Z"
+          },
+          {
+            "path": "packages/core/EventManager.ts",
+            "line": 119,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: unguarded first calendar access\n\n\n  ###### Where:\n\n `packages/core/EventManager.ts`\n\n\n  ###### Description:\n\n location fallback crashes when destinationCalendar is null or empty\npackages/core/EventManager.ts:118 destructures the first entry from evt.destinationCalendar ?? [], which yields undefined when destinationCalendar is null or empty, and line 119 immediately dereferences mainHostDestinationCalendar.integration without optional chaining or a guard. The field is explicitly nullable in packages/core/builders/CalendarEvent/class.ts:26 and packages/types/Calendar.d.ts:171, so the new code introduces a real null/empty-input crash path.\n\n\n\n<br/>\n\n  ```diff\n  +    const [mainHostDestinationCalendar] = evt.destinationCalendar ?? [];\n+    if (evt.location === MeetLocationType && mainHostDestinationCalendar.integration !== \"google_calendar\") {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=TypeError%3A+unguarded+first+calendar+access&path=packages%2Fcore%2FEventManager.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:19Z"
+          },
+          {
+            "path": "packages/types/Calendar.d.ts",
+            "line": 221,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: TypeError from createEvent arity change\n\n\n  ###### Where:\n\n `packages/types/Calendar.d.ts`\n\n\n  ###### Description:\n\n implementations or callers break when they still use the old single-argument signature\nNew interface change at packages/types/Calendar.d.ts:221 requires Calendar.createEvent(event, credentialId). Upstream callers in packages/core/CalendarManager.ts:235-237 and packages/core/EventManager.ts:370-380 now pass two arguments, but downstream concrete implementation packages/lib/CalendarService.ts:createEvent (lines 125-198) still declares only one parameter `createEvent(event)`, creating a signature mismatch introduced by this PR.\n\n\n\n<br/>\n\n  ```diff\n  +  createEvent(event: CalendarEvent, credentialId: number): Promise<NewCalendarEventType>;\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=Potential%3A+TypeError+from+createEvent+arity+change&path=packages%2Ftypes%2FCalendar.d.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:22Z"
+          },
+          {
+            "path": "packages/core/builders/CalendarEvent/class.ts",
+            "line": 26,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: TypeError from destinationCalendar shape change\n\n\n  ###### Where:\n\n `packages/core/builders/CalendarEvent/class.ts`\n\n\n  ###### Description:\n\n event consumers break when they still treat destinationCalendar as a single object\nNew API shape change is introduced at packages/core/builders/CalendarEvent/class.ts:26 (destinationCalendar becomes DestinationCalendar[] | null). A concrete broken consumer remains in the same PR at packages/core/EventManager.ts:118-119, where code destructures the first element from evt.destinationCalendar ?? [] and then dereferences mainHostDestinationCalendar.integration without optional chaining, so null/empty destinationCalendar now throws at runtime. Other consumers such as packages/lib/CalendarService.ts:156-163 and 509-515 were explicitly migrated to array-aware access, confirming this PR changed the contract and that consumer updates were required.\n\n\n\n<br/>\n\n  ```diff\n  +  destinationCalendar?: DestinationCalendar[] | null;\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=Potential%3A+TypeError+from+destinationCalendar+shape+change&path=packages%2Fcore%2Fbuilders%2FCalendarEvent%2Fclass.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:25Z"
+          },
+          {
+            "path": "packages/app-store/googlecalendar/lib/CalendarService.ts",
+            "line": 256,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n LogicError: impossible calendar lookup fallback\n\n\n  ###### Where:\n\n `packages/app-store/googlecalendar/lib/CalendarService.ts`\n\n\n  ###### Description:\n\n calendar selection falls back incorrectly when externalCalendarId is absent\nValid logic_error. In GoogleCalendarService.updateEvent, the new fallback is `externalCalendarId ? externalCalendarId : event.destinationCalendar?.find((cal) => cal.externalId === externalCalendarId)?.externalId` (packages/app-store/googlecalendar/lib/CalendarService.ts:253-256). When the fallback branch runs, `externalCalendarId` is absent/falsy, so the predicate compares each `cal.externalId` against that absent value and cannot recover the intended destination calendar. This is new code, and it is structurally inconsistent with the nearby createEvent selection introduced in the same PR, which correctly selects from `destinationCalendar` by `credentialId` (packages/app-store/googlecalendar/lib/CalendarService.ts:145-149). Downstream blast radius: the computed `selectedCalendar` is passed to `calendar.events.update({ calendarId: selectedCalendar, ... })`, affecting which Google Calendar receives the update.\n\n\n\n<br/>\n\n  ```diff\n  +      const selectedCalendar = externalCalendarId\n+        ? externalCalendarId\n+        : event.destinationCalendar?.find((cal) => cal.externalId === externalCalendarId)?.externalId;\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=LogicError%3A+impossible+calendar+lookup+fallback&path=packages%2Fapp-store%2Fgooglecalendar%2Flib%2FCalendarService.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:28Z"
+          },
+          {
+            "path": "packages/features/bookings/lib/handleNewBooking.ts",
+            "line": 1879,
+            "body": "Review #6\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: team calendars dropped on booking create\n\n\n  ###### Where:\n\n `packages/features/bookings/lib/handleNewBooking.ts`\n\n\n  ###### Description:\n\n collective member calendars are ignored when multiple destination calendars were gathered\nhandleNewBooking now builds `evt.destinationCalendar` as an array at packages/features/bookings/lib/handleNewBooking.ts:1063-1067 and appends collective team member calendars at :1077-1078, but createBooking persists only `evt.destinationCalendar[0]` at :1874-1879. This is new contradictory logic in the same function, so additional collected calendars are silently dropped on booking creation.\n\n\n\n<br/>\n\n  ```diff\n  +      destinationCalendar:\n+        evt.destinationCalendar && evt.destinationCalendar.length > 0\n+          ? {\n+              connect: { id: evt.destinationCalendar[0].id },\n+            }\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=TypeError%3A+team+calendars+dropped+on+booking+create&path=packages%2Ffeatures%2Fbookings%2Flib%2FhandleNewBooking.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:31Z"
+          },
+          {
+            "path": "packages/features/bookings/lib/handleCancelBooking.ts",
+            "line": 461,
+            "body": "Review #7\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-Medium-red.svg)\n\n\n\n  ###### Issue:\n\n Potential TypeError: recurring deletes skip DB-fetched credential\n\n\n  ###### Where:\n\n `packages/features/bookings/lib/handleCancelBooking.ts`\n\n\n  ###### Description:\n\n linked recurring events remain undeleted when credential exists only in DB, not user.credentials\nNew recurring-delete branch in handleCancelBooking iterates only `bookingToDelete.user.credentials` (`handleCancelBooking.ts:449-461`) even after adding a DB fallback that can recover the referenced credential into `calendarCredential` when it is missing from that in-memory list (`handleCancelBooking.ts:431-440`). In the non-recurring branch the recovered `calendarCredential` is actually used (`:470`), but in the recurring branch it is ignored, so bookings whose usable calendar credential exists only via the new DB fetch path can miss external deletions. Confirmed new logic gap; not a TypeError.\n\n\n\n<br/>\n\n  ```diff\n  +            const promises = bookingToDelete.user.credentials\n+              .filter((credential) => credential.type.endsWith(\"_calendar\"))\n+              .map(async (credential) => {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=Potential+TypeError%3A+recurring+deletes+skip+DB-fetched+credential&path=packages%2Ffeatures%2Fbookings%2Flib%2FhandleCancelBooking.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:34Z"
+          },
+          {
+            "path": "packages/trpc/server/routers/viewer/bookings/editLocation.handler.ts",
+            "line": 88,
+            "body": "Review #8\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Potential: redundant optional chaining obscures non-null branch\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/bookings/editLocation.handler.ts`\n\n\n  ###### Description:\n\n Intent is harder to verify when truthy-checked values are still accessed as optional\nIn `packages/trpc/server/routers/viewer/bookings/editLocation.handler.ts:85-88`, the true branches already prove the values are present (`booking?.destinationCalendar ? [booking?.destinationCalendar]` and `booking?.user?.destinationCalendar ? [booking?.user?.destinationCalendar]`). The extra `?.` inside those branches is therefore redundant. This matches the TS code_clarity allowlist pattern for redundant optional chaining on a value already checked in the ternary condition.\n\n\n\n<br/>\n\n  ```diff\n  +      destinationCalendar: booking?.destinationCalendar\n+        ? [booking?.destinationCalendar]\n+        : booking?.user?.destinationCalendar\n+        ? [booking?.user?.destinationCalendar]\n+        : [],\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=562a68bf-0a18-445c-8686-08dbabb68df9&type=bug&issue=Potential%3A+redundant+optional+chaining+obscures+non-null+branch&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fbookings%2FeditLocation.handler.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:57:37Z"
+          }
+        ]
       }
     ]
   },
@@ -60751,7 +62042,44 @@
             "created_at": "2026-04-07T19:30:55Z"
           }
         ]
+      },
+      {
+        "tool": "cloudaeye",
+        "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8087__20260310",
+        "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8087__20260310/pull/1",
+        "review_comments": [
+          {
+            "path": "packages/features/bookings/lib/handleCancelBooking.ts",
+            "line": 461,
+            "body": "Review #1\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n TypeError: async forEach errors escape handler\n\n\n  ###### Where:\n\n `packages/features/bookings/lib/handleCancelBooking.ts`\n\n\n  ###### Description:\n\n calendar update failures go unhandled when async callbacks reject after handler continues\nNew code in handleCancelBooking.ts uses `.forEach(async (credential) => { ... await getCalendar(credential) ... })`, which matches the TypeScript allowlisted error_handling bug: async work inside forEach is not awaited, so rejections from calendar update/delete calls escape the surrounding handler flow. Evidence is directly visible in packages/features/bookings/lib/handleCancelBooking.ts:458-469, and the same PR fixes a sibling instance at 474-482 by converting forEach to `for...of` with `await`, confirming this pattern is recognized here.\n\n\n\n<br/>\n\n  ```diff\n  +            .forEach(async (credential) => {\n+              const calendar = await getCalendar(credential);\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e5443a54-54a5-4daa-ab78-cab135ecfe7a&type=bug&issue=TypeError%3A+async+forEach+errors+escape+handler&path=packages%2Ffeatures%2Fbookings%2Flib%2FhandleCancelBooking.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:14Z"
+          },
+          {
+            "path": "packages/trpc/server/routers/viewer/bookings.tsx",
+            "line": 556,
+            "body": "Review #3\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Race condition: deletions continue after handler returns\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/bookings.tsx`\n\n\n  ###### Description:\n\n external calendar cleanup may be incomplete when response proceeds immediately\nNew code in packages/trpc/server/routers/viewer/bookings.tsx:553 changes a synchronous forEach callback to `async`, and line 556 adds `await getCalendar(...)` inside that callback. `Array.forEach` does not await returned promises, so the calendar deletion work becomes fire-and-forget. This exactly matches the TypeScript allowlisted error_handling pattern for async callbacks inside forEach: surrounding control flow cannot await completion or catch rejections from `calendar?.deleteEvent(...)`. The issue is introduced by the `+` lines, not pre-existing.\n\n\n\n<br/>\n\n  ```diff\n  +        bookingRefsFiltered.forEach(async (bookingRef) => {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e5443a54-54a5-4daa-ab78-cab135ecfe7a&type=bug&issue=Race+condition%3A+deletions+continue+after+handler+returns&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fbookings.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:32Z"
+          },
+          {
+            "path": "packages/app-store/vital/lib/reschedule.ts",
+            "line": 131,
+            "body": "Review #4\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n UnhandledPromiseRejection: async forEach escapes try/catch\n\n\n  ###### Where:\n\n `packages/app-store/vital/lib/reschedule.ts`\n\n\n  ###### Description:\n\n deletion failures go uncaught when async callbacks reject after forEach returns\nNew code changed the loop to `bookingRefsFiltered.forEach(async ...)` in `Reschedule`, so the callback now returns promises that `forEach` does not await. The surrounding `try/catch` at packages/app-store/vital/lib/reschedule.ts:124-136 cannot catch rejections from `await getCalendar(...)` or later async deletions inside that callback. The function then continues to `sendRequestRescheduleEmail(...)` and returns `true` at lines 139-145 without awaiting per-reference cleanup. This matches the TypeScript allowlisted async-forEach error-handling pattern. UPSTREAM: affected outward surface is the file's default-exported `Reschedule`. DOWNSTREAM: the changed callback invokes `getCalendar`, `calendar?.deleteEvent`, and `deleteMeeting`, whose rejected promises escape this `try/catch`.\n\n\n\n<br/>\n\n  ```diff\n  +      bookingRefsFiltered.forEach(async (bookingRef) => {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e5443a54-54a5-4daa-ab78-cab135ecfe7a&type=bug&issue=UnhandledPromiseRejection%3A+async+forEach+escapes+try%2Fcatch&path=packages%2Fapp-store%2Fvital%2Flib%2Freschedule.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:35Z"
+          },
+          {
+            "path": "packages/app-store/wipemycalother/lib/reschedule.ts",
+            "line": 128,
+            "body": "Review #5\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n UnhandledPromiseRejection: async forEach escapes try/catch\n\n\n  ###### Where:\n\n `packages/app-store/wipemycalother/lib/reschedule.ts`\n\n\n  ###### Description:\n\n deletion failures go uncaught when async callbacks reject inside forEach\nNew code in packages/app-store/wipemycalother/lib/reschedule.ts:125 changes the loop to `bookingRefsFiltered.forEach(async (bookingRef) => {`, and line 128 adds `await getCalendar(...)` inside that callback. In JS/TS, `forEach` does not await async callbacks, so promise rejections from the awaited work and subsequent `calendar?.deleteEvent(...)` / `deleteMeeting(...)` calls escape the surrounding try/catch in this function. This exactly matches the allowed TypeScript error_handling pattern for async forEach fire-and-forget.\n\n\n\n<br/>\n\n  ```diff\n  +      bookingRefsFiltered.forEach(async (bookingRef) => {\n+            const calendar = await getCalendar(credentialsMap.get(bookingRef.type));\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e5443a54-54a5-4daa-ab78-cab135ecfe7a&type=bug&issue=UnhandledPromiseRejection%3A+async+forEach+escapes+try%2Fcatch&path=packages%2Fapp-store%2Fwipemycalother%2Flib%2Freschedule.ts\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:38Z"
+          },
+          {
+            "path": null,
+            "line": null,
+            "body": "Review #2\n\n\n  ![License](https://img.shields.io/badge/Category-Bug_Alert-blue.svg)\n  ![License](https://img.shields.io/badge/Risk-High-red.svg)\n\n\n\n  ###### Issue:\n\n Promise error: async forEach cleanup unawaited\n\n\n  ###### Where:\n\n `packages/trpc/server/routers/viewer/bookings.tsx`\n\n\n  ###### Description:\n\n calendar deletions escape error handling when delete/getCalendar rejects\nNew code in packages/trpc/server/routers/viewer/bookings.tsx:553 uses `bookingRefsFiltered.forEach(async (bookingRef) => { ... })`. Inside that unawaited async callback, it awaits `getCalendar(...)` at :556 and returns `calendar?.deleteEvent(...)` / `deleteMeeting(...)` at :558-562. This matches the TypeScript error_handling allowlist: async work inside `forEach` is fire-and-forget, so rejections escape normal control flow while execution proceeds to :567 (`sendRequestRescheduleEmail`).\n\n\n\n<br/>\n\n  ```diff\n  +        bookingRefsFiltered.forEach(async (bookingRef) => {\n  ```\n\n\n<br/>\n\n  \n  \nFeedback: <a href=\"https://console.cloudaeye.com/code-review?review=e5443a54-54a5-4daa-ab78-cab135ecfe7a&type=bug&issue=Promise+error%3A+async+forEach+cleanup+unawaited&path=packages%2Ftrpc%2Fserver%2Frouters%2Fviewer%2Fbookings.tsx\">\ud83d\udc4e</a>",
+            "created_at": "2026-04-21T03:41:26Z"
+          }
+        ]
       }
     ]
   }
-}
\ No newline at end of file
+}
diff --git a/offline/results/openai_gpt-5.2/candidates.json b/offline/results/openai_gpt-5.2/candidates.json
index 69a6e0d..090663a 100644
--- a/offline/results/openai_gpt-5.2/candidates.json
+++ b/offline/results/openai_gpt-5.2/candidates.json
@@ -1301,6 +1301,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Verification crashes with a RuntimeException when the derived English companion _en.properties file is missing because verifySafeHtml() wraps any IOException (including FileNotFound) and aborts verification instead of falling back",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Method name typo: private method is named santizeAnchors instead of sanitizeAnchors, making sanitize-related code harder to find and maintain",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/37634": {
@@ -1935,6 +1949,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "AccessTokenContext constructor performs a null check on grantType twice and fails to validate rawTokenId (uses Objects.requireNonNull(grantType, \"Null rawTokenId not allowed\") instead of checking rawTokenId), allowing rawTokenId to be null and risking NullPointerException later",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "OAuth2GrantTypeFactory.getShortcut Javadoc claims shortcuts are usually 3 letters, but existing implementations use 2-letter shortcuts (e.g., 'ac', 'cc', 'pg'), creating a documentation mismatch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "DefaultTokenContextEncoderProviderTest.testIncorrectGrantType catches generic RuntimeException instead of the expected IllegalArgumentException, making the test pass for unrelated runtime failures",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/38446": {
@@ -2397,6 +2431,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "RecoveryAuthnCodeInputLoginBean constructor calls Optional.get() on the result of RecoveryAuthnCodesUtils.getCredential(user) without checking presence, risking NoSuchElementException when the user has no recovery code credential and crashing login form rendering",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "BackwardsCompatibilityUserStorage.getCredentials reconstructs a RecoveryAuthnCodesCredentialModel via createFromValues without preserving the previously stored/generated credential id from updateCredential, causing a missing/different (potentially null) id that can break credential removal",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/36882": {
@@ -2642,6 +2690,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "UpdateCompatibilityCheck.run calls picocli.exit(CompatibilityResult.FEATURE_DISABLED) when rolling-updates is disabled, which triggers System.exit and terminates the entire JVM (breaking embedding)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/36880": {
@@ -3171,6 +3227,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Permission cleanup listener in AdminPermissions is incorrectly gated by the V1 feature flag (ADMIN_FINE_GRAINED_AUTHZ), so when V2 is enabled without V1 the role/client/group removal cleanup path does not run",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ClientPermissionsV2.hasPermission looks up per-client resources with resourceStore.findByName(..., client.getId(), server.getId()) using server.getId() as owner, causing the lookup to miss when the resource owner differs and forcing fallback to the type-level \"Clients\" resource, ignoring client-specific grants",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ClientPermissionsV2 defines a private getEvaluationContext(...) helper that is never called because permission evaluation uses root.evaluatePermission(...) directly, leaving dead/unused code and making its custom evaluation attributes ineffective",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "PermissionClientTest.testManageOnlyOneClient assumes at least two default client scopes by indexing get(1) after only asserting the list is non-empty, risking IndexOutOfBoundsException when fewer than two scopes exist",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/37038": {
@@ -3735,6 +3817,50 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "GroupPermissionsV2.hasPermission() looks up a resource by name using resourceStore.findByName(server, groupId) but callers pass a resource ID (e.g., getGroupIdsWithViewPermission passes groupResource.getId()), causing an id-vs-name mismatch and breaking group-specific permission checks when resource name differs from internal id",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GroupPermissions.canManage() was narrowed to only root.hasOneAdminRole(AdminRoles.MANAGE_USERS), dropping prior default/permission-based user-management semantics and breaking group management when defaults grant access without a direct MANAGE_USERS role",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GroupPermissionsV2 repeats the same narrowing for canManage (MANAGE_USERS role-only), propagating the regression into the V2 permissions path",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "RolePermissions role-mapping logic now gates mapping on root.hasOneAdminRole(AdminRoles.MANAGE_USERS) before checkAdminRoles(role), potentially breaking mapping when canManageDefault(RoleModel) grants access without a direct MANAGE_USERS role",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AdminPermissions uses ADMIN_FINE_GRAINED_AUTHZ_V2 to enable V2 management/evaluator paths but guards listener/cleanup registration with ADMIN_FINE_GRAINED_AUTHZ, so cleanup never runs when V2 is enabled without V1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GroupResourceTypeEvaluationTest asserts subgroup creation returns 204 No Content, but GroupResource.addChild() returns 201 Created for new subgroup creation, making the test assertion incorrect",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GroupPermissionsV2.getGroupIdsWithViewPermission() uses hasPermission(groupResource.getId(), ...) and adds groupResource.getId() to results, but hasPermission treats the argument as a resource name (findByName), causing group filtering to miss VIEW_MEMBERS/MANAGE_MEMBERS grants",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/33832": {
@@ -4269,6 +4395,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "ASN1Decoder.readLength returns -1 for indefinite-length encoding, but callers pass this negative length to read(int), causing NegativeArraySizeException instead of an IOException",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ASN1Decoder.readLength validates decoded length against total input limit (and rejects length >= limit) rather than remaining bytes after consuming tag/length, incorrectly rejecting payloads that exactly consume the remaining bytes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AuthzClientCryptoProvider.concatenatedRSToASN1DER contains dead code: it creates ASN1Encoder instances and writes r/s to them but immediately discards them, so those writes have no effect",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/40940": {
@@ -4740,6 +4886,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Background reader thread is started but never joined/waited for before asserting, creating a race condition and flaky test behavior",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Assertion that caughtExceptions is empty can miss exceptions added by the reader thread after deletedAll is set because the thread may still be running",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1": {
@@ -5228,6 +5388,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "UsernamePasswordForm.authenticate/challenge skips webauthnAuth.fillContextForm on the initial login page because isConditionalPasskeysEnabled(context.getUser()) requires a non-null user, disabling initial passkey setup when no user is selected",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "UsernamePasswordForm.challenge error/retry path skips webauthnAuth.fillContextForm when login fails with an unknown user (context.getUser() is null), causing passkey retry/challenge UI options to disappear on subsequent attempts",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/93824": {
@@ -5687,6 +5861,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "SpanFlusher._ensure_processes_alive kills an unhealthy worker and immediately starts a replacement without waiting/joining for the old process to exit, allowing overlapping shard handlers and duplicate flushing",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SpanFlusher.join breaks out of the process-join loop when the deadline is exceeded, skipping termination/cleanup for remaining processes and leaving child workers running",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Inconsistent metric tag keys for the same shard dimension (using both \"shard\" and \"shards\") fragments metrics/dashboards",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Test monkeypatches time.sleep to a no-op but later relies on time.sleep(0.1) to wait for flusher threads, so the sleep does not actually wait and the test becomes timing-racy/unreliable",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5": {
@@ -6281,6 +6481,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Mutual-exclusion validation between \"age\" and \"timestamp\" is bypassed when either value is 0 because the checks use truthiness (self.initial_data.get(...)) instead of presence, allowing invalid mixed-spec reports to pass",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "fetch_error_details incorrectly pairs error_ids with nodestore get_multi results by zipping error_ids with events.values(), but get_multi returns a dict with no positional ordering guarantee, causing error details to be associated with the wrong IDs when entries are omitted or reordered",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "BaseDetectorTypeValidator.update reads validated_data[\"detector_type\"] even though the serializer validates the field as \"type\", so updates using the provided type are ignored and instance.group_type is used instead",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Analytics event preprod_artifact.api.assemble is recorded before the feature gate check, so requests denied by features.has(...) still emit telemetry and count as assemble events",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1": {
@@ -7035,6 +7261,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "OrganizationAuditLogsEndpoint.get dereferences organization_context.member.has_global_access without a None check, causing AttributeError when auth context has no member (e.g., API key/org auth token)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "OrganizationAuditLogsEndpoint.get uses OptimizedCursorPaginator with order_by='-datetime', but OptimizedCursorPaginator.get_item_key applies math.floor/ceil and int() to the key value, which will TypeError for datetime keys during cursor generation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "BasePaginator.get_result allows negative start_offset for previous-page cursors (start_offset = offset when cursor.is_prev), leading to Django QuerySet negative slicing and AssertionError at runtime",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "OptimizedCursorPaginator.get_result explicitly allows negative cursor.offset when enable_advanced_features is true and slices queryset[start_offset:stop], which can trigger Django ORM AssertionError due to unsupported negative QuerySet slicing",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "OptimizedCursorPaginator.get_item_key assumes ordered field values are numeric by applying math.floor/ceil, so paginating on non-numeric ordered fields (e.g., datetime or string) will crash during cursor key computation",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/97529": {
@@ -7497,6 +7755,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Calling s.search.TotalDocs() during initialization can trigger a concurrent map read/write panic because bleveBackend.TotalDocs iterates b.cache without locking while BuildIndex writes to b.cache under b.cacheMu",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "server.History and server.Origin dereference s.search without a nil guard, causing a nil pointer panic when search is not configured (search is optional and only initialized when non-nil)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "bleveBackend.BuildIndex no longer holds the cache lock for the full function, allowing concurrent callers to build the same index key simultaneously before either caches it, leading to duplicate index builds and cache coherence issues",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "bleveBackend.TotalDocs iterates b.cache without locking, which is unsafe under concurrent access and increases correctness risk alongside the narrowed BuildIndex lock scope",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/80168": {
@@ -7910,6 +8194,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but is empty (pass) and does not implement required abstract methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data), making it non-instantiable and causing a runtime TypeError when instantiated via Detector.detector_handler",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "StatefulDetectorHandler adds a new abstract method build_occurrence_and_event_data, but existing subclasses (e.g., MetricAlertDetectorHandler) were not updated to implement it, causing abstract-class instantiation failures",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "A function in src/sentry/workflow_engine/processors/detector.py is annotated to return dict[DetectorGroupKey, DetectorEvaluationResult] but its docstring claims it returns a list of DetectorEvaluationResult, misleading callers about the return shape",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/80528": {
@@ -8206,6 +8510,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "In get_monitor_environment_context, the code copies and humanizes config['schedule_type'] but returns monitor_environment.monitor.config instead of the mutated local config, so the transformation is dropped and the displayed context remains unmodified",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "create_incident_occurrence propagates the untransformed schedule_type outward by inserting get_monitor_environment_context(monitor_env) into event_data['contexts']['monitor'] before producing the occurrence to Kafka",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/77754": {
@@ -8507,6 +8825,44 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Dataclass field default `queued: datetime = timezone.now()` is evaluated at import/class-definition time, causing omitted `queued` values to reuse a frozen timestamp",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "`AssignmentSource.from_integration()` constructs instances without passing `queued`, so new instances incorrectly inherit the frozen default timestamp",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "`AssignmentSource.to_dict()` returns `asdict(self)` including a raw `datetime` (`queued`), which may not be JSON-serializable",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Celery task enqueue passes `assignment_source.to_dict()` directly in `apply_async` kwargs, risking a TypeError/JSON serialization failure due to non-serializable `datetime` in `queued`",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Test name typo: `test_from_dict_inalid_data` is misspelled (`inalid` vs `invalid`), making failures misleading in test output",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Test naming mismatch: `test_from_dict_empty_array` name implies an array but the test passes an empty dict `{}`, making intent unclear",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/95633": {
@@ -9004,6 +9360,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "SimpleQueueProcessingStrategy.close stops the commit loop (sets shutdown_event and joins commit thread) before queue_pool.shutdown drains/joins workers, so offsets completed during shutdown may never be committed",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TestFixedQueuePool.test_concurrent_processing_across_groups claims to verify concurrent processing but only asserts all items completed and that group IDs appeared, so it can pass even with fully serial processing",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "TestSimpleQueueProcessingStrategy.test_concurrent_processing_different_groups claims to verify concurrency but only asserts that four results were processed, with no concurrency-specific assertion",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Background-thread tests in test_results_consumer.py use fixed sleep/poll timing windows (e.g., loops with time.sleep(0.1) and max_wait), making them flaky when async processing/commits take longer than expected",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "test_thread_queue_parallel_error_handling docstring claims errors shouldn't block commits for other messages, but the test assertions expect no commits (or none for the partition), so the test intent/documentation is misleading",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2": {
@@ -9747,6 +10135,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Advanced pagination branch allows negative Django QuerySet slicing (queryset[start_offset:stop] with start_offset < 0), which can crash with AssertionError",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "OptimizedCursorPaginator.get_item_key applies math.floor/ceil and int() to non-numeric keys (e.g., datetime), causing TypeError at runtime when ordering by a datetime field",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Previous-page pagination preserves a negative cursor offset (start_offset = offset when cursor.is_prev), leading to negative QuerySet slicing and potential AssertionError",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3": {
@@ -10354,6 +10762,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Using Python's process-dependent hash(tuple(...)) in cache keys causes different workers to compute different keys, breaking cross-process cache reuse and invalidate_upsampling_cache invalidation in error_upsampling.py",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "_set_sample_rate_from_error_sampling uses a falsy guard `if client_sample_rate:` so valid zero values (0/0.0) are skipped and not propagated to normalized_data['sample_rate'] in factories.py",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/103633": {
@@ -10829,6 +11251,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "In pkg/services/authz/rbac/service_test.go, the cache deny test setup contradicts its comment: it says the cache entry should allow dashboard access, but the fixture stores map[string]bool{\"dashboards:uid:dash1\": false}, creating a misleading/self-contradictory test",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In pkg/services/authz/rbac/service.go, authorization can be bypassed because cached permission grants are trusted without freshness/revalidation: the code returns Allowed based on cached permissions, and getCachedIdentityPermissions only checks cache presence, so revoked access may persist until TTL expiry",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/getsentry/sentry/pull/67876": {
@@ -11310,6 +11746,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "GitHubInstallation.dispatch dereferences integration.metadata[\"sender\"][\"login\"] without guarding for missing keys, which can raise KeyError when integration metadata lacks sender/login",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "OAuthLoginView.dispatch uses pipeline.signature as the OAuth state parameter, making state predictable/reusable and weakening CSRF/replay protection because pipeline.signature is deterministic rather than per-request randomized",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/keycloak/keycloak/pull/32918": {
@@ -11634,6 +12084,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Test cleanup registers removal for identity provider alias \"alias\" instead of the actual created aliases (e.g., \"idp-alias-<i>\" and \"idp-alias-20\"), so teardown targets a non-existent IDP",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Because cleanup uses the wrong alias, identity providers created in OrganizationCacheTest.testCacheIDPForLogin are not removed, leaking created providers between test runs",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/94942": {
@@ -12151,6 +12615,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "enableSqlExpressions in pkg/expr/reader.go always returns false on every path, unconditionally disabling SQL expressions",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "QueryTypeSQL parsing/execution is rejected because the new gate in pkg/expr/reader.go blocks QueryTypeSQL whenever enableSqlExpressions returns false",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "SQLCommand.Execute in pkg/expr/sql_command.go calls db.QueryFramesInto on sql.NewInMemoryDB, but QueryFramesInto is unimplemented and returns \"not implemented\", causing deterministic runtime failure",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/90939": {
@@ -12454,6 +12938,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "GetWebAssets uses incomplete double-checked locking: after the RLock fast-path sees an empty cache, it acquires the write lock but does not re-check entryPointAssetsCache, allowing concurrent callers to redundantly rebuild/fetch assets",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GetWebAssets can overwrite a previously valid entryPointAssetsCache with nil: it assigns entryPointAssetsCache = result even when asset loading fails and result is nil, causing cached assets to be lost after transient load errors",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/80329": {
@@ -13026,7 +13524,21 @@
         "source": "extracted"
       },
       {
-        "text": "Logging full ID slices (\"ids\", ids) can create extremely large log entries at scale, potentially overwhelming log pipelines; should be removed or limited to trace level",
+        "text": "Logging full ID slices (\"ids\", ids) can create extremely large log entries at scale, potentially overwhelming log pipelines; should be removed or limited to trace level",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
+    ],
+    "cloudaeye": [
+      {
+        "text": "CleanAnnotations logs routine batch progress using r.log.Error after fetchIDs succeeds, causing normal control-flow messages to be treated as errors and flood error logs (pre-delete progress path)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "CleanAnnotations logs routine batch progress using r.log.Error after fetchIDs succeeds, causing normal control-flow messages to be treated as errors and flood error logs (post-delete progress path)",
         "path": null,
         "line": null,
         "source": "extracted"
@@ -13875,6 +14387,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "On Storage.Update error path, code records duration with recordLegacyDuration instead of recordStorageDuration, breaking storage failure metrics",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In Delete, enriched logger with name/kind/method is created but plain d.Log is stored in context (klog.NewContext(ctx, d.Log)), dropping structured log fields",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "On Delete success, recordStorageDuration is called with name as the kind/label argument instead of options.Kind, misattributing metrics",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In DeleteCollection async legacy goroutine, legacy DeleteCollection result is recorded with recordStorageDuration instead of recordLegacyDuration, breaking legacy latency/error metrics",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/106778": {
@@ -14539,6 +15077,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "useIsGrafanaPromRuleEditable references an undefined identifier `ctx`, causing a ReferenceError crash during editability checks",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In FilterView.tsx, the Grafana branch of rules.map renders <GrafanaRuleListItem> without a React `key` prop, risking stale row reuse/state mismatch when list order changes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/107534": {
@@ -15490,6 +16042,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Race condition/TOCTOU in CreateOrUpdateDevice: CountDevices check is done separately from insert/update without a transaction or lock, allowing concurrent requests to exceed the device limit",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Inconsistent time-window basis: updateDevice builds its BETWEEN window from device.UpdatedAt while the device-limit count uses time.Now().UTC(), so a device can be counted as active but fail the update WHERE clause if UpdatedAt lags",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Anonymous authentication can fail because ErrDeviceLimitReached is introduced/returned from device tagging and is treated as a hard failure during Authenticate instead of a best-effort tagging error",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Misleading error mapping: updateDevice returns ErrDeviceLimitReached when RowsAffected()==0, but zero rows can also mean the device row is missing or outside the allowed update window, not that the limit was reached",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9": {
@@ -15895,6 +16473,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "config/initializers/i18n.rb unconditionally calls SiteSetting.default_locale.to_sym, which can raise NoMethodError when SiteSetting.default_locale is nil and break boot/translation lookup",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "lib/freedom_patches/translate_accelerator.rb uses unsynchronized lazy initialization and check-then-act logic for @loaded_locales, allowing concurrent threads to double-load locales or corrupt shared state",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "lib/freedom_patches/translate_accelerator.rb caches loaded locales without normalizing String vs Symbol, so the same locale may be treated as different keys and loaded twice when callers mix forms",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/grafana/grafana/pull/76186": {
@@ -16154,6 +16752,56 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "TestLogger.FromContext returns a fresh NewTestLogger() instead of preserving the receiver, causing captured logs/state to be lost and breaking tests that assert on logged output",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In pkg/plugins/log/logger.go, when FromContext(ctx) does not return *log.ConcreteLogger, the code falls back to d.New(), dropping the contextual logger returned by FromContext and losing context-derived fields/attributes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.QueryData dereferences req.PluginContext without checking if req is nil, causing a nil-pointer panic on nil QueryDataRequest",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.CallResource dereferences req.PluginContext without checking if req is nil, causing a nil-pointer panic on nil CallResourceRequest",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.CheckHealth dereferences req.PluginContext without checking if req is nil, causing a nil-pointer panic on nil CheckHealthRequest",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "ContextualLoggerMiddleware.CollectMetrics dereferences req.PluginContext without checking if req is nil, causing a nil-pointer panic on nil CollectMetricsRequest",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "LoggerMiddleware no longer appends traceID from context to log parameters, potentially regressing request correlation in plugin logs if the contextual logger does not automatically emit trace IDs",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "pluginsintegration.go wires in ContextualLoggerMiddleware, which can introduce panics for nil requests because the middleware dereferences req.PluginContext without nil guards",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10": {
@@ -17274,6 +17922,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "before_validation callback in app/models/embeddable_host.rb calls self.host.sub! unconditionally, causing NoMethodError when host is nil during validation/save",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Admin::EmbeddableHostsController#update uses EmbeddableHost.where(id: params[:id]).first without nil handling; save_host(host) can dereference nil when no record matches params[:id]",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Admin::EmbeddableHostsController#destroy uses EmbeddableHost.where(id: params[:id]).first without nil handling; host.destroy can raise when no record matches params[:id]",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Ember component embeddable-host.js.es6 calls host.destroyRecord().then(...) without a rejection handler, risking unhandled promise rejection / silent failure when the destroy request fails",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7": {
@@ -17886,6 +18560,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "In app/assets/stylesheets/mobile/topic-post.scss, the .topic-list-item h3 heading color logic changes light-theme lightness from 20% to 50%, causing unexpected contrast changes in the light theme",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8": {
@@ -18735,6 +19417,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "add_members assumes params[:usernames] is a String and calls split(\",\") unconditionally, causing NoMethodError when usernames is an Array",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Pagination totalPages calculation uses Math.floor(user_count / limit) + 1, overcounting when user_count is an exact multiple of limit and allowing navigation to an empty extra page",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "admin_group_route setupController calls model.findMembers() without returning/awaiting or handling errors, so AJAX failures become unhandled Promise rejections and fetch errors are hidden",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "group model triggers findMembers() after add/remove operations without sequencing/cancellation, allowing overlapping reloads to resolve out of order and overwrite newer member state with stale data",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3": {
@@ -19223,6 +19931,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Client-side rejectedEmails cache causes valid emails to remain blocked after a transient server-side rejection because rejectedEmails is never cleared on email edit/retry/success in create_account_controller.js",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Non-atomic match_count increment in blocked_email.rb (read/modify/write with save) can lose increments under concurrent requests due to missing locking or atomic counter update",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5": {
@@ -19626,6 +20348,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "In app/assets/stylesheets/common/foundation/mixins.scss, the align-items mixin emits `-ms-align-items`, which is not a valid legacy IE/Edge flexbox property (should rely on `-ms-flex-align` instead), causing alignment to break in IE/legacy Edge.",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In app/assets/stylesheets/common/foundation/mixins.scss, the order mixin incorrectly maps modern `order: $val` directly to `-webkit-box-ordinal-group` and `-moz-box-ordinal-group`, which have different semantics in the 2009 flexbox spec, causing item ordering to break in old WebKit/Firefox.",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6": {
@@ -20114,6 +20850,14 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "In app/serializers/user_serializer.rb, the include hook for website_name is defined as include_website_name instead of the expected include_website_name? predicate, so the guard may be ignored and website_name may be serialized unconditionally (logic error / potential NoMethodError depending on framework behavior).",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4": {
@@ -21250,69 +21994,137 @@
         "source": "extracted"
       },
       {
-        "text": "Kernel#open supports pipe syntax (`|command`), creating potential command injection if untrusted URLs reach `import_remote` (e.g., via `disqus.thor` XML-parsed URLs)",
+        "text": "Kernel#open supports pipe syntax (`|command`), creating potential command injection if untrusted URLs reach `import_remote` (e.g., via `disqus.thor` XML-parsed URLs)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "RSS items may have `i.content` as nil; calling `.scrub` on nil raises `NoMethodError`",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "A single bad RSS item can crash the entire scheduled polling job because there is no per-item rescue/handling in the loop",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Origin validation uses `discourseUrl.indexOf(e.origin)`, allowing prefix/substring bypass so a different origin with a matching prefix can pass and send postMessage events",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "`request.referer` is interpolated into a JavaScript string for postMessage targetOrigin; Rails HTML-escaping turns `&` into `&amp;`, corrupting the JS value and breaking functionality",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Interpolating `request.referer` into JS without explicit JS-escaping is a potential XSS vector if escaping is disabled or copied into a raw context",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
+    ],
+    "greptile-v4-1": [
+      {
+        "text": "XSS vulnerability: `request.referer` is interpolated unescaped into a JavaScript string literal in `app/views/layouts/embed.html.erb`, allowing crafted Referer values (e.g., quotes or `</script>`) to inject arbitrary JS",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Invalid Ruby/ERB syntax: `<%- end if %>` in `app/views/embed/best.html.erb` will raise a `SyntaxError` and prevent the template from rendering",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Insecure `postMessage` origin validation in `app/assets/javascripts/embed.js`: using substring matching (`discourseUrl.indexOf(e.origin) === -1`) allows spoofing by origins that are prefixes of the expected URL; should compare exact origins",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Potential crash in feed import job `app/jobs/scheduled/poll_feed.rb`: `i.content` can be `nil` for some RSS/Atom items, so calling `.scrub` on it raises `NoMethodError`; should guard/fallback to summary/empty string",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Bug in `absolutize_urls` port handling: port exclusion logic ignores scheme (`uri.port != 80 && uri.port != 443`), causing non-default ports (e.g., HTTP on 443) to be dropped; should make the check scheme-aware",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
+    ],
+    "cloudaeye": [
+      {
+        "text": "poll_feed crashes with NoMethodError when an RSS item has nil content because it calls i.content.scrub without a nil guard",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "RSS items may have `i.content` as nil; calling `.scrub` on nil raises `NoMethodError`",
+        "text": "poll_feed does not handle exceptions from network fetch or RSS parsing (SimpleRSS.parse open(...)), so unreachable or malformed feeds can crash the scheduled job",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "A single bad RSS item can crash the entire scheduled polling job because there is no per-item rescue/handling in the loop",
+        "text": "TopicEmbed.import can crash when embed.post is nil (stale/missing associated post) because it passes nil into PostRevisor without checking",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "Origin validation uses `discourseUrl.indexOf(e.origin)`, allowing prefix/substring bypass so a different origin with a matching prefix can pass and send postMessage events",
+        "text": "TopicRetriever crashes if SiteSetting.embed_by_username is nil because it calls downcase on a nil setting",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "`request.referer` is interpolated into a JavaScript string for postMessage targetOrigin; Rails HTML-escaping turns `&` into `&amp;`, corrupting the JS value and breaking functionality",
+        "text": "embed.js can crash when #discourse-comments is missing because it calls appendChild on a null element",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "Interpolating `request.referer` into JS without explicit JS-escaping is a potential XSS vector if escaping is disabled or copied into a raw context",
+        "text": "embed.js origin validation is insecure because it uses a substring check (discourseUrl.indexOf(e.origin)) instead of an exact origin comparison, allowing bypass with malicious origins",
         "path": null,
         "line": null,
         "source": "extracted"
-      }
-    ],
-    "greptile-v4-1": [
+      },
       {
-        "text": "XSS vulnerability: `request.referer` is interpolated unescaped into a JavaScript string literal in `app/views/layouts/embed.html.erb`, allowing crafted Referer values (e.g., quotes or `</script>`) to inject arbitrary JS",
+        "text": "app/views/embed/best.html.erb contains invalid ERB/Ruby syntax (<%- end if %>) causing template parse/render failure",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "Invalid Ruby/ERB syntax: `<%- end if %>` in `app/views/embed/best.html.erb` will raise a `SyntaxError` and prevent the template from rendering",
+        "text": "spec/controllers/embed_controller_spec.rb test name claims it raises an error but the assertion only checks response not success, creating a name/body mismatch and potentially misattributing failures",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "Insecure `postMessage` origin validation in `app/assets/javascripts/embed.js`: using substring matching (`discourseUrl.indexOf(e.origin) === -1`) allows spoofing by origins that are prefixes of the expected URL; should compare exact origins",
+        "text": "SSRF risk: poll_feed fetches SiteSetting.feed_polling_url via open-uri without scheme/host allowlisting or destination validation",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "Potential crash in feed import job `app/jobs/scheduled/poll_feed.rb`: `i.content` can be `nil` for some RSS/Atom items, so calling `.scrub` on it raises `NoMethodError`; should guard/fallback to summary/empty string",
+        "text": "SSRF risk: TopicEmbed.import_remote fetches open(url).read on attacker-influenced URLs without sufficient URL sanitization/validation",
         "path": null,
         "line": null,
         "source": "extracted"
       },
       {
-        "text": "Bug in `absolutize_urls` port handling: port exclusion logic ignores scheme (`uri.port != 80 && uri.port != 443`), causing non-default ports (e.g., HTTP on 443) to be dropped; should make the check scheme-aware",
+        "text": "XSS risk: TopicEmbed builds HTML with unescaped url interpolated into an <a> tag (href and link text), allowing injection if url contains quotes/HTML",
         "path": null,
         "line": null,
         "source": "extracted"
@@ -22178,6 +22990,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Client-side upload size validation in app/assets/javascripts/discourse/lib/utilities.js uses a hardcoded 10MB (10 * 1024 KB) instead of per-type site settings (Discourse.SiteSettings['max_' + type + '_size_kb']), causing configured upload limits to be ignored",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "HTTP 413 error handler in app/assets/javascripts/discourse/lib/utilities.js uses a hardcoded 10MB max size instead of Discourse.SiteSettings.max_image_size_kb, causing the user-facing 'file too large' message to report the wrong limit when site/server settings differ",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "UploadsController#create_upload passes a percentage geometry string (\"80%\") into OptimizedImage.downsize, which may break the animated-image downsize/optimize path that expects WxH-style geometry (risk of ArgumentError or failed resize for animated GIFs)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In app/models/optimized_image.rb, defining self.downsize twice causes the later method to override the earlier one, effectively removing the width/height arity; existing callers using separate max_width and max_height arguments may now raise ArgumentError",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2": {
@@ -22854,6 +23692,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "TopicsController#unsubscribe dereferences tu.notification_level without guarding against TopicUser.find_by returning nil, causing NoMethodError when no topic_users row exists for the user/topic",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Email notification template sets class='.previous-discussion' (includes a literal dot), so the intended previous-discussion class won\u2019t match styling/hooks",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/22532": {
@@ -23593,6 +24445,26 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Script uses BSD/macOS-specific `sed -i '' -E` syntax, causing runtime failure on Linux hosts with GNU sed when updating the .env file",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Hard-coded shared log file `/tmp/tmole.log` with no locking or per-process isolation creates a race condition where concurrent script runs can overwrite/read each other\u2019s tmole output and reuse the wrong webhook URL",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Fixed startup polling timeout (~10 seconds) can be too short; if tmole initializes slower, the script incorrectly treats it as failure and exits",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/8330": {
@@ -24229,6 +25101,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Comparing two newly created Dayjs objects with `===` in override-day detection always returns false (object identity comparison), breaking detection when start and end represent the same instant",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Working-hours availability check computes both `start` and `end` from `slotStartTime` and never uses `slotEndTime`, so slots that end after `workingHour.endTime` can be incorrectly marked available",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/14943": {
@@ -24660,6 +25546,20 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Non-atomic update of retryCount using `reminder.retryCount + 1` based on a stale value from `findMany`, causing lost increments under concurrent schedulers (race condition)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "`deleteMany` filter uses an `OR` branch with only `retryCount > 1` and no `method: WorkflowMethods.SMS` constraint, so it can delete non-SMS workflow reminders when retryCount exceeds 1",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/22345": {
@@ -26434,6 +27334,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Non-transactional read-then-write in apps/web/pages/api/webhook/app-credential.ts can race: concurrent requests may both miss findFirst and both create duplicate Credential rows",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Credential model lacks a unique constraint on (userId, appId), so the database does not prevent duplicate credentials for the same user/app pair",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "parseRefreshTokenResponse.ts fabricates a placeholder refresh_token when the provider omits it, causing incorrect token data to be returned and potentially persisted",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "refreshOAuthTokens.ts returns a raw fetch Response in one branch while other branches return parsed token payloads, creating a return-shape mismatch that breaks callers expecting .data token fields",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "googlecalendar CalendarService reads res?.data from refreshOAuthTokens output even when it is a fetch Response, so token field access will fail at runtime",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/7232": {
@@ -27362,6 +28294,44 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "handleCancelBooking.ts calls async deleteScheduledEmailReminder/deleteScheduledSMSReminder inside a forEach without awaiting or including the promises in Promise.all, so reminder deletion failures can be unhandled and cleanup can silently fail",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "handleNewBooking.ts calls async reminder deletion helpers inside a forEach without await, so the surrounding try/catch cannot reliably catch later rejections and rescheduling may continue before cleanup completes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "scheduleEmailReminders.ts wraps all cancellation requests in a single try/catch while awaiting inside a loop, so one failed cancellation aborts the loop and leaves remaining reminders still scheduled",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "emailReminderManager.ts only cancels SendGrid scheduled sends when immediateDelete is true; callers that omit immediateDelete now only mark DB rows cancelled and do not delete the external SendGrid batch as expected",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "viewer/bookings.tsx triggers reminder deletions via async helpers without awaiting them (fire-and-forget in forEach), so cleanup may be skipped or finish after the mutation completes and promise rejections may go unhandled",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "viewer/workflows.tsx uses deleteScheduledEmailReminder(..., true) paths where the helper cancels SendGrid but does not delete/update the WorkflowReminder DB row, leaving stale DB reminders that are not cleaned up by the cancelled=true cleanup job",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/14740": {
@@ -28220,6 +29190,32 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "addGuestsHandler incorrectly requires a team user to be both team admin and team owner (uses &&) to pass the permission check, denying access to admins who are not owners",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "addGuestsHandler does not deduplicate duplicate emails within the submitted guests array, allowing duplicate attendee rows to be created via createMany",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "AddGuestsDialog initializes/resets multiEmailValue to [\"\"] and only guards against length===0, causing validation to fail on untouched/reset state and blocking guest submission",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Blacklist email check is case-sensitive: blacklist entries are lowercased but submitted guest emails are compared without normalization, allowing mixed-case emails to bypass the blacklist",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/10600": {
@@ -28991,6 +29987,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "Disable TOTP endpoint logs an error message about 'backup code login', which mismatches the disable flow and misleads debugging when the encryption key is missing",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Backup code consumption in authorize() is not concurrency-safe (read/check/mutate/write without transaction/CAS), allowing the same one-time backup code to be reused under concurrent login requests",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Backup code comparison does not normalize case, so mixed-case user input may fail to match stored lowercase hex backup codes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "BackupCode.tsx default-exported component is named TwoFactor, causing a naming mismatch with the file/UI purpose and confusing stack traces/debugging",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "EnableTwoFactorModal calls body.backupCodes.map(...) without guarding for missing/null backupCodes, risking a runtime TypeError if the setup response omits or nulls that field",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/10967": {
@@ -29978,6 +31006,56 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "CalendarManager now calls createEvent(calEvent, credential.id) but some adapters/implementations (e.g., CalendarService) still implement createEvent(event) with one parameter, causing an interface/signature mismatch that can break integrations",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "EventManager destructures the first element from evt.destinationCalendar ?? [] and then dereferences mainHostDestinationCalendar.integration without guarding/optional chaining, crashing when destinationCalendar is null or empty",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "Calendar.d.ts changes Calendar.createEvent to require (event, credentialId), but downstream implementations (e.g., packages/lib/CalendarService.ts) still declare createEvent(event) with one parameter, creating a concrete arity mismatch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "destinationCalendar contract changed to DestinationCalendar[] | null, but some consumers still treat it like a single object / assume a non-empty array, leading to runtime errors (e.g., EventManager accessing [0] then dereferencing without a guard)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "GoogleCalendarService.updateEvent uses a fallback that searches destinationCalendar for cal.externalId === externalCalendarId when externalCalendarId is falsy, making the fallback impossible and potentially selecting the wrong calendarId for updates",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "handleNewBooking collects multiple destination calendars (including team member calendars) but persists only evt.destinationCalendar[0] when creating the booking, silently dropping additional calendars",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "handleCancelBooking recurring-delete path iterates only bookingToDelete.user.credentials and ignores the DB-fetched calendarCredential fallback, so recurring linked events may not be deleted when the credential exists only via the DB fetch",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "editLocation.handler.ts uses redundant optional chaining inside branches that already truthy-check booking.destinationCalendar / booking.user.destinationCalendar, reducing clarity",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   },
   "https://github.com/calcom/cal.com/pull/8087": {
@@ -30617,6 +31695,38 @@
         "line": null,
         "source": "extracted"
       }
+    ],
+    "cloudaeye": [
+      {
+        "text": "In packages/features/bookings/lib/handleCancelBooking.ts, using Array.forEach with an async callback means calendar update/delete promises are not awaited, so rejections escape the handler and failures go unhandled",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In packages/trpc/server/routers/viewer/bookings.tsx, using bookingRefsFiltered.forEach(async ...) makes external calendar deletions fire-and-forget, so cleanup may still be running after the handler returns (race condition)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In packages/trpc/server/routers/viewer/bookings.tsx, errors from getCalendar/deleteEvent/deleteMeeting inside the async forEach callback are not caught/awaited, so promise rejections escape normal error handling while execution continues (e.g., to sendRequestRescheduleEmail)",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In packages/app-store/vital/lib/reschedule.ts, changing to bookingRefsFiltered.forEach(async ...) prevents the surrounding try/catch from catching rejections from getCalendar/deleteEvent/deleteMeeting, and the function proceeds/returns before per-reference cleanup completes",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      },
+      {
+        "text": "In packages/app-store/wipemycalother/lib/reschedule.ts, using bookingRefsFiltered.forEach(async ...) causes getCalendar/deleteEvent/deleteMeeting rejections to escape the surrounding try/catch because forEach does not await async callbacks",
+        "path": null,
+        "line": null,
+        "source": "extracted"
+      }
     ]
   }
-}
\ No newline at end of file
+}
diff --git a/offline/results/openai_gpt-5.2/evaluations.json b/offline/results/openai_gpt-5.2/evaluations.json
index 1cbf1bd..050d975 100644
--- a/offline/results/openai_gpt-5.2/evaluations.json
+++ b/offline/results/openai_gpt-5.2/evaluations.json
@@ -2093,6 +2093,49 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37429__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37429__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The method name 'santizeAnchors' should be 'sanitizeAnchors' (missing 'i').",
+          "severity": "Low",
+          "matched_candidate": "Method name typo: private method is named santizeAnchors instead of sanitizeAnchors, making sanitize-related code harder to find and maintain",
+          "confidence": 0.99,
+          "reasoning": "Both the golden comment and the candidate issue point out the same typo in the method name: 'santizeAnchors' is missing an 'i' and should be 'sanitizeAnchors'."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Verification crashes with a RuntimeException when the derived English companion _en.properties file is missing because verifySafeHtml() wraps any IOException (including FileNotFound) and aborts verification instead of falling back"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The translation is in Italian instead of Lithuanian. This should be translated to Lithuanian to match the file's locale (messages_lt.properties).",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The totpStep1 value uses Traditional Chinese terms in the Simplified Chinese file (zh_CN), which is likely incorrect for this locale. Please verify the locale\u2011appropriate translation.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The anchor sanitization logic has a potential issue where it consumes English matcher groups without proper validation. If the translated text has more anchor tags than the English text, this could lead to incorrect validation results.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 4,
+      "tp": 1,
+      "fp": 1,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.25,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37429__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37429__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/37634": {
@@ -4120,6 +4163,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37634__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37634__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Wrong parameter in null check (grantType vs. rawTokenId)",
+          "severity": "Critical",
+          "matched_candidate": "AccessTokenContext constructor performs a null check on grantType twice and fails to validate rawTokenId (uses Objects.requireNonNull(grantType, \"Null rawTokenId not allowed\") instead of checking rawTokenId), allowing rawTokenId to be null and risking NullPointerException later",
+          "confidence": 0.97,
+          "reasoning": "The candidate issue explicitly states the constructor checks grantType twice and mistakenly uses grantType in the null check meant for rawTokenId, leaving rawTokenId unvalidated. This is the same underlying bug as the golden comment (wrong parameter in null check: grantType vs rawTokenId)."
+        },
+        {
+          "golden_comment": "Javadoc mentions \"usually like 3-letters shortcut\" but some implementations use 2-letter shortcuts (\"ac\", \"cc\", \"rt\", \"te\", \"pc\", \"ci\", \"ro\"). Consider updating documentation to reflect actual usage pattern.",
+          "severity": "Low",
+          "matched_candidate": "OAuth2GrantTypeFactory.getShortcut Javadoc claims shortcuts are usually 3 letters, but existing implementations use 2-letter shortcuts (e.g., 'ac', 'cc', 'pg'), creating a documentation mismatch",
+          "confidence": 0.93,
+          "reasoning": "Both the golden comment and candidate issue flag a mismatch between the Javadoc stating shortcuts are usually 3 letters and actual implementations using 2-letter shortcuts. The candidate gives examples (ac, cc, pg) consistent with the same underlying documentation issue, even if the example list differs slightly."
+        },
+        {
+          "golden_comment": " Catching generic RuntimeException is too broad. The implementation throws IllegalArgumentException specifically - catch that instead for more precise testing.",
+          "severity": "Low",
+          "matched_candidate": "DefaultTokenContextEncoderProviderTest.testIncorrectGrantType catches generic RuntimeException instead of the expected IllegalArgumentException, making the test pass for unrelated runtime failures",
+          "confidence": 0.95,
+          "reasoning": "Both point out that the test is catching a generic RuntimeException when the code under test throws IllegalArgumentException, and that the test should catch the specific exception for precision."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "In isAccessTokenId, the substring for the grant shortcut and the equality check look inverted: the grant shortcut occupies indices 4\u20135 (substring(4,6)), and a match should return true (combined with UUID check), not false.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 4,
+      "tp": 3,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.75,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37634__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37634__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/38446": {
@@ -5743,6 +5831,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR38446__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR38446__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Unsafe raw List deserialization without type safety. Calling Optional.get() directly on the Optional returned by RecoveryAuthnCodesUtils.getCredential(user) without checking isPresent() can lead to a NoSuchElementException if the Optional is empty.",
+          "severity": "Medium",
+          "matched_candidate": "RecoveryAuthnCodeInputLoginBean constructor calls Optional.get() on the result of RecoveryAuthnCodesUtils.getCredential(user) without checking presence, risking NoSuchElementException when the user has no recovery code credential and crashing login form rendering",
+          "confidence": 0.78,
+          "reasoning": "The candidate flags calling Optional.get() without checking presence, which matches the golden issue about potential NoSuchElementException when the Optional is empty. It does not mention raw List deserialization, but it does match the Optional.get() safety concern that is central to the golden comment."
+        },
+        {
+          "golden_comment": "After creating the RecoveryAuthnCodesCredentialModel, consider setting its id from the stored credential (e.g., myUser.recoveryCodes.getId()); otherwise getId() will be null and downstream removal by id (e.g., removeStoredCredentialById in the authenticator flow) may not work.",
+          "severity": "Low",
+          "matched_candidate": "BackwardsCompatibilityUserStorage.getCredentials reconstructs a RecoveryAuthnCodesCredentialModel via createFromValues without preserving the previously stored/generated credential id from updateCredential, causing a missing/different (potentially null) id that can break credential removal",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same underlying problem: when reconstructing/creating a RecoveryAuthnCodesCredentialModel, the stored/generated credential id is not preserved/set, leading to a null or mismatched id that can break downstream removal by id (e.g., removeStoredCredentialById)."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR38446__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR38446__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/36882": {
@@ -6977,6 +7098,32 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR36882__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR36882__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect method call for exit codes. The picocli.exit() method calls System.exit() directly, which is problematic:",
+          "severity": "Medium",
+          "matched_candidate": "UpdateCompatibilityCheck.run calls picocli.exit(CompatibilityResult.FEATURE_DISABLED) when rolling-updates is disabled, which triggers System.exit and terminates the entire JVM (breaking embedding)",
+          "confidence": 0.93,
+          "reasoning": "Both comments flag the same underlying issue: calling picocli.exit() directly invokes System.exit(), which can terminate the whole JVM and is problematic (especially for embedding). The candidate adds context (rolling-updates disabled) but matches the core concern about exit code handling via picocli.exit()."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR36882__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36882__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/36880": {
@@ -8744,6 +8891,53 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR36880__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR36880__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Inconsistent feature flag bug causing orphaned permissions. The AdminPermissions event listener, responsible for cleaning up permissions upon role, client, or group removal, is incorrectly guarded by the ADMIN_FINE_GRAINED_AUTHZ (V1) feature flag. This is inconsistent with other methods in the class that use ADMIN_FINE_GRAINED_AUTHZ_V2. Consequently, if ADMIN_FINE_GRAINED_AUTHZ_V2 is enabled but V1 is not, the permission cleanup logic will not execute, leading to orphaned permission data. Cleanup should occur regardless of which fine-grained authorization version is enabled.",
+          "severity": "High",
+          "matched_candidate": "Permission cleanup listener in AdminPermissions is incorrectly gated by the V1 feature flag (ADMIN_FINE_GRAINED_AUTHZ), so when V2 is enabled without V1 the role/client/group removal cleanup path does not run",
+          "confidence": 0.96,
+          "reasoning": "The candidate describes the same bug: the AdminPermissions cleanup listener is gated by the V1 flag (ADMIN_FINE_GRAINED_AUTHZ), so when only V2 is enabled the cleanup on role/client/group removal does not run, causing orphaned permissions. This matches the golden issue about inconsistent flag usage (V1 vs V2) preventing cleanup."
+        },
+        {
+          "golden_comment": "In hasPermission(ClientModel client, String scope), the resource lookup uses findByName(server, client.getId(), server.getId()), but AdminPermissionsSchema.getOrCreateResource creates per-client resources with the owner set to resourceServer.getClientId(), so this lookup will never find those resources and will always fall back to the 'all-clients' resource, effectively ignoring client-specific permissions.",
+          "severity": "High",
+          "matched_candidate": "ClientPermissionsV2.hasPermission looks up per-client resources with resourceStore.findByName(..., client.getId(), server.getId()) using server.getId() as owner, causing the lookup to miss when the resource owner differs and forcing fallback to the type-level \"Clients\" resource, ignoring client-specific grants",
+          "confidence": 0.93,
+          "reasoning": "The candidate describes the same mismatch: hasPermission searches for a per-client resource using findByName with server.getId() as the owner, but per-client resources are created with a different owner (resourceServer.getClientId()), so the lookup fails and code falls back to the all-clients/type-level resource, effectively ignoring client-specific permissions."
+        },
+        {
+          "golden_comment": "In getClientsWithPermission(String scope), iterating resourceStore.findByType(server, AdminPermissionsSchema.CLIENTS_RESOURCE_TYPE) and returning resource.getName() will only ever consider the type-level 'Clients' resource (per-client resources have no type) and return its name, while AvailableRoleMappingResource#getRoleIdsWithPermissions expects actual client IDs to pass to realm.getClientById, which can lead to incorrect behavior or a null client and subsequent failures.",
+          "severity": "High",
+          "matched_candidate": "ClientPermissionsV2.hasPermission looks up per-client resources with resourceStore.findByName(..., client.getId(), server.getId()) using server.getId() as owner, causing the lookup to miss when the resource owner differs and forcing fallback to the type-level \"Clients\" resource, ignoring client-specific grants",
+          "confidence": 0.74,
+          "reasoning": "Both point to the same underlying problem: the logic ends up using the type-level \"Clients\" resource instead of per-client resources, which breaks client-specific permission handling. The golden comment emphasizes that getClientsWithPermission returns resource.getName() from type-level resources (not actual client IDs), causing downstream realm.getClientById failures. The candidate focuses on why per-client lookup misses (owner mismatch) and thus falls back to the type-level resource, ignoring client-specific grants. While it doesn\u2019t explicitly mention returning names vs IDs/null client, it identifies the same core bug: incorrect resource selection leading to wrong client identification/permissions."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ClientPermissionsV2 defines a private getEvaluationContext(...) helper that is never called because permission evaluation uses root.evaluatePermission(...) directly, leaving dead/unused code and making its custom evaluation attributes ineffective"
+        },
+        {
+          "candidate": "PermissionClientTest.testManageOnlyOneClient assumes at least two default client scopes by indexing get(1) after only asserting the list is non-empty, risking IndexOutOfBoundsException when fewer than two scopes exist"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 2,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR36880__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR36880__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/37038": {
@@ -10387,6 +10581,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR37038__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR37038__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect permission check in canManage() method",
+          "severity": "High",
+          "matched_candidate": "GroupPermissions.canManage() was narrowed to only root.hasOneAdminRole(AdminRoles.MANAGE_USERS), dropping prior default/permission-based user-management semantics and breaking group management when defaults grant access without a direct MANAGE_USERS role",
+          "confidence": 0.93,
+          "reasoning": "The golden issue is an incorrect permission check in canManage(). The candidate explicitly describes canManage() being narrowed to a single role check (MANAGE_USERS), dropping prior semantics and breaking access when defaults should grant it\u2014i.e., the permission check logic is wrong."
+        },
+        {
+          "golden_comment": "In getGroupIdsWithViewPermission, hasPermission is called with groupResource.getId() and the same groupResource.getId() is added to granted, but hasPermission resolves resources by name (treating the argument as a group id) and the GroupPermissionEvaluator contract says this method returns group IDs that are later used as UserModel.GROUPS and in getUsersCount group filters. This mismatch means per-group VIEW_MEMBERS/MANAGE_MEMBERS permissions may not yield the expected group IDs for filtering and counts, and evaluation may effectively only look at the type-level 'all-groups' resource; consider revisiting whether this should operate on the underlying group ids (resource names) instead so it aligns with the JPA queries and the interface contract.",
+          "severity": "High",
+          "matched_candidate": "GroupPermissionsV2.getGroupIdsWithViewPermission() uses hasPermission(groupResource.getId(), ...) and adds groupResource.getId() to results, but hasPermission treats the argument as a resource name (findByName), causing group filtering to miss VIEW_MEMBERS/MANAGE_MEMBERS grants",
+          "confidence": 0.93,
+          "reasoning": "The candidate flags the same core mismatch: getGroupIdsWithViewPermission passes groupResource.getId() into hasPermission and returns that id, but hasPermission interprets the argument as a resource name (via findByName), leading to incorrect/missed per-group VIEW_MEMBERS/MANAGE_MEMBERS results used for group filtering/counts. This aligns with the golden issue about ID vs name/contract mismatch affecting filtering behavior."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "GroupPermissionsV2 repeats the same narrowing for canManage (MANAGE_USERS role-only), propagating the regression into the V2 permissions path"
+        },
+        {
+          "candidate": "RolePermissions role-mapping logic now gates mapping on root.hasOneAdminRole(AdminRoles.MANAGE_USERS) before checkAdminRoles(role), potentially breaking mapping when canManageDefault(RoleModel) grants access without a direct MANAGE_USERS role"
+        },
+        {
+          "candidate": "AdminPermissions uses ADMIN_FINE_GRAINED_AUTHZ_V2 to enable V2 management/evaluator paths but guards listener/cleanup registration with ADMIN_FINE_GRAINED_AUTHZ, so cleanup never runs when V2 is enabled without V1"
+        },
+        {
+          "candidate": "GroupResourceTypeEvaluationTest asserts subgroup creation returns 204 No Content, but GroupResource.addChild() returns 201 Created for new subgroup creation, making the test assertion incorrect"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 7,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 4,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.2857142857142857,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR37038__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR37038__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/33832": {
@@ -12002,6 +12242,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR33832__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR33832__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Dead code exists where ASN1Encoder instances are created and written to, but their results are immediately discarded. The actual encoding is performed by new ASN1Encoder instances created in the subsequent return statement, rendering the earlier operations useless.",
+          "severity": "Low",
+          "matched_candidate": "AuthzClientCryptoProvider.concatenatedRSToASN1DER contains dead code: it creates ASN1Encoder instances and writes r/s to them but immediately discards them, so those writes have no effect",
+          "confidence": 0.94,
+          "reasoning": "The candidate flags dead code in concatenatedRSToASN1DER where ASN1Encoder instances are created and written to but the results are discarded, matching the golden issue that the real encoding happens via new encoders in the return statement making earlier writes useless."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "ASN1Decoder.readLength returns -1 for indefinite-length encoding, but callers pass this negative length to read(int), causing NegativeArraySizeException instead of an IOException"
+        },
+        {
+          "candidate": "ASN1Decoder.readLength validates decoded length against total input limit (and rejects length >= limit) rather than remaining bytes after consuming tag/length, incorrectly rejecting payloads that exactly consume the remaining bytes"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Returns wrong provider (default keystore instead of BouncyCastle)",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR33832__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR33832__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/40940": {
@@ -13495,6 +13773,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR40940__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR40940__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The reader thread isn\u2019t waited for; flipping deletedAll to true and asserting immediately can race and miss exceptions added just after the flag change, making this test flaky.",
+          "severity": "Medium",
+          "matched_candidate": "Background reader thread is started but never joined/waited for before asserting, creating a race condition and flaky test behavior",
+          "confidence": 0.93,
+          "reasoning": "Both comments flag the same underlying problem: the background/reader thread isn\u2019t synchronized (not waited/joined) before assertions, so the test can race and become flaky, potentially missing exceptions added after the flag change."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Assertion that caughtExceptions is empty can miss exceptions added by the reader thread after deletedAll is set because the thread may still be running"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Returning null from getSubGroupsCount() violates the GroupModel contract (Javadoc says it never returns null) and may lead to NPEs in callers that expect a non-null count.",
+          "severity": "Critical"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR40940__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR40940__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/keycloak-greptile/pull/1": {
@@ -15042,6 +15355,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak-greptile__greptile-v4-1__PR1__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak-greptile__greptile-v4-1__PR1__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "ConditionalPasskeysEnabled() called without UserModel parameter",
+          "severity": "Medium",
+          "matched_candidate": "UsernamePasswordForm.authenticate/challenge skips webauthnAuth.fillContextForm on the initial login page because isConditionalPasskeysEnabled(context.getUser()) requires a non-null user, disabling initial passkey setup when no user is selected",
+          "confidence": 0.86,
+          "reasoning": "Golden issue is that ConditionalPasskeysEnabled() is invoked without providing the required UserModel parameter. The candidate describes the same root problem: isConditionalPasskeysEnabled(context.getUser()) depends on a non-null user, so on the initial login page (no user selected) the call effectively fails/returns false and skips passkey context setup. This aligns with the missing/invalid user parameter concern."
+        },
+        {
+          "golden_comment": "With isConditionalPasskeysEnabled(UserModel user) requiring user != null, authenticate(...) will not call webauthnAuth.fillContextForm(context) on the initial login page where context.getUser() is still null, so conditional passkey UI will not be set up for first-time passkey login. Consider whether this should also be enabled when no user has been selected yet so ID-less passkey authentication on the initial login form continues to work.",
+          "severity": "Medium",
+          "matched_candidate": "UsernamePasswordForm.authenticate/challenge skips webauthnAuth.fillContextForm on the initial login page because isConditionalPasskeysEnabled(context.getUser()) requires a non-null user, disabling initial passkey setup when no user is selected",
+          "confidence": 0.93,
+          "reasoning": "The candidate states that authenticate/challenge skips webauthnAuth.fillContextForm on the initial login page because isConditionalPasskeysEnabled(context.getUser()) requires a non-null user, which disables initial passkey setup when no user is selected. This is the same core issue as the golden comment about conditional passkey UI not being set up for first-time/ID-less passkey login when context.getUser() is null."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "UsernamePasswordForm.challenge error/retry path skips webauthnAuth.fillContextForm when login fails with an unknown user (context.getUser() is null), causing passkey retry/challenge UI options to disappear on subsequent attempts"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak-greptile__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak-greptile__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/93824": {
@@ -17180,6 +17530,62 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR93824__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR93824__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Inconsistent metric tagging with 'shard' and 'shards'",
+          "severity": "Medium",
+          "matched_candidate": "Inconsistent metric tag keys for the same shard dimension (using both \"shard\" and \"shards\") fragments metrics/dashboards",
+          "confidence": 0.97,
+          "reasoning": "Both the golden comment and the candidate issue describe the same problem: inconsistent metric tag key naming for the shard dimension, alternating between \"shard\" and \"shards\", which leads to fragmented metrics/dashboards."
+        },
+        {
+          "golden_comment": "Fixed sleep in tests can be flaky; wait on condition instead",
+          "severity": "Low",
+          "matched_candidate": "Test monkeypatches time.sleep to a no-op but later relies on time.sleep(0.1) to wait for flusher threads, so the sleep does not actually wait and the test becomes timing-racy/unreliable",
+          "confidence": 0.9,
+          "reasoning": "Both point out that using sleep-based waiting in tests is unreliable/flaky. The candidate adds that sleep is monkeypatched to no-op, making the timing wait ineffective and the test racy, which is a specific instance of the golden concern (should wait on a condition instead of fixed sleep)."
+        },
+        {
+          "golden_comment": "Sleep in test_consumer.py won\u2019t actually wait because time.sleep was monkeypatched above; consider restoring sleep or using a different sync to ensure the flusher has time to process.",
+          "severity": "Medium",
+          "matched_candidate": "Test monkeypatches time.sleep to a no-op but later relies on time.sleep(0.1) to wait for flusher threads, so the sleep does not actually wait and the test becomes timing-racy/unreliable",
+          "confidence": 0.95,
+          "reasoning": "Both describe that time.sleep was monkeypatched to a no-op in the test, so a later time.sleep call intended to wait for flusher processing does not actually wait, making the test unreliable; both suggest needing a different approach/restoring sleep."
+        },
+        {
+          "golden_comment": "Breaking out of the loop when the deadline has elapsed can skip terminating remaining flusher processes, potentially leaving them running after shutdown; consider ensuring termination is attempted even if the deadline is exceeded.",
+          "severity": "Medium",
+          "matched_candidate": "SpanFlusher.join breaks out of the process-join loop when the deadline is exceeded, skipping termination/cleanup for remaining processes and leaving child workers running",
+          "confidence": 0.94,
+          "reasoning": "Both describe the same issue: the join loop exits when the deadline is exceeded, which can skip terminating/cleaning up remaining flusher processes and leave them running after shutdown."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "SpanFlusher._ensure_processes_alive kills an unhealthy worker and immediately starts a replacement without waiting/joining for the old process to exit, allowing overlapping shard handlers and duplicate flushing"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Because flusher processes are created via multiprocessing.get_context('spawn').Process, they are instances of multiprocessing.context.SpawnProcess, which on POSIX is not a subclass of multiprocessing.Process, so this isinstance check will always be false and hung processes won't be killed here.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR93824__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR93824__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/5": {
@@ -19221,6 +19627,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR5__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR5__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Detector validator uses wrong key when updating type",
+          "severity": "Medium",
+          "matched_candidate": "BaseDetectorTypeValidator.update reads validated_data[\"detector_type\"] even though the serializer validates the field as \"type\", so updates using the provided type are ignored and instance.group_type is used instead",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same bug: the detector type validator/update logic uses the wrong key in validated_data (expects/reads \"detector_type\" instead of the validated field \"type\"), causing provided type updates to be ignored."
+        },
+        {
+          "golden_comment": "Using zip(error_ids, events.values()) assumes the get_multi result preserves the input order; dict value order is not guaranteed to match error_ids, so event data can be paired with the wrong ID (missing nodes also shift alignment).",
+          "severity": "Low",
+          "matched_candidate": "fetch_error_details incorrectly pairs error_ids with nodestore get_multi results by zipping error_ids with events.values(), but get_multi returns a dict with no positional ordering guarantee, causing error details to be associated with the wrong IDs when entries are omitted or reordered",
+          "confidence": 0.97,
+          "reasoning": "The candidate flags the same core bug: zipping error_ids with events.values() assumes ordering/alignment from get_multi\u2019s dict result, which isn\u2019t guaranteed and can misassociate IDs especially when entries are missing or reordered."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Mutual-exclusion validation between \"age\" and \"timestamp\" is bypassed when either value is 0 because the checks use truthiness (self.initial_data.get(...)) instead of presence, allowing invalid mixed-spec reports to pass"
+        },
+        {
+          "candidate": "Analytics event preprod_artifact.api.assemble is recorded before the feature gate check, so requests denied by features.has(...) still emit telemetry and count as assemble events"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Breaking changes in error response format",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR5__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR5__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/1": {
@@ -21322,6 +21773,58 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR1__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR1__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Django querysets do not support negative slicing",
+          "severity": "High",
+          "matched_candidate": "BasePaginator.get_result allows negative start_offset for previous-page cursors (start_offset = offset when cursor.is_prev), leading to Django QuerySet negative slicing and AssertionError at runtime",
+          "confidence": 0.93,
+          "reasoning": "The golden issue is that Django QuerySets do not support negative slicing. The candidate explicitly describes a code path where a negative start_offset can occur, causing negative slicing on a Django QuerySet and an AssertionError. This is the same underlying problem (negative slicing on QuerySets)."
+        },
+        {
+          "golden_comment": "When requests are authenticated with API keys or org auth tokens (which have user_id=None), organization_context.member is None. Line 71 attempts to access organization_context.member.has_global_access without checking if member is None, causing an AttributeError crash when optimized_pagination=true is used, even though the request passed all permission checks with valid org:write scope.",
+          "severity": "High",
+          "matched_candidate": "OrganizationAuditLogsEndpoint.get dereferences organization_context.member.has_global_access without a None check, causing AttributeError when auth context has no member (e.g., API key/org auth token)",
+          "confidence": 0.93,
+          "reasoning": "The candidate flags the same root cause: dereferencing organization_context.member.has_global_access without checking for None when the auth context lacks a member (API key/org auth token), leading to an AttributeError. This matches the golden issue; it just omits the optimized_pagination=true trigger and permission-scope context but the underlying bug is identical."
+        },
+        {
+          "golden_comment": "get_item_key assumes a numeric key, but the paginator is used with order_by=-datetime in the audit logs endpoint; calling math.floor/ceil on a datetime will raise a TypeError.",
+          "severity": "High",
+          "matched_candidate": "OrganizationAuditLogsEndpoint.get uses OptimizedCursorPaginator with order_by='-datetime', but OptimizedCursorPaginator.get_item_key applies math.floor/ceil and int() to the key value, which will TypeError for datetime keys during cursor generation",
+          "confidence": 0.97,
+          "reasoning": "The candidate describes the same root bug: the paginator is configured with order_by='-datetime' so the key is a datetime, but get_item_key uses math.floor/ceil and int() assuming a numeric key, causing a TypeError when generating cursors."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "OptimizedCursorPaginator.get_result explicitly allows negative cursor.offset when enable_advanced_features is true and slices queryset[start_offset:stop], which can trigger Django ORM AssertionError due to unsupported negative QuerySet slicing"
+        },
+        {
+          "candidate": "OptimizedCursorPaginator.get_item_key assumes ordered field values are numeric by applying math.floor/ceil, so paginating on non-numeric ordered fields (e.g., datetime or string) will crash during cursor key computation"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Importing non-existent OptimizedCursorPaginator",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 4,
+      "tp": 3,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.6,
+      "recall": 0.75,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/97529": {
@@ -22894,6 +23397,46 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR97529__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR97529__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "A race condition in BuildIndex allows multiple goroutines to concurrently build the same expensive index for the same key. This is caused by moving the b.cacheMu lock from protecting the entire function to only protecting the final cache assignment. ",
+          "severity": "High",
+          "matched_candidate": "bleveBackend.BuildIndex no longer holds the cache lock for the full function, allowing concurrent callers to build the same index key simultaneously before either caches it, leading to duplicate index builds and cache coherence issues",
+          "confidence": 0.96,
+          "reasoning": "The candidate describes the same race condition: the cache lock no longer covers the whole BuildIndex function, so concurrent goroutines can build the same index for the same key before caching, causing duplicate expensive builds and potential cache issues."
+        },
+        {
+          "golden_comment": "Calling s.search.TotalDocs() here may race with concurrent index creation: TotalDocs iterates b.cache without synchronization, and the event watcher goroutine started just above could trigger BuildIndex writes concurrently, potentially causing a concurrent map read/write panic.",
+          "severity": "High",
+          "matched_candidate": "Calling s.search.TotalDocs() during initialization can trigger a concurrent map read/write panic because bleveBackend.TotalDocs iterates b.cache without locking while BuildIndex writes to b.cache under b.cacheMu",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same race: TotalDocs iterates over b.cache without synchronization while concurrent BuildIndex writes (triggered by the watcher goroutine during init) can occur, leading to a concurrent map read/write panic."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "server.History and server.Origin dereference s.search without a nil guard, causing a nil pointer panic when search is not configured (search is optional and only initialized when non-nil)"
+        },
+        {
+          "candidate": "bleveBackend.TotalDocs iterates b.cache without locking, which is unsafe under concurrent access and increases correctness risk alongside the narrowed BuildIndex lock scope"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 2,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR97529__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR97529__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/80168": {
@@ -24360,6 +24903,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR80168__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR80168__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but only contains pass, failing to implement its required abstract methods: counter_names (property), get_dedupe_value(), get_group_key_values(), and build_occurrence_and_event_data(). This will cause a TypeError at runtime when the class is instantiated.",
+          "severity": "High",
+          "matched_candidate": "MetricAlertDetectorHandler inherits from StatefulDetectorHandler but is empty (pass) and does not implement required abstract methods (get_dedupe_value, get_group_key_values, build_occurrence_and_event_data), making it non-instantiable and causing a runtime TypeError when instantiated via Detector.detector_handler",
+          "confidence": 0.93,
+          "reasoning": "Both describe that MetricAlertDetectorHandler subclasses StatefulDetectorHandler but is empty and fails to implement required abstract methods, making it non-instantiable and leading to a runtime TypeError. The candidate lists the same core missing methods (though it omits mentioning the counter_names property explicitly), but the underlying issue is the same."
+        },
+        {
+          "golden_comment": "Docstring says this returns a list of DetectorEvaluationResult, but the method now returns a dict keyed by DetectorGroupKey. Consider updating the docstring to match the new return type.",
+          "severity": "Low",
+          "matched_candidate": "A function in src/sentry/workflow_engine/processors/detector.py is annotated to return dict[DetectorGroupKey, DetectorEvaluationResult] but its docstring claims it returns a list of DetectorEvaluationResult, misleading callers about the return shape",
+          "confidence": 0.96,
+          "reasoning": "Both the golden comment and the candidate issue describe a mismatch between the docstring (claims a list of DetectorEvaluationResult) and the actual return type (a dict keyed by DetectorGroupKey). They point to the same documentation/return-shape inconsistency."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "StatefulDetectorHandler adds a new abstract method build_occurrence_and_event_data, but existing subclasses (e.g., MetricAlertDetectorHandler) were not updated to implement it, causing abstract-class instantiation failures"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR80168__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80168__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/80528": {
@@ -25775,6 +26355,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR80528__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR80528__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The function modifies the config variable to include display values but then returns the original monitor.config instead of the modified version.",
+          "severity": "High",
+          "matched_candidate": "In get_monitor_environment_context, the code copies and humanizes config['schedule_type'] but returns monitor_environment.monitor.config instead of the mutated local config, so the transformation is dropped and the displayed context remains unmodified",
+          "confidence": 0.94,
+          "reasoning": "Both describe the same bug: the function mutates a local/config copy to add human-readable/display values (e.g., schedule_type) but then returns the original monitor.config from the object, so the modifications are not reflected in the returned context."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "create_incident_occurrence propagates the untransformed schedule_type outward by inserting get_monitor_environment_context(monitor_env) into event_data['contexts']['monitor'] before producing the occurrence to Kafka"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The code fetches MonitorCheckIn objects by ID when the required data already exists in previous_checkins. This creates an unnecessary database query.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR80528__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR80528__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/77754": {
@@ -27536,6 +28151,57 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR77754__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR77754__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Shared mutable default in dataclass timestamp",
+          "severity": "Medium",
+          "matched_candidate": "Dataclass field default `queued: datetime = timezone.now()` is evaluated at import/class-definition time, causing omitted `queued` values to reuse a frozen timestamp",
+          "confidence": 0.96,
+          "reasoning": "The golden issue is about using a shared/evaluated-once default timestamp in a dataclass (mutable/default factory problem). The candidate explicitly notes that `timezone.now()` is evaluated at class-definition time, so instances without an explicit value share the same frozen timestamp. This is the same underlying issue and would be fixed with a `default_factory`."
+        },
+        {
+          "golden_comment": "The method name has a typo: test_from_dict_inalid_data should be test_from_dict_invalid_data.",
+          "severity": "Low",
+          "matched_candidate": "Test name typo: `test_from_dict_inalid_data` is misspelled (`inalid` vs `invalid`), making failures misleading in test output",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue flags the same typo in the test method name (`test_from_dict_inalid_data` using `inalid` instead of `invalid`) as the golden comment and implies it should be corrected."
+        },
+        {
+          "golden_comment": "Method name says 'empty_array' but tests empty dict - consider renaming to 'test_from_dict_empty_dict' for clarity.",
+          "severity": "Low",
+          "matched_candidate": "Test naming mismatch: `test_from_dict_empty_array` name implies an array but the test passes an empty dict `{}`, making intent unclear",
+          "confidence": 0.95,
+          "reasoning": "Both comments flag the same issue: the test method name mentions an empty array but the test input is an empty dict, suggesting the name should be changed for clarity."
+        },
+        {
+          "golden_comment": "to_dict() returns a datetime for queued; if this dict is passed in task kwargs (e.g., via apply_async), JSON serialization may fail depending on the serializer, which can cause enqueue errors.",
+          "severity": "Medium",
+          "matched_candidate": "Celery task enqueue passes `assignment_source.to_dict()` directly in `apply_async` kwargs, risking a TypeError/JSON serialization failure due to non-serializable `datetime` in `queued`",
+          "confidence": 0.96,
+          "reasoning": "Both describe the same issue: `to_dict()` includes a `datetime` field (`queued`) and passing that dict as Celery task kwargs (e.g., via `apply_async`) can trigger JSON serialization/TypeError failures depending on the serializer, causing enqueue errors."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "`AssignmentSource.from_integration()` constructs instances without passing `queued`, so new instances incorrectly inherit the frozen default timestamp"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 6,
+      "total_golden": 4,
+      "tp": 4,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR77754__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR77754__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/95633": {
@@ -29294,6 +29960,54 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR95633__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR95633__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The test test_thread_queue_parallel_error_handling has a docstring that doesn't match the test implementation.",
+          "severity": "Low",
+          "matched_candidate": "test_thread_queue_parallel_error_handling docstring claims errors shouldn't block commits for other messages, but the test assertions expect no commits (or none for the partition), so the test intent/documentation is misleading",
+          "confidence": 0.93,
+          "reasoning": "The golden issue is that the docstring for test_thread_queue_parallel_error_handling does not match what the test actually does. The candidate explicitly states the docstring claims one behavior (errors shouldn't block commits for other messages) while the assertions expect a different behavior (no commits), making the documentation/intent misleading. This is the same underlying mismatch."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "SimpleQueueProcessingStrategy.close stops the commit loop (sets shutdown_event and joins commit thread) before queue_pool.shutdown drains/joins workers, so offsets completed during shutdown may never be committed"
+        },
+        {
+          "candidate": "TestFixedQueuePool.test_concurrent_processing_across_groups claims to verify concurrent processing but only asserts all items completed and that group IDs appeared, so it can pass even with fully serial processing"
+        },
+        {
+          "candidate": "TestSimpleQueueProcessingStrategy.test_concurrent_processing_different_groups claims to verify concurrency but only asserts that four results were processed, with no concurrency-specific assertion"
+        },
+        {
+          "candidate": "Background-thread tests in test_results_consumer.py use fixed sleep/poll timing windows (e.g., loops with time.sleep(0.1) and max_wait), making them flaky when async processing/commits take longer than expected"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The queue.shutdown() method with 'immediate=False' parameter may not exist in the standard Python queue module. This could cause AttributeError at runtime. Verify the correct API or implement a custom shutdown mechanism.",
+          "severity": "High"
+        },
+        {
+          "golden_comment": "The magic number 50 for max_wait is used repeatedly throughout the tests. Consider extracting this as a named constant to improve maintainability.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 3,
+      "tp": 1,
+      "fp": 4,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.2,
+      "recall": 0.3333333333333333,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR95633__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR95633__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/2": {
@@ -31351,6 +32065,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR2__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR2__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "OptimizedCursorPaginator negative-offset branch slices QuerySet with a negative start index",
+          "severity": "Critical",
+          "matched_candidate": "Advanced pagination branch allows negative Django QuerySet slicing (queryset[start_offset:stop] with start_offset < 0), which can crash with AssertionError",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same bug: in the negative-offset/advanced pagination branch, the code slices a Django QuerySet with a negative start index, which is invalid and can raise an AssertionError."
+        },
+        {
+          "golden_comment": "BasePaginator negative-offset branch slices QuerySet with a negative start index",
+          "severity": "High",
+          "matched_candidate": "Advanced pagination branch allows negative Django QuerySet slicing (queryset[start_offset:stop] with start_offset < 0), which can crash with AssertionError",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same underlying bug: in the negative-offset/advanced pagination path, a Django QuerySet is sliced with a negative start index, which is invalid and can raise an AssertionError."
+        },
+        {
+          "golden_comment": "OptimizedCursorPaginator.get_item_key uses floor/ceil on a datetime key (order_by='-datetime'), causing TypeError.",
+          "severity": "High",
+          "matched_candidate": "OptimizedCursorPaginator.get_item_key applies math.floor/ceil and int() to non-numeric keys (e.g., datetime), causing TypeError at runtime when ordering by a datetime field",
+          "confidence": 0.97,
+          "reasoning": "Both describe the same bug: get_item_key uses math.floor/ceil (and related numeric casting) on a datetime ordering key, which triggers a TypeError when ordering by a datetime field."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Previous-page pagination preserves a negative cursor offset (start_offset = offset when cursor.is_prev), leading to negative QuerySet slicing and potential AssertionError"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR2__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR2__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/sentry-greptile/pull/3": {
@@ -33264,6 +34022,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry-greptile__greptile-v4-1__PR3__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry-greptile__greptile-v4-1__PR3__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "sample_rate = 0.0 is falsy and skipped",
+          "severity": "Low",
+          "matched_candidate": "_set_sample_rate_from_error_sampling uses a falsy guard `if client_sample_rate:` so valid zero values (0/0.0) are skipped and not propagated to normalized_data['sample_rate'] in factories.py",
+          "confidence": 0.96,
+          "reasoning": "Both describe the same bug: a sample_rate value of 0.0 is treated as falsy by a guard like `if client_sample_rate:` and therefore skipped/not propagated."
+        },
+        {
+          "golden_comment": "Using Python\u2019s built-in hash() to build cache keys is non-deterministic across processes (hash randomization), so keys won\u2019t match across workers and invalidate_upsampling_cache may fail to delete them. Use a deterministic serialization of project_ids for the cache key.",
+          "severity": "Low",
+          "matched_candidate": "Using Python's process-dependent hash(tuple(...)) in cache keys causes different workers to compute different keys, breaking cross-process cache reuse and invalidate_upsampling_cache invalidation in error_upsampling.py",
+          "confidence": 0.93,
+          "reasoning": "The candidate flags that using Python\u2019s process-dependent hash(tuple(...)) for cache keys yields different keys across workers, breaking cross-process cache reuse and invalidate_upsampling_cache invalidation. This is the same core issue as the golden comment about non-deterministic hash() due to hash randomization and needing deterministic key construction."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The upsampling eligibility check passes the outer dataset instead of the actual dataset used by scoped_dataset. In paths where the query ultimately runs against discover (e.g., dashboard split) while the original dataset is metrics, upsampling may be skipped even when all projects are allowlisted.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry-greptile__cloudaeye__PR3__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry-greptile__cloudaeye__PR3__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/103633": {
@@ -34878,6 +35674,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR103633__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR103633__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The Check operation exhibits asymmetric cache trust logic: cached permission grants are trusted and returned immediately, but cached denials from the same permission cache are ignored, leading to a fresh database lookup. This allows stale cached grants to provide access to revoked resources, posing a security risk. ",
+          "severity": "High",
+          "matched_candidate": "In pkg/services/authz/rbac/service.go, authorization can be bypassed because cached permission grants are trusted without freshness/revalidation: the code returns Allowed based on cached permissions, and getCachedIdentityPermissions only checks cache presence, so revoked access may persist until TTL expiry",
+          "confidence": 0.86,
+          "reasoning": "Both describe the same core security issue: the Check operation trusts cached permission grants and returns Allowed without revalidation, so revoked permissions can remain effective until cache expiry. The golden also notes asymmetric handling of cached denials (ignored), which the candidate doesn\u2019t explicitly mention, but the underlying risk of stale cached grants granting access is the same."
+        },
+        {
+          "golden_comment": "The test comment says the cached permissions 'allow access', but the map stores false for dashboards:uid:dash1, so checkPermission will still treat this scope as not allowed.",
+          "severity": "Low",
+          "matched_candidate": "In pkg/services/authz/rbac/service_test.go, the cache deny test setup contradicts its comment: it says the cache entry should allow dashboard access, but the fixture stores map[string]bool{\"dashboards:uid:dash1\": false}, creating a misleading/self-contradictory test",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same contradiction in the test: the comment claims cached permissions allow dashboard access, but the cache map stores false for dashboards:uid:dash1, meaning the permission would be treated as not allowed. Candidate captures the misleading/self-contradictory setup noted in the golden comment."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR103633__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR103633__20260310/pull/1"
     }
   },
   "https://github.com/getsentry/sentry/pull/67876": {
@@ -36648,6 +37477,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "sentry__sentry__greptile-v4-1__PR67876__20260406",
       "pr_url": "https://github.com/code-review-benchmark/sentry__sentry__greptile-v4-1__PR67876__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "OAuth state uses pipeline.signature (static) instead of a per-request random value",
+          "severity": "Medium",
+          "matched_candidate": "OAuthLoginView.dispatch uses pipeline.signature as the OAuth state parameter, making state predictable/reusable and weakening CSRF/replay protection because pipeline.signature is deterministic rather than per-request randomized",
+          "confidence": 0.96,
+          "reasoning": "The candidate explicitly states that OAuth state is set to pipeline.signature, which is deterministic/predictable and not per-request randomized, matching the golden issue about using a static value instead of a random per-request state."
+        },
+        {
+          "golden_comment": "The code attempts to access integration.metadata[sender][login] without checking for the existence of the sender key. This causes a KeyError for integrations where the sender metadata was not set during creation",
+          "severity": "High",
+          "matched_candidate": "GitHubInstallation.dispatch dereferences integration.metadata[\"sender\"][\"login\"] without guarding for missing keys, which can raise KeyError when integration metadata lacks sender/login",
+          "confidence": 0.94,
+          "reasoning": "Both describe the same issue: the code accesses integration.metadata['sender']['login'] without verifying that the 'sender' (and implicitly 'login') keys exist, leading to a KeyError when sender metadata wasn't set."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "Null reference if github_authenticated_user state is missing",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "sentry__sentry__cloudaeye__PR67876__20260310",
+      "pr_url": "https://github.com/CloudAEye/sentry__sentry__cloudaeye__PR67876__20260310/pull/1"
     }
   },
   "https://github.com/keycloak/keycloak/pull/32918": {
@@ -38068,6 +38935,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "keycloak__keycloak__greptile-v4-1__PR32918__20260406",
       "pr_url": "https://github.com/code-review-benchmark/keycloak__keycloak__greptile-v4-1__PR32918__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Cleanup reference uses incorrect alias - should be 'idp-alias-' + i instead of 'alias'.",
+          "severity": "Medium",
+          "matched_candidate": "Test cleanup registers removal for identity provider alias \"alias\" instead of the actual created aliases (e.g., \"idp-alias-<i>\" and \"idp-alias-20\"), so teardown targets a non-existent IDP",
+          "confidence": 0.94,
+          "reasoning": "The candidate issue states that cleanup/teardown removes an IDP using the hardcoded alias \"alias\" rather than the dynamically created aliases like \"idp-alias-<i>\". This is the same underlying bug described in the golden comment (incorrect alias used in cleanup; should be 'idp-alias-' + i)."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Because cleanup uses the wrong alias, identity providers created in OrganizationCacheTest.testCacheIDPForLogin are not removed, leaking created providers between test runs"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Recursive caching call using session instead of delegate",
+          "severity": "Critical"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "keycloak__keycloak__cloudaeye__PR32918__20260310",
+      "pr_url": "https://github.com/CloudAEye/keycloak__keycloak__cloudaeye__PR32918__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/94942": {
@@ -39542,6 +40444,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR94942__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR94942__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The enableSqlExpressions function has flawed logic that always returns false, effectively disabling SQL expressions unconditionally:",
+          "severity": "Critical",
+          "matched_candidate": "enableSqlExpressions in pkg/expr/reader.go always returns false on every path, unconditionally disabling SQL expressions",
+          "confidence": 0.98,
+          "reasoning": "The candidate states that enableSqlExpressions always returns false on every path, unconditionally disabling SQL expressions, which is the same flawed-logic issue described in the golden comment."
+        },
+        {
+          "golden_comment": "Several methods such as NewInMemoryDB().RunCommands and db.QueryFramesInto return 'not implemented'.",
+          "severity": "High",
+          "matched_candidate": "SQLCommand.Execute in pkg/expr/sql_command.go calls db.QueryFramesInto on sql.NewInMemoryDB, but QueryFramesInto is unimplemented and returns \"not implemented\", causing deterministic runtime failure",
+          "confidence": 0.86,
+          "reasoning": "The golden comment notes that multiple methods (e.g., NewInMemoryDB().RunCommands and db.QueryFramesInto) are unimplemented and return \"not implemented\". The candidate specifically flags QueryFramesInto on sql.NewInMemoryDB being unimplemented and causing runtime failure. This matches part of the golden issue (QueryFramesInto), though it doesn\u2019t mention RunCommands; still the underlying concern is the same: core DB methods are stubbed/unimplemented."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "QueryTypeSQL parsing/execution is rejected because the new gate in pkg/expr/reader.go blocks QueryTypeSQL whenever enableSqlExpressions returns false"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR94942__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR94942__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/90939": {
@@ -40890,6 +41829,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR90939__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR90939__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The GetWebAssets function implements an incomplete double-checked locking pattern for caching web assets. The function first checks if the cache is populated using a read lock (RLock), and if the cache is empty, it acquires a write lock to populate it. However, it fails to re-check whether the cache was populated by another goroutine while waiting to acquire the write lock.",
+          "severity": "Medium",
+          "matched_candidate": "GetWebAssets uses incomplete double-checked locking: after the RLock fast-path sees an empty cache, it acquires the write lock but does not re-check entryPointAssetsCache, allowing concurrent callers to redundantly rebuild/fetch assets",
+          "confidence": 0.96,
+          "reasoning": "The candidate describes the same incomplete double-checked locking flaw: after seeing an empty cache under RLock, it takes the write lock but fails to re-check whether another goroutine populated the cache while waiting, leading to redundant rebuild/fetch."
+        },
+        {
+          "golden_comment": "In addition to the missing double-check, the function has a critical flaw in its error handling: it unconditionally assigns the fetch result to the cache (line 69: entryPointAssetsCache = result) regardless of whether the fetch succeeded or failed. When an error occurs during asset fetching, result is nil, and this nil value overwrites any previously valid cache entry.",
+          "severity": "High",
+          "matched_candidate": "GetWebAssets can overwrite a previously valid entryPointAssetsCache with nil: it assigns entryPointAssetsCache = result even when asset loading fails and result is nil, causing cached assets to be lost after transient load errors",
+          "confidence": 0.96,
+          "reasoning": "The candidate issue describes the same core bug: the function assigns the fetch result to entryPointAssetsCache even when the fetch fails and result is nil, overwriting a previously valid cache entry and losing cached assets after transient errors."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR90939__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90939__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/80329": {
@@ -42349,6 +43321,36 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR80329__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR80329__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The code uses Error log level for what appears to be debugging information. This will pollute error logs in production. Consider using Debug or Info level instead.",
+          "severity": "Low",
+          "matched_candidate": "CleanAnnotations logs routine batch progress using r.log.Error after fetchIDs succeeds, causing normal control-flow messages to be treated as errors and flood error logs (pre-delete progress path)",
+          "confidence": 0.93,
+          "reasoning": "Both comments flag that the code logs normal/debug/progress information at Error level, which would incorrectly treat routine messages as errors and clutter production error logs; they suggest the same underlying logging-level misuse."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "CleanAnnotations logs routine batch progress using r.log.Error after fetchIDs succeeds, causing normal control-flow messages to be treated as errors and flood error logs (post-delete progress path)"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 1,
+      "tp": 1,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR80329__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR80329__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/90045": {
@@ -44336,6 +45338,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR90045__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR90045__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The context is being created with d.Log instead of the log variable that was initialized with additional context values (name, kind, method). This means those values won't be propagated to the logging context.",
+          "severity": "Medium",
+          "matched_candidate": "In Delete, enriched logger with name/kind/method is created but plain d.Log is stored in context (klog.NewContext(ctx, d.Log)), dropping structured log fields",
+          "confidence": 0.96,
+          "reasoning": "Both describe the same issue: an enriched logger is created with additional fields (name/kind/method) but the context is populated using d.Log instead of that enriched logger, so the extra fields are not propagated."
+        },
+        {
+          "golden_comment": "Bug: calling recordLegacyDuration when storage operation fails should be recordStorageDuration.",
+          "severity": "High",
+          "matched_candidate": "On Storage.Update error path, code records duration with recordLegacyDuration instead of recordStorageDuration, breaking storage failure metrics",
+          "confidence": 0.96,
+          "reasoning": "The candidate flags that on the Storage.Update error path the code uses recordLegacyDuration instead of recordStorageDuration, which is exactly the bug described in the golden comment (wrong duration metric recorded on storage failure)."
+        },
+        {
+          "golden_comment": "Inconsistency: using name instead of options.Kind for metrics recording differs from other methods.",
+          "severity": "Medium",
+          "matched_candidate": "On Delete success, recordStorageDuration is called with name as the kind/label argument instead of options.Kind, misattributing metrics",
+          "confidence": 0.94,
+          "reasoning": "Both describe the same inconsistency/bug: metrics recording uses `name` as the kind/label argument instead of `options.Kind`, differing from other methods and causing misattributed metrics."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "In DeleteCollection async legacy goroutine, legacy DeleteCollection result is recorded with recordStorageDuration instead of recordLegacyDuration, breaking legacy latency/error metrics"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR90045__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR90045__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/106778": {
@@ -46063,6 +47109,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR106778__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR106778__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The rendered GrafanaRuleListItem is missing the required key prop for React list items. This can cause rendering issues when the list order changes.",
+          "severity": "Medium",
+          "matched_candidate": "In FilterView.tsx, the Grafana branch of rules.map renders <GrafanaRuleListItem> without a React `key` prop, risking stale row reuse/state mismatch when list order changes",
+          "confidence": 0.96,
+          "reasoning": "Both the golden comment and the candidate issue point out that <GrafanaRuleListItem> is rendered in a map without the required React `key` prop, which can lead to incorrect/stale rendering when list order changes."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "useIsGrafanaPromRuleEditable references an undefined identifier `ctx`, causing a ReferenceError crash during editability checks"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "RuleActionsButtons is invoked with only promRule, but SilenceGrafanaRuleDrawer inside RuleActionsButtons still depends on a Grafana Ruler rule being present, so for Grafana rules coming from list views the 'Silence notifications' menu entry (now driven by Grafana Prom abilities) will toggle showSilenceDrawer without ever rendering the drawer. This means clicking 'Silence notifications' for these rules has no visible effect, even when abilities indicate silencing is allowed.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR106778__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR106778__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/107534": {
@@ -47345,6 +48426,29 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR107534__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR107534__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "The applyTemplateVariables method is called with request.filters as the third parameter, but this parameter is not used in the corresponding test setup.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 0,
+      "total_golden": 1,
+      "tp": 0,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR107534__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR107534__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/79265": {
@@ -49574,6 +50678,58 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR79265__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR79265__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Race condition: Multiple concurrent requests could pass the device count check simultaneously and create devices beyond the limit. Consider using a database transaction or lock.",
+          "severity": "High",
+          "matched_candidate": "Race condition/TOCTOU in CreateOrUpdateDevice: CountDevices check is done separately from insert/update without a transaction or lock, allowing concurrent requests to exceed the device limit",
+          "confidence": 0.97,
+          "reasoning": "Both describe the same race condition/TOCTOU: the device count is checked separately from the create/update, so concurrent requests can bypass the limit without a transaction/lock and exceed the allowed number of devices."
+        },
+        {
+          "golden_comment": "Anonymous authentication now fails entirely if anonDeviceService.TagDevice returns ErrDeviceLimitReached. Previously, device tagging was asynchronous and non-blocking. This change prevents anonymous users from authenticating when the device limit is reached.",
+          "severity": "Medium",
+          "matched_candidate": "Anonymous authentication can fail because ErrDeviceLimitReached is introduced/returned from device tagging and is treated as a hard failure during Authenticate instead of a best-effort tagging error",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same regression: TagDevice returning ErrDeviceLimitReached now causes Authenticate to fail for anonymous users, whereas tagging used to be best-effort/asynchronous and non-blocking. The candidate captures the hard-failure behavior tied to ErrDeviceLimitReached during authentication."
+        },
+        {
+          "golden_comment": "Returning ErrDeviceLimitReached when no rows were updated is misleading; the device might not exist.",
+          "severity": "Low",
+          "matched_candidate": "Misleading error mapping: updateDevice returns ErrDeviceLimitReached when RowsAffected()==0, but zero rows can also mean the device row is missing or outside the allowed update window, not that the limit was reached",
+          "confidence": 0.93,
+          "reasoning": "Both point out that mapping RowsAffected()==0 to ErrDeviceLimitReached is misleading because zero affected rows can occur when the device doesn't exist (or other non-limit reasons). This matches the golden concern."
+        },
+        {
+          "golden_comment": "Time window calculation inconsistency: Using device.UpdatedAt.UTC().Add(-anonymousDeviceExpiration) as the lower bound but device.UpdatedAt as the current time may not match the intended logic. Consider using time.Now().UTC() consistently.",
+          "severity": "Low",
+          "matched_candidate": "Inconsistent time-window basis: updateDevice builds its BETWEEN window from device.UpdatedAt while the device-limit count uses time.Now().UTC(), so a device can be counted as active but fail the update WHERE clause if UpdatedAt lags",
+          "confidence": 0.9,
+          "reasoning": "Both point out an inconsistency in the time basis used for the expiration/window logic: one part uses device.UpdatedAt (and its UTC-adjusted subtraction) while another uses a different 'current' time reference (time.Now().UTC()), leading to mismatched window boundaries and potential logic errors."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "This call won\u2019t compile: dbSession.Exec(args...) is given a []interface{} where the first element is the query, but Exec\u2019s signature requires a first parameter of type string (not an interface{} splat).",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR79265__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR79265__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/9": {
@@ -51094,6 +52250,43 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR9__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR9__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Thread-safety issue with lazy @loaded_locales",
+          "severity": "Low",
+          "matched_candidate": "lib/freedom_patches/translate_accelerator.rb uses unsynchronized lazy initialization and check-then-act logic for @loaded_locales, allowing concurrent threads to double-load locales or corrupt shared state",
+          "confidence": 0.96,
+          "reasoning": "The golden comment flags a thread-safety problem around lazy initialization of @loaded_locales. The candidate explicitly describes unsynchronized lazy initialization/check-then-act on @loaded_locales leading to concurrent double-loads or corrupted shared state, which is the same underlying issue."
+        },
+        {
+          "golden_comment": "Consider normalizing the input locale (e.g., to a symbol) when checking/loading here to avoid double-loading if the same locale is passed as a String vs Symbol (also applies to other locations in the PR).",
+          "severity": "Low",
+          "matched_candidate": "lib/freedom_patches/translate_accelerator.rb caches loaded locales without normalizing String vs Symbol, so the same locale may be treated as different keys and loaded twice when callers mix forms",
+          "confidence": 0.93,
+          "reasoning": "Both comments flag the same underlying issue: locales are cached/checked without normalizing String vs Symbol, which can cause the same locale to be treated as different keys and loaded twice. The candidate is essentially a concrete restatement of the golden suggestion."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "config/initializers/i18n.rb unconditionally calls SiteSetting.default_locale.to_sym, which can raise NoMethodError when SiteSetting.default_locale is nil and break boot/translation lookup"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.6666666666666666,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR9__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR9__20260310/pull/1"
     }
   },
   "https://github.com/grafana/grafana/pull/76186": {
@@ -52542,6 +53735,55 @@
       "tool": "greptile-v4-1",
       "repo_name": "grafana__grafana__greptile-v4-1__PR76186__20260405",
       "pr_url": "https://github.com/code-review-benchmark/grafana__grafana__greptile-v4-1__PR76186__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The ContextualLoggerMiddleware methods (QueryData, CallResource, CheckHealth, CollectMetrics) panic when a nil request is received. This occurs because they directly access req.PluginContext (via the instrumentContext function) without first checking if req is nil. This is a regression, as previous middleware layers gracefully handled nil requests.",
+          "severity": "High",
+          "matched_candidate": "pluginsintegration.go wires in ContextualLoggerMiddleware, which can introduce panics for nil requests because the middleware dereferences req.PluginContext without nil guards",
+          "confidence": 0.9,
+          "reasoning": "The candidate flags that wiring in ContextualLoggerMiddleware can panic on nil requests due to dereferencing req.PluginContext without nil checks. This matches the golden issue about ContextualLoggerMiddleware methods panicking when req is nil because instrumentContext accesses req.PluginContext directly, noting it as a regression in nil-handling."
+        },
+        {
+          "golden_comment": "The traceID is no longer logged for plugin requests. During a refactoring, the tracing import and the logic to extract and add traceID from the context to log parameters were removed from the LoggerMiddleware. The newly introduced ContextualLoggerMiddleware does not add this information, resulting in missing traceID in plugin request logs and impacting debugging and request tracing capabilities.",
+          "severity": "Low",
+          "matched_candidate": "LoggerMiddleware no longer appends traceID from context to log parameters, potentially regressing request correlation in plugin logs if the contextual logger does not automatically emit trace IDs",
+          "confidence": 0.9,
+          "reasoning": "The candidate notes that LoggerMiddleware no longer appends traceID from context, which can regress request correlation in plugin logs if the new contextual logger doesn\u2019t emit trace IDs. This aligns with the golden issue: traceID extraction/logging logic was removed during refactor and the new ContextualLoggerMiddleware doesn\u2019t add it, causing missing traceIDs in plugin request logs."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "TestLogger.FromContext returns a fresh NewTestLogger() instead of preserving the receiver, causing captured logs/state to be lost and breaking tests that assert on logged output"
+        },
+        {
+          "candidate": "In pkg/plugins/log/logger.go, when FromContext(ctx) does not return *log.ConcreteLogger, the code falls back to d.New(), dropping the contextual logger returned by FromContext and losing context-derived fields/attributes"
+        },
+        {
+          "candidate": "ContextualLoggerMiddleware.CallResource dereferences req.PluginContext without checking if req is nil, causing a nil-pointer panic on nil CallResourceRequest"
+        },
+        {
+          "candidate": "ContextualLoggerMiddleware.CheckHealth dereferences req.PluginContext without checking if req is nil, causing a nil-pointer panic on nil CheckHealthRequest"
+        },
+        {
+          "candidate": "ContextualLoggerMiddleware.CollectMetrics dereferences req.PluginContext without checking if req is nil, causing a nil-pointer panic on nil CollectMetricsRequest"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 8,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 5,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.25,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "grafana__grafana__cloudaeye__PR76186__20260310",
+      "pr_url": "https://github.com/CloudAEye/grafana__grafana__cloudaeye__PR76186__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/10": {
@@ -55007,6 +56249,55 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR10__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR10__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "NoMethodError before_validation in EmbeddableHost",
+          "severity": "Critical",
+          "matched_candidate": "before_validation callback in app/models/embeddable_host.rb calls self.host.sub! unconditionally, causing NoMethodError when host is nil during validation/save",
+          "confidence": 0.95,
+          "reasoning": "Golden expects a NoMethodError occurring in a before_validation callback in EmbeddableHost. The candidate pinpoints that before_validation calls `self.host.sub!` unconditionally, which raises NoMethodError when `host` is nil. This is the same underlying issue with more detail."
+        },
+        {
+          "golden_comment": "The update and destroy methods in Admin::EmbeddableHostsController do not validate the existence of the EmbeddableHost record retrieved by ID. If EmbeddableHost.where(id: params[:id]).first returns nil (i.e., the host does not exist), attempting to call methods on the nil object (e.g., save_host or destroy) will result in a NoMethodError.",
+          "severity": "Medium",
+          "matched_candidate": "Admin::EmbeddableHostsController#update uses EmbeddableHost.where(id: params[:id]).first without nil handling; save_host(host) can dereference nil when no record matches params[:id]",
+          "confidence": 0.86,
+          "reasoning": "The candidate flags that update fetches EmbeddableHost via where(...).first without handling nil, leading to save_host(host) potentially calling methods on nil when no record is found. This matches the golden issue about missing existence validation causing NoMethodError (at least for update)."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Admin::EmbeddableHostsController#destroy uses EmbeddableHost.where(id: params[:id]).first without nil handling; host.destroy can raise when no record matches params[:id]"
+        },
+        {
+          "candidate": "Ember component embeddable-host.js.es6 calls host.destroyRecord().then(...) without a rejection handler, risking unhandled promise rejection / silent failure when the destroy request fails"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "record_for_host compares lower(host) = ? but does not normalize the parameter\u2019s case, so mixed\u2011case referer hosts may fail to match even though comparison intends to be case\u2011insensitive.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "Because this migration inserts embeddable_hosts rows with raw SQL, any existing embeddable_hosts values that include http:// or /https:// or path segments won\u2019t go through the EmbeddableHost model\u2019s normalization, so the new host lookup (which compares only the bare host) may fail for migrated data. Consider ensuring that migrated hosts are normalized to the same format as newly created EmbeddableHost records so existing embedding configurations keep working.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 4,
+      "tp": 2,
+      "fp": 2,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR10__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR10__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/7": {
@@ -56575,115 +57866,274 @@
       "errors_count": 0,
       "precision": 1.0,
       "recall": 0.3333333333333333,
-      "tool": "macroscope",
-      "repo_name": "discourse__discourse-graphite__macroscope__PR7__20260309",
-      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__macroscope__PR7__20260309/pull/1"
-    },
-    "baz": {
-      "skipped": false,
-      "true_positives": [
-        {
-          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
-          "severity": "Low",
-          "matched_candidate": "In the light theme, the topic-meta link color was changed to scale-color($primary, $lightness: 70%) (from 30%) when wrapping in dark-light-choose, making links nearly white and reducing contrast; the light branch should preserve 30% lightness.",
-          "confidence": 0.96,
-          "reasoning": "Both describe the same change in `.topic-meta-data h5 a`: the light-theme color went from `scale-color($primary, $lightness: 30%)` to `scale-color($primary, $lightness: 70%)` when introducing `dark-light-choose`, causing an inverted/dramatically lighter link color and contrast issues. Candidate also notes the fix (keep 30% in light branch), aligning with the golden concern."
-        },
-        {
-          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
-          "severity": "Low",
-          "matched_candidate": "In the light theme, the topic-meta link color was changed to scale-color($primary, $lightness: 70%) (from 30%) when wrapping in dark-light-choose, making links nearly white and reducing contrast; the light branch should preserve 30% lightness.",
-          "confidence": 0.78,
-          "reasoning": "Both point to the same underlying problem: in the light theme the change increases the $primary/lightness adjustment from 30% to 50%/70% instead of preserving the original value, leading to overly light (low-contrast) link color. The candidate focuses on the specific scale-color lightness change, which is the concrete manifestation of the golden comment\u2019s concern about $primary being altered in the light theme rather than kept consistent (with complements moved to $secondary for dark)."
-        },
-        {
-          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
-          "severity": "Low",
-          "matched_candidate": "For .custom-message-length, the light-branch color was changed to scale-color($primary, $lightness: 30%) (from 70%), making helper text much darker than before; the light branch should keep 70% lightness while adding a dark branch.",
-          "confidence": 0.95,
-          "reasoning": "Both comments flag the same regression: in the light theme branch the scale-color lightness was changed from 70% to 30%, resulting in much darker text than intended. The candidate even specifies restoring 70% for the light branch."
-        }
-      ],
-      "false_positives": [
-        {
-          "candidate": "Light-theme h3 color was changed to scale-color($primary, $lightness: 50%) (from 20%) when moving to dark-light-choose, brightening header text and regressing contrast; the light branch should keep 20% lightness while still providing a dark branch."
-        }
-      ],
-      "false_negatives": [],
-      "errors": [],
-      "total_candidates": 3,
-      "total_golden": 3,
-      "tp": 3,
-      "fp": 1,
-      "fn": 0,
-      "errors_count": 0,
-      "precision": 1.0,
-      "recall": 1.0,
-      "tool": "baz",
-      "repo_name": "discourse__discourse-graphite__baz__PR7__20260122",
-      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__baz__PR7__20260122/pull/1"
-    },
-    "propel-v2": {
-      "skipped": false,
-      "true_positives": [],
-      "false_positives": [],
-      "false_negatives": [
-        {
-          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
-          "severity": "Low"
-        },
-        {
-          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
-          "severity": "Low"
-        },
-        {
-          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
-          "severity": "Low"
-        }
-      ],
-      "errors": [],
-      "total_candidates": 0,
-      "total_golden": 3,
-      "tp": 0,
-      "fp": 0,
-      "fn": 3,
-      "errors_count": 0,
-      "precision": 0.0,
-      "recall": 0.0,
-      "tool": "propel-v2",
-      "repo_name": "discourse__discourse-graphite__propel-v2__PR7__20260325",
-      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__propel-v2__PR7__20260325/pull/1"
+      "tool": "macroscope",
+      "repo_name": "discourse__discourse-graphite__macroscope__PR7__20260309",
+      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__macroscope__PR7__20260309/pull/1"
+    },
+    "baz": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low",
+          "matched_candidate": "In the light theme, the topic-meta link color was changed to scale-color($primary, $lightness: 70%) (from 30%) when wrapping in dark-light-choose, making links nearly white and reducing contrast; the light branch should preserve 30% lightness.",
+          "confidence": 0.96,
+          "reasoning": "Both describe the same change in `.topic-meta-data h5 a`: the light-theme color went from `scale-color($primary, $lightness: 30%)` to `scale-color($primary, $lightness: 70%)` when introducing `dark-light-choose`, causing an inverted/dramatically lighter link color and contrast issues. Candidate also notes the fix (keep 30% in light branch), aligning with the golden concern."
+        },
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low",
+          "matched_candidate": "In the light theme, the topic-meta link color was changed to scale-color($primary, $lightness: 70%) (from 30%) when wrapping in dark-light-choose, making links nearly white and reducing contrast; the light branch should preserve 30% lightness.",
+          "confidence": 0.78,
+          "reasoning": "Both point to the same underlying problem: in the light theme the change increases the $primary/lightness adjustment from 30% to 50%/70% instead of preserving the original value, leading to overly light (low-contrast) link color. The candidate focuses on the specific scale-color lightness change, which is the concrete manifestation of the golden comment\u2019s concern about $primary being altered in the light theme rather than kept consistent (with complements moved to $secondary for dark)."
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low",
+          "matched_candidate": "For .custom-message-length, the light-branch color was changed to scale-color($primary, $lightness: 30%) (from 70%), making helper text much darker than before; the light branch should keep 70% lightness while adding a dark branch.",
+          "confidence": 0.95,
+          "reasoning": "Both comments flag the same regression: in the light theme branch the scale-color lightness was changed from 70% to 30%, resulting in much darker text than intended. The candidate even specifies restoring 70% for the light branch."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Light-theme h3 color was changed to scale-color($primary, $lightness: 50%) (from 20%) when moving to dark-light-choose, brightening header text and regressing contrast; the light branch should keep 20% lightness while still providing a dark branch."
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "baz",
+      "repo_name": "discourse__discourse-graphite__baz__PR7__20260122",
+      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__baz__PR7__20260122/pull/1"
+    },
+    "propel-v2": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 0,
+      "total_golden": 3,
+      "tp": 0,
+      "fp": 0,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "propel-v2",
+      "repo_name": "discourse__discourse-graphite__propel-v2__PR7__20260325",
+      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__propel-v2__PR7__20260325/pull/1"
+    },
+    "codeant-v2": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [
+        {
+          "candidate": "In app/assets/stylesheets/common/base/login.scss (.create-account .user-field .controls p), dark-light-choose arguments are reversed so dark theme uses a scale-color($primary, ...) value instead of the required $secondary-based value, causing incorrect dark-mode palette/contrast"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/common/base/login.scss (button#login-link, button#new-account-link), dark-light-choose arguments are reversed so dark theme uses a $primary-based scaled color instead of $secondary, leading to wrong palette/possible contrast issues in dark mode"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/common/base/search.scss (.blurb, .date, .search-highlight), dark-light-choose uses $primary for the dark-theme branch and $secondary for the light-theme branch, violating the requirement that dark-theme scaling be based on $secondary and miscoloring search results in dark mode"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/common/components/buttons.css.scss (.btn[disabled]), the disabled text color override is incorrectly nested under :hover, so disabled buttons keep the normal color when not hovered and visually react to hover despite being disabled"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/desktop/queued-posts.scss (.queued-posts .queued-post .post-info span), dark-light-choose arguments are reversed so dark theme derives from $primary and light theme derives from $secondary, producing incorrect colors in both themes"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/desktop/upload.scss (.upload-selector .description and .hint), dark-light-choose arguments are swapped so dark theme uses a $primary-based scaled color instead of the intended $secondary-based value, risking incorrect palette/contrast"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/desktop/user.scss (.user-preferences .instructions), dark-light-choose uses $primary in the dark-theme position instead of $secondary, miscoloring instructions text in dark mode"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/desktop/user.scss (.user-main .user-content table th), dark-light-choose uses a scaled $primary as the dark-theme color instead of $secondary, causing inconsistent header theming in dark mode"
+        },
+        {
+          "candidate": "In app/assets/stylesheets/desktop/user.scss (.user-stream .notification .fa/.icon), dark-light-choose uses a scaled $primary as the dark-theme color instead of $secondary, miscoloring notification icons in dark mode"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 9,
+      "total_golden": 3,
+      "tp": 0,
+      "fp": 9,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "codeant-v2",
+      "repo_name": "discourse__discourse-graphite__codeant-v2__PR7__20260325",
+      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__codeant-v2__PR7__20260325/pull/1"
+    },
+    "qodo-extended-v2": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low",
+          "matched_candidate": "In desktop/topic-post.scss, .topic-meta-data h5 a uses dark-light-choose with inverted/lightened values (primary 70% for light theme, secondary 30% for dark), changing light-theme link contrast and conflicting with the standard 30/70 link mapping.",
+          "confidence": 0.93,
+          "reasoning": "Both point to the same change in .topic-meta-data h5 a: replacing a single scale-color($primary, $lightness: 30%) with dark-light-choose where the light-theme value becomes scale-color($primary, $lightness: 70%), effectively inverting/dramatically altering the intended lightness/contrast mapping."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "In mobile/modal.scss, .custom-message-length uses inverted light/dark lightness values versus desktop (primary 30% for light theme, secondary 70% for dark), making the hint text darker in light theme and brighter in dark theme than intended."
+        },
+        {
+          "candidate": "In mobile/topic-post.scss, .topic-map h3 was changed to a 50/50 lightness mapping, causing a light-theme regression and inconsistency with desktop\u2019s intended 20/80 heading contrast."
+        },
+        {
+          "candidate": "In desktop/user.scss and mobile/user.scss, .group-member-info .name was changed to the same 50/50 mapping as .title, removing the intended visual hierarchy and reducing name/title distinguishability."
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 1,
+      "fp": 3,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.25,
+      "recall": 0.3333333333333333,
+      "tool": "qodo-extended-v2",
+      "repo_name": "discourse__discourse-graphite__qodo-extended-v2__PR7__20260329",
+      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__qodo-extended-v2__PR7__20260329/pull/1"
+    },
+    "mergemonkey": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low",
+          "matched_candidate": "For `.embedded-posts .topic-meta-data h5 a`, the `dark-light-choose()` light/dark arguments are swapped (light-mode lightness incorrectly changed from 30% to 70%), making the link text too faint on a white background; should use 30% for light and 70% for dark",
+          "confidence": 0.9,
+          "reasoning": "Both point to the same underlying problem in `.topic-meta-data h5 a`: the light-theme color lightness was changed from 30% to 70% via `dark-light-choose(...)`, effectively inverting/incorrectly setting the light-mode value and harming contrast. The candidate additionally mentions swapped arguments, but still identifies the key issue of 30% vs 70% for light mode."
+        },
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low",
+          "matched_candidate": "In desktop/user.scss for `.group-member-info .name`, light-mode lightness was changed from 30% to 50% (using 50%/50% instead of the standard 30% light / 70% dark pattern), deviating from the mechanical conversion pattern and lightening the name text in light mode",
+          "confidence": 0.86,
+          "reasoning": "Both point out that in the desktop variant the light theme lightness for the primary/name color was changed from 30% to 50%, deviating from the expected mechanical conversion (keeping primary at 30% and moving the complement to secondary for dark). The candidate specifically calls out `.group-member-info .name` in desktop, which the golden comment mentions as a similar change; it doesn\u2019t mention the broader $primary context or mobile, but it matches the core issue of the unintended 30%\u219250% change in light mode."
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low",
+          "matched_candidate": "In mobile/modal.scss for `.custom-message-length`, the `dark-light-choose()` light/dark arguments are swapped (light-mode lightness incorrectly changed from 70% to 30%), significantly darkening text in light mode; should use 70% for light and 30% for dark to match desktop/modal.scss:94",
+          "confidence": 0.78,
+          "reasoning": "Both describe the same underlying problem: the light theme lightness value was changed from 70% to 30%, making text too dark in light mode. The candidate adds extra context (different file/selector and swapped arguments), but it still targets the same incorrect light-theme lightness regression."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "In mobile/topic-post.scss for the topic map `h3`, the conversion uses 50%/50% instead of preserving the original 20% light-mode lightness and using 80% for dark mode, breaking parity with desktop/topic-post.scss:321 and making the heading lighter on mobile in light mode"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "mergemonkey",
+      "repo_name": "discourse__discourse-graphite__mergemonkey__PR7__20260403",
+      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__mergemonkey__PR7__20260403/pull/1"
+    },
+    "greptile-v4-1": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
+          "severity": "Low",
+          "matched_candidate": "In app/assets/stylesheets/desktop/topic-post.scss (around line 291), the light-theme lightness value for .topic-meta-data h5 a was accidentally changed/swapped from 30% to 70%, making the link color washed out in light theme; primary should remain 30% and secondary (dark theme) should use 70% in dark-light-choose.",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same regression in `.topic-meta-data h5 a`: the light-theme `scale-color($primary, $lightness: 30%)` was changed to use `70%` inside `dark-light-choose`, which inverts/dramatically alters the intended light-theme lightness. Candidate explicitly notes the 30%\u219270% swap and its effect."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 3,
+      "tp": 1,
+      "fp": 0,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.3333333333333333,
+      "tool": "greptile-v4-1",
+      "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR7__20260405",
+      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR7__20260405/pull/1"
     },
-    "codeant-v2": {
+    "cloudaeye": {
       "skipped": false,
       "true_positives": [],
       "false_positives": [
         {
-          "candidate": "In app/assets/stylesheets/common/base/login.scss (.create-account .user-field .controls p), dark-light-choose arguments are reversed so dark theme uses a scale-color($primary, ...) value instead of the required $secondary-based value, causing incorrect dark-mode palette/contrast"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/common/base/login.scss (button#login-link, button#new-account-link), dark-light-choose arguments are reversed so dark theme uses a $primary-based scaled color instead of $secondary, leading to wrong palette/possible contrast issues in dark mode"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/common/base/search.scss (.blurb, .date, .search-highlight), dark-light-choose uses $primary for the dark-theme branch and $secondary for the light-theme branch, violating the requirement that dark-theme scaling be based on $secondary and miscoloring search results in dark mode"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/common/components/buttons.css.scss (.btn[disabled]), the disabled text color override is incorrectly nested under :hover, so disabled buttons keep the normal color when not hovered and visually react to hover despite being disabled"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/desktop/queued-posts.scss (.queued-posts .queued-post .post-info span), dark-light-choose arguments are reversed so dark theme derives from $primary and light theme derives from $secondary, producing incorrect colors in both themes"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/desktop/upload.scss (.upload-selector .description and .hint), dark-light-choose arguments are swapped so dark theme uses a $primary-based scaled color instead of the intended $secondary-based value, risking incorrect palette/contrast"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/desktop/user.scss (.user-preferences .instructions), dark-light-choose uses $primary in the dark-theme position instead of $secondary, miscoloring instructions text in dark mode"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/desktop/user.scss (.user-main .user-content table th), dark-light-choose uses a scaled $primary as the dark-theme color instead of $secondary, causing inconsistent header theming in dark mode"
-        },
-        {
-          "candidate": "In app/assets/stylesheets/desktop/user.scss (.user-stream .notification .fa/.icon), dark-light-choose uses a scaled $primary as the dark-theme color instead of $secondary, miscoloring notification icons in dark mode"
+          "candidate": "In app/assets/stylesheets/mobile/topic-post.scss, the .topic-list-item h3 heading color logic changes light-theme lightness from 20% to 50%, causing unexpected contrast changes in the light theme"
         }
       ],
       "false_negatives": [
@@ -56701,141 +58151,17 @@
         }
       ],
       "errors": [],
-      "total_candidates": 9,
+      "total_candidates": 1,
       "total_golden": 3,
       "tp": 0,
-      "fp": 9,
+      "fp": 1,
       "fn": 3,
       "errors_count": 0,
       "precision": 0.0,
       "recall": 0.0,
-      "tool": "codeant-v2",
-      "repo_name": "discourse__discourse-graphite__codeant-v2__PR7__20260325",
-      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__codeant-v2__PR7__20260325/pull/1"
-    },
-    "qodo-extended-v2": {
-      "skipped": false,
-      "true_positives": [
-        {
-          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
-          "severity": "Low",
-          "matched_candidate": "In desktop/topic-post.scss, .topic-meta-data h5 a uses dark-light-choose with inverted/lightened values (primary 70% for light theme, secondary 30% for dark), changing light-theme link contrast and conflicting with the standard 30/70 link mapping.",
-          "confidence": 0.93,
-          "reasoning": "Both point to the same change in .topic-meta-data h5 a: replacing a single scale-color($primary, $lightness: 30%) with dark-light-choose where the light-theme value becomes scale-color($primary, $lightness: 70%), effectively inverting/dramatically altering the intended lightness/contrast mapping."
-        }
-      ],
-      "false_positives": [
-        {
-          "candidate": "In mobile/modal.scss, .custom-message-length uses inverted light/dark lightness values versus desktop (primary 30% for light theme, secondary 70% for dark), making the hint text darker in light theme and brighter in dark theme than intended."
-        },
-        {
-          "candidate": "In mobile/topic-post.scss, .topic-map h3 was changed to a 50/50 lightness mapping, causing a light-theme regression and inconsistency with desktop\u2019s intended 20/80 heading contrast."
-        },
-        {
-          "candidate": "In desktop/user.scss and mobile/user.scss, .group-member-info .name was changed to the same 50/50 mapping as .title, removing the intended visual hierarchy and reducing name/title distinguishability."
-        }
-      ],
-      "false_negatives": [
-        {
-          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
-          "severity": "Low"
-        },
-        {
-          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
-          "severity": "Low"
-        }
-      ],
-      "errors": [],
-      "total_candidates": 4,
-      "total_golden": 3,
-      "tp": 1,
-      "fp": 3,
-      "fn": 2,
-      "errors_count": 0,
-      "precision": 0.25,
-      "recall": 0.3333333333333333,
-      "tool": "qodo-extended-v2",
-      "repo_name": "discourse__discourse-graphite__qodo-extended-v2__PR7__20260329",
-      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__qodo-extended-v2__PR7__20260329/pull/1"
-    },
-    "mergemonkey": {
-      "skipped": false,
-      "true_positives": [
-        {
-          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
-          "severity": "Low",
-          "matched_candidate": "For `.embedded-posts .topic-meta-data h5 a`, the `dark-light-choose()` light/dark arguments are swapped (light-mode lightness incorrectly changed from 30% to 70%), making the link text too faint on a white background; should use 30% for light and 70% for dark",
-          "confidence": 0.9,
-          "reasoning": "Both point to the same underlying problem in `.topic-meta-data h5 a`: the light-theme color lightness was changed from 30% to 70% via `dark-light-choose(...)`, effectively inverting/incorrectly setting the light-mode value and harming contrast. The candidate additionally mentions swapped arguments, but still identifies the key issue of 30% vs 70% for light mode."
-        },
-        {
-          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
-          "severity": "Low",
-          "matched_candidate": "In desktop/user.scss for `.group-member-info .name`, light-mode lightness was changed from 30% to 50% (using 50%/50% instead of the standard 30% light / 70% dark pattern), deviating from the mechanical conversion pattern and lightening the name text in light mode",
-          "confidence": 0.86,
-          "reasoning": "Both point out that in the desktop variant the light theme lightness for the primary/name color was changed from 30% to 50%, deviating from the expected mechanical conversion (keeping primary at 30% and moving the complement to secondary for dark). The candidate specifically calls out `.group-member-info .name` in desktop, which the golden comment mentions as a similar change; it doesn\u2019t mention the broader $primary context or mobile, but it matches the core issue of the unintended 30%\u219250% change in light mode."
-        },
-        {
-          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
-          "severity": "Low",
-          "matched_candidate": "In mobile/modal.scss for `.custom-message-length`, the `dark-light-choose()` light/dark arguments are swapped (light-mode lightness incorrectly changed from 70% to 30%), significantly darkening text in light mode; should use 70% for light and 30% for dark to match desktop/modal.scss:94",
-          "confidence": 0.78,
-          "reasoning": "Both describe the same underlying problem: the light theme lightness value was changed from 70% to 30%, making text too dark in light mode. The candidate adds extra context (different file/selector and swapped arguments), but it still targets the same incorrect light-theme lightness regression."
-        }
-      ],
-      "false_positives": [
-        {
-          "candidate": "In mobile/topic-post.scss for the topic map `h3`, the conversion uses 50%/50% instead of preserving the original 20% light-mode lightness and using 80% for dark mode, breaking parity with desktop/topic-post.scss:321 and making the heading lighter on mobile in light mode"
-        }
-      ],
-      "false_negatives": [],
-      "errors": [],
-      "total_candidates": 4,
-      "total_golden": 3,
-      "tp": 3,
-      "fp": 1,
-      "fn": 0,
-      "errors_count": 0,
-      "precision": 0.75,
-      "recall": 1.0,
-      "tool": "mergemonkey",
-      "repo_name": "discourse__discourse-graphite__mergemonkey__PR7__20260403",
-      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__mergemonkey__PR7__20260403/pull/1"
-    },
-    "greptile-v4-1": {
-      "skipped": false,
-      "true_positives": [
-        {
-          "golden_comment": "In .topic-meta-data h5 a, the original code had color: scale-color($primary, $lightness: 30%) but was changed to dark-light-choose(scale-color($primary, $lightness: 70%), scale-color($secondary, $lightness: 30%)). The lightness for the light theme changed from 30% to 70%, which is a dramatic inversion",
-          "severity": "Low",
-          "matched_candidate": "In app/assets/stylesheets/desktop/topic-post.scss (around line 291), the light-theme lightness value for .topic-meta-data h5 a was accidentally changed/swapped from 30% to 70%, making the link color washed out in light theme; primary should remain 30% and secondary (dark theme) should use 70% in dark-light-choose.",
-          "confidence": 0.93,
-          "reasoning": "Both describe the same regression in `.topic-meta-data h5 a`: the light-theme `scale-color($primary, $lightness: 30%)` was changed to use `70%` inside `dark-light-choose`, which inverts/dramatically alters the intended light-theme lightness. Candidate explicitly notes the 30%\u219270% swap and its effect."
-        }
-      ],
-      "false_positives": [],
-      "false_negatives": [
-        {
-          "golden_comment": "This change for desktop/user.css changes $primary from 30% to 50% for the light theme; most other changes preserve the original $primary value and move the complement to $secondary for dark. Consider reviewing this (also applies to a similar .name change in the mobile variant).",
-          "severity": "Low"
-        },
-        {
-          "golden_comment": "In topic-post.css the original code used $lightness: 70% but the replacement uses $lightness: 30% for the light theme. This makes the text significantly darker than intended.",
-          "severity": "Low"
-        }
-      ],
-      "errors": [],
-      "total_candidates": 1,
-      "total_golden": 3,
-      "tp": 1,
-      "fp": 0,
-      "fn": 2,
-      "errors_count": 0,
-      "precision": 1.0,
-      "recall": 0.3333333333333333,
-      "tool": "greptile-v4-1",
-      "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR7__20260405",
-      "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR7__20260405/pull/1"
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR7__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR7__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/8": {
@@ -58868,6 +60194,51 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR8__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR8__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": " The findMembers() call is now asynchronous and unhandled. The controller may not have member data immediately available, creating a race condition.",
+          "severity": "High",
+          "matched_candidate": "admin_group_route setupController calls model.findMembers() without returning/awaiting or handling errors, so AJAX failures become unhandled Promise rejections and fetch errors are hidden",
+          "confidence": 0.9,
+          "reasoning": "Both point out that setupController invokes model.findMembers() asynchronously without awaiting/returning/handling it, leading to the controller not having member data when needed (race condition). The candidate additionally mentions unhandled promise rejections/errors, which is consistent with the same underlying unhandled async call."
+        },
+        {
+          "golden_comment": "In the next action, capping the next offset at user_count can produce an empty page (e.g., total equal to limit results in offset == total, showing 2/2 with no members). This can cause confusing UX on the last page.",
+          "severity": "Medium",
+          "matched_candidate": "Pagination totalPages calculation uses Math.floor(user_count / limit) + 1, overcounting when user_count is an exact multiple of limit and allowing navigation to an empty extra page",
+          "confidence": 0.9,
+          "reasoning": "Both describe the same pagination edge case: when user_count is an exact multiple of limit, the logic allows navigating to an extra/last page where offset == total, resulting in an empty page. The candidate frames it as totalPages overcounting; the golden frames it as capping next offset at user_count causing an empty page. Same underlying UX/bug outcome."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "add_members assumes params[:usernames] is a String and calls split(\",\") unconditionally, causing NoMethodError when usernames is an Array"
+        },
+        {
+          "candidate": "group model triggers findMembers() after add/remove operations without sequencing/cancellation, allowing overlapping reloads to resolve out of order and overwrite newer member state with stale data"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "HTTP method mismatch in .remove_member - test uses PUT but remove_member action expects DELETE",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 2,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.6666666666666666,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR8__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR8__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/3": {
@@ -60451,6 +61822,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR3__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR3__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "BlockedEmail.should_block_email? method has side effects during a read operation - it updates statistics even when just checking if an email should be blocked. This could cause race conditions in concurrent environments and makes the method name misleading.",
+          "severity": "Medium",
+          "matched_candidate": "Non-atomic match_count increment in blocked_email.rb (read/modify/write with save) can lose increments under concurrent requests due to missing locking or atomic counter update",
+          "confidence": 0.86,
+          "reasoning": "Both point to the same underlying problem: should_block_email? performs a side effect by incrementing/saving match_count during what should be a read/check operation, and this update is unsafe under concurrency. The candidate focuses on the non-atomic increment/race condition aspect, which is explicitly part of the golden concern."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Client-side rejectedEmails cache causes valid emails to remain blocked after a transient server-side rejection because rejectedEmails is never cleared on email edit/retry/success in create_account_controller.js"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Regex pattern @(#{domains}) only matches domain suffixes, not full domains. evil.example.com would match whitelist entry example.com.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR3__20260407",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR3__20260407/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/5": {
@@ -61989,6 +63395,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR5__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR5__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "-ms-align-items never existed in any version of IE/Edge; the correct legacy property is -ms-flex-align.",
+          "severity": "Low",
+          "matched_candidate": "In app/assets/stylesheets/common/foundation/mixins.scss, the align-items mixin emits `-ms-align-items`, which is not a valid legacy IE/Edge flexbox property (should rely on `-ms-flex-align` instead), causing alignment to break in IE/legacy Edge.",
+          "confidence": 0.98,
+          "reasoning": "The candidate flags that the mixin outputs `-ms-align-items`, notes it is not a valid IE/legacy Edge flexbox property, and specifies the correct property should be `-ms-flex-align`, which is exactly the golden issue."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "In app/assets/stylesheets/common/foundation/mixins.scss, the order mixin incorrectly maps modern `order: $val` directly to `-webkit-box-ordinal-group` and `-moz-box-ordinal-group`, which have different semantics in the 2009 flexbox spec, causing item ordering to break in old WebKit/Firefox."
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Mixing float: left with flexbox causes layout issues. Further this PR removes the float-based right alignment for .d-header .panel, which may cause the login panel in the non-Ember/noscript header (where .panel is nested inside .row and not a flex item) to stack under the title instead of remaining right-aligned.",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR5__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR5__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/6": {
@@ -63387,6 +64828,33 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR6__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR6__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [
+        {
+          "candidate": "In app/serializers/user_serializer.rb, the include hook for website_name is defined as include_website_name instead of the expected include_website_name? predicate, so the guard may be ignored and website_name may be serialized unconditionally (logic error / potential NoMethodError depending on framework behavior)."
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The include_website_name method is missing the required ? suffix. Rails serializers expect include_ methods to end with ? for conditional attribute inclusion, a convention followed by other methods in this serializer. Without it, the website_name attribute may not be conditionally included as intended. Additionally, the '.' << website_host string concatenation should be replaced with '.' + website_host or '.#{website_host}' to avoid mutating string literals, which can lead to issues.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 1,
+      "total_golden": 1,
+      "tp": 0,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR6__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR6__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/4": {
@@ -66276,6 +67744,84 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR4__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR4__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "SSRF vulnerability using open(url) without validation",
+          "severity": "Critical",
+          "matched_candidate": "SSRF risk: poll_feed fetches SiteSetting.feed_polling_url via open-uri without scheme/host allowlisting or destination validation",
+          "confidence": 0.96,
+          "reasoning": "Both describe an SSRF vulnerability caused by fetching a user-controlled URL using open/open-uri without proper validation or allowlisting of scheme/host/destination."
+        },
+        {
+          "golden_comment": "The current origin validation using indexOf is insufficient and can be bypassed. An attacker could use a malicious domain like evil-discourseUrl.com to pass this check.",
+          "severity": "Medium",
+          "matched_candidate": "embed.js origin validation is insecure because it uses a substring check (discourseUrl.indexOf(e.origin)) instead of an exact origin comparison, allowing bypass with malicious origins",
+          "confidence": 0.95,
+          "reasoning": "Both describe the same flaw: origin validation relies on indexOf/substring matching rather than exact origin comparison, which can be bypassed by crafting a malicious domain that contains the trusted domain string (e.g., evil-discourseUrl.com)."
+        },
+        {
+          "golden_comment": "The ERB block closes with end if, which is invalid Ruby/ERB and will raise at render; it should just be end to close the if block.",
+          "severity": "Medium",
+          "matched_candidate": "app/views/embed/best.html.erb contains invalid ERB/Ruby syntax (<%- end if %>) causing template parse/render failure",
+          "confidence": 0.96,
+          "reasoning": "Both the golden comment and the candidate issue flag the same problem: the ERB template closes an if block with `end if` (`<%- end if %>`), which is invalid syntax in this context and will cause a render/parse failure; it should be just `end`."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "poll_feed crashes with NoMethodError when an RSS item has nil content because it calls i.content.scrub without a nil guard"
+        },
+        {
+          "candidate": "poll_feed does not handle exceptions from network fetch or RSS parsing (SimpleRSS.parse open(...)), so unreachable or malformed feeds can crash the scheduled job"
+        },
+        {
+          "candidate": "TopicEmbed.import can crash when embed.post is nil (stale/missing associated post) because it passes nil into PostRevisor without checking"
+        },
+        {
+          "candidate": "TopicRetriever crashes if SiteSetting.embed_by_username is nil because it calls downcase on a nil setting"
+        },
+        {
+          "candidate": "embed.js can crash when #discourse-comments is missing because it calls appendChild on a null element"
+        },
+        {
+          "candidate": "spec/controllers/embed_controller_spec.rb test name claims it raises an error but the assertion only checks response not success, creating a name/body mismatch and potentially misattributing failures"
+        },
+        {
+          "candidate": "SSRF risk: TopicEmbed.import_remote fetches open(url).read on attacker-influenced URLs without sufficient URL sanitization/validation"
+        },
+        {
+          "candidate": "XSS risk: TopicEmbed builds HTML with unescaped url interpolated into an <a> tag (href and link text), allowing injection if url contains quotes/HTML"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "postMessage targetOrigin should be the origin (scheme+host+port), not the full referrer URL; using the full URL will cause the message to be dropped and prevent resizing.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The code sets X-Frame-Options: ALLOWALL which completely disables clickjacking protection. The referer validation can be bypassed (referer headers are easily spoofed), and the fallback to empty string for nil referer masks validation failures.",
+          "severity": "Medium"
+        },
+        {
+          "golden_comment": "The TopicEmbed.import method is susceptible to a NoMethodError if the contents parameter is nil when attempting to append a string, and an XSS vulnerability due to unescaped url interpolation in the generated HTML.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 11,
+      "total_golden": 6,
+      "tp": 3,
+      "fp": 8,
+      "fn": 3,
+      "errors_count": 0,
+      "precision": 0.2727272727272727,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR4__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR4__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/1": {
@@ -68205,6 +69751,50 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR1__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR1__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The downsize method is defined twice. The second definition, which expects a single dimensions string parameter, overrides the first, which expected separate max_width and max_height parameters. This makes the original method unreachable and breaks existing code that calls it with separate width and height arguments.",
+          "severity": "Medium",
+          "matched_candidate": "In app/models/optimized_image.rb, defining self.downsize twice causes the later method to override the earlier one, effectively removing the width/height arity; existing callers using separate max_width and max_height arguments may now raise ArgumentError",
+          "confidence": 0.96,
+          "reasoning": "The candidate notes that self.downsize is defined twice and the later definition overrides the earlier one, changing/removing the width/height arity and potentially breaking existing callers with ArgumentError. This is the same underlying issue as the golden comment (method override making the original unreachable and breaking callers)."
+        },
+        {
+          "golden_comment": "Hardcoding maxSizeKB = 10 * 1024 ignores Discourse.SiteSettings['max_' + type + '_size_kb'], so the client-side limit can diverge from server-side and per-type settings (also applies to the 413 handler below).",
+          "severity": "Low",
+          "matched_candidate": "Client-side upload size validation in app/assets/javascripts/discourse/lib/utilities.js uses a hardcoded 10MB (10 * 1024 KB) instead of per-type site settings (Discourse.SiteSettings['max_' + type + '_size_kb']), causing configured upload limits to be ignored",
+          "confidence": 0.93,
+          "reasoning": "The candidate flags the same core issue: a hardcoded 10*1024 KB client-side upload limit instead of using Discourse.SiteSettings['max_' + type + '_size_kb'], leading to divergence from configured/per-type server limits. It doesn\u2019t mention the 413 handler explicitly, but the primary issue matches."
+        },
+        {
+          "golden_comment": "Passing 80% as the dimensions can fail for animated GIFs when allow_animated_thumbnails is true, since the animated path uses gifsicle --resize-fit which expects WxH geometry, not a percentage; downsizing would then silently fail.",
+          "severity": "Medium",
+          "matched_candidate": "UploadsController#create_upload passes a percentage geometry string (\"80%\") into OptimizedImage.downsize, which may break the animated-image downsize/optimize path that expects WxH-style geometry (risk of ArgumentError or failed resize for animated GIFs)",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same underlying issue: passing \"80%\" as geometry into the animated GIF thumbnail/downsizing path when animated thumbnails are enabled. The golden specifies gifsicle --resize-fit requires WxH and percentage will cause silent failure; the candidate likewise notes percentage geometry may break the animated path expecting WxH, causing failed resize or error."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "HTTP 413 error handler in app/assets/javascripts/discourse/lib/utilities.js uses a hardcoded 10MB max size instead of Discourse.SiteSettings.max_image_size_kb, causing the user-facing 'file too large' message to report the wrong limit when site/server settings differ"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 3,
+      "tp": 3,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.75,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR1__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR1__20260310/pull/1"
     }
   },
   "https://github.com/ai-code-review-evaluation/discourse-graphite/pull/2": {
@@ -69925,6 +71515,41 @@
       "tool": "greptile-v4-1",
       "repo_name": "discourse__discourse-graphite__greptile-v4-1__PR2__20260405",
       "pr_url": "https://github.com/code-review-benchmark/discourse__discourse-graphite__greptile-v4-1__PR2__20260405/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "logic: Potential nil pointer exception - if no TopicUser record exists, tu will be nil and calling methods on it will crash",
+          "severity": "High",
+          "matched_candidate": "TopicsController#unsubscribe dereferences tu.notification_level without guarding against TopicUser.find_by returning nil, causing NoMethodError when no topic_users row exists for the user/topic",
+          "confidence": 0.96,
+          "reasoning": "Both describe the same bug: TopicUser.find_by may return nil, and the code then dereferences/calls methods on tu (e.g., notification_level), leading to a NoMethodError/nil pointer crash when no TopicUser record exists."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Email notification template sets class='.previous-discussion' (includes a literal dot), so the intended previous-discussion class won\u2019t match styling/hooks"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Typo in property name: 'stopNotificiationsText' should be 'stopNotificationsText' (missing 'n' in 'Notifications')",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 1,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "discourse__discourse-graphite__cloudaeye__PR2__20260310",
+      "pr_url": "https://github.com/CloudAEye/discourse__discourse-graphite__cloudaeye__PR2__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/22532": {
@@ -71659,6 +73284,44 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR22532__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR22532__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "logic: macOS-specific sed syntax with empty string after -i flag will fail on Linux systems",
+          "severity": "Low",
+          "matched_candidate": "Script uses BSD/macOS-specific `sed -i '' -E` syntax, causing runtime failure on Linux hosts with GNU sed when updating the .env file",
+          "confidence": 0.98,
+          "reasoning": "Both describe the same portability issue: using macOS/BSD `sed -i ''` (empty backup extension) syntax, which fails on Linux/GNU sed."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Hard-coded shared log file `/tmp/tmole.log` with no locking or per-process isolation creates a race condition where concurrent script runs can overwrite/read each other\u2019s tmole output and reuse the wrong webhook URL"
+        },
+        {
+          "candidate": "Fixed startup polling timeout (~10 seconds) can be too short; if tmole initializes slower, the script incorrectly treats it as failure and exits"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "The updateManyByCredentialId call uses an empty data object, which prevents Prisma's @updatedAt decorator from updating the updatedAt timestamp. This results in inaccurate cache status tracking, as the timestamp isn't updated when the cache is refreshed. To fix this, explicitly set the updatedAt field.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 3,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 2,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22532__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22532__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/8330": {
@@ -73312,6 +74975,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR8330__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR8330__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Incorrect end time calculation using slotStartTime instead of slotEndTime",
+          "severity": "Medium",
+          "matched_candidate": "Working-hours availability check computes both `start` and `end` from `slotStartTime` and never uses `slotEndTime`, so slots that end after `workingHour.endTime` can be incorrectly marked available",
+          "confidence": 0.96,
+          "reasoning": "The candidate explicitly states that both start and end are computed from slotStartTime and slotEndTime is never used, leading to incorrect availability when slots extend past workingHour.endTime. This matches the golden issue of incorrect end time calculation using slotStartTime instead of slotEndTime."
+        },
+        {
+          "golden_comment": "Using === for dayjs object comparison will always return false as it compares object references, not values. Use .isSame() method instead: dayjs(date.start).add(utcOffset, 'minutes').isSame(dayjs(date.end).add(utcOffset, minutes))",
+          "severity": "Medium",
+          "matched_candidate": "Comparing two newly created Dayjs objects with `===` in override-day detection always returns false (object identity comparison), breaking detection when start and end represent the same instant",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same bug: using `===` to compare Dayjs instances compares object identity, so two separately created Dayjs objects will not be equal even if they represent the same time. The golden suggests using `.isSame()`; the candidate explains the same failure mode in override-day detection."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8330__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8330__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/14943": {
@@ -74789,6 +76485,39 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR14943__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR14943__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Using retryCount: reminder.retryCount + 1 reads a possibly stale value and can lose increments under concurrency; consider an atomic increment via Prisma (increment: 1) to avoid race conditions (also applies to the similar update in the catch block).",
+          "severity": "High",
+          "matched_candidate": "Non-atomic update of retryCount using `reminder.retryCount + 1` based on a stale value from `findMany`, causing lost increments under concurrent schedulers (race condition)",
+          "confidence": 0.93,
+          "reasoning": "The candidate flags the same core issue: retryCount is updated non-atomically using a previously read value (from findMany), which can be stale and lead to lost increments under concurrency/race conditions. This matches the golden comment\u2019s concern about using an atomic increment (e.g., Prisma increment: 1)."
+        },
+        {
+          "golden_comment": "The deletion logic in scheduleSMSReminders.ts incorrectly deletes non-SMS workflow reminders (e.g., Email, WhatsApp) that have retryCount > 1. This occurs because the retryCount condition within the OR clause for deletion lacks a method: WorkflowMethods.SMS filter, causing it to apply to all reminder types instead of only SMS reminders, which is the intended scope of this function.",
+          "severity": "High",
+          "matched_candidate": "`deleteMany` filter uses an `OR` branch with only `retryCount > 1` and no `method: WorkflowMethods.SMS` constraint, so it can delete non-SMS workflow reminders when retryCount exceeds 1",
+          "confidence": 0.97,
+          "reasoning": "The candidate flags that the deleteMany OR branch with only `retryCount > 1` lacks a `method: WorkflowMethods.SMS` constraint, leading to deletion of non-SMS reminders\u2014exactly the bug described in the golden comment."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 2,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 0,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14943__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14943__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/22345": {
@@ -76287,6 +78016,33 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR22345__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR22345__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "In getBaseConditions(), the else if (filterConditions) and final else branches are unreachable. This is because getAuthorizationConditions() always returns a non-null Prisma.Sql object, making authConditions always truthy, which means only the first two if/else if conditions are ever evaluated.",
+          "severity": "Low"
+        },
+        {
+          "golden_comment": "Fetching userIdsFromOrg only when teamsFromOrg.length > 0 can exclude org-level members for orgs without child teams; consider deriving from teamIds (which includes orgId) or removing the guard so org-only orgs still include member user bookings.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 0,
+      "total_golden": 2,
+      "tp": 0,
+      "fp": 0,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR22345__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR22345__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/11059": {
@@ -79078,6 +80834,65 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR11059__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR11059__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The parseRefreshTokenResponse function incorrectly sets refresh_token to the hardcoded string 'refresh_token' when it's missing from the OAuth refresh token response. This invalidates the token, breaking subsequent token refreshes and causing authentication failures.",
+          "severity": "High",
+          "matched_candidate": "parseRefreshTokenResponse.ts fabricates a placeholder refresh_token when the provider omits it, causing incorrect token data to be returned and potentially persisted",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same bug: parseRefreshTokenResponse sets/fabricates a placeholder refresh_token when the provider response lacks one, leading to invalid/incorrect token data and downstream refresh/auth failures."
+        },
+        {
+          "golden_comment": "When APP_CREDENTIAL_SHARING_ENABLED and CALCOM_CREDENTIAL_SYNC_ENDPOINT are set, the refreshFunction helper returns the fetch Response, but several callers (for example GoogleCalendarService.refreshAccessToken expecting res.data, and HubspotCalendarService.refreshAccessToken expecting a HubspotToken) assume it returns the integration-specific token object. That mismatch will cause runtime errors in the sync-enabled path unless the return type or those call sites are adjusted.",
+          "severity": "High",
+          "matched_candidate": "refreshOAuthTokens.ts returns a raw fetch Response in one branch while other branches return parsed token payloads, creating a return-shape mismatch that breaks callers expecting .data token fields",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same core bug: in the sync-enabled/configured branch the refresh helper returns a raw fetch Response, while callers expect an integration-specific parsed token object (e.g., res.data / HubspotToken). This return-shape mismatch would cause runtime errors unless the return type or call sites are adjusted."
+        },
+        {
+          "golden_comment": "When the sync endpoint path is used, res is a fetch Response and has no .data; res?.data will be undefined and token.access_token will throw at runtime. This relies on a consistent return shape from refreshOAuthTokens, which isn\u2019t guaranteed currently.",
+          "severity": "High",
+          "matched_candidate": "refreshOAuthTokens.ts returns a raw fetch Response in one branch while other branches return parsed token payloads, creating a return-shape mismatch that breaks callers expecting .data token fields",
+          "confidence": 0.93,
+          "reasoning": "The candidate flags that refreshOAuthTokens returns a raw fetch Response in one branch and parsed token payloads in others, causing a return-shape mismatch that breaks callers expecting token fields (e.g., .data/access_token). This is the same underlying issue as the golden comment about res being a Response without .data on the sync endpoint path, leading to runtime errors."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Non-transactional read-then-write in apps/web/pages/api/webhook/app-credential.ts can race: concurrent requests may both miss findFirst and both create duplicate Credential rows"
+        },
+        {
+          "candidate": "Credential model lacks a unique constraint on (userId, appId), so the database does not prevent duplicate credentials for the same user/app pair"
+        },
+        {
+          "candidate": "googlecalendar CalendarService reads res?.data from refreshOAuthTokens output even when it is a fetch Response, so token field access will fail at runtime"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Invalid Zod schema syntax. Computed property keys like [z.string().toString()] are not valid in Zod object schemas and will cause runtime errors. ",
+          "severity": "High"
+        },
+        {
+          "golden_comment": "parseRefreshTokenResponse returns a Zod safeParse result ({ success, data, error }), not the credential key object. Persisting that as key stores the wrapper instead of the token payload; we should store the parsed data or use schema parse.",
+          "severity": "High"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 5,
+      "tp": 3,
+      "fp": 3,
+      "fn": 2,
+      "errors_count": 0,
+      "precision": 0.6,
+      "recall": 0.6,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR11059__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR11059__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/7232": {
@@ -80909,6 +82724,52 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR7232__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR7232__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Asynchronous functions deleteScheduledEmailReminder and deleteScheduledSMSReminder are called without await inside forEach loops. This occurs during booking rescheduling/cancellation, and workflow/workflow step deletion/updates. Consequently, scheduled workflow reminders may not be reliably cancelled, potentially leaving them active.",
+          "severity": "Medium",
+          "matched_candidate": "handleCancelBooking.ts calls async deleteScheduledEmailReminder/deleteScheduledSMSReminder inside a forEach without awaiting or including the promises in Promise.all, so reminder deletion failures can be unhandled and cleanup can silently fail",
+          "confidence": 0.9,
+          "reasoning": "Both describe the same core issue: async deleteScheduledEmailReminder/deleteScheduledSMSReminder are invoked inside a forEach without awaiting (or aggregating via Promise.all), leading to unreliable reminder cancellation/cleanup and potentially unhandled failures."
+        },
+        {
+          "golden_comment": "When immediateDelete is true, the deleteScheduledEmailReminder function cancels the SendGrid email but fails to delete the corresponding WorkflowReminder record from the database. This creates orphaned database entries and is inconsistent with the immediateDelete: false path, which marks the record as cancelled. The SendGrid DELETE API call is also omitted in this path.",
+          "severity": "High",
+          "matched_candidate": "viewer/workflows.tsx uses deleteScheduledEmailReminder(..., true) paths where the helper cancels SendGrid but does not delete/update the WorkflowReminder DB row, leaving stale DB reminders that are not cleaned up by the cancelled=true cleanup job",
+          "confidence": 0.9,
+          "reasoning": "The candidate flags that when deleteScheduledEmailReminder is called with immediateDelete=true, it cancels the SendGrid email but does not delete or update (e.g., mark cancelled) the corresponding WorkflowReminder DB row, leaving stale/orphaned reminders not handled by the cancelled=true cleanup job. This matches the golden issue about inconsistent handling vs immediateDelete=false and orphaned DB entries (and implies the missing proper cleanup in that path)."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "handleNewBooking.ts calls async reminder deletion helpers inside a forEach without await, so the surrounding try/catch cannot reliably catch later rejections and rescheduling may continue before cleanup completes"
+        },
+        {
+          "candidate": "scheduleEmailReminders.ts wraps all cancellation requests in a single try/catch while awaiting inside a loop, so one failed cancellation aborts the loop and leaves remaining reminders still scheduled"
+        },
+        {
+          "candidate": "emailReminderManager.ts only cancels SendGrid scheduled sends when immediateDelete is true; callers that omit immediateDelete now only mark DB rows cancelled and do not delete the external SendGrid batch as expected"
+        },
+        {
+          "candidate": "viewer/bookings.tsx triggers reminder deletions via async helpers without awaiting them (fire-and-forget in forEach), so cleanup may be skipped or finish after the mutation completes and promise rejections may go unhandled"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 6,
+      "total_golden": 2,
+      "tp": 2,
+      "fp": 4,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.3333333333333333,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR7232__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR7232__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/14740": {
@@ -83298,6 +85159,58 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR14740__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR14740__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Case sensitivity bypass in email blacklist",
+          "severity": "High",
+          "matched_candidate": "Blacklist email check is case-sensitive: blacklist entries are lowercased but submitted guest emails are compared without normalization, allowing mixed-case emails to bypass the blacklist",
+          "confidence": 0.97,
+          "reasoning": "Both describe the same issue: the email blacklist check can be bypassed due to case-sensitive comparison (blacklist normalized to lowercase while input email is not), allowing mixed-case emails through."
+        },
+        {
+          "golden_comment": "The logic for checking team admin/owner permissions is incorrect. This condition uses AND (&&) which requires both isTeamAdmin AND isTeamOwner to be true, but it should use OR (||) since a user needs to be either an admin OR an owner to have permission.",
+          "severity": "Critical",
+          "matched_candidate": "addGuestsHandler incorrectly requires a team user to be both team admin and team owner (uses &&) to pass the permission check, denying access to admins who are not owners",
+          "confidence": 0.98,
+          "reasoning": "The candidate issue describes the same permission-check bug: using && requires a user to be both team admin and team owner, which wrongly blocks admins who aren\u2019t owners. This matches the golden comment\u2019s OR-vs-AND logic error."
+        },
+        {
+          "golden_comment": "uniqueGuests filters out existing attendees and blacklisted emails but does not deduplicate duplicates within the input; createMany can insert duplicate attendee rows if the client submits repeated emails.",
+          "severity": "Medium",
+          "matched_candidate": "addGuestsHandler does not deduplicate duplicate emails within the submitted guests array, allowing duplicate attendee rows to be created via createMany",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same underlying issue: the handler/uniqueGuests logic does not deduplicate repeated emails within the incoming guests list, so createMany may insert duplicate attendee rows when the client submits duplicates."
+        },
+        {
+          "golden_comment": "Starting with an array containing an empty string may cause validation issues. Consider starting with an empty array [] and handling the empty state in the MultiEmail component instead.",
+          "severity": "Low",
+          "matched_candidate": "AddGuestsDialog initializes/resets multiEmailValue to [\"\"] and only guards against length===0, causing validation to fail on untouched/reset state and blocking guest submission",
+          "confidence": 0.93,
+          "reasoning": "Both point out that initializing the multi-email array as [\"\"] (instead of []) leads to an invalid/untouched state that passes the length>0 guard but fails validation, preventing submission. The candidate describes the same root cause and impact as the golden comment."
+        }
+      ],
+      "false_positives": [],
+      "false_negatives": [
+        {
+          "golden_comment": "This calls the email sender with the original guests, so existing attendees included in the input will be treated as new when sending notifications, leading to incorrect emails.",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 4,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 0,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 1.0,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR14740__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR14740__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/10600": {
@@ -85517,6 +87430,57 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR10600__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR10600__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The exported function TwoFactor handles backup codes and is in BackupCode.tsx. Inconsistent naming.",
+          "severity": "Low",
+          "matched_candidate": "BackupCode.tsx default-exported component is named TwoFactor, causing a naming mismatch with the file/UI purpose and confusing stack traces/debugging",
+          "confidence": 0.93,
+          "reasoning": "Both comments flag the same underlying issue: in BackupCode.tsx the exported component/function is named TwoFactor, which is inconsistent with the file\u2019s backup-code purpose and creates confusing naming/mismatch."
+        },
+        {
+          "golden_comment": "Error message mentions 'backup code login' but this is a disable endpoint, not login",
+          "severity": "Low",
+          "matched_candidate": "Disable TOTP endpoint logs an error message about 'backup code login', which mismatches the disable flow and misleads debugging when the encryption key is missing",
+          "confidence": 0.93,
+          "reasoning": "Both point out that the disable TOTP endpoint emits an error message referring to 'backup code login', which is incorrect/misleading because this is a disable flow, not a login flow."
+        },
+        {
+          "golden_comment": "Backup code validation is case-sensitive due to the use of indexOf(). This causes validation to fail if a user enters uppercase hex characters, as backup codes should be case-insensitive for a better user experience.",
+          "severity": "Medium",
+          "matched_candidate": "Backup code comparison does not normalize case, so mixed-case user input may fail to match stored lowercase hex backup codes",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same underlying problem: backup code validation/comparison is case-sensitive because input isn\u2019t normalized, so uppercase/mixed-case hex entered by the user won\u2019t match stored lowercase codes."
+        },
+        {
+          "golden_comment": "Because backupCodes are decrypted and mutated in memory before being written back, two concurrent login requests using the same backupCode could both pass this check and update, so a single backup code may effectively be accepted more than once if used concurrently, weakening the intended one-time-use semantics.",
+          "severity": "High",
+          "matched_candidate": "Backup code consumption in authorize() is not concurrency-safe (read/check/mutate/write without transaction/CAS), allowing the same one-time backup code to be reused under concurrent login requests",
+          "confidence": 0.95,
+          "reasoning": "Both describe a race condition in backup code consumption: concurrent requests can read the same unused backup code, both pass the check, and then both mutate/write back, effectively allowing one-time codes to be reused. Candidate explicitly mentions lack of transaction/CAS and concurrent reuse, matching the golden issue."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "EnableTwoFactorModal calls body.backupCodes.map(...) without guarding for missing/null backupCodes, risking a runtime TypeError if the setup response omits or nulls that field"
+        }
+      ],
+      "false_negatives": [],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 4,
+      "tp": 4,
+      "fp": 1,
+      "fn": 0,
+      "errors_count": 0,
+      "precision": 0.8,
+      "recall": 1.0,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10600__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10600__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/10967": {
@@ -88038,6 +90002,71 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR10967__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR10967__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "Potential null reference if mainHostDestinationCalendar is undefined if evt.destinationCalendar is null or an empty array ",
+          "severity": "High",
+          "matched_candidate": "EventManager destructures the first element from evt.destinationCalendar ?? [] and then dereferences mainHostDestinationCalendar.integration without guarding/optional chaining, crashing when destinationCalendar is null or empty",
+          "confidence": 0.94,
+          "reasoning": "Both describe the same issue: mainHostDestinationCalendar can be undefined when evt.destinationCalendar is null/empty, and dereferencing integration without a guard can cause a crash/null reference."
+        },
+        {
+          "golden_comment": "The optional chaining on mainHostDestinationCalendar?.integration is redundant since you already check mainHostDestinationCalendar in the ternary condition.",
+          "severity": "Low",
+          "matched_candidate": "editLocation.handler.ts uses redundant optional chaining inside branches that already truthy-check booking.destinationCalendar / booking.user.destinationCalendar, reducing clarity",
+          "confidence": 0.78,
+          "reasoning": "Both comments flag redundant optional chaining used after an object has already been checked for truthiness in a conditional/ternary branch. The candidate refers to destinationCalendar checks rather than mainHostDestinationCalendar specifically, but it describes the same underlying pattern and issue in the same file/context."
+        },
+        {
+          "golden_comment": "Logic error: when externalCalendarId is provided, you're searching for a calendar where externalId === externalCalendarId, but this will always fail since you're looking for a calendar that matches itself. Should likely find by credentialId or use different logic.",
+          "severity": "High",
+          "matched_candidate": "GoogleCalendarService.updateEvent uses a fallback that searches destinationCalendar for cal.externalId === externalCalendarId when externalCalendarId is falsy, making the fallback impossible and potentially selecting the wrong calendarId for updates",
+          "confidence": 0.72,
+          "reasoning": "Both comments flag the same flawed calendar-selection logic involving externalCalendarId and comparing cal.externalId to externalCalendarId in a way that makes the lookup ineffective/incorrect. The candidate\u2019s wording about the fallback condition is a bit confused (mentions externalCalendarId falsy), but it still identifies the core issue: the search condition makes the fallback/selection wrong or impossible."
+        },
+        {
+          "golden_comment": "The Calendar interface now requires createEvent(event, credentialId), but some implementations (e.g., Lark/Office365) still declare createEvent(event) only\u2014this breaks the interface contract (also applies to other locations in the PR).",
+          "severity": "Low",
+          "matched_candidate": "CalendarManager now calls createEvent(calEvent, credential.id) but some adapters/implementations (e.g., CalendarService) still implement createEvent(event) with one parameter, causing an interface/signature mismatch that can break integrations",
+          "confidence": 0.93,
+          "reasoning": "Both describe the same underlying problem: the Calendar interface/signature changed to require a second parameter (credentialId), but some implementations/adapters still define createEvent with only one argument, causing a contract/signature mismatch and potential breakage."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "Calendar.d.ts changes Calendar.createEvent to require (event, credentialId), but downstream implementations (e.g., packages/lib/CalendarService.ts) still declare createEvent(event) with one parameter, creating a concrete arity mismatch"
+        },
+        {
+          "candidate": "destinationCalendar contract changed to DestinationCalendar[] | null, but some consumers still treat it like a single object / assume a non-empty array, leading to runtime errors (e.g., EventManager accessing [0] then dereferencing without a guard)"
+        },
+        {
+          "candidate": "handleNewBooking collects multiple destination calendars (including team member calendars) but persists only evt.destinationCalendar[0] when creating the booking, silently dropping additional calendars"
+        },
+        {
+          "candidate": "handleCancelBooking recurring-delete path iterates only bookingToDelete.user.credentials and ignores the DB-fetched calendarCredential fallback, so recurring linked events may not be deleted when the credential exists only via the DB fetch"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Logic inversion in organization creation: The slug property is now conditionally set when IS_TEAM_BILLING_ENABLED is true, instead of when it's false as originally intended. This change, combined with requestedSlug still being set when IS_TEAM_BILLING_ENABLED is true, results in both properties being set when billing is enabled, and neither when disabled",
+          "severity": "Medium"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 8,
+      "total_golden": 5,
+      "tp": 4,
+      "fp": 4,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.5,
+      "recall": 0.8,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR10967__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR10967__20260310/pull/1"
     }
   },
   "https://github.com/calcom/cal.com/pull/8087": {
@@ -89698,6 +91727,47 @@
       "tool": "greptile-v4-1",
       "repo_name": "cal_dot_com__cal.com__greptile-v4-1__PR8087__20260406",
       "pr_url": "https://github.com/code-review-benchmark/cal_dot_com__cal.com__greptile-v4-1__PR8087__20260406/pull/1"
+    },
+    "cloudaeye": {
+      "skipped": false,
+      "true_positives": [
+        {
+          "golden_comment": "The code uses forEach with async callbacks, which causes asynchronous operations (e.g., calendar/video event deletions, payment refunds) to run concurrently without being awaited. This 'fire-and-forget' behavior leads to unhandled promise rejections, race conditions, and incomplete cleanup, as surrounding try-catch blocks cannot properly handle errors from these unawaited promises. Replace forEach with for...of loops or Promise.all() with map() to ensure proper sequential execution and error handling.",
+          "severity": "Critical",
+          "matched_candidate": "In packages/app-store/vital/lib/reschedule.ts, changing to bookingRefsFiltered.forEach(async ...) prevents the surrounding try/catch from catching rejections from getCalendar/deleteEvent/deleteMeeting, and the function proceeds/returns before per-reference cleanup completes",
+          "confidence": 0.93,
+          "reasoning": "The candidate flags using forEach with an async callback, noting that try/catch won\u2019t catch rejections and the function can return before cleanup completes. This is the same core issue as the golden comment about unawaited concurrent async operations causing fire-and-forget behavior and improper error handling."
+        }
+      ],
+      "false_positives": [
+        {
+          "candidate": "In packages/trpc/server/routers/viewer/bookings.tsx, using bookingRefsFiltered.forEach(async ...) makes external calendar deletions fire-and-forget, so cleanup may still be running after the handler returns (race condition)"
+        },
+        {
+          "candidate": "In packages/trpc/server/routers/viewer/bookings.tsx, errors from getCalendar/deleteEvent/deleteMeeting inside the async forEach callback are not caught/awaited, so promise rejections escape normal error handling while execution continues (e.g., to sendRequestRescheduleEmail)"
+        },
+        {
+          "candidate": "In packages/app-store/wipemycalother/lib/reschedule.ts, using bookingRefsFiltered.forEach(async ...) causes getCalendar/deleteEvent/deleteMeeting rejections to escape the surrounding try/catch because forEach does not await async callbacks"
+        }
+      ],
+      "false_negatives": [
+        {
+          "golden_comment": "Consider adding try-catch around the await to handle import failures gracefully",
+          "severity": "Low"
+        }
+      ],
+      "errors": [],
+      "total_candidates": 5,
+      "total_golden": 2,
+      "tp": 1,
+      "fp": 3,
+      "fn": 1,
+      "errors_count": 0,
+      "precision": 0.2,
+      "recall": 0.5,
+      "tool": "cloudaeye",
+      "repo_name": "cal_dot_com__cal.com__cloudaeye__PR8087__20260310",
+      "pr_url": "https://github.com/CloudAEye/cal_dot_com__cal.com__cloudaeye__PR8087__20260310/pull/1"
     }
   }
-}
\ No newline at end of file
+}