From d870730203d690d0cd1841bfb842cb86fee1f70b Mon Sep 17 00:00:00 2001
From: Dmitrii Korolev <dmkorolev@microsoft.com>
Date: Sat, 7 Mar 2026 12:55:06 +0100
Subject: [PATCH 1/2] use BPE tokenizer

---
 .../src/Services/SkillProfiler.cs             | 30 ++++++++++++-------
 eng/skill-validator/src/SkillValidator.csproj |  4 ++-
 .../tests/SkillProfileTests.cs                |  5 ++--
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/eng/skill-validator/src/Services/SkillProfiler.cs b/eng/skill-validator/src/Services/SkillProfiler.cs
index eef736d147..42800718f7 100644
--- a/eng/skill-validator/src/Services/SkillProfiler.cs
+++ b/eng/skill-validator/src/Services/SkillProfiler.cs
@@ -1,4 +1,5 @@
 using System.Text.RegularExpressions;
+using Microsoft.ML.Tokenizers;
 using SkillValidator.Models;
 
 namespace SkillValidator.Services;
@@ -6,6 +7,7 @@ namespace SkillValidator.Services;
 public sealed record SkillProfile(
     string Name,
     int TokenCount,
+    int BpeTokenCount,
     string ComplexityTier, // "compact" | "detailed" | "standard" | "comprehensive"
     int SectionCount,
     int CodeBlockCount,
@@ -25,6 +27,10 @@ public static partial class SkillProfiler
     private const int TokenSweetHigh = 2500;
     private const int TokenWarnHigh = 5000;
     internal const int MaxDescriptionLength = 1024;
+
+    // Lazy-initialized BPE tokenizer (cl100k_base, the GPT-4 encoding); counts are only an approximation for other model families
+    private static readonly Lazy<TiktokenTokenizer> s_bpeTokenizer = new(
+        () => TiktokenTokenizer.CreateForModel("gpt-4"));
     internal const int MaxAggregateDescriptionLength = 15_000;
     private const int MaxNameLength = 64;
     private const int MaxCompatibilityLength = 500;
@@ -33,7 +39,8 @@ public static partial class SkillProfiler
     public static SkillProfile AnalyzeSkill(SkillInfo skill)
     {
         var content = skill.SkillMdContent;
-        int tokenCount = (int)Math.Ceiling(content.Length / 4.0);
+        int chars4TokenCount = (int)Math.Ceiling(content.Length / 4.0);
+        int bpeTokenCount = s_bpeTokenizer.Value.CountTokens(content);
 
         bool hasFrontmatter = FrontmatterRegex().IsMatch(content);
 
@@ -48,7 +55,7 @@ public static SkillProfile AnalyzeSkill(SkillInfo skill)
         bool hasWhenToUse = WhenToUseRegex().IsMatch(body);
         bool hasWhenNotToUse = WhenNotToUseRegex().IsMatch(body);
 
-        string complexityTier = tokenCount switch
+        string complexityTier = bpeTokenCount switch
         {
             < 400 => "compact",
             <= 2500 => "detailed",
@@ -134,21 +141,21 @@ public static SkillProfile AnalyzeSkill(SkillInfo skill)
             }
         }
 
-        // --- Token size warnings ---
-        if (tokenCount > TokenWarnHigh)
+        // --- Token size warnings (based on BPE token count) ---
+        if (bpeTokenCount > TokenWarnHigh)
         {
             warnings.Add(
-                $"Skill is {tokenCount:N0} tokens — \"comprehensive\" skills hurt performance by 2.9pp on average. Consider splitting into 2–3 focused skills.");
+                $"Skill is {bpeTokenCount:N0} BPE tokens (chars/4 estimate: {chars4TokenCount:N0}) — \"comprehensive\" skills hurt performance by 2.9pp on average. Consider splitting into 2–3 focused skills.");
         }
-        else if (tokenCount > TokenSweetHigh)
+        else if (bpeTokenCount > TokenSweetHigh)
         {
             warnings.Add(
-                $"Skill is {tokenCount:N0} tokens — approaching \"comprehensive\" range where gains diminish.");
+                $"Skill is {bpeTokenCount:N0} BPE tokens (chars/4 estimate: {chars4TokenCount:N0}) — approaching \"comprehensive\" range where gains diminish.");
         }
-        else if (tokenCount < TokenSweetLow)
+        else if (bpeTokenCount < TokenSweetLow)
         {
             warnings.Add(
-                $"Skill is only {tokenCount} tokens — may be too sparse to provide actionable guidance.");
+                $"Skill is only {bpeTokenCount} BPE tokens (chars/4 estimate: {chars4TokenCount}) — may be too sparse to provide actionable guidance.");
         }
 
         if (sectionCount == 0)
@@ -177,7 +184,8 @@ public static SkillProfile AnalyzeSkill(SkillInfo skill)
 
         return new SkillProfile(
             Name: skill.Name,
-            TokenCount: tokenCount,
+            TokenCount: chars4TokenCount,
+            BpeTokenCount: bpeTokenCount,
             ComplexityTier: complexityTier,
             SectionCount: sectionCount,
             CodeBlockCount: codeBlockCount,
@@ -230,7 +238,7 @@ public static string FormatProfileLine(SkillProfile profile)
         };
 
         return
-            $"📊 {profile.Name}: {profile.TokenCount:N0} tokens ({profile.ComplexityTier} {tierIndicator}), " +
+            $"📊 {profile.Name}: {profile.BpeTokenCount:N0} BPE tokens [chars/4: {profile.TokenCount:N0}] ({profile.ComplexityTier} {tierIndicator}), " +
             $"{profile.SectionCount} sections, {profile.CodeBlockCount} code blocks";
     }
 
diff --git a/eng/skill-validator/src/SkillValidator.csproj b/eng/skill-validator/src/SkillValidator.csproj
index 0a5bd5dc6f..c48b07bdcf 100644
--- a/eng/skill-validator/src/SkillValidator.csproj
+++ b/eng/skill-validator/src/SkillValidator.csproj
@@ -18,7 +18,7 @@
     <PublishAot>true</PublishAot>
 
     <!-- dotnet run args for local invocation -->
-    <RunArguments>--results-dir &quot;$([MSBuild]::NormalizePath('$(ArtifactsPath)', 'TestResults', '$(AssemblyName)'))&quot; --parallel-skills 3 --parallel-scenarios 3 --parallel-runs 3</RunArguments>
+    <RunArguments>--results-dir "$([MSBuild]::NormalizePath('$(ArtifactsPath)', 'TestResults', '$(AssemblyName)'))" --parallel-skills 3 --parallel-scenarios 3 --parallel-runs 3</RunArguments>
   </PropertyGroup>
 
   <ItemGroup>
@@ -31,6 +31,8 @@
 
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.FileSystemGlobbing" Version="10.0.3" />
+    <PackageReference Include="Microsoft.ML.Tokenizers" Version="2.0.0" />
+    <PackageReference Include="Microsoft.ML.Tokenizers.Data.Cl100kBase" Version="2.0.0" />
     <PackageReference Include="System.CommandLine" Version="2.0.3" />
     <!-- external -->
     <PackageReference Include="GitHub.Copilot.SDK" Version="0.1.30" />
diff --git a/eng/skill-validator/tests/SkillProfileTests.cs b/eng/skill-validator/tests/SkillProfileTests.cs
index 6371981062..b853664ef9 100644
--- a/eng/skill-validator/tests/SkillProfileTests.cs
+++ b/eng/skill-validator/tests/SkillProfileTests.cs
@@ -70,8 +70,9 @@ public void ClassifiesCompactSkills()
     [Fact]
     public void ClassifiesComprehensiveSkillsAndWarns()
     {
-        // >5000 tokens = >20000 chars
-        var content = "---\nname: foo\n---\n# Big\n" + new string('x', 25000);
+        // >5000 BPE tokens — use varied text since BPE compresses repeated chars efficiently
+        var content = "---\nname: foo\n---\n# Big\n" + string.Concat(
+            Enumerable.Range(0, 5000).Select(i => $"word{i} "));
         var profile = SkillProfiler.AnalyzeSkill(MakeSkill(content));
         Assert.Equal("comprehensive", profile.ComplexityTier);
         Assert.Contains(profile.Warnings, w => w.Contains("comprehensive"));

From 77eb1be7ac59475c9e56975188818e2fb002311a Mon Sep 17 00:00:00 2001
From: Korolev Dmitry <dmkorolev@microsoft.com>
Date: Sun, 8 Mar 2026 19:31:52 +0100
Subject: [PATCH 2/2] selectivity test

---
 .../src/Commands/ValidateCommand.cs           | 114 +++++++++++++++++-
 eng/skill-validator/src/Models/Models.cs      |  23 +++-
 .../src/Services/AgentRunner.cs               |  71 +++++++++++
 .../src/Services/EvalSchema.cs                |  11 +-
 eng/skill-validator/src/Services/Reporter.cs  |  51 ++++++--
 .../src/SkillValidatorJsonContext.cs          |   2 +
 .../src/SkillValidatorYamlContext.cs          |   1 +
 .../build-perf-diagnostics/eval.yaml          |  14 +++
 8 files changed, 271 insertions(+), 16 deletions(-)

diff --git a/eng/skill-validator/src/Commands/ValidateCommand.cs b/eng/skill-validator/src/Commands/ValidateCommand.cs
index 15746cdd50..0493ba9bf0 100644
--- a/eng/skill-validator/src/Commands/ValidateCommand.cs
+++ b/eng/skill-validator/src/Commands/ValidateCommand.cs
@@ -30,6 +30,9 @@ public static RootCommand Create()
         var reporterOpt = new Option<string[]>("--reporter") { Description = "Reporter (console, json, junit, markdown). Can be repeated.", AllowMultipleArgumentsPerToken = true };
         var noOverfittingCheckOpt = new Option<bool>("--no-overfitting-check") { Description = "Disable LLM-based overfitting analysis (on by default)" };
         var overfittingFixOpt = new Option<bool>("--overfitting-fix") { Description = "Generate a fixed eval.yaml with improved rubric items/assertions" };
+        var selectivityTestOpt = new Option<bool>("--selectivity-test") { Description = "Run selectivity test using should_activate / should_not_activate prompts from eval.yaml" };
+        var selectivityMinRecallOpt = new Option<double>("--selectivity-min-recall") { Description = "Minimum recall (activation on should_activate prompts) to pass (0-1)", DefaultValueFactory = _ => 0.8 };
+        var selectivityMinPrecisionOpt = new Option<double>("--selectivity-min-precision") { Description = "Minimum precision (non-activation on should_not_activate prompts) to pass (0-1)", DefaultValueFactory = _ => 0.8 };
 
         var command = new RootCommand("Validate that agent skills meaningfully improve agent performance")
         {
@@ -53,6 +56,9 @@ public static RootCommand Create()
             reporterOpt,
             noOverfittingCheckOpt,
             overfittingFixOpt,
+            selectivityTestOpt,
+            selectivityMinRecallOpt,
+            selectivityMinPrecisionOpt,
         };
 
         command.SetAction(async (parseResult, _) =>
@@ -98,6 +104,9 @@ public static RootCommand Create()
                 TestsDir = parseResult.GetValue(testsDirOpt),
                 OverfittingCheck = !parseResult.GetValue(noOverfittingCheckOpt),
                 OverfittingFix = parseResult.GetValue(overfittingFixOpt),
+                SelectivityTest = parseResult.GetValue(selectivityTestOpt),
+                SelectivityMinRecall = parseResult.GetValue(selectivityMinRecallOpt),
+                SelectivityMinPrecision = parseResult.GetValue(selectivityMinPrecisionOpt),
             };
 
             return await Run(config);
@@ -333,6 +342,36 @@ internal static List<string> CheckAggregateDescriptionLimits(IReadOnlyList<Skill
             };
         }
 
+        // Selectivity-only mode: skip full evaluation, just probe skill activation
+        if (config.SelectivityTest)
+        {
+            if (skill.EvalConfig is not null
+                && (skill.EvalConfig.ShouldActivatePrompts is { Count: > 0 } || skill.EvalConfig.ShouldNotActivatePrompts is { Count: > 0 }))
+            {
+                log("🎯 Running selectivity test (standalone)...");
+                var selectivityResult = await ExecuteSelectivityTest(skill, config, spinner);
+                log($"🎯 Selectivity: recall={selectivityResult.Recall:P0}, precision={selectivityResult.Precision:P0} — {(selectivityResult.Passed ? "PASSED" : "FAILED")}");
+
+                return new SkillVerdict
+                {
+                    SkillName = skill.Name,
+                    SkillPath = skill.Path,
+                    Passed = selectivityResult.Passed,
+                    Scenarios = [],
+                    OverallImprovementScore = 0,
+                    Reason = selectivityResult.Passed
+                        ? "Selectivity test passed"
+                        : $"Selectivity test failed: {selectivityResult.Reason}",
+                    FailureKind = selectivityResult.Passed ? null : "selectivity_failure",
+                    ProfileWarnings = profile.Warnings,
+                    SelectivityResult = selectivityResult,
+                };
+            }
+
+            log("⏭  Skipping (no selectivity prompts in eval.yaml)");
+            return null;
+        }
+
         // Launch overfitting check in parallel with scenario execution
         var workDir = Path.GetTempPath();
         Task<OverfittingResult?> overfittingTask = Task.FromResult<OverfittingResult?>(null);
@@ -496,8 +535,8 @@ private static async Task<RunExecutionResult> ExecuteRun(
             runLog("running agents...");
 
         var agentTasks = await Task.WhenAll(
-            AgentRunner.RunAgent(new RunOptions(scenario, null, skill.EvalPath, config.Model, config.Verbose, runLog)),
-            AgentRunner.RunAgent(new RunOptions(scenario, skill, skill.EvalPath, config.Model, config.Verbose, runLog)));
+            AgentRunner.RunAgent(new RunOptions(scenario, null, skill.EvalPath, config.Model, config.Verbose, Log: runLog)),
+            AgentRunner.RunAgent(new RunOptions(scenario, skill, skill.EvalPath, config.Model, config.Verbose, Log: runLog)));
         var baselineMetrics = agentTasks[0];
         var withSkillMetrics = agentTasks[1];
 
@@ -642,4 +681,75 @@ private static string SanitizeErrorMessage(string? message)
         var singleLine = raw.ReplaceLineEndings(" ");
         return singleLine.Length > 150 ? singleLine[..150] + "…" : singleLine;
     }
+
+    private static async Task<SelectivityResult> ExecuteSelectivityTest(SkillInfo skill, ValidatorConfig config, Spinner spinner)
+    {
+        var prefix = $"[{skill.Name}/selectivity]";
+        var log = (string msg) => spinner.Log($"{prefix} {msg}");
+
+        // Start one activation probe per should_activate / should_not_activate prompt; they are awaited together via Task.WhenAll below.
+        var tasks = new List<Task<SelectivityPromptResult>>();
+
+        if (skill.EvalConfig!.ShouldActivatePrompts is { } activatePrompts)
+        {
+            foreach (var prompt in activatePrompts)
+            {
+                log($"Testing should_activate: \"{Truncate(prompt, 60)}\"");
+                tasks.Add(ProbeAndLog(skill, prompt, expectedActivation: true, config, log));
+            }
+        }
+
+        if (skill.EvalConfig.ShouldNotActivatePrompts is { } deactivatePrompts)
+        {
+            foreach (var prompt in deactivatePrompts)
+            {
+                log($"Testing should_not_activate: \"{Truncate(prompt, 60)}\"");
+                tasks.Add(ProbeAndLog(skill, prompt, expectedActivation: false, config, log));
+            }
+        }
+
+        var promptResults = (await Task.WhenAll(tasks)).ToList();
+
+        // Recall: share of should_activate prompts whose probe reported activation (1.0 when there are no such prompts).
+        var shouldActivateResults = promptResults.Where(r => r.ExpectedActivation).ToList();
+        double recall = shouldActivateResults.Count > 0
+            ? (double)shouldActivateResults.Count(r => r.SkillActivated) / shouldActivateResults.Count
+            : 1.0;
+
+        // "Precision" as used here: share of should_not_activate prompts that did NOT activate, i.e. a true-negative rate (1.0 when there are none).
+        var shouldNotActivateResults = promptResults.Where(r => !r.ExpectedActivation).ToList();
+        double precision = shouldNotActivateResults.Count > 0
+            ? (double)shouldNotActivateResults.Count(r => !r.SkillActivated) / shouldNotActivateResults.Count
+            : 1.0;
+
+        bool passed = recall >= config.SelectivityMinRecall && precision >= config.SelectivityMinPrecision;
+        var reasons = new List<string>();
+        if (recall < config.SelectivityMinRecall)
+            reasons.Add($"Recall {recall:P0} below threshold {config.SelectivityMinRecall:P0}");
+        if (precision < config.SelectivityMinPrecision)
+            reasons.Add($"Precision {precision:P0} below threshold {config.SelectivityMinPrecision:P0}");
+        string reason = passed ? "Selectivity test passed" : string.Join("; ", reasons);
+
+        return new SelectivityResult(promptResults, recall, precision, passed, reason);
+    }
+
+    private static async Task<SelectivityPromptResult> ProbeAndLog(
+        SkillInfo skill, string prompt, bool expectedActivation, ValidatorConfig config, Action<string> log)
+    {
+        // Probe once, then log the outcome: ✅ when it matches the expectation, ❌ when it does not.
+        bool activated = await TestSkillActivation(skill, prompt, config);
+        string outcome = expectedActivation ? (activated ? "✅ activated" : "❌ NOT activated")
+            : (activated ? "❌ activated (unexpected)" : "✅ correctly NOT activated");
+        log($"  → {outcome}: \"{Truncate(prompt, 50)}\"");
+        return new SelectivityPromptResult(prompt, ExpectedActivation: expectedActivation, SkillActivated: activated);
+    }
+
+    private static async Task<bool> TestSkillActivation(SkillInfo skill, string prompt, ValidatorConfig config)
+    {
+        // Wrap the prompt in a throwaway scenario (empty rubric) and hand it to the skill-enabled activation probe.
+        return await AgentRunner.ProbeSkillActivation(new RunOptions(new EvalScenario(Name: "selectivity-probe", Prompt: prompt, Rubric: [], Timeout: 15), skill, skill.EvalPath, config.Model, config.Verbose));
+    }
+
+    private static string Truncate(string value, int maxLength) => // returns value unchanged when it fits, otherwise a cut copy ending in "…"
+        value.Length <= maxLength ? value : value[..Math.Max(0, maxLength - 1)] + "…"; // clamp: maxLength < 1 used to throw ArgumentOutOfRangeException
 }
diff --git a/eng/skill-validator/src/Models/Models.cs b/eng/skill-validator/src/Models/Models.cs
index 1c076e45b9..7158e15688 100644
--- a/eng/skill-validator/src/Models/Models.cs
+++ b/eng/skill-validator/src/Models/Models.cs
@@ -70,7 +70,10 @@ public sealed record EvalScenario(
     int? MaxTokens = null,
     bool ExpectActivation = true);
 
-public sealed record EvalConfig(IReadOnlyList<EvalScenario> Scenarios);
+public sealed record EvalConfig(
+    IReadOnlyList<EvalScenario> Scenarios,
+    IReadOnlyList<string>? ShouldActivatePrompts = null,
+    IReadOnlyList<string>? ShouldNotActivatePrompts = null);
 
 // --- Skill info ---
 
@@ -227,6 +230,7 @@ public sealed class SkillVerdict
     public IReadOnlyList<string>? ProfileWarnings { get; set; }
     public bool SkillNotActivated { get; set; }
     public OverfittingResult? OverfittingResult { get; set; }
+    public SelectivityResult? SelectivityResult { get; set; }
 }
 
 // --- Overfitting assessment ---
@@ -274,6 +278,20 @@ public sealed record OverfittingJudgeOptions(
     int Timeout,
     string WorkDir);
 
+// --- Selectivity test ---
+
+public sealed record SelectivityPromptResult(
+    string Prompt,            // prompt text that was probed
+    bool ExpectedActivation,  // true for should_activate prompts, false for should_not_activate prompts
+    bool SkillActivated);     // outcome reported by the activation probe for this prompt
+
+public sealed record SelectivityResult(
+    IReadOnlyList<SelectivityPromptResult> PromptResults, // one entry per probed prompt
+    double Recall,    // as computed by the validator: share of should_activate prompts that activated the skill
+    double Precision, // as computed by the validator: share of should_not_activate prompts that did NOT activate
+    bool Passed,      // true when Recall and Precision both meet the configured minimums
+    string Reason);   // "Selectivity test passed", or the threshold failures joined with "; "
+
 // --- Config ---
 
 public sealed record ReporterSpec(ReporterType Type);
@@ -308,6 +326,9 @@ public sealed record ValidatorConfig
     public string? TestsDir { get; init; }
     public bool OverfittingCheck { get; init; } = true;
     public bool OverfittingFix { get; init; }
+    public bool SelectivityTest { get; init; }
+    public double SelectivityMinRecall { get; init; } = 0.8;
+    public double SelectivityMinPrecision { get; init; } = 0.8;
 }
 
 public static class DefaultWeights
diff --git a/eng/skill-validator/src/Services/AgentRunner.cs b/eng/skill-validator/src/Services/AgentRunner.cs
index 7a79a630e6..13be42a481 100644
--- a/eng/skill-validator/src/Services/AgentRunner.cs
+++ b/eng/skill-validator/src/Services/AgentRunner.cs
@@ -287,6 +287,77 @@ public static async Task<RunMetrics> RunAgent(RunOptions options)
         return metrics;
     }
 
+    /// <summary>
+    /// Lightweight probe: sends the scenario prompt to a skill-enabled session and reports whether
+    /// a SkillInvokedEvent was seen. Finishes on the first of: skill invoked, session idle, or the
+    /// fixed 30s timeout (Scenario.Timeout is not consulted). Session failures do not propagate: they
+    /// are logged and the outcome recorded so far is returned. Designed to run many probes in parallel via Task.WhenAll.
+    /// </summary>
+    public static async Task<bool> ProbeSkillActivation(RunOptions options)
+    {
+        var workDir = Path.Combine(Path.GetTempPath(), $"sv-{Guid.NewGuid():N}");
+        Directory.CreateDirectory(workDir);
+        _workDirs.Add(workDir);
+
+        // Resolve the log sink once; the event callback and the catch block below reuse it.
+        var write = options.Log ?? (msg => Console.Error.WriteLine(msg));
+        if (options.Verbose)
+        {
+            write($"      📂 {workDir} (skilled)");
+        }
+
+        // First writer wins, so the TCS alone carries the outcome and no mutable flag is shared across callbacks.
+        // RunContinuationsAsynchronously keeps this method's continuation from running inline inside the event callback.
+        var done = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
+
+        try
+        {
+            var client = await GetSharedClient(options.Verbose);
+            await using var session = await client.CreateSessionAsync(
+                BuildSessionConfig(options.Skill, options.Model, workDir, options.Skill?.McpServers));
+
+            // 30s timeout — enough for the agent to reach the skill-loading decision
+            using var cts = new CancellationTokenSource(30_000);
+            cts.Token.Register(() => done.TrySetResult(false));
+
+            session.On(evt =>
+            {
+                switch (evt)
+                {
+                    // Skill loaded → we have our answer, bail immediately
+                    case SkillInvokedEvent invoked:
+                        done.TrySetResult(true);
+                        if (options.Verbose)
+                            write($"      📘 Skill invoked: {invoked.Data.Name}");
+                        break;
+
+                    // Session finished without loading the skill → not activated
+                    case SessionIdleEvent:
+                        done.TrySetResult(false);
+                        break;
+
+                    case SessionErrorEvent err:
+                        done.TrySetException(new InvalidOperationException(err.Data.Message ?? "Session error"));
+                        break;
+
+                    case ToolExecutionStartEvent tool when options.Verbose:
+                        write($"      🔧 {tool.Data.ToolName}");
+                        break;
+                }
+            });
+
+            await session.SendAsync(new MessageOptions { Prompt = options.Scenario.Prompt });
+            return await done.Task;
+        }
+        catch (Exception ex)
+        {
+            // Best effort: a failed probe must not abort the whole selectivity run, but it has to be
+            // visible, because it is scored exactly like a prompt that did not activate the skill.
+            write($"      ⚠️ Selectivity probe failed: {ex.Message}");
+            return done.Task.IsCompletedSuccessfully && done.Task.Result;
+        }
+    }
+
     private static async Task<string> SetupWorkDir(EvalScenario scenario, string? skillPath, string? evalPath)
     {
         var workDir = Path.Combine(Path.GetTempPath(), $"sv-{Guid.NewGuid():N}");
diff --git a/eng/skill-validator/src/Services/EvalSchema.cs b/eng/skill-validator/src/Services/EvalSchema.cs
index d89ea92de8..b8c3e36928 100644
--- a/eng/skill-validator/src/Services/EvalSchema.cs
+++ b/eng/skill-validator/src/Services/EvalSchema.cs
@@ -21,7 +21,7 @@ public static EvalConfig ParseEvalConfig(string yamlContent)
         if (scenarios is not { Count: > 0 })
             throw new InvalidOperationException("Eval config must have at least one scenario");
 
-        return new EvalConfig(scenarios);
+        return new EvalConfig(scenarios, raw.Selectivity?.ShouldActivate, raw.Selectivity?.ShouldNotActivate);
     }
 
     public static (bool Success, EvalConfig? Data, IReadOnlyList<string>? Errors) ValidateEvalConfig(string yamlContent)
@@ -122,6 +122,15 @@ internal sealed class RawFrontmatter
     internal sealed class RawEvalConfig
     {
         public List<RawScenario>? Scenarios { get; set; }
+        public RawSelectivity? Selectivity { get; set; }
+    }
+
+    internal sealed class RawSelectivity // raw YAML shape behind RawEvalConfig.Selectivity
+    {
+        [YamlMember(Alias = "should_activate")]
+        public List<string>? ShouldActivate { get; set; } // passed to EvalConfig as ShouldActivatePrompts
+        [YamlMember(Alias = "should_not_activate")]
+        public List<string>? ShouldNotActivate { get; set; } // passed to EvalConfig as ShouldNotActivatePrompts
+    }
 
     internal sealed class RawScenario
diff --git a/eng/skill-validator/src/Services/Reporter.cs b/eng/skill-validator/src/Services/Reporter.cs
index 86e8fccbb3..ffe00e13e3 100644
--- a/eng/skill-validator/src/Services/Reporter.cs
+++ b/eng/skill-validator/src/Services/Reporter.cs
@@ -62,22 +62,34 @@ private static void ReportConsole(IReadOnlyList<SkillVerdict> verdicts, bool ver
         {
             var icon = verdict.Passed ? "\x1b[32m✓\x1b[0m" : "\x1b[31m✗\x1b[0m";
             var name = $"\x1b[1m{verdict.SkillName}\x1b[0m";
-            var score = FormatScore(verdict.OverallImprovementScore);
 
-            var scoreLine = $"{icon} {name}  {score}";
-            if (verdict.ConfidenceInterval is { } ci)
+            // Selectivity-only verdict: no scenarios or score to display
+            bool isSelectivityOnly = verdict.Scenarios.Count == 0 && verdict.SelectivityResult is not null;
+
+            if (isSelectivityOnly)
             {
-                var ciStr = $"[{FormatPct(ci.Low)}, {FormatPct(ci.High)}]";
-                var sigStr = verdict.IsSignificant == true
-                    ? "\x1b[32msignificant\x1b[0m"
-                    : "\x1b[33mnot significant\x1b[0m";
-                scoreLine += $"  \x1b[2m{ciStr}\x1b[0m {sigStr}";
+                Console.WriteLine($"{icon} {name}  \x1b[2m(selectivity only)\x1b[0m");
+                Console.WriteLine($"  \x1b[2m{verdict.Reason}\x1b[0m");
             }
-            if (verdict.NormalizedGain is { } ng)
-                scoreLine += $"  \x1b[2m(g={FormatPct(ng)})\x1b[0m";
+            else
+            {
+                var score = FormatScore(verdict.OverallImprovementScore);
+
+                var scoreLine = $"{icon} {name}  {score}";
+                if (verdict.ConfidenceInterval is { } ci)
+                {
+                    var ciStr = $"[{FormatPct(ci.Low)}, {FormatPct(ci.High)}]";
+                    var sigStr = verdict.IsSignificant == true
+                        ? "\x1b[32msignificant\x1b[0m"
+                        : "\x1b[33mnot significant\x1b[0m";
+                    scoreLine += $"  \x1b[2m{ciStr}\x1b[0m {sigStr}";
+                }
+                if (verdict.NormalizedGain is { } ng)
+                    scoreLine += $"  \x1b[2m(g={FormatPct(ng)})\x1b[0m";
 
-            Console.WriteLine(scoreLine);
-            Console.WriteLine($"  \x1b[2m{verdict.Reason}\x1b[0m");
+                Console.WriteLine(scoreLine);
+                Console.WriteLine($"  \x1b[2m{verdict.Reason}\x1b[0m");
+            }
 
             if (!verdict.Passed && verdict.ProfileWarnings is { Count: > 0 })
             {
@@ -132,6 +144,21 @@ private static void ReportConsole(IReadOnlyList<SkillVerdict> verdicts, bool ver
                         Console.WriteLine($"    \x1b[2m•\x1b[0m [{item.Classification}] \x1b[2m{item.AssertionSummary}\x1b[0m\n      \x1b[2m— {item.Reasoning}\x1b[0m");
                 }
             }
+            if (verdict.SelectivityResult is { } selResult)
+            {
+                Console.WriteLine();
+                var selIcon = selResult.Passed ? "✅" : "🔴";
+                Console.WriteLine($"  🎯 Selectivity: recall={selResult.Recall:P0}, precision={selResult.Precision:P0} {selIcon}");
+                foreach (var pr in selResult.PromptResults)
+                {
+                    var expected = pr.ExpectedActivation ? "should activate" : "should NOT activate";
+                    var correct = (pr.ExpectedActivation == pr.SkillActivated);
+                    var prIcon = correct ? "\x1b[32m✓\x1b[0m" : "\x1b[31m✗\x1b[0m";
+                    var activatedStr = pr.SkillActivated ? "activated" : "not activated";
+                    var prompt = pr.Prompt.Length > 60 ? pr.Prompt[..59] + "…" : pr.Prompt;
+                    Console.WriteLine($"    {prIcon} \x1b[2m\"{prompt}\" — {expected} → {activatedStr}\x1b[0m");
+                }
+            }
             if (verdict.Scenarios.Count > 0)
             {
                 Console.WriteLine();
diff --git a/eng/skill-validator/src/SkillValidatorJsonContext.cs b/eng/skill-validator/src/SkillValidatorJsonContext.cs
index 90e5420e72..fb370ef526 100644
--- a/eng/skill-validator/src/SkillValidatorJsonContext.cs
+++ b/eng/skill-validator/src/SkillValidatorJsonContext.cs
@@ -33,6 +33,8 @@ namespace SkillValidator;
 [JsonSerializable(typeof(PairwiseMagnitude))]
 [JsonSerializable(typeof(AssertionType))]
 [JsonSerializable(typeof(MCPServerDef))]
+[JsonSerializable(typeof(SelectivityPromptResult))]
+[JsonSerializable(typeof(SelectivityResult))]
 [JsonSerializable(typeof(JsonElement))]
 [JsonSerializable(typeof(Dictionary<string, int>))]
 [JsonSerializable(typeof(Dictionary<string, JsonNode?>))]
diff --git a/eng/skill-validator/src/SkillValidatorYamlContext.cs b/eng/skill-validator/src/SkillValidatorYamlContext.cs
index 9f1dadf584..c671d7007b 100644
--- a/eng/skill-validator/src/SkillValidatorYamlContext.cs
+++ b/eng/skill-validator/src/SkillValidatorYamlContext.cs
@@ -10,4 +10,5 @@ namespace SkillValidator;
 [YamlSerializable(typeof(EvalSchema.RawSetup))]
 [YamlSerializable(typeof(EvalSchema.RawSetupFile))]
 [YamlSerializable(typeof(EvalSchema.RawAssertion))]
+[YamlSerializable(typeof(EvalSchema.RawSelectivity))]
 public partial class SkillValidatorYamlContext : StaticContext;
diff --git a/tests/dotnet-msbuild/build-perf-diagnostics/eval.yaml b/tests/dotnet-msbuild/build-perf-diagnostics/eval.yaml
index be492fba00..8982b987e5 100644
--- a/tests/dotnet-msbuild/build-perf-diagnostics/eval.yaml
+++ b/tests/dotnet-msbuild/build-perf-diagnostics/eval.yaml
@@ -15,3 +15,17 @@ scenarios:
       - "Identified EnforceCodeStyleInBuild should be conditional on CI, not always true"
       - "Solution preserves full analyzer enforcement in CI pipelines while speeding dev builds"
     timeout: 160
+
+selectivity:
+  should_activate:
+    - "My .NET build takes over 5 minutes, how can I speed it up?"
+    - "How do I analyze a binlog to find slow targets in MSBuild?"
+    - "Roslyn analyzers are making my compilation really slow, what can I do?"
+    - "I want to profile my MSBuild build to understand where time is being spent"
+    - "Our CI builds are fast but local dev builds are painfully slow, how do I diagnose this?"
+  should_not_activate:
+    - "How do I add a NuGet package reference to my project?"
+    - "My unit tests are failing with a NullReferenceException"
+    - "How do I configure Docker for my .NET application?"
+    - "What's the difference between .NET 8 and .NET 9?"
+    - "How do I set up Entity Framework Core migrations?"