From e1e162357a6c4c956cfbba7801a0a70053fa083c Mon Sep 17 00:00:00 2001
From: Stephen Belanger <stephen.belanger@braintrustdata.com>
Date: Tue, 26 May 2026 00:58:28 +0800
Subject: [PATCH 1/2] feat: add classifier support

Adds the classifiers feature from the spec
(braintrust-spec/docs/features/classifiers.md). Classifiers return
structured Classification items (id, optional label, optional metadata)
keyed by name and run in parallel with scorers. At least one of scorers
or classifiers is now required.

Includes ITracedClassifier (parallel to ITracedScorer) so classifiers
can inspect intermediate trace spans, e.g. to label conversation
patterns.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Braintrust.Sdk.sln                            |   7 +
 .../ClassifiersExample.csproj                 |  14 +
 examples/ClassifiersExample/Program.cs        | 152 +++++
 src/Braintrust.Sdk/Eval/Classification.cs     |  14 +
 src/Braintrust.Sdk/Eval/Eval.cs               | 258 +++++++-
 src/Braintrust.Sdk/Eval/FunctionClassifier.cs |  73 +++
 src/Braintrust.Sdk/Eval/IClassifier.cs        |  30 +
 src/Braintrust.Sdk/Eval/ITracedClassifier.cs  |  24 +
 .../Eval/ClassifierTest.cs                    | 566 ++++++++++++++++++
 9 files changed, 1125 insertions(+), 13 deletions(-)
 create mode 100644 examples/ClassifiersExample/ClassifiersExample.csproj
 create mode 100644 examples/ClassifiersExample/Program.cs
 create mode 100644 src/Braintrust.Sdk/Eval/Classification.cs
 create mode 100644 src/Braintrust.Sdk/Eval/FunctionClassifier.cs
 create mode 100644 src/Braintrust.Sdk/Eval/IClassifier.cs
 create mode 100644 src/Braintrust.Sdk/Eval/ITracedClassifier.cs
 create mode 100644 tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs

diff --git a/Braintrust.Sdk.sln b/Braintrust.Sdk.sln
index b9f05d0..e7c2dfb 100644
--- a/Braintrust.Sdk.sln
+++ b/Braintrust.Sdk.sln
@@ -19,6 +19,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenAIInstrumentation", "ex
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EvalExample", "examples\EvalExample\EvalExample.csproj", "{DFAA25AA-72B1-4246-BAB9-A10CCF115406}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ClassifiersExample", "examples\ClassifiersExample\ClassifiersExample.csproj", "{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}"
+EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TraceScoring", "examples\TraceScoring\TraceScoring.csproj", "{66D24AFB-3541-429D-9402-72A344D99115}"
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Braintrust.Sdk.OpenAI", "src\Braintrust.Sdk.OpenAI\Braintrust.Sdk.OpenAI.csproj", "{B3C7D1A2-4E5F-6789-ABCD-EF0123456789}"
@@ -72,6 +74,10 @@ Global
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406}.Release|Any CPU.Build.0 = Release|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Release|Any CPU.Build.0 = Release|Any CPU
 		{66D24AFB-3541-429D-9402-72A344D99115}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{66D24AFB-3541-429D-9402-72A344D99115}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{66D24AFB-3541-429D-9402-72A344D99115}.Release|Any CPU.ActiveCfg = Release|Any CPU
@@ -127,6 +133,7 @@ Global
 		{5A09E90C-6BCB-440C-AC03-5212B2AAE6C2} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{929EDD10-7B06-4C4F-B70F-E4E51072A724} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{66D24AFB-3541-429D-9402-72A344D99115} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{A8A1C23E-7D6F-47FE-9959-B90E9CEF7B2C} = {6530DEC3-1D19-4854-80AC-2D6D02BEAECC}
 		{446D2C4A-41D6-4E4F-AC4C-6809E2416A98} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
diff --git a/examples/ClassifiersExample/ClassifiersExample.csproj b/examples/ClassifiersExample/ClassifiersExample.csproj
new file mode 100644
index 0000000..4cc28eb
--- /dev/null
+++ b/examples/ClassifiersExample/ClassifiersExample.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\src\Braintrust.Sdk\Braintrust.Sdk.csproj" />
+  </ItemGroup>
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+</Project>
diff --git a/examples/ClassifiersExample/Program.cs b/examples/ClassifiersExample/Program.cs
new file mode 100644
index 0000000..4727ddd
--- /dev/null
+++ b/examples/ClassifiersExample/Program.cs
@@ -0,0 +1,152 @@
+using Braintrust.Sdk.Eval;
+
+namespace Braintrust.Sdk.Examples.ClassifiersExample;
+
+// Example: Classifiers
+//
+// Classifiers categorize and label eval outputs. Unlike scorers (which return
+// numeric 0-1 values), classifiers return structured Classification items —
+// each with an Id, an optional Label, and optional Metadata.
+//
+// Results are stored as a dictionary keyed by classifier name:
+//
+//   { "sentiment": [{ id: "positive", label: "Positive" }] }
+//
+// Three patterns are shown:
+//
+//   1. Inline single-label FunctionClassifier
+//   2. Inline multi-label FunctionClassifier (returns IReadOnlyList<Classification>)
+//   3. Class-based classifier implementing IClassifier<TInput, TOutput>
+//
+// Classifiers and scorers run independently. You can use both together, or
+// use only classifiers when you don't need numeric scores.
+
+/// <summary>
+/// Class-based classifier that buckets a response by length and by action-oriented phrasing.
+/// </summary>
+sealed class ResponseQualityClassifier : IClassifier<string, string>
+{
+    private static readonly string[] ActionPhrases = { "immediately", "right away", "look into" };
+
+    public string Name => "response_quality";
+
+    public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult)
+    {
+        var response = taskResult.Result;
+        var words = response.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
+        var category = Categorize(response, words);
+
+        // "too_short" -> "Too short": capitalize the first letter, underscores become spaces.
+        var display = char.ToUpperInvariant(category[0]) + category[1..].Replace('_', ' ');
+        var details = new Dictionary<string, object> { ["word_count"] = words };
+
+        IReadOnlyList<Classification> single = new[]
+        {
+            new Classification(category, Label: display, Metadata: details)
+        };
+        return Task.FromResult(single);
+    }
+
+    // Picks the classification id; the checks run in priority order, first match wins.
+    private static string Categorize(string response, int words)
+    {
+        if (string.IsNullOrWhiteSpace(response))
+        {
+            return "no_response";
+        }
+        if (words < 5)
+        {
+            return "too_short";
+        }
+        var actionable = ActionPhrases.Any(p => response.Contains(p, StringComparison.OrdinalIgnoreCase));
+        return actionable ? "action_oriented" : "informational";
+    }
+}
+
+class Program // Entry point of the example: builds and runs an eval that uses only classifiers
+{
+    private static readonly (string Input, string Expected)[] Messages = // sample messages; Expected uses the same ids the "intent" classifier emits
+    {
+        ("Hi! I just wanted to say thank you, the product is amazing!", "praise"),
+        ("I've been waiting 2 weeks for my order. This is unacceptable!", "follow_up"),
+        ("How do I reset my password? I can't find the option anywhere.", "how_to"),
+        ("The item arrived damaged. I need a refund immediately.", "complaint"),
+        ("Just checking in — any update on my ticket #4821?", "follow_up")
+    };
+
+    static string GenerateResponse(string message) // the eval task: returns a canned reply chosen by keyword, first match wins
+    {
+        if (Regex("thank").IsMatch(message))
+            return "You're welcome! So glad you're enjoying it.";
+        if (Regex("waiting|order").IsMatch(message))
+            return "I sincerely apologise for the delay. Let me look into this right away.";
+        if (Regex("password|reset").IsMatch(message))
+            return "To reset your password, go to Settings > Account > Reset Password.";
+        if (Regex("damaged|refund").IsMatch(message))
+            return "I'm sorry to hear that. I'll process your refund immediately.";
+        return "Thanks for reaching out! Let me check on that for you.";
+    }
+
+    static System.Text.RegularExpressions.Regex Regex(string pattern) // helper: constructs a new case-insensitive Regex on every call
+        => new(pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
+
+    static async Task Main()
+    {
+        var braintrust = Braintrust.Get();
+
+        // Pattern 1: inline single-label classifier (the first matching keyword group decides the id)
+        var intentClassifier = new FunctionClassifier<string, string>(
+            "intent",
+            taskResult =>
+            {
+                var input = taskResult.DatasetCase.Input;
+                string id =
+                    Regex("thank").IsMatch(input) ? "praise" :
+                    Regex("waiting|order|update").IsMatch(input) ? "follow_up" :
+                    Regex("password|reset|find").IsMatch(input) ? "how_to" :
+                    Regex("damaged|refund").IsMatch(input) ? "complaint" :
+                    "other";
+
+                return new Classification(
+                    id,
+                    Label: char.ToUpperInvariant(id[0]) + id[1..].Replace('_', ' ')); // e.g. "follow_up" -> "Follow up"
+            });
+
+        // Pattern 2: inline multi-label classifier — returns a list (every matching tone is added)
+        var toneClassifier = new FunctionClassifier<string, string>(
+            "tone",
+            taskResult =>
+            {
+                var input = taskResult.DatasetCase.Input;
+                var labels = new List<Classification>();
+                if (Regex("immediately|unacceptable|waiting").IsMatch(input))
+                    labels.Add(new Classification("urgent", Label: "Urgent"));
+                if (Regex("please|thank|just checking").IsMatch(input))
+                    labels.Add(new Classification("polite", Label: "Polite"));
+                if (Regex("unacceptable|damaged|waiting").IsMatch(input))
+                    labels.Add(new Classification("frustrated", Label: "Frustrated"));
+                if (labels.Count == 0) // nothing matched: fall back so every case gets at least one tone
+                    labels.Add(new Classification("neutral", Label: "Neutral"));
+                return (IReadOnlyList<Classification>)labels;
+            });
+
+        // Pattern 3: class-based classifier (see ResponseQualityClassifier above)
+        var qualityClassifier = new ResponseQualityClassifier();
+
+        var cases = Messages
+            .Select(m => DatasetCase.Of(m.Input, m.Expected))
+            .ToArray();
+
+        var eval = await braintrust
+            .EvalBuilder<string, string>()
+            .Name($"dotnet-classifiers-example-{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}") // timestamp suffix gives each run a distinct name
+            .Tags("classifiers-example", "dotnet-sdk")
+            .Cases(cases)
+            .TaskFunction(GenerateResponse)
+            .Classifiers(intentClassifier, toneClassifier, qualityClassifier) // no scorers configured: classifiers alone satisfy the builder
+            .BuildAsync();
+
+        var result = await eval.RunAsync();
+        Console.WriteLine($"\n\n{result.CreateReportString()}");
+    }
+}
diff --git a/src/Braintrust.Sdk/Eval/Classification.cs b/src/Braintrust.Sdk/Eval/Classification.cs
new file mode 100644
index 0000000..a69b7a6
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/Classification.cs
@@ -0,0 +1,14 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// A structured label produced by a classifier.
+/// </summary>
+/// <param name="Id">Stable identifier for filtering and grouping. Required: the eval runner records a classifier error when it is null or empty.</param>
+/// <param name="Name">Grouping key in the per-case classifications dictionary. If null, empty, or whitespace, the runner defaults this to the classifier's resolved name.</param>
+/// <param name="Label">Optional display label. Consumers may fall back to <paramref name="Id"/> when omitted.</param>
+/// <param name="Metadata">Optional arbitrary metadata associated with this classification. The eval runner omits it from the stored item when it is null or empty.</param>
+public readonly record struct Classification(
+    string Id,
+    string? Name = null,
+    string? Label = null,
+    IReadOnlyDictionary<string, object>? Metadata = null);
diff --git a/src/Braintrust.Sdk/Eval/Eval.cs b/src/Braintrust.Sdk/Eval/Eval.cs
index fbe6672..ff10611 100644
--- a/src/Braintrust.Sdk/Eval/Eval.cs
+++ b/src/Braintrust.Sdk/Eval/Eval.cs
@@ -32,6 +32,7 @@ public sealed class Eval<TInput, TOutput>
     private readonly IDataset<TInput, TOutput> _dataset;
     private readonly ITask<TInput, TOutput> _task;
     private readonly IReadOnlyList<IScorer<TInput, TOutput>> _scorers;
+    private readonly IReadOnlyList<IClassifier<TInput, TOutput>> _classifiers;
     private readonly IReadOnlyList<string>? _experimentTags;
     private readonly IReadOnlyDictionary<string, object>? _experimentMetadata;
     private readonly int? _maxConcurrency;
@@ -49,6 +50,7 @@ private Eval(Builder builder, OrganizationAndProjectInfo orgAndProject, RepoInfo
         _dataset = builder._dataset ?? throw new ArgumentNullException(nameof(builder._dataset));
         _task = builder._task ?? throw new ArgumentNullException(nameof(builder._task));
         _scorers = builder._scorers.ToList();
+        _classifiers = builder._classifiers.ToList();
         _experimentTags = builder._experimentTags;
         _experimentMetadata = builder._experimentMetadata;
         _maxConcurrency = builder._maxConcurrency;
@@ -165,12 +167,13 @@ private async Task EvalOne(string experimentId, DatasetCase<TInput, TOutput> dat
             }
             if (taskException == null)
             {
-                // Task succeeded — record output and run all scorers in parallel, each in their own span
+                // Task succeeded — record output and run all scorers and classifiers in parallel, each in their own span
                 rootActivity.SetTag("braintrust.output_json", ToJson(new { output = taskResult!.Value.Result }));
 
-                // Flush OTel spans to Braintrust before scoring so traced scorers can access them
-                var hasTracedScorers = _scorers.OfType<ITracedScorer<TInput, TOutput>>().Any();
-                if (hasTracedScorers)
+                // Flush OTel spans to Braintrust before scoring so traced scorers/classifiers can access them
+                var needsTraceFlush = _scorers.OfType<ITracedScorer<TInput, TOutput>>().Any()
+                    || _classifiers.OfType<ITracedClassifier<TInput, TOutput>>().Any();
+                if (needsTraceFlush)
                 {
                     BraintrustTracing.ForceFlush();
                 }
@@ -179,7 +182,8 @@ private async Task EvalOne(string experimentId, DatasetCase<TInput, TOutput> dat
                 var rootSpanId = rootActivity.TraceId.ToHexString();
                 var trace = new EvalTrace(ct => _btqlClient.QuerySpansAsync(experimentId, rootSpanId, ct));
 
-                await RunScorers(experimentId, rootActivity, taskResult!.Value, trace).ConfigureAwait(false);
+                await RunScorersAndClassifiers(experimentId, rootActivity, taskResult!.Value, trace, datasetCase.Metadata)
+                    .ConfigureAwait(false);
             }
             else
             {
@@ -234,19 +238,28 @@ private async Task RunSingleScorerForTaskException(
     }
 
     /// <summary>
-    /// Runs all scorers for a successful task result, each in their own score span.
-    /// Calls <see cref="IScorer{TInput,TOutput}.Score"/> (or <see cref="ITracedScorer{TInput,TOutput}.ScoreAsync"/>
-    /// for traced scorers) and falls back to <see cref="IScorer{TInput,TOutput}.ScoreForScorerException"/> on error.
+    /// Runs all scorers and classifiers for a successful task result in parallel, each in their own span.
+    /// After completion, aggregates classifier results onto the root span as <c>braintrust.classifications</c>
+    /// and merges any classifier errors into the root span's <c>braintrust.metadata</c> under
+    /// <c>classifier_errors</c>.
     /// </summary>
-    private async Task RunScorers(
+    private async Task RunScorersAndClassifiers(
         string experimentId,
         Activity rootActivity,
         TaskResult<TInput, TOutput> taskResult,
-        EvalTrace trace)
+        EvalTrace trace,
+        IReadOnlyDictionary<string, object> caseMetadata)
     {
         var scorerTasks = _scorers.Select(scorer =>
             RunSingleScorer(experimentId, rootActivity, scorer, taskResult, trace));
-        await Task.WhenAll(scorerTasks).ConfigureAwait(false);
+
+        var classifierOutcomes = new ClassifierOutcome?[_classifiers.Count]; // one slot per classifier, filled by index
+        var classifierTasks = _classifiers.Select((classifier, index) =>
+            RunSingleClassifier(experimentId, rootActivity, classifier, index, taskResult, trace, classifierOutcomes));
+
+        await Task.WhenAll(scorerTasks.Concat(classifierTasks)).ConfigureAwait(false); // both Select sequences are lazy; the tasks start as WhenAll enumerates them
+
+        AggregateClassifierOutcomes(rootActivity, caseMetadata, classifierOutcomes); // reached only when WhenAll completed without throwing
     }
 
     private async Task RunSingleScorer(
@@ -327,6 +340,214 @@ private static void RecordScores(
         }
     }
 
+    /// <summary>
+    /// Per-classifier outcome captured after running. Either a successful flat list of normalized items
+    /// (each paired with the name it will be grouped under) or an error message, never both.
+    /// </summary>
+    private sealed class ClassifierOutcome
+    {
+        public string ClassifierName { get; } // resolved classifier name supplied by the runner
+        public IReadOnlyList<(string Name, Dictionary<string, object> Item)>? Items { get; } // null for error outcomes
+        public string? ErrorMessage { get; } // null for successful outcomes
+
+        private ClassifierOutcome( // private: instances are created only through Success/Error below
+            string classifierName,
+            IReadOnlyList<(string Name, Dictionary<string, object> Item)>? items,
+            string? errorMessage)
+        {
+            ClassifierName = classifierName;
+            Items = items;
+            ErrorMessage = errorMessage;
+        }
+
+        public static ClassifierOutcome Success( // outcome carrying items and no error message
+            string classifierName,
+            IReadOnlyList<(string Name, Dictionary<string, object> Item)> items)
+            => new(classifierName, items, null);
+
+        public static ClassifierOutcome Error(string classifierName, string errorMessage) // outcome carrying an error message and no items
+            => new(classifierName, null, errorMessage);
+    }
+
+    /// <summary>
+    /// Runs one classifier inside its own <c>classifier:{name}</c> span and stores the result in
+    /// <paramref name="outcomes"/> at <paramref name="classifierIndex"/>.
+    /// </summary>
+    /// <remarks>
+    /// Failures are non-fatal: any exception raised while tagging the span, running the classifier,
+    /// validating its results, or serializing them is recorded on the span and stored as
+    /// <see cref="ClassifierOutcome.Error"/>. The span is stopped on every path.
+    /// </remarks>
+    /// <param name="experimentId">Experiment id used for the span's parent tag and the ambient context.</param>
+    /// <param name="rootActivity">Root span of the case; currently unused by this method.</param>
+    /// <param name="classifier">Classifier to run; traced classifiers also receive <paramref name="trace"/>.</param>
+    /// <param name="classifierIndex">Position in the classifier list; used for the fallback name and the outcome slot.</param>
+    /// <param name="taskResult">Task result handed to the classifier and recorded as the span input.</param>
+    /// <param name="trace">Trace accessor passed to <see cref="ITracedClassifier{TInput,TOutput}"/> implementations.</param>
+    /// <param name="outcomes">Shared result array; this call writes only its own slot.</param>
+    private async Task RunSingleClassifier(
+        string experimentId,
+        Activity rootActivity,
+        IClassifier<TInput, TOutput> classifier,
+        int classifierIndex,
+        TaskResult<TInput, TOutput> taskResult,
+        EvalTrace trace,
+        ClassifierOutcome?[] outcomes)
+    {
+        var resolvedName = string.IsNullOrWhiteSpace(classifier.Name)
+            ? $"classifier_{classifierIndex}"
+            : classifier.Name;
+
+        var classifierActivity = _activitySource.StartActivity($"classifier:{resolvedName}");
+        try
+        {
+            // Tag setup lives inside the try so a serialization failure cannot leave the span open.
+            classifierActivity?.SetTag(BraintrustTracing.ParentKey, $"experiment_id:{experimentId}");
+            classifierActivity?.SetTag(
+                "braintrust.span_attributes",
+                ToJson(new { type = "classifier", purpose = "scorer" }));
+
+            var datasetCase = taskResult.DatasetCase;
+            classifierActivity?.SetTag(
+                "braintrust.input_json",
+                ToJson(new
+                {
+                    input = datasetCase.Input,
+                    expected = datasetCase.Expected,
+                    output = taskResult.Result,
+                    metadata = datasetCase.Metadata
+                }));
+
+            using var classifierScope = BraintrustContext.OfExperiment(experimentId).MakeCurrent();
+
+            var rawResults = classifier is ITracedClassifier<TInput, TOutput> tracedClassifier
+                ? await tracedClassifier.Classify(taskResult, trace).ConfigureAwait(false)
+                : await classifier.Classify(taskResult).ConfigureAwait(false);
+
+            // A null result is treated the same as "no classifications".
+            rawResults ??= Array.Empty<Classification>();
+
+            // Normalize: resolve name + validate, build storage items (no Name key).
+            var normalized = new List<(string Name, Dictionary<string, object> Item)>(rawResults.Count);
+            foreach (var classification in rawResults)
+            {
+                if (string.IsNullOrEmpty(classification.Id))
+                {
+                    throw new InvalidOperationException(
+                        "When returning structured classifier results, each classification must be a non-empty object.");
+                }
+
+                var groupingName = string.IsNullOrWhiteSpace(classification.Name)
+                    ? resolvedName
+                    : classification.Name!;
+
+                var item = new Dictionary<string, object> { ["id"] = classification.Id };
+                if (classification.Label != null)
+                {
+                    item["label"] = classification.Label;
+                }
+                if (classification.Metadata != null && classification.Metadata.Count > 0)
+                {
+                    item["metadata"] = classification.Metadata;
+                }
+
+                normalized.Add((groupingName, item));
+            }
+
+            // Build output_json keyed by resolved name; unserializable metadata is caught below.
+            if (normalized.Count > 0)
+            {
+                var outputByName = new Dictionary<string, List<Dictionary<string, object>>>();
+                foreach (var (name, item) in normalized)
+                {
+                    if (!outputByName.TryGetValue(name, out var list))
+                    {
+                        list = new List<Dictionary<string, object>>();
+                        outputByName[name] = list;
+                    }
+                    list.Add(item);
+                }
+                classifierActivity?.SetTag("braintrust.output_json", ToJson(outputByName));
+            }
+
+            outcomes[classifierIndex] = ClassifierOutcome.Success(resolvedName, normalized);
+        }
+        catch (Exception ex)
+        {
+            classifierActivity?.SetStatus(ActivityStatusCode.Error, ex.Message);
+            classifierActivity?.AddEvent(CreateExceptionEvent(ex));
+            outcomes[classifierIndex] = ClassifierOutcome.Error(resolvedName, ex.Message);
+        }
+        finally
+        {
+            classifierActivity?.Stop();
+        }
+    }
+
+    /// <summary>
+    /// Folds the per-classifier outcomes into tags on the root span:
+    /// <list type="bullet">
+    ///   <item><c>braintrust.classifications</c> is written when at least one classification was produced.</item>
+    ///   <item><c>braintrust.metadata</c> is rewritten as the case metadata plus a <c>classifier_errors</c> entry when any classifier failed.</item>
+    /// </list>
+    /// </summary>
+    private static void AggregateClassifierOutcomes(
+        Activity rootActivity,
+        IReadOnlyDictionary<string, object> caseMetadata,
+        ClassifierOutcome?[] outcomes)
+    {
+        // An empty outcome array means there is nothing to aggregate.
+        if (outcomes.Length == 0)
+        {
+            return;
+        }
+
+        var itemsByName = new Dictionary<string, List<Dictionary<string, object>>>();
+        var errorsByClassifier = new Dictionary<string, string>();
+
+        foreach (var outcome in outcomes)
+        {
+            switch (outcome)
+            {
+                // Slot that was never filled in.
+                case null:
+                    break;
+
+                // Failed classifier: keep its message, contribute no items.
+                case { ErrorMessage: { } message } failed:
+                    errorsByClassifier[failed.ClassifierName] = message;
+                    break;
+
+                // Successful classifier: append each item under its grouping name.
+                case { Items: { } items }:
+                    foreach (var (groupName, entry) in items)
+                    {
+                        if (!itemsByName.TryGetValue(groupName, out var bucket))
+                        {
+                            bucket = new List<Dictionary<string, object>>();
+                            itemsByName[groupName] = bucket;
+                        }
+                        bucket.Add(entry);
+                    }
+                    break;
+            }
+        }
+
+        if (itemsByName.Count > 0)
+        {
+            rootActivity.SetTag("braintrust.classifications", ToJson(itemsByName));
+        }
+
+        if (errorsByClassifier.Count == 0)
+        {
+            return;
+        }
+
+        var merged = new Dictionary<string, object>(caseMetadata);
+        merged["classifier_errors"] = errorsByClassifier;
+        rootActivity.SetTag("braintrust.metadata", ToJson(merged));
+    }
+
     private static string ToJson(object obj)
     {
         return JsonSerializer.Serialize(obj, JsonOptions);
@@ -388,6 +609,7 @@ public sealed class Builder
         internal IDataset<TInput, TOutput>? _dataset;
         internal ITask<TInput, TOutput>? _task;
         internal List<IScorer<TInput, TOutput>> _scorers = new();
+        internal List<IClassifier<TInput, TOutput>> _classifiers = new();
         internal IReadOnlyList<string>? _experimentTags;
         internal IReadOnlyDictionary<string, object>? _experimentMetadata;
         internal int? _maxConcurrency = 10;
@@ -406,9 +628,9 @@ public async Task<Eval<TInput, TOutput>> BuildAsync()
             _apiClient ??= BraintrustApiClient.Of(_config);
             _btqlClient ??= new BtqlClient(_config);
 
-            if (_scorers.Count == 0)
+            if (_scorers.Count == 0 && _classifiers.Count == 0)
             {
-                throw new InvalidOperationException("Must provide at least one scorer");
+                throw new InvalidOperationException("Must provide at least one scorer or classifier");
             }
 
             if (_dataset == null)
@@ -561,6 +783,16 @@ public Builder Scorers(params IScorer<TInput, TOutput>[] scorers)
             return this;
         }
 
+        /// <summary>
+        /// Set the classifiers, replacing any that were set by an earlier call. The array is copied.
+        /// At least one of <see cref="Scorers"/> or <see cref="Classifiers"/> must be provided.
+        /// </summary>
+        public Builder Classifiers(params IClassifier<TInput, TOutput>[] classifiers)
+        {
+            _classifiers = classifiers.ToList();
+            return this;
+        }
+
         /// <summary>
         /// Set the experiment-level tags.
         /// These tags are applied to the experiment itself, not individual cases.
diff --git a/src/Braintrust.Sdk/Eval/FunctionClassifier.cs b/src/Braintrust.Sdk/Eval/FunctionClassifier.cs
new file mode 100644
index 0000000..5aeb473
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/FunctionClassifier.cs
@@ -0,0 +1,73 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// Adapts a delegate to <see cref="IClassifier{TInput,TOutput}"/>.
+/// The delegate may be synchronous or asynchronous, and may produce either a single
+/// <see cref="Classification"/> or a list of them. A <c>null</c> result from the delegate
+/// is converted to an empty list, i.e. "no classifications for this case".
+/// </summary>
+public class FunctionClassifier<TInput, TOutput> : IClassifier<TInput, TOutput>
+    where TInput : notnull
+    where TOutput : notnull
+{
+    private static readonly IReadOnlyList<Classification> Empty = Array.Empty<Classification>();
+
+    private readonly Func<TaskResult<TInput, TOutput>, Task<IReadOnlyList<Classification>>> _classifierFn;
+
+    /// <summary>
+    /// Synchronous delegate yielding at most one classification; <c>null</c> yields none.
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, Classification?> classifierFn)
+    {
+        Name = name;
+        _classifierFn = taskResult => Task.FromResult(Wrap(classifierFn(taskResult)));
+    }
+
+    /// <summary>
+    /// Synchronous delegate yielding a list of classifications; <c>null</c> yields none.
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, IReadOnlyList<Classification>?> classifierFn)
+    {
+        Name = name;
+        _classifierFn = taskResult => Task.FromResult(OrEmpty(classifierFn(taskResult)));
+    }
+
+    /// <summary>
+    /// Asynchronous delegate yielding at most one classification; <c>null</c> yields none.
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, Task<Classification?>> classifierFn)
+    {
+        Name = name;
+        _classifierFn = async taskResult => Wrap(await classifierFn(taskResult).ConfigureAwait(false));
+    }
+
+    /// <summary>
+    /// Asynchronous delegate yielding a list of classifications; <c>null</c> yields none.
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, Task<IReadOnlyList<Classification>?>> classifierFn)
+    {
+        Name = name;
+        _classifierFn = async taskResult => OrEmpty(await classifierFn(taskResult).ConfigureAwait(false));
+    }
+
+    /// <inheritdoc />
+    public string Name { get; }
+
+    /// <inheritdoc />
+    public Task<IReadOnlyList<Classification>> Classify(TaskResult<TInput, TOutput> taskResult)
+    {
+        return _classifierFn(taskResult);
+    }
+
+    // Wraps an optional single classification in a list; null becomes the shared empty list.
+    private static IReadOnlyList<Classification> Wrap(Classification? single)
+    {
+        return single is { } value ? new[] { value } : Empty;
+    }
+
+    // Substitutes the shared empty list for a null list.
+    private static IReadOnlyList<Classification> OrEmpty(IReadOnlyList<Classification>? list)
+    {
+        return list ?? Empty;
+    }
+}
diff --git a/src/Braintrust.Sdk/Eval/IClassifier.cs b/src/Braintrust.Sdk/Eval/IClassifier.cs
new file mode 100644
index 0000000..f85f502
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/IClassifier.cs
@@ -0,0 +1,30 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// A classifier categorizes and labels eval outputs.
+/// Unlike <see cref="IScorer{TInput,TOutput}"/> (which returns numeric 0-1 values),
+/// classifiers return structured <see cref="Classification"/> items with an id and optional label and metadata.
+/// </summary>
+/// <remarks>
+/// Implementations must be thread-safe as classifiers may be executed concurrently.
+/// Classifier failures are non-fatal: an exception thrown by <see cref="Classify"/> is recorded
+/// under <c>classifier_errors</c> in the eval span's metadata and does not abort the evaluation.
+/// </remarks>
+/// <typeparam name="TInput">Type of the input data</typeparam>
+/// <typeparam name="TOutput">Type of the output data</typeparam>
+public interface IClassifier<TInput, TOutput>
+    where TInput : notnull
+    where TOutput : notnull
+{
+    /// <summary>
+    /// Gets the name of this classifier. Used as the classifier span name and as the default grouping key when a returned
+    /// <see cref="Classification"/> has no <c>Name</c>. The eval runner substitutes <c>classifier_{index}</c> when this is null or blank.
+    /// </summary>
+    string Name { get; }
+
+    /// <summary>
+    /// Classify the task result and return zero or more classifications.
+    /// Return an empty list to indicate no classifications for this case; the eval runner also treats a null list as empty.
+    /// </summary>
+    Task<IReadOnlyList<Classification>> Classify(TaskResult<TInput, TOutput> taskResult);
+}
diff --git a/src/Braintrust.Sdk/Eval/ITracedClassifier.cs b/src/Braintrust.Sdk/Eval/ITracedClassifier.cs
new file mode 100644
index 0000000..97fd6d7
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/ITracedClassifier.cs
@@ -0,0 +1,24 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// A classifier that receives access to the distributed trace (spans) of the task that was evaluated.
+/// This allows classifiers to inspect intermediate LLM calls and tool-use chains, not just the final output.
+///
+/// Implement this interface when your classifier needs to examine multi-turn conversations or tool-use chains
+/// (e.g. classifying a conversation pattern as "single-turn", "tool-heavy", or "clarification-loop").
+/// When a classifier implements this interface, <see cref="Classify(TaskResult{TInput,TOutput},EvalTrace)"/>
+/// is called instead of <see cref="IClassifier{TInput,TOutput}.Classify(TaskResult{TInput,TOutput})"/>.
+/// Backward-compatible: classifiers that only implement <see cref="IClassifier{TInput,TOutput}"/> continue to work without change.
+/// </summary>
+/// <typeparam name="TInput">The type of input data for the evaluation</typeparam>
+/// <typeparam name="TOutput">The type of output produced by the task</typeparam>
+public interface ITracedClassifier<TInput, TOutput> : IClassifier<TInput, TOutput>
+    where TInput : notnull
+    where TOutput : notnull
+{
+    /// <summary>
+    /// Classify the task result using the distributed trace for additional context.
+    /// Called instead of <see cref="IClassifier{TInput,TOutput}.Classify(TaskResult{TInput,TOutput})"/> when trace is available.
+    /// </summary>
+    Task<IReadOnlyList<Classification>> Classify(TaskResult<TInput, TOutput> taskResult, EvalTrace trace);
+}
diff --git a/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs b/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs
new file mode 100644
index 0000000..b99bcb7
--- /dev/null
+++ b/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs
@@ -0,0 +1,566 @@
+using System.Diagnostics;
+using System.Text.Json;
+using Braintrust.Sdk.Config;
+using Braintrust.Sdk.Eval;
+
+namespace Braintrust.Sdk.Tests.Eval;
+
+[Collection("BraintrustGlobals")]
+public class ClassifierTest : IDisposable
+{
+    private readonly ActivityListener _activityListener;
+
+    public ClassifierTest()
+    {
+        Braintrust.ResetForTest();
+        _activityListener = new ActivityListener
+        {
+            ShouldListenTo = source => source.Name == "braintrust-dotnet",
+            Sample = (ref ActivityCreationOptions<ActivityContext> _) => ActivitySamplingResult.AllDataAndRecorded
+        };
+        ActivitySource.AddActivityListener(_activityListener);
+    }
+
+    public void Dispose()
+    {
+        _activityListener?.Dispose();
+        Braintrust.ResetForTest();
+    }
+
+    // =====================================================================
+    // FunctionClassifier shape normalization
+    // =====================================================================
+
+    [Fact]
+    public async Task FunctionClassifierReturnsSingleClassification()
+    {
+        var classifier = new FunctionClassifier<string, string>(
+            "category",
+            _ => new Classification("greeting", Label: "Greeting"));
+
+        var taskResult = MakeTaskResult("hello", "hi");
+        var results = await classifier.Classify(taskResult);
+
+        Assert.Single(results);
+        Assert.Equal("greeting", results[0].Id);
+        Assert.Equal("Greeting", results[0].Label);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierReturnsList()
+    {
+        var classifier = new FunctionClassifier<string, string>(
+            "sentiment",
+            _ => (IReadOnlyList<Classification>)new[]
+            {
+                new Classification("positive", Label: "Positive"),
+                new Classification("enthusiastic", Label: "Enthusiastic")
+            });
+
+        var results = await classifier.Classify(MakeTaskResult("great!", ""));
+
+        Assert.Equal(2, results.Count);
+        Assert.Equal("positive", results[0].Id);
+        Assert.Equal("enthusiastic", results[1].Id);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierNullReturnsEmptyList()
+    {
+        var classifier = new FunctionClassifier<string, string>(
+            "maybe",
+            _ => (Classification?)null);
+
+        var results = await classifier.Classify(MakeTaskResult("hello", "hi"));
+        Assert.Empty(results);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierNullListReturnsEmptyList()
+    {
+        var classifier = new FunctionClassifier<string, string>(
+            "maybe",
+            _ => (IReadOnlyList<Classification>?)null);
+
+        var results = await classifier.Classify(MakeTaskResult("hello", "hi"));
+        Assert.Empty(results);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierAsyncSingle()
+    {
+        var classifier = new FunctionClassifier<string, string>(
+            "category",
+            _ => Task.FromResult<Classification?>(new Classification("greeting")));
+
+        var results = await classifier.Classify(MakeTaskResult("hello", "hi"));
+        Assert.Single(results);
+        Assert.Equal("greeting", results[0].Id);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierAsyncList()
+    {
+        var classifier = new FunctionClassifier<string, string>(
+            "category",
+            _ => Task.FromResult<IReadOnlyList<Classification>?>(new[]
+            {
+                new Classification("a"),
+                new Classification("b")
+            }));
+
+        var results = await classifier.Classify(MakeTaskResult("hello", "hi"));
+        Assert.Equal(2, results.Count);
+    }
+
+    // =====================================================================
+    // Builder validation
+    // =====================================================================
+
+    [Fact]
+    public async Task EvalRequiresAtLeastScorersOrClassifiers()
+    {
+        var config = BraintrustConfig.Of(("BRAINTRUST_API_KEY", "test-key"));
+        var mockClient = new MockBraintrustApiClient();
+
+        var ex = await Assert.ThrowsAsync<InvalidOperationException>(() =>
+            Eval<string, string>.NewBuilder()
+                .Name("test-eval")
+                .Config(config)
+                .ApiClient(mockClient)
+                .Cases(DatasetCase.Of("input", "expected"))
+                .TaskFunction(x => x)
+                .BuildAsync());
+
+        Assert.Contains("at least one scorer or classifier", ex.Message);
+    }
+
+    [Fact]
+    public async Task EvalBuildsWithClassifiersOnly()
+    {
+        var config = BraintrustConfig.Of(
+            ("BRAINTRUST_API_KEY", "test-key"),
+            ("BRAINTRUST_APP_URL", "https://braintrust.dev"),
+            ("BRAINTRUST_DEFAULT_PROJECT_NAME", "test-project"));
+        var mockClient = new MockBraintrustApiClient();
+
+        var eval = await Eval<string, string>.NewBuilder()
+            .Name("test-eval")
+            .Config(config)
+            .ApiClient(mockClient)
+            .Cases(DatasetCase.Of("hello", "hi"))
+            .TaskFunction(x => x)
+            .Classifiers(new FunctionClassifier<string, string>(
+                "category",
+                _ => new Classification("greeting")))
+            .BuildAsync();
+
+        var result = await eval.RunAsync();
+        Assert.NotNull(result.ExperimentUrl);
+    }
+
+    // =====================================================================
+    // Runner — classifier results on the eval span
+    // =====================================================================
+
+    [Fact]
+    public async Task RunnerWritesClassificationsToEvalSpan()
+    {
+        var (rootSpans, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "category",
+                    _ => new Classification("greeting", Label: "Greeting"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        using var classifications = ReadClassifications(root); // JsonDocument rents pooled buffers; dispose it when the test ends
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("category", out var categoryItems));
+        Assert.Equal(1, categoryItems.GetArrayLength());
+        Assert.Equal("greeting", categoryItems[0].GetProperty("id").GetString());
+        Assert.Equal("Greeting", categoryItems[0].GetProperty("label").GetString());
+
+        // Exactly one classifier span is produced for the single classifier/case pair
+        Assert.Single(classifierSpans);
+    }
+
+    [Fact]
+    public async Task RunnerWritesNoClassificationsTagWhenAllNull()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>("maybe", _ => (Classification?)null)
+            });
+
+        var root = Assert.Single(rootSpans);
+        Assert.Null(root.GetTagItem("braintrust.classifications"));
+    }
+
+    [Fact]
+    public async Task RunnerCombinesScorersAndClassifiers()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            scorers: new IScorer<string, string>[]
+            {
+                new FunctionScorer<string, string>("exact", (e, a) => e == a ? 1.0 : 0.0)
+            },
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>("category", _ => new Classification("greeting"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        Assert.NotNull(root.GetTagItem("braintrust.classifications"));
+        // The eval span does not store scores itself; verify the classification path was hit
+        // independently from the scorer path. Score span coverage is in EvalTest.
+    }
+
+    [Fact]
+    public async Task RunnerHandlesClassifierExceptionWithoutAbortingEval()
+    {
+        var (rootSpans, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new ThrowingClassifier("broken", "classifier boom"),
+                new FunctionClassifier<string, string>("working", _ => new Classification("ok"))
+            });
+
+        var root = Assert.Single(rootSpans);
+
+        // Classifier errors merged into braintrust.metadata under classifier_errors
+        var metadataJson = root.GetTagItem("braintrust.metadata") as string;
+        Assert.NotNull(metadataJson);
+        using var doc = JsonDocument.Parse(metadataJson);
+        Assert.True(doc.RootElement.TryGetProperty("classifier_errors", out var errors));
+        Assert.Equal("classifier boom", errors.GetProperty("broken").GetString());
+
+        // The working classifier still wrote its classification
+        using var classifications = ReadClassifications(root); // disposed like `doc` above; JsonDocument rents pooled buffers
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("working", out _));
+
+        // The broken classifier span has error status + exception event
+        var brokenSpan = classifierSpans.First(s => s.DisplayName == "classifier:broken");
+        Assert.Equal(ActivityStatusCode.Error, brokenSpan.Status);
+        Assert.NotEmpty(brokenSpan.Events);
+
+        // The eval (root) span itself is not marked Error by a classifier failure
+        Assert.Equal(ActivityStatusCode.Unset, root.Status);
+    }
+
+    [Fact]
+    public async Task RunnerWritesClassifierSpanAttributes()
+    {
+        var (_, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "my_classifier",
+                    _ => new Classification("foo"))
+            });
+
+        var span = Assert.Single(classifierSpans);
+        Assert.Equal("classifier:my_classifier", span.DisplayName);
+
+        var attrsJson = span.GetTagItem("braintrust.span_attributes") as string;
+        Assert.NotNull(attrsJson);
+        using var doc = JsonDocument.Parse(attrsJson);
+        Assert.Equal("classifier", doc.RootElement.GetProperty("type").GetString());
+        Assert.Equal("scorer", doc.RootElement.GetProperty("purpose").GetString());
+    }
+
+    [Fact]
+    public async Task RunnerMultiLabelResultPreservesOrder()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("great!", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "sentiment",
+                    _ => (IReadOnlyList<Classification>)new[]
+                    {
+                        new Classification("positive", Label: "Positive"),
+                        new Classification("enthusiastic", Label: "Enthusiastic")
+                    })
+            });
+
+        var root = Assert.Single(rootSpans);
+        using var classifications = ReadClassifications(root); // JsonDocument rents pooled buffers; dispose it when the test ends
+        Assert.NotNull(classifications);
+        var items = classifications.RootElement.GetProperty("sentiment");
+        Assert.Equal(2, items.GetArrayLength());
+        Assert.Equal("positive", items[0].GetProperty("id").GetString()); // array order must match the order returned by the classifier
+        Assert.Equal("enthusiastic", items[1].GetProperty("id").GetString());
+    }
+
+    [Fact]
+    public async Task RunnerClassificationNameDefaultsToClassifierName()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                // Classification has no Name set, so the classifier's own name is the expected grouping key
+                new FunctionClassifier<string, string>(
+                    "my_classifier",
+                    _ => new Classification("foo"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        using var classifications = ReadClassifications(root); // JsonDocument rents pooled buffers; dispose it when the test ends
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("my_classifier", out _));
+    }
+
+    [Fact]
+    public async Task RunnerClassificationExplicitNameOverridesClassifierName()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "my_classifier",
+                    _ => new Classification("foo", Name: "override_name"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        using var classifications = ReadClassifications(root); // JsonDocument rents pooled buffers; dispose it when the test ends
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("override_name", out _)); // explicit Name wins as the grouping key
+        Assert.False(classifications.RootElement.TryGetProperty("my_classifier", out _));
+    }
+
+    [Fact]
+    public async Task RunnerEmptyClassificationItemIsRecordedAsError()
+    {
+        var (rootSpans, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                // Default(Classification) — Id is null/empty, so should fail validation
+                new FunctionClassifier<string, string>(
+                    "bad",
+                    _ => (Classification?)default(Classification))
+            });
+
+        var root = Assert.Single(rootSpans);
+        var metadataJson = root.GetTagItem("braintrust.metadata") as string;
+        Assert.NotNull(metadataJson);
+        using var doc = JsonDocument.Parse(metadataJson);
+        var errors = doc.RootElement.GetProperty("classifier_errors");
+        var brokenError = errors.GetProperty("bad").GetString();
+        Assert.NotNull(brokenError);
+        Assert.Contains("each classification must be a non-empty object", brokenError);
+
+        var brokenSpan = Assert.Single(classifierSpans);
+        Assert.Equal(ActivityStatusCode.Error, brokenSpan.Status);
+    }
+
+    [Fact]
+    public async Task RunnerAccumulatesClassificationsAcrossCases()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[]
+            {
+                DatasetCase.Of("hi", "x"),
+                DatasetCase.Of("hello", "x"),
+                DatasetCase.Of("ok", "x")
+            },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "category",
+                    tr => new Classification(tr.Result.Length > 3 ? "long" : "short"))
+            });
+
+        Assert.Equal(3, rootSpans.Count);
+        foreach (var root in rootSpans)
+        {
+            using var classifications = ReadClassifications(root); // disposed at the end of each iteration; JsonDocument rents pooled buffers
+            Assert.NotNull(classifications);
+            Assert.True(classifications.RootElement.TryGetProperty("category", out _));
+        }
+    }
+
+    [Fact]
+    public async Task RunnerClassifierInputContainsAllScoringArgs()
+    {
+        var (_, classifierSpans) = await RunEval(
+            cases: new[]
+            {
+                DatasetCase.Of(
+                    "hello", "hi",
+                    new List<string>(),
+                    new Dictionary<string, object> { ["k"] = "v" })
+            },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>("category", _ => new Classification("greeting"))
+            });
+
+        var span = Assert.Single(classifierSpans);
+        var inputJson = span.GetTagItem("braintrust.input_json") as string;
+        Assert.NotNull(inputJson);
+        using var doc = JsonDocument.Parse(inputJson);
+        Assert.Equal("hello", doc.RootElement.GetProperty("input").GetString());
+        Assert.Equal("hi", doc.RootElement.GetProperty("expected").GetString());
+        Assert.Equal("hello", doc.RootElement.GetProperty("output").GetString());
+        Assert.True(doc.RootElement.TryGetProperty("metadata", out var md));
+        Assert.Equal("v", md.GetProperty("k").GetString());
+    }
+
+    // =====================================================================
+    // ITracedClassifier
+    // =====================================================================
+
+    [Fact]
+    public async Task TracedClassifierReceivesEvalTrace()
+    {
+        var spans = new[]
+        {
+            MockBtqlClient.MakeSpan("llm", input: new { messages = new[] { new { role = "user", content = "hi" } } },
+                output: new { choices = new[] { new { message = new { role = "assistant", content = "hello" } } } })
+        };
+        var mockBtql = new MockBtqlClient(spans);
+
+        var capturedSpanCount = -1;
+        var classifier = new TracedClassifier(
+            "trace_inspector",
+            async (_, trace) =>
+            {
+                var fetched = await trace.GetSpansAsync("llm");
+                capturedSpanCount = fetched.Count;
+                return new[] { new Classification("multi_turn") };
+            });
+
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[] { classifier },
+            btqlClient: mockBtql);
+
+        Assert.Single(rootSpans);
+        Assert.Equal(1, capturedSpanCount);
+        Assert.Equal(1, mockBtql.QueryCount);
+    }
+
+    // =====================================================================
+    // Helpers
+    // =====================================================================
+
+    private static TaskResult<string, string> MakeTaskResult(string input, string output)
+        => new(output, new DatasetCase<string, string>(input, ""));
+
+    private static JsonDocument? ReadClassifications(Activity span)
+    {
+        var json = span.GetTagItem("braintrust.classifications") as string;
+        return json == null ? null : JsonDocument.Parse(json);
+    }
+
+    private async Task<(List<Activity> RootSpans, List<Activity> ClassifierSpans)> RunEval(
+        DatasetCase<string, string>[] cases,
+        Func<string, string> taskFn,
+        IScorer<string, string>[]? scorers = null,
+        IClassifier<string, string>[]? classifiers = null,
+        MockBtqlClient? btqlClient = null)
+    {
+        var config = BraintrustConfig.Of(
+            ("BRAINTRUST_API_KEY", "test-key"),
+            ("BRAINTRUST_APP_URL", "https://braintrust.dev"),
+            ("BRAINTRUST_DEFAULT_PROJECT_NAME", "test-project"));
+        var mockClient = new MockBraintrustApiClient();
+        btqlClient ??= new MockBtqlClient();
+
+        var captured = new System.Collections.Concurrent.ConcurrentQueue<Activity>(); // spans may stop on several threads at once; List<T>.Add is not thread-safe
+        using var listener = new ActivityListener
+        {
+            ShouldListenTo = source => source.Name == "braintrust-dotnet",
+            Sample = (ref ActivityCreationOptions<ActivityContext> _) => ActivitySamplingResult.AllDataAndRecorded,
+            ActivityStopped = captured.Enqueue
+        };
+        ActivitySource.AddActivityListener(listener);
+
+        var builder = Eval<string, string>.NewBuilder()
+            .Name("classifier-test")
+            .Config(config)
+            .ApiClient(mockClient)
+            .BtqlClient(btqlClient)
+            .Cases(cases)
+            .TaskFunction(taskFn);
+
+        if (scorers != null && scorers.Length > 0)
+        {
+            builder.Scorers(scorers);
+        }
+
+        if (classifiers != null && classifiers.Length > 0)
+        {
+            builder.Classifiers(classifiers);
+        }
+        else if (scorers == null || scorers.Length == 0)
+        {
+            // The validator forbids zero classifiers and zero scorers; tests using RunEval should specify at least one.
+            throw new InvalidOperationException("Test setup error: provide at least one scorer or classifier.");
+        }
+
+        var eval = await builder.BuildAsync();
+        await eval.RunAsync();
+
+        var rootSpans = captured.Where(a => a.DisplayName == "eval").ToList();
+        var classifierSpans = captured.Where(a => a.DisplayName.StartsWith("classifier:")).ToList();
+        return (rootSpans, classifierSpans);
+    }
+
+    private sealed class ThrowingClassifier : IClassifier<string, string>
+    {
+        private readonly string _message;
+        public ThrowingClassifier(string name, string message)
+        {
+            Name = name;
+            _message = message;
+        }
+        public string Name { get; }
+        public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult)
+            => throw new InvalidOperationException(_message);
+    }
+
+    private sealed class TracedClassifier : ITracedClassifier<string, string>
+    {
+        private readonly Func<TaskResult<string, string>, EvalTrace, Task<IReadOnlyList<Classification>>> _fn;
+        public TracedClassifier(
+            string name,
+            Func<TaskResult<string, string>, EvalTrace, Task<IReadOnlyList<Classification>>> fn)
+        {
+            Name = name;
+            _fn = fn;
+        }
+        public string Name { get; }
+
+        public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult)
+            => Task.FromResult<IReadOnlyList<Classification>>(Array.Empty<Classification>());
+
+        public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult, EvalTrace trace)
+            => _fn(taskResult, trace);
+    }
+}

From 40212c06aaeb306d788a9b4d5dd15710d62681cc Mon Sep 17 00:00:00 2001
From: Stephen Belanger <stephen.belanger@braintrustdata.com>
Date: Sat, 30 May 2026 01:03:13 +0800
Subject: [PATCH 2/2] Align classifier span name + span_attributes with
 canonical Ruby spec

The Ruby reference implementation
(sdk-ruby/lib/braintrust/eval/runner.rb:391, 416-420) uses the
classifier name directly as the span name and includes a `name` key in
braintrust.span_attributes. Java already follows this pattern
(Eval.java:290, 297). .NET was using a "classifier:" prefix on the span
name and omitting the name attribute, which prevented consistent
classifier-span discovery downstream.

Update tests that asserted the prefixed display name; the test helper
now identifies classifier spans by their span_attributes.type tag
instead of a name prefix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/Braintrust.Sdk/Eval/Eval.cs                   |  4 ++--
 tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs | 13 ++++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/Braintrust.Sdk/Eval/Eval.cs b/src/Braintrust.Sdk/Eval/Eval.cs
index ff10611..8afa9d4 100644
--- a/src/Braintrust.Sdk/Eval/Eval.cs
+++ b/src/Braintrust.Sdk/Eval/Eval.cs
@@ -382,11 +382,11 @@ private async Task RunSingleClassifier(
             ? $"classifier_{classifierIndex}"
             : classifier.Name;
 
-        var classifierActivity = _activitySource.StartActivity($"classifier:{resolvedName}");
+        var classifierActivity = _activitySource.StartActivity(resolvedName);
         classifierActivity?.SetTag(BraintrustTracing.ParentKey, $"experiment_id:{experimentId}");
         classifierActivity?.SetTag(
             "braintrust.span_attributes",
-            ToJson(new { type = "classifier", purpose = "scorer" }));
+            ToJson(new { type = "classifier", name = resolvedName, purpose = "scorer" }));
 
         var datasetCase = taskResult.DatasetCase;
         classifierActivity?.SetTag(
diff --git a/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs b/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs
index b99bcb7..b790130 100644
--- a/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs
+++ b/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs
@@ -251,7 +251,7 @@ public async Task RunnerHandlesClassifierExceptionWithoutAbortingEval()
         Assert.True(classifications.RootElement.TryGetProperty("working", out _));
 
         // The broken classifier span has error status + exception event
-        var brokenSpan = classifierSpans.First(s => s.DisplayName == "classifier:broken");
+        var brokenSpan = classifierSpans.First(s => s.DisplayName == "broken");
         Assert.Equal(ActivityStatusCode.Error, brokenSpan.Status);
         Assert.NotEmpty(brokenSpan.Events);
 
@@ -273,12 +273,13 @@ public async Task RunnerWritesClassifierSpanAttributes()
             });
 
         var span = Assert.Single(classifierSpans);
-        Assert.Equal("classifier:my_classifier", span.DisplayName);
+        Assert.Equal("my_classifier", span.DisplayName);
 
         var attrsJson = span.GetTagItem("braintrust.span_attributes") as string;
         Assert.NotNull(attrsJson);
         using var doc = JsonDocument.Parse(attrsJson);
         Assert.Equal("classifier", doc.RootElement.GetProperty("type").GetString());
+        Assert.Equal("my_classifier", doc.RootElement.GetProperty("name").GetString());
         Assert.Equal("scorer", doc.RootElement.GetProperty("purpose").GetString());
     }
 
@@ -528,7 +529,13 @@ private static TaskResult<string, string> MakeTaskResult(string input, string ou
         await eval.RunAsync();
 
         var rootSpans = captured.Where(a => a.DisplayName == "eval").ToList();
-        var classifierSpans = captured.Where(a => a.DisplayName.StartsWith("classifier:")).ToList();
+        var classifierSpans = captured
+            .Where(a =>
+            {
+                var attrs = a.GetTagItem("braintrust.span_attributes") as string;
+                return attrs != null && attrs.Contains("\"type\":\"classifier\"");
+            })
+            .ToList();
         return (rootSpans, classifierSpans);
     }