diff --git a/Braintrust.Sdk.sln b/Braintrust.Sdk.sln
index b9f05d0..e7c2dfb 100644
--- a/Braintrust.Sdk.sln
+++ b/Braintrust.Sdk.sln
@@ -19,6 +19,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenAIInstrumentation", "ex
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EvalExample", "examples\EvalExample\EvalExample.csproj", "{DFAA25AA-72B1-4246-BAB9-A10CCF115406}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ClassifiersExample", "examples\ClassifiersExample\ClassifiersExample.csproj", "{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}"
+EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TraceScoring", "examples\TraceScoring\TraceScoring.csproj", "{66D24AFB-3541-429D-9402-72A344D99115}"
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Braintrust.Sdk.OpenAI", "src\Braintrust.Sdk.OpenAI\Braintrust.Sdk.OpenAI.csproj", "{B3C7D1A2-4E5F-6789-ABCD-EF0123456789}"
@@ -72,6 +74,10 @@ Global
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406}.Release|Any CPU.Build.0 = Release|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB}.Release|Any CPU.Build.0 = Release|Any CPU
 		{66D24AFB-3541-429D-9402-72A344D99115}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{66D24AFB-3541-429D-9402-72A344D99115}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{66D24AFB-3541-429D-9402-72A344D99115}.Release|Any CPU.ActiveCfg = Release|Any CPU
@@ -127,6 +133,7 @@ Global
 		{5A09E90C-6BCB-440C-AC03-5212B2AAE6C2} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{929EDD10-7B06-4C4F-B70F-E4E51072A724} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{DFAA25AA-72B1-4246-BAB9-A10CCF115406} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
+		{0A934BA7-BEBB-4EF0-88A6-9A5355E6D0BB} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{66D24AFB-3541-429D-9402-72A344D99115} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
 		{A8A1C23E-7D6F-47FE-9959-B90E9CEF7B2C} = {6530DEC3-1D19-4854-80AC-2D6D02BEAECC}
 		{446D2C4A-41D6-4E4F-AC4C-6809E2416A98} = {A1BDA853-65BE-4CC8-8070-CCBA22069A7A}
diff --git a/examples/ClassifiersExample/ClassifiersExample.csproj b/examples/ClassifiersExample/ClassifiersExample.csproj
new file mode 100644
index 0000000..4cc28eb
--- /dev/null
+++ b/examples/ClassifiersExample/ClassifiersExample.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\src\Braintrust.Sdk\Braintrust.Sdk.csproj" />
+  </ItemGroup>
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+</Project>
diff --git a/examples/ClassifiersExample/Program.cs b/examples/ClassifiersExample/Program.cs
new file mode 100644
index 0000000..4727ddd
--- /dev/null
+++ b/examples/ClassifiersExample/Program.cs
@@ -0,0 +1,152 @@
+using Braintrust.Sdk.Eval;
+
+namespace Braintrust.Sdk.Examples.ClassifiersExample;
+
+// Example: Classifiers
+//
+// Classifiers categorize and label eval outputs. Unlike scorers (which return
+// numeric 0-1 values), classifiers return structured Classification items —
+// each with an Id, an optional Label, and optional Metadata.
+//
+// Results are stored as a dictionary keyed by classifier name:
+//
+//   { "sentiment": [{ id: "positive", label: "Positive" }] }
+//
+// Three patterns are shown:
+//
+//   1. Inline single-label FunctionClassifier
+//   2. Inline multi-label FunctionClassifier (returns IReadOnlyList<Classification>)
+//   3. Class-based classifier implementing IClassifier<TInput, TOutput>
+//
+// Classifiers and scorers run independently. You can use both together, or
+// use only classifiers when you don't need numeric scores.
+
+// Pattern 3: a class-based classifier implementing IClassifier<TInput, TOutput> directly.
+// It buckets each generated response by its length and wording.
+sealed class ResponseQualityClassifier : IClassifier<string, string>
+{
+    // Phrases that mark a response as promising concrete action (matched case-insensitively).
+    private static readonly string[] ActionPhrases = { "immediately", "right away", "look into" };
+
+    // Classifier name; results are grouped under this key unless a Classification sets its own Name.
+    public string Name => "response_quality";
+
+    // Emits exactly one classification per case: "no_response", "too_short" (fewer than five
+    // space-separated words), "action_oriented" or "informational". The word count is attached
+    // as metadata so it can be inspected next to the label.
+    public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult)
+    {
+        var response = taskResult.Result;
+        var words = response.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
+
+        // Arms are tried top to bottom; the first one whose guard holds wins.
+        var id = response switch
+        {
+            _ when string.IsNullOrWhiteSpace(response) => "no_response",
+            _ when words < 5 => "too_short",
+            _ when ActionPhrases.Any(phrase => response.Contains(phrase, StringComparison.OrdinalIgnoreCase))
+                => "action_oriented",
+            _ => "informational"
+        };
+
+        // "too_short" -> "Too short": capitalize the first letter, turn underscores into spaces.
+        var label = $"{char.ToUpperInvariant(id[0])}{id[1..].Replace('_', ' ')}";
+
+        var classification = new Classification(
+            id,
+            Label: label,
+            Metadata: new Dictionary<string, object> { ["word_count"] = words });
+
+        // IClassifier always returns a list, so wrap the single result.
+        IReadOnlyList<Classification> single = new[] { classification };
+        return Task.FromResult(single);
+    }
+}
+
+class Program
+{
+    private static readonly (string Input, string Expected)[] Messages = // customer message paired with its expected intent id
+    {
+        ("Hi! I just wanted to say thank you, the product is amazing!", "praise"),
+        ("I've been waiting 2 weeks for my order. This is unacceptable!", "follow_up"),
+        ("How do I reset my password? I can't find the option anywhere.", "how_to"),
+        ("The item arrived damaged. I need a refund immediately.", "complaint"),
+        ("Just checking in — any update on my ticket #4821?", "follow_up")
+    };
+
+    static string GenerateResponse(string message) // the eval task: picks a canned reply by keyword (first match wins)
+    {
+        if (Regex("thank").IsMatch(message))
+            return "You're welcome! So glad you're enjoying it.";
+        if (Regex("waiting|order").IsMatch(message))
+            return "I sincerely apologise for the delay. Let me look into this right away.";
+        if (Regex("password|reset").IsMatch(message))
+            return "To reset your password, go to Settings > Account > Reset Password.";
+        if (Regex("damaged|refund").IsMatch(message))
+            return "I'm sorry to hear that. I'll process your refund immediately.";
+        return "Thanks for reaching out! Let me check on that for you."; // fallback when no keyword matches
+    }
+
+    static System.Text.RegularExpressions.Regex Regex(string pattern)
+        => new(pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase); // case-insensitive; a new Regex is built on every call
+
+    static async Task Main()
+    {
+        var braintrust = Braintrust.Get();
+
+        // Pattern 1: inline single-label classifier — the lambda returns one Classification chosen from the case input
+        var intentClassifier = new FunctionClassifier<string, string>(
+            "intent",
+            taskResult =>
+            {
+                var input = taskResult.DatasetCase.Input;
+                string id =
+                    Regex("thank").IsMatch(input) ? "praise" :
+                    Regex("waiting|order|update").IsMatch(input) ? "follow_up" :
+                    Regex("password|reset|find").IsMatch(input) ? "how_to" :
+                    Regex("damaged|refund").IsMatch(input) ? "complaint" :
+                    "other";
+
+                return new Classification(
+                    id,
+                    Label: char.ToUpperInvariant(id[0]) + id[1..].Replace('_', ' ')); // e.g. "follow_up" -> "Follow up"
+            });
+
+        // Pattern 2: inline multi-label classifier — returns a list with every matching tone, or a single "neutral" entry when none match
+        var toneClassifier = new FunctionClassifier<string, string>(
+            "tone",
+            taskResult =>
+            {
+                var input = taskResult.DatasetCase.Input;
+                var labels = new List<Classification>();
+                if (Regex("immediately|unacceptable|waiting").IsMatch(input))
+                    labels.Add(new Classification("urgent", Label: "Urgent"));
+                if (Regex("please|thank|just checking").IsMatch(input))
+                    labels.Add(new Classification("polite", Label: "Polite"));
+                if (Regex("unacceptable|damaged|waiting").IsMatch(input))
+                    labels.Add(new Classification("frustrated", Label: "Frustrated"));
+                if (labels.Count == 0)
+                    labels.Add(new Classification("neutral", Label: "Neutral"));
+                return (IReadOnlyList<Classification>)labels;
+            });
+
+        // Pattern 3: class-based classifier (see ResponseQualityClassifier above)
+        var qualityClassifier = new ResponseQualityClassifier();
+
+        var cases = Messages
+            .Select(m => DatasetCase.Of(m.Input, m.Expected))
+            .ToArray();
+
+        var eval = await braintrust
+            .EvalBuilder<string, string>()
+            .Name($"dotnet-classifiers-example-{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}") // millisecond timestamp gives each run a distinct name
+            .Tags("classifiers-example", "dotnet-sdk")
+            .Cases(cases)
+            .TaskFunction(GenerateResponse)
+            .Classifiers(intentClassifier, toneClassifier, qualityClassifier) // classifiers only: the builder accepts an eval without scorers
+            .BuildAsync();
+
+        var result = await eval.RunAsync();
+        Console.WriteLine($"\n\n{result.CreateReportString()}");
+    }
+}
diff --git a/src/Braintrust.Sdk/Eval/Classification.cs b/src/Braintrust.Sdk/Eval/Classification.cs
new file mode 100644
index 0000000..a69b7a6
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/Classification.cs
@@ -0,0 +1,14 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// A structured label produced by a classifier.
+/// </summary>
+/// <param name="Id">Stable identifier for filtering and grouping. Required: the eval runner reports a classifier error when it is null or empty.</param>
+/// <param name="Name">Grouping key in the per-case classifications dictionary. If null, empty, or whitespace, the runner defaults this to the classifier's resolved name.</param>
+/// <param name="Label">Optional display label. Consumers may fall back to <paramref name="Id"/> when omitted.</param>
+/// <param name="Metadata">Optional arbitrary metadata associated with this classification. The runner stores it only when it contains at least one entry.</param>
+public readonly record struct Classification(
+    string Id,
+    string? Name = null,
+    string? Label = null,
+    IReadOnlyDictionary<string, object>? Metadata = null);
diff --git a/src/Braintrust.Sdk/Eval/Eval.cs b/src/Braintrust.Sdk/Eval/Eval.cs
index fbe6672..8afa9d4 100644
--- a/src/Braintrust.Sdk/Eval/Eval.cs
+++ b/src/Braintrust.Sdk/Eval/Eval.cs
@@ -32,6 +32,7 @@ public sealed class Eval<TInput, TOutput>
     private readonly IDataset<TInput, TOutput> _dataset;
     private readonly ITask<TInput, TOutput> _task;
     private readonly IReadOnlyList<IScorer<TInput, TOutput>> _scorers;
+    private readonly IReadOnlyList<IClassifier<TInput, TOutput>> _classifiers;
     private readonly IReadOnlyList<string>? _experimentTags;
     private readonly IReadOnlyDictionary<string, object>? _experimentMetadata;
     private readonly int? _maxConcurrency;
@@ -49,6 +50,7 @@ private Eval(Builder builder, OrganizationAndProjectInfo orgAndProject, RepoInfo
         _dataset = builder._dataset ?? throw new ArgumentNullException(nameof(builder._dataset));
         _task = builder._task ?? throw new ArgumentNullException(nameof(builder._task));
         _scorers = builder._scorers.ToList();
+        _classifiers = builder._classifiers.ToList();
         _experimentTags = builder._experimentTags;
         _experimentMetadata = builder._experimentMetadata;
         _maxConcurrency = builder._maxConcurrency;
@@ -165,12 +167,13 @@ private async Task EvalOne(string experimentId, DatasetCase<TInput, TOutput> dat
             }
             if (taskException == null)
             {
-                // Task succeeded — record output and run all scorers in parallel, each in their own span
+                // Task succeeded — record output and run all scorers and classifiers in parallel, each in their own span
                 rootActivity.SetTag("braintrust.output_json", ToJson(new { output = taskResult!.Value.Result }));
 
-                // Flush OTel spans to Braintrust before scoring so traced scorers can access them
-                var hasTracedScorers = _scorers.OfType<ITracedScorer<TInput, TOutput>>().Any();
-                if (hasTracedScorers)
+                // Flush OTel spans to Braintrust before scoring so traced scorers/classifiers can access them
+                var needsTraceFlush = _scorers.OfType<ITracedScorer<TInput, TOutput>>().Any()
+                    || _classifiers.OfType<ITracedClassifier<TInput, TOutput>>().Any();
+                if (needsTraceFlush)
                 {
                     BraintrustTracing.ForceFlush();
                 }
@@ -179,7 +182,8 @@ private async Task EvalOne(string experimentId, DatasetCase<TInput, TOutput> dat
                 var rootSpanId = rootActivity.TraceId.ToHexString();
                 var trace = new EvalTrace(ct => _btqlClient.QuerySpansAsync(experimentId, rootSpanId, ct));
 
-                await RunScorers(experimentId, rootActivity, taskResult!.Value, trace).ConfigureAwait(false);
+                await RunScorersAndClassifiers(experimentId, rootActivity, taskResult!.Value, trace, datasetCase.Metadata)
+                    .ConfigureAwait(false);
             }
             else
             {
@@ -234,19 +238,28 @@ private async Task RunSingleScorerForTaskException(
     }
 
     /// <summary>
-    /// Runs all scorers for a successful task result, each in their own score span.
-    /// Calls <see cref="IScorer{TInput,TOutput}.Score"/> (or <see cref="ITracedScorer{TInput,TOutput}.ScoreAsync"/>
-    /// for traced scorers) and falls back to <see cref="IScorer{TInput,TOutput}.ScoreForScorerException"/> on error.
+    /// Runs all scorers and classifiers for a successful task result in parallel, each in their own span.
+    /// After completion, aggregates classifier results onto the root span as <c>braintrust.classifications</c>
+    /// and merges any classifier errors into the root span's <c>braintrust.metadata</c> under
+    /// <c>classifier_errors</c>.
     /// </summary>
-    private async Task RunScorers(
+    private async Task RunScorersAndClassifiers(
         string experimentId,
         Activity rootActivity,
         TaskResult<TInput, TOutput> taskResult,
-        EvalTrace trace)
+        EvalTrace trace,
+        IReadOnlyDictionary<string, object> caseMetadata)
     {
         var scorerTasks = _scorers.Select(scorer =>
             RunSingleScorer(experimentId, rootActivity, scorer, taskResult, trace));
-        await Task.WhenAll(scorerTasks).ConfigureAwait(false);
+
+        var classifierOutcomes = new ClassifierOutcome?[_classifiers.Count]; // one slot per classifier; each task writes only its own index
+        var classifierTasks = _classifiers.Select((classifier, index) =>
+            RunSingleClassifier(experimentId, rootActivity, classifier, index, taskResult, trace, classifierOutcomes));
+
+        await Task.WhenAll(scorerTasks.Concat(classifierTasks)).ConfigureAwait(false); // both sequences are lazy; WhenAll enumerates them, which starts every task
+
+        AggregateClassifierOutcomes(rootActivity, caseMetadata, classifierOutcomes); // runs once, after all scorer and classifier tasks have completed
     }
 
     private async Task RunSingleScorer(
@@ -327,6 +340,214 @@ private static void RecordScores(
         }
     }
 
+    /// <summary>
+    /// Result of running a single classifier. A successful run carries the normalized items, each
+    /// paired with the name it is grouped under; a failed run carries the error message instead.
+    /// The factory methods populate one of the two and leave the other <c>null</c>.
+    /// </summary>
+    private sealed class ClassifierOutcome
+    {
+        private ClassifierOutcome(string classifierName) => ClassifierName = classifierName;
+
+        /// <summary>Resolved name of the classifier that produced this outcome.</summary>
+        public string ClassifierName { get; }
+
+        /// <summary>Normalized (grouping name, stored item) pairs; <c>null</c> for a failed run.</summary>
+        public IReadOnlyList<(string Name, Dictionary<string, object> Item)>? Items { get; private init; }
+
+        /// <summary>Failure message; <c>null</c> for a successful run.</summary>
+        public string? ErrorMessage { get; private init; }
+
+        /// <summary>Outcome of a classifier that ran to completion (possibly with zero items).</summary>
+        public static ClassifierOutcome Success(
+            string classifierName,
+            IReadOnlyList<(string Name, Dictionary<string, object> Item)> items)
+            => new ClassifierOutcome(classifierName) { Items = items };
+
+        /// <summary>Outcome of a classifier that failed with <paramref name="errorMessage"/>.</summary>
+        public static ClassifierOutcome Error(string classifierName, string errorMessage)
+            => new ClassifierOutcome(classifierName) { ErrorMessage = errorMessage };
+    }
+
+    /// <summary>
+    /// Runs one classifier in its own span and stores the result in <paramref name="outcomes"/> at
+    /// <paramref name="classifierIndex"/>. An exception thrown by the classifier, or a returned
+    /// classification with a null or empty id, is recorded on the span and captured as an error
+    /// outcome instead of propagating, so one failing classifier does not fail the others.
+    /// </summary>
+    private async Task RunSingleClassifier(
+        string experimentId,
+        Activity rootActivity,
+        IClassifier<TInput, TOutput> classifier,
+        int classifierIndex,
+        TaskResult<TInput, TOutput> taskResult,
+        EvalTrace trace,
+        ClassifierOutcome?[] outcomes)
+    {
+        // A blank classifier name falls back to a positional one so the span name is never empty.
+        var resolvedName = string.IsNullOrWhiteSpace(classifier.Name)
+            ? $"classifier_{classifierIndex}"
+            : classifier.Name;
+
+        var classifierActivity = _activitySource.StartActivity(resolvedName);
+        try
+        {
+            // Span tags are set inside the try: if serializing the case fails, the finally block
+            // still stops the activity that was just started instead of leaking it.
+            classifierActivity?.SetTag(BraintrustTracing.ParentKey, $"experiment_id:{experimentId}");
+            classifierActivity?.SetTag(
+                "braintrust.span_attributes",
+                ToJson(new { type = "classifier", name = resolvedName, purpose = "scorer" }));
+
+            var datasetCase = taskResult.DatasetCase;
+            classifierActivity?.SetTag(
+                "braintrust.input_json",
+                ToJson(new
+                {
+                    input = datasetCase.Input,
+                    expected = datasetCase.Expected,
+                    output = taskResult.Result,
+                    metadata = datasetCase.Metadata
+                }));
+
+            using var classifierScope = BraintrustContext.OfExperiment(experimentId).MakeCurrent();
+
+            // Classification and normalization share one handler because both failure modes are
+            // reported identically: error status + exception event on the span, error outcome.
+            List<(string Name, Dictionary<string, object> Item)> normalized;
+            try
+            {
+                var rawResults = classifier is ITracedClassifier<TInput, TOutput> tracedClassifier
+                    ? await tracedClassifier.Classify(taskResult, trace).ConfigureAwait(false)
+                    : await classifier.Classify(taskResult).ConfigureAwait(false);
+
+                // A null result is treated as "no classifications".
+                rawResults ??= Array.Empty<Classification>();
+
+                // Normalize: resolve name + validate, build storage items (no Name key).
+                normalized = new List<(string Name, Dictionary<string, object> Item)>(rawResults.Count);
+                foreach (var classification in rawResults)
+                {
+                    if (string.IsNullOrEmpty(classification.Id))
+                    {
+                        throw new InvalidOperationException(
+                            "When returning structured classifier results, each classification must be a non-empty object.");
+                    }
+
+                    var groupingName = string.IsNullOrWhiteSpace(classification.Name)
+                        ? resolvedName
+                        : classification.Name!;
+
+                    // Label and metadata are only stored when present, keeping the item minimal.
+                    var item = new Dictionary<string, object> { ["id"] = classification.Id };
+                    if (classification.Label != null)
+                    {
+                        item["label"] = classification.Label;
+                    }
+                    if (classification.Metadata != null && classification.Metadata.Count > 0)
+                    {
+                        item["metadata"] = classification.Metadata;
+                    }
+
+                    normalized.Add((groupingName, item));
+                }
+            }
+            catch (Exception ex)
+            {
+                classifierActivity?.SetStatus(ActivityStatusCode.Error, ex.Message);
+                classifierActivity?.AddEvent(CreateExceptionEvent(ex));
+                outcomes[classifierIndex] = ClassifierOutcome.Error(resolvedName, ex.Message);
+                return;
+            }
+
+            // Build output_json keyed by resolved name for the classifier span.
+            if (normalized.Count > 0)
+            {
+                var outputByName = new Dictionary<string, List<Dictionary<string, object>>>();
+                foreach (var (name, item) in normalized)
+                {
+                    if (!outputByName.TryGetValue(name, out var list))
+                    {
+                        list = new List<Dictionary<string, object>>();
+                        outputByName[name] = list;
+                    }
+                    list.Add(item);
+                }
+                classifierActivity?.SetTag("braintrust.output_json", ToJson(outputByName));
+            }
+
+            outcomes[classifierIndex] = ClassifierOutcome.Success(resolvedName, normalized);
+        }
+        finally
+        {
+            classifierActivity?.Stop();
+        }
+    }
+
+    /// <summary>
+    /// Folds the per-classifier outcomes into tags on the root span:
+    /// <list type="bullet">
+    ///   <item><c>braintrust.classifications</c> — every produced item, bucketed by its grouping name; set only when at least one item exists.</item>
+    ///   <item><c>braintrust.metadata</c> — the case metadata plus a <c>classifier_errors</c> map (classifier name to message); set only when a classifier failed.</item>
+    /// </list>
+    /// </summary>
+    private static void AggregateClassifierOutcomes(
+        Activity rootActivity,
+        IReadOnlyDictionary<string, object> caseMetadata,
+        ClassifierOutcome?[] outcomes)
+    {
+        // No outcome slots at all: leave the root span untouched.
+        if (outcomes.Length == 0)
+        {
+            return;
+        }
+
+        var itemsByName = new Dictionary<string, List<Dictionary<string, object>>>();
+        var errorsByClassifier = new Dictionary<string, string>();
+
+        foreach (var outcome in outcomes)
+        {
+            // Null slots, and outcomes with neither an error nor items, match no case and are skipped.
+            switch (outcome)
+            {
+                // Failed classifier: remember its message; an error outcome carries no items.
+                case { ErrorMessage: { } message } failed:
+                    errorsByClassifier[failed.ClassifierName] = message;
+                    break;
+
+                // Successful classifier: append each item to the bucket for its grouping name.
+                case { Items: { } items }:
+                    foreach (var (groupName, entry) in items)
+                    {
+                        if (!itemsByName.TryGetValue(groupName, out var bucket))
+                        {
+                            bucket = new List<Dictionary<string, object>>();
+                            itemsByName[groupName] = bucket;
+                        }
+
+                        bucket.Add(entry);
+                    }
+
+                    break;
+            }
+        }
+
+        if (itemsByName.Count > 0)
+        {
+            rootActivity.SetTag("braintrust.classifications", ToJson(itemsByName));
+        }
+
+        if (errorsByClassifier.Count == 0)
+        {
+            return;
+        }
+
+        // Re-emit the case metadata with the error map added under its own key.
+        var metadataWithErrors = new Dictionary<string, object>(caseMetadata);
+        metadataWithErrors["classifier_errors"] = errorsByClassifier;
+        rootActivity.SetTag("braintrust.metadata", ToJson(metadataWithErrors));
+    }
+
     private static string ToJson(object obj)
     {
         return JsonSerializer.Serialize(obj, JsonOptions);
@@ -388,6 +609,7 @@ public sealed class Builder
         internal IDataset<TInput, TOutput>? _dataset;
         internal ITask<TInput, TOutput>? _task;
         internal List<IScorer<TInput, TOutput>> _scorers = new();
+        internal List<IClassifier<TInput, TOutput>> _classifiers = new();
         internal IReadOnlyList<string>? _experimentTags;
         internal IReadOnlyDictionary<string, object>? _experimentMetadata;
         internal int? _maxConcurrency = 10;
@@ -406,9 +628,9 @@ public async Task<Eval<TInput, TOutput>> BuildAsync()
             _apiClient ??= BraintrustApiClient.Of(_config);
             _btqlClient ??= new BtqlClient(_config);
 
-            if (_scorers.Count == 0)
+            if (_scorers.Count == 0 && _classifiers.Count == 0)
             {
-                throw new InvalidOperationException("Must provide at least one scorer");
+                throw new InvalidOperationException("Must provide at least one scorer or classifier");
             }
 
             if (_dataset == null)
@@ -561,6 +783,16 @@ public Builder Scorers(params IScorer<TInput, TOutput>[] scorers)
             return this;
         }
 
+        /// <summary>
+        /// Set the classifiers, replacing any that were set by an earlier call.
+        /// At least one of <see cref="Scorers"/> or <see cref="Classifiers"/> must be provided; otherwise <see cref="BuildAsync"/> throws <see cref="InvalidOperationException"/>.
+        /// </summary>
+        public Builder Classifiers(params IClassifier<TInput, TOutput>[] classifiers)
+        {
+            _classifiers = classifiers.ToList(); // copy, so later changes to the caller's array do not affect the builder
+            return this;
+        }
+
         /// <summary>
         /// Set the experiment-level tags.
         /// These tags are applied to the experiment itself, not individual cases.
diff --git a/src/Braintrust.Sdk/Eval/FunctionClassifier.cs b/src/Braintrust.Sdk/Eval/FunctionClassifier.cs
new file mode 100644
index 0000000..5aeb473
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/FunctionClassifier.cs
@@ -0,0 +1,73 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// Adapts a delegate to <see cref="IClassifier{TInput,TOutput}"/>.
+/// The delegate may be synchronous or asynchronous and may return either a single <see cref="Classification"/>
+/// or a list of them. A <c>null</c> result becomes an empty list, meaning "no classifications for this case".
+/// </summary>
+public class FunctionClassifier<TInput, TOutput> : IClassifier<TInput, TOutput>
+    where TInput : notnull
+    where TOutput : notnull
+{
+    private static readonly IReadOnlyList<Classification> Empty = Array.Empty<Classification>();
+
+    // Each constructor adapts its delegate to this one asynchronous, list-returning shape.
+    private readonly Func<TaskResult<TInput, TOutput>, Task<IReadOnlyList<Classification>>> _classifierFn;
+
+    /// <summary>
+    /// Create a classifier from a synchronous function returning a single classification (or null).
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, Classification?> classifierFn)
+    {
+        Name = name;
+        _classifierFn = taskResult =>
+            Task.FromResult(AsList(classifierFn(taskResult)));
+    }
+
+    /// <summary>
+    /// Create a classifier from a synchronous function returning a list of classifications (or null).
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, IReadOnlyList<Classification>?> classifierFn)
+    {
+        Name = name;
+        _classifierFn = taskResult =>
+            Task.FromResult(classifierFn(taskResult) ?? Empty);
+    }
+
+    /// <summary>
+    /// Create a classifier from an asynchronous function returning a single classification (or null).
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, Task<Classification?>> classifierFn)
+    {
+        Name = name;
+        _classifierFn = async taskResult =>
+            AsList(await classifierFn(taskResult).ConfigureAwait(false));
+    }
+
+    /// <summary>
+    /// Create a classifier from an asynchronous function returning a list of classifications (or null).
+    /// </summary>
+    public FunctionClassifier(string name, Func<TaskResult<TInput, TOutput>, Task<IReadOnlyList<Classification>?>> classifierFn)
+    {
+        Name = name;
+        _classifierFn = async taskResult =>
+            (await classifierFn(taskResult).ConfigureAwait(false)) ?? Empty;
+    }
+
+    /// <summary>
+    /// The name passed to the constructor.
+    /// </summary>
+    public string Name { get; }
+
+    /// <summary>
+    /// Invokes the wrapped function and returns its result as a list (empty when the function returned null).
+    /// </summary>
+    public Task<IReadOnlyList<Classification>> Classify(TaskResult<TInput, TOutput> taskResult)
+        => _classifierFn(taskResult);
+
+    /// <summary>
+    /// Wraps an optional single classification as a list: one element when present, empty when null.
+    /// </summary>
+    private static IReadOnlyList<Classification> AsList(Classification? single)
+        => single is { } value ? new[] { value } : Empty;
+}
diff --git a/src/Braintrust.Sdk/Eval/IClassifier.cs b/src/Braintrust.Sdk/Eval/IClassifier.cs
new file mode 100644
index 0000000..f85f502
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/IClassifier.cs
@@ -0,0 +1,30 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// A classifier categorizes and labels eval outputs.
+/// Unlike <see cref="IScorer{TInput,TOutput}"/> (which returns numeric 0-1 values),
+/// classifiers return structured <see cref="Classification"/> items with an id and optional label and metadata.
+/// </summary>
+/// <remarks>
+/// Implementations must be thread-safe as classifiers may be executed concurrently.
+/// Classifier failures are non-fatal: an exception thrown by <see cref="Classify"/>, or a returned classification
+/// with a null or empty id, is recorded under <c>classifier_errors</c> in the eval span's metadata and does not abort the evaluation.
+/// </remarks>
+/// <typeparam name="TInput">Type of the input data</typeparam>
+/// <typeparam name="TOutput">Type of the output data</typeparam>
+public interface IClassifier<TInput, TOutput>
+    where TInput : notnull
+    where TOutput : notnull
+{
+    /// <summary>
+    /// Gets the name of this classifier. Used as the classifier span name and as the default grouping key when a
+    /// returned <see cref="Classification"/> has no <c>Name</c>. A null or whitespace name is replaced by the eval runner with <c>classifier_</c> followed by the classifier's index.
+    /// </summary>
+    string Name { get; }
+
+    /// <summary>
+    /// Classify the task result and return zero or more classifications.
+    /// Return an empty list to indicate no classifications for this case; the eval runner also treats a null list as empty.
+    /// </summary>
+    Task<IReadOnlyList<Classification>> Classify(TaskResult<TInput, TOutput> taskResult);
+}
diff --git a/src/Braintrust.Sdk/Eval/ITracedClassifier.cs b/src/Braintrust.Sdk/Eval/ITracedClassifier.cs
new file mode 100644
index 0000000..97fd6d7
--- /dev/null
+++ b/src/Braintrust.Sdk/Eval/ITracedClassifier.cs
@@ -0,0 +1,24 @@
+namespace Braintrust.Sdk.Eval;
+
+/// <summary>
+/// A classifier that receives access to the distributed trace (spans) of the task that was evaluated.
+/// This allows classifiers to inspect intermediate LLM calls and tool-use chains, not just the final output.
+///
+/// Implement this interface when your classifier needs to examine multi-turn conversations or tool-use chains
+/// (e.g. classifying a conversation pattern as "single-turn", "tool-heavy", or "clarification-loop").
+/// When a classifier implements this interface, <see cref="Classify(TaskResult{TInput,TOutput},EvalTrace)"/>
+/// is called instead of <see cref="IClassifier{TInput,TOutput}.Classify(TaskResult{TInput,TOutput})"/>.
+/// Backward-compatible: classifiers that only implement <see cref="IClassifier{TInput,TOutput}"/> continue to work without change.
+/// </summary>
+/// <typeparam name="TInput">The type of input data for the evaluation</typeparam>
+/// <typeparam name="TOutput">The type of output produced by the task</typeparam>
+public interface ITracedClassifier<TInput, TOutput> : IClassifier<TInput, TOutput>
+    where TInput : notnull
+    where TOutput : notnull
+{
+    /// <summary>
+    /// Classify the task result using the distributed trace for additional context. When any traced classifier is registered, the eval runner force-flushes pending spans before classifiers run.
+    /// The eval runner calls this overload, not <see cref="IClassifier{TInput,TOutput}.Classify(TaskResult{TInput,TOutput})"/>, for every classifier that implements this interface.
+    /// </summary>
+    Task<IReadOnlyList<Classification>> Classify(TaskResult<TInput, TOutput> taskResult, EvalTrace trace);
+}
diff --git a/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs b/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs
new file mode 100644
index 0000000..b790130
--- /dev/null
+++ b/tests/Braintrust.Sdk.Tests/Eval/ClassifierTest.cs
@@ -0,0 +1,573 @@
+using System.Diagnostics;
+using System.Text.Json;
+using Braintrust.Sdk.Config;
+using Braintrust.Sdk.Eval;
+
+namespace Braintrust.Sdk.Tests.Eval;
+
+[Collection("BraintrustGlobals")]
+public class ClassifierTest : IDisposable
+{
+    private readonly ActivityListener _activityListener;
+
+    // Calls Braintrust.ResetForTest(), then registers a listener that fully samples "braintrust-dotnet" activities.
+    public ClassifierTest()
+    {
+        Braintrust.ResetForTest();
+        var listener = new ActivityListener();
+        listener.ShouldListenTo = activitySource => activitySource.Name == "braintrust-dotnet";
+        listener.Sample = (ref ActivityCreationOptions<ActivityContext> _) => ActivitySamplingResult.AllDataAndRecorded;
+        _activityListener = listener;
+        ActivitySource.AddActivityListener(listener);
+    }
+
+    public void Dispose()
+    {
+        _activityListener.Dispose(); // always assigned by the constructor, so no null-conditional is needed
+        Braintrust.ResetForTest();   // same reset call the constructor makes
+    }
+
+    // =====================================================================
+    // FunctionClassifier shape normalization
+    // =====================================================================
+
+    [Fact]
+    public async Task FunctionClassifierReturnsSingleClassification()
+    {
+        // A delegate returning one Classification should surface as a one-element list.
+        var sut = new FunctionClassifier<string, string>(
+            "category",
+            _ => new Classification("greeting", Label: "Greeting"));
+
+        var classifications = await sut.Classify(MakeTaskResult("hello", "hi"));
+
+        var only = Assert.Single(classifications);
+        Assert.Equal("greeting", only.Id);
+        Assert.Equal("Greeting", only.Label);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierReturnsList()
+    {
+        // A delegate returning a list should come back with both items in the same order.
+        var sut = new FunctionClassifier<string, string>(
+            "sentiment",
+            _ => (IReadOnlyList<Classification>)new[]
+            {
+                new Classification("positive", Label: "Positive"),
+                new Classification("enthusiastic", Label: "Enthusiastic")
+            });
+        var classifications = await sut.Classify(MakeTaskResult("great!", ""));
+
+        Assert.Collection(classifications,
+            first => Assert.Equal("positive", first.Id),
+            second => Assert.Equal("enthusiastic", second.Id));
+    }
+
+    [Fact]
+    public async Task FunctionClassifierNullReturnsEmptyList()
+    {
+        // A delegate returning a null single classification is expected to yield an empty list.
+        var sut = new FunctionClassifier<string, string>("maybe", _ => (Classification?)null);
+        var input = MakeTaskResult("hello", "hi");
+
+        var classifications = await sut.Classify(input);
+        Assert.Empty(classifications);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierNullListReturnsEmptyList()
+    {
+        // A delegate returning a null list is expected to yield an empty list rather than null.
+        var sut = new FunctionClassifier<string, string>("maybe", _ => (IReadOnlyList<Classification>?)null);
+        var input = MakeTaskResult("hello", "hi");
+
+        var classifications = await sut.Classify(input);
+        Assert.Empty(classifications);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierAsyncSingle()
+    {
+        // A Task-returning delegate with a single classification should yield a one-element list.
+        var sut = new FunctionClassifier<string, string>(
+            "category",
+            _ => Task.FromResult<Classification?>(new Classification("greeting")));
+
+        var classifications = await sut.Classify(MakeTaskResult("hello", "hi"));
+        Assert.Equal("greeting", Assert.Single(classifications).Id);
+    }
+
+    [Fact]
+    public async Task FunctionClassifierAsyncList()
+    {
+        // A Task-returning delegate with a two-item list should yield a list of the same length.
+        var sut = new FunctionClassifier<string, string>(
+            "category",
+            _ => Task.FromResult<IReadOnlyList<Classification>?>(new[]
+            {
+                new Classification("a"),
+                new Classification("b")
+            }));
+        var classifications = await sut.Classify(MakeTaskResult("hello", "hi"));
+        Assert.Equal(2, classifications.Count);
+    }
+
+    // =====================================================================
+    // Builder validation
+    // =====================================================================
+
+    [Fact]
+    public async Task EvalRequiresAtLeastScorersOrClassifiers()
+    {
+        // Neither scorers nor classifiers are configured, so BuildAsync is expected to be rejected.
+        var settings = BraintrustConfig.Of(("BRAINTRUST_API_KEY", "test-key"));
+        var apiClient = new MockBraintrustApiClient();
+
+        var failure = await Assert.ThrowsAsync<InvalidOperationException>(
+            () => Eval<string, string>.NewBuilder()
+                .Name("test-eval")
+                .Config(settings)
+                .ApiClient(apiClient)
+                .Cases(DatasetCase.Of("input", "expected"))
+                .TaskFunction(x => x)
+                .BuildAsync());
+        Assert.Contains("at least one scorer or classifier", failure.Message);
+    }
+
+    [Fact]
+    public async Task EvalBuildsWithClassifiersOnly()
+    {
+        // A classifier without any scorer should be enough to build and run an eval.
+        var settings = BraintrustConfig.Of(
+            ("BRAINTRUST_API_KEY", "test-key"),
+            ("BRAINTRUST_APP_URL", "https://braintrust.dev"),
+            ("BRAINTRUST_DEFAULT_PROJECT_NAME", "test-project"));
+        var greetingClassifier = new FunctionClassifier<string, string>(
+            "category",
+            _ => new Classification("greeting"));
+
+        var builder = Eval<string, string>.NewBuilder()
+            .Name("test-eval")
+            .Config(settings)
+            .ApiClient(new MockBraintrustApiClient())
+            .Cases(DatasetCase.Of("hello", "hi"))
+            .TaskFunction(x => x)
+            .Classifiers(greetingClassifier);
+        var eval = await builder.BuildAsync();
+        var outcome = await eval.RunAsync();
+        Assert.NotNull(outcome.ExperimentUrl);
+    }
+
+    // =====================================================================
+    // Runner — classifier results on the eval span
+    // =====================================================================
+
+    [Fact]
+    public async Task RunnerWritesClassificationsToEvalSpan()
+    {
+        // A single classifier result should be written to the eval span and produce one classifier span.
+        var (rootSpans, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "category", _ => new Classification("greeting", Label: "Greeting"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        // JsonDocument is backed by pooled memory; dispose it once the assertions are done.
+        using var classifications = ReadClassifications(root);
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("category", out var categoryItems));
+        Assert.Equal(1, categoryItems.GetArrayLength());
+        Assert.Equal("greeting", categoryItems[0].GetProperty("id").GetString());
+        Assert.Equal("Greeting", categoryItems[0].GetProperty("label").GetString());
+        // Single classifier span produced
+        Assert.Single(classifierSpans);
+    }
+
+    [Fact]
+    public async Task RunnerWritesNoClassificationsTagWhenAllNull()
+    {
+        // When the only classifier returns null, the eval span should not carry the classifications tag.
+        IClassifier<string, string>[] silent =
+        {
+            new FunctionClassifier<string, string>("maybe", _ => (Classification?)null)
+        };
+        var (evalSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") }, taskFn: x => x, classifiers: silent);
+
+        var evalSpan = Assert.Single(evalSpans);
+        Assert.Null(evalSpan.GetTagItem("braintrust.classifications"));
+    }
+
+    [Fact]
+    public async Task RunnerCombinesScorersAndClassifiers()
+    {
+        // The eval span does not store scores itself, so this only verifies that the classification
+        // path is hit independently from the scorer path when both are configured.
+        // Score span coverage is in EvalTest.
+        var exactMatch = new FunctionScorer<string, string>("exact", (e, a) => e == a ? 1.0 : 0.0);
+        var category = new FunctionClassifier<string, string>(
+            "category", _ => new Classification("greeting"));
+
+        var (evalSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            scorers: new IScorer<string, string>[] { exactMatch },
+            classifiers: new IClassifier<string, string>[] { category });
+
+        var evalSpan = Assert.Single(evalSpans);
+        var classificationsTag = evalSpan.GetTagItem("braintrust.classifications");
+        Assert.NotNull(classificationsTag);
+    }
+
+    [Fact]
+    public async Task RunnerHandlesClassifierExceptionWithoutAbortingEval()
+    {
+        // A throwing classifier should not stop the other classifier from running or mark the eval span as failed.
+        var (rootSpans, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new ThrowingClassifier("broken", "classifier boom"),
+                new FunctionClassifier<string, string>("working", _ => new Classification("ok"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        // Classifier errors merged into braintrust.metadata under classifier_errors
+        var metadataJson = root.GetTagItem("braintrust.metadata") as string;
+        Assert.NotNull(metadataJson);
+        using var doc = JsonDocument.Parse(metadataJson);
+        Assert.True(doc.RootElement.TryGetProperty("classifier_errors", out var errors));
+        Assert.Equal("classifier boom", errors.GetProperty("broken").GetString());
+
+        // The working classifier still wrote its classification (the pooled JsonDocument is disposed on exit)
+        using var classifications = ReadClassifications(root);
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("working", out _));
+
+        // The broken classifier span has error status + exception event
+        var brokenSpan = classifierSpans.First(s => s.DisplayName == "broken");
+        Assert.Equal(ActivityStatusCode.Error, brokenSpan.Status);
+        Assert.NotEmpty(brokenSpan.Events);
+
+        // The eval (root) span itself is not marked Error by a classifier failure
+        Assert.Equal(ActivityStatusCode.Unset, root.Status);
+    }
+
+    [Fact]
+    public async Task RunnerWritesClassifierSpanAttributes()
+    {
+        // The classifier span should be named after the classifier and carry type/name/purpose attributes.
+        var (_, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>("my_classifier", _ => new Classification("foo"))
+            });
+
+        var classifierSpan = Assert.Single(classifierSpans);
+        Assert.Equal("my_classifier", classifierSpan.DisplayName);
+
+        var rawAttributes = classifierSpan.GetTagItem("braintrust.span_attributes") as string;
+        Assert.NotNull(rawAttributes);
+        using var parsed = JsonDocument.Parse(rawAttributes);
+        var attributes = parsed.RootElement;
+        Assert.Equal("classifier", attributes.GetProperty("type").GetString());
+        Assert.Equal("my_classifier", attributes.GetProperty("name").GetString());
+        Assert.Equal("scorer", attributes.GetProperty("purpose").GetString());
+    }
+
+    [Fact]
+    public async Task RunnerMultiLabelResultPreservesOrder()
+    {
+        // Two labels from one classifier should appear in the same order; the pooled JsonDocument is disposed on exit.
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("great!", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "sentiment", _ => (IReadOnlyList<Classification>)new[]
+                    {
+                        new Classification("positive", Label: "Positive"),
+                        new Classification("enthusiastic", Label: "Enthusiastic")
+                    })
+            });
+
+        var root = Assert.Single(rootSpans);
+        using var classifications = ReadClassifications(root);
+        Assert.NotNull(classifications);
+        var items = classifications.RootElement.GetProperty("sentiment");
+        Assert.Equal(2, items.GetArrayLength());
+        Assert.Equal("positive", items[0].GetProperty("id").GetString());
+        Assert.Equal("enthusiastic", items[1].GetProperty("id").GetString());
+    }
+
+    [Fact]
+    public async Task RunnerClassificationNameDefaultsToClassifierName()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                // Classification has no Name set, so it is expected under the classifier's own name
+                new FunctionClassifier<string, string>(
+                    "my_classifier", _ => new Classification("foo"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        // JsonDocument is backed by pooled memory; dispose it once the assertions are done.
+        using var classifications = ReadClassifications(root);
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("my_classifier", out _));
+    }
+
+    [Fact]
+    public async Task RunnerClassificationExplicitNameOverridesClassifierName()
+    {
+        // An explicit Classification.Name should replace the classifier name as the key; the JsonDocument is disposed on exit.
+        var (rootSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>(
+                    "my_classifier", _ => new Classification("foo", Name: "override_name"))
+            });
+
+        var root = Assert.Single(rootSpans);
+        using var classifications = ReadClassifications(root);
+        Assert.NotNull(classifications);
+        Assert.True(classifications.RootElement.TryGetProperty("override_name", out _));
+        Assert.False(classifications.RootElement.TryGetProperty("my_classifier", out _));
+    }
+
+    [Fact]
+    public async Task RunnerEmptyClassificationItemIsRecordedAsError()
+    {
+        IClassifier<string, string>[] classifiers =
+        {
+            // Default(Classification) — Id is null/empty, so should fail validation
+            new FunctionClassifier<string, string>("bad", _ => (Classification?)default(Classification))
+        };
+        var (evalSpans, classifierSpans) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: classifiers);
+
+        // The validation failure should be reported under classifier_errors in the eval span's metadata.
+        var evalSpan = Assert.Single(evalSpans);
+        var rawMetadata = evalSpan.GetTagItem("braintrust.metadata") as string;
+        Assert.NotNull(rawMetadata);
+        using var metadata = JsonDocument.Parse(rawMetadata);
+        var message = metadata.RootElement.GetProperty("classifier_errors").GetProperty("bad").GetString();
+        Assert.NotNull(message);
+        Assert.Contains("each classification must be a non-empty object", message);
+
+        // The classifier's own span should carry the error status.
+        var failedSpan = Assert.Single(classifierSpans);
+        Assert.Equal(ActivityStatusCode.Error, failedSpan.Status);
+    }
+
+    [Fact]
+    public async Task RunnerAccumulatesClassificationsAcrossCases()
+    {
+        var (rootSpans, _) = await RunEval(
+            cases: new[]
+            {
+                DatasetCase.Of("hi", "x"),
+                DatasetCase.Of("hello", "x"),
+                DatasetCase.Of("ok", "x")
+            },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>("category", tr => new Classification(tr.Result.Length > 3 ? "long" : "short"))
+            });
+
+        // One eval span per case; span order is not assumed, so ids are sorted. Each JsonDocument is disposed per iteration.
+        var ids = new List<string?>();
+        foreach (var root in rootSpans)
+        {
+            using var classifications = ReadClassifications(root);
+            Assert.NotNull(classifications);
+            ids.Add(classifications.RootElement.GetProperty("category")[0].GetProperty("id").GetString());
+        }
+        Assert.Equal(new[] { "long", "short", "short" }, ids.OrderBy(id => id, StringComparer.Ordinal));
+    }
+
+    [Fact]
+    public async Task RunnerClassifierInputContainsAllScoringArgs()
+    {
+        // The classifier span's input JSON should expose input, expected, output and metadata.
+        var fullCase = DatasetCase.Of(
+            "hello", "hi",
+            new List<string>(),
+            new Dictionary<string, object> { ["k"] = "v" });
+        var (_, classifierSpans) = await RunEval(
+            cases: new[] { fullCase },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[]
+            {
+                new FunctionClassifier<string, string>("category", _ => new Classification("greeting"))
+            });
+
+        var classifierSpan = Assert.Single(classifierSpans);
+        var rawInput = classifierSpan.GetTagItem("braintrust.input_json") as string;
+        Assert.NotNull(rawInput);
+        using var parsed = JsonDocument.Parse(rawInput);
+        var payload = parsed.RootElement;
+        Assert.Equal("hello", payload.GetProperty("input").GetString());
+        Assert.Equal("hi", payload.GetProperty("expected").GetString());
+        Assert.Equal("hello", payload.GetProperty("output").GetString());
+        Assert.True(payload.TryGetProperty("metadata", out var md));
+        Assert.Equal("v", md.GetProperty("k").GetString());
+    }
+
+    // =====================================================================
+    // ITracedClassifier
+    // =====================================================================
+
+    [Fact]
+    public async Task TracedClassifierReceivesEvalTrace()
+    {
+        // Arrange: a BTQL mock holding one "llm" span, and a traced classifier that records how many spans it sees.
+        var request = new { messages = new[] { new { role = "user", content = "hi" } } };
+        var response = new { choices = new[] { new { message = new { role = "assistant", content = "hello" } } } };
+        var llmSpan = MockBtqlClient.MakeSpan("llm", input: request, output: response);
+        var btql = new MockBtqlClient(new[] { llmSpan });
+
+        var observedSpanCount = -1;
+        var traceInspector = new TracedClassifier(
+            "trace_inspector",
+            async (_, trace) =>
+            {
+                observedSpanCount = (await trace.GetSpansAsync("llm")).Count;
+                return new[] { new Classification("multi_turn") };
+            });
+
+        // Act
+        var (evalSpans, _) = await RunEval(
+            cases: new[] { DatasetCase.Of("hello", "hi") },
+            taskFn: x => x,
+            classifiers: new IClassifier<string, string>[] { traceInspector },
+            btqlClient: btql);
+
+        // Assert: one eval span, one span visible to the classifier, one BTQL query issued.
+        Assert.Single(evalSpans);
+        Assert.Equal(1, observedSpanCount);
+        Assert.Equal(1, btql.QueryCount);
+    }
+
+    // =====================================================================
+    // Helpers
+    // =====================================================================
+
+    private static TaskResult<string, string> MakeTaskResult(string input, string output) =>
+        new TaskResult<string, string>(output, new DatasetCase<string, string>(input, "")); // case's second argument is left empty
+
+    // Parses the span's "braintrust.classifications" tag as JSON; returns null when the tag is not a string. Caller disposes.
+    private static JsonDocument? ReadClassifications(Activity span) =>
+        span.GetTagItem("braintrust.classifications") is string json
+            ? JsonDocument.Parse(json)
+            : null;
+
+    // Builds and runs an eval named "classifier-test" with mock clients while recording every stopped
+    // activity from the "braintrust-dotnet" source. Returns the spans named "eval" and the spans whose
+    // span-attributes tag contains "type":"classifier". Throws when given neither scorers nor classifiers.
+    private async Task<(List<Activity> RootSpans, List<Activity> ClassifierSpans)> RunEval(
+        DatasetCase<string, string>[] cases,
+        Func<string, string> taskFn,
+        IScorer<string, string>[]? scorers = null,
+        IClassifier<string, string>[]? classifiers = null,
+        MockBtqlClient? btqlClient = null)
+    {
+        var settings = BraintrustConfig.Of(
+            ("BRAINTRUST_API_KEY", "test-key"),
+            ("BRAINTRUST_APP_URL", "https://braintrust.dev"),
+            ("BRAINTRUST_DEFAULT_PROJECT_NAME", "test-project"));
+        var apiClient = new MockBraintrustApiClient();
+        btqlClient ??= new MockBtqlClient();
+
+        // Collect each activity as it stops; the listener is disposed when this method exits.
+        var stopped = new List<Activity>();
+        using var recorder = new ActivityListener
+        {
+            ShouldListenTo = source => source.Name == "braintrust-dotnet",
+            Sample = (ref ActivityCreationOptions<ActivityContext> _) => ActivitySamplingResult.AllDataAndRecorded,
+            ActivityStopped = activity => stopped.Add(activity)
+        };
+        ActivitySource.AddActivityListener(recorder);
+
+        var builder = Eval<string, string>.NewBuilder()
+            .Name("classifier-test")
+            .Config(settings)
+            .ApiClient(apiClient)
+            .BtqlClient(btqlClient)
+            .Cases(cases)
+            .TaskFunction(taskFn);
+
+        if (scorers is { Length: > 0 })
+        {
+            builder.Scorers(scorers);
+        }
+
+        if (classifiers is { Length: > 0 })
+        {
+            builder.Classifiers(classifiers);
+        }
+        else if (scorers is null or { Length: 0 })
+        {
+            // The builder rejects an eval with no scorers and no classifiers, so fail fast with a clearer message.
+            throw new InvalidOperationException("Test setup error: provide at least one scorer or classifier.");
+        }
+
+        var eval = await builder.BuildAsync();
+        await eval.RunAsync();
+
+        // Split the recorded activities into eval spans and classifier spans, preserving stop order.
+        var rootSpans = stopped.FindAll(a => a.DisplayName == "eval");
+        var classifierSpans = stopped.FindAll(a =>
+            a.GetTagItem("braintrust.span_attributes") is string attrs && attrs.Contains("\"type\":\"classifier\""));
+        return (rootSpans, classifierSpans);
+    }
+
+    // Test double whose Classify always throws InvalidOperationException with the configured message.
+    private sealed class ThrowingClassifier : IClassifier<string, string>
+    {
+        private readonly string _failureMessage;
+
+        public ThrowingClassifier(string name, string message) => (Name, _failureMessage) = (name, message);
+
+        public string Name { get; }
+
+        public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult) =>
+            throw new InvalidOperationException(_failureMessage);
+    }
+
+    // Test double that forwards the trace-aware Classify overload to a caller-supplied delegate.
+    private sealed class TracedClassifier : ITracedClassifier<string, string>
+    {
+        private readonly Func<TaskResult<string, string>, EvalTrace, Task<IReadOnlyList<Classification>>> _classifyWithTrace;
+
+        public TracedClassifier(
+            string name,
+            Func<TaskResult<string, string>, EvalTrace, Task<IReadOnlyList<Classification>>> fn) => (Name, _classifyWithTrace) = (name, fn);
+
+        public string Name { get; }
+
+        // Trace-less overload: always completes with an empty list.
+        public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult) =>
+            Task.FromResult<IReadOnlyList<Classification>>(Array.Empty<Classification>());
+
+        public Task<IReadOnlyList<Classification>> Classify(TaskResult<string, string> taskResult, EvalTrace trace) =>
+            _classifyWithTrace(taskResult, trace);
+    }
+}