From 2ef4ce9496c56132512313177c146ec2e4c72dbb Mon Sep 17 00:00:00 2001 From: Robin Mennens Date: Fri, 3 Jul 2026 17:06:38 +0200 Subject: [PATCH 1/2] feat(apollo-vertex): custom evaluator renderer registry for solution-tests Verticals can register custom evaluator result renderers keyed by evaluator id via `SolutionTestsConfig.evaluatorRenderers`, merged over the built-ins (`resolveEvaluatorRenderer` precedence: vertical > built-in > generic). This is the FE counterpart to the solution-test backend's custom evaluator builders. Renderer-specific config is bound at registration, not threaded through the runtime call site: `genericRenderer({ label })` captures a display label and `makeRenderer(schema, Component, bound)` spreads bound props into the component, so `EvaluatorRenderer`'s runtime signature never changes as renderers gain params. Labels now live on the registry entries; the standalone EVALUATOR_LABELS table is removed. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01EioRufJjChLWS7wQ2DsCLd --- .../registry/solution-tests/config.ts | 9 +- .../registry/solution-tests/constants.ts | 6 -- .../solution-tests/evaluator-results-view.tsx | 4 +- .../evaluators/generic-evaluator-result.tsx | 8 +- .../solution-tests/evaluators/registry.tsx | 87 ++++++++++--------- .../registry/solution-tests/index.ts | 15 ++++ 6 files changed, 77 insertions(+), 52 deletions(-) diff --git a/apps/apollo-vertex/registry/solution-tests/config.ts b/apps/apollo-vertex/registry/solution-tests/config.ts index ba0686718..9fa4f79d6 100644 --- a/apps/apollo-vertex/registry/solution-tests/config.ts +++ b/apps/apollo-vertex/registry/solution-tests/config.ts @@ -1,5 +1,6 @@ import type { ColumnDef } from "@tanstack/react-table"; import { DEFAULT_PASS_THRESHOLD } from "./constants"; +import type { EvaluatorRenderers } from "./evaluators/registry"; import type { SolutionTest } from "./types"; /** Per-vertical presentation config; everything else is hard-coded in `constants`. */ @@ -11,9 +12,11 @@ export interface SolutionTestsConfig { subjectNoun?: { singular: string; plural: string }; /** Score at/above which a result passes (drives pass color + KPI trend line). Defaults to 0.9. */ passThreshold?: number; - /** Debug aid: also render the raw Expected/Actual *input* panels in run-result - * details (typically wired to a dev-only flag). Defaults to false. */ + /** Show the Expected/Actual input panels in run-result details. Defaults to false (outputs only). */ showInputs?: boolean; + /** Custom evaluator-id -> renderer map; wins over the built-in registry. + * The FE counterpart to the BE `custom_evaluator_builders`. */ + evaluatorRenderers?: EvaluatorRenderers; } /** Config with defaults applied — what components read from context. */ @@ -23,6 +26,7 @@ export interface ResolvedSolutionTestsConfig { subjectNoun?: { singular: string; plural: string }; passThreshold: number; showInputs: boolean; + evaluatorRenderers: EvaluatorRenderers; } export function resolveConfig( @@ -34,5 +38,6 @@ export function resolveConfig( subjectNoun: config.subjectNoun, passThreshold: config.passThreshold ?? DEFAULT_PASS_THRESHOLD, showInputs: config.showInputs ?? false, + evaluatorRenderers: config.evaluatorRenderers ?? {}, }; } diff --git a/apps/apollo-vertex/registry/solution-tests/constants.ts b/apps/apollo-vertex/registry/solution-tests/constants.ts index af983cbe4..0898ae743 100644 --- a/apps/apollo-vertex/registry/solution-tests/constants.ts +++ b/apps/apollo-vertex/registry/solution-tests/constants.ts @@ -36,12 +36,6 @@ export const DEFAULT_PASS_THRESHOLD = 0.9; /** Most recent completed batches plotted on the KPI score-trend chart. */ export const MAX_TREND_POINTS = 10; -/** Evaluator id → display label. */ -export const EVALUATOR_LABELS: Record = { - "uipath-json-similarity": "JSON Similarity", - "uipath-llm-judge-output-semantic-similarity": "LLM Judge", -}; - /** Numeric status enum → English label. */ export const defaultTestStatusLabels: Record = { [SolutionTestStatus.Pending]: "Pending", diff --git a/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx b/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx index 16eca1051..5f11ab451 100644 --- a/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx +++ b/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx @@ -2,6 +2,7 @@ import { Fragment } from "react"; import { z } from "zod"; +import { useSolutionTestsConfig } from "./context"; import { resolveEvaluatorRenderer } from "./evaluators/registry"; import type { SolutionTestRunResult } from "./types"; @@ -50,11 +51,12 @@ export const EvaluatorResultsView = ({ actualOutput, result, }: EvaluatorResultsViewProps) => { + const { evaluatorRenderers } = useSolutionTestsConfig(); return (
{Object.entries(data).map(([evaluatorId, evaluator]) => ( - {resolveEvaluatorRenderer(evaluatorId)({ + {resolveEvaluatorRenderer(evaluatorId, evaluatorRenderers)({ evaluatorId, score: evaluator.score, rawDetails: evaluator.details, diff --git a/apps/apollo-vertex/registry/solution-tests/evaluators/generic-evaluator-result.tsx b/apps/apollo-vertex/registry/solution-tests/evaluators/generic-evaluator-result.tsx index fc4f4ced5..58f74dccf 100644 --- a/apps/apollo-vertex/registry/solution-tests/evaluators/generic-evaluator-result.tsx +++ b/apps/apollo-vertex/registry/solution-tests/evaluators/generic-evaluator-result.tsx @@ -2,7 +2,6 @@ import { z } from "zod"; import { useTranslation } from "react-i18next"; -import { EVALUATOR_LABELS } from "../constants"; import { useSolutionTestsConfig } from "../context"; import { JsonPanel } from "./output-panels"; import type { EvaluatorResultProps } from "./registry"; @@ -26,14 +25,15 @@ function scoreColorClass( export const GenericEvaluatorResult = ({ evaluatorId, + label, score, evaluatorDetails, expectedOutput, actualOutput, -}: EvaluatorResultProps) => { +}: EvaluatorResultProps & { label?: string }) => { const { t } = useTranslation(); const { passThreshold } = useSolutionTestsConfig(); - const label = EVALUATOR_LABELS[evaluatorId] ?? evaluatorId; + const displayLabel = label ?? evaluatorId; const justification = evaluatorDetails.justification; const scoreStr = score == null ? "—" : `${Math.round(score * 100)}%`; @@ -41,7 +41,7 @@ export const GenericEvaluatorResult = ({
- {label} + {displayLabel} diff --git a/apps/apollo-vertex/registry/solution-tests/evaluators/registry.tsx b/apps/apollo-vertex/registry/solution-tests/evaluators/registry.tsx index 5ef8ba07a..f209e0aa7 100644 --- a/apps/apollo-vertex/registry/solution-tests/evaluators/registry.tsx +++ b/apps/apollo-vertex/registry/solution-tests/evaluators/registry.tsx @@ -10,68 +10,77 @@ import { import { IxpExtractionResult } from "./ixp-extraction/ixp-extraction-result"; import { IxpDetailsSchema } from "./ixp-extraction/schema"; -const JSON_SIMILARITY_EVALUATOR_ID = "uipath-json-similarity"; -const LLM_JUDGE_EVALUATOR_ID = "uipath-llm-judge-output-semantic-similarity"; -const IXP_EXTRACTION_EVALUATOR_ID = "uipath-ixp-document-extraction"; +// Built-in evaluator ids — mirror the Python runner's wire contract +// (shared/solution_tests/evaluator.py). +export const JSON_SIMILARITY_EVALUATOR_ID = "uipath-json-similarity"; +export const LLM_JUDGE_EVALUATOR_ID = + "uipath-llm-judge-output-semantic-similarity"; +export const IXP_EXTRACTION_EVALUATOR_ID = "uipath-ixp-document-extraction"; -export interface EvaluatorResultProps { +export interface EvaluatorRenderArgs { evaluatorId: string; score: number | undefined; - /** The evaluator's `details`, already validated + typed by the registry. */ - evaluatorDetails: TDetails; - expectedOutput: unknown; - actualOutput: unknown; - result: SolutionTestRunResult; -} - -interface EvaluatorRenderArgs { - evaluatorId: string; - score: number | undefined; - /** The raw, unvalidated `details` off the EvaluatorResults attachment. */ + /** Raw, unvalidated `details` off the EvaluatorResults attachment. */ rawDetails: unknown; expectedOutput: unknown; actualOutput: unknown; result: SolutionTestRunResult; } -/** Bind an evaluator's schema to its component so `details` is validated once, - * centrally, and each component receives a typed `evaluatorDetails`. Closing - * over the concrete `` keeps the map value a uniform function type — - * no `unknown`/`any` leaks to component authors. Returns null if validation - * fails (a malformed payload just renders nothing). */ -function makeRenderer( +export type EvaluatorResultProps = Omit< + EvaluatorRenderArgs, + "rawDetails" +> & { evaluatorDetails: TDetails }; + +export type EvaluatorRenderer = (args: EvaluatorRenderArgs) => ReactNode; + +export type EvaluatorRenderers = Record; + +/** Bind an evaluator's schema + component (and any registration-time `bound` + * props) into a renderer. */ +export function makeRenderer( schema: z.ZodType, - Component: ComponentType>, -) { + Component: ComponentType & TExtra>, + bound: TExtra, +): EvaluatorRenderer { return ({ rawDetails, ...rest }: EvaluatorRenderArgs): ReactNode => { const parsed = schema.safeParse(rawDetails); if (!parsed.success) return null; - return ; + return ; }; } -const GENERIC_RENDERER = makeRenderer( - GenericEvaluatorDetailsSchema, - GenericEvaluatorResult, -); +/** The generic card; reuse for a custom evaluator that keeps the + * `{score, justification}` contract, binding an optional display `label`. */ +export function genericRenderer( + options: { label?: string } = {}, +): EvaluatorRenderer { + return makeRenderer(GenericEvaluatorDetailsSchema, GenericEvaluatorResult, { + label: options.label, + }); +} + +/** Fallback for unknown ids (the card then shows the raw evaluator id). */ +export const GENERIC_RENDERER: EvaluatorRenderer = genericRenderer(); -// Discriminator is the evaluator id — the key the result is stored under in the -// EvaluatorResults attachment — so no schema-sniffing is needed. Unknown ids -// fall back to the generic renderer. -const EVALUATOR_RENDERERS: Record< - string, - (args: EvaluatorRenderArgs) => ReactNode -> = { - [JSON_SIMILARITY_EVALUATOR_ID]: GENERIC_RENDERER, - [LLM_JUDGE_EVALUATOR_ID]: GENERIC_RENDERER, +const EVALUATOR_RENDERERS: EvaluatorRenderers = { + [JSON_SIMILARITY_EVALUATOR_ID]: genericRenderer({ label: "JSON Similarity" }), + [LLM_JUDGE_EVALUATOR_ID]: genericRenderer({ label: "LLM Judge" }), [IXP_EXTRACTION_EVALUATOR_ID]: makeRenderer( IxpDetailsSchema, IxpExtractionResult, + {}, ), }; +/** Precedence: vertical renderers > built-ins > generic fallback. */ export function resolveEvaluatorRenderer( evaluatorId: string, -): (args: EvaluatorRenderArgs) => ReactNode { - return EVALUATOR_RENDERERS[evaluatorId] ?? GENERIC_RENDERER; + verticalRenderers?: EvaluatorRenderers, +): EvaluatorRenderer { + return ( + verticalRenderers?.[evaluatorId] ?? + EVALUATOR_RENDERERS[evaluatorId] ?? + GENERIC_RENDERER + ); } diff --git a/apps/apollo-vertex/registry/solution-tests/index.ts b/apps/apollo-vertex/registry/solution-tests/index.ts index 0b51c0c49..046815e46 100644 --- a/apps/apollo-vertex/registry/solution-tests/index.ts +++ b/apps/apollo-vertex/registry/solution-tests/index.ts @@ -31,6 +31,21 @@ export type { SolutionTestsConfig, ResolvedSolutionTestsConfig, } from "./config"; +export { + makeRenderer, + genericRenderer, + resolveEvaluatorRenderer, + GENERIC_RENDERER, + JSON_SIMILARITY_EVALUATOR_ID, + LLM_JUDGE_EVALUATOR_ID, + IXP_EXTRACTION_EVALUATOR_ID, +} from "./evaluators/registry"; +export type { + EvaluatorRenderer, + EvaluatorRenderers, + EvaluatorRenderArgs, + EvaluatorResultProps, +} from "./evaluators/registry"; export { useSolutionTests, useSolutionTestBatchRuns, From d612605fe90385264f8ef9ef34943d2e3cd253e3 Mon Sep 17 00:00:00 2001 From: Robin Mennens Date: Fri, 3 Jul 2026 17:39:25 +0200 Subject: [PATCH 2/2] style(apollo-vertex): biome-format evaluator-results-view Wrap the resolveEvaluatorRenderer call per biome (lineWidth 100) to fix the Format CI check. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01EioRufJjChLWS7wQ2DsCLd --- .../registry/solution-tests/evaluator-results-view.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx b/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx index 5f11ab451..65af8ed62 100644 --- a/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx +++ b/apps/apollo-vertex/registry/solution-tests/evaluator-results-view.tsx @@ -56,7 +56,10 @@ export const EvaluatorResultsView = ({
{Object.entries(data).map(([evaluatorId, evaluator]) => ( - {resolveEvaluatorRenderer(evaluatorId, evaluatorRenderers)({ + {resolveEvaluatorRenderer( + evaluatorId, + evaluatorRenderers, + )({ evaluatorId, score: evaluator.score, rawDetails: evaluator.details,