Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions apps/apollo-vertex/registry/solution-tests/config.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { ColumnDef } from "@tanstack/react-table";
import { DEFAULT_PASS_THRESHOLD } from "./constants";
import type { EvaluatorRenderers } from "./evaluators/registry";
import type { SolutionTest } from "./types";

/** Per-vertical presentation config; everything else is hard-coded in `constants`. */
Expand All @@ -11,9 +12,11 @@ export interface SolutionTestsConfig {
subjectNoun?: { singular: string; plural: string };
/** Score at/above which a result passes (drives pass color + KPI trend line). Defaults to 0.9. */
passThreshold?: number;
/** Debug aid: also render the raw Expected/Actual *input* panels in run-result
* details (typically wired to a dev-only flag). Defaults to false. */
/** Show the Expected/Actual input panels in run-result details. Defaults to false (outputs only). */
showInputs?: boolean;
/** Custom evaluator-id -> renderer map; wins over the built-in registry.
* The FE counterpart to the BE `custom_evaluator_builders`. */
evaluatorRenderers?: EvaluatorRenderers;
}

/** Config with defaults applied — what components read from context. */
Expand All @@ -23,6 +26,7 @@ export interface ResolvedSolutionTestsConfig {
subjectNoun?: { singular: string; plural: string };
passThreshold: number;
showInputs: boolean;
evaluatorRenderers: EvaluatorRenderers;
}

export function resolveConfig(
Expand All @@ -34,5 +38,6 @@ export function resolveConfig(
subjectNoun: config.subjectNoun,
passThreshold: config.passThreshold ?? DEFAULT_PASS_THRESHOLD,
showInputs: config.showInputs ?? false,
evaluatorRenderers: config.evaluatorRenderers ?? {},
};
}
6 changes: 0 additions & 6 deletions apps/apollo-vertex/registry/solution-tests/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,6 @@ export const DEFAULT_PASS_THRESHOLD = 0.9;
/** Most recent completed batches plotted on the KPI score-trend chart. */
export const MAX_TREND_POINTS = 10;

/** Evaluator id → display label. */
export const EVALUATOR_LABELS: Record<string, string> = {
"uipath-json-similarity": "JSON Similarity",
"uipath-llm-judge-output-semantic-similarity": "LLM Judge",
};

/** Numeric status enum → English label. */
export const defaultTestStatusLabels: Record<number, string> = {
[SolutionTestStatus.Pending]: "Pending",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import { Fragment } from "react";
import { z } from "zod";
import { useSolutionTestsConfig } from "./context";
import { resolveEvaluatorRenderer } from "./evaluators/registry";
import type { SolutionTestRunResult } from "./types";

Expand Down Expand Up @@ -50,11 +51,15 @@ export const EvaluatorResultsView = ({
actualOutput,
result,
}: EvaluatorResultsViewProps) => {
const { evaluatorRenderers } = useSolutionTestsConfig();
return (
<div className="flex flex-col gap-4">
{Object.entries(data).map(([evaluatorId, evaluator]) => (
<Fragment key={evaluatorId}>
{resolveEvaluatorRenderer(evaluatorId)({
{resolveEvaluatorRenderer(
evaluatorId,
evaluatorRenderers,
)({
evaluatorId,
score: evaluator.score,
rawDetails: evaluator.details,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import { z } from "zod";
import { useTranslation } from "react-i18next";
import { EVALUATOR_LABELS } from "../constants";
import { useSolutionTestsConfig } from "../context";
import { JsonPanel } from "./output-panels";
import type { EvaluatorResultProps } from "./registry";
Expand All @@ -26,22 +25,23 @@ function scoreColorClass(

export const GenericEvaluatorResult = ({
evaluatorId,
label,
score,
evaluatorDetails,
expectedOutput,
actualOutput,
}: EvaluatorResultProps<GenericEvaluatorDetails>) => {
}: EvaluatorResultProps<GenericEvaluatorDetails> & { label?: string }) => {
const { t } = useTranslation();
const { passThreshold } = useSolutionTestsConfig();
const label = EVALUATOR_LABELS[evaluatorId] ?? evaluatorId;
const displayLabel = label ?? evaluatorId;
const justification = evaluatorDetails.justification;
const scoreStr = score == null ? "—" : `${Math.round(score * 100)}%`;

return (
<div className="flex flex-col gap-4">
<div className="rounded-md border bg-muted/50 p-3">
<div className="flex items-center justify-between">
<span className="text-sm font-medium">{label}</span>
<span className="text-sm font-medium">{displayLabel}</span>
<span
className={`text-sm font-semibold ${scoreColorClass(score, passThreshold)}`}
>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,68 +10,77 @@ import {
import { IxpExtractionResult } from "./ixp-extraction/ixp-extraction-result";
import { IxpDetailsSchema } from "./ixp-extraction/schema";

const JSON_SIMILARITY_EVALUATOR_ID = "uipath-json-similarity";
const LLM_JUDGE_EVALUATOR_ID = "uipath-llm-judge-output-semantic-similarity";
const IXP_EXTRACTION_EVALUATOR_ID = "uipath-ixp-document-extraction";
// Built-in evaluator ids — mirror the Python runner's wire contract
// (shared/solution_tests/evaluator.py).
// Each id is used below as a key of the built-in renderer map, and is the key
// an evaluator's result is stored under in the EvaluatorResults attachment.
// Exported so callers can refer to a built-in evaluator without repeating the
// string literal.
export const JSON_SIMILARITY_EVALUATOR_ID = "uipath-json-similarity";
export const LLM_JUDGE_EVALUATOR_ID =
"uipath-llm-judge-output-semantic-similarity";
export const IXP_EXTRACTION_EVALUATOR_ID = "uipath-ixp-document-extraction";

export interface EvaluatorResultProps<TDetails = unknown> {
export interface EvaluatorRenderArgs {
evaluatorId: string;
score: number | undefined;
/** The evaluator's `details`, already validated + typed by the registry. */
evaluatorDetails: TDetails;
expectedOutput: unknown;
actualOutput: unknown;
result: SolutionTestRunResult;
}

interface EvaluatorRenderArgs {
evaluatorId: string;
score: number | undefined;
/** The raw, unvalidated `details` off the EvaluatorResults attachment. */
/** Raw, unvalidated `details` off the EvaluatorResults attachment. */
rawDetails: unknown;
expectedOutput: unknown;
actualOutput: unknown;
result: SolutionTestRunResult;
}

/** Bind an evaluator's schema to its component so `details` is validated once,
* centrally, and each component receives a typed `evaluatorDetails`. Closing
* over the concrete `<TDetails>` keeps the map value a uniform function type —
* no `unknown`/`any` leaks to component authors. Returns null if validation
* fails (a malformed payload just renders nothing). */
function makeRenderer<TDetails>(
/** Props received by an evaluator result component: the render args with the
 * raw `rawDetails` replaced by `evaluatorDetails`, typed as `TDetails`
 * (the value `makeRenderer` passes after its schema parse succeeds). */
export type EvaluatorResultProps<TDetails = unknown> = Omit<
EvaluatorRenderArgs,
"rawDetails"
> & { evaluatorDetails: TDetails };

/** A function that turns one evaluator's render args into a React node. */
export type EvaluatorRenderer = (args: EvaluatorRenderArgs) => ReactNode;

/** Map of evaluator id -> renderer. */
export type EvaluatorRenderers = Record<string, EvaluatorRenderer>;

/** Bind an evaluator's schema + component (and any registration-time `bound`
* props) into a renderer. */
export function makeRenderer<TDetails, TExtra extends object>(
schema: z.ZodType<TDetails>,
Component: ComponentType<EvaluatorResultProps<TDetails>>,
) {
Component: ComponentType<EvaluatorResultProps<TDetails> & TExtra>,
bound: TExtra,
): EvaluatorRenderer {
Comment on lines +41 to +45
return ({ rawDetails, ...rest }: EvaluatorRenderArgs): ReactNode => {
const parsed = schema.safeParse(rawDetails);
if (!parsed.success) return null;
return <Component evaluatorDetails={parsed.data} {...rest} />;
return <Component evaluatorDetails={parsed.data} {...bound} {...rest} />;
};
}

const GENERIC_RENDERER = makeRenderer(
GenericEvaluatorDetailsSchema,
GenericEvaluatorResult,
);
/**
 * Build a renderer for the generic evaluator card.
 *
 * Intended for custom evaluators that keep the `{score, justification}`
 * contract. The optional `label` is bound at registration time and handed to
 * the card as its display label.
 *
 * @param options - Optional settings; `label` is the display label to bind.
 * @returns A renderer pairing the generic details schema with the generic card.
 */
export function genericRenderer(
  options: { label?: string } = {},
): EvaluatorRenderer {
  const { label } = options;
  return makeRenderer(GenericEvaluatorDetailsSchema, GenericEvaluatorResult, {
    label,
  });
}

/** Fallback for unknown ids (the card then shows the raw evaluator id).
 * Built with no `label` bound, and used as the last resort when neither a
 * vertical-supplied renderer nor a built-in one matches the evaluator id. */
export const GENERIC_RENDERER: EvaluatorRenderer = genericRenderer();

// Discriminator is the evaluator id — the key the result is stored under in the
// EvaluatorResults attachment — so no schema-sniffing is needed. Unknown ids
// fall back to the generic renderer.
const EVALUATOR_RENDERERS: Record<
string,
(args: EvaluatorRenderArgs) => ReactNode
> = {
[JSON_SIMILARITY_EVALUATOR_ID]: GENERIC_RENDERER,
[LLM_JUDGE_EVALUATOR_ID]: GENERIC_RENDERER,
const EVALUATOR_RENDERERS: EvaluatorRenderers = {
[JSON_SIMILARITY_EVALUATOR_ID]: genericRenderer({ label: "JSON Similarity" }),
[LLM_JUDGE_EVALUATOR_ID]: genericRenderer({ label: "LLM Judge" }),
[IXP_EXTRACTION_EVALUATOR_ID]: makeRenderer(
IxpDetailsSchema,
IxpExtractionResult,
{},
),
};

/** Precedence: vertical renderers > built-ins > generic fallback. */
export function resolveEvaluatorRenderer(
evaluatorId: string,
): (args: EvaluatorRenderArgs) => ReactNode {
return EVALUATOR_RENDERERS[evaluatorId] ?? GENERIC_RENDERER;
verticalRenderers?: EvaluatorRenderers,
): EvaluatorRenderer {
return (
verticalRenderers?.[evaluatorId] ??
EVALUATOR_RENDERERS[evaluatorId] ??
GENERIC_RENDERER
);
}
15 changes: 15 additions & 0 deletions apps/apollo-vertex/registry/solution-tests/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ export type {
SolutionTestsConfig,
ResolvedSolutionTestsConfig,
} from "./config";
export {
makeRenderer,
genericRenderer,
resolveEvaluatorRenderer,
GENERIC_RENDERER,
JSON_SIMILARITY_EVALUATOR_ID,
LLM_JUDGE_EVALUATOR_ID,
IXP_EXTRACTION_EVALUATOR_ID,
} from "./evaluators/registry";
export type {
EvaluatorRenderer,
EvaluatorRenderers,
EvaluatorRenderArgs,
EvaluatorResultProps,
} from "./evaluators/registry";
export {
useSolutionTests,
useSolutionTestBatchRuns,
Expand Down
Loading