From eda7dad34f13efeb3a387abb7fc462a1229f1695 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari Date: Tue, 21 Apr 2026 11:41:02 -0700 Subject: [PATCH 1/2] fix: actually grab metric measurements in UI verifications query --- .../pkg/db/job_verification_metric.sql.go | 45 +++++++++++++++++-- .../db/queries/job_verification_metric.sql | 43 ++++++++++++++++-- 2 files changed, 82 insertions(+), 6 deletions(-) diff --git a/apps/workspace-engine/pkg/db/job_verification_metric.sql.go b/apps/workspace-engine/pkg/db/job_verification_metric.sql.go index 4a8def5d9..cacd3e813 100644 --- a/apps/workspace-engine/pkg/db/job_verification_metric.sql.go +++ b/apps/workspace-engine/pkg/db/job_verification_metric.sql.go @@ -17,8 +17,14 @@ SELECT CASE WHEN COUNT(*) = 0 THEN '' WHEN bool_or(COALESCE(mc.failures, 0) > COALESCE(jvm.failure_threshold, 0)) THEN 'failed' - WHEN bool_or(COALESCE(mc.total, 0) < jvm.count - AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0)) THEN 'running' + WHEN bool_or( + COALESCE(mc.total, 0) < jvm.count + AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0) + AND NOT ( + COALESCE(jvm.success_threshold, 0) > 0 + AND COALESCE(cp.consecutive_passes, 0) >= jvm.success_threshold + ) + ) THEN 'running' ELSE 'passed' END::text AS status FROM job_verification_metric jvm @@ -29,11 +35,27 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = jvm.id ) mc ON true +LEFT JOIN LATERAL ( + SELECT COUNT(*)::int AS consecutive_passes + FROM job_verification_metric_measurement mm + WHERE mm.job_verification_metric_status_id = jvm.id + AND mm.status = 'passed' + AND mm.measured_at > COALESCE( + (SELECT MAX(measured_at) + FROM job_verification_metric_measurement + WHERE job_verification_metric_status_id = jvm.id + AND status <> 'passed'), + '-infinity'::timestamptz + ) +) cp ON true WHERE jvm.job_id = $1 ` // Returns the aggregate verification status for a job: -// 'passed' if all metrics have 
completed with enough measurements and no failures above threshold, +// 'passed' if all metrics have completed (either by reaching count, hitting the success_threshold +// +// consecutive-pass early-termination, or by exhausting count without exceeding failure_threshold), +// // 'running' if any metric is still incomplete, // 'failed' if any metric has exceeded its failure threshold. // Returns ” (empty string) if the job has no verification metrics. @@ -180,6 +202,10 @@ SELECT ( COALESCE(mc.total, 0) >= m.count OR COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0) + OR ( + COALESCE(m.success_threshold, 0) > 0 + AND COALESCE(cp.consecutive_passes, 0) >= m.success_threshold + ) )::boolean AS is_terminal, (COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0))::boolean AS is_failed FROM job_verification_metric m @@ -190,6 +216,19 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = m.id ) mc ON true +LEFT JOIN LATERAL ( + SELECT COUNT(*)::int AS consecutive_passes + FROM job_verification_metric_measurement mm + WHERE mm.job_verification_metric_status_id = m.id + AND mm.status = 'passed' + AND mm.measured_at > COALESCE( + (SELECT MAX(measured_at) + FROM job_verification_metric_measurement + WHERE job_verification_metric_status_id = m.id + AND status <> 'passed'), + '-infinity'::timestamptz + ) +) cp ON true WHERE m.job_id = (SELECT job_id FROM job_verification_metric WHERE id = $1) ` diff --git a/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql b/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql index 32d636175..dc2ab1f5a 100644 --- a/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql +++ b/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql @@ -49,6 +49,10 @@ SELECT ( COALESCE(mc.total, 0) >= m.count OR COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0) + OR ( + COALESCE(m.success_threshold, 0) > 0 + AND 
COALESCE(cp.consecutive_passes, 0) >= m.success_threshold + ) )::boolean AS is_terminal, (COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0))::boolean AS is_failed FROM job_verification_metric m @@ -59,6 +63,19 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = m.id ) mc ON true +LEFT JOIN LATERAL ( + SELECT COUNT(*)::int AS consecutive_passes + FROM job_verification_metric_measurement mm + WHERE mm.job_verification_metric_status_id = m.id + AND mm.status = 'passed' + AND mm.measured_at > COALESCE( + (SELECT MAX(measured_at) + FROM job_verification_metric_measurement + WHERE job_verification_metric_status_id = m.id + AND status <> 'passed'), + '-infinity'::timestamptz + ) +) cp ON true WHERE m.job_id = (SELECT job_id FROM job_verification_metric WHERE id = $1); -- name: GetReleaseTargetForMetric :one @@ -75,7 +92,8 @@ WHERE jvm.id = $1; -- name: GetAggregateJobVerificationStatus :one -- Returns the aggregate verification status for a job: --- 'passed' if all metrics have completed with enough measurements and no failures above threshold, +-- 'passed' if all metrics have completed (either by reaching count, hitting the success_threshold +-- consecutive-pass early-termination, or by exhausting count without exceeding failure_threshold), -- 'running' if any metric is still incomplete, -- 'failed' if any metric has exceeded its failure threshold. -- Returns '' (empty string) if the job has no verification metrics. 
@@ -83,8 +101,14 @@ SELECT CASE WHEN COUNT(*) = 0 THEN '' WHEN bool_or(COALESCE(mc.failures, 0) > COALESCE(jvm.failure_threshold, 0)) THEN 'failed' - WHEN bool_or(COALESCE(mc.total, 0) < jvm.count - AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0)) THEN 'running' + WHEN bool_or( + COALESCE(mc.total, 0) < jvm.count + AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0) + AND NOT ( + COALESCE(jvm.success_threshold, 0) > 0 + AND COALESCE(cp.consecutive_passes, 0) >= jvm.success_threshold + ) + ) THEN 'running' ELSE 'passed' END::text AS status FROM job_verification_metric jvm @@ -95,6 +119,19 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = jvm.id ) mc ON true +LEFT JOIN LATERAL ( + SELECT COUNT(*)::int AS consecutive_passes + FROM job_verification_metric_measurement mm + WHERE mm.job_verification_metric_status_id = jvm.id + AND mm.status = 'passed' + AND mm.measured_at > COALESCE( + (SELECT MAX(measured_at) + FROM job_verification_metric_measurement + WHERE job_verification_metric_status_id = jvm.id + AND status <> 'passed'), + '-infinity'::timestamptz + ) +) cp ON true WHERE jvm.job_id = @job_id; -- name: GetJobVerificationsWithMeasurements :many From 9a18598cb214766fb31b3577d74df3eeed48d4b2 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari Date: Tue, 21 Apr 2026 13:09:27 -0700 Subject: [PATCH 2/2] cleanup --- .../pkg/db/job_verification_metric.sql.go | 45 +------ .../db/queries/job_verification_metric.sql | 43 +------ packages/trpc/src/routes/verifications.ts | 115 +++++++++++++++--- 3 files changed, 107 insertions(+), 96 deletions(-) diff --git a/apps/workspace-engine/pkg/db/job_verification_metric.sql.go b/apps/workspace-engine/pkg/db/job_verification_metric.sql.go index cacd3e813..4a8def5d9 100644 --- a/apps/workspace-engine/pkg/db/job_verification_metric.sql.go +++ b/apps/workspace-engine/pkg/db/job_verification_metric.sql.go @@ -17,14 +17,8 @@ SELECT CASE WHEN COUNT(*) 
= 0 THEN '' WHEN bool_or(COALESCE(mc.failures, 0) > COALESCE(jvm.failure_threshold, 0)) THEN 'failed' - WHEN bool_or( - COALESCE(mc.total, 0) < jvm.count - AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0) - AND NOT ( - COALESCE(jvm.success_threshold, 0) > 0 - AND COALESCE(cp.consecutive_passes, 0) >= jvm.success_threshold - ) - ) THEN 'running' + WHEN bool_or(COALESCE(mc.total, 0) < jvm.count + AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0)) THEN 'running' ELSE 'passed' END::text AS status FROM job_verification_metric jvm @@ -35,27 +29,11 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = jvm.id ) mc ON true -LEFT JOIN LATERAL ( - SELECT COUNT(*)::int AS consecutive_passes - FROM job_verification_metric_measurement mm - WHERE mm.job_verification_metric_status_id = jvm.id - AND mm.status = 'passed' - AND mm.measured_at > COALESCE( - (SELECT MAX(measured_at) - FROM job_verification_metric_measurement - WHERE job_verification_metric_status_id = jvm.id - AND status <> 'passed'), - '-infinity'::timestamptz - ) -) cp ON true WHERE jvm.job_id = $1 ` // Returns the aggregate verification status for a job: -// 'passed' if all metrics have completed (either by reaching count, hitting the success_threshold -// -// consecutive-pass early-termination, or by exhausting count without exceeding failure_threshold), -// +// 'passed' if all metrics have completed with enough measurements and no failures above threshold, // 'running' if any metric is still incomplete, // 'failed' if any metric has exceeded its failure threshold. // Returns ” (empty string) if the job has no verification metrics. 
@@ -202,10 +180,6 @@ SELECT ( COALESCE(mc.total, 0) >= m.count OR COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0) - OR ( - COALESCE(m.success_threshold, 0) > 0 - AND COALESCE(cp.consecutive_passes, 0) >= m.success_threshold - ) )::boolean AS is_terminal, (COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0))::boolean AS is_failed FROM job_verification_metric m @@ -216,19 +190,6 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = m.id ) mc ON true -LEFT JOIN LATERAL ( - SELECT COUNT(*)::int AS consecutive_passes - FROM job_verification_metric_measurement mm - WHERE mm.job_verification_metric_status_id = m.id - AND mm.status = 'passed' - AND mm.measured_at > COALESCE( - (SELECT MAX(measured_at) - FROM job_verification_metric_measurement - WHERE job_verification_metric_status_id = m.id - AND status <> 'passed'), - '-infinity'::timestamptz - ) -) cp ON true WHERE m.job_id = (SELECT job_id FROM job_verification_metric WHERE id = $1) ` diff --git a/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql b/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql index dc2ab1f5a..32d636175 100644 --- a/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql +++ b/apps/workspace-engine/pkg/db/queries/job_verification_metric.sql @@ -49,10 +49,6 @@ SELECT ( COALESCE(mc.total, 0) >= m.count OR COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0) - OR ( - COALESCE(m.success_threshold, 0) > 0 - AND COALESCE(cp.consecutive_passes, 0) >= m.success_threshold - ) )::boolean AS is_terminal, (COALESCE(mc.failures, 0) > COALESCE(m.failure_threshold, 0))::boolean AS is_failed FROM job_verification_metric m @@ -63,19 +59,6 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = m.id ) mc ON true -LEFT JOIN LATERAL ( - SELECT COUNT(*)::int AS consecutive_passes - FROM job_verification_metric_measurement mm - WHERE 
mm.job_verification_metric_status_id = m.id - AND mm.status = 'passed' - AND mm.measured_at > COALESCE( - (SELECT MAX(measured_at) - FROM job_verification_metric_measurement - WHERE job_verification_metric_status_id = m.id - AND status <> 'passed'), - '-infinity'::timestamptz - ) -) cp ON true WHERE m.job_id = (SELECT job_id FROM job_verification_metric WHERE id = $1); -- name: GetReleaseTargetForMetric :one @@ -92,8 +75,7 @@ WHERE jvm.id = $1; -- name: GetAggregateJobVerificationStatus :one -- Returns the aggregate verification status for a job: --- 'passed' if all metrics have completed (either by reaching count, hitting the success_threshold --- consecutive-pass early-termination, or by exhausting count without exceeding failure_threshold), +-- 'passed' if all metrics have completed with enough measurements and no failures above threshold, -- 'running' if any metric is still incomplete, -- 'failed' if any metric has exceeded its failure threshold. -- Returns '' (empty string) if the job has no verification metrics. 
@@ -101,14 +83,8 @@ SELECT CASE WHEN COUNT(*) = 0 THEN '' WHEN bool_or(COALESCE(mc.failures, 0) > COALESCE(jvm.failure_threshold, 0)) THEN 'failed' - WHEN bool_or( - COALESCE(mc.total, 0) < jvm.count - AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0) - AND NOT ( - COALESCE(jvm.success_threshold, 0) > 0 - AND COALESCE(cp.consecutive_passes, 0) >= jvm.success_threshold - ) - ) THEN 'running' + WHEN bool_or(COALESCE(mc.total, 0) < jvm.count + AND COALESCE(mc.failures, 0) <= COALESCE(jvm.failure_threshold, 0)) THEN 'running' ELSE 'passed' END::text AS status FROM job_verification_metric jvm @@ -119,19 +95,6 @@ LEFT JOIN LATERAL ( FROM job_verification_metric_measurement mm WHERE mm.job_verification_metric_status_id = jvm.id ) mc ON true -LEFT JOIN LATERAL ( - SELECT COUNT(*)::int AS consecutive_passes - FROM job_verification_metric_measurement mm - WHERE mm.job_verification_metric_status_id = jvm.id - AND mm.status = 'passed' - AND mm.measured_at > COALESCE( - (SELECT MAX(measured_at) - FROM job_verification_metric_measurement - WHERE job_verification_metric_status_id = jvm.id - AND status <> 'passed'), - '-infinity'::timestamptz - ) -) cp ON true WHERE jvm.job_id = @job_id; -- name: GetJobVerificationsWithMeasurements :many diff --git a/packages/trpc/src/routes/verifications.ts b/packages/trpc/src/routes/verifications.ts index 2654fb7d4..e86b061be 100644 --- a/packages/trpc/src/routes/verifications.ts +++ b/packages/trpc/src/routes/verifications.ts @@ -1,12 +1,64 @@ -import { TRPCError } from "@trpc/server"; import { z } from "zod"; -import { desc, eq } from "@ctrlplane/db"; +import { asc, desc, eq, inArray } from "@ctrlplane/db"; import * as schema from "@ctrlplane/db/schema"; -import { getClientFor } from "@ctrlplane/workspace-engine-sdk"; import { protectedProcedure, router } from "../trpc.js"; +type MeasurementStatus = "failed" | "inconclusive" | "passed"; +type JobVerificationStatus = "passed" | "failed" | "running" | ""; + +function 
getFailedCount(measurements: MeasurementStatus[]): number { + return measurements.filter((s) => s === "failed").length; +} + +function getConsecutiveSuccessCount(measurements: MeasurementStatus[]): number { + let i = measurements.length; + while (i > 0 && measurements[i - 1] === "passed") i--; + return measurements.length - i; +} + +type MeasurementRow = { metricId: string | null; status: MeasurementStatus }; + +function groupMeasurementsByMetric( + metricIds: string[], + measurements: MeasurementRow[], +): Map<string, MeasurementStatus[]> { + return new Map( + metricIds.map((id) => [ + id, + measurements.filter((r) => r.metricId === id).map((r) => r.status), + ]), + ); +} + +function computeMetricPhase( + measurements: MeasurementStatus[], + count: number, + successThreshold: number | null, + failureThreshold: number | null, +): "passed" | "failed" | "running" { + const failureLimit = failureThreshold ?? 0; + const failedCount = getFailedCount(measurements); + const consecutiveSuccessCount = getConsecutiveSuccessCount(measurements); + + const hasAnyFailures = failedCount > 0; + const isFailureLimitExceeded = failureLimit > 0 && failedCount > failureLimit; + if ((failureLimit === 0 && hasAnyFailures) || isFailureLimitExceeded) + return "failed"; + + if ( + successThreshold != null && + successThreshold > 0 && + consecutiveSuccessCount >= successThreshold + ) + return "passed"; + + if (measurements.length >= count) return "passed"; + + return "running"; +} + export const verificationsRouter = router({ measurements: protectedProcedure .input(z.uuid()) @@ -31,19 +83,54 @@ export const verificationsRouter = router({ status: protectedProcedure .input(z.object({ jobId: z.string().uuid() })) - .query(async ({ input }) => { - const result = await getClientFor().GET( - "/v1/jobs/{jobId}/verification-status", - { params: { path: { jobId: input.jobId } } }, + .query(async ({ input, ctx }) => { + const metrics = await ctx.db + .select({ + id: schema.jobVerificationMetricStatus.id, + count: 
schema.jobVerificationMetricStatus.count, + successThreshold: schema.jobVerificationMetricStatus.successThreshold, + failureThreshold: schema.jobVerificationMetricStatus.failureThreshold, + }) + .from(schema.jobVerificationMetricStatus) + .where(eq(schema.jobVerificationMetricStatus.jobId, input.jobId)); + + if (metrics.length === 0) return { status: "" as JobVerificationStatus }; + + const measurements = await ctx.db + .select({ + metricId: + schema.jobVerificationMetricMeasurement + .jobVerificationMetricStatusId, + status: schema.jobVerificationMetricMeasurement.status, + }) + .from(schema.jobVerificationMetricMeasurement) + .where( + inArray( + schema.jobVerificationMetricMeasurement + .jobVerificationMetricStatusId, + metrics.map((m) => m.id), + ), + ) + .orderBy(asc(schema.jobVerificationMetricMeasurement.measuredAt)); + + const byMetric = groupMeasurementsByMetric( + metrics.map((m) => m.id), + measurements, ); - if (result.error != null) { - throw new TRPCError({ - code: "INTERNAL_SERVER_ERROR", - message: `Failed to get verification status: ${JSON.stringify(result.error)}`, - }); - } + const phases = metrics.map((m) => + computeMetricPhase( + byMetric.get(m.id) ?? [], + m.count, + m.successThreshold, + m.failureThreshold, + ), + ); - return result.data; + if (phases.some((p) => p === "failed")) + return { status: "failed" as JobVerificationStatus }; + if (phases.some((p) => p === "running")) + return { status: "running" as JobVerificationStatus }; + return { status: "passed" as JobVerificationStatus }; }), });