From 4cf6ce6a125672429060be504c2be00d700d0fe4 Mon Sep 17 00:00:00 2001 From: Matthew Staebler Date: Wed, 1 Jul 2026 12:06:53 -0400 Subject: [PATCH 1/2] TRT-2741: Expand variant keys and decouple daily summaries from variant_combination_id Add 8 variant keys to importantVariants (Procedure, Aggregation, NetworkAccess, Scheduler, Suite, ContainerRuntime, CGroupMode, LayeredProduct). Release-family keys remain excluded as they are redundant with prow_jobs.release and the Upgrade variant. Remove variant_combination_id from test_daily_summaries. The matview pre_agg CTE now joins prow_jobs to resolve the variant_combination_id at refresh time. This decouples daily summaries from variant changes, eliminating the need to rebuild 69M rows when variants are updated. Benchmarking shows <1% matview refresh overhead from the added join. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/db/dailysummary/dailysummary.go | 6 ++-- pkg/db/db.go | 9 ++---- ...daily_summary_variant_combination.down.sql | 2 ++ ...p_daily_summary_variant_combination.up.sql | 5 +++ pkg/db/models/prow.go | 19 ++++++------ pkg/db/views.go | 31 ++++++++++--------- pkg/testidentification/ocp_variants.go | 16 ++++++++-- 7 files changed, 50 insertions(+), 38 deletions(-) create mode 100644 pkg/db/migrations/000004_drop_daily_summary_variant_combination.down.sql create mode 100644 pkg/db/migrations/000004_drop_daily_summary_variant_combination.up.sql diff --git a/pkg/db/dailysummary/dailysummary.go b/pkg/db/dailysummary/dailysummary.go index e8c86b91f..bec180f19 100644 --- a/pkg/db/dailysummary/dailysummary.go +++ b/pkg/db/dailysummary/dailysummary.go @@ -17,7 +17,7 @@ const ( parallelWorkers = 4 ) -var valueColumns = []string{"variant_combination_id", "successes", "failures", "flakes", "runs"} +var valueColumns = []string{"successes", "failures", "flakes", "runs"} var ( insertSQL = buildInsertSQL() @@ -33,17 +33,15 @@ func buildInsertSQL() string { COALESCE(pjrt.suite_id, 0), pjrt.prow_job_run_release, date(pjrt.prow_job_run_timestamp), - pj.variant_combination_id, COUNT(*) FILTER (WHERE pjrt.status = 1), COUNT(*) FILTER (WHERE pjrt.status = 12), COUNT(*) FILTER (WHERE pjrt.status = 13), COUNT(*) FROM prow_job_run_tests pjrt - JOIN prow_jobs pj ON pjrt.prow_job_id = pj.id WHERE pjrt.prow_job_run_timestamp >= ?::date AND pjrt.prow_job_run_timestamp < (?::date + INTERVAL '1 day') AND pjrt.prow_job_run_release = ? - GROUP BY pjrt.test_id, pjrt.prow_job_id, COALESCE(pjrt.suite_id, 0), pjrt.prow_job_run_release, date(pjrt.prow_job_run_timestamp), pj.variant_combination_id`, + GROUP BY pjrt.test_id, pjrt.prow_job_id, COALESCE(pjrt.suite_id, 0), pjrt.prow_job_run_release, date(pjrt.prow_job_run_timestamp)`, strings.Join(valueColumns, ", ")) } diff --git a/pkg/db/db.go b/pkg/db/db.go index 3a76773d9..4b3f91b48 100644 --- a/pkg/db/db.go +++ b/pkg/db/db.go @@ -547,10 +547,9 @@ func ensureTriageSymptomCascade(db *gorm.DB) error { // function is created by migration 000003; the table is created by // AutoMigrate, so this must run after both. // -// When the trigger is first attached, existing rows are backfilled -// and test_daily_summaries is truncated so the next refresh -// populates it with variant_combination_id set. In steady state -// (trigger already exists) this is a single catalog lookup. +// When the trigger is first attached, existing rows are backfilled. +// In steady state (trigger already exists) this is a single catalog +// lookup. func ensureVariantCombinationTrigger(db *gorm.DB) error { return db.Exec(` DO $$ @@ -576,8 +575,6 @@ func ensureVariantCombinationTrigger(db *gorm.DB) error { WHERE prow_jobs.variants = vc.variants AND prow_jobs.variants IS NOT NULL AND prow_jobs.variant_combination_id IS NULL; - - TRUNCATE test_daily_summaries; END IF; END $$`).Error } diff --git a/pkg/db/migrations/000004_drop_daily_summary_variant_combination.down.sql b/pkg/db/migrations/000004_drop_daily_summary_variant_combination.down.sql new file mode 100644 index 000000000..d70873168 --- /dev/null +++ b/pkg/db/migrations/000004_drop_daily_summary_variant_combination.down.sql @@ -0,0 +1,2 @@ +TRUNCATE test_daily_summaries; +ALTER TABLE test_daily_summaries ADD COLUMN IF NOT EXISTS variant_combination_id BIGINT; diff --git a/pkg/db/migrations/000004_drop_daily_summary_variant_combination.up.sql b/pkg/db/migrations/000004_drop_daily_summary_variant_combination.up.sql new file mode 100644 index 000000000..bfc3f90bc --- /dev/null +++ b/pkg/db/migrations/000004_drop_daily_summary_variant_combination.up.sql @@ -0,0 +1,5 @@ +DROP MATERIALIZED VIEW IF EXISTS prow_test_report_7d_collapsed_matview; +DROP MATERIALIZED VIEW IF EXISTS prow_test_report_2d_collapsed_matview; +DROP MATERIALIZED VIEW IF EXISTS prow_test_report_7d_matview; +DROP MATERIALIZED VIEW IF EXISTS prow_test_report_2d_matview; +ALTER TABLE test_daily_summaries DROP COLUMN IF EXISTS variant_combination_id; diff --git a/pkg/db/models/prow.go b/pkg/db/models/prow.go index 1bcbf2fd1..1dc507fdc 100644 --- a/pkg/db/models/prow.go +++ b/pkg/db/models/prow.go @@ -172,16 +172,15 @@ type TestAnalysisByJobByDate struct { // TestDailySummary stores pre-aggregated daily test results used to // accelerate matview refreshes. Table managed by migration 000002. type TestDailySummary struct { - TestID uint `gorm:"column:test_id;not null"` - ProwJobID uint `gorm:"column:prow_job_id;not null"` - SuiteID uint `gorm:"column:suite_id;not null;default:0"` - Release string `gorm:"column:release;not null"` - SummaryDate time.Time `gorm:"column:summary_date;type:date;not null"` - VariantCombinationID *uint `gorm:"column:variant_combination_id"` - Successes int32 `gorm:"column:successes;not null;default:0"` - Failures int32 `gorm:"column:failures;not null;default:0"` - Flakes int32 `gorm:"column:flakes;not null;default:0"` - Runs int32 `gorm:"column:runs;not null;default:0"` + TestID uint `gorm:"column:test_id;not null"` + ProwJobID uint `gorm:"column:prow_job_id;not null"` + SuiteID uint `gorm:"column:suite_id;not null;default:0"` + Release string `gorm:"column:release;not null"` + SummaryDate time.Time `gorm:"column:summary_date;type:date;not null"` + Successes int32 `gorm:"column:successes;not null;default:0"` + Failures int32 `gorm:"column:failures;not null;default:0"` + Flakes int32 `gorm:"column:flakes;not null;default:0"` + Runs int32 `gorm:"column:runs;not null;default:0"` } // Bug represents a Jira bug. diff --git a/pkg/db/views.go b/pkg/db/views.go index fc6d53c69..7b2a390f8 100644 --- a/pkg/db/views.go +++ b/pkg/db/views.go @@ -315,24 +315,25 @@ FROM ( ), pre_agg AS ( SELECT - variant_combination_id, - test_id, - suite_id, - release AS prow_job_run_release, - COALESCE(SUM(successes) FILTER (WHERE summary_date >= |||START||| AND summary_date < |||BOUNDARY|||), 0) AS previous_successes, - COALESCE(SUM(flakes) FILTER (WHERE summary_date >= |||START||| AND summary_date < |||BOUNDARY|||), 0) AS previous_flakes, - COALESCE(SUM(failures) FILTER (WHERE summary_date >= |||START||| AND summary_date < |||BOUNDARY|||), 0) AS previous_failures, - COALESCE(SUM(runs) FILTER (WHERE summary_date >= |||START||| AND summary_date < |||BOUNDARY|||), 0) AS previous_runs, - COALESCE(SUM(successes) FILTER (WHERE summary_date >= |||BOUNDARY||| AND summary_date <= |||END|||), 0) AS current_successes, - COALESCE(SUM(flakes) FILTER (WHERE summary_date >= |||BOUNDARY||| AND summary_date <= |||END|||), 0) AS current_flakes, - COALESCE(SUM(failures) FILTER (WHERE summary_date >= |||BOUNDARY||| AND summary_date <= |||END|||), 0) AS current_failures, - COALESCE(SUM(runs) FILTER (WHERE summary_date >= |||BOUNDARY||| AND summary_date <= |||END|||), 0) AS current_runs + pj.variant_combination_id, + tds.test_id, + tds.suite_id, + tds.release AS prow_job_run_release, + COALESCE(SUM(tds.successes) FILTER (WHERE tds.summary_date >= |||START||| AND tds.summary_date < |||BOUNDARY|||), 0) AS previous_successes, + COALESCE(SUM(tds.flakes) FILTER (WHERE tds.summary_date >= |||START||| AND tds.summary_date < |||BOUNDARY|||), 0) AS previous_flakes, + COALESCE(SUM(tds.failures) FILTER (WHERE tds.summary_date >= |||START||| AND tds.summary_date < |||BOUNDARY|||), 0) AS previous_failures, + COALESCE(SUM(tds.runs) FILTER (WHERE tds.summary_date >= |||START||| AND tds.summary_date < |||BOUNDARY|||), 0) AS previous_runs, + COALESCE(SUM(tds.successes) FILTER (WHERE tds.summary_date >= |||BOUNDARY||| AND tds.summary_date <= |||END|||), 0) AS current_successes, + COALESCE(SUM(tds.flakes) FILTER (WHERE tds.summary_date >= |||BOUNDARY||| AND tds.summary_date <= |||END|||), 0) AS current_flakes, + COALESCE(SUM(tds.failures) FILTER (WHERE tds.summary_date >= |||BOUNDARY||| AND tds.summary_date <= |||END|||), 0) AS current_failures, + COALESCE(SUM(tds.runs) FILTER (WHERE tds.summary_date >= |||BOUNDARY||| AND tds.summary_date <= |||END|||), 0) AS current_runs FROM - test_daily_summaries + test_daily_summaries tds + JOIN prow_jobs pj ON tds.prow_job_id = pj.id WHERE - summary_date >= |||START||| AND summary_date <= |||END||| + tds.summary_date >= |||START||| AND tds.summary_date <= |||END||| GROUP BY - variant_combination_id, test_id, suite_id, release + pj.variant_combination_id, tds.test_id, tds.suite_id, tds.release ) SELECT tests.id, diff --git a/pkg/testidentification/ocp_variants.go b/pkg/testidentification/ocp_variants.go index 5bc6deed4..09e7efe86 100644 --- a/pkg/testidentification/ocp_variants.go +++ b/pkg/testidentification/ocp_variants.go @@ -24,6 +24,11 @@ import ( var openshiftJobsNeverStableRaw string var openshiftJobsNeverStable = strings.Split(openshiftJobsNeverStableRaw, "\n") +// importantVariants controls which BigQuery variant keys are stored in +// prow_jobs.variants in PostgreSQL. Release-family keys (Release, +// ReleaseMinor, ReleaseMajor, FromRelease, FromReleaseMinor, +// FromReleaseMajor) are excluded because they are redundant with +// prow_jobs.release and the Upgrade variant. var importantVariants = []string{ "Platform", "Architecture", @@ -39,6 +44,14 @@ var importantVariants = []string{ "Component", "Capability", "OS", + "Procedure", + "Aggregation", + "NetworkAccess", + "Scheduler", + "Suite", + "ContainerRuntime", + "CGroupMode", + "LayeredProduct", } const ( @@ -133,9 +146,6 @@ func (v *openshiftVariants) IdentifyVariants(jobName string) []string { allVariants = append(allVariants, NeverStable) } - // Ensure filtered by important variants; including them all - // significantly increases cardinality and slows matview refreshes - // to a crawl. return filterVariants(allVariants, importantVariants) } From fa1cbef53a79e54b095bf12e534955924b8b270e Mon Sep 17 00:00:00 2001 From: Matthew Staebler Date: Wed, 1 Jul 2026 12:06:59 -0400 Subject: [PATCH 2/2] TRT-2741: Add migration manifest and verify-migrations target Add LATEST manifest listing all migrations by version and name. Two PRs adding the same migration number will conflict on this file. Add hack/verify-migrations.sh and a make target to enforce sequential numbering, no gaps, and matching up/down files. Co-Authored-By: Claude Opus 4.6 (1M context) --- Makefile | 7 +++-- hack/verify-migrations.sh | 58 ++++++++++++++++++++++++++++++++++++++ pkg/db/migrations/MANIFEST | 13 +++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100755 hack/verify-migrations.sh create mode 100644 pkg/db/migrations/MANIFEST diff --git a/Makefile b/Makefile index 34e438486..482fb917f 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,8 @@ all: test build build: builddir clean npm frontend sippy sippy-daemon -.PHONY: verify apm verify-apm -verify: lint verify-apm +.PHONY: verify apm verify-apm verify-migrations +verify: lint verify-apm verify-migrations builddir: mkdir -p sippy-ng/build @@ -75,6 +75,9 @@ apm: uvx --from apm-cli@0.13.0 apm install uvx --from apm-cli@0.13.0 apm compile +verify-migrations: + ./hack/verify-migrations.sh + verify-apm: apm @if ! git diff --quiet HEAD -- .claude .cursor .gemini .opencode AGENTS.md CLAUDE.md GEMINI.md sippy-ng/AGENTS.md sippy-ng/CLAUDE.md mcp/AGENTS.md mcp/CLAUDE.md; then \ echo "ERROR: Generated APM files are out of date. Run 'make apm' and commit the results."; \ diff --git a/hack/verify-migrations.sh b/hack/verify-migrations.sh new file mode 100755 index 000000000..d99489a62 --- /dev/null +++ b/hack/verify-migrations.sh @@ -0,0 +1,58 @@ +#!/bin/bash +set -euo pipefail + +MIGRATIONS_DIR="pkg/db/migrations" +MANIFEST="${MIGRATIONS_DIR}/MANIFEST" + +if [ ! -f "$MANIFEST" ]; then + echo "ERROR: ${MANIFEST} not found." + exit 1 +fi + +errors=0 + +# Check each manifest entry has matching .up.sql and .down.sql files. +prev_num=0 +while IFS= read -r entry; do + [ -z "$entry" ] && continue + [[ "$entry" == \#* ]] && continue + + num=$(echo "$entry" | grep -oE '^[0-9]+' | sed 's/^0*//') + if [ -z "$num" ]; then + echo "ERROR: Invalid manifest entry '${entry}' (must start with a zero-padded number)." + errors=$((errors + 1)) + continue + fi + + # Check sequential numbering. + expected=$((prev_num + 1)) + if [ "$num" -ne "$expected" ]; then + echo "ERROR: Expected migration $(printf '%06d' "$expected") but found $(printf '%06d' "$num") (gap or duplicate)." + errors=$((errors + 1)) + fi + prev_num=$num + + if [ ! -f "${MIGRATIONS_DIR}/${entry}.up.sql" ]; then + echo "ERROR: Manifest lists '${entry}' but ${MIGRATIONS_DIR}/${entry}.up.sql does not exist." + errors=$((errors + 1)) + fi + if [ ! -f "${MIGRATIONS_DIR}/${entry}.down.sql" ]; then + echo "ERROR: Manifest lists '${entry}' but ${MIGRATIONS_DIR}/${entry}.down.sql does not exist." + errors=$((errors + 1)) + fi +done < "$MANIFEST" + +# Check no extra SQL files exist beyond what the manifest lists. +expected_sql=$((prev_num * 2)) +actual_sql=$(find "$MIGRATIONS_DIR" -maxdepth 1 -name '*.sql' | wc -l | tr -d ' ') +if [ "$actual_sql" -ne "$expected_sql" ]; then + echo "ERROR: Expected ${expected_sql} .sql files (${prev_num} up + ${prev_num} down) but found ${actual_sql}." + errors=$((errors + 1)) +fi + +if [ "$errors" -gt 0 ]; then + echo "FAILED: ${errors} migration verification error(s)." + exit 1 +fi + +echo "Migrations OK: ${prev_num} migrations verified." diff --git a/pkg/db/migrations/MANIFEST b/pkg/db/migrations/MANIFEST new file mode 100644 index 000000000..4f65f8444 --- /dev/null +++ b/pkg/db/migrations/MANIFEST @@ -0,0 +1,13 @@ +# Migration manifest. Each line is a migration prefix (NNNNNN_name) that +# must have matching .up.sql and .down.sql files in this directory. +# Versions must be sequential with no gaps or duplicates. +# +# When adding a new migration, append it here. This file forces a git +# merge conflict if two PRs independently add the same version number. +# +# Run `make verify-migrations` to validate. + +000001_create_partitioned_tables +000002_create_test_daily_summaries +000003_create_variant_combinations +000004_drop_daily_summary_variant_combination