Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 104 additions & 39 deletions pkg/variantregistry/ocp.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,31 @@ ORDER BY j.prowjob_job_name;
dur := time.Since(start)
log.WithField("count", count.Load()).Infof("processed primary job list in %s", dur)

var errs []string
for jobName, variants := range variantsByJob {
if err := validateSpotCheckVariants(jobName, variants); err != nil {
errs = append(errs, err.Error())
}
}
if len(errs) > 0 {
sort.Strings(errs)
return nil, errors.New("variant registry validation failed:\n" + strings.Join(errs, "\n"))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: You can build a friendlier error by wrapping the underlying errors instead of concatenating their strings into a new error's message.

return nil, fmt.Errorf("variant registry validation failed:\n %w", errors.Join(errs...))

}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

return variantsByJob, nil
}

// validateSpotCheckVariants verifies that a spot-check job is fully classified.
//
// A job counts as spot-check when its JobTier variant starts with "spotcheck-"
// (for example "spotcheck-30d"). Such a job must have non-empty values for both
// SpotCheckComponent and SpotCheckCapability; if either is absent or empty an
// error naming the job and its tier is returned. Jobs in any other tier, or
// with no tier at all, always pass and yield nil.
func validateSpotCheckVariants(jobName string, variants map[string]string) error {
	tier := variants[VariantJobTier]
	if !strings.HasPrefix(tier, "spotcheck-") {
		// Not a spot-check job: nothing to validate.
		return nil
	}

	component := variants[VariantSpotCheckComponent]
	capability := variants[VariantSpotCheckCapability]
	if component != "" && capability != "" {
		return nil
	}

	return fmt.Errorf("job %q has JobTier=%s but is missing SpotCheckComponent or SpotCheckCapability", jobName, tier)
}

// fileVariantsToIgnore are values in the cluster-data.json that vary by run, and are not consistent for the job itself.
// These are unsuited for variants.
var fileVariantsToIgnore = map[string]bool{
Expand Down Expand Up @@ -427,34 +449,36 @@ var (
)

const (
VariantAggregation = "Aggregation" // aggregated or none
VariantArch = "Architecture"
VariantFeatureSet = "FeatureSet" // techpreview / standard
VariantInstaller = "Installer" // ipi / upi / assisted
VariantNetwork = "Network"
VariantNetworkAccess = "NetworkAccess" // disconnected / proxy / standard
VariantNetworkStack = "NetworkStack" // ipv4 / ipv6 / dual
VariantOwner = "Owner" // eng / osd
VariantPlatform = "Platform"
VariantScheduler = "Scheduler" // realtime / standard
VariantSecurityMode = "SecurityMode" // fips / default
VariantSuite = "Suite" // parallel / serial
VariantProcedure = "Procedure" // for jobs that do a specific procedure on the cluster (etcd scaling, cpu partitioning, etc.), and then optionally run conformance
VariantJobTier = "JobTier" // specifies rare, blocking, informing, standard jobs
VariantTopology = "Topology" // ha / single / compact / external
VariantUpgrade = "Upgrade"
VariantContainerRuntime = "ContainerRuntime" // runc / crun
VariantCGroupMode = "CGroupMode" // v2 / v1
VariantRelease = "Release"
VariantReleaseMinor = "ReleaseMinor"
VariantReleaseMajor = "ReleaseMajor"
VariantFromRelease = "FromRelease"
VariantFromReleaseMinor = "FromReleaseMinor"
VariantFromReleaseMajor = "FromReleaseMajor"
VariantLayeredProduct = "LayeredProduct"
VariantOS = "OS"
VariantDefaultValue = "default"
VariantNoValue = "none"
VariantAggregation = "Aggregation" // aggregated or none
VariantArch = "Architecture"
VariantFeatureSet = "FeatureSet" // techpreview / standard
VariantInstaller = "Installer" // ipi / upi / assisted
VariantNetwork = "Network"
VariantNetworkAccess = "NetworkAccess" // disconnected / proxy / standard
VariantNetworkStack = "NetworkStack" // ipv4 / ipv6 / dual
VariantOwner = "Owner" // eng / osd
VariantPlatform = "Platform"
VariantScheduler = "Scheduler" // realtime / standard
VariantSecurityMode = "SecurityMode" // fips / default
VariantSuite = "Suite" // parallel / serial
VariantProcedure = "Procedure" // for jobs that do a specific procedure on the cluster (etcd scaling, cpu partitioning, etc.), and then optionally run conformance
VariantJobTier = "JobTier" // specifies rare, blocking, informing, standard jobs
VariantTopology = "Topology" // ha / single / compact / external
VariantUpgrade = "Upgrade"
VariantContainerRuntime = "ContainerRuntime" // runc / crun
VariantCGroupMode = "CGroupMode" // v2 / v1
VariantRelease = "Release"
VariantReleaseMinor = "ReleaseMinor"
VariantReleaseMajor = "ReleaseMajor"
VariantFromRelease = "FromRelease"
VariantFromReleaseMinor = "FromReleaseMinor"
VariantFromReleaseMajor = "FromReleaseMajor"
VariantLayeredProduct = "LayeredProduct"
VariantOS = "OS"
VariantSpotCheckComponent = "SpotCheckComponent" // component readiness component for spot-check jobs
VariantSpotCheckCapability = "SpotCheckCapability" // component readiness capability for spot-check jobs
VariantDefaultValue = "default"
VariantNoValue = "none"
)

func (v *OCPVariantLoader) IdentifyVariants(jLog logrus.FieldLogger, jobName string) map[string]string {
Expand All @@ -480,6 +504,7 @@ func (v *OCPVariantLoader) IdentifyVariants(jLog logrus.FieldLogger, jobName str
setContainerRuntime,
setProcedure,
setOS,
setSpotCheckClassification,
v.setJobTier, // Keep this near last, it relies on other variants like owner
} {
setter(jLog, variants, jobName)
Expand Down Expand Up @@ -726,29 +751,69 @@ func (v *OCPVariantLoader) setRelease(logger logrus.FieldLogger, variants map[st
}
}

// setSpotCheckClassification marks jobs that Component Readiness should evaluate
// as spot-check jobs. Such jobs run infrequently (historically the "rare" tier)
// and must fully pass at least once in the sample window, with retries if
// needed. They are meant for stable, non-core functionality that does not need
// in-depth statistical regression monitoring.
//
// The job name is lower-cased and compared against a fixed list of rules. The
// first rule whose substrings are all present in the name wins, and its values
// are written to the SpotCheckComponent and SpotCheckCapability variants, which
// control where the synthetic results appear in the component readiness report.
// If no rule matches, variants is left untouched.
//
// Be sure to use real Component names from OCPBUGS when adding rules.
func setSpotCheckClassification(_ logrus.FieldLogger, variants map[string]string, jobName string) {
	type spotCheckRule struct {
		required   []string // every entry must occur in the lower-cased job name
		component  string
		capability string
	}

	rules := []spotCheckRule{
		{required: []string{"-cpu-partitioning"}, component: "Node / Kubelet", capability: "CPU Partitioning"},
		{required: []string{"-etcd-scaling"}, component: "Etcd", capability: "Scaling"},
	}

	lowered := strings.ToLower(jobName)

nextRule:
	for i := range rules {
		for _, needle := range rules[i].required {
			if !strings.Contains(lowered, needle) {
				// One required substring is absent; try the next rule.
				continue nextRule
			}
		}

		// All substrings matched: the first matching rule wins.
		variants[VariantSpotCheckComponent] = rules[i].component
		variants[VariantSpotCheckCapability] = rules[i].capability
		return
	}
}

// setJobTier sets the jobTier for a job, with values like this:
//
// blocking: blocking job on payloads, covered by component readiness
// informing: informing job on payloads, covered by component readiness
// standard: should be visible in default views (component readiness, sippy), covered by component readiness
// rare: highly reliable jobs that run at a reduced frequency
// candidate: not covered by component readiness, but may be promoted in the future
// hidden: data should still be synced, but not shown by default
// excluded: data should not be synced, and excluded from all views
// blocking: blocking job on payloads, covered by component readiness
// informing: informing job on payloads, covered by component readiness
// standard: should be visible in default views (component readiness, sippy), covered by component readiness
// spotcheck: jobs evaluated by spot-check analysis (job pass/fail, not junit); views opt in via JobTier include
// candidate: not covered by component readiness, but may be promoted in the future
// hidden: data should still be synced, but not shown by default
// excluded: data should not be synced, and excluded from all views
//
// Note: blocking/informing/standard tiers may be downgraded to candidate by
// adjustJobTierBasedOnView if the job's variants don't match the release-main view.
func (v *OCPVariantLoader) setJobTier(_ logrus.FieldLogger, variants map[string]string, jobName string) {
// Jobs classified as spot-check get the spotcheck-30d tier automatically.
if _, ok := variants[VariantSpotCheckComponent]; ok {
variants[VariantJobTier] = "spotcheck-30d"
return
}

jobNameLower := strings.ToLower(jobName)

jobTierPatterns := []struct {
substrings []string
jobTier string
}{
// Rarely run
{[]string{"-cpu-partitioning"}, "rare"},
{[]string{"-etcd-scaling"}, "rare"},

// QE jobs allowlisted for Component Readiness
{[]string{"-automated-release"}, "standard"},

Expand Down
66 changes: 63 additions & 3 deletions pkg/variantregistry/ocp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2204,6 +2204,66 @@ func TestVariantSyncer(t *testing.T) {
VariantOS: "unknown",
},
},
{
job: "periodic-ci-openshift-release-main-nightly-4.18-e2e-aws-ovn-cpu-partitioning",
expected: map[string]string{
VariantRelease: "4.18",
VariantReleaseMajor: "4",
VariantReleaseMinor: "18",
VariantArch: "amd64",
VariantInstaller: "ipi",
VariantPlatform: "aws",
VariantNetwork: "ovn",
VariantNetworkStack: "ipv4",
VariantOwner: "eng",
VariantTopology: "ha",
VariantSuite: "unknown",
VariantUpgrade: VariantNoValue,
VariantProcedure: "cpu-partitioning",
VariantJobTier: "spotcheck-30d",
VariantAggregation: VariantNoValue,
VariantSecurityMode: VariantDefaultValue,
VariantFeatureSet: VariantDefaultValue,
VariantNetworkAccess: VariantDefaultValue,
VariantScheduler: VariantDefaultValue,
VariantContainerRuntime: "crun",
VariantCGroupMode: "v2",
VariantLayeredProduct: VariantNoValue,
VariantOS: "rhcos9",
VariantSpotCheckComponent: "Node / Kubelet",
VariantSpotCheckCapability: "CPU Partitioning",
},
},
{
job: "periodic-ci-openshift-release-main-nightly-4.18-e2e-gcp-ovn-etcd-scaling",
expected: map[string]string{
VariantRelease: "4.18",
VariantReleaseMajor: "4",
VariantReleaseMinor: "18",
VariantArch: "amd64",
VariantInstaller: "ipi",
VariantPlatform: "gcp",
VariantNetwork: "ovn",
VariantNetworkStack: "ipv4",
VariantOwner: "eng",
VariantTopology: "ha",
VariantSuite: "etcd-scaling",
VariantUpgrade: VariantNoValue,
VariantProcedure: "etcd-scaling",
VariantJobTier: "spotcheck-30d",
VariantAggregation: VariantNoValue,
VariantSecurityMode: VariantDefaultValue,
VariantFeatureSet: VariantDefaultValue,
VariantNetworkAccess: VariantDefaultValue,
VariantScheduler: VariantDefaultValue,
VariantContainerRuntime: "crun",
VariantCGroupMode: "v2",
VariantLayeredProduct: VariantNoValue,
VariantOS: "rhcos9",
VariantSpotCheckComponent: "Etcd",
VariantSpotCheckCapability: "Scaling",
},
},
}
for _, test := range tests {
t.Run(test.job, func(t *testing.T) {
Expand Down Expand Up @@ -2573,16 +2633,16 @@ func TestAdjustJobTierBasedOnView(t *testing.T) {
expectedTier: "excluded",
},
{
name: "rare job is not adjusted even with non-matching variants",
name: "spotcheck job is not adjusted even with non-matching variants",
variants: map[string]string{
VariantRelease: "4.22",
VariantJobTier: "rare",
VariantJobTier: "spotcheck-30d",
VariantArch: "s390x",
VariantPlatform: "rosa",
VariantNetwork: "sdn",
VariantOwner: "chaos",
},
expectedTier: "rare",
expectedTier: "spotcheck-30d",
},
{
name: "job with no release is not adjusted",
Expand Down
14 changes: 13 additions & 1 deletion pkg/variantregistry/snapshot.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package variantregistry

import (
"fmt"
"os"
"sort"
"strings"

"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -34,6 +36,7 @@ func NewVariantSnapshot(config *v1.SippyConfig, views []crview.View, syntheticRe
func (s *VariantSnapshot) Identify() (JobVariants, error) {
newVariants := map[string]map[string]string{}
variantSyncer := OCPVariantLoader{config: s.config, views: s.views, syntheticReleaseJobOverrides: s.syntheticReleaseJobOverrides}
var errs []string
for _, releaseCfg := range s.config.Releases {
for job := range releaseCfg.Jobs {
if isIgnoredJob(job) {
Expand All @@ -42,10 +45,19 @@ func (s *VariantSnapshot) Identify() (JobVariants, error) {
if _, done := newVariants[job]; done {
continue
}
newVariants[job] = variantSyncer.CalculateVariantsForJob(s.log, job, nil)
variants := variantSyncer.CalculateVariantsForJob(s.log, job, nil)
newVariants[job] = variants
if err := validateSpotCheckVariants(job, variants); err != nil {
errs = append(errs, err.Error())
}
}
}

if len(errs) > 0 {
sort.Strings(errs)
return nil, fmt.Errorf("variant registry validation failed:\n%s", strings.Join(errs, "\n"))
}

return newVariants, nil
}

Expand Down
Loading