From 8147c32cb8788a62744563c463dad95639de2221 Mon Sep 17 00:00:00 2001 From: Max Cao Date: Thu, 16 Apr 2026 10:43:07 -0700 Subject: [PATCH] feat(metrics): include Karpenter node vCPUs in hypershift_cluster_vcpus billing metric Add VCPUs field to AutoNodeStatus, computed by karpenter-operator from NodeClaim capacity and cross-referenced against live Node objects. The metrics collector seeds per-cluster vCPU count from this field before accumulating native NodePool vCPUs on top. - NodeClaim is the authority for Karpenter ownership (no label dependency) - e2e tests validate vCPU status + metric at 0, scale-up, and consolidation Made-with: Cursor Signed-off-by: Max Cao --- api/hypershift/v1beta1/hostedcluster_types.go | 10 ++ .../v1beta1/zz_generated.deepcopy.go | 5 + .../AAA_ungated.yaml | 11 ++ .../ClusterUpdateAcceptRisks.yaml | 11 ++ .../ClusterVersionOperatorConfiguration.yaml | 11 ++ .../ExternalOIDC.yaml | 11 ++ ...ernalOIDCWithUIDAndExtraClaimMappings.yaml | 11 ++ .../ExternalOIDCWithUpstreamParity.yaml | 11 ++ .../GCPPlatform.yaml | 11 ++ .../HCPEtcdBackup.yaml | 11 ++ ...perShiftOnlyDynamicResourceAllocation.yaml | 11 ++ .../ImageStreamImportMode.yaml | 11 ++ .../KMSEncryptionProvider.yaml | 11 ++ .../OpenStack.yaml | 11 ++ .../TLSAdherence.yaml | 11 ++ .../AAA_ungated.yaml | 11 ++ .../ClusterUpdateAcceptRisks.yaml | 11 ++ .../ClusterVersionOperatorConfiguration.yaml | 11 ++ .../ExternalOIDC.yaml | 11 ++ ...ernalOIDCWithUIDAndExtraClaimMappings.yaml | 11 ++ .../ExternalOIDCWithUpstreamParity.yaml | 11 ++ .../GCPPlatform.yaml | 11 ++ .../HCPEtcdBackup.yaml | 11 ++ ...perShiftOnlyDynamicResourceAllocation.yaml | 11 ++ .../ImageStreamImportMode.yaml | 11 ++ .../KMSEncryptionProvider.yaml | 11 ++ .../OpenStack.yaml | 11 ++ .../TLSAdherence.yaml | 11 ++ .../hypershift/v1beta1/autonodestatus.go | 9 + ...usters-Hypershift-CustomNoUpgrade.crd.yaml | 11 ++ ...hostedclusters-Hypershift-Default.crd.yaml | 11 ++ ...s-Hypershift-TechPreviewNoUpgrade.crd.yaml | 11 ++ 
...planes-Hypershift-CustomNoUpgrade.crd.yaml | 11 ++ ...dcontrolplanes-Hypershift-Default.crd.yaml | 11 ++ ...s-Hypershift-TechPreviewNoUpgrade.crd.yaml | 11 ++ docs/content/reference/aggregated-docs.md | 16 ++ docs/content/reference/api.md | 16 ++ .../controllers/nodepool/metrics/metrics.go | 8 +- .../nodepool/metrics/metrics_test.go | 169 ++++++++++++++++++ .../karpenter/karpenter_controller.go | 48 ++++- .../karpenter/karpenter_controller_test.go | 90 ++++++++++ test/e2e/karpenter_test.go | 111 +++++++++++- .../hypershift/v1beta1/hostedcluster_types.go | 10 ++ .../v1beta1/zz_generated.deepcopy.go | 5 + 44 files changed, 843 insertions(+), 6 deletions(-) diff --git a/api/hypershift/v1beta1/hostedcluster_types.go b/api/hypershift/v1beta1/hostedcluster_types.go index 8d4db6c2bb34..c2d634988708 100644 --- a/api/hypershift/v1beta1/hostedcluster_types.go +++ b/api/hypershift/v1beta1/hostedcluster_types.go @@ -2205,6 +2205,16 @@ type AutoNodeStatus struct { // +kubebuilder:validation:Minimum=0 // +optional NodeClaimCount *int32 `json:"nodeClaimCount,omitempty"` + + // vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + // that have registered and reported capacity. This is the sum of CPU capacity + // from each NodeClaim whose corresponding node exists (status.nodeName is set). + // This value is 0 when no Karpenter nodes are provisioned. + // Used by the metrics collector for billing aggregation. 
+ // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=1000000 + // +optional + VCPUs *int32 `json:"vcpus,omitempty"` } // PlatformStatus contains platform-specific status diff --git a/api/hypershift/v1beta1/zz_generated.deepcopy.go b/api/hypershift/v1beta1/zz_generated.deepcopy.go index 0f69bb3bcdeb..d429305a30b3 100644 --- a/api/hypershift/v1beta1/zz_generated.deepcopy.go +++ b/api/hypershift/v1beta1/zz_generated.deepcopy.go @@ -584,6 +584,11 @@ func (in *AutoNodeStatus) DeepCopyInto(out *AutoNodeStatus) { *out = new(int32) **out = **in } + if in.VCPUs != nil { + in, out := &in.VCPUs, &out.VCPUs + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoNodeStatus. diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/AAA_ungated.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/AAA_ungated.yaml index 1abed4220a44..7838ef9d8f66 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/AAA_ungated.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/AAA_ungated.yaml @@ -6523,6 +6523,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml index 7e2dae3eaec1..29186fae1769 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml @@ -6506,6 +6506,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml index 015ff6a7d6dd..75a647659faa 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml @@ -6526,6 +6526,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDC.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDC.yaml index 04ba98af406d..9de23f8d67e4 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDC.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDC.yaml @@ -6838,6 +6838,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml index 12466df76a99..c62d6ad473fe 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml @@ -6978,6 +6978,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml index a92de6768f77..3e972abdcf76 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml @@ -6969,6 +6969,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/GCPPlatform.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/GCPPlatform.yaml index e1e309035f69..dae5c617ab62 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/GCPPlatform.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/GCPPlatform.yaml @@ -6952,6 +6952,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HCPEtcdBackup.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HCPEtcdBackup.yaml index a36992c0a928..d0327faa3dec 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HCPEtcdBackup.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HCPEtcdBackup.yaml @@ -6571,6 +6571,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. 
This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml index 1d9546e3d067..14c27c8efc19 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml @@ -6528,6 +6528,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ImageStreamImportMode.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ImageStreamImportMode.yaml index 2bcf37c85023..5dccbaf9408d 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ImageStreamImportMode.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/ImageStreamImportMode.yaml @@ -6524,6 +6524,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/KMSEncryptionProvider.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/KMSEncryptionProvider.yaml index e2bb25a53715..27d0b7170ef2 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/KMSEncryptionProvider.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/KMSEncryptionProvider.yaml @@ -6582,6 +6582,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/OpenStack.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/OpenStack.yaml index 362959a2001d..99173632616b 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/OpenStack.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/OpenStack.yaml @@ -7057,6 +7057,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. 
This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/TLSAdherence.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/TLSAdherence.yaml index 2f0e7c60afdd..cb7aaf7b0b2d 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/TLSAdherence.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedclusters.hypershift.openshift.io/TLSAdherence.yaml @@ -6546,6 +6546,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/AAA_ungated.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/AAA_ungated.yaml index f7bc1753b857..873dd1d3b534 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/AAA_ungated.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/AAA_ungated.yaml @@ -6328,6 +6328,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml index e6ddd5c97dd1..81e5c722d2d1 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterUpdateAcceptRisks.yaml @@ -6311,6 +6311,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml index 125ee0e3a972..33eacd58b502 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ClusterVersionOperatorConfiguration.yaml @@ -6331,6 +6331,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDC.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDC.yaml index 6f9dc3e1a883..44ad9d8ed2be 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDC.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDC.yaml @@ -6643,6 +6643,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml index 0f42b7340d8e..968290ae2fa2 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUIDAndExtraClaimMappings.yaml @@ -6783,6 +6783,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml index 1b11ade5f959..4b868e1a3ef2 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ExternalOIDCWithUpstreamParity.yaml @@ -6774,6 +6774,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/GCPPlatform.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/GCPPlatform.yaml index 953a14ed8a31..53291008cc18 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/GCPPlatform.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/GCPPlatform.yaml @@ -6757,6 +6757,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HCPEtcdBackup.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HCPEtcdBackup.yaml index b71f564f0ca9..e4450913ffac 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HCPEtcdBackup.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HCPEtcdBackup.yaml @@ -6376,6 +6376,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml index ee900daaec0e..6fe328d94dd0 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/HyperShiftOnlyDynamicResourceAllocation.yaml @@ -6333,6 +6333,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ImageStreamImportMode.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ImageStreamImportMode.yaml index f2a648db71f2..8760da617218 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ImageStreamImportMode.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/ImageStreamImportMode.yaml @@ -6329,6 +6329,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/KMSEncryptionProvider.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/KMSEncryptionProvider.yaml index 0f7161e22eb9..e5d2caea6507 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/KMSEncryptionProvider.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/KMSEncryptionProvider.yaml @@ -6387,6 +6387,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/OpenStack.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/OpenStack.yaml index ceb36a1478e4..22f92db2537f 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/OpenStack.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/OpenStack.yaml @@ -6862,6 +6862,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/TLSAdherence.yaml b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/TLSAdherence.yaml index 808ec45afbb7..30962b187798 100644 --- a/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/TLSAdherence.yaml +++ b/api/hypershift/v1beta1/zz_generated.featuregated-crd-manifests/hostedcontrolplanes.hypershift.openshift.io/TLSAdherence.yaml @@ -6351,6 +6351,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. 
This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/client/applyconfiguration/hypershift/v1beta1/autonodestatus.go b/client/applyconfiguration/hypershift/v1beta1/autonodestatus.go index 16f8bd7878b4..84e43dad5547 100644 --- a/client/applyconfiguration/hypershift/v1beta1/autonodestatus.go +++ b/client/applyconfiguration/hypershift/v1beta1/autonodestatus.go @@ -22,6 +22,7 @@ package v1beta1 type AutoNodeStatusApplyConfiguration struct { NodeCount *int32 `json:"nodeCount,omitempty"` NodeClaimCount *int32 `json:"nodeClaimCount,omitempty"` + VCPUs *int32 `json:"vcpus,omitempty"` } // AutoNodeStatusApplyConfiguration constructs a declarative configuration of the AutoNodeStatus type for use with @@ -45,3 +46,11 @@ func (b *AutoNodeStatusApplyConfiguration) WithNodeClaimCount(value int32) *Auto b.NodeClaimCount = &value return b } + +// WithVCPUs sets the VCPUs field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the VCPUs field is set to the value of the last call. 
+func (b *AutoNodeStatusApplyConfiguration) WithVCPUs(value int32) *AutoNodeStatusApplyConfiguration { + b.VCPUs = &value + return b +} diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-CustomNoUpgrade.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-CustomNoUpgrade.crd.yaml index 66b68c64dd75..6de80f8f0a78 100644 --- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-CustomNoUpgrade.crd.yaml +++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-CustomNoUpgrade.crd.yaml @@ -8344,6 +8344,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-Default.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-Default.crd.yaml index 932383bd7374..8226337d326d 100644 --- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-Default.crd.yaml +++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-Default.crd.yaml @@ -7015,6 +7015,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. 
This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-TechPreviewNoUpgrade.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-TechPreviewNoUpgrade.crd.yaml index bbff8d81bedc..919e3b55bbfd 100644 --- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-TechPreviewNoUpgrade.crd.yaml +++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedclusters-Hypershift-TechPreviewNoUpgrade.crd.yaml @@ -8215,6 +8215,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-CustomNoUpgrade.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-CustomNoUpgrade.crd.yaml index 92fafedc7ec8..b3be64d7b3b7 100644 --- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-CustomNoUpgrade.crd.yaml +++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-CustomNoUpgrade.crd.yaml @@ -8149,6 +8149,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-Default.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-Default.crd.yaml index d0000d26f183..6f530195fc0d 100644 --- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-Default.crd.yaml +++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-Default.crd.yaml @@ -6820,6 +6820,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. 
This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-TechPreviewNoUpgrade.crd.yaml b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-TechPreviewNoUpgrade.crd.yaml index 6a697cb8f723..26ee13c96054 100644 --- a/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-TechPreviewNoUpgrade.crd.yaml +++ b/cmd/install/assets/crds/hypershift-operator/zz_generated.crd-manifests/hostedcontrolplanes-Hypershift-TechPreviewNoUpgrade.crd.yaml @@ -8020,6 +8020,17 @@ spec: format: int32 minimum: 0 type: integer + vcpus: + description: |- + vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + that have registered and reported capacity. This is the sum of CPU capacity + from each NodeClaim whose corresponding node exists (status.nodeName is set). + This value is 0 when no Karpenter nodes are provisioned. + Used by the metrics collector for billing aggregation. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer type: object conditions: description: |- diff --git a/docs/content/reference/aggregated-docs.md b/docs/content/reference/aggregated-docs.md index 428abe478d12..9ef5da727200 100644 --- a/docs/content/reference/aggregated-docs.md +++ b/docs/content/reference/aggregated-docs.md @@ -34919,6 +34919,22 @@ int32 This represents what Karpenter intends to provision, whether or not the node object exists yet.

+ + +vcpus
+ +int32 + + + +(Optional) +

vcpus is the total number of virtual CPUs across all Karpenter-managed nodes +that have registered and reported capacity. This is the sum of CPU capacity +from each NodeClaim whose corresponding node exists (status.nodeName is set). +This value is 0 when no Karpenter nodes are provisioned. +Used by the metrics collector for billing aggregation.

+ + ###AvailabilityPolicy { #hypershift.openshift.io/v1beta1.AvailabilityPolicy } diff --git a/docs/content/reference/api.md b/docs/content/reference/api.md index e89303956e53..a9ce510fc013 100644 --- a/docs/content/reference/api.md +++ b/docs/content/reference/api.md @@ -3091,6 +3091,22 @@ int32 This represents what Karpenter intends to provision, whether or not the node object exists yet.

+ + +vcpus
+ +int32 + + + +(Optional) +

vcpus is the total number of virtual CPUs across all Karpenter-managed nodes +that have registered and reported capacity. This is the sum of CPU capacity +from each NodeClaim whose corresponding node exists (status.nodeName is set). +This value is 0 when no Karpenter nodes are provisioned. +Used by the metrics collector for billing aggregation.

+ + ###AvailabilityPolicy { #hypershift.openshift.io/v1beta1.AvailabilityPolicy } diff --git a/hypershift-operator/controllers/nodepool/metrics/metrics.go b/hypershift-operator/controllers/nodepool/metrics/metrics.go index b3aaf2bb393f..515e96e5643c 100644 --- a/hypershift-operator/controllers/nodepool/metrics/metrics.go +++ b/hypershift-operator/controllers/nodepool/metrics/metrics.go @@ -355,12 +355,18 @@ func (c *nodePoolsMetricsCollector) Collect(ch chan<- prometheus.Metric) { for k := range hclusters.Items { hcluster := &hclusters.Items[k] - hclusterPathToData[hcluster.Namespace+"/"+hcluster.Name] = &hclusterData{ + data := &hclusterData{ id: hcluster.Spec.ClusterID, namespace: hcluster.Namespace, name: hcluster.Name, platform: hcluster.Spec.Platform.Type, } + // Seed with Karpenter-managed vCPUs from AutoNode status. + // Native NodePool vCPUs accumulate on top in the NodePool loop below. + if hcluster.Status.AutoNode.VCPUs != nil { + data.vCpusCount = *hcluster.Status.AutoNode.VCPUs + } + hclusterPathToData[hcluster.Namespace+"/"+hcluster.Name] = data } } diff --git a/hypershift-operator/controllers/nodepool/metrics/metrics_test.go b/hypershift-operator/controllers/nodepool/metrics/metrics_test.go index ceff96d4c369..92356900ee58 100644 --- a/hypershift-operator/controllers/nodepool/metrics/metrics_test.go +++ b/hypershift-operator/controllers/nodepool/metrics/metrics_test.go @@ -550,3 +550,172 @@ func TestErrorCache(t *testing.T) { g.Expect(ec2CallCount).To(Equal(2), "EC2 API should be retried when its error was transient") }) } + +func TestReportVCpusWithKarpenterAutoNode(t *testing.T) { + testCases := []struct { + name string + autoVCPUs *int32 + npsParams []nodePoolParams + + MockedEC2DescribeInstanceTypesFunc func(ctx context.Context, input *ec2v2.DescribeInstanceTypesInput, optFns ...func(*ec2v2.Options)) (*ec2v2.DescribeInstanceTypesOutput, error) + + expectedVCpusCount float64 + expectedVCpusCountErrorReason string + }{ + { + name: "When cluster has 
only Karpenter nodes, it should report AutoNode vCPUs", + autoVCPUs: ptr.To[int32](12), + npsParams: []nodePoolParams{}, + expectedVCpusCount: 12, + }, + { + name: "When cluster has both Karpenter and native nodes, it should sum both", + autoVCPUs: ptr.To[int32](20), + npsParams: []nodePoolParams{ + {availableNodesCount: 2, ec2InstanceType: "m5.xlarge"}, + }, + MockedEC2DescribeInstanceTypesFunc: func(ctx context.Context, input *ec2v2.DescribeInstanceTypesInput, optFns ...func(*ec2v2.Options)) (*ec2v2.DescribeInstanceTypesOutput, error) { + return initDescribeInstanceTypesOutput([]ec2typesv2.InstanceTypeInfo{ + initInstanceTypeInfo("m5.xlarge", 4)}), nil + }, + expectedVCpusCount: 28, // 20 Karpenter + 2*4 native + }, + { + name: "When AutoNode.VCPUs is nil, it should report only native vCPUs", + autoVCPUs: nil, + npsParams: []nodePoolParams{ + {availableNodesCount: 2, ec2InstanceType: "m5.xlarge"}, + }, + MockedEC2DescribeInstanceTypesFunc: func(ctx context.Context, input *ec2v2.DescribeInstanceTypesInput, optFns ...func(*ec2v2.Options)) (*ec2v2.DescribeInstanceTypesOutput, error) { + return initDescribeInstanceTypesOutput([]ec2typesv2.InstanceTypeInfo{ + initInstanceTypeInfo("m5.xlarge", 4)}), nil + }, + expectedVCpusCount: 8, + }, + { + name: "When native lookup fails with Karpenter vCPUs, it should report -1", + autoVCPUs: ptr.To[int32](20), + npsParams: []nodePoolParams{ + {availableNodesCount: 2, ec2InstanceType: "unknown-type"}, + }, + MockedEC2DescribeInstanceTypesFunc: func(ctx context.Context, input *ec2v2.DescribeInstanceTypesInput, optFns ...func(*ec2v2.Options)) (*ec2v2.DescribeInstanceTypesOutput, error) { + return &ec2v2.DescribeInstanceTypesOutput{}, nil + }, + expectedVCpusCount: -1, + expectedVCpusCountErrorReason: errRosaCPUsInstanceTypesConfigNotFound.Error(), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + hcluster := &hyperv1.HostedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hc", + Namespace: "any", + 
}, + Spec: hyperv1.HostedClusterSpec{ + ClusterID: "id", + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AWSPlatform, + }, + }, + Status: hyperv1.HostedClusterStatus{ + AutoNode: hyperv1.AutoNodeStatus{ + VCPUs: tc.autoVCPUs, + }, + }, + } + + clientBuilder := fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects(hcluster) + + ec2MockedClient := &Ec2ClientMock{} + if tc.MockedEC2DescribeInstanceTypesFunc != nil { + ec2MockedClient.MockedDescribeInstanceTypesFunc = tc.MockedEC2DescribeInstanceTypesFunc + } + + for k, npParam := range tc.npsParams { + nodePool := &hyperv1.NodePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: strconv.Itoa(k), + Namespace: "any", + }, + Spec: hyperv1.NodePoolSpec{ + ClusterName: "hc", + Platform: hyperv1.NodePoolPlatform{ + Type: hyperv1.AWSPlatform, + AWS: &hyperv1.AWSNodePoolPlatform{ + InstanceType: npParam.ec2InstanceType, + }, + }, + }, + Status: hyperv1.NodePoolStatus{ + Replicas: npParam.availableNodesCount, + }, + } + clientBuilder = clientBuilder.WithObjects(nodePool) + } + + reg := prometheus.NewPedanticRegistry() + reg.MustRegister(createNodePoolsMetricsCollector(clientBuilder.Build(), ec2MockedClient, clock.RealClock{})) + + allMetricsValues, err := reg.Gather() + if err != nil { + t.Fatalf("gathering metrics failed: %v", err) + } + + var vCpusCountMetricValue *dto.MetricFamily + var vCpusComputationErrorMetricValue *dto.MetricFamily + var expectedVCpusComputationErrorMetricValue *dto.MetricFamily + + for _, metricValue := range allMetricsValues { + if metricValue != nil && metricValue.Name != nil { + switch *metricValue.Name { + case VCpusCountByHClusterMetricName: + vCpusCountMetricValue = metricValue + case VCpusComputationErrorByHClusterMetricName: + vCpusComputationErrorMetricValue = metricValue + } + } + } + + expectedBaseLabels := []*dto.LabelPair{ + {Name: ptr.To("_id"), Value: ptr.To("id")}, + {Name: ptr.To("name"), Value: ptr.To("hc")}, + {Name: ptr.To("namespace"), Value: ptr.To("any")}, + {Name: 
ptr.To("platform"), Value: ptr.To(string(hyperv1.AWSPlatform))}, + } + + expectedVCpusCountMetricValue := &dto.MetricFamily{ + Name: ptr.To(VCpusCountByHClusterMetricName), + Help: ptr.To(VCpusCountByHClusterMetricHelp), + Type: func() *dto.MetricType { v := dto.MetricType(1); return &v }(), + Metric: []*dto.Metric{{ + Label: expectedBaseLabels, + Gauge: &dto.Gauge{Value: ptr.To(tc.expectedVCpusCount)}, + }}, + } + + if tc.expectedVCpusCountErrorReason != "" { + expectedVCpusComputationErrorMetricValue = &dto.MetricFamily{ + Name: ptr.To(VCpusComputationErrorByHClusterMetricName), + Help: ptr.To(VCpusComputationErrorByHClusterMetricHelp), + Type: func() *dto.MetricType { v := dto.MetricType(1); return &v }(), + Metric: []*dto.Metric{{ + Label: append(expectedBaseLabels, &dto.LabelPair{ + Name: ptr.To("reason"), Value: ptr.To(tc.expectedVCpusCountErrorReason), + }), + Gauge: &dto.Gauge{Value: ptr.To[float64](1.0)}, + }}, + } + } + + if diff := cmp.Diff(vCpusCountMetricValue, expectedVCpusCountMetricValue, ignoreUnexportedDto); diff != "" { + t.Errorf("vCpus count differs from expected: %s", diff) + } + + if diff := cmp.Diff(vCpusComputationErrorMetricValue, expectedVCpusComputationErrorMetricValue, ignoreUnexportedDto); diff != "" { + t.Errorf("vCpus error metric differs from expected: %s", diff) + } + }) + } +} diff --git a/karpenter-operator/controllers/karpenter/karpenter_controller.go b/karpenter-operator/controllers/karpenter/karpenter_controller.go index 32c34a61d1ac..3fa06270f4d3 100644 --- a/karpenter-operator/controllers/karpenter/karpenter_controller.go +++ b/karpenter-operator/controllers/karpenter/karpenter_controller.go @@ -146,12 +146,34 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, man GenericFunc: func(e event.GenericEvent) bool { return false }, } + // NodeClaim predicate: fire on create/delete (count changes) and also when + // CPU capacity changes (for vCPU billing). 
Capacity is populated after the + // node registers, so we need updates for that transition. + nodeClaimPredicate := predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { return true }, + DeleteFunc: func(e event.DeleteEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldNC, ok1 := e.ObjectOld.(*karpenterv1.NodeClaim) + newNC, ok2 := e.ObjectNew.(*karpenterv1.NodeClaim) + if !ok1 || !ok2 { + return false + } + oldCPU := oldNC.Status.Capacity[corev1.ResourceCPU] + newCPU := newNC.Status.Capacity[corev1.ResourceCPU] + if !oldCPU.Equal(newCPU) { + return true + } + return oldNC.Status.NodeName != newNC.Status.NodeName + }, + GenericFunc: func(e event.GenericEvent) bool { return false }, + } + // Watch NodeClaims guest side to trigger reconcile when NodeClaims change. if err := c.Watch(source.Kind[client.Object](mgr.GetCache(), &karpenterv1.NodeClaim{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, _ client.Object) []ctrl.Request { return []ctrl.Request{{NamespacedName: client.ObjectKey{Namespace: r.Namespace}}} }), - countChangePredicate, + nodeClaimPredicate, )); err != nil { return fmt.Errorf("failed to watch NodeClaims: %w", err) } @@ -332,8 +354,10 @@ func (r *Reconciler) reconcileAutoNodeStatus(ctx context.Context, hcp *hyperv1.H return fmt.Errorf("failed to list nodes: %w", err) } + liveNodes := make(map[string]struct{}, len(nodes.Items)) var karpenterNodeCount int32 for i := range nodes.Items { + liveNodes[nodes.Items[i].Name] = struct{}{} if _, hasLabel := nodes.Items[i].Labels[karpenterv1.NodePoolLabelKey]; hasLabel { karpenterNodeCount++ } @@ -346,11 +370,14 @@ func (r *Reconciler) reconcileAutoNodeStatus(ctx context.Context, hcp *hyperv1.H return fmt.Errorf("failed to list NodeClaims: %w", err) } + vcpus := sumNodeClaimVCPUs(nodeClaims.Items, liveNodes) + statusCfg := hypershiftv1beta1applyconfigurations.HostedControlPlaneStatus(). WithAutoNode( hypershiftv1beta1applyconfigurations.AutoNodeStatus(). 
WithNodeCount(karpenterNodeCount).
-				WithNodeClaimCount(int32(len(nodeClaims.Items))),
+				WithNodeClaimCount(int32(len(nodeClaims.Items))).
+				WithVCPUs(vcpus),
 		)
 	cfg := hypershiftv1beta1applyconfigurations.HostedControlPlane(hcp.Name, hcp.Namespace)
 	cfg.Status = statusCfg
@@ -370,6 +397,33 @@ func (r *Reconciler) reconcileAutoNodeStatus(ctx context.Context, hcp *hyperv1.H
 	return nil
 }
 
+// sumNodeClaimVCPUs returns the total vCPU count across NodeClaims whose
+// backing Node still exists in the cluster and has reported CPU capacity.
+// The NodeClaim is the authoritative record of Karpenter ownership.
+//
+// The sum is accumulated as int64 and saturated at maxAutoNodeVCPUs, the
+// Maximum declared on AutoNodeStatus.VCPUs, so that the narrowing conversion
+// to int32 can never wrap and the result stays within the field's declared range.
+func sumNodeClaimVCPUs(nodeClaims []karpenterv1.NodeClaim, liveNodes map[string]struct{}) int32 {
+	// Keep in sync with +kubebuilder:validation:Maximum on AutoNodeStatus.VCPUs.
+	const maxAutoNodeVCPUs = 1000000
+
+	var total int64
+	for i := range nodeClaims {
+		nc := &nodeClaims[i]
+		if _, ok := liveNodes[nc.Status.NodeName]; !ok {
+			continue
+		}
+		if cpu, ok := nc.Status.Capacity[corev1.ResourceCPU]; ok {
+			total += cpu.Value()
+		}
+	}
+	if total > maxAutoNodeVCPUs {
+		return maxAutoNodeVCPUs
+	}
+	return int32(total)
+}
+
 // reconcileCRDs reconcile the Karpenter CRDs, if onlyCreate is true it uses an only write non cached client.
func (r *Reconciler) reconcileCRDs(ctx context.Context, onlyCreate bool) error { log := ctrl.LoggerFrom(ctx) diff --git a/karpenter-operator/controllers/karpenter/karpenter_controller_test.go b/karpenter-operator/controllers/karpenter/karpenter_controller_test.go index 18a8af70cb49..887ac7bd08ee 100644 --- a/karpenter-operator/controllers/karpenter/karpenter_controller_test.go +++ b/karpenter-operator/controllers/karpenter/karpenter_controller_test.go @@ -17,6 +17,7 @@ import ( corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" @@ -375,3 +376,92 @@ func TestReconcileCRDsConcurrentAccess(t *testing.T) { "global CRD %q had resourceVersion set by concurrent reconcileCRDs calls", crd.Name) } } + +func TestSumNodeClaimVCPUs(t *testing.T) { + tests := []struct { + name string + nodeClaims []karpenterv1.NodeClaim + liveNodes map[string]struct{} + expected int32 + }{ + { + name: "When there are no NodeClaims, it should return 0", + nodeClaims: nil, + liveNodes: nil, + expected: 0, + }, + { + name: "When NodeClaims have live nodes with capacity, it should sum their CPUs", + nodeClaims: []karpenterv1.NodeClaim{ + nodeClaimWithCapacity("nc-1", "node-1", "4"), + nodeClaimWithCapacity("nc-2", "node-2", "8"), + nodeClaimWithCapacity("nc-3", "node-3", "16"), + }, + liveNodes: map[string]struct{}{"node-1": {}, "node-2": {}, "node-3": {}}, + expected: 28, + }, + { + name: "When NodeClaims have no registered node, it should skip them", + nodeClaims: []karpenterv1.NodeClaim{ + nodeClaimWithCapacity("nc-1", "", "4"), + nodeClaimWithCapacity("nc-2", "", "8"), + }, + liveNodes: nil, + expected: 0, + }, + { + name: "When NodeClaims have empty capacity, it should skip them", + nodeClaims: []karpenterv1.NodeClaim{ + { + ObjectMeta: metav1.ObjectMeta{Name: "nc-1"}, + Status: 
karpenterv1.NodeClaimStatus{NodeName: "node-1"}, + }, + }, + liveNodes: map[string]struct{}{"node-1": {}}, + expected: 0, + }, + { + name: "When there is a mix of registered and unregistered NodeClaims, it should only count registered ones", + nodeClaims: []karpenterv1.NodeClaim{ + nodeClaimWithCapacity("nc-1", "node-1", "4"), + nodeClaimWithCapacity("nc-2", "", "8"), + nodeClaimWithCapacity("nc-3", "node-3", "16"), + { + ObjectMeta: metav1.ObjectMeta{Name: "nc-4"}, + Status: karpenterv1.NodeClaimStatus{NodeName: "node-4"}, + }, + }, + liveNodes: map[string]struct{}{"node-1": {}, "node-3": {}, "node-4": {}}, + expected: 20, + }, + { + name: "When NodeClaim references a node that no longer exists, it should not count it", + nodeClaims: []karpenterv1.NodeClaim{ + nodeClaimWithCapacity("nc-1", "node-1", "4"), + nodeClaimWithCapacity("nc-2", "node-2", "8"), + }, + liveNodes: map[string]struct{}{"node-1": {}}, + expected: 4, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + g.Expect(sumNodeClaimVCPUs(tt.nodeClaims, tt.liveNodes)).To(Equal(tt.expected)) + }) + } +} + +func nodeClaimWithCapacity(name, nodeName, cpus string) karpenterv1.NodeClaim { + nc := karpenterv1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Status: karpenterv1.NodeClaimStatus{ + NodeName: nodeName, + Capacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(cpus), + }, + }, + } + return nc +} diff --git a/test/e2e/karpenter_test.go b/test/e2e/karpenter_test.go index 31e847c2e7f9..1d48f9326f5d 100644 --- a/test/e2e/karpenter_test.go +++ b/test/e2e/karpenter_test.go @@ -26,6 +26,7 @@ import ( karpentercpov2 "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/v2/karpenter" karpenteroperatorcpov2 "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/v2/karpenteroperator" "github.com/openshift/hypershift/hypershift-operator/controllers/manifests" + npmetrics 
"github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/metrics" karpenterassets "github.com/openshift/hypershift/karpenter-operator/controllers/karpenter/assets" karpenterutil "github.com/openshift/hypershift/support/karpenter" "github.com/openshift/hypershift/support/releaseinfo" @@ -85,7 +86,7 @@ func TestKarpenter(t *testing.T) { // This test intentionally leaves dangling resources so cluster teardown must // force-terminate nodes despite a blocking PDB. It must run last. - t.Run("Karpenter consolidation and cluster deletion with blocking PDB", testConsolidationAndPDB(ctx, guestClient, hostedCluster)) + t.Run("Billing vCPUs, consolidation, and cluster deletion with blocking PDB", testBillingConsolidationAndPDB(ctx, mgtClient, guestClient, hostedCluster)) }).Execute(&clusterOpts, globalOpts.Platform, globalOpts.ArtifactDir, "karpenter", globalOpts.ServiceAccountSigningKey) } @@ -131,6 +132,9 @@ func testKarpenterPlumbing(ctx context.Context, mgtClient, guestClient crclient. }) g.Expect(err).NotTo(HaveOccurred(), "failed to validate Karpenter metrics") + t.Log("Checking AutoNode vCPUs status (no Karpenter nodes provisioned)") + waitForAutoNodeStatusVCPUs(t, ctx, mgtClient, hostedCluster, 0) + t.Log("Checking Karpenter version is logged") cfg, err := e2eutil.GetConfig() g.Expect(err).NotTo(HaveOccurred(), "failed to get client config") @@ -1233,10 +1237,17 @@ func testAutoNodeLifecycle(ctx context.Context, mgtClient crclient.Client, hoste } } -func testConsolidationAndPDB(ctx context.Context, guestClient crclient.Client, hostedCluster *hyperv1.HostedCluster) func(t *testing.T) { +func testBillingConsolidationAndPDB(ctx context.Context, mgtClient, guestClient crclient.Client, hostedCluster *hyperv1.HostedCluster) func(t *testing.T) { return func(t *testing.T) { g := NewWithT(t) + // Before any Karpenter nodes are provisioned, Karpenter vCPUs should be 0. 
+ waitForAutoNodeStatusVCPUs(t, ctx, mgtClient, hostedCluster, 0) + + baseline, found := getVCPUsMetric(t, ctx, mgtClient, hostedCluster) + g.Expect(found).To(BeTrue(), "billing metric should exist before Karpenter nodes are provisioned") + t.Logf("Baseline billing metric vCPUs from native NodePools: %d", baseline) + karpenterNodePool := baseNodePool("on-demand", "default") workLoads := testWorkload("web-app", 2, map[string]string{ karpenterv1.NodePoolLabelKey: karpenterNodePool.Name, @@ -1251,8 +1262,13 @@ func testConsolidationAndPDB(ctx context.Context, guestClient crclient.Client, h t.Logf("Created workloads with 2 replicas") _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 2, nodeLabels) - t.Logf("Both nodes ready, scaling workload to 1 replica to verify deprovisioning and consolidation") + t.Logf("Both nodes ready, validating billing vCPUs") + + // t3.xlarge = 4 vCPUs; 2 nodes = 8 Karpenter vCPUs on top of baseline + waitForAutoNodeStatusVCPUs(t, ctx, mgtClient, hostedCluster, 8) + waitForBillingMetricVCPUs(t, ctx, mgtClient, hostedCluster, baseline+8) + t.Logf("Scaling workload to 1 replica to verify deprovisioning and consolidation") err := e2eutil.UpdateObject(t, ctx, guestClient, workLoads, func(obj *appsv1.Deployment) { obj.Spec.Replicas = ptr.To(int32(1)) }) @@ -1261,6 +1277,10 @@ func testConsolidationAndPDB(ctx context.Context, guestClient crclient.Client, h _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 1, nodeLabels) t.Logf("Karpenter consolidated the extra node") + // t3.xlarge = 4 vCPUs; 1 node = 4 Karpenter vCPUs on top of baseline + waitForAutoNodeStatusVCPUs(t, ctx, mgtClient, hostedCluster, 4) + waitForBillingMetricVCPUs(t, ctx, mgtClient, hostedCluster, baseline+4) + // Create a blocking PDB and leave everything dangling so cluster teardown // must force-terminate nodes despite a blocking PDB. 
pdb := &policyv1.PodDisruptionBudget{ @@ -1416,6 +1436,91 @@ func waitForNodeClaimDrifted(t *testing.T, ctx context.Context, client crclient. ) } +// waitForAutoNodeStatusVCPUs polls until HostedCluster.Status.AutoNode.VCPUs +// converges to the expected value. This checks only the status field (Karpenter-only vCPUs), +// not the billing metric. +func waitForAutoNodeStatusVCPUs(t *testing.T, ctx context.Context, mgtClient crclient.Client, hostedCluster *hyperv1.HostedCluster, expected int32) { + t.Helper() + + t.Logf("Validating AutoNode.VCPUs converges to %d", expected) + e2eutil.EventuallyObject(t, ctx, + fmt.Sprintf("HostedCluster %s/%s AutoNode.VCPUs=%d", hostedCluster.Namespace, hostedCluster.Name, expected), + func(ctx context.Context) (*hyperv1.HostedCluster, error) { + hc := &hyperv1.HostedCluster{} + err := mgtClient.Get(ctx, crclient.ObjectKeyFromObject(hostedCluster), hc) + return hc, err + }, + []e2eutil.Predicate[*hyperv1.HostedCluster]{ + func(hc *hyperv1.HostedCluster) (bool, string, error) { + if hc.Status.AutoNode.VCPUs == nil { + return false, "AutoNode.VCPUs is nil", nil + } + actual := *hc.Status.AutoNode.VCPUs + if actual != expected { + return false, fmt.Sprintf("AutoNode.VCPUs=%d, want %d", actual, expected), nil + } + return true, fmt.Sprintf("AutoNode.VCPUs=%d", actual), nil + }, + }, + e2eutil.WithTimeout(1*time.Minute), + ) +} + +// waitForBillingMetricVCPUs polls until the hypershift_cluster_vcpus metric +// converges to the expected total (native + Karpenter). 
+func waitForBillingMetricVCPUs(t *testing.T, ctx context.Context, mgtClient crclient.Client, hostedCluster *hyperv1.HostedCluster, expectedTotal int32) { + t.Helper() + g := NewWithT(t) + + t.Logf("Validating %s converges to %d", npmetrics.VCpusCountByHClusterMetricName, expectedTotal) + err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 1*time.Minute, true, func(ctx context.Context) (bool, error) { + actual, found := getVCPUsMetric(t, ctx, mgtClient, hostedCluster) + if !found { + t.Logf("metric %s not found for cluster %s/%s", npmetrics.VCpusCountByHClusterMetricName, hostedCluster.Namespace, hostedCluster.Name) + return false, nil + } + if actual == expectedTotal { + t.Logf("%s=%d for cluster %s/%s", npmetrics.VCpusCountByHClusterMetricName, actual, hostedCluster.Namespace, hostedCluster.Name) + return true, nil + } + t.Logf("%s=%d, want %d for cluster %s/%s", npmetrics.VCpusCountByHClusterMetricName, actual, expectedTotal, hostedCluster.Namespace, hostedCluster.Name) + return false, nil + }) + g.Expect(err).NotTo(HaveOccurred(), "failed to validate %s metric", npmetrics.VCpusCountByHClusterMetricName) +} + +// getVCPUsMetric reads the current hypershift_cluster_vcpus metric for the given cluster. 
+func getVCPUsMetric(t *testing.T, ctx context.Context, mgtClient crclient.Client, hostedCluster *hyperv1.HostedCluster) (int32, bool) { + t.Helper() + + mf, err := e2eutil.GetMetricsFromPod(ctx, mgtClient, "operator", "operator", "hypershift", "9000") + if err != nil { + t.Logf("unable to get metrics from hypershift-operator: %v", err) + return 0, false + } + + family, ok := mf[npmetrics.VCpusCountByHClusterMetricName] + if !ok { + return 0, false + } + + for _, m := range family.Metric { + var matchedName, matchedNamespace bool + for _, l := range m.GetLabel() { + if l.GetName() == "name" && l.GetValue() == hostedCluster.Name { + matchedName = true + } + if l.GetName() == "namespace" && l.GetValue() == hostedCluster.Namespace { + matchedNamespace = true + } + } + if matchedName && matchedNamespace { + return int32(m.GetGauge().GetValue()), true + } + } + return 0, false +} + func baseNodePool(name, nodeClassName string) *karpenterv1.NodePool { return &karpenterv1.NodePool{ ObjectMeta: metav1.ObjectMeta{Name: name}, diff --git a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_types.go b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_types.go index 8d4db6c2bb34..c2d634988708 100644 --- a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_types.go +++ b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_types.go @@ -2205,6 +2205,16 @@ type AutoNodeStatus struct { // +kubebuilder:validation:Minimum=0 // +optional NodeClaimCount *int32 `json:"nodeClaimCount,omitempty"` + + // vcpus is the total number of virtual CPUs across all Karpenter-managed nodes + // that have registered and reported capacity. This is the sum of CPU capacity + // from each NodeClaim whose corresponding node exists (status.nodeName is set). + // This value is 0 when no Karpenter nodes are provisioned. + // Used by the metrics collector for billing aggregation. 
+ // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=1000000 + // +optional + VCPUs *int32 `json:"vcpus,omitempty"` } // PlatformStatus contains platform-specific status diff --git a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/zz_generated.deepcopy.go b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/zz_generated.deepcopy.go index 0f69bb3bcdeb..d429305a30b3 100644 --- a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/zz_generated.deepcopy.go +++ b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/zz_generated.deepcopy.go @@ -584,6 +584,11 @@ func (in *AutoNodeStatus) DeepCopyInto(out *AutoNodeStatus) { *out = new(int32) **out = **in } + if in.VCPUs != nil { + in, out := &in.VCPUs, &out.VCPUs + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoNodeStatus.