Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions assets/performanceprofile/configs/dedicatedcpus.slice
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[Unit]
Description=Top level slice for dedicated CPUs used by services/applications that require full isolation.

[Slice]
AllowedCPUs={{ .DedicatedCpus }}
38 changes: 36 additions & 2 deletions assets/performanceprofile/scripts/clear-irqbalance-banned-cpus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,51 @@ IRQBALANCE_CONF="${1:-/etc/sysconfig/irqbalance}"
CRIO_ORIG_BANNED_CPUS="${2:-/etc/sysconfig/orig_irq_banned_cpus}"
NONE=0

# BANNED_CPUS is the hex mask that ends up in IRQBALANCE_BANNED_CPUS.
# A non-empty DEDICATED_CPUS (exported through the systemd Environment) wins;
# an unset or empty one falls back to NONE, meaning no CPU is banned and all
# CPUs take part in IRQ balancing.
BANNED_CPUS="${DEDICATED_CPUS:-${NONE}}"

[ ! -f "${IRQBALANCE_CONF}" ] && exit 0

${SED} -i '/^\s*IRQBALANCE_BANNED_CPUS\b/d' "${IRQBALANCE_CONF}" || exit 0
# CPU numbers which have their corresponding bits set to one in this mask
# will not have any irq's assigned to them on rebalance.
# so zero means all cpus are participating in load balancing.
echo "IRQBALANCE_BANNED_CPUS=${NONE}" >> "${IRQBALANCE_CONF}"
echo "IRQBALANCE_BANNED_CPUS=${BANNED_CPUS}" >> "${IRQBALANCE_CONF}"

# We now own this configuration. However, CRI-O has code to restore the configuration,
# and until it gains an option to disable this restore flow, we need to keep
# the configuration consistent so that the CRI-O restore does nothing.
if [ -n "${CRIO_ORIG_BANNED_CPUS}" ] && [ -f "${CRIO_ORIG_BANNED_CPUS}" ]; then
echo "${NONE}" > "${CRIO_ORIG_BANNED_CPUS}"
echo "${BANNED_CPUS}" > "${CRIO_ORIG_BANNED_CPUS}"
fi

# CRI-O reads /proc/irq/default_smp_affinity to derive the IRQ banned mask
# when pods with irq-load-balancing.crio.io=disable are scheduled.
# If the dedicated CPUs are not removed from default_smp_affinity here,
# CRI-O will overwrite IRQBALANCE_BANNED_CPUS while ignoring the dedicated CPUs.
SMP_AFFINITY="/proc/irq/default_smp_affinity"
if [ "${BANNED_CPUS}" != "${NONE}" ] && [ -f "${SMP_AFFINITY}" ]; then
    # default_smp_affinity is comma-separated 32-bit hex groups (e.g. "ff,ffffffff,ffffffff")
    IFS=',' read -ra smp < "${SMP_AFFINITY}"
    n=${#smp[@]}
    # number of hex digits needed to cover every group of the affinity mask
    width=$(( n * 8 ))
    # Flatten BANNED_CPUS into a single run of hex digits. A ',' left in place
    # would be evaluated as the arithmetic comma operator below and silently
    # produce a wrong mask.
    # NOTE(review): assumes a comma-grouped mask uses full 8-digit groups after
    # the first one, like the kernel bitmap format - confirm against the producer.
    padded="${BANNED_CPUS//,/}"
    # left-pad with zeros up to the width of the affinity mask
    while [ ${#padded} -lt ${width} ]; do
        padded="0${padded}"
    done
    # Right-align: when the mask carries more digits than the affinity file,
    # keep only the low-order digits so that group i of the mask lines up with
    # group i of default_smp_affinity (higher bits name CPUs the file does not cover).
    padded="${padded:$(( ${#padded} - width ))}"
    # clear banned bits from each 32-bit group: result = smp & ~banned
    result=""
    for (( i=0; i<n; i++ )); do
        ban="${padded:$(( i * 8 )):8}"
        val=$(printf "%08x" $(( 0x${smp[$i]} & ~0x${ban} )))
        result+="${result:+,}${val}"
    done
    echo "Setting default_smp_affinity to ${result} (removing dedicated CPUs ${BANNED_CPUS} from default_smp_affinity mask)"
    echo "${result}" > "${SMP_AFFINITY}"
fi
Comment thread
coderabbitai[bot] marked this conversation as resolved.
34 changes: 34 additions & 0 deletions assets/performanceprofile/scripts/dedicated-cpus-configure.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash

# dedicated-cpus-configure.sh configures the dedicatedcpus.slice cpuset
# partition so that the dedicated CPUs are fully isolated from the kernel
# scheduler (equivalent to isolcpus=domain for those specific CPUs).
#
# The {{ .DedicatedCpus }} placeholder below is a template field; the script
# exits successfully without touching the cgroup when it renders to an empty
# string, and fails when the slice cgroup is missing or the partition does
# not end up in the "isolated" state.

set -euo pipefail

DEDICATED_CPUS="{{ .DedicatedCpus }}"

if [ -z "$DEDICATED_CPUS" ]; then
    echo "No dedicated CPUs configured, nothing to do"
    exit 0
fi

CGROUP_PATH="/sys/fs/cgroup/dedicatedcpus.slice"

if [ ! -d "$CGROUP_PATH" ]; then
    echo "ERROR: dedicatedcpus.slice cgroup does not exist at $CGROUP_PATH" >&2
    exit 1
fi

# Set exclusive CPUs on the dedicated slice
echo "$DEDICATED_CPUS" > "$CGROUP_PATH/cpuset.cpus.exclusive"

# Set the CPUs available to the slice
echo "$DEDICATED_CPUS" > "$CGROUP_PATH/cpuset.cpus"

# Create an isolated partition — removes these CPUs from the parent's
# scheduling domain, giving kernel-level isolation equivalent to
# isolcpus=domain without affecting other CPUs.
echo "isolated" > "$CGROUP_PATH/cpuset.cpus.partition"

# A successful write does not prove the partition is valid: the kernel reports
# an unusable partition on read as "isolated invalid (<reason>)". Read the
# state back so a failed isolation is not reported as success.
PARTITION_STATE="$(cat "$CGROUP_PATH/cpuset.cpus.partition")"
if [ "$PARTITION_STATE" != "isolated" ]; then
    echo "ERROR: dedicatedcpus.slice partition is not isolated, current state: $PARTITION_STATE" >&2
    exit 1
fi
Comment thread
coderabbitai[bot] marked this conversation as resolved.

echo "Configured dedicatedcpus.slice as isolated partition for CPUs: $DEDICATED_CPUS"
15 changes: 15 additions & 0 deletions manifests/20-performance-profile.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,16 @@ spec:
offloads the complexity of cpu load balancing to the application.
Defaults to "true"
type: boolean
dedicated:
description: |-
Dedicated defines a set of CPUs fully isolated from the operating system
and Kubernetes scheduling, intended for exclusive use by user-space
processes (for example, infrastructure networking workloads such as
DPDK-based vSwitch or vRouter). These CPUs receive full kernel-level
isolation (isolcpus=domain,managed_irq, nohz_full, rcu_nocbs), are
excluded from Kubelet scheduling (all QoS classes), banned from
irqbalance, and excluded from systemd CPU affinity.
type: string
isolated:
description: |-
Isolated defines a set of CPUs that will be used to give to application threads the most execution time possible,
Expand Down Expand Up @@ -625,6 +635,11 @@ spec:
vendorID:
description: Network device vendor ID represented as a 16-bit hexadecimal number.
type: string
disableOvsDynamicPinning:
description: |-
DisableOvsDynamicPinning when set to true, prevents OVN-Kubernetes
from dynamically adjusting OVS thread CPU affinity at runtime.
type: boolean
userLevelNetworking:
description: UserLevelNetworking when enabled - sets either all or specified network devices queue size to the amount of reserved CPUs. Defaults to "false".
type: boolean
Expand Down
11 changes: 11 additions & 0 deletions pkg/apis/performanceprofile/v2/performanceprofile_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ type CPU struct {
// alongside the isolated, exclusive resources that are being used already by those workloads.
// +optional
Shared *CPUSet `json:"shared,omitempty"`
// Dedicated defines a set of CPUs fully isolated from the operating system
// and Kubernetes scheduling, intended for exclusive use by user-space
// processes (for example, infrastructure networking workloads such as
// DPDK-based vSwitch or vRouter). WorkloadPartitioning or --strict-cpu-reservation
// kubelet CPUManager policy option are a prerequisite for this feature.
// +optional
Dedicated *CPUSet `json:"dedicated,omitempty"`
}

// CPUfrequency defines cpu frequencies for isolated and reserved cpus
Expand Down Expand Up @@ -203,6 +210,10 @@ type Net struct {
// set with a netqueue count equal to CPU.Reserved .
// If no devices are specified then the default is all devices.
Devices []Device `json:"devices,omitempty"`
// DisableOvsDynamicPinning when set to true, prevents OVN-Kubernetes
// from dynamically adjusting OVS thread CPU affinity at runtime.
// +optional
DisableOvsDynamicPinning *bool `json:"disableOvsDynamicPinning,omitempty"`
}

// Device defines a way to represent a network device in several options:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,17 @@ func (r *PerformanceProfile) validateCPUs() field.ErrorList {
}

if cpus.Isolated != nil && cpus.Reserved != nil {
var offlined, shared string
var offlined, shared, dedicated string
if cpus.Offlined != nil {
offlined = string(*cpus.Offlined)
}
if cpus.Shared != nil {
shared = string(*cpus.Shared)
}
cpuLists, err := components.NewCPULists(string(*cpus.Reserved), string(*cpus.Isolated), offlined, shared)
if cpus.Dedicated != nil {
dedicated = string(*cpus.Dedicated)
}
cpuLists, err := components.NewCPULists(string(*cpus.Reserved), string(*cpus.Isolated), offlined, shared, dedicated)
if err != nil {
allErrs = append(allErrs, field.InternalError(field.NewPath("spec.cpu"), err))
// If err != nil then the cpuList is nil and we can't continue with the function logic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,11 @@ func FuzzValidateCPUs(f *testing.F) {
}
f.Fuzz(func(t *testing.T, input string) {
cpuFields := map[string]func(*PerformanceProfile, CPUSet){
"reserved": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Reserved = &input },
"isolated": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Isolated = &input },
"shared": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Shared = &input },
"offline": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Offlined = &input },
"reserved": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Reserved = &input },
"isolated": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Isolated = &input },
"shared": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Shared = &input },
"offline": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Offlined = &input },
"dedicated": func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Dedicated = &input },
}

for fieldName, setField := range cpuFields {
Expand Down Expand Up @@ -296,6 +297,74 @@ var _ = Describe("PerformanceProfile", func() {
Expect(errors).NotTo(BeEmpty(), "should have validation error when isolated and shared CPUs have overlap")
Expect(errors[0].Error()).To(Or(ContainSubstring("isolated and shared cpus overlap"), ContainSubstring("shared and isolated cpus overlap")))
})

It("should allow valid dedicated CPUs that do not overlap with other sets", func() {
reservedCPUs := CPUSet("0-1")
isolatedCPUs := CPUSet("4-7")
dedicatedCPUs := CPUSet("2-3")
profile.Spec.CPU.Reserved = &reservedCPUs
profile.Spec.CPU.Isolated = &isolatedCPUs
profile.Spec.CPU.Offlined = nil
profile.Spec.CPU.Dedicated = &dedicatedCPUs
errors := profile.validateCPUs()
Expect(errors).To(BeEmpty(), "should not have validation errors with non-overlapping dedicated CPUs")
})

It("should reject cpus allocation with overlapping sets between dedicated and reserved", func() {
reservedCPUs := CPUSet("0-3")
isolatedCPUs := CPUSet("8-11")
dedicatedCPUs := CPUSet("2-5")
profile.Spec.CPU.Reserved = &reservedCPUs
profile.Spec.CPU.Isolated = &isolatedCPUs
profile.Spec.CPU.Offlined = nil
profile.Spec.CPU.Dedicated = &dedicatedCPUs
errors := profile.validateCPUs()
Expect(errors).NotTo(BeEmpty(), "should have validation error when dedicated and reserved CPUs have overlap")
Expect(errors[0].Error()).To(Or(ContainSubstring("dedicated and reserved cpus overlap"), ContainSubstring("reserved and dedicated cpus overlap")))
})

It("should reject cpus allocation with overlapping sets between dedicated and isolated", func() {
reservedCPUs := CPUSet("0-1")
isolatedCPUs := CPUSet("4-7")
dedicatedCPUs := CPUSet("6-9")
profile.Spec.CPU.Reserved = &reservedCPUs
profile.Spec.CPU.Isolated = &isolatedCPUs
profile.Spec.CPU.Offlined = nil
profile.Spec.CPU.Dedicated = &dedicatedCPUs
errors := profile.validateCPUs()
Expect(errors).NotTo(BeEmpty(), "should have validation error when dedicated and isolated CPUs have overlap")
Expect(errors[0].Error()).To(Or(ContainSubstring("dedicated and isolated cpus overlap"), ContainSubstring("isolated and dedicated cpus overlap")))
})

It("should reject cpus allocation with overlapping sets between dedicated and offlined", func() {
reservedCPUs := CPUSet("0-1")
isolatedCPUs := CPUSet("4-5")
offlinedCPUs := CPUSet("6-7")
dedicatedCPUs := CPUSet("2-3,7")
profile.Spec.CPU.Reserved = &reservedCPUs
profile.Spec.CPU.Isolated = &isolatedCPUs
profile.Spec.CPU.Offlined = &offlinedCPUs
profile.Spec.CPU.Dedicated = &dedicatedCPUs
errors := profile.validateCPUs()
Expect(errors).NotTo(BeEmpty(), "should have validation error when dedicated and offlined CPUs have overlap")
Expect(errors[0].Error()).To(Or(ContainSubstring("dedicated and offlined cpus overlap"), ContainSubstring("offlined and dedicated cpus overlap")))
})

It("should reject cpus allocation with overlapping sets between dedicated and shared", func() {
reservedCPUs := CPUSet("0-1")
isolatedCPUs := CPUSet("4-5")
sharedCPUs := CPUSet("6-7")
dedicatedCPUs := CPUSet("2-3,6")
profile.Spec.CPU.Reserved = &reservedCPUs
profile.Spec.CPU.Isolated = &isolatedCPUs
profile.Spec.CPU.Offlined = nil
profile.Spec.CPU.Shared = &sharedCPUs
profile.Spec.CPU.Dedicated = &dedicatedCPUs
errors := profile.validateCPUs()
Expect(errors).NotTo(BeEmpty(), "should have validation error when dedicated and shared CPUs have overlap")
Expect(errors[0].Error()).To(Or(ContainSubstring("dedicated and shared cpus overlap"), ContainSubstring("shared and dedicated cpus overlap")))
})

DescribeTable("should reject invalid input that does not represent CPU sets",
func(fieldSetter func(*PerformanceProfile, CPUSet), cpusField string) {
garbageInput := CPUSet("garbage")
Expand All @@ -308,6 +377,7 @@ var _ = Describe("PerformanceProfile", func() {
Entry("isolated CPUs", func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Isolated = &input }, "isolated CPUs"),
Entry("shared CPUs", func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Shared = &input }, "shared CPUs"),
Entry("offline CPUs", func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Offlined = &input }, "offline CPUs"),
Entry("dedicated CPUs", func(p *PerformanceProfile, input CPUSet) { p.Spec.CPU.Dedicated = &input }, "dedicated CPUs"),
)
})

Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/performanceprofile/v2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ type Options struct {
}

type MachineConfigOptions struct {
PinningMode *apiconfigv1.CPUPartitioningMode
MixedCPUsEnabled bool
PinningMode *apiconfigv1.CPUPartitioningMode
MixedCPUsEnabled bool
DisableOVSDynamicPinning bool
DedicatedCPUs string
}

type KubeletConfigOptions struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,18 @@ func (h *handler) Apply(ctx context.Context, obj client.Object, recorder record.
klog.Infof("Ignoring reconcile loop for pause performance profile %s", profile.Name)
return nil
}
// set missing options
opts.MachineConfig.MixedCPUsEnabled = opts.MixedCPUsFeatureGateEnabled && profileutil.IsMixedCPUsEnabled(profile)
opts.DRAResourceManagement = profileutil.IsDRAManaged(profile)

profileutil.SetMissingOptions(profile, opts)

components, err := manifestset.GetNewComponents(profile, opts)
if err != nil {
return err
}

if err := profileutil.ValidateDedicatedCPUsPrerequisites(profile, opts, components.KubeletConfig); err != nil {
return err
}

for _, componentObj := range components.ToObjects() {
if err := controllerutil.SetControllerReference(profile, componentObj, h.scheme); err != nil {
return err
Expand Down
Loading