From d0feebe4229354c8ea7c391771756e31941dc976 Mon Sep 17 00:00:00 2001 From: Marek Skrobacki Date: Mon, 1 Jun 2026 08:38:06 +0100 Subject: [PATCH 1/4] dnsmasq: stop scheduling with ironic conductor In the past we needed the dnsmasq pods to run on the same physical host as the Ironic conductors. This was necessary to allow dnsmasq and Ironic to share a disk volume and exchange information about hosts through it. At some point, however, we switched to a PVC volume, which can be mounted on multiple nodes when its access mode is set to RWX. In order to scale the number of Ironic conductors, we need to decouple dnsmasq from the Ironic conductor. The dnsmasq pods are now scheduled onto nodes labelled dhcp_role: server instead. --- components/ironic/dnsmasq-ss.yaml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/components/ironic/dnsmasq-ss.yaml b/components/ironic/dnsmasq-ss.yaml index 538230e8f..9a03dc53e 100644 --- a/components/ironic/dnsmasq-ss.yaml +++ b/components/ironic/dnsmasq-ss.yaml @@ -34,16 +34,7 @@ spec: application: ironic-dnsmasq spec: nodeSelector: - ironic_role: conductor - - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - topologyKey: kubernetes.io/hostname - labelSelector: - matchLabels: - application: ironic - component: conductor + dhcp_role: server hostNetwork: true containers: - name: dnsmasq From 6fd99dad1a4f91a45e63827b76f77b9beccc8346 Mon Sep 17 00:00:00 2001 From: Marek Skrobacki Date: Mon, 1 Jun 2026 08:59:53 +0100 Subject: [PATCH 2/4] chore(dnsmasq): change PVCs to RWX --- components/ironic/dnsmasq-pvc.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/components/ironic/dnsmasq-pvc.yaml b/components/ironic/dnsmasq-pvc.yaml index dc358cfb0..63e0b350e 100644 --- a/components/ironic/dnsmasq-pvc.yaml +++ b/components/ironic/dnsmasq-pvc.yaml @@ -7,7 +7,8 @@ metadata: namespace: openstack spec: accessModes: - - ReadWriteOnce + - ReadWriteMany + storageClassName: ceph-fs-ec resources: requests: storage: 16Mi @@ -21,7 +22,8 @@ metadata: namespace: 
openstack spec: accessModes: - - ReadWriteOnce + - ReadWriteMany + storageClassName: ceph-fs-ec resources: requests: storage: 16Mi From b6f74b9144da0ae138cb1319d308f8278572beda Mon Sep 17 00:00:00 2001 From: Marek Skrobacki Date: Mon, 1 Jun 2026 09:46:59 +0100 Subject: [PATCH 3/4] ironic: add anti-affinity rules This prevents multiple conductor pods from being placed on the same host. --- components/ironic/values.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/components/ironic/values.yaml b/components/ironic/values.yaml index 3da59b2ed..ccd2d8eed 100644 --- a/components/ironic/values.yaml +++ b/components/ironic/values.yaml @@ -285,6 +285,12 @@ pod: limits: memory: "2048Mi" cpu: "1000m" + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution + topologyKey: + default: kubernetes.io/hostname annotations: # we need to modify the annotations on OpenStack Helm From 31384993625209172fe4bba400f32caff7a42182 Mon Sep 17 00:00:00 2001 From: Marek Skrobacki Date: Mon, 1 Jun 2026 11:07:37 +0100 Subject: [PATCH 4/4] ironic: adjust CPU and power state sync settings Currently most of the ironic-conductor's time is spent doing power state syncs. This is partly because the BMCs are quite slow to respond, but also due to the very limited CPU. The conductor will happily use more than one core, but it is currently limited to just one. In theory we should be scaling up by increasing the number of conductors, but at the moment we are limited to one conductor per host due to host networking. Raise the CPU limit from 1 to 8 cores, and run power state syncs every 300 seconds (previously 70) with 20 workers. 
--- components/ironic/values.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/components/ironic/values.yaml b/components/ironic/values.yaml index ccd2d8eed..4cd3c9616 100644 --- a/components/ironic/values.yaml +++ b/components/ironic/values.yaml @@ -82,7 +82,8 @@ conf: verify_step_priority_override: management.clear_job_queue:90 # (nicholas.kuechler) tuning for idrac hardware type # https://docs.openstack.org/ironic/latest/admin/drivers/idrac.html#nodes-go-into-maintenance-mode - sync_power_state_interval: 70 + sync_power_state_interval: 300 + sync_power_state_workers: 20 agent: # (nicholas.kuechler) tuning for idrac hardware type # https://docs.openstack.org/ironic/latest/admin/drivers/idrac.html#timeout-when-powering-off @@ -284,7 +285,7 @@ pod: cpu: "100m" limits: memory: "2048Mi" - cpu: "1000m" + cpu: "8000m" affinity: anti: type: