From 0cb6510a8abc76f82e09c3d38acc498547d957a7 Mon Sep 17 00:00:00 2001 From: daniel-gines Date: Thu, 30 Apr 2026 22:33:43 -0300 Subject: [PATCH] fix(resource-quotas): bump vault to fit 256Mi req per pod (Block 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit on cortex prd 2026-04-30 found vault sts blocked from rolling update because the namespace ResourceQuota platform-quota-vault was sized for chart-default 128Mi req per pod (3-pod baseline 384Mi < 600Mi quota). After Block 3 right-sizing bumped the per-pod request to 256Mi (peak observed ~190Mi), the 3-pod total (768Mi) exceeds the 600Mi quota — sts-controller cannot create the 4th surge pod during rolling-update, blocking config rollout. Bumps quota to: requests.cpu 300m -> 1 requests.memory 600Mi -> 1200Mi limits.cpu 1500m -> 2 limits.memory 1500Mi -> 2400Mi Sized for 4-pod surge at 256Mi req per pod = 1024Mi + 176Mi tracker headroom = 1200Mi total. --- components/resource-quotas/values.yaml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/components/resource-quotas/values.yaml b/components/resource-quotas/values.yaml index 5c3e6fc..c0fa128 100644 --- a/components/resource-quotas/values.yaml +++ b/components/resource-quotas/values.yaml @@ -234,13 +234,20 @@ namespaces: vault: enabled: true # Sized for 3-replica Raft HA at chart defaults (50m/128Mi requests, - # 250m/256Mi limits per pod). 50% headroom over steady state for - # rolling-update surges (4 pods briefly). - # PVC count = 3 (one per Raft replica, gp3/Premium_LRS). + # 250m/256Mi limits per pod) PLUS rolling-update surge headroom + # (4th pod briefly during sts updates) PLUS downstream override room + # for clusters that bump per-pod request to ~256Mi (peak observed + # ~190Mi on cortex prd 2026-04-30).
+ # + # 3-pod baseline: 3 × 256Mi req = 768Mi + # 4-pod surge: 4 × 256Mi req = 1024Mi (1Gi) + # Headroom: +176Mi for tracker → 1200Mi quota + # + # PVC count = 3 (one per Raft replica, gp3/Premium_LRS) + 2 surge. hard: - requests.cpu: "300m" - requests.memory: 600Mi - limits.cpu: "1500m" - limits.memory: 1500Mi + requests.cpu: "1" + requests.memory: 1200Mi + limits.cpu: "2" + limits.memory: 2400Mi pods: "10" persistentvolumeclaims: "5"