From 0cb6510a8abc76f82e09c3d38acc498547d957a7 Mon Sep 17 00:00:00 2001
From: daniel-gines <dangines@gmail.com>
Date: Thu, 30 Apr 2026 22:33:43 -0300
Subject: [PATCH] fix(resource-quotas): bump vault to fit 256Mi req per pod
 (Block 3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audit on cortex prd 2026-04-30 found vault sts blocked from rolling
update because the namespace ResourceQuota platform-quota-vault was
sized for chart-default 128Mi req per pod (3-pod baseline 384Mi <
600Mi quota). After Block 3 right-sizing bumped the per-pod request
to 256Mi (peak observed ~190Mi), the 3-pod total (768Mi) exceeds the
600Mi quota — only two pods fit (512Mi), so the sts-controller cannot
recreate the third pod, let alone a 4th surge pod, during
rolling-update, blocking config rollout.

Bumps quota to:
  requests.cpu     300m    -> 1
  requests.memory  600Mi   -> 1200Mi
  limits.cpu       1500m   -> 2
  limits.memory    1500Mi  -> 2400Mi

Sized for 4-pod surge at 256Mi req per pod = 1024Mi + 176Mi tracker
headroom = 1200Mi total.
---
 components/resource-quotas/values.yaml | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/components/resource-quotas/values.yaml b/components/resource-quotas/values.yaml
index 5c3e6fc..c0fa128 100644
--- a/components/resource-quotas/values.yaml
+++ b/components/resource-quotas/values.yaml
@@ -234,13 +234,20 @@ namespaces:
   vault:
     enabled: true
     # Sized for 3-replica Raft HA at chart defaults (50m/128Mi requests,
-    # 250m/256Mi limits per pod). 50% headroom over steady state for
-    # rolling-update surges (4 pods briefly).
-    # PVC count = 3 (one per Raft replica, gp3/Premium_LRS).
+    # 250m/256Mi limits per pod) PLUS rolling-update surge headroom
+    # (4th pod briefly during sts updates) PLUS downstream override room
+    # for clusters that bump per-pod request to ~256Mi (peak observed
+    # ~190Mi on cortex prd 2026-04-30).
+    #
+    # 3-pod baseline: 3 × 256Mi req = 768Mi
+    # 4-pod surge:    4 × 256Mi req = 1024Mi (1Gi)
+    # Headroom:        +176Mi for tracker → 1200Mi requests.memory quota
+    #
+    # PVC count = 3 (one per Raft replica, gp3/Premium_LRS) + 2 spare.
     hard:
-      requests.cpu: "300m"
-      requests.memory: 600Mi
-      limits.cpu: "1500m"
-      limits.memory: 1500Mi
+      requests.cpu: "1"
+      requests.memory: 1200Mi
+      limits.cpu: "2"
+      limits.memory: 2400Mi
       pods: "10"
       persistentvolumeclaims: "5"