From 09463c332b28a3c1c674d52f83952eabe59b30e2 Mon Sep 17 00:00:00 2001
From: mblos <marcel.gute@sap.com>
Date: Tue, 24 Mar 2026 13:36:31 +0100
Subject: [PATCH 1/6] fix cr controller race condition

---
 .../commitments/api_change_commitments.go     | 22 ++++++---
 .../api_change_commitments_test.go            | 33 +++++++++----
 .../reservations/commitments/controller.go    | 48 ++++++++-----------
 .../commitments/controller_test.go            | 30 +++++++++---
 4 files changed, 81 insertions(+), 52 deletions(-)

diff --git a/internal/scheduling/reservations/commitments/api_change_commitments.go b/internal/scheduling/reservations/commitments/api_change_commitments.go
index 1ef003e30..e8e2ec172 100644
--- a/internal/scheduling/reservations/commitments/api_change_commitments.go
+++ b/internal/scheduling/reservations/commitments/api_change_commitments.go
@@ -376,18 +376,26 @@ func watchReservationsUntilReady(
 
 			switch readyCond.Status {
 			case metav1.ConditionTrue:
-				// check if host is not set in spec or status: if so, no capacity left to schedule the reservation
+				// Only consider truly ready if Status.Host is populated
+				// This handles the two-phase reconcile pattern where Ready=True
+				// is set atomically with Status.Host in the controller
 				if current.Spec.TargetHost == "" || current.Status.Host == "" {
 					allAreReady = false
-					failedReservations = append(failedReservations, current)
-					logger.Info("insufficient capacity for reservation", "reservation", current.Name, "reason", readyCond.Reason, "message", readyCond.Message, "targetHostInSpec", current.Spec.TargetHost, "hostInStatus", current.Status.Host)
-				} else {
-					// Reservation is successfully scheduled, no further action needed
-					logger.Info("reservation ready", "reservation", current.Name, "host", current.Spec.TargetHost)
+					stillWaiting = append(stillWaiting, res)
+					continue
 				}
+				// Reservation is successfully scheduled
+				logger.Info("reservation ready", "reservation", current.Name, "host", current.Status.Host)
 
 			case metav1.ConditionFalse:
-				failedReservations = append(failedReservations, res)
+				// Check the reason - NoHostsFound means no capacity
+				if readyCond.Reason == "NoHostsFound" || readyCond.Reason == "NoHostsAvailable" {
+					failedReservations = append(failedReservations, current)
+					logger.Info("insufficient capacity for reservation", "reservation", current.Name, "reason", readyCond.Reason, "message", readyCond.Message)
+				} else {
+					// Other failure reasons
+					logger.Info("reservation failed", "reservation", current.Name, "reason", readyCond.Reason, "message", readyCond.Message)
+				}
 			case metav1.ConditionUnknown:
 				allAreReady = false
 				stillWaiting = append(stillWaiting, res)
diff --git a/internal/scheduling/reservations/commitments/api_change_commitments_test.go b/internal/scheduling/reservations/commitments/api_change_commitments_test.go
index 4d378af55..d996c498e 100644
--- a/internal/scheduling/reservations/commitments/api_change_commitments_test.go
+++ b/internal/scheduling/reservations/commitments/api_change_commitments_test.go
@@ -1277,7 +1277,8 @@ func (env *CommitmentTestEnv) processNewReservation(res *v1alpha1.Reservation) {
 	}
 }
 
-// markReservationSchedulerProcessedStatus updates a reservation to have Ready=True status (scheduling can be succeeded or not - depending on host status)
+// markReservationSchedulerProcessedStatus updates a reservation status based on scheduling result.
+// If host is non-empty, sets Ready=True (success). If host is empty, sets Ready=False with NoHostsFound (failure).
 func (env *CommitmentTestEnv) markReservationSchedulerProcessedStatus(res *v1alpha1.Reservation, host string) {
 	ctx := context.Background()
 
@@ -1288,16 +1289,28 @@ func (env *CommitmentTestEnv) markReservationSchedulerProcessedStatus(res *v1alp
 		return
 	}
 
-	// Then update status
+	// Then update status - Ready=True only if host was found, Ready=False otherwise
 	res.Status.Host = host
-	res.Status.Conditions = []metav1.Condition{
-		{
-			Type:               v1alpha1.ReservationConditionReady,
-			Status:             metav1.ConditionTrue,
-			Reason:             "ReservationActive",
-			Message:            "Reservation is ready (set by test controller)",
-			LastTransitionTime: metav1.Now(),
-		},
+	if host != "" {
+		res.Status.Conditions = []metav1.Condition{
+			{
+				Type:               v1alpha1.ReservationConditionReady,
+				Status:             metav1.ConditionTrue,
+				Reason:             "ReservationActive",
+				Message:            "Reservation is ready (set by test controller)",
+				LastTransitionTime: metav1.Now(),
+			},
+		}
+	} else {
+		res.Status.Conditions = []metav1.Condition{
+			{
+				Type:               v1alpha1.ReservationConditionReady,
+				Status:             metav1.ConditionFalse,
+				Reason:             "NoHostsFound",
+				Message:            "No hosts with sufficient capacity (set by test controller)",
+				LastTransitionTime: metav1.Now(),
+			},
+		}
 	}
 	if err := env.K8sClient.Status().Update(ctx, res); err != nil {
 		env.T.Logf("Warning: Failed to update reservation status: %v", err)
diff --git a/internal/scheduling/reservations/commitments/controller.go b/internal/scheduling/reservations/commitments/controller.go
index f68e35cde..59b11a9bd 100644
--- a/internal/scheduling/reservations/commitments/controller.go
+++ b/internal/scheduling/reservations/commitments/controller.go
@@ -135,13 +135,17 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 
 	// Sync Spec values to Status fields for non-pre-allocated reservations
 	// This ensures the observed state reflects the desired state from Spec
-	needsStatusUpdate := false
+	// When TargetHost is set in Spec but not synced to Status, this means
+	// the scheduler found a host and we need to mark the reservation as ready.
 	if res.Spec.TargetHost != "" && res.Status.Host != res.Spec.TargetHost {
-		res.Status.Host = res.Spec.TargetHost
-		needsStatusUpdate = true
-	}
-	if needsStatusUpdate {
 		old := res.DeepCopy()
+		res.Status.Host = res.Spec.TargetHost
+		meta.SetStatusCondition(&res.Status.Conditions, metav1.Condition{
+			Type:    v1alpha1.ReservationConditionReady,
+			Status:  metav1.ConditionTrue,
+			Reason:  "ReservationActive",
+			Message: "reservation is successfully scheduled",
+		})
 		patch := client.MergeFrom(old)
 		if err := r.Status().Patch(ctx, &res, patch); err != nil {
 			// Ignore not-found errors during background deletion
@@ -152,7 +156,9 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 			// Object was deleted, no need to continue
 			return ctrl.Result{}, nil
 		}
-		logger.Info("synced spec to status", "host", res.Status.Host)
+		logger.Info("synced spec to status and marked ready", "host", res.Status.Host)
+		// Return and let next reconcile handle allocation verification
+		return ctrl.Result{}, nil
 	}
 
 	// Get project ID from CommittedResourceReservation spec if available.
@@ -259,7 +265,7 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 	}
 
 	if len(scheduleResp.Hosts) == 0 {
-		logger.Info("no hosts found for reservation")
+		logger.Info("no hosts found for reservation", "reservation", res.Name, "flavorName", resourceName)
 		old := res.DeepCopy()
 		meta.SetStatusCondition(&res.Status.Conditions, metav1.Condition{
 			Type:    v1alpha1.ReservationConditionReady,
@@ -280,11 +286,12 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 		return ctrl.Result{}, nil // No need to requeue, we didn't find a host.
 	}
 
-	// Update the reservation with the found host (idx 0)
+	// Update the reservation Spec with the found host (idx 0)
+	// Only update Spec here - the Status will be synced in the next reconcile cycle
+	// This avoids race conditions from doing two patches in one reconcile
 	host := scheduleResp.Hosts[0]
 	logger.Info("found host for reservation", "host", host)
 
-	// First update Spec
 	old := res.DeepCopy()
 	res.Spec.TargetHost = host
 	if err := r.Patch(ctx, &res, client.MergeFrom(old)); err != nil {
@@ -297,26 +304,9 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 		return ctrl.Result{}, nil
 	}
 
-	// Then update Status
-	old = res.DeepCopy()
-	meta.SetStatusCondition(&res.Status.Conditions, metav1.Condition{
-		Type:    v1alpha1.ReservationConditionReady,
-		Status:  metav1.ConditionTrue,
-		Reason:  "ReservationActive",
-		Message: "reservation is successfully scheduled",
-	})
-	res.Status.Host = host
-	patch := client.MergeFrom(old)
-	if err := r.Status().Patch(ctx, &res, patch); err != nil {
-		// Ignore not-found errors during background deletion
-		if client.IgnoreNotFound(err) != nil {
-			logger.Error(err, "failed to patch reservation status")
-			return ctrl.Result{}, err
-		}
-		// Object was deleted, no need to continue
-		return ctrl.Result{}, nil
-	}
-	return ctrl.Result{}, nil // No need to requeue, the reservation is now active.
+	// The Spec patch will trigger a re-reconcile, which will sync Status in the
+	// "Sync Spec values to Status" section above
+	return ctrl.Result{}, nil
 }
 
 // reconcileAllocations verifies all allocations in Spec against actual Nova VM state.
diff --git a/internal/scheduling/reservations/commitments/controller_test.go b/internal/scheduling/reservations/commitments/controller_test.go
index 7433165a9..5af5dfca9 100644
--- a/internal/scheduling/reservations/commitments/controller_test.go
+++ b/internal/scheduling/reservations/commitments/controller_test.go
@@ -292,21 +292,39 @@ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t
 		},
 	}
 
+	// First reconcile: schedules the reservation and sets Spec.TargetHost
 	result, err := reconciler.Reconcile(context.Background(), req)
-
 	if err != nil {
-		t.Errorf("reconcileInstanceReservation() error = %v", err)
+		t.Errorf("First reconcile error = %v", err)
 		return
 	}
+	if result.RequeueAfter > 0 {
+		t.Errorf("Expected no requeue after first reconcile but got %v", result.RequeueAfter)
+	}
 
+	// Verify Spec.TargetHost is set after first reconcile
+	var afterFirstReconcile v1alpha1.Reservation
+	if err = client.Get(context.Background(), req.NamespacedName, &afterFirstReconcile); err != nil {
+		t.Errorf("Failed to get reservation after first reconcile: %v", err)
+		return
+	}
+	if afterFirstReconcile.Spec.TargetHost != "test-host-1" {
+		t.Errorf("Expected Spec.TargetHost=%v after first reconcile, got %v", "test-host-1", afterFirstReconcile.Spec.TargetHost)
+	}
+
+	// Second reconcile: syncs Spec.TargetHost to Status and sets Ready=True
+	result, err = reconciler.Reconcile(context.Background(), req)
+	if err != nil {
+		t.Errorf("Second reconcile error = %v", err)
+		return
+	}
 	if result.RequeueAfter > 0 {
-		t.Errorf("Expected no requeue but got %v", result.RequeueAfter)
+		t.Errorf("Expected no requeue after second reconcile but got %v", result.RequeueAfter)
 	}
 
-	// Verify the reservation status
+	// Verify the reservation status after second reconcile
 	var updated v1alpha1.Reservation
-	err = client.Get(context.Background(), req.NamespacedName, &updated)
-	if err != nil {
+	if err = client.Get(context.Background(), req.NamespacedName, &updated); err != nil {
 		t.Errorf("Failed to get updated reservation: %v", err)
 		return
 	}

From 9b92e08dbb34ff23ebde7ae2c0022fe31e145290 Mon Sep 17 00:00:00 2001
From: mblos <marcel.gute@sap.com>
Date: Tue, 24 Mar 2026 13:58:03 +0100
Subject: [PATCH 2/6] logs

---
 api/v1alpha1/reservation_types.go              |  7 +++++++
 .../commitments/api_change_commitments.go      | 11 ++++++-----
 .../reservations/commitments/controller.go     | 18 +++++++++++-------
 .../commitments/reservation_manager.go         | 12 ++++++------
 .../reservations/commitments/state.go          |  2 ++
 5 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/api/v1alpha1/reservation_types.go b/api/v1alpha1/reservation_types.go
index 4a7fe5cf2..97950a395 100644
--- a/api/v1alpha1/reservation_types.go
+++ b/api/v1alpha1/reservation_types.go
@@ -36,6 +36,13 @@ const (
 	ReservationTypeLabelFailover          = "failover"
 )
 
+// Annotation keys for Reservation metadata.
+const (
+	// AnnotationCreatorRequestID tracks the request ID that created this reservation.
+	// Used for end-to-end traceability across API calls, controller reconciles, and scheduler invocations.
+	AnnotationCreatorRequestID = "reservations.cortex.cloud/creator-request-id"
+)
+
 // CommittedResourceAllocation represents a workload's assignment to a committed resource reservation slot.
 // The workload could be a VM (Nova/IronCore), Pod (Kubernetes), or other resource.
 type CommittedResourceAllocation struct {
diff --git a/internal/scheduling/reservations/commitments/api_change_commitments.go b/internal/scheduling/reservations/commitments/api_change_commitments.go
index e8e2ec172..818692e60 100644
--- a/internal/scheduling/reservations/commitments/api_change_commitments.go
+++ b/internal/scheduling/reservations/commitments/api_change_commitments.go
@@ -132,7 +132,7 @@ func (api *HTTPAPI) processCommitmentChanges(ctx context.Context, w http.Respons
 	manager := NewReservationManager(api.client)
 	requireRollback := false
 	failedCommitments := make(map[string]string) // commitmentUUID to reason for failure, for better response messages in case of rollback
-	logger.Info("processing commitment change request", "availabilityZone", req.AZ, "dryRun", req.DryRun, "affectedProjects", len(req.ByProject))
+	creatorRequestID := reservations.GlobalRequestIDFromContext(ctx)
 
 	knowledge := &reservations.FlavorGroupKnowledgeClient{Client: api.client}
 	flavorGroups, err := knowledge.GetAllFlavorGroups(ctx, nil)
@@ -194,8 +194,7 @@ ProcessLoop:
 			}
 
 			for _, commitment := range resourceChanges.Commitments {
-				// Additional per-commitment validation if needed
-				logger.Info("processing commitment change", "commitmentUUID", commitment.UUID, "projectID", projectID, "resourceName", resourceName, "oldStatus", commitment.OldStatus.UnwrapOr("none"), "newStatus", commitment.NewStatus.UnwrapOr("none"))
+				logger.V(1).Info("processing commitment", "commitmentUUID", commitment.UUID, "oldStatus", commitment.OldStatus.UnwrapOr("none"), "newStatus", commitment.NewStatus.UnwrapOr("none"))
 
 				// TODO add configurable upper limit validation for commitment size (number of instances) to prevent excessive reservation creation
 				// TODO add domain
@@ -247,8 +246,10 @@ ProcessLoop:
 					requireRollback = true
 					break ProcessLoop
 				}
+				// Set creator request ID for traceability across controller reconciles
+				stateDesired.CreatorRequestID = creatorRequestID
 
-				logger.Info("applying commitment state change", "commitmentUUID", commitment.UUID, "oldState", stateBefore, "desiredState", stateDesired)
+				logger.V(1).Info("applying commitment state change", "commitmentUUID", commitment.UUID, "oldMemory", stateBefore.TotalMemoryBytes, "desiredMemory", stateDesired.TotalMemoryBytes)
 
 				touchedReservations, deletedReservations, err := manager.ApplyCommitmentState(ctx, logger, stateDesired, flavorGroups, "changeCommitmentsApi")
 				if err != nil {
@@ -257,7 +258,7 @@ ProcessLoop:
 					requireRollback = true
 					break ProcessLoop
 				}
-				logger.Info("applied commitment state change", "commitmentUUID", commitment.UUID, "touchedReservations", len(touchedReservations), "deletedReservations", len(deletedReservations))
+				logger.V(1).Info("applied commitment state change", "commitmentUUID", commitment.UUID, "touchedReservations", len(touchedReservations), "deletedReservations", len(deletedReservations))
 				reservationsToWatch = append(reservationsToWatch, touchedReservations...)
 			}
 		}
diff --git a/internal/scheduling/reservations/commitments/controller.go b/internal/scheduling/reservations/commitments/controller.go
index 59b11a9bd..e03958151 100644
--- a/internal/scheduling/reservations/commitments/controller.go
+++ b/internal/scheduling/reservations/commitments/controller.go
@@ -50,7 +50,8 @@ type CommitmentReservationController struct {
 // Note: This controller only handles commitment reservations, as filtered by the predicate.
 func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
 	ctx = WithNewGlobalRequestID(ctx)
-	logger := LoggerFromContext(ctx).WithValues("component", "controller", "reservation", req.Name, "namespace", req.Namespace)
+	logger := LoggerFromContext(ctx).WithValues("component", "controller", "reservation", req.Name)
+
 	// Fetch the reservation object.
 	var res v1alpha1.Reservation
 	if err := r.Get(ctx, req.NamespacedName, &res); err != nil {
@@ -58,6 +59,11 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 		return ctrl.Result{}, client.IgnoreNotFound(err)
 	}
 
+	// Extract creator request ID from annotation for end-to-end traceability
+	if creatorReq := res.Annotations[v1alpha1.AnnotationCreatorRequestID]; creatorReq != "" {
+		logger = logger.WithValues("creatorReq", creatorReq)
+	}
+
 	// filter for CR reservations
 	resourceName := ""
 	if res.Spec.CommittedResourceReservation != nil {
@@ -86,7 +92,7 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 	}
 
 	if meta.IsStatusConditionTrue(res.Status.Conditions, v1alpha1.ReservationConditionReady) {
-		logger.Info("reservation is active, verifying allocations")
+		logger.V(1).Info("reservation is active, verifying allocations")
 
 		// Verify all allocations in Spec against actual VM state from database
 		if err := r.reconcileAllocations(ctx, &res); err != nil {
@@ -323,7 +329,7 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte
 
 	// Skip if no allocations to verify
 	if len(res.Spec.CommittedResourceReservation.Allocations) == 0 {
-		logger.Info("no allocations to verify", "reservation", res.Name)
+		logger.V(1).Info("no allocations to verify", "reservation", res.Name)
 		return nil
 	}
 
@@ -349,9 +355,8 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte
 			actualHost := server.OSEXTSRVATTRHost
 			newStatusAllocations[vmUUID] = actualHost
 
-			logger.Info("verified VM allocation",
+			logger.V(1).Info("verified VM allocation",
 				"vm", vmUUID,
-				"reservation", res.Name,
 				"actualHost", actualHost,
 				"expectedHost", res.Status.Host)
 		} else {
@@ -381,8 +386,7 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte
 		return fmt.Errorf("failed to patch reservation status: %w", err)
 	}
 
-	logger.Info("reconciled allocations",
-		"reservation", res.Name,
+	logger.V(1).Info("reconciled allocations",
 		"specAllocations", len(res.Spec.CommittedResourceReservation.Allocations),
 		"statusAllocations", len(newStatusAllocations))
 
diff --git a/internal/scheduling/reservations/commitments/reservation_manager.go b/internal/scheduling/reservations/commitments/reservation_manager.go
index 773f74122..e09d44d4e 100644
--- a/internal/scheduling/reservations/commitments/reservation_manager.go
+++ b/internal/scheduling/reservations/commitments/reservation_manager.go
@@ -221,12 +221,9 @@ func (m *ReservationManager) syncReservationMetadata(
 		(state.StartTime != nil && (reservation.Spec.StartTime == nil || !reservation.Spec.StartTime.Time.Equal(*state.StartTime))) ||
 		(state.EndTime != nil && (reservation.Spec.EndTime == nil || !reservation.Spec.EndTime.Time.Equal(*state.EndTime))) {
 		// Apply patch
-		logger.Info("syncing reservation metadata",
-			"reservation", reservation,
-			"desired commitmentUUID", state.CommitmentUUID,
-			"desired availabilityZone", state.AvailabilityZone,
-			"desired startTime", state.StartTime,
-			"desired endTime", state.EndTime)
+		logger.V(1).Info("syncing reservation metadata",
+			"reservation", reservation.Name,
+			"commitmentUUID", state.CommitmentUUID)
 
 		patch := client.MergeFrom(reservation.DeepCopy())
 
@@ -322,6 +319,9 @@ func (m *ReservationManager) newReservation(
 			Labels: map[string]string{
 				v1alpha1.LabelReservationType: v1alpha1.ReservationTypeLabelCommittedResource,
 			},
+			Annotations: map[string]string{
+				v1alpha1.AnnotationCreatorRequestID: state.CreatorRequestID,
+			},
 		},
 		Spec: spec,
 	}
diff --git a/internal/scheduling/reservations/commitments/state.go b/internal/scheduling/reservations/commitments/state.go
index dbc3ad72d..c8e75ff83 100644
--- a/internal/scheduling/reservations/commitments/state.go
+++ b/internal/scheduling/reservations/commitments/state.go
@@ -46,6 +46,8 @@ type CommitmentState struct {
 	StartTime *time.Time
 	// EndTime is when the commitment expires
 	EndTime *time.Time
+	// CreatorRequestID is the request ID that triggered this state change (for traceability)
+	CreatorRequestID string
 }
 
 // FromCommitment converts Limes commitment to CommitmentState.

From 15f4f407c29885a8120e058d43b82ec8e067ad8b Mon Sep 17 00:00:00 2001
From: mblos <marcel.gute@sap.com>
Date: Tue, 24 Mar 2026 14:14:26 +0100
Subject: [PATCH 3/6] logs

---
 .../commitments/api_change_commitments.go     | 57 ++++++++++---------
 .../commitments/reservation_manager.go        | 40 +++++--------
 2 files changed, 44 insertions(+), 53 deletions(-)

diff --git a/internal/scheduling/reservations/commitments/api_change_commitments.go b/internal/scheduling/reservations/commitments/api_change_commitments.go
index 818692e60..6da3ca024 100644
--- a/internal/scheduling/reservations/commitments/api_change_commitments.go
+++ b/internal/scheduling/reservations/commitments/api_change_commitments.go
@@ -319,6 +319,7 @@ ProcessLoop:
 }
 
 // watchReservationsUntilReady polls until all reservations reach Ready=True or timeout.
+// Returns failed reservations and any errors encountered.
 func watchReservationsUntilReady(
 	ctx context.Context,
 	logger logr.Logger,
@@ -333,19 +334,32 @@ func watchReservationsUntilReady(
 	}
 
 	deadline := time.Now().Add(timeout)
+	startTime := time.Now()
+	totalReservations := len(reservations)
 
 	reservationsToWatch := make([]v1alpha1.Reservation, len(reservations))
 	copy(reservationsToWatch, reservations)
 
+	// Track successful reservations for summary
+	var successfulReservations []string
+	pollCount := 0
+
 	for {
+		pollCount++
 		var stillWaiting []v1alpha1.Reservation
 		if time.Now().After(deadline) {
 			errors = append(errors, fmt.Errorf("timeout after %v waiting for reservations to become ready", timeout))
+			// Log summary on timeout
+			logger.Info("reservation watch completed (timeout)",
+				"total", totalReservations,
+				"ready", len(successfulReservations),
+				"failed", len(failedReservations),
+				"timedOut", len(reservationsToWatch),
+				"duration", time.Since(startTime).Round(time.Millisecond),
+				"polls", pollCount)
 			return failedReservations, errors
 		}
 
-		allAreReady := true
-
 		for _, res := range reservationsToWatch {
 			// Fetch current state
 			var current v1alpha1.Reservation
@@ -355,9 +369,7 @@ func watchReservationsUntilReady(
 			}
 
 			if err := k8sClient.Get(ctx, nn, &current); err != nil {
-				allAreReady = false
-				// Reservation is still in process of being created, or there is a transient error, continue waiting for it
-				logger.V(1).Info("transient error getting reservation, will retry", "reservation", res.Name, "error", err)
+				// Reservation is still in process of being created, or there is a transient error
 				stillWaiting = append(stillWaiting, res)
 				continue
 			}
@@ -370,7 +382,6 @@ func watchReservationsUntilReady(
 
 			if readyCond == nil {
 				// Condition not set yet, keep waiting
-				allAreReady = false
 				stillWaiting = append(stillWaiting, res)
 				continue
 			}
@@ -378,43 +389,33 @@ func watchReservationsUntilReady(
 			switch readyCond.Status {
 			case metav1.ConditionTrue:
 				// Only consider truly ready if Status.Host is populated
-				// This handles the two-phase reconcile pattern where Ready=True
-				// is set atomically with Status.Host in the controller
 				if current.Spec.TargetHost == "" || current.Status.Host == "" {
-					allAreReady = false
 					stillWaiting = append(stillWaiting, res)
 					continue
 				}
-				// Reservation is successfully scheduled
-				logger.Info("reservation ready", "reservation", current.Name, "host", current.Status.Host)
+				// Reservation is successfully scheduled - track for summary
+				successfulReservations = append(successfulReservations, current.Name)
 
 			case metav1.ConditionFalse:
-				// Check the reason - NoHostsFound means no capacity
-				if readyCond.Reason == "NoHostsFound" || readyCond.Reason == "NoHostsAvailable" {
-					failedReservations = append(failedReservations, current)
-					logger.Info("insufficient capacity for reservation", "reservation", current.Name, "reason", readyCond.Reason, "message", readyCond.Message)
-				} else {
-					// Other failure reasons
-					logger.Info("reservation failed", "reservation", current.Name, "reason", readyCond.Reason, "message", readyCond.Message)
-				}
+				// Any failure reason counts as failed
+				failedReservations = append(failedReservations, current)
 			case metav1.ConditionUnknown:
-				allAreReady = false
 				stillWaiting = append(stillWaiting, res)
 			}
 		}
 
-		if allAreReady || len(stillWaiting) == 0 {
-			logger.Info("all reservations checked",
-				"failed", len(failedReservations))
+		if len(stillWaiting) == 0 {
+			// All reservations have reached a terminal state - log summary
+			logger.Info("reservation watch completed",
+				"total", totalReservations,
+				"ready", len(successfulReservations),
+				"failed", len(failedReservations),
+				"duration", time.Since(startTime).Round(time.Millisecond),
+				"polls", pollCount)
 			return failedReservations, errors
 		}
 
 		reservationsToWatch = stillWaiting
-		// Log progress
-		logger.V(1).Info("waiting for reservations to become ready",
-			"notReady", len(reservationsToWatch),
-			"total", len(reservations),
-			"timeRemaining", time.Until(deadline).Round(time.Second))
 
 		// Wait before next poll
 		select {
diff --git a/internal/scheduling/reservations/commitments/reservation_manager.go b/internal/scheduling/reservations/commitments/reservation_manager.go
index e09d44d4e..40cfa08ef 100644
--- a/internal/scheduling/reservations/commitments/reservation_manager.go
+++ b/internal/scheduling/reservations/commitments/reservation_manager.go
@@ -82,18 +82,15 @@ func (m *ReservationManager) ApplyCommitmentState(
 		deltaMemoryBytes -= memoryQuantity.Value()
 	}
 
-	log.Info("applying commitment state",
-		"commitmentUUID", desiredState.CommitmentUUID,
-		"desiredMemoryBytes", desiredState.TotalMemoryBytes,
-		"deltaMemoryBytes", deltaMemoryBytes,
-		"existingSlots", len(existing),
-	)
+	// Log only if there's actual work to do (delta != 0)
+	hasChanges := deltaMemoryBytes != 0
 
 	nextSlotIndex := GetNextSlotIndex(existing)
 
 	// Phase 3 (DELETE): Delete inconsistent reservations (wrong flavor group/project)
 	// They will be recreated with correct metadata in subsequent phases.
 	var validReservations []v1alpha1.Reservation
+	var repairedCount int
 	for _, res := range existing {
 		if res.Spec.CommittedResourceReservation.ResourceGroup != desiredState.FlavorGroupName ||
 			res.Spec.CommittedResourceReservation.ProjectID != desiredState.ProjectID {
@@ -104,6 +101,7 @@ func (m *ReservationManager) ApplyCommitmentState(
 				"actualFlavorGroup", res.Spec.CommittedResourceReservation.ResourceGroup,
 				"expectedProjectID", desiredState.ProjectID,
 				"actualProjectID", res.Spec.CommittedResourceReservation.ProjectID)
+			repairedCount++
 			removedReservations = append(removedReservations, res)
 			memValue := res.Spec.Resources[hv1.ResourceMemory]
 			deltaMemoryBytes += memValue.Value()
@@ -145,19 +143,13 @@ func (m *ReservationManager) ApplyCommitmentState(
 		memValue := reservationToDelete.Spec.Resources[hv1.ResourceMemory]
 		deltaMemoryBytes += memValue.Value()
 
-		log.Info("deleting reservation (capacity decrease)",
-			"commitmentUUID", desiredState.CommitmentUUID,
-			"deltaMemoryBytes", deltaMemoryBytes,
-			"name", reservationToDelete.Name,
-			"numAllocations", len(reservationToDelete.Spec.CommittedResourceReservation.Allocations),
-			"memoryBytes", memValue.Value())
-
 		if err := m.Delete(ctx, reservationToDelete); err != nil {
 			return touchedReservations, removedReservations, fmt.Errorf("failed to delete reservation %s: %w", reservationToDelete.Name, err)
 		}
 	}
 
 	// Phase 5 (CREATE): Create new reservations (capacity increased)
+	var createdCount int
 	for deltaMemoryBytes > 0 {
 		// Need to create new reservation slots, always prefer largest flavor within the group
 		// TODO more sophisticated flavor selection, especially with flavors of different cpu/memory ratio
@@ -165,13 +157,7 @@ func (m *ReservationManager) ApplyCommitmentState(
 		touchedReservations = append(touchedReservations, *reservation)
 		memValue := reservation.Spec.Resources[hv1.ResourceMemory]
 		deltaMemoryBytes -= memValue.Value()
-
-		log.Info("creating reservation",
-			"commitmentUUID", desiredState.CommitmentUUID,
-			"deltaMemoryBytes", deltaMemoryBytes,
-			"flavorName", reservation.Spec.CommittedResourceReservation.ResourceName,
-			"name", reservation.Name,
-			"memoryBytes", memValue.Value())
+		createdCount++
 
 		if err := m.Create(ctx, reservation); err != nil {
 			if apierrors.IsAlreadyExists(err) {
@@ -198,11 +184,15 @@ func (m *ReservationManager) ApplyCommitmentState(
 		}
 	}
 
-	log.Info("completed commitment state sync",
-		"commitmentUUID", desiredState.CommitmentUUID,
-		"totalReservations", len(existing),
-		"created", len(touchedReservations)-len(existing),
-		"deleted", len(removedReservations))
+	// Only log if there were actual changes
+	if hasChanges || createdCount > 0 || len(removedReservations) > 0 || repairedCount > 0 {
+		log.Info("commitment state sync completed",
+			"commitmentUUID", desiredState.CommitmentUUID,
+			"created", createdCount,
+			"deleted", len(removedReservations),
+			"repaired", repairedCount,
+			"total", len(existing)+createdCount)
+	}
 
 	return touchedReservations, removedReservations, nil
 }

From 4021af51b519752d39984ef2d9f36f635edf520a Mon Sep 17 00:00:00 2001
From: mblos <marcel.gute@sap.com>
Date: Tue, 24 Mar 2026 14:25:25 +0100
Subject: [PATCH 4/6] initialize cr prom metrics

---
 .../api_change_commitments_monitor.go         | 19 ++++++++++++++-
 .../api_change_commitments_monitor_test.go    | 24 ++++++++++++-------
 .../api_report_capacity_monitor.go            | 14 ++++++++++-
 .../commitments/api_report_usage_monitor.go   | 14 ++++++++++-
 4 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/internal/scheduling/reservations/commitments/api_change_commitments_monitor.go b/internal/scheduling/reservations/commitments/api_change_commitments_monitor.go
index 30a09d49e..d8522008b 100644
--- a/internal/scheduling/reservations/commitments/api_change_commitments_monitor.go
+++ b/internal/scheduling/reservations/commitments/api_change_commitments_monitor.go
@@ -16,8 +16,10 @@ type ChangeCommitmentsAPIMonitor struct {
 }
 
 // NewChangeCommitmentsAPIMonitor creates a new monitor with Prometheus metrics.
+// Metrics are pre-initialized with zero values for common HTTP status codes
+// to ensure they appear in Prometheus before the first request.
 func NewChangeCommitmentsAPIMonitor() ChangeCommitmentsAPIMonitor {
-	return ChangeCommitmentsAPIMonitor{
+	m := ChangeCommitmentsAPIMonitor{
 		requestCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
 			Name: "cortex_committed_resource_change_api_requests_total",
 			Help: "Total number of committed resource change API requests by HTTP status code",
@@ -35,6 +37,21 @@ func NewChangeCommitmentsAPIMonitor() ChangeCommitmentsAPIMonitor {
 			Help: "Total number of commitment change requests that timed out while waiting for reservations to become ready",
 		}),
 	}
+
+	// Pre-initialize metrics with zero values for common HTTP status codes.
+	// This ensures metrics exist in Prometheus before the first request,
+	// preventing "metric missing" warnings in alerting rules.
+	for _, statusCode := range []string{"200", "400", "409", "500", "503"} {
+		m.requestCounter.WithLabelValues(statusCode)
+		m.requestDuration.WithLabelValues(statusCode)
+	}
+
+	// Pre-initialize commitment change result labels
+	for _, result := range []string{"accepted", "rejected"} {
+		m.commitmentChanges.WithLabelValues(result)
+	}
+
+	return m
 }
 
 // Describe implements prometheus.Collector.
diff --git a/internal/scheduling/reservations/commitments/api_change_commitments_monitor_test.go b/internal/scheduling/reservations/commitments/api_change_commitments_monitor_test.go
index 322d01aae..255f05bd8 100644
--- a/internal/scheduling/reservations/commitments/api_change_commitments_monitor_test.go
+++ b/internal/scheduling/reservations/commitments/api_change_commitments_monitor_test.go
@@ -102,11 +102,13 @@ func TestChangeCommitmentsAPIMonitor_MetricLabels(t *testing.T) {
 	// Verify request counter has correct labels
 	for _, family := range families {
 		if *family.Name == "cortex_committed_resource_change_api_requests_total" {
-			if len(family.Metric) != 3 {
-				t.Errorf("Expected 3 request counter metrics, got %d", len(family.Metric))
+			// At minimum we expect the 3 labels we added (200, 409, 503)
+			// Plus pre-initialized labels (400, 500) - so >= 5 total
+			if len(family.Metric) < 3 {
+				t.Errorf("Expected at least 3 request counter metrics, got %d", len(family.Metric))
 			}
 
-			// Check label names
+			// Check all metrics have the status_code label
 			for _, metric := range family.Metric {
 				labelNames := make(map[string]bool)
 				for _, label := range metric.Label {
@@ -120,11 +122,13 @@ func TestChangeCommitmentsAPIMonitor_MetricLabels(t *testing.T) {
 		}
 
 		if *family.Name == "cortex_committed_resource_change_api_request_duration_seconds" {
-			if len(family.Metric) != 1 {
-				t.Errorf("Expected 1 histogram metric, got %d", len(family.Metric))
+			// At minimum we expect the label we used (200)
+			// Plus pre-initialized labels - so >= 1 total
+			if len(family.Metric) < 1 {
+				t.Errorf("Expected at least 1 histogram metric, got %d", len(family.Metric))
 			}
 
-			// Check label names
+			// Check all metrics have the status_code label
 			for _, metric := range family.Metric {
 				labelNames := make(map[string]bool)
 				for _, label := range metric.Label {
@@ -138,11 +142,13 @@ func TestChangeCommitmentsAPIMonitor_MetricLabels(t *testing.T) {
 		}
 
 		if *family.Name == "cortex_committed_resource_change_api_commitment_changes_total" {
-			if len(family.Metric) != 2 {
-				t.Errorf("Expected 2 commitment changes metrics, got %d", len(family.Metric))
+			// At minimum we expect the 2 labels we added (success, rejected)
+			// Plus pre-initialized labels (accepted) - so >= 2 total
+			if len(family.Metric) < 2 {
+				t.Errorf("Expected at least 2 commitment changes metrics, got %d", len(family.Metric))
 			}
 
-			// Check label names
+			// Check all metrics have the result label
 			for _, metric := range family.Metric {
 				labelNames := make(map[string]bool)
 				for _, label := range metric.Label {
diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_monitor.go b/internal/scheduling/reservations/commitments/api_report_capacity_monitor.go
index d484b6f27..d78af6cc7 100644
--- a/internal/scheduling/reservations/commitments/api_report_capacity_monitor.go
+++ b/internal/scheduling/reservations/commitments/api_report_capacity_monitor.go
@@ -14,8 +14,10 @@ type ReportCapacityAPIMonitor struct {
 }
 
 // NewReportCapacityAPIMonitor creates a new monitor with Prometheus metrics.
+// Metrics are pre-initialized with zero values for common HTTP status codes
+// to ensure they appear in Prometheus before the first request.
 func NewReportCapacityAPIMonitor() ReportCapacityAPIMonitor {
-	return ReportCapacityAPIMonitor{
+	m := ReportCapacityAPIMonitor{
 		requestCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
 			Name: "cortex_committed_resource_capacity_api_requests_total",
 			Help: "Total number of committed resource capacity API requests by HTTP status code",
@@ -26,6 +28,16 @@ func NewReportCapacityAPIMonitor() ReportCapacityAPIMonitor {
 			Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10},
 		}, []string{"status_code"}),
 	}
+
+	// Pre-initialize metrics with zero values for common HTTP status codes.
+	// This ensures metrics exist in Prometheus before the first request,
+	// preventing "metric missing" warnings in alerting rules.
+	for _, statusCode := range []string{"200", "500", "503"} {
+		m.requestCounter.WithLabelValues(statusCode)
+		m.requestDuration.WithLabelValues(statusCode)
+	}
+
+	return m
 }
 
 // Describe implements prometheus.Collector.
diff --git a/internal/scheduling/reservations/commitments/api_report_usage_monitor.go b/internal/scheduling/reservations/commitments/api_report_usage_monitor.go
index d3fc68018..bbcaaaf86 100644
--- a/internal/scheduling/reservations/commitments/api_report_usage_monitor.go
+++ b/internal/scheduling/reservations/commitments/api_report_usage_monitor.go
@@ -14,8 +14,10 @@ type ReportUsageAPIMonitor struct {
 }
 
 // NewReportUsageAPIMonitor creates a new monitor with Prometheus metrics.
+// Metrics are pre-initialized with zero values for common HTTP status codes
+// to ensure they appear in Prometheus before the first request.
 func NewReportUsageAPIMonitor() ReportUsageAPIMonitor {
-	return ReportUsageAPIMonitor{
+	m := ReportUsageAPIMonitor{
 		requestCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
 			Name: "cortex_committed_resource_usage_api_requests_total",
 			Help: "Total number of committed resource usage API requests by HTTP status code",
@@ -26,6 +28,16 @@ func NewReportUsageAPIMonitor() ReportUsageAPIMonitor {
 			Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10},
 		}, []string{"status_code"}),
 	}
+
+	// Pre-initialize metrics with zero values for common HTTP status codes.
+	// This ensures metrics exist in Prometheus before the first request,
+	// preventing "metric missing" warnings in alerting rules.
+	for _, statusCode := range []string{"200", "400", "404", "500", "503"} {
+		m.requestCounter.WithLabelValues(statusCode)
+		m.requestDuration.WithLabelValues(statusCode)
+	}
+
+	return m
 }
 
 // Describe implements prometheus.Collector.

From 8167f78d7e21014b1f14156dd2aef000eda2b9b7 Mon Sep 17 00:00:00 2001
From: mblos <marcel.gute@sap.com>
Date: Tue, 24 Mar 2026 14:38:19 +0100
Subject: [PATCH 5/6] cr off switch for capacity and usage reporting

---
 .../reservations/commitments/api_report_capacity.go  |  8 ++++++++
 .../reservations/commitments/api_report_usage.go     |  8 ++++++++
 .../scheduling/reservations/commitments/config.go    | 12 ++++++++++++
 3 files changed, 28 insertions(+)

diff --git a/internal/scheduling/reservations/commitments/api_report_capacity.go b/internal/scheduling/reservations/commitments/api_report_capacity.go
index 2f7618ced..836ae017f 100644
--- a/internal/scheduling/reservations/commitments/api_report_capacity.go
+++ b/internal/scheduling/reservations/commitments/api_report_capacity.go
@@ -20,6 +20,14 @@ func (api *HTTPAPI) HandleReportCapacity(w http.ResponseWriter, r *http.Request)
 	startTime := time.Now()
 	statusCode := http.StatusOK
 
+	// Check if API is enabled
+	if !api.config.EnableReportCapacityAPI {
+		statusCode = http.StatusServiceUnavailable
+		http.Error(w, "report-capacity API is disabled", statusCode)
+		api.recordCapacityMetrics(statusCode, startTime)
+		return
+	}
+
 	ctx := WithNewGlobalRequestID(r.Context())
 	logger := LoggerFromContext(ctx).WithValues("component", "api", "endpoint", "/v1/commitments/report-capacity")
 
diff --git a/internal/scheduling/reservations/commitments/api_report_usage.go b/internal/scheduling/reservations/commitments/api_report_usage.go
index 86b514d87..a533d4e40 100644
--- a/internal/scheduling/reservations/commitments/api_report_usage.go
+++ b/internal/scheduling/reservations/commitments/api_report_usage.go
@@ -24,6 +24,14 @@ func (api *HTTPAPI) HandleReportUsage(w http.ResponseWriter, r *http.Request) {
 	startTime := time.Now()
 	statusCode := http.StatusOK
 
+	// Check if API is enabled
+	if !api.config.EnableReportUsageAPI {
+		statusCode = http.StatusServiceUnavailable
+		http.Error(w, "report-usage API is disabled", statusCode)
+		api.recordUsageMetrics(statusCode, startTime)
+		return
+	}
+
 	requestID := r.Header.Get("X-Request-ID")
 	if requestID == "" {
 		requestID = fmt.Sprintf("req-%d", time.Now().UnixNano())
diff --git a/internal/scheduling/reservations/commitments/config.go b/internal/scheduling/reservations/commitments/config.go
index 0004cf19b..98a5d59ae 100644
--- a/internal/scheduling/reservations/commitments/config.go
+++ b/internal/scheduling/reservations/commitments/config.go
@@ -47,6 +47,16 @@ type Config struct {
 	// When false, the endpoint will return HTTP 503 Service Unavailable.
 	// The info endpoint remains available for health checks.
 	EnableChangeCommitmentsAPI bool `json:"committedResourceEnableChangeCommitmentsAPI"`
+
+	// EnableReportUsageAPI controls whether the report-usage API endpoint is active.
+	// When false, the endpoint will return HTTP 503 Service Unavailable.
+	// This can be used as an emergency switch if the usage reporting is causing issues.
+	EnableReportUsageAPI bool `json:"committedResourceEnableReportUsageAPI"`
+
+	// EnableReportCapacityAPI controls whether the report-capacity API endpoint is active.
+	// When false, the endpoint will return HTTP 503 Service Unavailable.
+	// This can be used as an emergency switch if the capacity reporting is causing issues.
+	EnableReportCapacityAPI bool `json:"committedResourceEnableReportCapacityAPI"`
 }
 
 func DefaultConfig() Config {
@@ -58,5 +68,7 @@ func DefaultConfig() Config {
 		ChangeAPIWatchReservationsTimeout:      10 * time.Second,
 		ChangeAPIWatchReservationsPollInterval: 500 * time.Millisecond,
 		EnableChangeCommitmentsAPI:             true,
+		EnableReportUsageAPI:                   true,
+		EnableReportCapacityAPI:                true,
 	}
 }

From 716e622b3bebb2a4770ac099ff6495d3b31d6981 Mon Sep 17 00:00:00 2001
From: mblos <marcel.gute@sap.com>
Date: Tue, 24 Mar 2026 14:49:10 +0100
Subject: [PATCH 6/6] header with logging id

---
 .../commitments/api_change_commitments.go       | 12 +++++++-----
 .../reservations/commitments/api_info.go        | 11 ++++++++++-
 .../commitments/api_report_capacity.go          | 11 ++++++++++-
 .../commitments/api_report_usage.go             | 17 +++++++++++------
 4 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/internal/scheduling/reservations/commitments/api_change_commitments.go b/internal/scheduling/reservations/commitments/api_change_commitments.go
index 6da3ca024..953319dbe 100644
--- a/internal/scheduling/reservations/commitments/api_change_commitments.go
+++ b/internal/scheduling/reservations/commitments/api_change_commitments.go
@@ -49,6 +49,13 @@ func (api *HTTPAPI) HandleChangeCommitments(w http.ResponseWriter, r *http.Reque
 	req := liquid.CommitmentChangeRequest{}
 	statusCode := http.StatusOK
 
+	// Extract or generate request ID for tracing - always set in response header
+	requestID := r.Header.Get("X-Request-ID")
+	if requestID == "" {
+		requestID = uuid.New().String()
+	}
+	w.Header().Set("X-Request-ID", requestID)
+
 	// Check if API is enabled
 	if !api.config.EnableChangeCommitmentsAPI {
 		statusCode = http.StatusServiceUnavailable
@@ -61,11 +68,6 @@ func (api *HTTPAPI) HandleChangeCommitments(w http.ResponseWriter, r *http.Reque
 	api.changeMutex.Lock()
 	defer api.changeMutex.Unlock()
 
-	// Extract or generate request ID for tracing
-	requestID := r.Header.Get("X-Request-ID")
-	if requestID == "" {
-		requestID = uuid.New().String()
-	}
 	ctx := reservations.WithGlobalRequestID(context.Background(), "committed-resource-"+requestID)
 	logger := LoggerFromContext(ctx).WithValues("component", "api", "endpoint", "/v1/change-commitments")
 
diff --git a/internal/scheduling/reservations/commitments/api_info.go b/internal/scheduling/reservations/commitments/api_info.go
index 71a84feb4..c189c859a 100644
--- a/internal/scheduling/reservations/commitments/api_info.go
+++ b/internal/scheduling/reservations/commitments/api_info.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
 	"github.com/go-logr/logr"
+	"github.com/google/uuid"
 	liquid "github.com/sapcc/go-api-declarations/liquid"
 )
 
@@ -19,7 +20,15 @@ import (
 // See: https://github.com/sapcc/go-api-declarations/blob/main/liquid/commitment.go
 // See: https://pkg.go.dev/github.com/sapcc/go-api-declarations/liquid
 func (api *HTTPAPI) HandleInfo(w http.ResponseWriter, r *http.Request) {
-	ctx := WithNewGlobalRequestID(r.Context())
+	// Extract or generate request ID for tracing
+	requestID := r.Header.Get("X-Request-ID")
+	if requestID == "" {
+		requestID = uuid.New().String()
+	}
+	// Set request ID in response header for client correlation
+	w.Header().Set("X-Request-ID", requestID)
+
+	ctx := reservations.WithGlobalRequestID(r.Context(), "committed-resource-"+requestID)
 	logger := LoggerFromContext(ctx).WithValues("component", "api", "endpoint", "/v1/info")
 
 	// Only accept GET method
diff --git a/internal/scheduling/reservations/commitments/api_report_capacity.go b/internal/scheduling/reservations/commitments/api_report_capacity.go
index 836ae017f..19b7fb24c 100644
--- a/internal/scheduling/reservations/commitments/api_report_capacity.go
+++ b/internal/scheduling/reservations/commitments/api_report_capacity.go
@@ -9,6 +9,8 @@ import (
 	"strconv"
 	"time"
 
+	"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
+	"github.com/google/uuid"
 	"github.com/sapcc/go-api-declarations/liquid"
 )
 
@@ -20,6 +22,13 @@ func (api *HTTPAPI) HandleReportCapacity(w http.ResponseWriter, r *http.Request)
 	startTime := time.Now()
 	statusCode := http.StatusOK
 
+	// Extract or generate request ID for tracing - always set in response header
+	requestID := r.Header.Get("X-Request-ID")
+	if requestID == "" {
+		requestID = uuid.New().String()
+	}
+	w.Header().Set("X-Request-ID", requestID)
+
 	// Check if API is enabled
 	if !api.config.EnableReportCapacityAPI {
 		statusCode = http.StatusServiceUnavailable
@@ -28,7 +37,7 @@ func (api *HTTPAPI) HandleReportCapacity(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	ctx := WithNewGlobalRequestID(r.Context())
+	ctx := reservations.WithGlobalRequestID(r.Context(), "committed-resource-"+requestID)
 	logger := LoggerFromContext(ctx).WithValues("component", "api", "endpoint", "/v1/commitments/report-capacity")
 
 	// Only accept POST method
diff --git a/internal/scheduling/reservations/commitments/api_report_usage.go b/internal/scheduling/reservations/commitments/api_report_usage.go
index a533d4e40..558758bab 100644
--- a/internal/scheduling/reservations/commitments/api_report_usage.go
+++ b/internal/scheduling/reservations/commitments/api_report_usage.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/google/uuid"
 	"github.com/sapcc/go-api-declarations/liquid"
 )
 
@@ -24,20 +25,24 @@ func (api *HTTPAPI) HandleReportUsage(w http.ResponseWriter, r *http.Request) {
 	startTime := time.Now()
 	statusCode := http.StatusOK
 
+	// Extract or generate request ID for tracing - always set in response header
+	requestID := r.Header.Get("X-Request-ID")
+	if requestID == "" {
+		requestID = uuid.New().String()
+	}
+	w.Header().Set("X-Request-ID", requestID)
+
+	log := baseLog.WithValues("requestID", requestID, "endpoint", "report-usage")
+
 	// Check if API is enabled
 	if !api.config.EnableReportUsageAPI {
 		statusCode = http.StatusServiceUnavailable
+		log.Info("report-usage API is disabled, rejecting request")
 		http.Error(w, "report-usage API is disabled", statusCode)
 		api.recordUsageMetrics(statusCode, startTime)
 		return
 	}
 
-	requestID := r.Header.Get("X-Request-ID")
-	if requestID == "" {
-		requestID = fmt.Sprintf("req-%d", time.Now().UnixNano())
-	}
-	log := baseLog.WithValues("requestID", requestID, "endpoint", "report-usage")
-
 	if r.Method != http.MethodPost {
 		statusCode = http.StatusMethodNotAllowed
 		http.Error(w, "Method not allowed", statusCode)