From 4c889b502c3f3ee51586e9b766b12ea6099e2d64 Mon Sep 17 00:00:00 2001
From: Harshal Patil <12152047+harche@users.noreply.github.com>
Date: Fri, 19 Jun 2026 10:53:02 -0400
Subject: [PATCH 1/2] Drop estimatedImpact; align verification schema with CRD
 (#162)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

estimatedImpact on ProposalResult was +required in the AnalysisResult CRD
but only optional in the LLM output schema, so analyses where the agent
omitted it were rejected by CRD validation at status-patch time —
persisting 0 options, marking the proposal Failed, and orphaning the
analysis sandbox pod.

- Remove estimatedImpact from the API type, the LLM output schema, the
  generated CRD, and the mock agent.
- Tighten the verification branch of the LLM schema to match the CRD
  (steps[] require name+type; verification requires description), fixing
  the same latent drift class.
- Add a guard test asserting every CRD-required field is also required
  in the corresponding LLM output schema.
---
 api/v1alpha1/proposal_analysis_types.go       |   8 -
 .../agentic.openshift.io_analysisresults.yaml |  10 --
 controller/proposal/schema_crd_drift_test.go  | 143 ++++++++++++++++++
 controller/proposal/schemas.go                |   7 +-
 test/agent/main.go                            |   3 +-
 5 files changed, 148 insertions(+), 23 deletions(-)
 create mode 100644 controller/proposal/schema_crd_drift_test.go

diff --git a/api/v1alpha1/proposal_analysis_types.go b/api/v1alpha1/proposal_analysis_types.go
index 61e63bc5..0814eb44 100644
--- a/api/v1alpha1/proposal_analysis_types.go
+++ b/api/v1alpha1/proposal_analysis_types.go
@@ -137,14 +137,6 @@ type ProposalResult struct {
 	// Must be one of: Reversible, Irreversible, Partial.
 	// +optional
 	Reversible Reversibility `json:"reversible,omitempty"`
-	// estimatedImpact is a Markdown-formatted description of the expected
-	// impact of the remediation on the system
-	// (e.g., "Brief pod restart, ~30s downtime").
-	// Maximum 1024 characters.
-	// +required
-	// +kubebuilder:validation:MinLength=1
-	// +kubebuilder:validation:MaxLength=1024
-	EstimatedImpact string `json:"estimatedImpact,omitempty"`
 	// rollbackPlan describes how to undo the remediation if execution fails
 	// or causes unexpected issues. Only the execution step mutates cluster
 	// state, so rollback lives here alongside the actions it would undo.
diff --git a/config/crd/bases/agentic.openshift.io_analysisresults.yaml b/config/crd/bases/agentic.openshift.io_analysisresults.yaml
index 62922c52..85917640 100644
--- a/config/crd/bases/agentic.openshift.io_analysisresults.yaml
+++ b/config/crd/bases/agentic.openshift.io_analysisresults.yaml
@@ -243,15 +243,6 @@ spec:
                           maxLength: 8192
                           minLength: 1
                           type: string
-                        estimatedImpact:
-                          description: |-
-                            estimatedImpact is a Markdown-formatted description of the expected
-                            impact of the remediation on the system
-                            (e.g., "Brief pod restart, ~30s downtime").
-                            Maximum 1024 characters.
-                          maxLength: 1024
-                          minLength: 1
-                          type: string
                         reversible:
                           description: |-
                             reversible indicates whether the remediation can be rolled back
@@ -298,7 +289,6 @@ spec:
                       required:
                       - actions
                       - description
-                      - estimatedImpact
                       - risk
                       type: object
                     rbac:
diff --git a/controller/proposal/schema_crd_drift_test.go b/controller/proposal/schema_crd_drift_test.go
new file mode 100644
index 00000000..8ed1a0cc
--- /dev/null
+++ b/controller/proposal/schema_crd_drift_test.go
@@ -0,0 +1,143 @@
+package proposal
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+
+	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+	"sigs.k8s.io/yaml"
+)
+
+// TestAnalysisSchemaCoversCRDRequiredFields guards against the schema/CRD
+// contract drift described in lightspeed-agentic-operator#162: the JSON
+// schema sent to the LLM for structured output must mark as required every
+// field the AnalysisResult CRD requires. When the two disagree, a valid LLM
+// response can be rejected by CRD validation at status-patch time, which
+// fails the analysis and orphans the sandbox pod.
+//
+// The generated CRD (which encodes the +required markers) is the source of
+// truth. For every object node the two schemas share, the CRD's required set
+// must be a subset of the LLM schema's required set. This would have caught
+// both the original estimatedImpact incident and the latent verification gap.
+func TestAnalysisSchemaCoversCRDRequiredFields(t *testing.T) {
+	crdPath := filepath.Join("..", "..", "config", "crd", "bases", "agentic.openshift.io_analysisresults.yaml")
+	raw, err := os.ReadFile(crdPath)
+	if err != nil {
+		t.Fatalf("read CRD: %v", err)
+	}
+
+	var crd apiextensionsv1.CustomResourceDefinition
+	if err := yaml.Unmarshal(raw, &crd); err != nil {
+		t.Fatalf("unmarshal CRD: %v", err)
+	}
+	if len(crd.Spec.Versions) == 0 {
+		t.Fatal("CRD declares no versions")
+	}
+
+	// The LLM analysis schema describes a single option; its per-option shape
+	// lives at properties.options.items.
+	var llm map[string]any
+	if err := json.Unmarshal(AnalysisOutputSchema, &llm); err != nil {
+		t.Fatalf("unmarshal AnalysisOutputSchema: %v", err)
+	}
+	llmOption, ok := digObject(llm, "properties", "options", "items")
+	if !ok {
+		t.Fatal("AnalysisOutputSchema is missing properties.options.items")
+	}
+
+	for _, v := range crd.Spec.Versions {
+		if v.Schema == nil || v.Schema.OpenAPIV3Schema == nil {
+			continue
+		}
+		// The matching per-option schema in the CRD is at status.options.items.
+		status, ok := v.Schema.OpenAPIV3Schema.Properties["status"]
+		if !ok {
+			continue
+		}
+		options, ok := status.Properties["options"]
+		if !ok || options.Items == nil || options.Items.Schema == nil {
+			t.Fatalf("CRD version %s is missing status.options.items schema", v.Name)
+		}
+		assertRequiredCoverage(t, "options[]", options.Items.Schema, llmOption)
+	}
+}
+
+// assertRequiredCoverage walks a CRD schema node and the corresponding LLM
+// schema node in parallel, asserting that every field required by the CRD is
+// also required by the LLM schema (for fields the LLM schema actually models).
+func assertRequiredCoverage(t *testing.T, path string, crd *apiextensionsv1.JSONSchemaProps, llm map[string]any) {
+	t.Helper()
+	if crd == nil || llm == nil {
+		return
+	}
+
+	// Arrays: descend into the element schema on both sides.
+	if crd.Type == "array" {
+		if crd.Items == nil || crd.Items.Schema == nil {
+			return
+		}
+		llmItems, ok := asJSONObject(llm["items"])
+		if !ok {
+			return
+		}
+		assertRequiredCoverage(t, path+"[]", crd.Items.Schema, llmItems)
+		return
+	}
+
+	llmProps, _ := asJSONObject(llm["properties"])
+	llmRequired := requiredSet(llm["required"])
+	for _, req := range crd.Required {
+		// Only enforce coverage for fields the LLM schema actually models;
+		// the LLM may legitimately omit a field the CRD allows.
+		if _, modeled := llmProps[req]; !modeled {
+			continue
+		}
+		if !llmRequired[req] {
+			t.Errorf("schema/CRD drift at %s: CRD requires %q but the LLM output schema does not mark it required", path, req)
+		}
+	}
+
+	// Recurse into properties present in both schemas.
+	for name := range crd.Properties {
+		child := crd.Properties[name]
+		llmChild, ok := asJSONObject(llmProps[name])
+		if !ok {
+			continue
+		}
+		assertRequiredCoverage(t, path+"."+name, &child, llmChild)
+	}
+}
+
+// digObject walks nested object properties of a decoded JSON schema.
+func digObject(m map[string]any, keys ...string) (map[string]any, bool) {
+	cur := m
+	for _, k := range keys {
+		next, ok := asJSONObject(cur[k])
+		if !ok {
+			return nil, false
+		}
+		cur = next
+	}
+	return cur, true
+}
+
+func asJSONObject(v any) (map[string]any, bool) {
+	m, ok := v.(map[string]any)
+	return m, ok
+}
+
+func requiredSet(v any) map[string]bool {
+	out := map[string]bool{}
+	arr, ok := v.([]any)
+	if !ok {
+		return out
+	}
+	for _, e := range arr {
+		if s, ok := e.(string); ok {
+			out[s] = true
+		}
+	}
+	return out
+}
diff --git a/controller/proposal/schemas.go b/controller/proposal/schemas.go
index b8d490de..715bd428 100644
--- a/controller/proposal/schemas.go
+++ b/controller/proposal/schemas.go
@@ -50,7 +50,6 @@ var AnalysisOutputSchema = json.RawMessage(`{
               },
               "risk": { "type": "string", "enum": ["Low", "Medium", "High", "Critical"], "description": "Risk assessment. Low: safe to apply. Medium: review recommended. High: careful review required. Critical: manual approval strongly recommended" },
               "reversible": { "type": "string", "enum": ["Reversible", "Irreversible", "Partial"], "description": "Whether this remediation can be rolled back. Reversible: fully undoable. Irreversible: cannot undo. Partial: some actions undoable" },
-              "estimatedImpact": { "type": "string", "description": "Expected impact on the system (e.g., 'Brief pod restart, ~30s downtime')" },
               "rollbackPlan": {
                 "type": "object",
                 "description": "How to undo the remediation if it fails or causes issues. Required when reversible is Reversible or Partial.",
@@ -77,10 +76,12 @@ var AnalysisOutputSchema = json.RawMessage(`{
                     "command": { "type": "string", "description": "Command or API call to run (e.g., 'oc get pod -n production -l app=web -o jsonpath={.status.phase}')" },
                     "expected": { "type": "string", "description": "Expected output or condition (e.g., 'Running', 'ready=true')" },
                     "type": { "type": "string", "description": "Check category (e.g., 'command', 'metric', 'condition')" }
-                  }
+                  },
+                  "required": ["name", "type"]
                 }
               }
-            }
+            },
+            "required": ["description"]
           },
           "rbac": {
             "type": "object",
diff --git a/test/agent/main.go b/test/agent/main.go
index 44a20694..ebf5bb78 100644
--- a/test/agent/main.go
+++ b/test/agent/main.go
@@ -255,8 +255,7 @@ func cannedResponse(phase, targetNS string) []byte {
           { "type": "patch", "description": "mock proposed action" }
         ],
         "risk": "Low",
-        "reversible": "Reversible",
-        "estimatedImpact": "Brief pod restart, ~30s downtime"
+        "reversible": "Reversible"
       },
       "verification": {
         "description": "mock verification plan",

From a192d1f33f94e88d1313760e3221633e702973a7 Mon Sep 17 00:00:00 2001
From: Harshal Patil <12152047+harche@users.noreply.github.com>
Date: Fri, 19 Jun 2026 11:22:58 -0400
Subject: [PATCH 2/2] test: flag CRD-required fields absent from the LLM schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review feedback: the drift guard silently skipped CRD-required
fields that were not modeled at all in the LLM output schema, so removing
such a field could pass undetected. A required field that the agent is
never asked for is rejected at status-patch time, which is exactly the
drift this guard exists to catch — so error in that case too, not only
when a modeled field lacks the required marker.
---
 controller/proposal/schema_crd_drift_test.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/controller/proposal/schema_crd_drift_test.go b/controller/proposal/schema_crd_drift_test.go
index 8ed1a0cc..c64d7922 100644
--- a/controller/proposal/schema_crd_drift_test.go
+++ b/controller/proposal/schema_crd_drift_test.go
@@ -89,9 +89,12 @@ func assertRequiredCoverage(t *testing.T, path string, crd *apiextensionsv1.JSON
 	llmProps, _ := asJSONObject(llm["properties"])
 	llmRequired := requiredSet(llm["required"])
 	for _, req := range crd.Required {
-		// Only enforce coverage for fields the LLM schema actually models;
-		// the LLM may legitimately omit a field the CRD allows.
+		// A field the CRD requires must be both modeled and required in the LLM
+		// output schema. If it is absent entirely the agent is never asked for
+		// it, so the status patch is rejected at runtime — the exact drift this
+		// guard exists to catch.
 		if _, modeled := llmProps[req]; !modeled {
+			t.Errorf("schema/CRD drift at %s: CRD requires %q but the LLM output schema does not model the field", path, req)
 			continue
 		}
 		if !llmRequired[req] {