From 1155906f0cf7680251f1d1021659b7719d05c66d Mon Sep 17 00:00:00 2001 From: Vimal Kumar Date: Thu, 25 Jun 2026 11:37:29 +0530 Subject: [PATCH] OLS-3274: inject audit env vars into sandbox pods from AgenticOLSConfig Read the AgenticOLSConfig CR's audit config and set LIGHTSPEED_AUDIT_ENABLED and OTEL_EXPORTER_OTLP_ENDPOINT on sandbox containers in both bare-pod and sandbox-template paths. Include audit config in the template hash so config changes trigger new templates. Co-Authored-By: Claude Opus 4.6 Signed-off-by: Vimal Kumar --- controller/proposal/bare_pod_manager.go | 4 + controller/proposal/bare_pod_manager_test.go | 96 +++++++++++++++++++ controller/proposal/helpers.go | 11 +++ controller/proposal/podspec_builder.go | 16 ++++ controller/proposal/sandbox_templates.go | 29 +++++- controller/proposal/sandbox_templates_test.go | 31 +++++- controller/proposal/sandbox_test.go | 2 + 7 files changed, 183 insertions(+), 6 deletions(-) diff --git a/controller/proposal/bare_pod_manager.go b/controller/proposal/bare_pod_manager.go index 3a7cf303..a09a38d9 100644 --- a/controller/proposal/bare_pod_manager.go +++ b/controller/proposal/bare_pod_manager.go @@ -67,6 +67,10 @@ func (m *BarePodManager) Claim(ctx context.Context, proposalName, step, _ string return "", fmt.Errorf("%s: %w", ErrBuildPodSpec, err) } + if err := appendAuditEnvVars(ctx, m.Client, &podSpec.Containers[0]); err != nil { + return "", fmt.Errorf("append audit env vars: %w", err) + } + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: podName, diff --git a/controller/proposal/bare_pod_manager_test.go b/controller/proposal/bare_pod_manager_test.go index 5c931001..8d8ed50d 100644 --- a/controller/proposal/bare_pod_manager_test.go +++ b/controller/proposal/bare_pod_manager_test.go @@ -17,6 +17,7 @@ import ( func newBarePodClient() *fake.ClientBuilder { s := runtime.NewScheme() utilruntime.Must(corev1.AddToScheme(s)) + utilruntime.Must(agenticv1alpha1.AddToScheme(s)) return fake.NewClientBuilder().WithScheme(s) } @@ -131,6 +132,101 @@ func TestBarePodManager_Release_NotFound(t *testing.T) { } } +func TestBarePodManager_Claim_AuditEnabled_DefaultsTrue(t *testing.T) { + fc := newBarePodClient().Build() + builder := &PodSpecBuilder{Image: "quay.io/test/sandbox:latest"} + m := NewBarePodManager(fc, builder, "test-ns") + m.SetStep( + &agenticv1alpha1.Agent{Spec: agenticv1alpha1.AgentSpec{Model: "claude-opus-4-6"}}, + testLLMProvider(agenticv1alpha1.LLMProviderAnthropic), + nil, + defaultSandboxSA, + ) + + name, err := m.Claim(context.Background(), "my-proposal", "analysis", "") + if err != nil { + t.Fatalf("Claim: %v", err) + } + + var pod corev1.Pod + if err := fc.Get(context.Background(), types.NamespacedName{Name: name, Namespace: "test-ns"}, &pod); err != nil { + t.Fatalf("pod not created: %v", err) + } + env := envToMap(pod.Spec.Containers[0].Env) + if env["LIGHTSPEED_AUDIT_ENABLED"] != "true" { + t.Errorf("LIGHTSPEED_AUDIT_ENABLED = %q, want true", env["LIGHTSPEED_AUDIT_ENABLED"]) + } + if _, ok := env["OTEL_EXPORTER_OTLP_ENDPOINT"]; ok { + t.Error("OTEL_EXPORTER_OTLP_ENDPOINT should not be set when no config CR exists") + } +} + +func TestBarePodManager_Claim_AuditWithOTELEndpoint(t *testing.T) { + config := &agenticv1alpha1.AgenticOLSConfig{} + config.Name = "cluster" + config.Spec.Audit = agenticv1alpha1.AuditConfig{ + Logging: agenticv1alpha1.AuditLoggingEnabled, + OTEL: agenticv1alpha1.AuditOTELConfig{Endpoint: "jaeger:4317"}, + } + fc := newBarePodClient().WithObjects(config).Build() + builder := &PodSpecBuilder{Image: "quay.io/test/sandbox:latest"} + m := NewBarePodManager(fc, builder, "test-ns") + m.SetStep( + &agenticv1alpha1.Agent{Spec: agenticv1alpha1.AgentSpec{Model: "claude-opus-4-6"}}, + testLLMProvider(agenticv1alpha1.LLMProviderAnthropic), + nil, + defaultSandboxSA, + ) + + name, err := m.Claim(context.Background(), "my-proposal", "analysis", "") + if err != nil { + t.Fatalf("Claim: %v", err) + } + + var pod corev1.Pod + if err := fc.Get(context.Background(), types.NamespacedName{Name: name, Namespace: "test-ns"}, &pod); err != nil { + t.Fatalf("pod not created: %v", err) + } + env := envToMap(pod.Spec.Containers[0].Env) + if env["LIGHTSPEED_AUDIT_ENABLED"] != "true" { + t.Errorf("LIGHTSPEED_AUDIT_ENABLED = %q, want true", env["LIGHTSPEED_AUDIT_ENABLED"]) + } + if env["OTEL_EXPORTER_OTLP_ENDPOINT"] != "jaeger:4317" { + t.Errorf("OTEL_EXPORTER_OTLP_ENDPOINT = %q, want jaeger:4317", env["OTEL_EXPORTER_OTLP_ENDPOINT"]) + } +} + +func TestBarePodManager_Claim_AuditDisabled(t *testing.T) { + config := &agenticv1alpha1.AgenticOLSConfig{} + config.Name = "cluster" + config.Spec.Audit = agenticv1alpha1.AuditConfig{ + Logging: agenticv1alpha1.AuditLoggingDisabled, + } + fc := newBarePodClient().WithObjects(config).Build() + builder := &PodSpecBuilder{Image: "quay.io/test/sandbox:latest"} + m := NewBarePodManager(fc, builder, "test-ns") + m.SetStep( + &agenticv1alpha1.Agent{Spec: agenticv1alpha1.AgentSpec{Model: "claude-opus-4-6"}}, + testLLMProvider(agenticv1alpha1.LLMProviderAnthropic), + nil, + defaultSandboxSA, + ) + + name, err := m.Claim(context.Background(), "my-proposal", "analysis", "") + if err != nil { + t.Fatalf("Claim: %v", err) + } + + var pod corev1.Pod + if err := fc.Get(context.Background(), types.NamespacedName{Name: name, Namespace: "test-ns"}, &pod); err != nil { + t.Fatalf("pod not created: %v", err) + } + env := envToMap(pod.Spec.Containers[0].Env) + if _, ok := env["LIGHTSPEED_AUDIT_ENABLED"]; ok { + t.Error("LIGHTSPEED_AUDIT_ENABLED should not be set when audit logging is disabled") + } +} + func TestBarePodManager_WaitReady_ImmediateReady(t *testing.T) { pod := &corev1.Pod{} pod.Name = "ls-analysis-my-proposal" diff --git a/controller/proposal/helpers.go b/controller/proposal/helpers.go index f278aa7c..384336ea 100644 --- a/controller/proposal/helpers.go +++ b/controller/proposal/helpers.go @@ -72,6 +72,17 @@ func isSuspended(ctx context.Context, c client.Client) (bool, error) { return config.Spec.Suspended, nil } +func readAuditConfig(ctx context.Context, c client.Client) (*agenticv1alpha1.AuditConfig, error) { + var config agenticv1alpha1.AgenticOLSConfig + if err := c.Get(ctx, client.ObjectKey{Name: "cluster"}, &config); err != nil { + if client.IgnoreNotFound(err) == nil { + return nil, nil + } + return nil, err + } + return &config.Spec.Audit, nil +} + // failStep marks a step as failed and creates a failure result CR. // The caller must have set the step condition to ConditionUnknown before // calling failStep so that conditionTime can extract the start time. diff --git a/controller/proposal/podspec_builder.go b/controller/proposal/podspec_builder.go index fedac898..f04405a6 100644 --- a/controller/proposal/podspec_builder.go +++ b/controller/proposal/podspec_builder.go @@ -1,6 +1,7 @@ package proposal import ( + "context" "encoding/json" "fmt" "path" @@ -9,6 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" agenticv1alpha1 "github.com/openshift/lightspeed-agentic-operator/api/v1alpha1" ) @@ -129,6 +131,20 @@ func (b *PodSpecBuilder) Build( }, nil } +func appendAuditEnvVars(ctx context.Context, c client.Client, container *corev1.Container) error { + audit, err := readAuditConfig(ctx, c) + if err != nil { + return fmt.Errorf("read audit config: %w", err) + } + if audit.LoggingEnabled() { + container.Env = append(container.Env, corev1.EnvVar{Name: "LIGHTSPEED_AUDIT_ENABLED", Value: "true"}) + } + if endpoint := audit.OTELEndpoint(); endpoint != "" { + container.Env = append(container.Env, corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: endpoint}) + } + return nil +} + func (b *PodSpecBuilder) addProviderSpecificEnv(container *corev1.Container, llm *agenticv1alpha1.LLMProvider) { switch llm.Spec.Type { case agenticv1alpha1.LLMProviderAnthropic: diff --git a/controller/proposal/sandbox_templates.go b/controller/proposal/sandbox_templates.go index fcb5d3ce..2906be56 100644 --- a/controller/proposal/sandbox_templates.go +++ b/controller/proposal/sandbox_templates.go @@ -95,6 +95,8 @@ type templateHashInput struct { Step string `json:"step"` BaseResourceVersion string `json:"baseRV"` ServiceAccount string `json:"serviceAccount"` + AuditLogging bool `json:"auditLogging"` + OTELEndpoint string `json:"otelEndpoint,omitempty"` } func computeTemplateHash( @@ -106,6 +108,7 @@ func computeTemplateHash( step string, baseResourceVersion string, serviceAccount string, + audit *agenticv1alpha1.AuditConfig, ) (string, error) { input := templateHashInput{ LLM: llm.Spec, @@ -116,6 +119,8 @@ func computeTemplateHash( Step: step, BaseResourceVersion: baseResourceVersion, ServiceAccount: serviceAccount, + AuditLogging: audit.LoggingEnabled(), + OTELEndpoint: audit.OTELEndpoint(), } data, err := json.Marshal(input) if err != nil { @@ -168,7 +173,11 @@ func EnsureAgentTemplate( requiredSecrets = tools.RequiredSecrets } - hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, step, base.GetResourceVersion(), serviceAccount) + audit, err := readAuditConfig(ctx, c) + if err != nil { + return "", fmt.Errorf("read audit config: %w", err) + } + hash, err := computeTemplateHash(llm, agent.Spec.Model, skills, mcpServers, requiredSecrets, step, base.GetResourceVersion(), serviceAccount, audit) if err != nil { return "", fmt.Errorf("%s: %w", ErrComputeTemplateHash, err) } @@ -222,6 +231,10 @@ func EnsureAgentTemplate( return "", fmt.Errorf("%s: %w", ErrPatchLLMCredentials, err) } + if err := patchAuditEnvVars(derived, audit); err != nil { + return "", fmt.Errorf("patch audit env vars: %w", err) + } + if len(mcpServers) > 0 { if err := patchMCPServers(derived, mcpServers); err != nil { return "", fmt.Errorf("%s: %w", ErrPatchMCPServers, err) @@ -735,6 +748,20 @@ type mcpHeaderEnvEntry struct { SecretName string `json:"secretName,omitempty"` } +func patchAuditEnvVars(tmpl *unstructured.Unstructured, audit *agenticv1alpha1.AuditConfig) error { + if audit.LoggingEnabled() { + if err := setEnvVar(tmpl, "LIGHTSPEED_AUDIT_ENABLED", "true"); err != nil { + return fmt.Errorf("set LIGHTSPEED_AUDIT_ENABLED: %w", err) + } + } + if endpoint := audit.OTELEndpoint(); endpoint != "" { + if err := setEnvVar(tmpl, "OTEL_EXPORTER_OTLP_ENDPOINT", endpoint); err != nil { + return fmt.Errorf("set OTEL_EXPORTER_OTLP_ENDPOINT: %w", err) + } + } + return nil +} + func patchMCPServers(tmpl *unstructured.Unstructured, servers []agenticv1alpha1.MCPServerConfig) error { entries := make([]mcpServerEnvEntry, 0, len(servers)) for _, s := range servers { diff --git a/controller/proposal/sandbox_templates_test.go b/controller/proposal/sandbox_templates_test.go index 4ffb3fbc..6dae02bb 100644 --- a/controller/proposal/sandbox_templates_test.go +++ b/controller/proposal/sandbox_templates_test.go @@ -77,7 +77,7 @@ func emptyTemplate() *unstructured.Unstructured { func mustHash(t *testing.T, llm *agenticv1alpha1.LLMProvider, model string, skills []agenticv1alpha1.SkillsSource, requiredSecrets []agenticv1alpha1.SecretRequirement, phase string) string { t.Helper() - h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, phase, "", "") + h, err := computeTemplateHash(llm, model, skills, nil, requiredSecrets, phase, "", "", nil) if err != nil { t.Fatalf("computeTemplateHash: %v", err) } @@ -627,11 +627,11 @@ func TestComputeTemplateHash_DifferentBaseResourceVersion(t *testing.T) { llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex) skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}} - h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "") + h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "", nil) if err != nil { t.Fatal(err) } - h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "2000", "") + h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "2000", "", nil) if err != nil { t.Fatal(err) } @@ -645,11 +645,11 @@ func TestComputeTemplateHash_SameBaseResourceVersion(t *testing.T) { llm := testLLMProvider(agenticv1alpha1.LLMProviderGoogleCloudVertex) skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}} - h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "") + h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "", nil) if err != nil { t.Fatal(err) } - h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "") + h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "", nil) if err != nil { t.Fatal(err) } @@ -659,6 +659,27 @@ func TestComputeTemplateHash_SameBaseResourceVersion(t *testing.T) { } } +func TestComputeTemplateHash_DifferentAuditConfig(t *testing.T) { + llm := testLLMProvider(agenticv1alpha1.LLMProviderAnthropic) + skills := []agenticv1alpha1.SkillsSource{{Image: "quay.io/test/skills:latest"}} + + h1, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "", nil) + if err != nil { + t.Fatal(err) + } + audit := &agenticv1alpha1.AuditConfig{ + OTEL: agenticv1alpha1.AuditOTELConfig{Endpoint: "jaeger:4317"}, + } + h2, err := computeTemplateHash(llm, "claude-opus-4-6", skills, nil, nil, "analysis", "1000", "", audit) + if err != nil { + t.Fatal(err) + } + + if h1 == h2 { + t.Error("different audit config should produce different hashes") + } +} + func TestPatchProbes(t *testing.T) { t.Run("sets readiness and liveness probes on first container", func(t *testing.T) { tmpl := emptyTemplate() diff --git a/controller/proposal/sandbox_test.go b/controller/proposal/sandbox_test.go index 8773f68c..833789e7 100644 --- a/controller/proposal/sandbox_test.go +++ b/controller/proposal/sandbox_test.go @@ -10,6 +10,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -18,6 +19,7 @@ import ( func newSandboxClient(objects ...client.Object) client.Client { s := runtime.NewScheme() + utilruntime.Must(agenticv1alpha1.AddToScheme(s)) mapper := apimeta.NewDefaultRESTMapper([]schema.GroupVersion{ {Group: "extensions.agents.x-k8s.io", Version: "v1alpha1"},