From aafae6a747faef68bec9c180d2d257cf2f4d4685 Mon Sep 17 00:00:00 2001
From: Swarup Ghosh
Date: Fri, 27 Feb 2026 14:29:49 +0530
Subject: [PATCH] Add evals for plan_mustgather prompt in the openshift toolset

Co-authored-by: Claude Opus 4.6
Signed-off-by: Swarup Ghosh
---
NOTE(review): this patch text was reconstructed from a whitespace-collapsed
copy. The YAML indentation (including the eval.yaml context lines and the
nesting of `prompt`/`assertions` under `steps`) is a best-effort
reconstruction -- TODO confirm against the repository before applying.
The blob ids on the `index` lines of the new task files predate the setup
script change described below; regenerate the patch with `git format-patch`
once the content is confirmed.

Review change: every `setup` script now waits (bounded by --timeout=120s)
for leftover namespaces to be deleted instead of returning while they are
still Terminating. Otherwise `verify`, which scans every namespace whose
name starts with "openshift-must-gather" and takes the first pod, can pick
up a pod left over from the previous task, and the fixed-name
"my-debug-namespace" cannot accept new resources while it is terminating.
`cleanup` scripts stay non-blocking.

 evals/claude-code/eval.yaml                   | 13 +++++
 .../plan-mustgather-audit-logs.yaml           | 46 +++++++++++++++++
 .../plan-mustgather-custom-images.yaml        | 46 +++++++++++++++++
 .../plan-mustgather-custom-namespace.yaml     | 40 +++++++++++++++
 .../plan-mustgather-default.yaml              | 46 +++++++++++++++++
 .../plan-mustgather-host-network.yaml         | 46 +++++++++++++++++
 .../plan-mustgather-node-selector.yaml        | 46 +++++++++++++++++
 .../plan-mustgather-timeout-since.yaml        | 49 +++++++++++++++++++
 8 files changed, 332 insertions(+)
 create mode 100644 evals/tasks/openshift/plan-mustgather-audit-logs/plan-mustgather-audit-logs.yaml
 create mode 100644 evals/tasks/openshift/plan-mustgather-custom-images/plan-mustgather-custom-images.yaml
 create mode 100644 evals/tasks/openshift/plan-mustgather-custom-namespace/plan-mustgather-custom-namespace.yaml
 create mode 100644 evals/tasks/openshift/plan-mustgather-default/plan-mustgather-default.yaml
 create mode 100644 evals/tasks/openshift/plan-mustgather-host-network/plan-mustgather-host-network.yaml
 create mode 100644 evals/tasks/openshift/plan-mustgather-node-selector/plan-mustgather-node-selector.yaml
 create mode 100644 evals/tasks/openshift/plan-mustgather-timeout-since/plan-mustgather-timeout-since.yaml

diff --git a/evals/claude-code/eval.yaml b/evals/claude-code/eval.yaml
index 4a46b8131..7235fbe6a 100644
--- a/evals/claude-code/eval.yaml
+++ b/evals/claude-code/eval.yaml
@@ -72,3 +72,16 @@ config:
             toolPattern: ".*"
         minToolCalls: 1
         maxToolCalls: 20
+    # OpenShift tasks
+    - glob: ../tasks/openshift/*/*.yaml
+      labelSelector:
+        suite: openshift
+      assertions:
+        promptsUsed:
+          - server: kubernetes
+            prompt: plan_mustgather
+        toolsUsed:
+          - server: kubernetes
+            tool: resource_create_or_update
+        minToolCalls: 1
+        maxToolCalls: 15
diff --git a/evals/tasks/openshift/plan-mustgather-audit-logs/plan-mustgather-audit-logs.yaml b/evals/tasks/openshift/plan-mustgather-audit-logs/plan-mustgather-audit-logs.yaml
new file mode 100644
index 000000000..77e961346
--- /dev/null
+++ b/evals/tasks/openshift/plan-mustgather-audit-logs/plan-mustgather-audit-logs.yaml
@@ -0,0 +1,46 @@
+kind: Task
+metadata:
+  labels:
+    suite: openshift
+  name: plan-mustgather-audit-logs
+  difficulty: easy
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Remove leftovers from previous runs; wait for deletion so verify never sees stale pods
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=true --timeout=120s 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  verify:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Find must-gather pod and verify it uses gather_audit_logs command
+      POD_JSON=$(kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.namespace | startswith("openshift-must-gather"))')
+      if [ -z "$POD_JSON" ]; then
+        echo "No must-gather pod found"
+        exit 1
+      fi
+      NS=$(echo "$POD_JSON" | jq -r '.metadata.namespace' | head -1)
+      POD=$(echo "$POD_JSON" | jq -r '.metadata.name' | head -1)
+      # Verify pod uses gather_audit_logs command
+      kubectl get pod "$POD" -n "$NS" -o yaml | grep -q "gather_audit_logs"
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Delete all must-gather namespaces and clusterrolebindings
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=false 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  prompt:
+    inline: I need to collect audit logs from my OpenShift cluster for a security investigation. Please set up a must-gather that specifically gathers audit logs and apply it.
+  assertions:
+    promptsUsed:
+      - server: kubernetes
+        prompt: plan_mustgather
+    toolsUsed:
+      - server: kubernetes
+        tool: resource_create_or_update
+    minToolCalls: 1
+    maxToolCalls: 15
diff --git a/evals/tasks/openshift/plan-mustgather-custom-images/plan-mustgather-custom-images.yaml b/evals/tasks/openshift/plan-mustgather-custom-images/plan-mustgather-custom-images.yaml
new file mode 100644
index 000000000..ae02a5e30
--- /dev/null
+++ b/evals/tasks/openshift/plan-mustgather-custom-images/plan-mustgather-custom-images.yaml
@@ -0,0 +1,46 @@
+kind: Task
+metadata:
+  labels:
+    suite: openshift
+  name: plan-mustgather-custom-images
+  difficulty: medium
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Remove leftovers from previous runs; wait for deletion so verify never sees stale pods
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=true --timeout=120s 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  verify:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Find must-gather pod and verify it uses custom images
+      POD_JSON=$(kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.namespace | startswith("openshift-must-gather"))')
+      if [ -z "$POD_JSON" ]; then
+        echo "No must-gather pod found"
+        exit 1
+      fi
+      NS=$(echo "$POD_JSON" | jq -r '.metadata.namespace' | head -1)
+      POD=$(echo "$POD_JSON" | jq -r '.metadata.name' | head -1)
+      # Verify pod uses the logging operator image
+      kubectl get pod "$POD" -n "$NS" -o yaml | grep -q "cluster-logging-rhel9-operator"
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Delete all must-gather namespaces and clusterrolebindings
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=false 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  prompt:
+    inline: I'm debugging logging issues on my OpenShift 4.15 cluster. Set up and apply a must-gather using both the platform image registry.redhat.io/openshift4/ose-must-gather:v4.15 and the logging operator image registry.redhat.io/openshift-logging/cluster-logging-rhel9-operator:latest.
+  assertions:
+    promptsUsed:
+      - server: kubernetes
+        prompt: plan_mustgather
+    toolsUsed:
+      - server: kubernetes
+        tool: resource_create_or_update
+    minToolCalls: 1
+    maxToolCalls: 15
diff --git a/evals/tasks/openshift/plan-mustgather-custom-namespace/plan-mustgather-custom-namespace.yaml b/evals/tasks/openshift/plan-mustgather-custom-namespace/plan-mustgather-custom-namespace.yaml
new file mode 100644
index 000000000..dde535e2d
--- /dev/null
+++ b/evals/tasks/openshift/plan-mustgather-custom-namespace/plan-mustgather-custom-namespace.yaml
@@ -0,0 +1,40 @@
+kind: Task
+metadata:
+  labels:
+    suite: openshift
+  name: plan-mustgather-custom-namespace
+  difficulty: easy
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Remove leftovers and wait: nothing can be created in a namespace that is still Terminating
+      kubectl delete ns my-debug-namespace --ignore-not-found --wait=true --timeout=120s 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep my-debug-namespace-must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  verify:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Verify resources exist in the custom namespace
+      kubectl get ns my-debug-namespace
+      kubectl get sa must-gather-collector -n my-debug-namespace
+      kubectl get pod -n my-debug-namespace -o name | grep -q must-gather
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Delete the custom namespace and clusterrolebinding
+      kubectl delete ns my-debug-namespace --wait=false 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep my-debug-namespace-must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  prompt:
+    inline: I want to run a must-gather but need it in a specific namespace called "my-debug-namespace" due to our cluster policies. Can you set this up and apply it?
+  assertions:
+    promptsUsed:
+      - server: kubernetes
+        prompt: plan_mustgather
+    toolsUsed:
+      - server: kubernetes
+        tool: resource_create_or_update
+    minToolCalls: 1
+    maxToolCalls: 15
diff --git a/evals/tasks/openshift/plan-mustgather-default/plan-mustgather-default.yaml b/evals/tasks/openshift/plan-mustgather-default/plan-mustgather-default.yaml
new file mode 100644
index 000000000..06049679c
--- /dev/null
+++ b/evals/tasks/openshift/plan-mustgather-default/plan-mustgather-default.yaml
@@ -0,0 +1,46 @@
+kind: Task
+metadata:
+  labels:
+    suite: openshift
+  name: plan-mustgather-default
+  difficulty: easy
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Remove leftovers from previous runs; wait for deletion so verify never sees stale pods
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=true --timeout=120s 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  verify:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Find must-gather pod and verify it exists with default image
+      POD_JSON=$(kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.namespace | startswith("openshift-must-gather"))')
+      if [ -z "$POD_JSON" ]; then
+        echo "No must-gather pod found"
+        exit 1
+      fi
+      NS=$(echo "$POD_JSON" | jq -r '.metadata.namespace' | head -1)
+      POD=$(echo "$POD_JSON" | jq -r '.metadata.name' | head -1)
+      # Verify pod uses default must-gather image
+      kubectl get pod "$POD" -n "$NS" -o yaml | grep -q "ose-must-gather"
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Delete all must-gather namespaces and clusterrolebindings
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=false 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  prompt:
+    inline: I need to collect diagnostic data from my OpenShift cluster for a support case. Can you help me plan and apply a must-gather collection?
+  assertions:
+    promptsUsed:
+      - server: kubernetes
+        prompt: plan_mustgather
+    toolsUsed:
+      - server: kubernetes
+        tool: resource_create_or_update
+    minToolCalls: 1
+    maxToolCalls: 15
diff --git a/evals/tasks/openshift/plan-mustgather-host-network/plan-mustgather-host-network.yaml b/evals/tasks/openshift/plan-mustgather-host-network/plan-mustgather-host-network.yaml
new file mode 100644
index 000000000..ac2256d7a
--- /dev/null
+++ b/evals/tasks/openshift/plan-mustgather-host-network/plan-mustgather-host-network.yaml
@@ -0,0 +1,46 @@
+kind: Task
+metadata:
+  labels:
+    suite: openshift
+  name: plan-mustgather-host-network
+  difficulty: easy
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Remove leftovers from previous runs; wait for deletion so verify never sees stale pods
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=true --timeout=120s 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  verify:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Find must-gather pod and verify it has hostNetwork enabled
+      POD_JSON=$(kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.namespace | startswith("openshift-must-gather"))')
+      if [ -z "$POD_JSON" ]; then
+        echo "No must-gather pod found"
+        exit 1
+      fi
+      NS=$(echo "$POD_JSON" | jq -r '.metadata.namespace' | head -1)
+      POD=$(echo "$POD_JSON" | jq -r '.metadata.name' | head -1)
+      # Verify pod has hostNetwork: true
+      kubectl get pod "$POD" -n "$NS" -o yaml | grep -q "hostNetwork: true"
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Delete all must-gather namespaces and clusterrolebindings
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=false 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  prompt:
+    inline: I'm troubleshooting network connectivity issues on my cluster. Set up and apply a must-gather that uses host networking to capture network-level diagnostics, and keep the resources around after collection so I can inspect them.
+  assertions:
+    promptsUsed:
+      - server: kubernetes
+        prompt: plan_mustgather
+    toolsUsed:
+      - server: kubernetes
+        tool: resource_create_or_update
+    minToolCalls: 1
+    maxToolCalls: 15
diff --git a/evals/tasks/openshift/plan-mustgather-node-selector/plan-mustgather-node-selector.yaml b/evals/tasks/openshift/plan-mustgather-node-selector/plan-mustgather-node-selector.yaml
new file mode 100644
index 000000000..a1437337a
--- /dev/null
+++ b/evals/tasks/openshift/plan-mustgather-node-selector/plan-mustgather-node-selector.yaml
@@ -0,0 +1,46 @@
+kind: Task
+metadata:
+  labels:
+    suite: openshift
+  name: plan-mustgather-node-selector
+  difficulty: medium
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Remove leftovers from previous runs; wait for deletion so verify never sees stale pods
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=true --timeout=120s 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  verify:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Find must-gather pod and verify it has the correct nodeSelector
+      POD_JSON=$(kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.namespace | startswith("openshift-must-gather"))')
+      if [ -z "$POD_JSON" ]; then
+        echo "No must-gather pod found"
+        exit 1
+      fi
+      NS=$(echo "$POD_JSON" | jq -r '.metadata.namespace' | head -1)
+      POD=$(echo "$POD_JSON" | jq -r '.metadata.name' | head -1)
+      # Verify pod has worker node selector
+      kubectl get pod "$POD" -n "$NS" -o yaml | grep -q "node-role.kubernetes.io/worker"
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Delete all must-gather namespaces and clusterrolebindings
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=false 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  prompt:
+    inline: I need to collect diagnostics but only want the must-gather pod to run on worker nodes, not on control plane nodes. The node selector should be node-role.kubernetes.io/worker. Can you set this up and apply it?
+  assertions:
+    promptsUsed:
+      - server: kubernetes
+        prompt: plan_mustgather
+    toolsUsed:
+      - server: kubernetes
+        tool: resource_create_or_update
+    minToolCalls: 1
+    maxToolCalls: 15
diff --git a/evals/tasks/openshift/plan-mustgather-timeout-since/plan-mustgather-timeout-since.yaml b/evals/tasks/openshift/plan-mustgather-timeout-since/plan-mustgather-timeout-since.yaml
new file mode 100644
index 000000000..d1913ebe9
--- /dev/null
+++ b/evals/tasks/openshift/plan-mustgather-timeout-since/plan-mustgather-timeout-since.yaml
@@ -0,0 +1,49 @@
+kind: Task
+metadata:
+  labels:
+    suite: openshift
+  name: plan-mustgather-timeout-since
+  difficulty: medium
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Remove leftovers from previous runs; wait for deletion so verify never sees stale pods
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=true --timeout=120s 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  verify:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Find must-gather pod and verify it has timeout and since configured
+      POD_JSON=$(kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.namespace | startswith("openshift-must-gather"))')
+      if [ -z "$POD_JSON" ]; then
+        echo "No must-gather pod found"
+        exit 1
+      fi
+      NS=$(echo "$POD_JSON" | jq -r '.metadata.namespace' | head -1)
+      POD=$(echo "$POD_JSON" | jq -r '.metadata.name' | head -1)
+      POD_YAML=$(kubectl get pod "$POD" -n "$NS" -o yaml)
+      # Verify pod has MUST_GATHER_SINCE env var
+      echo "$POD_YAML" | grep -q "MUST_GATHER_SINCE"
+      # Verify pod has timeout in command
+      echo "$POD_YAML" | grep -q "/usr/bin/timeout"
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # Delete all must-gather namespaces and clusterrolebindings
+      kubectl get ns -o name 2>/dev/null | grep openshift-must-gather | xargs -r kubectl delete --wait=false 2>/dev/null || true
+      kubectl get clusterrolebinding -o name 2>/dev/null | grep must-gather-collector | xargs -r kubectl delete 2>/dev/null || true
+  prompt:
+    inline: My cluster had an incident about an hour ago. I need a must-gather that only collects logs and data from the last 2 hours to keep the archive small. Also set a 30 minute timeout so it doesn't run forever. Set this up and apply it.
+  assertions:
+    promptsUsed:
+      - server: kubernetes
+        prompt: plan_mustgather
+    toolsUsed:
+      - server: kubernetes
+        tool: resource_create_or_update
+    minToolCalls: 1
+    maxToolCalls: 15