From 46a6cb1ce712e4f05095130759818c4de2366c6b Mon Sep 17 00:00:00 2001
From: "rw-codebundle-agent[bot]"
 <rw-codebundle-agent[bot]@users.noreply.github.com>
Date: Thu, 25 Jun 2026 15:39:46 +0000
Subject: [PATCH] Add vast-k8s-csi-health CodeBundle for VAST CSI monitoring.

Monitors VAST CSI driver health, NFS xprt metrics, PVC-to-view tracing,
workload mount status, StorageClass config, and optional VMS correlation.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../generation-rules/vast-k8s-csi-health.yaml |  54 +++
 .../templates/vast-k8s-csi-health-sli.yaml    |  52 +++
 .../templates/vast-k8s-csi-health-slx.yaml    |  27 ++
 .../vast-k8s-csi-health-taskset.yaml          |  51 +++
 .../vast-k8s-csi-health/.test/README.md       |  18 +
 .../vast-k8s-csi-health/.test/Taskfile.yaml   | 132 ++++++
 .../.test/kubernetes/manifest.yaml            |  63 +++
 codebundles/vast-k8s-csi-health/README.md     |  69 ++++
 .../vast-k8s-csi-health/check-csi-metrics.sh  | 139 +++++++
 .../check-csi-pod-health.sh                   | 132 ++++++
 .../check-nfs-xprt-health.sh                  | 124 ++++++
 .../check-pod-mount-health.sh                 | 112 +++++
 .../check-vast-storageclass-config.sh         | 101 +++++
 .../correlate-k8s-vast-events.sh              | 117 ++++++
 codebundles/vast-k8s-csi-health/runbook.robot | 382 ++++++++++++++++++
 .../sli-vast-csi-health-score.sh              |  95 +++++
 codebundles/vast-k8s-csi-health/sli.robot     | 103 +++++
 .../vast-k8s-csi-health/trace-pvc-to-vast.sh  | 119 ++++++
 .../vast-k8s-csi-health/vast-csi-common.sh    | 122 ++++++
 19 files changed, 2012 insertions(+)
 create mode 100644 codebundles/vast-k8s-csi-health/.runwhen/generation-rules/vast-k8s-csi-health.yaml
 create mode 100644 codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-sli.yaml
 create mode 100644 codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-slx.yaml
 create mode 100644 codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-taskset.yaml
 create mode 100644 codebundles/vast-k8s-csi-health/.test/README.md
 create mode 100644 codebundles/vast-k8s-csi-health/.test/Taskfile.yaml
 create mode 100644 codebundles/vast-k8s-csi-health/.test/kubernetes/manifest.yaml
 create mode 100644 codebundles/vast-k8s-csi-health/README.md
 create mode 100755 codebundles/vast-k8s-csi-health/check-csi-metrics.sh
 create mode 100755 codebundles/vast-k8s-csi-health/check-csi-pod-health.sh
 create mode 100755 codebundles/vast-k8s-csi-health/check-nfs-xprt-health.sh
 create mode 100755 codebundles/vast-k8s-csi-health/check-pod-mount-health.sh
 create mode 100755 codebundles/vast-k8s-csi-health/check-vast-storageclass-config.sh
 create mode 100755 codebundles/vast-k8s-csi-health/correlate-k8s-vast-events.sh
 create mode 100644 codebundles/vast-k8s-csi-health/runbook.robot
 create mode 100755 codebundles/vast-k8s-csi-health/sli-vast-csi-health-score.sh
 create mode 100644 codebundles/vast-k8s-csi-health/sli.robot
 create mode 100755 codebundles/vast-k8s-csi-health/trace-pvc-to-vast.sh
 create mode 100755 codebundles/vast-k8s-csi-health/vast-csi-common.sh

diff --git a/codebundles/vast-k8s-csi-health/.runwhen/generation-rules/vast-k8s-csi-health.yaml b/codebundles/vast-k8s-csi-health/.runwhen/generation-rules/vast-k8s-csi-health.yaml
new file mode 100644
index 00000000..2a7a29c7
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/.runwhen/generation-rules/vast-k8s-csi-health.yaml
@@ -0,0 +1,54 @@
+apiVersion: runwhen.com/v1
+kind: GenerationRules
+spec:
+  generationRules:
+    # One SLX per namespace that has at least one VAST-backed PVC.
+    - resourceTypes:
+        - persistentvolumeclaim
+      matchRules:
+        - type: pattern
+          pattern: ".+"
+          properties: [name]
+          mode: substring
+        - type: or
+          matches:
+            - type: pattern
+              pattern: "vast"
+              properties: [spec/storageClassName]
+              mode: substring
+            - type: pattern
+              pattern: "vast"
+              properties: [metadata/annotations]
+              mode: substring
+      slxs:
+        - baseName: vast-k8s-csi-health
+          shortenedBaseName: vast-csi-hlth
+          qualifiers: ["namespace", "cluster"]
+          baseTemplateName: vast-k8s-csi-health
+          levelOfDetail: basic
+          outputItems:
+            - type: slx
+            - type: sli
+            - type: runbook
+              templateName: vast-k8s-csi-health-taskset.yaml
+
+    # Optional cluster-level SLX for the CSI driver install namespace when
+    # operators want driver health monitoring before workload namespaces exist.
+    - resourceTypes:
+        - namespace
+      matchRules:
+        - type: pattern
+          pattern: "vast-csi"
+          properties: [name]
+          mode: substring
+      slxs:
+        - baseName: vast-csi-driver
+          shortenedBaseName: vast-csi-drv
+          qualifiers: ["namespace", "cluster"]
+          baseTemplateName: vast-k8s-csi-health
+          levelOfDetail: basic
+          outputItems:
+            - type: slx
+            - type: sli
+            - type: runbook
+              templateName: vast-k8s-csi-health-taskset.yaml
diff --git a/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-sli.yaml b/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-sli.yaml
new file mode 100644
index 00000000..74d64095
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-sli.yaml
@@ -0,0 +1,52 @@
+apiVersion: runwhen.com/v1
+kind: ServiceLevelIndicator
+metadata:
+  name: {{slx_name}}
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  displayUnitsLong: OK
+  displayUnitsShort: ok
+  locations:
+    - {{default_location}}
+  description: Measures VAST CSI health using CSI pod readiness, PVC binding, mount success, and NFS xprt congestion.
+  codeBundle:
+    {% if repo_url %}
+    repoUrl: {{repo_url}}
+    {% else %}
+    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
+    {% endif %}
+    {% if ref %}
+    ref: {{ref}}
+    {% else %}
+    ref: main
+    {% endif %}
+    pathToRobot: codebundles/vast-k8s-csi-health/sli.robot
+  intervalStrategy: intermezzo
+  intervalSeconds: 300
+  configProvided:
+    - name: CONTEXT
+      value: "{{context}}"
+    - name: NAMESPACE
+      value: "{{match_resource.resource.metadata.namespace | default(namespace.name)}}"
+    - name: CSI_NAMESPACE
+      value: "{{ custom.vast_csi_namespace | default('vast-csi') }}"
+    - name: KUBERNETES_DISTRIBUTION_BINARY
+      value: "{{ custom.kubernetes_distribution_binary | default('kubectl') }}"
+    - name: XPRT_PENDING_THRESHOLD
+      value: "{{ custom.xprt_pending_threshold | default('100') }}"
+    - name: RPC_ERROR_RATE_THRESHOLD
+      value: "{{ custom.rpc_error_rate_threshold | default('5') }}"
+  secretsProvided:
+  {% if wb_version %}
+    {% include "kubernetes-auth.yaml" ignore missing %}
+  {% else %}
+    - name: kubeconfig
+      workspaceKey: {{ custom.kubeconfig_secret_name | default("kubeconfig") }}
+  {% endif %}
+  alertConfig:
+    tasks:
+      persona: eager-edgar
+      sessionTTL: 10m
diff --git a/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-slx.yaml b/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-slx.yaml
new file mode 100644
index 00000000..20630f35
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-slx.yaml
@@ -0,0 +1,27 @@
+apiVersion: runwhen.com/v1
+kind: ServiceLevelX
+metadata:
+  name: {{slx_name}}
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/kubernetes/resources/labeled/pvc.svg
+  alias: {{namespace.name}} VAST CSI Health
+  asMeasuredBy: Aggregate score from CSI pod readiness, PVC binding, mount health, and NFS xprt metrics.
+  configProvided:
+  - name: OBJECT_NAME
+    value: {{match_resource.resource.metadata.name}}
+  owners:
+  - {{workspace.owner_email}}
+  statement: VAST CSI-backed storage in this namespace should have healthy driver pods, bound PVCs, and successful workload mounts.
+  additionalContext:
+    {% include "kubernetes-hierarchy.yaml" ignore missing %}
+    qualified_name: "{{ match_resource.qualified_name }}"
+  tags:
+    {% include "kubernetes-tags.yaml" ignore missing %}
+    - name: access
+      value: read-only
+    - name: storage
+      value: vast-csi
diff --git a/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-taskset.yaml b/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-taskset.yaml
new file mode 100644
index 00000000..04f740b9
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/.runwhen/templates/vast-k8s-csi-health-taskset.yaml
@@ -0,0 +1,51 @@
+apiVersion: runwhen.com/v1
+kind: Runbook
+metadata:
+  name: {{slx_name}}
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  location: {{default_location}}
+  description: Monitors VAST CSI driver health and traces workload storage for the namespace.
+  codeBundle:
+    {% if repo_url %}
+    repoUrl: {{repo_url}}
+    {% else %}
+    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
+    {% endif %}
+    {% if ref %}
+    ref: {{ref}}
+    {% else %}
+    ref: main
+    {% endif %}
+    pathToRobot: codebundles/vast-k8s-csi-health/runbook.robot
+  configProvided:
+    - name: CONTEXT
+      value: "{{context}}"
+    - name: NAMESPACE
+      value: "{{match_resource.resource.metadata.namespace | default(namespace.name)}}"
+    - name: CSI_NAMESPACE
+      value: "{{ custom.vast_csi_namespace | default('vast-csi') }}"
+    - name: KUBERNETES_DISTRIBUTION_BINARY
+      value: "{{ custom.kubernetes_distribution_binary | default('kubectl') }}"
+    - name: VAST_VMS_ENDPOINT
+      value: "{{ custom.vast_vms_endpoint | default('') }}"
+    - name: VAST_CLUSTER_NAME
+      value: "{{ custom.vast_cluster_name | default('') }}"
+    - name: XPRT_PENDING_THRESHOLD
+      value: "{{ custom.xprt_pending_threshold | default('100') }}"
+    - name: RPC_ERROR_RATE_THRESHOLD
+      value: "{{ custom.rpc_error_rate_threshold | default('5') }}"
+  secretsProvided:
+  {% if wb_version %}
+    {% include "kubernetes-auth.yaml" ignore missing %}
+  {% else %}
+    - name: kubeconfig
+      workspaceKey: {{ custom.kubeconfig_secret_name | default("kubeconfig") }}
+  {% endif %}
+  {% if custom.vast_vms_credentials_secret_name %}
+    - name: vast_vms_credentials
+      workspaceKey: {{ custom.vast_vms_credentials_secret_name }}
+  {% endif %}
diff --git a/codebundles/vast-k8s-csi-health/.test/README.md b/codebundles/vast-k8s-csi-health/.test/README.md
new file mode 100644
index 00000000..fd0974a5
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/.test/README.md
@@ -0,0 +1,18 @@
+# Test infrastructure for vast-k8s-csi-health
+
+Static Kubernetes manifests under `kubernetes/manifest.yaml` create:
+
+- Namespace `test-vast-csi-health`
+- StorageClass `vast-test-sc` with provisioner `csi.vastdata.com`
+- PVC and Deployment referencing VAST storage
+
+## Usage
+
+```bash
+task build-infra          # kubectl apply manifests
+task validate-generation-rules
+task default              # requires pushed commits + RunWhen Local
+task clean
+```
+
+The PVC will remain Pending without a real VAST CSI driver; generation rules still match the StorageClass name and annotations for SLX discovery testing.
diff --git a/codebundles/vast-k8s-csi-health/.test/Taskfile.yaml b/codebundles/vast-k8s-csi-health/.test/Taskfile.yaml
new file mode 100644
index 00000000..735d8679
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/.test/Taskfile.yaml
@@ -0,0 +1,132 @@
+version: "3"
+
+tasks:
+  default:
+    desc: "Run/refresh config"
+    cmds:
+      - task: check-unpushed-commits
+      - task: generate-rwl-config
+      - task: run-rwl-discovery
+
+  clean:
+    desc: "Run cleanup tasks"
+    cmds:
+      - task: remove-kubernetes-objects
+      - task: delete-slxs
+      - task: clean-rwl-discovery
+
+  build-infra:
+    desc: "Build test infrastructure"
+    cmds:
+      - task: create-kubernetes-objects
+
+  create-kubernetes-objects:
+    desc: "Apply manifests from kubernetes directory using kubectl"
+    cmds:
+      - kubectl apply -f kubernetes/ # directory form; a glob that expands to several files would pass the extras as stray positional args
+    silent: true
+
+  remove-kubernetes-objects:
+    desc: "Delete kubernetes objects"
+    cmds:
+      - kubectl delete -f kubernetes/ --ignore-not-found # directory form; stays valid when more manifests are added
+    silent: true
+
+  check-unpushed-commits:
+    desc: Check if outstanding commits or file updates need to be pushed before testing.
+    vars:
+      BASE_DIR: "../"
+    cmds:
+      - |
+        echo "Checking for uncommitted changes in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..."
+        UNCOMMITTED_FILES=$(git diff --name-only HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true)
+        if [ -n "$UNCOMMITTED_FILES" ]; then
+          echo "Uncommitted changes found:"
+          echo "$UNCOMMITTED_FILES"
+          exit 1
+        fi
+      - |
+        echo "Checking for unpushed commits..."
+        git fetch origin
+        UNPUSHED_FILES=$(git diff --name-only origin/$(git rev-parse --abbrev-ref HEAD) HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true)
+        if [ -n "$UNPUSHED_FILES" ]; then
+          echo "Unpushed commits found:"
+          echo "$UNPUSHED_FILES"
+          exit 1
+        fi
+    silent: true
+
+  generate-rwl-config:
+    desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)"
+    env:
+      RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}'
+    cmds:
+      - |
+        repo_url=$(git config --get remote.origin.url)
+        branch_name=$(git rev-parse --abbrev-ref HEAD)
+        codebundle=$(basename "$(dirname "$PWD")")
+        namespace=$(yq e 'select(.kind == "Namespace") | .metadata.name' kubernetes/manifest.yaml -N)
+        cat <<EOF > workspaceInfo.yaml
+        workspaceName: "$RW_WORKSPACE"
+        workspaceOwnerEmail: authors@runwhen.com
+        defaultLocation: location-01
+        defaultLOD: none
+        cloudConfig:
+          kubernetes:
+            kubeconfigFile: /shared/kubeconfig
+            namespaceLODs:
+              $namespace: detailed
+            namespaces:
+              - $namespace
+        codeCollections:
+        - repoURL: "$repo_url"
+          branch: "$branch_name"
+          codeBundles: ["$codebundle"]
+        custom:
+          kubeconfig_secret_name: "kubeconfig"
+          kubernetes_distribution_binary: kubectl
+          vast_csi_namespace: vast-csi
+        EOF
+    silent: true
+
+  run-rwl-discovery:
+    desc: "Run RunWhen Local Discovery on test infrastructure"
+    cmds:
+      - |
+        CONTAINER_NAME="RunWhenLocal"
+        docker rm -f $CONTAINER_NAME 2>/dev/null || true
+        sudo rm -rf output || true
+        mkdir -p output && chmod 777 output
+        kubeconfig=$(echo "$RW_FROM_FILE" | jq -r .kubeconfig)
+        docker run --name $CONTAINER_NAME -p 8081:8081 \
+          -v "$(pwd)":/shared -v "$kubeconfig":/shared/kubeconfig \
+          -d ghcr.io/runwhen-contrib/runwhen-local:latest
+        docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh --verbose
+    silent: true
+
+  validate-generation-rules:
+    desc: "Validate YAML files in .runwhen/generation-rules"
+    cmds:
+      - |
+        temp_dir=$(mktemp -d)
+        curl -s -o "$temp_dir/generation-rule-schema.json" \
+          https://raw.githubusercontent.com/runwhen-contrib/runwhen-local/refs/heads/main/src/generation-rule-schema.json
+        for yaml_file in ../.runwhen/generation-rules/*.yaml; do
+          json_file="$temp_dir/$(basename "${yaml_file%.*}.json")"
+          yq -o=json "$yaml_file" > "$json_file"
+          ajv validate -s "$temp_dir/generation-rule-schema.json" -d "$json_file" --spec=draft2020 --strict=false
+        done
+        rm -rf "$temp_dir"
+
+  clean-rwl-discovery:
+    desc: "Clean up RunWhen Local discovery output"
+    cmds:
+      - sudo rm -rf output
+      - rm -f workspaceInfo.yaml
+    silent: true
+
+  delete-slxs:
+    desc: "Placeholder for platform SLX deletion (requires RW API credentials)"
+    cmds:
+      - echo "Configure RW_WORKSPACE, RW_API, and RW_PAT to delete SLXs from the platform."
+    silent: true
diff --git a/codebundles/vast-k8s-csi-health/.test/kubernetes/manifest.yaml b/codebundles/vast-k8s-csi-health/.test/kubernetes/manifest.yaml
new file mode 100644
index 00000000..619aab6d
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/.test/kubernetes/manifest.yaml
@@ -0,0 +1,63 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: test-vast-csi-health
+  labels:
+    app.kubernetes.io/part-of: vast-k8s-csi-health-test
+
+---
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: vast-test-sc
+provisioner: csi.vastdata.com
+parameters:
+  endpoint: "192.0.2.10"
+  view_policy: "default"
+  tenant: "test-tenant"
+reclaimPolicy: Delete
+volumeBindingMode: Immediate
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: test-vast-pvc
+  namespace: test-vast-csi-health
+  annotations:
+    volume.kubernetes.io/storage-provisioner: csi.vastdata.com
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: vast-test-sc
+  resources:
+    requests:
+      storage: 1Gi
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: test-vast-consumer
+  namespace: test-vast-csi-health
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: test-vast-consumer
+  template:
+    metadata:
+      labels:
+        app: test-vast-consumer
+    spec:
+      containers:
+        - name: app
+          image: busybox:1.36
+          command: ["/bin/sh", "-c", "while true; do sleep 30; done"]
+          volumeMounts:
+            - name: data
+              mountPath: /data
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: test-vast-pvc
diff --git a/codebundles/vast-k8s-csi-health/README.md b/codebundles/vast-k8s-csi-health/README.md
new file mode 100644
index 00000000..9af0a243
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/README.md
@@ -0,0 +1,69 @@
+# VAST Data Kubernetes CSI Health
+
+Monitor the VAST CSI driver in Kubernetes and trace application storage from PVC/PV through to VAST views. Detects CSI driver failures, NFS transport congestion, mount issues, and optionally correlates in-cluster storage symptoms with VAST backend health.
+
+## Overview
+
+- **CSI driver health**: Controller and node pod readiness, CrashLoopBackOff, and restart counts in the CSI install namespace
+- **CSI metrics**: RPC failure rates and slow operations from Prometheus `/metrics` on ports 9090 (node) and 9091 (controller)
+- **NFS transport**: `csi_node_nfs_xprt_*` congestion, unhealthy VIPs, and pending request thresholds
+- **PVC tracing**: Maps PVC → PV → StorageClass to VAST view path, tenant, and VIP identifiers
+- **Workload mounts**: Pod mount failures, warning events, and VolumeAttachment issues for VAST volumes
+- **StorageClass validation**: Endpoint, view policy, tenant, mount options, and expansion settings
+- **VMS correlation**: Optional cross-reference of failing PVCs with VMS tenant capacity/QoS metrics
+
+## Configuration
+
+### Required Variables
+
+- `CONTEXT`: Kubernetes context name
+- `NAMESPACE`: Kubernetes namespace for workload PVC tracing and mount checks
+
+### Optional Variables
+
+- `CSI_NAMESPACE`: Namespace where the VAST CSI driver is installed (default: `vast-csi`)
+- `KUBERNETES_DISTRIBUTION_BINARY`: Kubernetes CLI binary (default: `kubectl`)
+- `VAST_VMS_ENDPOINT`: Optional VMS REST base URL for backend correlation (e.g. `https://vms.example.com`)
+- `VAST_CLUSTER_NAME`: Optional VAST cluster name used in correlation titles
+- `XPRT_PENDING_THRESHOLD`: `csi_node_nfs_xprt_pending_requests` count that triggers an issue (default: `100`)
+- `RPC_ERROR_RATE_THRESHOLD`: CSI RPC error rate percent threshold (default: `5`)
+
+### Secrets
+
+- `kubeconfig`: Standard kubeconfig YAML for Kubernetes cluster access
+- `vast_vms_credentials` (optional): JSON object with `USERNAME` and `PASSWORD`, or `API_TOKEN`, for VMS API access when `VAST_VMS_ENDPOINT` is set
+
+## Tasks Overview
+
+### Check VAST CSI Driver Pod Health
+Verifies CSI controller and node pods are Running/Ready; detects CrashLoopBackOff, not-Ready pods, high restarts, and replica gaps.
+
+### Check CSI Node and Controller Metrics for RPC Failures
+Scrapes `/metrics` from CSI pods or headless metrics Services; flags elevated `csi_plugin_operations` error rates and slow RPC durations.
+
+### Check NFS Transport Health on CSI Nodes
+Analyzes `csi_node_nfs_xprt_unhealthy`, `csi_node_nfs_xprt_congested_state`, and pending request metrics for VIP connectivity and congestion.
+
+### Trace Kubernetes PVCs to VAST Views
+Produces a trace report linking each VAST-backed PVC to PV volumeHandle, StorageClass parameters, view path, tenant, and VIP.
+
+### Check End-to-End Pod Mount Health
+Finds pods using VAST PVCs that are not Ready, plus mount-related warning events and VolumeAttachment failures.
+
+### Check VAST StorageClass Configuration
+Validates VAST StorageClass parameters (endpoint, view policy, tenant, mount options) for misconfigurations.
+
+### Correlate Kubernetes Storage Events with VAST Tenant Metrics
+When `VAST_VMS_ENDPOINT` is configured, fetches `/api/prometheusmetrics/tenants` and correlates unbound or failing PVCs with tenant signals. Skips gracefully with an informational report when the endpoint is unset.
+
+## Platform Notes
+
+- VAST CSI metrics are exposed at `GET /metrics` on node port **9090** and controller port **9091** (override via Helm `metrics.port`)
+- Enable metrics in the Helm chart: `metrics.enabled=true`
+- StorageClass provisioner ID: `csi.vastdata.com`
+- See [VAST CSI metrics documentation](https://kb.vastdata.com/documentation/docs/exporting-vast-csi-driver-metrics-to-prometheus)
+
+## Related Bundles
+
+- `k8s-pvc-healthcheck`: General PVC health; this bundle adds VAST-specific CSI metrics and tracing
+- `vast-tenant-storage-health`: Backend tenant QoS and capacity (complements this Kubernetes front-end view)
diff --git a/codebundles/vast-k8s-csi-health/check-csi-metrics.sh b/codebundles/vast-k8s-csi-health/check-csi-metrics.sh
new file mode 100755
index 00000000..94d6dc5d
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/check-csi-metrics.sh
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS: CONTEXT, CSI_NAMESPACE
+# OPTIONAL: RPC_ERROR_RATE_THRESHOLD (default 5), NODE_METRICS_PORT (default 9090), CONTROLLER_METRICS_PORT (default 9091)
+# Scrapes CSI node (9090) and controller (9091) /metrics for RPC failures.
+# Writes JSON array to csi_metrics_issues.json
+# -----------------------------------------------------------------------------
+: "${CONTEXT:?Must set CONTEXT}" # abort immediately when CONTEXT is unset or empty
+: "${CSI_NAMESPACE:?Must set CSI_NAMESPACE}" # same guard for the CSI driver namespace
+
+OUTPUT_FILE="csi_metrics_issues.json" # findings file, written by write_issues at the end of the script
+RPC_ERROR_RATE_THRESHOLD="${RPC_ERROR_RATE_THRESHOLD:-5}" # percent; the computed failure rate is compared against it
+NODE_METRICS_PORT="${NODE_METRICS_PORT:-9090}" # overridable through the environment
+CONTROLLER_METRICS_PORT="${CONTROLLER_METRICS_PORT:-9091}" # overridable through the environment
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # absolute directory containing this script
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh" # presumably defines the helpers called below (append_issue, write_issues, find_csi_*_pods, curl_*_metrics) - confirm
+
+issues_json='[]' # accumulated findings; grown through append_issue
+metrics_body="" # raw scraped text kept for the exit report
+
+print_report() { # EXIT-trap handler: prints a human-readable summary of the scrape and the findings file
+  { set +x; } 2>/dev/null || true # turn xtrace off quietly so the report is not interleaved with trace output
+  echo
+  echo "=== VAST CSI metrics probe (context '${CONTEXT}', namespace '${CSI_NAMESPACE}') ==="
+  echo "RPC error rate threshold: ${RPC_ERROR_RATE_THRESHOLD}%"
+  if [[ -n "$metrics_body" ]]; then
+    head -n 40 <<<"$metrics_body" # here-string: `echo | head` can end with SIGPIPE under pipefail and abort this trap via set -e
+    echo "... (truncated)"
+  else
+    echo "  No metrics payload retrieved."
+  fi
+  echo
+  if [[ -s "$OUTPUT_FILE" ]]; then # only summarize when the findings file exists and is non-empty
+    jq -r '.[] | "  - [sev=\(.severity)] \(.title)"' "$OUTPUT_FILE" 2>/dev/null || true
+  fi
+}
+trap print_report EXIT
+
+fetch_metrics() { # usage: fetch_metrics <role> <port>; prints the /metrics payload, returns 1 when nothing could be scraped
+  local role="$1" # "node" selects node pods; any other value selects controller pods
+  local port="$2" # metrics port handed to the curl_* helpers
+  local pods_json svc_json body pod svc
+
+  # if/else rather than `test && a || b`, which also ran the controller lookup whenever the node lookup failed.
+  pods_json=$(if [[ "$role" == "node" ]]; then find_csi_node_pods; else find_csi_controller_pods; fi)
+  pod=$(echo "$pods_json" | jq -r '.items[0].metadata.name // empty') # only the first listed pod is probed
+  if [[ -n "$pod" ]]; then
+    body=$(curl_pod_metrics "$pod" "${CSI_NAMESPACE}" "$port")
+    if [[ -n "$body" ]]; then
+      echo "$body"
+      return 0
+    fi
+  fi
+
+  # Fallback: scrape through a metrics Service, preferring one whose name contains the role.
+  svc_json=$(find_metrics_services)
+  svc=$(echo "$svc_json" | jq -r --arg role "$role" '
+    [.[] | select(.name | test($role; "i")) | .name][0] // empty
+  ')
+  if [[ -z "$svc" ]]; then
+    svc=$(echo "$svc_json" | jq -r '.[0].name // empty') # no role match: take the first Service listed
+  fi
+  if [[ -n "$svc" ]]; then
+    body=$(curl_service_metrics "$svc" "${CSI_NAMESPACE}" "$port")
+    if [[ -n "$body" ]]; then
+      echo "$body"
+      return 0
+    fi
+  fi
+  return 1
+}
+
+analyze_rpc_metrics() { # usage: analyze_rpc_metrics <role> <metrics-text>; appends findings to the global issues_json
+  local role="$1" body="$2"
+  [[ -z "$body" ]] && return # nothing scraped, nothing to analyze
+
+  # Text is fed through here-strings: `echo | grep -q` can report SIGPIPE (141) under pipefail on large payloads.
+  local total failed rate slow_ops
+  total=$(awk '/^csi_plugin_operations_total\{/{sum+=$NF} END{printf "%.0f\n", sum}' <<<"$body") # %.0f keeps large sums out of exponent notation
+  failed=$(awk '/^csi_plugin_operations_total\{[^}]*grpc_code="(Internal|Unknown|Unavailable|DeadlineExceeded|ResourceExhausted|Aborted|FailedPrecondition)"/{sum+=$NF} END{printf "%.0f\n", sum}' <<<"$body")
+  if [[ "${total:-0}" -gt 0 ]]; then
+    rate=$(awk -v f="${failed:-0}" -v t="$total" 'BEGIN {printf "%.2f", (f/t)*100}') # values passed with -v, not spliced into the program
+    if awk -v r="$rate" -v th="$RPC_ERROR_RATE_THRESHOLD" 'BEGIN {exit !(r+0 > th+0)}'; then # +0 forces a numeric comparison
+      issues_json=$(append_issue "$issues_json" \
+        "Elevated CSI RPC error rate on ${role} metrics (context \`${CONTEXT}\`)" \
+        "csi_plugin_operations_total failures=${failed} of ${total} (${rate}% > threshold ${RPC_ERROR_RATE_THRESHOLD}%)." \
+        3 \
+        "Inspect ${role} pod logs in ${CSI_NAMESPACE}. Correlate with VMS health and NFS xprt congestion metrics.")
+    fi
+  fi
+
+  slow_ops=$(awk '/^csi_plugin_operations_seconds\{/{if ($NF > 5) c++} END{print c+0}' <<<"$body") # count of samples above 5 seconds
+  if [[ "${slow_ops:-0}" -gt 0 ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "Slow CSI RPC operations detected on ${role} metrics" \
+      "Found ${slow_ops} csi_plugin_operations_seconds samples exceeding 5s in ${CSI_NAMESPACE}." \
+      3 \
+      "Check VMS latency, network path to VIPs, and node CPU pressure on CSI ${role} pods.")
+  fi
+
+  if ! grep -q '^csi_plugin_operations_total' <<<"$body"; then # endpoint answered but exposes no operation counters
+    issues_json=$(append_issue "$issues_json" \
+      "CSI plugin operation metrics missing from ${role} endpoint" \
+      "Metrics endpoint responded but csi_plugin_operations_total was not present; metrics may be disabled." \
+      4 \
+      "Enable metrics in the VAST CSI Helm chart (metrics.enabled=true) and verify ServiceMonitor or headless metrics Services.")
+  fi
+}
+
+node_metrics="" # raw node scrape; stays empty when fetch_metrics fails
+controller_metrics="" # raw controller scrape; stays empty when fetch_metrics fails
+
+if node_metrics=$(fetch_metrics "node" "$NODE_METRICS_PORT"); then # scrape succeeded: keep the text for the report and analyze it
+  metrics_body+=$'\n'"# Node metrics (port ${NODE_METRICS_PORT})"$'\n'"${node_metrics}"
+  analyze_rpc_metrics "node" "$node_metrics"
+else # neither a pod nor a metrics Service returned a payload
+  issues_json=$(append_issue "$issues_json" \
+    "Unable to scrape VAST CSI node metrics in namespace \`${CSI_NAMESPACE}\`" \
+    "Could not reach /metrics on node pods (port ${NODE_METRICS_PORT}) or metrics Services." \
+    3 \
+    "Enable node metrics in Helm values. Verify pod exec/network access from the RunWhen execution environment.")
+fi
+
+if controller_metrics=$(fetch_metrics "controller" "$CONTROLLER_METRICS_PORT"); then # same flow for the controller role
+  metrics_body+=$'\n'"# Controller metrics (port ${CONTROLLER_METRICS_PORT})"$'\n'"${controller_metrics}"
+  analyze_rpc_metrics "controller" "$controller_metrics"
+else # neither a pod nor a metrics Service returned a payload
+  issues_json=$(append_issue "$issues_json" \
+    "Unable to scrape VAST CSI controller metrics in namespace \`${CSI_NAMESPACE}\`" \
+    "Could not reach /metrics on controller pods (port ${CONTROLLER_METRICS_PORT}) or metrics Services." \
+    3 \
+    "Enable controller metrics in Helm values and confirm the controller metrics Service has endpoints.")
+fi
+
+write_issues "$OUTPUT_FILE" "$issues_json" # persist findings; print_report (EXIT trap) reads this file afterwards
diff --git a/codebundles/vast-k8s-csi-health/check-csi-pod-health.sh b/codebundles/vast-k8s-csi-health/check-csi-pod-health.sh
new file mode 100755
index 00000000..accbb3dc
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/check-csi-pod-health.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS: CONTEXT, CSI_NAMESPACE
+# Checks VAST CSI controller and node pods for readiness and restart issues.
+# Writes JSON array to csi_pod_health_issues.json
+# -----------------------------------------------------------------------------
+: "${CONTEXT:?Must set CONTEXT}" # abort immediately when CONTEXT is unset or empty
+: "${CSI_NAMESPACE:?Must set CSI_NAMESPACE}" # same guard for the CSI driver namespace
+
+OUTPUT_FILE="csi_pod_health_issues.json" # findings file, written by write_issues
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # absolute directory containing this script
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh" # presumably defines k8s, append_issue, write_issues, find_csi_*_pods and KUBECTL - confirm
+
+issues_json='[]' # accumulated findings; grown through append_issue
+
+print_report() { # EXIT-trap handler: lists the namespace's pods, then summarizes the findings file
+  { set +x; } 2>/dev/null || true
+  local finding_count
+  printf '\n%s\n' "=== VAST CSI pods in namespace '${CSI_NAMESPACE}' (context '${CONTEXT}') ==="
+  if ! k8s get pods -n "${CSI_NAMESPACE}" -o wide 2>/dev/null; then
+    echo "  (unable to list pods)"
+  fi
+  echo
+  [[ -s "$OUTPUT_FILE" ]] || return 0 # no findings file (or empty): nothing more to print
+  finding_count=$(jq 'length' "$OUTPUT_FILE" 2>/dev/null || echo 0)
+  echo "=== Findings (${finding_count}) ==="
+  jq -r '.[] | "  - [sev=\(.severity)] \(.title)"' "$OUTPUT_FILE" 2>/dev/null || true
+}
+trap print_report EXIT
+
+if ! k8s get ns "${CSI_NAMESPACE}" -o name &>/dev/null; then # namespace lookup failed: record one issue and stop early
+  issues_json=$(append_issue "$issues_json" \
+    "VAST CSI namespace \`${CSI_NAMESPACE}\` not found in context \`${CONTEXT}\`" \
+    "The configured CSI_NAMESPACE does not exist; driver health cannot be assessed." \
+    3 \
+    "Verify CSI_NAMESPACE (default: vast-csi) and confirm the VAST CSI Helm release is installed.")
+  write_issues "$OUTPUT_FILE" "$issues_json"
+  exit 0 # exit status stays 0; the condition is conveyed through the issues file
+fi
+
+check_pods() { # usage: check_pods <role> <pod-list-json>; appends findings to the global issues_json
+  local role="$1" # label used in issue titles ("node" / "controller" at the call sites)
+  local pods_json="$2" # JSON object with an .items array of pods
+  local count
+  count=$(echo "$pods_json" | jq '.items | length')
+  if [[ "$count" -eq 0 ]]; then # no pods for this role: raise a single issue and stop
+    issues_json=$(append_issue "$issues_json" \
+      "No VAST CSI ${role} pods found in namespace \`${CSI_NAMESPACE}\`" \
+      "Expected ${role} DaemonSet/Deployment pods for the VAST CSI driver were not discovered." \
+      2 \
+      "Confirm the Helm release installed node/controller components. Check labels and pod selectors in ${CSI_NAMESPACE}.")
+    return
+  fi
+
+  while IFS= read -r line; do # one compact JSON summary per pod, produced by the jq filter after `done`
+    [[ -z "$line" ]] && continue
+    local name phase ready restarts crash
+    name=$(echo "$line" | jq -r '.name')
+    phase=$(echo "$line" | jq -r '.phase')
+    ready=$(echo "$line" | jq -r '.ready')
+    restarts=$(echo "$line" | jq -r '.restarts')
+    crash=$(echo "$line" | jq -r '.crash')
+
+    if [[ "$crash" == "true" ]]; then # at least one container is waiting with reason CrashLoopBackOff
+      issues_json=$(append_issue "$issues_json" \
+        "VAST CSI ${role} pod \`${name}\` is in CrashLoopBackOff" \
+        "Pod phase=${phase}, ready=${ready}, restarts=${restarts} in namespace ${CSI_NAMESPACE}." \
+        2 \
+        "Inspect logs: ${KUBECTL} logs -n ${CSI_NAMESPACE} ${name} --context ${CONTEXT}. Check VMS connectivity and mount permissions.")
+    elif [[ "$ready" != "True" ]]; then # Ready condition missing or not "True"
+      issues_json=$(append_issue "$issues_json" \
+        "VAST CSI ${role} pod \`${name}\` is not Ready" \
+        "Pod phase=${phase}, restarts=${restarts} in namespace ${CSI_NAMESPACE}." \
+        2 \
+        "Describe pod: ${KUBECTL} describe pod -n ${CSI_NAMESPACE} ${name} --context ${CONTEXT}.")
+    fi
+
+    if [[ "${restarts}" =~ ^[0-9]+$ ]] && [[ "$restarts" -gt 5 ]]; then # restart total summed across containers; reported in addition to the issue above
+      issues_json=$(append_issue "$issues_json" \
+        "Elevated restarts on VAST CSI ${role} pod \`${name}\`" \
+        "Total container restarts: ${restarts} within namespace ${CSI_NAMESPACE}." \
+        2 \
+        "Review recent logs and node NFS transport metrics; check for OOM or VMS endpoint instability.")
+    fi
+  done < <(echo "$pods_json" | jq -c '.items[] | {
+    name: .metadata.name,
+    phase: (.status.phase // "Unknown"),
+    ready: ((.status.conditions // []) | map(select(.type=="Ready")) | .[0].status // "False"),
+    restarts: ([.status.containerStatuses[]? | .restartCount // 0] | add // 0),
+    crash: ([.status.containerStatuses[]? | .state.waiting.reason? // empty] | map(select(. == "CrashLoopBackOff")) | length > 0)
+  }')
+}
+
+node_pods=$(find_csi_node_pods)
+controller_pods=$(find_csi_controller_pods)
+
+# Fallback: all pods in namespace if selectors miss custom installs
+if [[ $(echo "$node_pods" | jq '.items | length') -eq 0 && $(echo "$controller_pods" | jq '.items | length') -eq 0 ]]; then # both lookups empty: classify by pod name instead
+  all_pods=$(k8s get pods -n "${CSI_NAMESPACE}" -o json 2>/dev/null || echo '{"items":[]}')
+  node_pods=$(echo "$all_pods" | jq '{items: [.items[] | select(.metadata.name | test("node"; "i"))]}') # names containing "node", case-insensitive
+  controller_pods=$(echo "$all_pods" | jq '{items: [.items[] | select(.metadata.name | test("controller"; "i"))]}') # names containing "controller", case-insensitive
+fi
+
+check_pods "node" "$node_pods"
+check_pods "controller" "$controller_pods"
+
+# Deployment / DaemonSet replica alignment
+for kind in deploy statefulset daemonset; do # StatefulSets are checked as well
+  resources=$(k8s get "$kind" -n "${CSI_NAMESPACE}" -o json 2>/dev/null || echo '{"items":[]}')
+  while IFS= read -r dline; do # one {name, desired, ready} object per workload whose name matches vast|csi
+    [[ -z "$dline" ]] && continue
+    dname=$(echo "$dline" | jq -r '.name')
+    want=$(echo "$dline" | jq -r '.desired')
+    have=$(echo "$dline" | jq -r '.ready')
+    if [[ "$want" =~ ^[0-9]+$ ]] && [[ "$have" =~ ^[0-9]+$ ]] && [[ "$want" -gt 0 ]] && [[ "$have" -lt "$want" ]]; then # both counts numeric, something desired, fewer ready
+      issues_json=$(append_issue "$issues_json" \
+        "VAST CSI ${kind} \`${dname}\` is not fully Ready" \
+        "readyReplicas=${have}, desired=${want} in namespace ${CSI_NAMESPACE}." \
+        2 \
+        "${KUBECTL} describe ${kind} -n ${CSI_NAMESPACE} ${dname} --context ${CONTEXT}")
+    fi
+  done < <(echo "$resources" | jq -c '.items[] | select(.metadata.name | test("vast|csi"; "i")) | {
+    name: .metadata.name,
+    desired: (.spec.replicas // (.status.desiredNumberScheduled // 0)),
+    ready: (.status.readyReplicas // (.status.numberReady // 0))
+  }')
+done
+
+write_issues "$OUTPUT_FILE" "$issues_json" # persist findings; print_report (EXIT trap) reads this file afterwards
diff --git a/codebundles/vast-k8s-csi-health/check-nfs-xprt-health.sh b/codebundles/vast-k8s-csi-health/check-nfs-xprt-health.sh
new file mode 100755
index 00000000..8ef0b8a0
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/check-nfs-xprt-health.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS: CONTEXT, CSI_NAMESPACE
+# OPTIONAL: XPRT_PENDING_THRESHOLD (default 100), NODE_METRICS_PORT (default 9090)
+# Analyzes csi_node_nfs_xprt_* metrics for congestion and unhealthy VIPs.
+# Writes a JSON array of issues to nfs_xprt_issues.json
+# -----------------------------------------------------------------------------
+: "${CONTEXT:?Must set CONTEXT}"
+: "${CSI_NAMESPACE:?Must set CSI_NAMESPACE}"
+
+OUTPUT_FILE="nfs_xprt_issues.json"
+XPRT_PENDING_THRESHOLD="${XPRT_PENDING_THRESHOLD:-100}"
+NODE_METRICS_PORT="${NODE_METRICS_PORT:-9090}"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh"
+# issues_json collects findings; metrics_body holds the scraped payload that print_report samples from
+issues_json='[]'
+metrics_body=""
+
+# EXIT trap: prints a header and up to 30 csi_node_nfs_xprt_* sample lines from metrics_body,
+# or a placeholder when nothing was retrieved / no such lines exist.
+print_report() {
+  { set +x; } 2>/dev/null || true
+  echo
+  echo "=== NFS xprt metrics (namespace '${CSI_NAMESPACE}', threshold pending=${XPRT_PENDING_THRESHOLD}) ==="
+  [[ -n "$metrics_body" ]] || { echo "  No metrics retrieved."; return 0; }
+  # awk reads all input, so no SIGPIPE: `grep | head || echo` under pipefail could append the "no lines" notice after real lines
+  awk '/^csi_node_nfs_xprt/ { if (++n <= 30) print } END { if (!n) print "  (no csi_node_nfs_xprt_* lines found)" }' <<<"$metrics_body"
+}
+trap print_report EXIT
+
+fetch_node_metrics() {
+  local pods_json pod body
+  pods_json=$(find_csi_node_pods)
+  while IFS= read -r pod; do
+    [[ -z "$pod" ]] && continue
+    body=$(curl_pod_metrics "$pod" "${CSI_NAMESPACE}" "$NODE_METRICS_PORT")
+    if [[ -n "$body" ]] && echo "$body" | grep -q 'csi_node_nfs_xprt'; then
+      echo "$body"
+      return 0
+    fi
+  done < <(echo "$pods_json" | jq -r '.items[].metadata.name // empty')
+
+  local svc
+  svc=$(find_metrics_services | jq -r '[.[] | select(.name | test("node"; "i")) | .name][0] // empty')
+  if [[ -n "$svc" ]]; then
+    body=$(curl_service_metrics "$svc" "${CSI_NAMESPACE}" "$NODE_METRICS_PORT")
+    [[ -n "$body" ]] && echo "$body" && return 0
+  fi
+  return 1
+}
+
+if ! metrics_body=$(fetch_node_metrics); then
+  issues_json=$(append_issue "$issues_json" \
+    "NFS transport metrics unavailable from VAST CSI node pods" \
+    "Could not retrieve csi_node_nfs_xprt_* metrics from ${CSI_NAMESPACE} on context ${CONTEXT}." \
+    3 \
+    "Enable node metrics and ensure VIP connections are established. Metrics export only while VIPs are connected.")
+  write_issues "$OUTPUT_FILE" "$issues_json"
+  exit 0
+fi
+
+# Unhealthy transports: one issue per csi_node_nfs_xprt_unhealthy sample whose value is >= 1
+while IFS= read -r line; do
+  [[ -n "$line" ]] || continue
+  dest=$(sed -n 's/.*destination="\([^"]*\)".*/\1/p' <<<"$line")
+  issues_json=$(append_issue "$issues_json" \
+    "Unhealthy NFS transport to VIP \`${dest:-unknown}\` on CSI node" \
+    "Metric line: ${line}" \
+    3 \
+    "Verify VIP reachability from worker nodes, check network ACLs, and inspect VMS cluster health for the destination VIP.")
+done < <(awk '/^csi_node_nfs_xprt_unhealthy\{/ && $NF >= 1' <<<"$metrics_body" || true)
+
+# Congested state: one issue per congested transport, annotated with its pending-request count when at/over the threshold
+while IFS= read -r line; do
+  [[ -z "$line" ]] && continue
+  dest=$(echo "$line" | sed -n 's/.*destination="\([^"]*\)".*/\1/p')
+  # Literal label match via index() and the threshold passed with -v (no shell text spliced into the awk program).
+  # Here-string input: awk's early exit cannot SIGPIPE a producer, which pipefail + errexit would turn into an abort.
+  pending=$(awk -v needle="destination=\"${dest}\"" -v thr="${XPRT_PENDING_THRESHOLD}" '
+    /^csi_node_nfs_xprt_pending_requests\{/ && index($0, needle) && $NF + 0 >= thr + 0 { print $NF; exit }' <<<"$metrics_body")
+  details="Congested transport detected. Line: ${line}"
+  if [[ -n "${pending:-}" ]]; then
+    details="${details} pending_requests=${pending} (threshold ${XPRT_PENDING_THRESHOLD})."
+  fi
+  issues_json=$(append_issue "$issues_json" \
+    "NFS transport congestion toward VIP \`${dest:-unknown}\`" \
+    "$details" \
+    3 \
+    "Investigate network congestion between workers and VAST VIPs. Review tenant QoS limits and workload I/O patterns.")
+done < <(echo "$metrics_body" | awk '/^csi_node_nfs_xprt_congested_state\{/{if ($NF >= 1) print}' || true)
+
+# Pending requests threshold without congestion flag
+while IFS= read -r line; do
+  [[ -z "$line" ]] && continue
+  val=$(echo "$line" | awk '{print $NF}')
+  dest=$(echo "$line" | sed -n 's/.*destination="\([^"]*\)".*/\1/p')
+  if [[ "${val%%.*}" =~ ^[0-9]+$ ]] && [[ "${val%%.*}" -gt "${XPRT_PENDING_THRESHOLD}" ]]; then
+    if ! echo "$issues_json" | jq -e --arg d "${dest:-unknown}" '.[] | select(.title | contains($d))' >/dev/null 2>&1; then
+      issues_json=$(append_issue "$issues_json" \
+        "High pending NFS requests toward VIP \`${dest:-unknown}\`" \
+        "csi_node_nfs_xprt_pending_requests=${val} exceeds threshold ${XPRT_PENDING_THRESHOLD}. Line: ${line}" \
+        3 \
+        "Check for slow VMS responses or network latency. Consider scaling tenant QoS or reducing concurrent mount pressure.")
+    fi
+  fi
+done < <(echo "$metrics_body" | awk '/^csi_node_nfs_xprt_pending_requests\{/{print}' || true)
+
+# No transports connected at all
+xprt_total=$(echo "$metrics_body" | awk '/^csi_node_nfs_xprt_total /{print $NF; exit}')
+xprt_connected=$(echo "$metrics_body" | awk '/^csi_node_nfs_xprt_connected /{print $NF; exit}')
+if [[ "${xprt_total:-1}" == "0.0" || "${xprt_total:-1}" == "0" ]]; then
+  issues_json=$(append_issue "$issues_json" \
+    "No NFS transports registered on VAST CSI node metrics" \
+    "csi_node_nfs_xprt_total=${xprt_total:-0} indicates no active VIP connections." \
+    2 \
+    "Confirm StorageClass endpoint/VIP configuration and that workloads have attempted mounts on this node.")
+fi
+
+write_issues "$OUTPUT_FILE" "$issues_json"
diff --git a/codebundles/vast-k8s-csi-health/check-pod-mount-health.sh b/codebundles/vast-k8s-csi-health/check-pod-mount-health.sh
new file mode 100755
index 00000000..0e82977a
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/check-pod-mount-health.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS: CONTEXT, NAMESPACE
+# Finds pods using VAST CSI volumes with mount / VolumeAttachment failures.
+# Writes JSON array to pod_mount_issues.json
+# -----------------------------------------------------------------------------
+: "${CONTEXT:?Must set CONTEXT}"
+: "${NAMESPACE:?Must set NAMESPACE}"
+
+OUTPUT_FILE="pod_mount_issues.json"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh"
+
+issues_json='[]'
+
+# EXIT-trap report: pod listing for the namespace, then one summary line per issue found in the output file.
+print_report() {
+  { set +x; } 2>/dev/null || true
+  echo
+  echo "=== Pod mount health for VAST volumes in '${NAMESPACE}' ==="
+  k8s get pods -n "${NAMESPACE}" -o wide 2>/dev/null || true
+  echo
+  [[ -s "$OUTPUT_FILE" ]] || return 0
+  jq -r '.[] | "  - [sev=\(.severity)] \(.title)"' "$OUTPUT_FILE" 2>/dev/null || true
+}
+trap print_report EXIT
+
+# Resolve the VAST-backed PVC names in the namespace: shared helper first, then a per-PVC fallback scan
+vast_pvcs=$(list_vast_pvcs_json "${NAMESPACE}")
+pvc_names=$(echo "$vast_pvcs" | jq -r '.items[].metadata.name // empty')
+if [[ -z "$pvc_names" ]]; then
+  all_pvcs=$(k8s get pvc -n "${NAMESPACE}" -o json 2>/dev/null || echo '{"items":[]}')
+  # `if`, not `test && cmd`: a trailing non-VAST PVC must not become the substitution's exit status (fatal under errexit)
+  pvc_names=$(while IFS= read -r line; do
+    [[ -z "$line" ]] && continue
+    if is_vast_pvc_json "$line"; then echo "$line" | jq -r '.metadata.name'; fi
+  done < <(echo "$all_pvcs" | jq -c '.items[]?'))
+fi
+if [[ -z "$pvc_names" ]]; then
+  write_issues "$OUTPUT_FILE" "$issues_json"
+  exit 0
+fi
+
+# One pod listing serves every PVC; an API failure degrades to an empty list instead of aborting under pipefail
+ns_pods=$(k8s get pods -n "${NAMESPACE}" -o json 2>/dev/null || echo '{"items":[]}')
+while IFS= read -r pvc; do
+  [[ -z "$pvc" ]] && continue
+  pods_using=$(echo "$ns_pods" | jq -r --arg pvc "$pvc" \
+    '.items[] | select(any(.spec.volumes[]?; .persistentVolumeClaim.claimName == $pvc)) | .metadata.name')
+  while IFS= read -r pod; do
+    [[ -z "$pod" ]] && continue
+    pod_json=$(k8s get pod "$pod" -n "${NAMESPACE}" -o json 2>/dev/null || echo '{}')
+    phase=$(echo "$pod_json" | jq -r '.status.phase // "Unknown"')
+    mount_fail=$(echo "$pod_json" | jq -r '
+      [.status.containerStatuses[]?.state.waiting.reason? // empty,
+       .status.initContainerStatuses[]?.state.waiting.reason? // empty] |
+      map(select(. == "ContainerCreating" or . == "CreateContainerError")) | length
+    ')
+    not_ready=$(echo "$pod_json" | jq -r '
+      ([.status.conditions[]? | select(.type=="Ready") | .status][0] // "False")
+    ')
+    # Succeeded pods (e.g. finished Jobs) report Ready=False by design, so they are not treated as unhealthy
+    if [[ "$phase" == "Pending" || ( "$phase" != "Succeeded" && "$not_ready" == "False" ) ]]; then
+      issues_json=$(append_issue "$issues_json" \
+        "Pod \`${pod}\` using VAST PVC \`${pvc}\` is not running/ready" \
+        "Pod phase=${phase}, ready=${not_ready}, mount-related waits=${mount_fail} in namespace ${NAMESPACE}." \
+        3 \
+        "Describe pod ${pod} and check for FailedMount / FailedAttachVolume events. Inspect CSI node logs on the scheduled node.")
+    fi
+    # Scope events to kind=Pod so same-named objects of other kinds are not attributed to this pod
+    events=$(k8s get events -n "${NAMESPACE}" --field-selector "involvedObject.kind=Pod,involvedObject.name=${pod}" -o json 2>/dev/null || echo '{"items":[]}')
+    while IFS= read -r ev; do
+      [[ -z "$ev" ]] && continue
+      msg=$(echo "$ev" | jq -r '.message')
+      reason=$(echo "$ev" | jq -r '.reason')
+      if echo "$msg $reason" | grep -qiE 'mount|publish|attach|volume|nfs|vast|csi'; then
+        issues_json=$(append_issue "$issues_json" \
+          "Mount-related event for pod \`${pod}\` (PVC \`${pvc}\`)" \
+          "Event reason=${reason}: ${msg}" \
+          3 \
+          "Review VolumeAttachment objects and CSI node logs. Correlate with NFS xprt metrics if mounts hang.")
+      fi
+    done < <(echo "$events" | jq -c '.items[]? | select(.type == "Warning")')
+  done <<< "$pods_using"
+done <<< "$pvc_names"
+
+# VolumeAttachment issues for VAST PVs in this namespace
+vas=$(k8s get volumeattachment -o json 2>/dev/null || echo '{"items":[]}')
+while IFS= read -r va; do
+  [[ -z "$va" ]] && continue
+  attached=$(echo "$va" | jq -r '.status.attached // false')
+  err=$(echo "$va" | jq -r '.status.attachError.message // empty')
+  pv=$(echo "$va" | jq -r '.spec.source.persistentVolumeName // empty')
+  pod_ref=$(echo "$va" | jq -r '.spec.source.inlineVolumeSpec.claimRef.name // empty')
+  driver=$(echo "$va" | jq -r '.spec.attacher // empty')
+
+  [[ "$driver" != "csi.vastdata.com" ]] && continue
+  [[ -n "$pod_ref" ]] && ! echo "$pvc_names" | grep -qx "$pod_ref" && continue
+
+  if [[ "$attached" != "true" || -n "$err" ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "VolumeAttachment failure for VAST PV \`${pv:-unknown}\`" \
+      "attached=${attached}, error=${err:-none}, claimRef=${pod_ref:-n/a}." \
+      2 \
+      "Describe volumeattachment and verify node driver registrar health. Check for stale attachments after node drains.")
+  fi
+done < <(echo "$vas" | jq -c '.items[]?')
+
+write_issues "$OUTPUT_FILE" "$issues_json"
diff --git a/codebundles/vast-k8s-csi-health/check-vast-storageclass-config.sh b/codebundles/vast-k8s-csi-health/check-vast-storageclass-config.sh
new file mode 100755
index 00000000..8b944825
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/check-vast-storageclass-config.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS: CONTEXT
+# Validates VAST StorageClass parameters for common misconfigurations.
+# Writes JSON array to storageclass_config_issues.json
+# -----------------------------------------------------------------------------
+: "${CONTEXT:?Must set CONTEXT}"
+
+OUTPUT_FILE="storageclass_config_issues.json"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh"
+
+issues_json='[]'
+
+print_report() {
+  { set +x; } 2>/dev/null || true
+  echo
+  echo "=== VAST StorageClasses in context '${CONTEXT}' ==="
+  k8s get storageclass -o custom-columns=NAME:.metadata.name,PROVISIONER:.provisioner 2>/dev/null \
+    | awk 'NR==1 || /vast|csi\.vastdata/' || true
+}
+trap print_report EXIT
+
+scs=$(k8s get storageclass -o json 2>/dev/null || echo '{"items":[]}')
+vast_scs=$(echo "$scs" | jq -c '[.items[] | select(
+  (.provisioner == "csi.vastdata.com") or
+  (.provisioner == "kubernetes.io/csi/csi.vastdata.com") or
+  (.metadata.name | test("vast"; "i"))
+)]')
+
+count=$(echo "$vast_scs" | jq 'length')
+if [[ "$count" -eq 0 ]]; then
+  issues_json=$(append_issue "$issues_json" \
+    "No VAST CSI StorageClasses found in context \`${CONTEXT}\`" \
+    "No StorageClass uses provisioner csi.vastdata.com." \
+    3 \
+    "Install or register a VAST StorageClass via the CSI Helm chart. Confirm provisioner ID csi.vastdata.com.")
+  write_issues "$OUTPUT_FILE" "$issues_json"
+  exit 0
+fi
+
+while IFS= read -r sc; do
+  [[ -z "$sc" ]] && continue
+  name=$(echo "$sc" | jq -r '.metadata.name')
+  params=$(echo "$sc" | jq -r '.parameters // {}')
+  mount_opts=$(echo "$sc" | jq -r '.mountOptions // [] | join(",")')
+  reclaim=$(echo "$sc" | jq -r '.reclaimPolicy // "Delete"')
+  vol_expansion=$(echo "$sc" | jq -r '.allowVolumeExpansion // false')
+  # Accept every spelling of the VIP/endpoint parameter, including the vip_pool_name / vip_pool_fqdn keys
+  endpoint=$(echo "$params" | jq -r '.endpoint // .vip_pool_name // .vip_pool_fqdn // .vip_pool // .vip // empty')
+  view_policy=$(echo "$params" | jq -r '.view_policy // .view // .root_export // empty')
+  tenant=$(echo "$params" | jq -r '.tenant // .tenant_name // empty')
+  qos=$(echo "$params" | jq -r '.qos_policy // .qos // empty')
+
+  echo "StorageClass ${name}: endpoint=${endpoint:-missing}, view=${view_policy:-missing}, tenant=${tenant:-missing}, qos=${qos:-n/a}, mountOptions=${mount_opts:-none}"
+
+  if [[ -z "$endpoint" ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "VAST StorageClass \`${name}\` missing endpoint/VIP parameter" \
+      "parameters.endpoint (or vip_pool_name/vip_pool_fqdn/vip_pool/vip) is not set; dynamic provisioning may fail or use incorrect VIPs." \
+      3 \
+      "Set endpoint to a reachable VAST VIP or DNS name in the StorageClass parameters.")
+  fi
+
+  if [[ -z "$view_policy" ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "VAST StorageClass \`${name}\` missing view policy parameter" \
+      "No view_policy/view/root_export parameter found; view creation defaults may not match tenant layout." \
+      4 \
+      "Align view_policy with VMS view templates for the target tenant ${tenant:-unknown}.")
+  fi
+
+  if [[ -z "$tenant" ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "VAST StorageClass \`${name}\` has no explicit tenant parameter" \
+      "Tenant is not specified; volumes may land in an unexpected tenant context." \
+      4 \
+      "Set tenant or tenant_name to the intended VMS tenant for capacity and QoS tracking.")
+  fi
+
+  if [[ "$reclaim" == "Retain" && "$vol_expansion" != "true" ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "VAST StorageClass \`${name}\` retains PVs without volume expansion enabled" \
+      "reclaimPolicy=Retain with allowVolumeExpansion=false can block operational growth for stateful workloads." \
+      4 \
+      "Enable allowVolumeExpansion or document manual expansion procedures for Retain volumes.")
+  fi
+  # Match `sync` only as a whole option; a plain substring search also fired on async / nosync
+  if grep -qiE '(^|,)sync(,|$)' <<<"$mount_opts" && ! grep -qi 'noatime' <<<"$mount_opts"; then
+    issues_json=$(append_issue "$issues_json" \
+      "VAST StorageClass \`${name}\` uses strict sync mount options" \
+      "mountOptions=${mount_opts} may increase latency-sensitive workload impact on NFS." \
+      4 \
+      "Review mountOptions (mountUmountTimeout, resolveMountSymlinks) against workload latency requirements.")
+  fi
+done < <(echo "$vast_scs" | jq -c '.[]')
+
+write_issues "$OUTPUT_FILE" "$issues_json"
diff --git a/codebundles/vast-k8s-csi-health/correlate-k8s-vast-events.sh b/codebundles/vast-k8s-csi-health/correlate-k8s-vast-events.sh
new file mode 100755
index 00000000..a63f02de
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/correlate-k8s-vast-events.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS: CONTEXT, NAMESPACE
+# OPTIONAL: VAST_VMS_ENDPOINT, VAST_CLUSTER_NAME, vast_vms_credentials secret
+# Cross-references failing PVCs with VMS tenant metrics when configured.
+# Writes JSON array to vast_correlation_issues.json
+# -----------------------------------------------------------------------------
+: "${CONTEXT:?Must set CONTEXT}"
+: "${NAMESPACE:?Must set NAMESPACE}"
+
+OUTPUT_FILE="vast_correlation_issues.json"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh"
+
+issues_json='[]'
+info_report=""
+
+# EXIT-trap report: prints the correlation text gathered in info_report, or a placeholder when it is empty.
+print_report() {
+  { set +x; } 2>/dev/null || true
+  printf '\n%s\n' "=== Kubernetes/VMS correlation for namespace '${NAMESPACE}' ==="
+  echo "${info_report:-  (no correlation output)}"
+}
+trap print_report EXIT
+
+if [[ -z "${VAST_VMS_ENDPOINT:-}" ]]; then
+  info_report="VAST_VMS_ENDPOINT is not configured; skipping backend correlation (informational only)."
+  issues_json=$(append_issue "$issues_json" \
+    "VMS backend correlation skipped for namespace \`${NAMESPACE}\`" \
+    "Set VAST_VMS_ENDPOINT and optional vast_vms_credentials to cross-reference tenant capacity/QoS with Kubernetes storage events." \
+    4 \
+    "Configure VAST_VMS_ENDPOINT to the VMS REST base URL (e.g. https://vms.example.com) and provide API credentials.")
+  write_issues "$OUTPUT_FILE" "$issues_json"
+  exit 0
+fi
+
+# Parse optional credentials from env (injected by platform from secret). xtrace is paused first so `set -x`
+# never writes the username, password or token to the trace output; a non-JSON secret is ignored, not fatal.
+{ set +x; } 2>/dev/null
+VMS_USER="${VMS_USERNAME:-${USERNAME:-}}"
+VMS_PASS="${VMS_PASSWORD:-${PASSWORD:-}}"
+VMS_TOKEN="${VMS_API_TOKEN:-${API_TOKEN:-}}"
+if [[ -n "${vast_vms_credentials:-}" ]]; then
+  VMS_USER="${VMS_USER:-$(echo "$vast_vms_credentials" | jq -r '.USERNAME // .username // empty' 2>/dev/null || true)}"
+  VMS_PASS="${VMS_PASS:-$(echo "$vast_vms_credentials" | jq -r '.PASSWORD // .password // empty' 2>/dev/null || true)}"
+  VMS_TOKEN="${VMS_TOKEN:-$(echo "$vast_vms_credentials" | jq -r '.API_TOKEN // .api_token // empty' 2>/dev/null || true)}"
+fi
+set -x
+# fetch_vms_metrics PATH: GET ${VAST_VMS_ENDPOINT}PATH using the bearer token, else basic auth, else no auth; prints
+# the body, or nothing on any curl failure. The body is a subshell so switching xtrace off (secrets) stays local to it.
+fetch_vms_metrics() (
+  { set +x; } 2>/dev/null
+  curl_args=(-sf)
+  [[ -n "$VMS_USER" && -n "$VMS_PASS" ]] && curl_args+=(-u "${VMS_USER}:${VMS_PASS}")
+  [[ -n "$VMS_TOKEN" ]] && curl_args=(-sf -H "Authorization: Bearer ${VMS_TOKEN}")
+  curl "${curl_args[@]}" "${VAST_VMS_ENDPOINT%/}$1" 2>/dev/null || true
+)
+
+metrics=$(fetch_vms_metrics "/api/prometheusmetrics/tenants")
+if [[ -z "$metrics" ]]; then
+  issues_json=$(append_issue "$issues_json" \
+    "Unable to fetch VMS tenant metrics from \`${VAST_VMS_ENDPOINT}\`" \
+    "Prometheus-format tenant metrics were not returned; verify credentials and network access." \
+    3 \
+    "Confirm vast_vms_credentials (USERNAME/PASSWORD or API_TOKEN) and VMS API reachability from the execution environment.")
+  write_issues "$OUTPUT_FILE" "$issues_json"
+  exit 0
+fi
+# Here-string, not `echo | head`: head's early exit on a large payload would SIGPIPE echo, and pipefail + errexit would abort the script
+info_report+="VMS tenant metrics sample (first 20 lines):"$'\n'
+info_report+=$(head -n 20 <<<"$metrics")
+
+# Collect failing / pressured PVCs in namespace
+failing_pvcs=$(k8s get pvc -n "${NAMESPACE}" -o json 2>/dev/null | jq -c '
+  [.items[] | select(.status.phase != "Bound" or (.metadata.annotations["volume.kubernetes.io/storage-provisioner"]? // "" | test("vast"; "i")))]
+')
+
+while IFS= read -r pvc_line; do
+  [[ -z "$pvc_line" ]] && continue
+  is_vast_pvc_json "$pvc_line" || continue
+  pvc_name=$(echo "$pvc_line" | jq -r '.metadata.name')
+  phase=$(echo "$pvc_line" | jq -r '.status.phase')
+  sc=$(echo "$pvc_line" | jq -r '.spec.storageClassName // empty')
+  sc_json=$(k8s get storageclass "$sc" -o json 2>/dev/null || echo '{}')
+  tenant=$(echo "$sc_json" | jq -r '.parameters.tenant // .parameters.tenant_name // empty')
+
+  [[ -z "$tenant" ]] && tenant="unknown"
+  tenant_pattern=$(echo "$tenant" | sed 's/[][\/.^$*+?{}|()-]/\\&/g')
+
+  cap_line=$(echo "$metrics" | grep -i "tenant.*${tenant_pattern}.*capacity" | head -n 1 || true)
+  qos_line=$(echo "$metrics" | grep -i "tenant.*${tenant_pattern}.*qos" | head -n 1 || true)
+
+  if [[ "$phase" != "Bound" ]]; then
+    details="PVC ${pvc_name} phase=${phase}, tenant=${tenant}."
+    [[ -n "$cap_line" ]] && details+=" VMS capacity hint: ${cap_line}"
+    [[ -n "$qos_line" ]] && details+=" VMS QoS hint: ${qos_line}"
+    cluster_label="${VAST_CLUSTER_NAME:-${CONTEXT}}"
+    issues_json=$(append_issue "$issues_json" \
+      "Kubernetes PVC \`${pvc_name}\` failures may correlate with VMS tenant \`${tenant}\` on cluster \`${cluster_label}\`" \
+      "$details" \
+      3 \
+      "Compare CSI driver logs with VMS tenant capacity/QoS dashboards. Expand tenant quota or resolve QoS throttling if backend pressure is confirmed.")
+  fi
+done < <(echo "$failing_pvcs" | jq -c '.[]?')
+
+if [[ $(echo "$issues_json" | jq 'length') -eq 0 ]]; then
+  issues_json=$(append_issue "$issues_json" \
+    "VMS correlation completed for namespace \`${NAMESPACE}\`" \
+    "No failing VAST PVCs required backend correlation. VMS endpoint ${VAST_VMS_ENDPOINT} responded successfully." \
+    4 \
+    "Re-run when PVC mount or binding failures occur to distinguish driver vs backend pressure.")
+fi
+
+write_issues "$OUTPUT_FILE" "$issues_json"
diff --git a/codebundles/vast-k8s-csi-health/runbook.robot b/codebundles/vast-k8s-csi-health/runbook.robot
new file mode 100644
index 00000000..d220a9c2
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/runbook.robot
@@ -0,0 +1,382 @@
+*** Settings ***
+Documentation       Monitors the VAST CSI driver and traces Kubernetes workload storage from PVCs through to VAST views, detecting driver failures, NFS congestion, and mount issues.
+Metadata            Author    rw-codebundle-agent
+Metadata            Display Name    VAST Data Kubernetes CSI Health
+Metadata            Supports    Kubernetes    VAST    CSI    NFS    storage    persistentvolumeclaim
+
+Force Tags          Kubernetes    VAST    CSI    storage    health
+
+Library             String
+Library             BuiltIn
+Library             RW.Core
+Library             RW.CLI
+Library             RW.platform
+Library             RW.K8sHelper
+
+Suite Setup         Suite Initialization
+
+
+*** Tasks ***
+Check VAST CSI Driver Pod Health in Namespace `${CSI_NAMESPACE}` on Cluster `${CONTEXT}`
+    [Documentation]    Verifies CSI controller Deployment/StatefulSet and node DaemonSet pods are Running/Ready; checks for CrashLoopBackOff and recent restarts.
+    [Tags]    Kubernetes    VAST    CSI    access:read-only    data:logs-config
+    # Run the pod health script; the next step expects it to leave its findings in csi_pod_health_issues.json.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=check-csi-pod-health.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    show_in_rwl_cheatsheet=true
+    ...    cmd_override=CONTEXT="${CONTEXT}" CSI_NAMESPACE="${CSI_NAMESPACE}" ./check-csi-pod-health.sh
+    # Read the issues file back with a plain cat.
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat csi_pod_health_issues.json
+    ...    env=${env}
+    ...    include_in_history=false
+    # Parse the JSON array; any parse failure (including empty output) degrades to an empty list with a warning.
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for CSI pod health task.    WARN
+        ${issue_list}=    Create List
+    END
+    # Raise one issue per entry; severity, title, details and next_steps come from the parsed JSON.
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=VAST CSI controller and node pods should be Ready in namespace `${CSI_NAMESPACE}`
+            ...    actual=Unhealthy CSI pod signals detected on context `${CONTEXT}`
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+    # The script's stdout is attached to the report whether or not issues were found.
+    RW.Core.Add Pre To Report    VAST CSI pod health analysis:
+    RW.Core.Add Pre To Report    ${result.stdout}
+
+Check CSI Node and Controller Metrics for RPC Failures in Namespace `${CSI_NAMESPACE}`
+    [Documentation]    Scrapes /metrics from CSI node and controller endpoints; detects elevated csi_plugin_operations failures and slow RPC durations.
+    [Tags]    Kubernetes    VAST    CSI    metrics    access:read-only    data:metrics
+    # Run the metrics script with the RPC error-rate threshold; the next step expects its findings in csi_metrics_issues.json.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=check-csi-metrics.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    show_in_rwl_cheatsheet=true
+    ...    cmd_override=CONTEXT="${CONTEXT}" CSI_NAMESPACE="${CSI_NAMESPACE}" RPC_ERROR_RATE_THRESHOLD="${RPC_ERROR_RATE_THRESHOLD}" ./check-csi-metrics.sh
+    # Read the issues file back with a plain cat.
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat csi_metrics_issues.json
+    ...    env=${env}
+    ...    include_in_history=false
+    # Parse the JSON array; any parse failure (including empty output) degrades to an empty list with a warning.
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for CSI metrics task.    WARN
+        ${issue_list}=    Create List
+    END
+    # Raise one issue per entry; severity, title, details and next_steps come from the parsed JSON.
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=CSI RPC error rates should remain below `${RPC_ERROR_RATE_THRESHOLD}` percent
+            ...    actual=CSI metrics analysis reported issues in namespace `${CSI_NAMESPACE}`
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+    # The script's stdout is attached to the report whether or not issues were found.
+    RW.Core.Add Pre To Report    VAST CSI metrics analysis:
+    RW.Core.Add Pre To Report    ${result.stdout}
+
+Check NFS Transport Health on CSI Nodes in Namespace `${CSI_NAMESPACE}`
+    [Documentation]    Analyzes csi_node_nfs_xprt metrics for network congestion and unhealthy VIP connections on CSI node pods.
+    [Tags]    Kubernetes    VAST    NFS    metrics    access:read-only    data:metrics
+    # Run the xprt script with the pending-request threshold; the next step expects its findings in nfs_xprt_issues.json.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=check-nfs-xprt-health.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=CONTEXT="${CONTEXT}" CSI_NAMESPACE="${CSI_NAMESPACE}" XPRT_PENDING_THRESHOLD="${XPRT_PENDING_THRESHOLD}" ./check-nfs-xprt-health.sh
+    # Read the issues file back with a plain cat.
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat nfs_xprt_issues.json
+    ...    env=${env}
+    ...    include_in_history=false
+    # Parse the JSON array; any parse failure (including empty output) degrades to an empty list with a warning.
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for NFS xprt task.    WARN
+        ${issue_list}=    Create List
+    END
+    # Raise one issue per entry; severity, title, details and next_steps come from the parsed JSON.
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=NFS transports to VAST VIPs should be healthy with pending requests below `${XPRT_PENDING_THRESHOLD}`
+            ...    actual=NFS xprt congestion or unhealthy VIP signals detected on context `${CONTEXT}`
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+    # The script's stdout is attached to the report whether or not issues were found.
+    RW.Core.Add Pre To Report    NFS transport (xprt) analysis:
+    RW.Core.Add Pre To Report    ${result.stdout}
+
+Trace Kubernetes PVCs to VAST Views for Namespace `${NAMESPACE}`
+    [Documentation]    Maps PVC to PV to StorageClass parameters and produces a trace report linking workload storage to VAST view, tenant, and VIP identifiers.
+    [Tags]    Kubernetes    VAST    PVC    trace    access:read-only    data:config
+    # Run the trace script for the workload namespace; the next step expects its findings in pvc_trace_issues.json.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=trace-pvc-to-vast.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    show_in_rwl_cheatsheet=true
+    ...    cmd_override=CONTEXT="${CONTEXT}" NAMESPACE="${NAMESPACE}" ./trace-pvc-to-vast.sh
+    # Read the issues file back with a plain cat.
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat pvc_trace_issues.json
+    ...    env=${env}
+    ...    include_in_history=false
+    # Parse the JSON array; any parse failure (including empty output) degrades to an empty list with a warning.
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for PVC trace task.    WARN
+        ${issue_list}=    Create List
+    END
+    # Raise one issue per entry; severity, title, details and next_steps come from the parsed JSON.
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=VAST-backed PVCs in `${NAMESPACE}` should bind and expose traceable VAST identifiers
+            ...    actual=PVC trace analysis completed for namespace `${NAMESPACE}`
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+    # The script's stdout is attached to the report whether or not issues were found.
+    RW.Core.Add Pre To Report    PVC to VAST trace report:
+    RW.Core.Add Pre To Report    ${result.stdout}
+
+Check End-to-End Pod Mount Health for VAST Storage in Namespace `${NAMESPACE}`
+    [Documentation]    Identifies pods using VAST CSI volumes with mount failures, VolumeAttachment issues, or NodePublishVolume errors in events.
+    [Tags]    Kubernetes    VAST    pod    mount    access:read-only    data:logs-config
+    # Run the mount health script for the workload namespace; the next step expects its findings in pod_mount_issues.json.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=check-pod-mount-health.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=CONTEXT="${CONTEXT}" NAMESPACE="${NAMESPACE}" ./check-pod-mount-health.sh
+    # Read the issues file back with a plain cat.
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat pod_mount_issues.json
+    ...    env=${env}
+    ...    include_in_history=false
+    # Parse the JSON array; any parse failure (including empty output) degrades to an empty list with a warning.
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for pod mount health task.    WARN
+        ${issue_list}=    Create List
+    END
+    # Raise one issue per entry; severity, title, details and next_steps come from the parsed JSON.
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Pods using VAST PVCs in `${NAMESPACE}` should mount successfully and reach Ready state
+            ...    actual=Mount or attachment issues detected for VAST storage workloads
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+    # The script's stdout is attached to the report whether or not issues were found.
+    RW.Core.Add Pre To Report    Pod mount health analysis:
+    RW.Core.Add Pre To Report    ${result.stdout}
+
+Check VAST StorageClass Configuration for Cluster `${CONTEXT}`
+    [Documentation]    Validates StorageClass parameters such as endpoint, view policy, mount options, and QoS settings for misconfigurations that limit workloads.
+    [Tags]    Kubernetes    VAST    StorageClass    access:read-only    data:config
+    # Run the StorageClass review (only CONTEXT is passed); the next step expects its findings in storageclass_config_issues.json.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=check-vast-storageclass-config.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=CONTEXT="${CONTEXT}" ./check-vast-storageclass-config.sh
+    # Read the issues file back with a plain cat.
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat storageclass_config_issues.json
+    ...    env=${env}
+    ...    include_in_history=false
+    # Parse the JSON array; any parse failure (including empty output) degrades to an empty list with a warning.
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for StorageClass config task.    WARN
+        ${issue_list}=    Create List
+    END
+    # Raise one issue per entry; severity, title, details and next_steps come from the parsed JSON.
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=VAST StorageClasses should define endpoint, view policy, and tenant parameters correctly
+            ...    actual=StorageClass configuration review found gaps on context `${CONTEXT}`
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+    # The script's stdout is attached to the report whether or not issues were found.
+    RW.Core.Add Pre To Report    VAST StorageClass configuration review:
+    RW.Core.Add Pre To Report    ${result.stdout}
+
+Correlate Kubernetes Storage Events with VAST Tenant Metrics for Namespace `${NAMESPACE}`
+    [Documentation]    When VAST_VMS_ENDPOINT is configured, cross-references failing PVCs with tenant capacity and QoS metrics from VMS to distinguish driver vs backend issues.
+    [Tags]    Kubernetes    VAST    VMS    correlation    access:read-only    data:metrics
+    # Step 1: run the correlation script with the kubeconfig and the VMS credentials value set by Suite Initialization.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=correlate-k8s-vast-events.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    secret__vast_vms_credentials=${VMS_CREDENTIALS}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    cmd_override=CONTEXT="${CONTEXT}" NAMESPACE="${NAMESPACE}" VAST_VMS_ENDPOINT="${VAST_VMS_ENDPOINT}" ./correlate-k8s-vast-events.sh
+    # Step 2: read the issues file as raw text (presumably written by the script above - TODO confirm).
+    ${raw}=    RW.CLI.Run Cli
+    ...    cmd=cat vast_correlation_issues.json
+    ...    env=${env}
+    ...    include_in_history=false
+    # Step 3: parse the text as JSON; on any error log a warning and continue with an empty issue list.
+    TRY
+        ${issue_list}=    Evaluate    json.loads(r'''${raw.stdout}''')    json
+    EXCEPT
+        Log    Failed to parse JSON for VMS correlation task.    WARN
+        ${issue_list}=    Create List
+    END
+    # Step 4: raise one issue per parsed entry, taking severity, title, details and next_steps from the entry.
+    IF    len(@{issue_list}) > 0
+        FOR    ${issue}    IN    @{issue_list}
+            RW.Core.Add Issue
+            ...    severity=${issue['severity']}
+            ...    expected=Kubernetes storage symptoms should be explainable by VMS tenant health when backend correlation is enabled
+            ...    actual=VMS correlation analysis completed for namespace `${NAMESPACE}`
+            ...    title=${issue['title']}
+            ...    reproduce_hint=${result.cmd}
+            ...    details=${issue['details']}
+            ...    next_steps=${issue['next_steps']}
+        END
+    END
+    # Step 5: append the script's stdout to the report under a heading line.
+    RW.Core.Add Pre To Report    Kubernetes/VMS correlation results:
+    RW.Core.Add Pre To Report    ${result.stdout}
+
+
+*** Keywords ***
+Suite Initialization
+    ${kubeconfig}=    RW.Core.Import Secret
+    ...    kubeconfig
+    ...    type=string
+    ...    description=Kubernetes kubeconfig for cluster access.
+    ...    pattern=\w*
+    # The VMS secret is optional: if the import raises, log a warning and fall back to an empty value.
+    TRY
+        ${vms_credentials}=    RW.Core.Import Secret
+        ...    vast_vms_credentials
+        ...    type=string
+        ...    description=Optional VMS credentials JSON with USERNAME, PASSWORD or API_TOKEN.
+        ...    pattern=\w*
+        Set Suite Variable    ${VMS_CREDENTIALS}    ${vms_credentials}
+    EXCEPT
+        Log    vast_vms_credentials not found; VMS correlation will skip unless endpoint allows anonymous access.    WARN
+        Set Suite Variable    ${VMS_CREDENTIALS}    ${EMPTY}
+    END
+    # User-configurable inputs: context, namespaces, CLI binary, optional VMS settings, and alert thresholds.
+    ${CONTEXT}=    RW.Core.Import User Variable    CONTEXT
+    ...    type=string
+    ...    description=Kubernetes context name.
+    ...    pattern=\w*
+    ${NAMESPACE}=    RW.Core.Import User Variable    NAMESPACE
+    ...    type=string
+    ...    description=Kubernetes namespace for workload PVC tracing.
+    ...    pattern=\w*
+    ${CSI_NAMESPACE}=    RW.Core.Import User Variable    CSI_NAMESPACE
+    ...    type=string
+    ...    description=Namespace where the VAST CSI driver is installed.
+    ...    pattern=\w*
+    ...    default=vast-csi
+    ${KUBERNETES_DISTRIBUTION_BINARY}=    RW.Core.Import User Variable    KUBERNETES_DISTRIBUTION_BINARY
+    ...    type=string
+    ...    description=Kubernetes CLI binary (kubectl or oc).
+    ...    enum=[kubectl,oc]
+    ...    default=kubectl
+    ${VAST_VMS_ENDPOINT}=    RW.Core.Import User Variable    VAST_VMS_ENDPOINT
+    ...    type=string
+    ...    description=Optional VMS endpoint for backend correlation task.
+    ...    pattern=.*
+    ...    default=
+    ${VAST_CLUSTER_NAME}=    RW.Core.Import User Variable    VAST_CLUSTER_NAME
+    ...    type=string
+    ...    description=Optional VAST cluster name for correlation task titles.
+    ...    pattern=.*
+    ...    default=
+    ${XPRT_PENDING_THRESHOLD}=    RW.Core.Import User Variable    XPRT_PENDING_THRESHOLD
+    ...    type=string
+    ...    description=csi_node_nfs_xprt_pending_requests count that triggers an issue.
+    ...    pattern=^\d+$
+    ...    default=100
+    ${RPC_ERROR_RATE_THRESHOLD}=    RW.Core.Import User Variable    RPC_ERROR_RATE_THRESHOLD
+    ...    type=string
+    ...    description=CSI RPC error rate percent threshold.
+    ...    pattern=^\d+$
+    ...    default=5
+    # Publish every imported value as a suite variable so the tasks in this suite can reference it.
+    Set Suite Variable    ${kubeconfig}    ${kubeconfig}
+    Set Suite Variable    ${CONTEXT}    ${CONTEXT}
+    Set Suite Variable    ${NAMESPACE}    ${NAMESPACE}
+    Set Suite Variable    ${CSI_NAMESPACE}    ${CSI_NAMESPACE}
+    Set Suite Variable    ${KUBERNETES_DISTRIBUTION_BINARY}    ${KUBERNETES_DISTRIBUTION_BINARY}
+    Set Suite Variable    ${VAST_VMS_ENDPOINT}    ${VAST_VMS_ENDPOINT}
+    Set Suite Variable    ${VAST_CLUSTER_NAME}    ${VAST_CLUSTER_NAME}
+    Set Suite Variable    ${XPRT_PENDING_THRESHOLD}    ${XPRT_PENDING_THRESHOLD}
+    Set Suite Variable    ${RPC_ERROR_RATE_THRESHOLD}    ${RPC_ERROR_RATE_THRESHOLD}
+    # Environment handed to the CLI/script calls, written as a JSON object string; KUBECONFIG points at ./<secret key>.
+    Set Suite Variable
+    ...    ${env}
+    ...    {"KUBECONFIG":"./${kubeconfig.key}","CONTEXT":"${CONTEXT}","NAMESPACE":"${NAMESPACE}","CSI_NAMESPACE":"${CSI_NAMESPACE}","KUBERNETES_DISTRIBUTION_BINARY":"${KUBERNETES_DISTRIBUTION_BINARY}","VAST_VMS_ENDPOINT":"${VAST_VMS_ENDPOINT}","VAST_CLUSTER_NAME":"${VAST_CLUSTER_NAME}","XPRT_PENDING_THRESHOLD":"${XPRT_PENDING_THRESHOLD}","RPC_ERROR_RATE_THRESHOLD":"${RPC_ERROR_RATE_THRESHOLD}"}
+    # Verify cluster connectivity with the chosen binary and context.
+    RW.K8sHelper.Verify Cluster Connectivity
+    ...    binary=${KUBERNETES_DISTRIBUTION_BINARY}
+    ...    context=${CONTEXT}
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
diff --git a/codebundles/vast-k8s-csi-health/sli-vast-csi-health-score.sh b/codebundles/vast-k8s-csi-health/sli-vast-csi-health-score.sh
new file mode 100755
index 00000000..7091e1d5
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/sli-vast-csi-health-score.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Lightweight SLI dimensions for VAST CSI health (stdout JSON object).
+# Every dimension is binary: 1 = healthy, 0 = problem detected. Lookups are
+# best effort: a failed listing falls back to an empty result and leaves its
+# dimension at 1, except that an unreachable CSI namespace scores csi_pods 0.
+# REQUIRED ENV VARS: CONTEXT, NAMESPACE, CSI_NAMESPACE
+: "${CONTEXT:?Must set CONTEXT}"
+: "${NAMESPACE:?Must set NAMESPACE}"
+: "${CSI_NAMESPACE:?Must set CSI_NAMESPACE}"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh"
+
+XPRT_PENDING_THRESHOLD="${XPRT_PENDING_THRESHOLD:-100}"
+# Reserved: exported by sli.robot, but no check in this script reads it yet.
+RPC_ERROR_RATE_THRESHOLD="${RPC_ERROR_RATE_THRESHOLD:-5}"
+
+csi_pod_score=1
+pvc_bound_score=1
+mount_score=1
+xprt_score=1
+
+# CSI controller/node readiness
+if k8s get ns "${CSI_NAMESPACE}" -o name &>/dev/null; then
+  node_pods=$(find_csi_node_pods)
+  controller_pods=$(find_csi_controller_pods)
+  pods=$(jq -n --argjson n "$node_pods" --argjson c "$controller_pods" '{items: ($n.items + $c.items)}')
+  # Pods whose Ready condition is missing or not "True".
+  not_ready=$(echo "$pods" | jq '[.items[] | select(
+    ((.status.conditions // []) | map(select(.type=="Ready")) | .[0].status // "False") != "True"
+  )] | length')
+  # Pods with at least one container waiting in CrashLoopBackOff.
+  crash=$(echo "$pods" | jq '[.items[] | select(
+    ([.status.containerStatuses[]? | .state.waiting.reason? // empty] | index("CrashLoopBackOff"))
+  )] | length')
+  [[ "${not_ready:-0}" -gt 0 || "${crash:-0}" -gt 0 ]] && csi_pod_score=0
+else
+  csi_pod_score=0
+fi
+
+# VAST PVC binding in workload namespace
+pvcs=$(list_vast_pvcs_json "${NAMESPACE}")
+if [[ $(jq '.items | length' <<<"$pvcs") -eq 0 ]]; then
+  # The name/annotation filter matched nothing: rescan every PVC through the
+  # provisioner / PV-driver lookups so the checks below see those PVCs too.
+  all=$(k8s get pvc -n "${NAMESPACE}" -o json 2>/dev/null || echo '{"items":[]}')
+  while IFS= read -r line; do
+    [[ -z "$line" ]] && continue
+    is_vast_pvc_json "$line" || continue
+    pvcs=$(jq -c --argjson item "$line" '.items += [$item]' <<<"$pvcs")
+  done < <(jq -c '.items[]?' <<<"$all")
+fi
+unbound=$(jq '[.items[] | select(.status.phase != "Bound")] | length' <<<"$pvcs")
+[[ "${unbound:-0}" -gt 0 ]] && pvc_bound_score=0
+
+# Mount health: pods that use a VAST PVC but are not Ready. One pod listing
+# serves every PVC. Succeeded pods (finished Jobs) are skipped: they never
+# become Ready again, which is not a mount failure.
+claims=$(jq -c '[.items[].metadata.name]' <<<"$pvcs")
+all_pods=$(k8s get pods -n "${NAMESPACE}" -o json 2>/dev/null || echo '{"items":[]}')
+mount_problems=$(jq --argjson claims "$claims" '
+  [.items[]
+    | select(.status.phase != "Succeeded")
+    | select(any(.spec.volumes[]?.persistentVolumeClaim.claimName;
+        . != null and (. as $c | $claims | index($c)) != null))
+    | select(((.status.conditions // []) | map(select(.type == "Ready")) | .[0].status // "False") != "True")
+  ] | length' <<<"$all_pods")
+[[ "${mount_problems:-0}" -gt 0 ]] && mount_score=0
+
+# NFS xprt congestion (best effort; only the first CSI node pod is sampled)
+node_pod=$(find_csi_node_pods | jq -r '.items[0].metadata.name // empty')
+if [[ -n "$node_pod" ]]; then
+  body=$(curl_pod_metrics "$node_pod" "${CSI_NAMESPACE}" "${NODE_METRICS_PORT:-9090}")
+  # Single awk pass reading a here-string. awk consumes all of its input, so
+  # pipefail cannot turn an early-exit SIGPIPE into a false "healthy". Note
+  # that -v needs name=value form; "-v th VALUE" makes awk abort up front.
+  if awk -v th="$XPRT_PENDING_THRESHOLD" '
+    /^csi_node_nfs_xprt_congested_state\{/ && $NF >= 1 { found = 1 }
+    /^csi_node_nfs_xprt_pending_requests\{/ && $NF > th { found = 1 }
+    /^csi_node_nfs_xprt_unhealthy\{/ && $NF >= 1 { found = 1 }
+    END { exit !found }
+  ' <<<"$body"; then
+    xprt_score=0
+  fi
+fi
+
+# Emit the four scores as one JSON object on stdout.
+jq -n \
+  --argjson c "$csi_pod_score" \
+  --argjson p "$pvc_bound_score" \
+  --argjson m "$mount_score" \
+  --argjson x "$xprt_score" \
+  '{csi_pods: $c, pvc_bound: $p, mounts: $m, nfs_xprt: $x}'
diff --git a/codebundles/vast-k8s-csi-health/sli.robot b/codebundles/vast-k8s-csi-health/sli.robot
new file mode 100644
index 00000000..e75c10b2
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/sli.robot
@@ -0,0 +1,103 @@
+*** Settings ***
+Documentation       Measures VAST CSI health by scoring CSI pod readiness, PVC binding, workload mounts, and NFS transport metrics. Produces a value between 0 (failing) and 1 (healthy).
+Metadata            Author    rw-codebundle-agent
+Metadata            Display Name    VAST Data Kubernetes CSI Health
+Metadata            Supports    Kubernetes    VAST    CSI    NFS    storage
+
+Library             BuiltIn
+Library             RW.Core
+Library             RW.CLI
+Library             RW.platform
+Library             Collections
+
+Suite Setup         Suite Initialization
+
+
+*** Keywords ***
+Suite Initialization
+    ${kubeconfig}=    RW.Core.Import Secret
+    ...    kubeconfig
+    ...    type=string
+    ...    description=Kubernetes kubeconfig for cluster access.
+    ...    pattern=\w*
+    ${CONTEXT}=    RW.Core.Import User Variable    CONTEXT
+    ...    type=string
+    ...    description=Kubernetes context name.
+    ...    pattern=\w*
+    ${NAMESPACE}=    RW.Core.Import User Variable    NAMESPACE
+    ...    type=string
+    ...    description=Kubernetes namespace for workload PVC tracing.
+    ...    pattern=\w*
+    ${CSI_NAMESPACE}=    RW.Core.Import User Variable    CSI_NAMESPACE
+    ...    type=string
+    ...    description=Namespace where the VAST CSI driver is installed.
+    ...    pattern=\w*
+    ...    default=vast-csi
+    ${KUBERNETES_DISTRIBUTION_BINARY}=    RW.Core.Import User Variable    KUBERNETES_DISTRIBUTION_BINARY
+    ...    type=string
+    ...    description=Kubernetes CLI binary (kubectl or oc).
+    ...    enum=[kubectl,oc]
+    ...    default=kubectl
+    ${XPRT_PENDING_THRESHOLD}=    RW.Core.Import User Variable    XPRT_PENDING_THRESHOLD
+    ...    type=string
+    ...    description=csi_node_nfs_xprt_pending_requests count that triggers a failing NFS score.
+    ...    pattern=^\d+$
+    ...    default=100
+    ${RPC_ERROR_RATE_THRESHOLD}=    RW.Core.Import User Variable    RPC_ERROR_RATE_THRESHOLD
+    ...    type=string
+    ...    description=CSI RPC error rate percent threshold (reserved for future SLI expansion).
+    ...    pattern=^\d+$
+    ...    default=5
+    # Publish the imported values as suite variables, then build the JSON env string passed to the scoring script.
+    Set Suite Variable    ${kubeconfig}    ${kubeconfig}
+    Set Suite Variable    ${CONTEXT}    ${CONTEXT}
+    Set Suite Variable    ${NAMESPACE}    ${NAMESPACE}
+    Set Suite Variable    ${CSI_NAMESPACE}    ${CSI_NAMESPACE}
+    Set Suite Variable    ${KUBERNETES_DISTRIBUTION_BINARY}    ${KUBERNETES_DISTRIBUTION_BINARY}
+    Set Suite Variable    ${XPRT_PENDING_THRESHOLD}    ${XPRT_PENDING_THRESHOLD}
+    Set Suite Variable    ${RPC_ERROR_RATE_THRESHOLD}    ${RPC_ERROR_RATE_THRESHOLD}
+    Set Suite Variable
+    ...    ${env}
+    ...    {"KUBECONFIG":"./${kubeconfig.key}","CONTEXT":"${CONTEXT}","NAMESPACE":"${NAMESPACE}","CSI_NAMESPACE":"${CSI_NAMESPACE}","KUBERNETES_DISTRIBUTION_BINARY":"${KUBERNETES_DISTRIBUTION_BINARY}","XPRT_PENDING_THRESHOLD":"${XPRT_PENDING_THRESHOLD}","RPC_ERROR_RATE_THRESHOLD":"${RPC_ERROR_RATE_THRESHOLD}"}
+
+
+*** Tasks ***
+Score VAST CSI Health Dimensions for Namespace `${NAMESPACE}`
+    [Documentation]    Runs a compact probe returning binary scores for CSI pods, PVC binding, mounts, and NFS xprt health.
+    [Tags]    access:read-only    data:metrics
+    # Run the probe script; its stdout is parsed below as one JSON object with the keys csi_pods, pvc_bound, mounts, nfs_xprt.
+    ${result}=    RW.CLI.Run Bash File
+    ...    bash_file=sli-vast-csi-health-score.sh
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=120
+    ...    include_in_history=false
+    ...    cmd_override=./sli-vast-csi-health-score.sh
+    # Parse the scores; if parsing, a key lookup, or an integer conversion fails, every dimension is reported as 0.
+    TRY
+        ${dims}=    Evaluate    json.loads(r'''${result.stdout}''')    json
+        ${csi}=    Get From Dictionary    ${dims}    csi_pods
+        ${pvc}=    Get From Dictionary    ${dims}    pvc_bound
+        ${mount}=    Get From Dictionary    ${dims}    mounts
+        ${xprt}=    Get From Dictionary    ${dims}    nfs_xprt
+        ${csi}=    Convert To Integer    ${csi}
+        ${pvc}=    Convert To Integer    ${pvc}
+        ${mount}=    Convert To Integer    ${mount}
+        ${xprt}=    Convert To Integer    ${xprt}
+    EXCEPT
+        Log    SLI JSON parse failed; reporting zero health.    WARN
+        ${csi}=    Convert To Integer    0
+        ${pvc}=    Convert To Integer    0
+        ${mount}=    Convert To Integer    0
+        ${xprt}=    Convert To Integer    0
+    END
+    # Publish each dimension as its own sub-metric.
+    RW.Core.Push Metric    ${csi}    sub_name=csi_pods
+    RW.Core.Push Metric    ${pvc}    sub_name=pvc_bound
+    RW.Core.Push Metric    ${mount}    sub_name=mounts
+    RW.Core.Push Metric    ${xprt}    sub_name=nfs_xprt
+    # Overall score: mean of the four dimensions, rounded to 2 decimals, reported and pushed as the main metric.
+    ${health_score}=    Evaluate    (${csi} + ${pvc} + ${mount} + ${xprt}) / 4.0
+    ${health_score}=    Convert to Number    ${health_score}    2
+    RW.Core.Add to Report    Health Score: ${health_score}
+    RW.Core.Push Metric    ${health_score}
diff --git a/codebundles/vast-k8s-csi-health/trace-pvc-to-vast.sh b/codebundles/vast-k8s-csi-health/trace-pvc-to-vast.sh
new file mode 100755
index 00000000..750c24e7
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/trace-pvc-to-vast.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+# -----------------------------------------------------------------------------
+# REQUIRED ENV VARS: CONTEXT, NAMESPACE
+# Maps PVC -> PV -> StorageClass -> VAST identifiers. Mostly informational (severity 4); a PVC that is not Bound is severity 3.
+# Writes JSON array to pvc_trace_issues.json
+# -----------------------------------------------------------------------------
+: "${CONTEXT:?Must set CONTEXT}"
+: "${NAMESPACE:?Must set NAMESPACE}"
+
+OUTPUT_FILE="pvc_trace_issues.json"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=vast-csi-common.sh
+source "${SCRIPT_DIR}/vast-csi-common.sh"
+
+issues_json='[]'
+trace_report=""
+# EXIT trap: print the human-readable trace on every exit path; xtrace is switched off first to keep it readable.
+print_report() {
+  { set +x; } 2>/dev/null || true
+  echo
+  echo "=== VAST PVC trace for namespace '${NAMESPACE}' (context '${CONTEXT}') ==="
+  echo "${trace_report:-  No VAST-backed PVCs found.}"
+}
+trap print_report EXIT
+
+pvcs_json=$(list_vast_pvcs_json "${NAMESPACE}")
+pvc_count=$(echo "$pvcs_json" | jq '.items | length')
+
+if [[ "$pvc_count" -eq 0 ]]; then
+  # Refine: scan all PVCs and filter by bound PV driver / storage class provisioner
+  all_pvcs=$(k8s get pvc -n "${NAMESPACE}" -o json 2>/dev/null || echo '{"items":[]}')
+  pvcs_json=$(echo "$all_pvcs" | jq -c '{items: []}')
+  while IFS= read -r pvc_line; do
+    [[ -z "$pvc_line" ]] && continue
+    if is_vast_pvc_json "$pvc_line"; then
+      pvcs_json=$(echo "$pvcs_json" | jq -c --argjson item "$pvc_line" '.items += [$item]')
+    fi
+  done < <(echo "$all_pvcs" | jq -c '.items[]?')
+  pvc_count=$(echo "$pvcs_json" | jq '.items | length')
+fi
+
+if [[ "$pvc_count" -eq 0 ]]; then
+  issues_json=$(append_issue "$issues_json" \
+    "No VAST CSI-backed PVCs found in namespace \`${NAMESPACE}\`" \
+    "No PersistentVolumeClaims using csi.vastdata.com (or VAST-named StorageClasses) were discovered." \
+    4 \
+    "Confirm workloads in this namespace use a VAST StorageClass. Adjust generation rules if this namespace should not be monitored.")
+  write_issues "$OUTPUT_FILE" "$issues_json"
+  exit 0
+fi
+# One trace entry (report lines plus exactly one issue) per VAST PVC.
+while IFS= read -r pvc_line; do
+  [[ -z "$pvc_line" ]] && continue
+  pvc_name=$(echo "$pvc_line" | jq -r '.metadata.name')
+  sc_name=$(echo "$pvc_line" | jq -r '.spec.storageClassName // "default"')
+  pv_name=$(echo "$pvc_line" | jq -r '.spec.volumeName // empty')
+  phase=$(echo "$pvc_line" | jq -r '.status.phase // "Unknown"')
+
+  sc_json=$(k8s get storageclass "$sc_name" -o json 2>/dev/null || echo '{}')
+  sc_params=$(echo "$sc_json" | jq -c '.parameters // {}')
+  provisioner=$(echo "$sc_json" | jq -r '.provisioner // "unknown"')
+
+  pv_json='{}'
+  volume_handle=""
+  driver=""
+  view_path=""
+  tenant=""
+  vip=""
+  if [[ -n "$pv_name" ]]; then
+    pv_json=$(k8s get pv "$pv_name" -o json 2>/dev/null || echo '{}')
+    volume_handle=$(echo "$pv_json" | jq -r '.spec.csi.volumeHandle // empty')
+    driver=$(echo "$pv_json" | jq -r '.spec.csi.driver // empty')
+    view_path=$(echo "$pv_json" | jq -r '.spec.csi.volumeAttributes.view_path // .spec.csi.volumeAttributes.root_export // empty')
+    tenant=$(echo "$pv_json" | jq -r '.spec.csi.volumeAttributes.tenant // .spec.csi.volumeAttributes.tenant_name // empty')
+    vip=$(echo "$pv_json" | jq -r '.spec.csi.volumeAttributes.vip // .spec.csi.volumeAttributes.endpoint // empty')
+  fi
+  # Fill any value the PV did not provide from the StorageClass parameters.
+  if [[ -z "$view_path" ]]; then
+    view_path=$(echo "$sc_params" | jq -r '.view_policy // .root_export // .view // empty')
+  fi
+  if [[ -z "$tenant" ]]; then
+    tenant=$(echo "$sc_params" | jq -r '.tenant // .tenant_name // empty')
+  fi
+  if [[ -z "$vip" ]]; then
+    vip=$(echo "$sc_params" | jq -r '.endpoint // .vip_pool // .vip // empty')
+  fi
+
+  trace_report+=$'\n'"--- PVC: ${pvc_name} (phase=${phase})"
+  trace_report+=$'\n'"    StorageClass: ${sc_name} (provisioner=${provisioner})"
+  trace_report+=$'\n'"    PV: ${pv_name:-unbound} (driver=${driver:-n/a})"
+  trace_report+=$'\n'"    volumeHandle: ${volume_handle:-n/a}"
+  trace_report+=$'\n'"    VAST view/path: ${view_path:-unknown}"
+  trace_report+=$'\n'"    tenant: ${tenant:-unknown}"
+  trace_report+=$'\n'"    VIP/endpoint: ${vip:-unknown}"
+  # Classify: not Bound (3), missing CSI metadata (4), else informational trace (4). Driver is matched against VAST_CSI_PROVISIONER so an override applies here too.
+  if [[ "$phase" != "Bound" ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "VAST PVC \`${pvc_name}\` is not Bound in namespace \`${NAMESPACE}\`" \
+      "PVC phase=${phase}. StorageClass=${sc_name}, PV=${pv_name:-none}. Trace: view=${view_path:-?}, tenant=${tenant:-?}, vip=${vip:-?}." \
+      3 \
+      "Inspect PVC events and controller logs in ${CSI_NAMESPACE:-vast-csi}. Verify VMS view policy and quota for tenant ${tenant:-unknown}.")
+  elif [[ -z "$volume_handle" && "$driver" != "${VAST_CSI_PROVISIONER:-csi.vastdata.com}" ]]; then
+    issues_json=$(append_issue "$issues_json" \
+      "VAST PVC \`${pvc_name}\` missing CSI volumeHandle metadata" \
+      "Bound PVC ${pvc_name} lacks parseable VAST identifiers in PV ${pv_name}." \
+      4 \
+      "Describe PV ${pv_name} and confirm the VAST CSI driver populated volumeHandle and volumeAttributes.")
+  else
+    issues_json=$(append_issue "$issues_json" \
+      "VAST storage trace for PVC \`${pvc_name}\` in namespace \`${NAMESPACE}\`" \
+      "PVC ${pvc_name} -> PV ${pv_name} -> SC ${sc_name}. view=${view_path:-unknown}, tenant=${tenant:-unknown}, vip=${vip:-unknown}, volumeHandle=${volume_handle:-n/a}." \
+      4 \
+      "Use this mapping when correlating workload symptoms with VMS tenant/view metrics.")
+  fi
+done < <(echo "$pvcs_json" | jq -c '.items[]')
+
+write_issues "$OUTPUT_FILE" "$issues_json"
diff --git a/codebundles/vast-k8s-csi-health/vast-csi-common.sh b/codebundles/vast-k8s-csi-health/vast-csi-common.sh
new file mode 100755
index 00000000..1c823f90
--- /dev/null
+++ b/codebundles/vast-k8s-csi-health/vast-csi-common.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# Shared helpers for VAST CSI health scripts; the sibling scripts load this file with `source`.
+set -euo pipefail
+# Overridable defaults: the CLI binary and the provisioner names treated as VAST CSI.
+KUBECTL="${KUBERNETES_DISTRIBUTION_BINARY:-kubectl}"
+VAST_CSI_PROVISIONER="${VAST_CSI_PROVISIONER:-csi.vastdata.com}"
+VAST_CSI_PROVISIONER_LEGACY="${VAST_CSI_PROVISIONER_LEGACY:-kubernetes.io/csi/csi.vastdata.com}"
+
+k8s() {
+  "${KUBECTL}" "$@" --context "${CONTEXT}"
+}
+
+is_vast_storage_class() {
+  local sc="$1"
+  [[ -z "$sc" || "$sc" == "null" ]] && return 1
+  local prov
+  prov=$(k8s get storageclass "$sc" -o jsonpath='{.provisioner}' 2>/dev/null || true)
+  [[ "$prov" == "$VAST_CSI_PROVISIONER" || "$prov" == "$VAST_CSI_PROVISIONER_LEGACY" ]] && return 0
+  [[ "$sc" =~ [Vv][Aa][Ss][Tt] ]] && return 0
+  return 1
+}
+
+is_vast_pv() {
+  local pv="$1"
+  [[ -z "$pv" || "$pv" == "null" ]] && return 1
+  local driver
+  driver=$(k8s get pv "$pv" -o jsonpath='{.spec.csi.driver}' 2>/dev/null || true)
+  [[ "$driver" == "$VAST_CSI_PROVISIONER" ]] && return 0
+  return 1
+}
+# Decide whether a single PVC object is VAST-backed.
+# Arguments: $1 - the PVC as a JSON document.
+# Returns:   0 when its StorageClass or its bound PV passes the VAST checks, else 1.
+is_vast_pvc_json() {
+  local pvc_json="$1"
+  local class_name volume_name
+  class_name=$(jq -r '.spec.storageClassName // empty' <<<"$pvc_json")
+  volume_name=$(jq -r '.spec.volumeName // empty' <<<"$pvc_json")
+  # The StorageClass is consulted first; the PV lookup only runs when that check fails.
+  if is_vast_storage_class "$class_name" || is_vast_pv "$volume_name"; then
+    return 0
+  fi
+  return 1
+}
+
+list_vast_pvcs_json() {
+  local ns="${1:?namespace required}"
+  k8s get pvc -n "$ns" -o json 2>/dev/null | jq -c --arg ns "$ns" '
+    .items // [] | map(select(
+      (.spec.storageClassName // "" | test("vast"; "i")) or
+      (.metadata.annotations["volume.beta.kubernetes.io/storage-provisioner"]? // "" | test("vast"; "i"))
+    )) | {items: .}
+  ' || echo '{"items":[]}'
+}
+# Print {"items":[...]} with the pods of ${CSI_NAMESPACE} whose component label, app label, or name matches "node" (case-insensitive); prints an empty list if the pipeline fails.
+find_csi_node_pods() {
+  local ns="${CSI_NAMESPACE:?Must set CSI_NAMESPACE}"
+  k8s get pods -n "$ns" -o json 2>/dev/null | jq -c '
+    {items: [.items[] | select(
+      (.metadata.labels["app.kubernetes.io/component"]? // "" | test("node"; "i")) or
+      (.metadata.labels["app"]? // "" | test("vast.*node|node"; "i")) or
+      (.metadata.name | test("vast.*node|node"; "i"))
+    )]}
+  ' || echo '{"items":[]}'
+}
+# Print {"items":[...]} with the pods of ${CSI_NAMESPACE} whose component label, app label, or name matches "controller" (case-insensitive); prints an empty list if the pipeline fails.
+find_csi_controller_pods() {
+  local ns="${CSI_NAMESPACE:?Must set CSI_NAMESPACE}"
+  k8s get pods -n "$ns" -o json 2>/dev/null | jq -c '
+    {items: [.items[] | select(
+      (.metadata.labels["app.kubernetes.io/component"]? // "" | test("controller"; "i")) or
+      (.metadata.labels["app"]? // "" | test("vast.*controller|controller"; "i")) or
+      (.metadata.name | test("vast.*controller|controller"; "i"))
+    )]}
+  ' || echo '{"items":[]}'
+}
+
+curl_pod_metrics() {
+  local pod="$1"
+  local ns="$2"
+  local port="${3:?port required}"
+  k8s exec -n "$ns" "$pod" -- sh -c "wget -qO- http://127.0.0.1:${port}/metrics 2>/dev/null || curl -sf http://127.0.0.1:${port}/metrics 2>/dev/null" 2>/dev/null || true
+}
+# Fetch /metrics from Service $1 (namespace $2, port $3) through a one-off curl pod (--rm, --restart=Never); stderr and failures are swallowed, so the output may be empty.
+curl_service_metrics() {
+  local svc="$1"
+  local ns="$2"
+  local port="$3"
+  k8s run "vast-metrics-probe-$$" -n "$ns" --rm -i --restart=Never \
+    --image=curlimages/curl:8.5.0 --command -- \
+    curl -sf --max-time 15 "http://${svc}.${ns}.svc.cluster.local:${port}/metrics" 2>/dev/null || true
+}
+# Print a JSON array of {name, ports:[{name, port}]} for Services in ${CSI_NAMESPACE} whose name matches "metrics" or "vast" (case-insensitive); prints [] if the pipeline fails.
+find_metrics_services() {
+  local ns="${CSI_NAMESPACE:?Must set CSI_NAMESPACE}"
+  k8s get svc -n "$ns" -o json 2>/dev/null | jq -c '
+    [.items[] | select(.metadata.name | test("metrics|vast"; "i")) | {
+      name: .metadata.name,
+      ports: [.spec.ports[]? | {name: (.name // ""), port: .port}]
+    }]
+  ' || echo '[]'
+}
+
+append_issue() {
+  local issues_json="$1"
+  local title="$2"
+  local details="$3"
+  local severity="$4"
+  local next_steps="$5"
+  echo "$issues_json" | jq \
+    --arg title "$title" \
+    --arg details "$details" \
+    --argjson severity "$severity" \
+    --arg next_steps "$next_steps" \
+    '. += [{title: $title, details: $details, severity: $severity, next_steps: $next_steps}]'
+}
+
+write_issues() {
+  local file="$1"
+  local issues_json="$2"
+  echo "$issues_json" >"$file"
+}