From fabe182306cb15aba641273741c4609784da654b Mon Sep 17 00:00:00 2001
From: Ahmad Hassan <ahmad.hassan@canonical.com>
Date: Tue, 27 Jan 2026 17:58:12 +0500
Subject: [PATCH 1/5] Add Sunbeam feature functional test suite

---
 .../tests/functional/feature/.gitignore       |  21 ++
 .../tests/functional/feature/README.md        |  64 +++++
 .../tests/functional/feature/__init__.py      |   8 +
 .../tests/functional/feature/conftest.py      |  67 ++++++
 .../functional/feature/features/__init__.py   |   4 +
 .../tests/functional/feature/features/base.py | 218 ++++++++++++++++++
 .../tests/functional/feature/features/caas.py |  49 ++++
 .../tests/functional/feature/features/dns.py  |  54 +++++
 .../feature/features/images_sync.py           |  57 +++++
 .../feature/features/instance_recovery.py     |  56 +++++
 .../feature/features/loadbalancer.py          |  56 +++++
 .../feature/features/observability.py         |  52 +++++
 .../feature/features/orchestration.py         |  56 +++++
 .../feature/features/resource_optimization.py |  51 ++++
 .../feature/features/shared_filesystem.py     |  49 ++++
 .../functional/feature/features/telemetry.py  |  57 +++++
 .../tests/functional/feature/features/tls.py  | 160 +++++++++++++
 .../functional/feature/features/vault.py      |  34 +++
 .../tests/functional/feature/pytest.ini       |  22 ++
 .../tests/functional/feature/requirements.txt |   4 +
 .../feature/test_config.yaml.example          |  13 ++
 .../tests/functional/feature/test_features.py | 126 ++++++++++
 .../functional/feature/utils/__init__.py      |   4 +
 .../tests/functional/feature/utils/juju.py    | 145 ++++++++++++
 .../tests/functional/feature/utils/sunbeam.py |  87 +++++++
 sunbeam-python/tox.ini                        |  25 +-
 26 files changed, 1526 insertions(+), 13 deletions(-)
 create mode 100644 sunbeam-python/tests/functional/feature/.gitignore
 create mode 100644 sunbeam-python/tests/functional/feature/README.md
 create mode 100644 sunbeam-python/tests/functional/feature/__init__.py
 create mode 100644 sunbeam-python/tests/functional/feature/conftest.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/__init__.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/base.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/caas.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/dns.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/images_sync.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/instance_recovery.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/loadbalancer.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/observability.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/orchestration.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/resource_optimization.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/shared_filesystem.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/telemetry.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/tls.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/vault.py
 create mode 100644 sunbeam-python/tests/functional/feature/pytest.ini
 create mode 100644 sunbeam-python/tests/functional/feature/requirements.txt
 create mode 100644 sunbeam-python/tests/functional/feature/test_config.yaml.example
 create mode 100644 sunbeam-python/tests/functional/feature/test_features.py
 create mode 100644 sunbeam-python/tests/functional/feature/utils/__init__.py
 create mode 100644 sunbeam-python/tests/functional/feature/utils/juju.py
 create mode 100644 sunbeam-python/tests/functional/feature/utils/sunbeam.py

diff --git a/sunbeam-python/tests/functional/feature/.gitignore b/sunbeam-python/tests/functional/feature/.gitignore
new file mode 100644
index 000000000..db362e0b5
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/.gitignore
@@ -0,0 +1,21 @@
+test_config.yaml
+features/adminrc
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+venv/
+env/
+ENV/
+
+.vscode/
+.idea/
+*.swp
+*.swo
+
+.pytest_cache/
+.coverage
+htmlcov/
+*.log
diff --git a/sunbeam-python/tests/functional/feature/README.md b/sunbeam-python/tests/functional/feature/README.md
new file mode 100644
index 000000000..e6171e0b8
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/README.md
@@ -0,0 +1,64 @@
+# Sunbeam Feature Functional Tests
+
+Functional tests for Sunbeam feature enablement/disablement. These tests
+connect to an **existing Sunbeam deployment** and run the enable/verify/disable
+lifecycle for each feature, logging timing and basic behaviour checks.
+
+The suite is designed to be run via `tox` from the `sunbeam-python` tree.
+
+## Prerequisites
+
+- **Existing Sunbeam deployment** already bootstrapped and reachable
+- `sunbeam` CLI on `PATH` and configured to talk to that deployment
+  - e.g. `sunbeam deployment list` shows your deployment
+- `openstack` CLI configured for that cloud
+  - e.g. `openstack endpoint list` works
+- `juju` CLI installed and able to access the controller/model that backs the
+  Sunbeam deployment
+
+## Configuration
+
+Create a config file from the example:
+
+```bash
+cd sunbeam-python
+cp tests/functional/feature/test_config.yaml.example tests/functional/feature/test_config.yaml
+```
+
+Then edit `tests/functional/feature/test_config.yaml`:
+
+```yaml
+sunbeam:
+  deployment_name: "ps6"        # Name shown by `sunbeam deployment list`
+
+juju:
+  model: "openstack"            # Juju model backing the cloud
+  # controller: "my-controller" # Optional; auto-detected if omitted
+```
+
+### Run the full feature functional suite
+
+```bash
+tox -e functional-feature
+```
+
+### Run a single feature functional test
+
+You can pass standard `pytest` selectors through tox via `posargs`. For example:
+
+- **Instance Recovery**:
+
+  ```bash
+  tox -e functional-feature -- tests/functional/feature/test_features.py::test_instance_recovery
+  ```
+
+- **TLS CA**:
+
+  ```bash
+  tox -e functional-feature -- tests/functional/feature/test_features.py::test_tls_ca
+  ```
+
+## Notes
+
+- Disable failures are **logged and ignored** so that the suite continues
+  to the next feature, matching the behaviour of the original tests.
diff --git a/sunbeam-python/tests/functional/feature/__init__.py b/sunbeam-python/tests/functional/feature/__init__.py
new file mode 100644
index 000000000..f3adc6677
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Sunbeam feature functional test suite.
+
+These tests exercise `sunbeam enable/disable` for individual features
+against an existing Sunbeam deployment.
+"""
diff --git a/sunbeam-python/tests/functional/feature/conftest.py b/sunbeam-python/tests/functional/feature/conftest.py
new file mode 100644
index 000000000..9bde47e33
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/conftest.py
@@ -0,0 +1,67 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Pytest configuration and fixtures for Sunbeam feature functional tests."""
+
+from pathlib import Path
+
+import pytest
+import yaml
+
+from .utils.juju import JujuClient
+from .utils.sunbeam import SunbeamClient
+
+
+def pytest_addoption(parser):
+    """Add custom command-line options."""
+    parser.addoption(
+        "--config",
+        action="store",
+        default="test_config.yaml",
+        help="Path to test configuration file",
+    )
+
+
+@pytest.fixture(scope="session")
+def test_config(request):
+    """Load test configuration from YAML file."""
+    config_path = request.config.getoption("--config")
+    # Resolve relative to this feature functional directory
+    config_file = Path(__file__).parent / config_path
+
+    if not config_file.exists():
+        pytest.skip(f"Configuration file not found: {config_file}")
+
+    with open(config_file, "r") as f:
+        config = yaml.safe_load(f)
+
+    return config
+
+
+@pytest.fixture(scope="session")
+def sunbeam_client(test_config):
+    """Create Sunbeam client for test session."""
+    deployment_name = test_config.get("sunbeam", {}).get("deployment_name")
+    if not deployment_name:
+        pytest.skip("deployment_name not configured in test_config.yaml")
+
+    client = SunbeamClient(deployment_name)
+
+    if not client.is_connected():
+        pytest.skip(f"Cannot connect to Sunbeam deployment '{deployment_name}'.")
+
+    return client
+
+
+@pytest.fixture(scope="session")
+def juju_client(test_config):
+    """Create Juju client for test session."""
+    model = test_config.get("juju", {}).get("model", "openstack")
+    controller = test_config.get("juju", {}).get("controller")
+
+    client = JujuClient(model=model, controller=controller)
+
+    if not client.is_connected():
+        pytest.skip(f"Cannot connect to Juju model '{model}'.")
+
+    return client
diff --git a/sunbeam-python/tests/functional/feature/features/__init__.py b/sunbeam-python/tests/functional/feature/features/__init__.py
new file mode 100644
index 000000000..0f78a8aa4
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/__init__.py
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Feature test classes for Sunbeam feature functional tests."""
diff --git a/sunbeam-python/tests/functional/feature/features/base.py b/sunbeam-python/tests/functional/feature/features/base.py
new file mode 100644
index 000000000..dcdc7c4b3
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/base.py
@@ -0,0 +1,218 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Base class for Sunbeam feature functional tests."""
+
+import logging
+import os
+import time
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from ..utils.juju import JujuClient
+from ..utils.sunbeam import SunbeamClient
+
+logger = logging.getLogger(__name__)
+
+
+class BaseFeatureTest:
+    """Base class for testing Sunbeam features."""
+
+    feature_name: str = ""
+    expected_units: List[str] = []
+    expected_applications: List[str] = []
+    timeout_seconds: int = 300
+    enable_args: List[str] = []
+    disable_args: List[str] = []
+
+    def __init__(
+        self,
+        sunbeam_client: SunbeamClient,
+        juju_client: JujuClient,
+        config: Optional[Dict] = None,
+    ):
+        self.sunbeam = sunbeam_client
+        self.juju = juju_client
+        self.config = config or {}
+
+        feature_config = self.config.get("features", {}).get(self.feature_name, {})
+        self.expected_units = feature_config.get("expected_units", self.expected_units)
+        self.expected_applications = feature_config.get(
+            "expected_applications",
+            self.expected_applications,
+        )
+        self.timeout_seconds = feature_config.get(
+            "timeout_seconds",
+            self.timeout_seconds,
+        )
+        self.enable_args = feature_config.get("enable_args", self.enable_args)
+        self.disable_args = feature_config.get("disable_args", self.disable_args)
+
+        self._ensure_openstack_env()
+
+    def enable(self) -> bool:
+        """Enable the feature."""
+        logger.info("Enabling feature: '%s'", self.feature_name)
+        return self.sunbeam.enable_feature(
+            self.feature_name,
+            extra_args=self.enable_args,
+        )
+
+    def disable(self) -> bool:
+        """Disable the feature.
+
+        Returns True if successful, False otherwise.
+        """
+        logger.info("Disabling feature: '%s'", self.feature_name)
+        try:
+            return self.sunbeam.disable_feature(
+                self.feature_name,
+                extra_args=self.disable_args,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "Failed to disable feature '%s': %s",
+                self.feature_name,
+                exc,
+            )
+            return False
+
+    def run_full_lifecycle(self) -> bool:
+        """Run enable/disable lifecycle with timing.
+
+        Disable failures are logged but do not fail the overall test.
+        """
+        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
+
+        enable_start = time.time()
+        logger.info("[ENABLE] Starting enable for '%s'...", self.feature_name)
+        enable_success = self.enable()
+        enable_duration = time.time() - enable_start
+        if enable_success:
+            logger.info(
+                "[ENABLE] SUCCESS for '%s' - Time taken: %.2f seconds",
+                self.feature_name,
+                enable_duration,
+            )
+        else:
+            logger.error(
+                "[ENABLE] FAILED for '%s' - Time taken: %.2f seconds",
+                self.feature_name,
+                enable_duration,
+            )
+            return False
+
+        try:
+            self.verify_validate_feature_behavior()
+        except Exception:  # noqa: BLE001
+            logger.exception(
+                "Validation failed for feature '%s' after enable", self.feature_name
+            )
+            # Best-effort cleanup – if disable also fails, log and continue.
+            try:
+                self.disable()
+            except Exception:  # noqa: BLE001
+                logger.warning(
+                    "Disable also failed while handling validation error for '%s'",
+                    self.feature_name,
+                )
+            return False
+
+        disable_start = time.time()
+        logger.info("[DISABLE] Starting disable for '%s'...", self.feature_name)
+        disable_success = self.disable()
+        disable_duration = time.time() - disable_start
+        if disable_success:
+            logger.info(
+                "[DISABLE] SUCCESS for '%s' - Time taken: %.2f seconds",
+                self.feature_name,
+                disable_duration,
+            )
+        else:
+            logger.warning(
+                "[DISABLE] FAILED for '%s' - Time taken: %.2f seconds (continuing anyway)",
+                self.feature_name,
+                disable_duration,
+            )
+
+        total_duration = time.time() - enable_start
+        logger.info(
+            "[SUMMARY] Feature '%s' - Enable: %.2fs, Disable: %.2fs (%s), Total: %.2fs",
+            self.feature_name,
+            enable_duration,
+            disable_duration,
+            "SUCCESS" if disable_success else "FAILED",
+            total_duration,
+        )
+        return True
+
+    def verify_enabled(self) -> None:
+        """Verify that expected applications and units are present.
+
+        This is a boilerplate method for future use. Currently not called
+        by default, but can be overridden in subclasses to add verification.
+        """
+        pass
+
+    def validate_feature_behavior(self) -> None:
+        """Validate that the feature is working correctly.
+
+        This is a boilerplate method for future use. Currently not called
+        by default, but can be overridden in subclasses to add functionality tests.
+        """
+        pass
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Simple verification that feature is enabled and basic check passes.
+
+        This is a simple method that can be called after enable to verify
+        the feature is working. Override in subclasses for feature-specific checks.
+        """
+        logger.info("Verifying feature '%s' is enabled...", self.feature_name)
+        if self.expected_applications:
+            for app in self.expected_applications:
+                if self.juju.has_application(app):
+                    logger.info("Application '%s' found", app)
+                else:
+                    logger.warning(
+                        "Application '%s' not found (may still be deploying)", app
+                    )
+        logger.info("Basic verification completed for feature '%s'", self.feature_name)
+
+    def _ensure_openstack_env(self) -> None:
+        """Load OpenStack credentials from adminrc if needed.
+
+        This avoids repeating sourcing logic across tests and keeps credentials
+        out of the code. If OS_AUTH_URL is already set, this is a no-op.
+        """
+        if os.environ.get("OS_AUTH_URL"):
+            return
+
+        adminrc_path = Path(__file__).resolve().parent / "adminrc"
+        if not adminrc_path.exists():
+            logger.debug(
+                "adminrc file not found at %s; relying on existing environment",
+                adminrc_path,
+            )
+            return
+
+        try:
+            for line in adminrc_path.read_text().splitlines():
+                line = line.strip()
+                if not line or line.startswith("#"):
+                    continue
+                if not line.startswith("export "):
+                    continue
+                _, rest = line.split("export ", 1)
+                if "=" not in rest:
+                    continue
+                key, value = rest.split("=", 1)
+                key = key.strip()
+                value = value.strip().strip('"').strip("'")
+                os.environ.setdefault(key, value)
+            logger.info("Loaded OpenStack credentials from %s", adminrc_path)
+        except Exception:  # noqa: BLE001
+            logger.exception(
+                "Failed to load OpenStack credentials from %s",
+                adminrc_path,
+            )
diff --git a/sunbeam-python/tests/functional/feature/features/caas.py b/sunbeam-python/tests/functional/feature/features/caas.py
new file mode 100644
index 000000000..51de55971
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/caas.py
@@ -0,0 +1,49 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for caas feature.
+
+Container as a Service (Magnum) allows managing Kubernetes clusters via OpenStack.
+Functionality is validated via the Magnum (COE) API.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class CaaSTest(BaseFeatureTest):
+    """Test caas feature enablement/disablement."""
+
+    feature_name = "caas"
+    expected_units: list[str] = []
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Magnum (COE) API is reachable.
+
+        We call `openstack coe cluster list` to confirm the API is up.
+        """
+        logger.info("Verifying CaaS (Magnum) service is available...")
+        try:
+            subprocess.run(
+                ["openstack", "coe", "cluster", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except subprocess.CalledProcessError as exc:
+            logger.warning("Failed to list COE clusters: %s", exc.stderr)
+            raise AssertionError(
+                f"CaaS (Magnum) service not accessible: {exc.stderr}"
+            ) from exc
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying CaaS service: %s", exc)
+            raise AssertionError(f"CaaS service verification failed: {exc}") from exc
+
+        logger.info("CaaS (Magnum) service verified via `openstack coe cluster list`")
diff --git a/sunbeam-python/tests/functional/feature/features/dns.py b/sunbeam-python/tests/functional/feature/features/dns.py
new file mode 100644
index 000000000..597ab79f1
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/dns.py
@@ -0,0 +1,54 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for dns feature.
+
+DNS requires nameservers as arguments, so we use dummy nameservers for testing.
+DNS is a simple feature with no direct feature dependencies (besides the required
+nameservers argument). Functionality is validated via the Designate (DNS) API.
+"""
+
+import logging
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class DnsTest(BaseFeatureTest):
+    """Test dns feature enablement/disablement."""
+
+    feature_name = "dns"
+    # DNS requires nameservers argument - using dummy values for testing
+    enable_args: list[str] = ["ns1.example.com.", "ns2.example.com."]
+    expected_units: list[str] = []
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that DNS as a Service is reachable.
+
+        We call `sunbeam dns address` to confirm that the
+        Designate service is registered and accessible.
+        """
+        logger.info("Verifying DNS service endpoints are available...")
+        try:
+            self.sunbeam.run(["dns", "address"])
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying DNS service: %s", exc)
+            raise AssertionError(f"DNS service verification failed: {exc}") from exc
+
+        logger.info("DNS service endpoints verified via `sunbeam dns address`")
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable dns, perform basic test, then disable it."""
+        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
+
+        self.enable()
+        self.verify_validate_feature_behavior()
+
+        disable_success = self.disable()
+        if not disable_success:
+            logger.warning("DNS disable failed, but continuing test sequence")
+
+        return True
diff --git a/sunbeam-python/tests/functional/feature/features/images_sync.py b/sunbeam-python/tests/functional/feature/features/images_sync.py
new file mode 100644
index 000000000..d4480ee7a
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/images_sync.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for images-sync feature.
+
+Images-sync is a simple feature with no dependencies.
+Functionality is validated via the OpenStack Image API.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class ImagesSyncTest(BaseFeatureTest):
+    """Test images-sync feature enablement/disablement."""
+
+    feature_name = "images-sync"
+    expected_units: list[str] = []
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Image service is reachable.
+
+        We call `openstack image list` to confirm that Glance is responding.
+        """
+        logger.info("Verifying Image service (Glance) is available...")
+        try:
+            subprocess.run(
+                ["openstack", "image", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying Image service: %s", exc)
+            raise AssertionError(f"Image service verification failed: {exc}") from exc
+
+        logger.info("Image service verified via `openstack image list`")
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable images-sync, perform basic test, then disable it."""
+        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
+
+        self.enable()
+        self.verify_validate_feature_behavior()
+
+        disable_success = self.disable()
+        if not disable_success:
+            logger.warning("Images-sync disable failed, but continuing test sequence")
+
+        return True
diff --git a/sunbeam-python/tests/functional/feature/features/instance_recovery.py b/sunbeam-python/tests/functional/feature/features/instance_recovery.py
new file mode 100644
index 000000000..6110c2614
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/instance_recovery.py
@@ -0,0 +1,56 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for instance-recovery feature."""
+
+import subprocess
+
+from .base import BaseFeatureTest
+
+
+class InstanceRecoveryTest(BaseFeatureTest):
+    """Test instance-recovery feature enablement/disablement."""
+
+    # CLI feature name
+    feature_name = "instance-recovery"
+    expected_applications = [
+        "masakari",
+        "masakari-mysql-router",
+        "consul-management",
+        "consul-storage",
+        "consul-tenant",
+    ]
+    expected_units = [
+        "masakari/0",
+        "masakari-mysql-router/0",
+        "consul-management/0",
+        "consul-storage/0",
+        "consul-tenant/0",
+    ]
+    timeout_seconds = 900
+
+    def validate_feature_behavior(self) -> None:
+        """Run a small smoke test against the Masakari API.
+
+        We call `openstack segment list` to confirm Masakari is responding
+        and that the CLI can talk to the Instance Recovery control plane.
+        """
+        cmd = [
+            "openstack",
+            "segment",
+            "list",
+            "-c",
+            "name",
+            "-c",
+            "service_type",
+        ]
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+        if not result.stdout.strip():
+            raise AssertionError("openstack segment list returned no data")
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable instance-recovery, verify resources and behavior, then disable it."""
+        self.enable()
+        self.validate_feature_behavior()
+        self.disable()
+        return True
diff --git a/sunbeam-python/tests/functional/feature/features/loadbalancer.py b/sunbeam-python/tests/functional/feature/features/loadbalancer.py
new file mode 100644
index 000000000..a7e377f35
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/loadbalancer.py
@@ -0,0 +1,56 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for loadbalancer feature.
+
+Loadbalancer is a simple feature with no dependencies.
+Deploys Octavia, the OpenStack Load Balancer as a Service.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class LoadbalancerTest(BaseFeatureTest):
+    """Test loadbalancer feature enablement/disablement."""
+
+    feature_name = "loadbalancer"
+    expected_applications: list[str] = ["octavia"]
+    expected_units: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that loadbalancer service (Octavia) is working."""
+        logger.info("Verifying loadbalancer service (Octavia) is available...")
+
+        try:
+            result = subprocess.run(
+                ["openstack", "loadbalancer", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+            logger.info("Loadbalancer service (Octavia) is accessible")
+            logger.debug("Loadbalancer list output: %s", result.stdout[:200])
+
+        except Exception as e:
+            logger.warning("Error checking loadbalancer service: %s", e)
+            raise AssertionError(f"Loadbalancer service verification failed: {e}")
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable loadbalancer, perform basic test, then disable it."""
+        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
+
+        self.enable()
+        self.verify_validate_feature_behavior()
+
+        disable_success = self.disable()
+        if not disable_success:
+            logger.warning("Loadbalancer disable failed, but continuing test sequence")
+
+        return True
diff --git a/sunbeam-python/tests/functional/feature/features/observability.py b/sunbeam-python/tests/functional/feature/features/observability.py
new file mode 100644
index 000000000..9bef407d0
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/observability.py
@@ -0,0 +1,52 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for observability feature.
+
+Observability integrates Canonical OpenStack with COS.
+
+For this functional test we exercise the simple embedded workflow from the
+documentation:
+
+1. `sunbeam enable observability embedded`
+2. `sunbeam observability dashboard-url`
+3. `sunbeam disable observability embedded`
+"""
+
+import logging
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class ObservabilityTest(BaseFeatureTest):
+    """Test observability feature enablement/disablement."""
+
+    feature_name = "observability"
+    enable_args: list[str] = ["embedded"]
+    disable_args: list[str] = ["embedded"]
+    expected_units: list[str] = []
+    expected_applications: list[str] = []
+    timeout_seconds = 900
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the observability dashboard URL is available.
+
+        This uses `sunbeam observability dashboard-url` from the docs to
+        confirm that the embedded COS deployment is responding.
+        """
+        logger.info("Fetching observability dashboard URL...")
+        try:
+            result = self.sunbeam.run(["observability", "dashboard-url"])
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "Error while retrieving observability dashboard URL: %s",
+                exc,
+            )
+            raise AssertionError(
+                f"Observability feature verification failed: {exc}"
+            ) from exc
+
+        url = result.stdout.strip()
+        logger.info("Observability dashboard URL: %s", url)
diff --git a/sunbeam-python/tests/functional/feature/features/orchestration.py b/sunbeam-python/tests/functional/feature/features/orchestration.py
new file mode 100644
index 000000000..0f9a502e6
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/orchestration.py
@@ -0,0 +1,56 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for orchestration feature.
+
+Orchestration is a simple feature with no dependencies.
+Deploys Heat, the OpenStack Orchestration service.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class OrchestrationTest(BaseFeatureTest):
+    """Test orchestration feature enablement/disablement."""
+
+    feature_name = "orchestration"
+    expected_applications: list[str] = ["heat"]
+    expected_units: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that orchestration service (Heat) is working."""
+        logger.info("Verifying orchestration service (Heat) is available...")
+
+        try:
+            result = subprocess.run(
+                ["openstack", "stack", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+            logger.info("Orchestration service (Heat) is accessible")
+            logger.debug("Stack list output: %s", result.stdout[:200])
+
+        except Exception as e:
+            logger.warning("Error checking orchestration service: %s", e)
+            raise AssertionError(f"Orchestration service verification failed: {e}")
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable orchestration, perform basic test, then disable it."""
+        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
+
+        self.enable()
+        self.verify_validate_feature_behavior()
+
+        disable_success = self.disable()
+        if not disable_success:
+            logger.warning("Orchestration disable failed, but continuing test sequence")
+
+        return True
diff --git a/sunbeam-python/tests/functional/feature/features/resource_optimization.py b/sunbeam-python/tests/functional/feature/features/resource_optimization.py
new file mode 100644
index 000000000..c6828d22b
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/resource_optimization.py
@@ -0,0 +1,51 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for resource-optimization feature.
+
+Resource Optimization provides Watcher as a service.
+Functionality is validated via the Watcher (optimize) API.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class ResourceOptimizationTest(BaseFeatureTest):
+    """Test resource-optimization feature enablement/disablement."""
+
+    feature_name = "resource-optimization"
+    expected_units: list[str] = []
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Watcher (resource optimization) API is reachable.
+
+        We call `openstack optimize goal list` to confirm the API is up.
+        """
+        logger.info("Verifying Resource Optimization (Watcher) service is available...")
+        try:
+            subprocess.run(
+                ["openstack", "optimize", "goal", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "Error while verifying Resource Optimization service: %s",
+                exc,
+            )
+            raise AssertionError(
+                f"Resource Optimization service verification failed: {exc}"
+            ) from exc
+
+        logger.info(
+            "Resource Optimization service verified via `openstack optimize goal list`"
+        )
diff --git a/sunbeam-python/tests/functional/feature/features/shared_filesystem.py b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py
new file mode 100644
index 000000000..66117213b
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py
@@ -0,0 +1,49 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for shared-filesystem feature.
+
+Shared Filesystems provides Manila-based file share services.
+Functionality is validated via the Manila API.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class SharedFilesystemTest(BaseFeatureTest):
+    """Test shared-filesystem feature enablement/disablement."""
+
+    feature_name = "shared-filesystem"
+    expected_units: list[str] = []
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Shared Filesystems (Manila) API is reachable.
+
+        We call `openstack share list` to confirm the API is up.
+        """
+        logger.info("Verifying Shared Filesystems (Manila) service is available...")
+        try:
+            subprocess.run(
+                ["openstack", "share", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "Error while verifying Shared Filesystems service: %s",
+                exc,
+            )
+            raise AssertionError(
+                f"Shared Filesystems service verification failed: {exc}"
+            ) from exc
+
+        logger.info("Shared Filesystems service verified via `openstack share list`")
diff --git a/sunbeam-python/tests/functional/feature/features/telemetry.py b/sunbeam-python/tests/functional/feature/features/telemetry.py
new file mode 100644
index 000000000..6fbd79b81
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/telemetry.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for telemetry feature.
+
+Telemetry is a simple feature with no dependencies.
+Deploys Ceilometer, Aodh, Gnocchi, and OpenStack Exporter.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class TelemetryTest(BaseFeatureTest):
+    """Test telemetry feature enablement/disablement."""
+
+    feature_name = "telemetry"
+    expected_applications: list[str] = ["ceilometer", "gnocchi", "aodh"]
+    expected_units: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that telemetry services are working."""
+        logger.info("Verifying telemetry services are available...")
+
+        # Check if alarm service (Aodh) is accessible
+        try:
+            result = subprocess.run(
+                ["openstack", "alarm", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+            logger.info("Telemetry alarm service (Aodh) is accessible")
+            logger.debug("Alarm list output: %s", result.stdout[:200])
+
+        except Exception as e:
+            logger.warning("Error checking telemetry services: %s", e)
+            raise AssertionError(f"Telemetry service verification failed: {e}")
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable telemetry, perform basic test, then disable it."""
+        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
+
+        self.enable()
+        self.verify_validate_feature_behavior()
+
+        disable_success = self.disable()
+        if not disable_success:
+            logger.warning("Telemetry disable failed, but continuing test sequence")
+
+        return True
diff --git a/sunbeam-python/tests/functional/feature/features/tls.py b/sunbeam-python/tests/functional/feature/features/tls.py
new file mode 100644
index 000000000..17a856292
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/tls.py
@@ -0,0 +1,160 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tests for tls feature (CA mode).
+
+TLS enablement has multiple methods in Sunbeam, but this functional test
+suite only exercises the TLS CA path:
+
+- TLS CA: `sunbeam enable tls ca` (requires CA certificates)
+"""
+
+import base64
+import logging
+import subprocess
+import tempfile
+import time
+from pathlib import Path
+from typing import Tuple
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+def generate_self_signed_ca_certificate() -> Tuple[str, str]:
+    """Generate a self-signed CA certificate.
+
+    Returns a tuple of (ca_cert_base64, ca_chain_base64). For a simple self-signed CA,
+    the chain is the same as the cert. TLS CA currently only uses the CA certificate.
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp_path = Path(tmpdir)
+
+        key_path = tmp_path / "ca.key"
+        subprocess.run(
+            ["openssl", "genrsa", "-out", str(key_path), "4096"],
+            check=True,
+            capture_output=True,
+        )
+
+        cert_path = tmp_path / "ca.crt"
+        subprocess.run(
+            [
+                "openssl",
+                "req",
+                "-new",
+                "-x509",
+                "-days",
+                "365",
+                "-key",
+                str(key_path),
+                "-out",
+                str(cert_path),
+                "-subj",
+                "/C=US/ST=State/L=City/O=TestOrg/CN=TestCA",
+                "-extensions",
+                "v3_ca",
+                "-config",
+                "/dev/stdin",
+            ],
+            input=b"""[req]
+distinguished_name = req_distinguished_name
+[req_distinguished_name]
+[v3_ca]
+basicConstraints = critical,CA:TRUE
+keyUsage = critical,keyCertSign,cRLSign
+subjectKeyIdentifier = hash
+authorityKeyIdentifier = keyid:always,issuer
+""",
+            check=True,
+            capture_output=True,
+        )
+
+        ca_cert = cert_path.read_text()
+        ca_cert_base64 = base64.b64encode(ca_cert.encode()).decode()
+
+        ca_chain_base64 = ca_cert_base64
+
+        return (ca_cert_base64, ca_chain_base64)
+
+
+class TlsCaTest(BaseFeatureTest):
+    """Test TLS CA mode enablement/disablement.
+
+    TLS CA mode uses Certificate Authority certificates for TLS.
+    This test verifies that:
+    - TLS CA can be enabled (with self-signed CA certificates)
+    - Endpoints are exposed over HTTPS (both public and internal)
+    - Basic OpenStack operations work (e.g., listing images)
+    """
+
+    feature_name = "tls"
+    enable_args: list[str] = []
+    disable_args: list[str] = ["ca"]
+    expected_applications = [
+        "manual-tls-certificates",
+    ]
+    expected_units = [
+        "manual-tls-certificates/0",
+    ]
+    timeout_seconds = 600
+
+    def __init__(self, *args, **kwargs):
+        """Initialize and generate CA certificates."""
+        super().__init__(*args, **kwargs)
+        self.ca_cert_base64, _ = generate_self_signed_ca_certificate()
+        self.enable_args = [
+            "ca",
+            "--ca",
+            self.ca_cert_base64,
+        ]
+
+    def enable(self) -> bool:
+        """Enable TLS CA feature (without --accept-defaults flag)."""
+        logger.info("Enabling feature: '%s'", self.feature_name)
+        return self.sunbeam.enable_feature(
+            self.feature_name,
+            extra_args=self.enable_args,
+        )
+
+    def disable(self) -> bool:
+        """Disable TLS CA feature (without --accept-defaults flag)."""
+        logger.info("Disabling feature: '%s'", self.feature_name)
+        try:
+            return self.sunbeam.disable_feature(
+                self.feature_name,
+                extra_args=self.disable_args,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "Failed to disable feature '%s': %s",
+                self.feature_name,
+                exc,
+            )
+            return False
+
+    def _ensure_tls_ca_disabled(self) -> bool:
+        """Ensure TLS CA is disabled before enabling (cleanup from previous runs)."""
+        if self.juju.has_application("manual-tls-certificates"):
+            logger.info("TLS CA is already enabled, disabling first...")
+            try:
+                self.disable()
+                # Wait a bit for cleanup
+                time.sleep(5)
+            except Exception as exc:  # noqa: BLE001
+                logger.warning("Failed to disable existing TLS CA: %s", exc)
+                return False
+        return True
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable TLS CA, perform basic test, then disable it."""
+        if not self._ensure_tls_ca_disabled():
+            logger.warning("Could not ensure TLS CA is disabled, continuing anyway...")
+
+        self.enable()
+        disable_success = self.disable()
+        if not disable_success:
+            logger.warning("TLS CA disable failed, but continuing test sequence")
+
+        return True
diff --git a/sunbeam-python/tests/functional/feature/features/vault.py b/sunbeam-python/tests/functional/feature/features/vault.py
new file mode 100644
index 000000000..78bd44290
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/vault.py
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for vault feature.
+
+Vault provides the HashiCorp Vault service used by other features.
+Functionality is validated via the `sunbeam vault status` command.
+"""
+
+import logging
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class VaultTest(BaseFeatureTest):
+    """Test vault feature enablement/disablement."""
+
+    feature_name = "vault"
+    expected_units: list[str] = []
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that Vault is reachable via sunbeam."""
+        logger.info("Verifying Vault status via `sunbeam vault status`...")
+        try:
+            self.sunbeam.run(["vault", "status"])
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying Vault service: %s", exc)
+            raise AssertionError(f"Vault service verification failed: {exc}") from exc
+
+        logger.info("Vault service verified via `sunbeam vault status`")
diff --git a/sunbeam-python/tests/functional/feature/pytest.ini b/sunbeam-python/tests/functional/feature/pytest.ini
new file mode 100644
index 000000000..acba1481b
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/pytest.ini
@@ -0,0 +1,22 @@
+[pytest]
+# Pytest configuration for feature functional tests
+
+# Test discovery patterns
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+
+markers =
+    functional: marks tests as functional (deselect with '-m "not functional"')
+    slow: marks tests as slow (deselect with '-m "not slow"')
+
+addopts =
+    -v
+    --tb=short
+    --strict-markers
+
+timeout = 1800
+
+log_cli = true
+log_cli_level = INFO
+
diff --git a/sunbeam-python/tests/functional/feature/requirements.txt b/sunbeam-python/tests/functional/feature/requirements.txt
new file mode 100644
index 000000000..7ef618ac2
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/requirements.txt
@@ -0,0 +1,4 @@
+pytest>=7.4.0
+pytest-timeout>=2.1.0
+pyyaml>=6.0
+jubilant>=1.0.0
diff --git a/sunbeam-python/tests/functional/feature/test_config.yaml.example b/sunbeam-python/tests/functional/feature/test_config.yaml.example
new file mode 100644
index 000000000..2507a8535
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/test_config.yaml.example
@@ -0,0 +1,13 @@
+# Sunbeam Feature Functional Test Configuration
+# Copy this file to test_config.yaml and fill in your values
+
+sunbeam:
+  # Deployment name in Sunbeam (from `sunbeam deployment list`)
+  deployment_name: "ps6"
+
+juju:
+  # Juju model name (default: "openstack")
+  model: "openstack"
+  # Juju controller (auto-detected from sunbeam if not specified)
+  # controller: "your-controller"
+
diff --git a/sunbeam-python/tests/functional/feature/test_features.py b/sunbeam-python/tests/functional/feature/test_features.py
new file mode 100644
index 000000000..11b748907
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/test_features.py
@@ -0,0 +1,126 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Functional tests for Sunbeam features.
+
+These tests connect to an existing Sunbeam cluster and test feature
+enablement/disablement lifecycle.
+"""
+
+import logging
+
+import pytest
+
+from .features.caas import CaaSTest
+from .features.dns import DnsTest
+from .features.images_sync import ImagesSyncTest
+from .features.instance_recovery import InstanceRecoveryTest
+from .features.loadbalancer import LoadbalancerTest
+from .features.observability import ObservabilityTest
+from .features.orchestration import OrchestrationTest
+from .features.resource_optimization import ResourceOptimizationTest
+from .features.shared_filesystem import SharedFilesystemTest
+from .features.telemetry import TelemetryTest
+from .features.tls import TlsCaTest
+from .features.vault import VaultTest
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.functional
+def test_instance_recovery(sunbeam_client, juju_client, test_config):
+    """Test instance-recovery feature lifecycle (enable/disable with verification)."""
+    feature_test = InstanceRecoveryTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Instance recovery feature test failed"
+
+
+@pytest.mark.functional
+def test_caas(sunbeam_client, juju_client, test_config):
+    """Test caas feature lifecycle (enable/disable only)."""
+    feature_test = CaaSTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "CaaS feature test failed"
+
+
+@pytest.mark.functional
+def test_dns(sunbeam_client, juju_client, test_config):
+    """Test dns feature lifecycle (enable/disable only)."""
+    feature_test = DnsTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "DNS feature test failed"
+
+
+@pytest.mark.functional
+def test_images_sync(sunbeam_client, juju_client, test_config):
+    """Test images-sync feature lifecycle (enable/disable only)."""
+    feature_test = ImagesSyncTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Images-sync feature test failed"
+
+
+@pytest.mark.functional
+def test_loadbalancer(sunbeam_client, juju_client, test_config):
+    """Test loadbalancer feature lifecycle (enable/disable only)."""
+    feature_test = LoadbalancerTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Loadbalancer feature test failed"
+
+
+@pytest.mark.functional
+def test_orchestration(sunbeam_client, juju_client, test_config):
+    """Test orchestration feature lifecycle (enable/disable only)."""
+    feature_test = OrchestrationTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Orchestration feature test failed"
+
+
+@pytest.mark.functional
+def test_resource_optimization(sunbeam_client, juju_client, test_config):
+    """Test resource-optimization feature lifecycle (enable/disable only)."""
+    feature_test = ResourceOptimizationTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), (
+        "Resource-optimization feature test failed"
+    )
+
+
+@pytest.mark.functional
+def test_shared_filesystem(sunbeam_client, juju_client, test_config):
+    """Test shared-filesystem feature lifecycle (enable/disable only)."""
+    feature_test = SharedFilesystemTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Shared-filesystem feature test failed"
+
+
+@pytest.mark.functional
+def test_telemetry(sunbeam_client, juju_client, test_config):
+    """Test telemetry feature lifecycle (enable/disable only)."""
+    feature_test = TelemetryTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Telemetry feature test failed"
+
+
+@pytest.mark.functional
+def test_observability(sunbeam_client, juju_client, test_config):
+    """Test observability feature lifecycle (enable/disable only)."""
+    feature_test = ObservabilityTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Observability feature test failed"
+
+
+@pytest.mark.functional
+def test_maintenance(sunbeam_client, juju_client, test_config):
+    """Placeholder for maintenance feature test (not yet enabled)."""
+    pytest.skip("maintenance feature test not yet enabled in CI")
+
+
+@pytest.mark.functional
+def test_pro(sunbeam_client, juju_client, test_config):
+    """Placeholder for pro feature test (not yet enabled)."""
+    pytest.skip("pro feature test not yet enabled in CI")
+
+
+@pytest.mark.functional
+def test_tls_ca(sunbeam_client, juju_client, test_config):
+    """Test TLS CA mode lifecycle (enable/disable with verification)."""
+    feature_test = TlsCaTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "TLS CA feature test failed"
+
+
+@pytest.mark.functional
+def test_vault(sunbeam_client, juju_client, test_config):
+    """Test vault feature lifecycle (enable/disable only)."""
+    feature_test = VaultTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Vault feature test failed"
diff --git a/sunbeam-python/tests/functional/feature/utils/__init__.py b/sunbeam-python/tests/functional/feature/utils/__init__.py
new file mode 100644
index 000000000..42e1068cd
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/utils/__init__.py
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Utility wrappers for Sunbeam feature functional tests."""
diff --git a/sunbeam-python/tests/functional/feature/utils/juju.py b/sunbeam-python/tests/functional/feature/utils/juju.py
new file mode 100644
index 000000000..384711460
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/utils/juju.py
@@ -0,0 +1,145 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Juju CLI wrapper using Jubilant library for feature functional tests."""
+
+import json
+import logging
+from typing import Dict, List, Optional, Set
+
+from jubilant import Juju
+
+logger = logging.getLogger(__name__)
+
+
+class JujuClient:
+    """Client for interacting with Juju using Jubilant."""
+
+    def __init__(self, model: str = "openstack", controller: Optional[str] = None):
+        self.model = model
+        self.controller = controller
+        self._juju: Optional[Juju] = None
+
+    @property
+    def juju(self) -> Juju:
+        """Get or create Jubilant Juju instance."""
+        if self._juju is None:
+            self._juju = Juju()
+            if self.model:
+                try:
+                    self._juju.cli("switch", self.model)
+                except Exception as exc:  # noqa: BLE001
+                    # Log but continue - the model might already be active
+                    logger.debug("Could not switch to model %s: %s", self.model, exc)
+        return self._juju
+
+    def is_connected(self) -> bool:
+        """Check if we can connect to Juju."""
+        result = self.juju.cli("status", "--format", "json")
+        return bool(result)
+
+    def get_applications(self) -> Set[str]:
+        """Get list of all applications in the model."""
+        result_str = self.juju.cli("status", "--format", "json")
+        status = json.loads(result_str)
+        applications: Set[str] = set()
+
+        if "applications" in status:
+            applications.update(status["applications"].keys())
+
+        return applications
+
+    def get_units(self) -> Set[str]:
+        """Get list of all units in the model."""
+        result_str = self.juju.cli("status", "--format", "json")
+        status = json.loads(result_str)
+        units: Set[str] = set()
+
+        if "applications" in status:
+            for app_data in status["applications"].values():
+                if "units" in app_data:
+                    for unit_name in app_data["units"].keys():
+                        units.add(unit_name)
+
+        return units
+
+    def has_application(self, application_name: str) -> bool:
+        """Check if an application exists."""
+        applications = self.get_applications()
+        return application_name in applications
+
+    def has_unit(self, unit_name: str) -> bool:
+        """Check if a unit exists."""
+        units = self.get_units()
+        return unit_name in units
+
+    def wait_for_application(self, application_name: str, timeout: int = 300) -> bool:
+        """Wait for an application to appear using Jubilant's wait mechanism."""
+        if self.has_application(application_name):
+            logger.info(
+                "Application '%s' already exists, skipping wait",
+                application_name,
+            )
+            return True
+
+        def app_exists(status) -> bool:
+            return hasattr(status, "apps") and application_name in status.apps
+
+        self.juju.wait(app_exists, timeout=timeout, delay=1.0)
+        return True
+
+    def wait_for_unit(self, unit_name: str, timeout: int = 300) -> bool:
+        """Wait for a unit to appear using Jubilant's wait mechanism."""
+        if self.has_unit(unit_name):
+            logger.info("Unit '%s' already exists, skipping wait", unit_name)
+            return True
+
+        def unit_exists(status) -> bool:
+            if not hasattr(status, "apps"):
+                return False
+            for app_data in status.apps.values():
+                if hasattr(app_data, "units") and unit_name in app_data.units:
+                    return True
+            return False
+
+        self.juju.wait(unit_exists, timeout=timeout, delay=1.0)
+        return True
+
+    def wait_for_application_ready(
+        self,
+        application_name: str,
+        timeout: int = 600,
+    ) -> bool:
+        """Wait for an application to be in 'active' state."""
+
+        def app_active(status) -> bool:
+            if not hasattr(status, "apps") or application_name not in status.apps:
+                return False
+            app = status.apps[application_name]
+            return hasattr(app, "app_status") and app.app_status.current == "active"
+
+        self.juju.wait(app_active, timeout=timeout, delay=1.0)
+        return True
+
+    def verify_applications_exist(
+        self,
+        expected_applications: List[str],
+    ) -> Dict[str, bool]:
+        """Verify that expected applications exist."""
+        actual_applications = self.get_applications()
+        results: Dict[str, bool] = {}
+
+        for app in expected_applications:
+            results[app] = app in actual_applications
+
+        return results
+
+    def verify_units_exist(self, expected_units: List[str]) -> Dict[str, bool]:
+        """Verify that expected units exist."""
+        actual_units = self.get_units()
+        results: Dict[str, bool] = {}
+
+        for unit in expected_units:
+            results[unit] = unit in actual_units
+
+        return results
diff --git a/sunbeam-python/tests/functional/feature/utils/sunbeam.py b/sunbeam-python/tests/functional/feature/utils/sunbeam.py
new file mode 100644
index 000000000..307b67b99
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/utils/sunbeam.py
@@ -0,0 +1,87 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Sunbeam CLI wrapper for feature functional tests."""
+
+import logging
+import subprocess
+from typing import List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class SunbeamClient:
+    """Client for interacting with Sunbeam CLI."""
+
+    def __init__(self, deployment_name: str):
+        self.deployment_name = deployment_name
+        self._sunbeam_cmd = "/snap/bin/sunbeam"
+
+    def _run_command(self, command: List[str]) -> subprocess.CompletedProcess:
+        """Run a sunbeam command and return the result."""
+        full_command = [self._sunbeam_cmd] + command
+        logger.debug("Running: %s", " ".join(full_command))
+
+        result = subprocess.run(
+            full_command,
+            capture_output=True,
+            text=True,
+            check=False,
+            timeout=1800,
+        )
+
+        if result.returncode != 0:
+            logger.error(
+                "Command failed with exit code %d: %s",
+                result.returncode,
+                " ".join(full_command),
+            )
+            if result.stderr:
+                logger.error("stderr: %s", result.stderr)
+            if result.stdout:
+                logger.error("stdout: %s", result.stdout)
+            result.check_returncode()
+
+        return result
+
+    def run(self, command: List[str]) -> subprocess.CompletedProcess:
+        """Public helper to run arbitrary sunbeam subcommands."""
+        return self._run_command(command)
+
+    def is_connected(self) -> bool:
+        """Check if we can connect to the Sunbeam deployment."""
+        result = subprocess.run(
+            ["sunbeam", "deployment", "list"],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        return result.returncode == 0 and self.deployment_name in result.stdout
+
+    def enable_feature(
+        self,
+        feature_name: str,
+        extra_args: Optional[List[str]] = None,
+    ) -> bool:
+        """Enable a Sunbeam feature."""
+        cmd: List[str] = ["enable", feature_name]
+        if extra_args:
+            cmd.extend(extra_args)
+
+        self._run_command(cmd)
+        logger.info("Feature '%s' enabled successfully", feature_name)
+        return True
+
+    def disable_feature(
+        self,
+        feature_name: str,
+        extra_args: Optional[List[str]] = None,
+    ) -> bool:
+        """Disable a Sunbeam feature."""
+        cmd: List[str] = ["disable", feature_name]
+        if extra_args:
+            cmd.extend(extra_args)
+
+        self._run_command(cmd)
+        logger.info("Feature '%s' disabled successfully", feature_name)
+        return True
diff --git a/sunbeam-python/tox.ini b/sunbeam-python/tox.ini
index 8dc81acfa..5666df3e7 100644
--- a/sunbeam-python/tox.ini
+++ b/sunbeam-python/tox.ini
@@ -1,9 +1,6 @@
 [tox]
 envlist = unit,pep8,mypy
 skipsdist = True
-# Automatic envs (pyXX) will only use the python version appropriate to that
-# env and ignore basepython inherited from [testenv] if we set
-# ignore_basepython_conflict.
 ignore_basepython_conflict = True
 
 [vars]
@@ -25,14 +22,8 @@ setenv = OS_STDOUT_CAPTURE=1
 description = Sunbeam unit tests
 commands = uv run {[vars]uv_flags} python -m pytest -vv tests/unit {posargs}
 
-# The functional tests may have specific hardware requirements and are currently
-# skipped by default.
 [testenv:functional]
 description = Sunbeam functional tests
-# The snap can't access /tmp, we'll need to place manifests and other temporary
-# files in the home directory. At the same time, we need to expose USER/LOGNAME,
-# otherwise the Sunbeam group won't be initialized correctly and the Sunbeam
-# commands will fail due to missing privileges.
 passenv = USER
           LOGNAME
           USERNAME
@@ -42,6 +33,18 @@ commands = uv run {[vars]uv_flags} \
   --basetemp={env:HOME}/.local/share/openstack/tmp \
   {posargs}
 
+[testenv:functional-feature]
+description = Sunbeam feature functional tests (existing deployment)
+passenv = USER
+          LOGNAME
+          USERNAME
+          HOME
+commands = uv run {[vars]uv_flags} \
+  python -m pytest -s -vv tests/functional/feature \
+  --config=test_config.yaml \
+  --basetemp={env:HOME}/.local/share/openstack/tmp \
+  {posargs}
+
 [testenv:fmt]
 description = Apply coding style standards to code
 deps =
@@ -60,9 +63,6 @@ commands =
 [testenv:mypy]
 commands =
   uv run {[vars]uv_flags} mypy {[vars]src_path}/sunbeam
-  # TODO: consider uncommenting the following line once
-  # the unit tests pass the mypy check.
-  #   uv run {[vars]uv_flags} mypy {[vars]tst_path}/unit
   uv run {[vars]uv_flags} mypy {[vars]tst_path}/functional
 
 [testenv:cover]
@@ -87,7 +87,6 @@ deps =
 commands =
   sphinx-build -a -E -W -d doc/build/doctrees -b html doc/source doc/build/html
   sphinx-build -a -E -W -d doc/build/doctrees -b man doc/source doc/build/man
-  # Validate redirects (must be done after the docs build
   whereto doc/build/html/.htaccess doc/test/redirect-tests.txt
 
 [testenv:releasenotes]

From 0d8b6d0458a13bbdfac5a460bf42ed8c298e1c5f Mon Sep 17 00:00:00 2001
From: Ahmad Hassan <ahmad.hassan@canonical.com>
Date: Mon, 2 Feb 2026 15:22:04 +0500
Subject: [PATCH 2/5] Refine Sunbeam feature functional tests

---
 .../tests/functional/feature/README.md        | 42 ++++++++++++
 .../tests/functional/feature/conftest.py      |  7 +-
 .../functional/feature/features/baremetal.py  | 42 ++++++++++++
 .../tests/functional/feature/features/base.py |  5 +-
 .../tests/functional/feature/features/caas.py | 34 ++++++++++
 .../tests/functional/feature/features/dns.py  | 14 ----
 .../feature/features/images_sync.py           | 14 ----
 .../feature/features/instance_recovery.py     | 14 ----
 .../tests/functional/feature/features/ldap.py | 40 +++++++++++
 .../feature/features/loadbalancer.py          | 14 ----
 .../feature/features/maintenance.py           | 38 +++++++++++
 .../feature/features/observability.py         |  1 -
 .../feature/features/orchestration.py         | 14 ----
 .../tests/functional/feature/features/pro.py  | 61 +++++++++++++++++
 .../feature/features/resource_optimization.py |  1 -
 .../functional/feature/features/secrets.py    | 57 ++++++++++++++++
 .../feature/features/shared_filesystem.py     |  1 -
 .../functional/feature/features/telemetry.py  | 14 ----
 .../tests/functional/feature/features/tls.py  | 37 +---------
 .../functional/feature/features/validation.py | 38 +++++++++++
 .../functional/feature/features/vault.py      |  1 -
 .../tests/functional/feature/test_features.py | 68 +++++++++++++++++--
 .../tests/functional/feature/utils/juju.py    | 10 ---
 .../tests/functional/feature/utils/sunbeam.py |  2 +-
 24 files changed, 427 insertions(+), 142 deletions(-)
 create mode 100644 sunbeam-python/tests/functional/feature/features/baremetal.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/ldap.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/maintenance.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/pro.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/secrets.py
 create mode 100644 sunbeam-python/tests/functional/feature/features/validation.py

diff --git a/sunbeam-python/tests/functional/feature/README.md b/sunbeam-python/tests/functional/feature/README.md
index e6171e0b8..7befa7276 100644
--- a/sunbeam-python/tests/functional/feature/README.md
+++ b/sunbeam-python/tests/functional/feature/README.md
@@ -58,6 +58,48 @@ You can pass standard `pytest` selectors through tox via `posargs`. For example:
   tox -e functional-feature -- tests/functional/feature/test_features.py::test_tls_ca
   ```
 
+## Feature coverage and dependencies
+
+### Features in this suite
+
+- **Enabled in current flow**
+  - `instance-recovery`
+  - `caas` (Containers as a Service)
+  - `dns`
+  - `images-sync`
+  - `loadbalancer`
+  - `resource-optimization`
+  - `shared-filesystem`
+  - `telemetry`
+  - `observability`
+  - `tls` (CA mode)
+  - `vault`
+  - `validation`
+  - `secrets`
+
+- **Present but intentionally disabled for now**
+  - `baremetal`
+  - `ldap`
+  - `maintenance`
+  - `pro`
+
+### Feature dependencies
+
+Some features have explicit dependencies:
+
+- **CaaS (`caas`)**
+  - Depends on: **`secrets`**, **`loadbalancer`**
+  - The CaaS test ensures these dependencies are enabled before running.
+
+- **Secrets as a Service (`secrets`)**
+  - Depends on: **`vault`**
+  - The Secrets test ensures the Vault feature is enabled before running.
+
+- **TLS (Vault-backed)**
+  - TLS can also be deployed in a Vault-backed mode which implicitly depends on
+    the **`vault`** feature. This suite currently exercises only the TLS CA
+    mode (`test_tls_ca`).
+
 ## Notes
 
 - Disable failures are **logged and ignored** so that the suite continues
diff --git a/sunbeam-python/tests/functional/feature/conftest.py b/sunbeam-python/tests/functional/feature/conftest.py
index 9bde47e33..ef5d4cc5e 100644
--- a/sunbeam-python/tests/functional/feature/conftest.py
+++ b/sunbeam-python/tests/functional/feature/conftest.py
@@ -30,7 +30,12 @@ def test_config(request):
     config_file = Path(__file__).parent / config_path
 
     if not config_file.exists():
-        pytest.skip(f"Configuration file not found: {config_file}")
+        msg = (
+            f"Configuration file not found: {config_file}. "
+            "Copy tests/functional/feature/test_config.yaml.example to "
+            "tests/functional/feature/test_config.yaml and set sunbeam.deployment_name, juju.model."
+        )
+        pytest.skip(msg)
 
     with open(config_file, "r") as f:
         config = yaml.safe_load(f)
diff --git a/sunbeam-python/tests/functional/feature/features/baremetal.py b/sunbeam-python/tests/functional/feature/features/baremetal.py
new file mode 100644
index 000000000..57d99bfa8
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/baremetal.py
@@ -0,0 +1,42 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for baremetal feature.
+
+Baremetal provides Ironic-based bare metal provisioning.
+Functionality is validated via the Ironic (baremetal) API.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class BaremetalTest(BaseFeatureTest):
+    """Test baremetal feature enablement/disablement."""
+
+    feature_name = "baremetal"
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Baremetal (Ironic) API is reachable."""
+        logger.info("Verifying Baremetal (Ironic) service is available...")
+        try:
+            subprocess.run(
+                ["openstack", "baremetal", "driver", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying Baremetal service: %s", exc)
+            raise AssertionError(
+                f"Baremetal service verification failed: {exc}"
+            ) from exc
+
+        logger.info("Baremetal service verified via `openstack baremetal driver list`")
diff --git a/sunbeam-python/tests/functional/feature/features/base.py b/sunbeam-python/tests/functional/feature/features/base.py
index dcdc7c4b3..87f647c53 100644
--- a/sunbeam-python/tests/functional/feature/features/base.py
+++ b/sunbeam-python/tests/functional/feature/features/base.py
@@ -19,7 +19,6 @@ class BaseFeatureTest:
     """Base class for testing Sunbeam features."""
 
     feature_name: str = ""
-    expected_units: List[str] = []
     expected_applications: List[str] = []
     timeout_seconds: int = 300
     enable_args: List[str] = []
@@ -36,7 +35,6 @@ def __init__(
         self.config = config or {}
 
         feature_config = self.config.get("features", {}).get(self.feature_name, {})
-        self.expected_units = feature_config.get("expected_units", self.expected_units)
         self.expected_applications = feature_config.get(
             "expected_applications",
             self.expected_applications,
@@ -167,8 +165,11 @@ def verify_validate_feature_behavior(self) -> None:
 
         This is a simple method that can be called after enable to verify
         the feature is working. Override in subclasses for feature-specific checks.
+        Subclasses can override validate_feature_behavior() for behavior checks;
+        that is invoked from here before the application presence checks.
         """
         logger.info("Verifying feature '%s' is enabled...", self.feature_name)
+        self.validate_feature_behavior()
         if self.expected_applications:
             for app in self.expected_applications:
                 if self.juju.has_application(app):
diff --git a/sunbeam-python/tests/functional/feature/features/caas.py b/sunbeam-python/tests/functional/feature/features/caas.py
index 51de55971..02dfa08f2 100644
--- a/sunbeam-python/tests/functional/feature/features/caas.py
+++ b/sunbeam-python/tests/functional/feature/features/caas.py
@@ -10,6 +10,8 @@
 import logging
 import subprocess
 
+import pytest
+
 from .base import BaseFeatureTest
 
 logger = logging.getLogger(__name__)
@@ -23,6 +25,24 @@ class CaaSTest(BaseFeatureTest):
     expected_applications: list[str] = []
     timeout_seconds = 600
 
+    def _ensure_dependency_enabled(self, feature: str) -> bool:
+        """Best-effort enable a required dependency feature.
+
+        If enabling the dependency fails (for example, missing Vault for
+        Secrets), we treat this as an unsatisfied dependency and skip.
+        """
+        logger.info("Ensuring dependency feature '%s' is enabled for CaaS...", feature)
+        try:
+            self.sunbeam.enable_feature(feature)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning(
+                "Failed to enable dependency '%s' required by CaaS: %s",
+                feature,
+                exc,
+            )
+            return False
+        return True
+
     def verify_validate_feature_behavior(self) -> None:
         """Validate that the Magnum (COE) API is reachable.
 
@@ -47,3 +67,17 @@ def verify_validate_feature_behavior(self) -> None:
             raise AssertionError(f"CaaS service verification failed: {exc}") from exc
 
         logger.info("CaaS (Magnum) service verified via `openstack coe cluster list`")
+
+    def run_full_lifecycle(self) -> bool:
+        """Ensure dependencies then run the standard enable/verify/disable flow.
+
+        CaaS depends on the Secrets and Load Balancer features.
+        """
+        for dep in ("secrets", "loadbalancer"):
+            if not self._ensure_dependency_enabled(dep):
+                pytest.skip(
+                    f"Skipping CaaS feature test: dependency '{dep}' "
+                    "could not be enabled"
+                )
+
+        return super().run_full_lifecycle()
diff --git a/sunbeam-python/tests/functional/feature/features/dns.py b/sunbeam-python/tests/functional/feature/features/dns.py
index 597ab79f1..6aa90802a 100644
--- a/sunbeam-python/tests/functional/feature/features/dns.py
+++ b/sunbeam-python/tests/functional/feature/features/dns.py
@@ -21,7 +21,6 @@ class DnsTest(BaseFeatureTest):
     feature_name = "dns"
     # DNS requires nameservers argument - using dummy values for testing
     enable_args: list[str] = ["ns1.example.com.", "ns2.example.com."]
-    expected_units: list[str] = []
     expected_applications: list[str] = []
     timeout_seconds = 600
 
@@ -39,16 +38,3 @@ def verify_validate_feature_behavior(self) -> None:
             raise AssertionError(f"DNS service verification failed: {exc}") from exc
 
         logger.info("DNS service endpoints verified via `sunbeam dns address`")
-
-    def run_full_lifecycle(self) -> bool:
-        """Enable dns, perform basic test, then disable it."""
-        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
-
-        self.enable()
-        self.verify_validate_feature_behavior()
-
-        disable_success = self.disable()
-        if not disable_success:
-            logger.warning("DNS disable failed, but continuing test sequence")
-
-        return True
diff --git a/sunbeam-python/tests/functional/feature/features/images_sync.py b/sunbeam-python/tests/functional/feature/features/images_sync.py
index d4480ee7a..288f8fece 100644
--- a/sunbeam-python/tests/functional/feature/features/images_sync.py
+++ b/sunbeam-python/tests/functional/feature/features/images_sync.py
@@ -19,7 +19,6 @@ class ImagesSyncTest(BaseFeatureTest):
     """Test images-sync feature enablement/disablement."""
 
     feature_name = "images-sync"
-    expected_units: list[str] = []
     expected_applications: list[str] = []
     timeout_seconds = 600
 
@@ -42,16 +41,3 @@ def verify_validate_feature_behavior(self) -> None:
             raise AssertionError(f"Image service verification failed: {exc}") from exc
 
         logger.info("Image service verified via `openstack image list`")
-
-    def run_full_lifecycle(self) -> bool:
-        """Enable images-sync, perform basic test, then disable it."""
-        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
-
-        self.enable()
-        self.verify_validate_feature_behavior()
-
-        disable_success = self.disable()
-        if not disable_success:
-            logger.warning("Images-sync disable failed, but continuing test sequence")
-
-        return True
diff --git a/sunbeam-python/tests/functional/feature/features/instance_recovery.py b/sunbeam-python/tests/functional/feature/features/instance_recovery.py
index 6110c2614..0f51f60ca 100644
--- a/sunbeam-python/tests/functional/feature/features/instance_recovery.py
+++ b/sunbeam-python/tests/functional/feature/features/instance_recovery.py
@@ -20,13 +20,6 @@ class InstanceRecoveryTest(BaseFeatureTest):
         "consul-storage",
         "consul-tenant",
     ]
-    expected_units = [
-        "masakari/0",
-        "masakari-mysql-router/0",
-        "consul-management/0",
-        "consul-storage/0",
-        "consul-tenant/0",
-    ]
     timeout_seconds = 900
 
     def validate_feature_behavior(self) -> None:
@@ -47,10 +40,3 @@ def validate_feature_behavior(self) -> None:
         result = subprocess.run(cmd, check=True, capture_output=True, text=True)
         if not result.stdout.strip():
             raise AssertionError("openstack segment list returned no data")
-
-    def run_full_lifecycle(self) -> bool:
-        """Enable instance-recovery, verify resources and behavior, then disable it."""
-        self.enable()
-        self.validate_feature_behavior()
-        self.disable()
-        return True
diff --git a/sunbeam-python/tests/functional/feature/features/ldap.py b/sunbeam-python/tests/functional/feature/features/ldap.py
new file mode 100644
index 000000000..21aae49d5
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/ldap.py
@@ -0,0 +1,40 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for ldap feature.
+
+LDAP integration configures Keystone to authenticate against LDAP.
+Functionality is minimally validated via the Identity API.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class LdapTest(BaseFeatureTest):
+    """Test ldap feature enablement/disablement."""
+
+    feature_name = "ldap"
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Identity API is reachable."""
+        logger.info("Verifying Identity (Keystone) service is available for LDAP...")
+        try:
+            subprocess.run(
+                ["openstack", "domain", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying Identity service: %s", exc)
+            raise AssertionError(f"LDAP feature verification failed: {exc}") from exc
+
+        logger.info("Identity service verified via `openstack domain list`")
diff --git a/sunbeam-python/tests/functional/feature/features/loadbalancer.py b/sunbeam-python/tests/functional/feature/features/loadbalancer.py
index a7e377f35..1ff3549b9 100644
--- a/sunbeam-python/tests/functional/feature/features/loadbalancer.py
+++ b/sunbeam-python/tests/functional/feature/features/loadbalancer.py
@@ -20,7 +20,6 @@ class LoadbalancerTest(BaseFeatureTest):
 
     feature_name = "loadbalancer"
     expected_applications: list[str] = ["octavia"]
-    expected_units: list[str] = []
     timeout_seconds = 600
 
     def verify_validate_feature_behavior(self) -> None:
@@ -41,16 +40,3 @@ def verify_validate_feature_behavior(self) -> None:
         except Exception as e:
             logger.warning("Error checking loadbalancer service: %s", e)
             raise AssertionError(f"Loadbalancer service verification failed: {e}")
-
-    def run_full_lifecycle(self) -> bool:
-        """Enable loadbalancer, perform basic test, then disable it."""
-        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
-
-        self.enable()
-        self.verify_validate_feature_behavior()
-
-        disable_success = self.disable()
-        if not disable_success:
-            logger.warning("Loadbalancer disable failed, but continuing test sequence")
-
-        return True
diff --git a/sunbeam-python/tests/functional/feature/features/maintenance.py b/sunbeam-python/tests/functional/feature/features/maintenance.py
new file mode 100644
index 000000000..a2eeac6ee
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/maintenance.py
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for maintenance feature."""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class MaintenanceTest(BaseFeatureTest):
+    """Test maintenance feature enablement/disablement."""
+
+    feature_name = "maintenance"
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Compute API is reachable."""
+        logger.info("Verifying Compute service is available for maintenance...")
+        try:
+            subprocess.run(
+                ["openstack", "compute", "service", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying Compute service: %s", exc)
+            raise AssertionError(
+                f"Maintenance feature verification failed: {exc}"
+            ) from exc
+
+        logger.info("Compute service verified via `openstack compute service list`")
diff --git a/sunbeam-python/tests/functional/feature/features/observability.py b/sunbeam-python/tests/functional/feature/features/observability.py
index 9bef407d0..9f6366cd3 100644
--- a/sunbeam-python/tests/functional/feature/features/observability.py
+++ b/sunbeam-python/tests/functional/feature/features/observability.py
@@ -26,7 +26,6 @@ class ObservabilityTest(BaseFeatureTest):
     feature_name = "observability"
     enable_args: list[str] = ["embedded"]
     disable_args: list[str] = ["embedded"]
-    expected_units: list[str] = []
     expected_applications: list[str] = []
     timeout_seconds = 900
 
diff --git a/sunbeam-python/tests/functional/feature/features/orchestration.py b/sunbeam-python/tests/functional/feature/features/orchestration.py
index 0f9a502e6..b09601f56 100644
--- a/sunbeam-python/tests/functional/feature/features/orchestration.py
+++ b/sunbeam-python/tests/functional/feature/features/orchestration.py
@@ -20,7 +20,6 @@ class OrchestrationTest(BaseFeatureTest):
 
     feature_name = "orchestration"
     expected_applications: list[str] = ["heat"]
-    expected_units: list[str] = []
     timeout_seconds = 600
 
     def verify_validate_feature_behavior(self) -> None:
@@ -41,16 +40,3 @@ def verify_validate_feature_behavior(self) -> None:
         except Exception as e:
             logger.warning("Error checking orchestration service: %s", e)
             raise AssertionError(f"Orchestration service verification failed: {e}")
-
-    def run_full_lifecycle(self) -> bool:
-        """Enable orchestration, perform basic test, then disable it."""
-        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
-
-        self.enable()
-        self.verify_validate_feature_behavior()
-
-        disable_success = self.disable()
-        if not disable_success:
-            logger.warning("Orchestration disable failed, but continuing test sequence")
-
-        return True
diff --git a/sunbeam-python/tests/functional/feature/features/pro.py b/sunbeam-python/tests/functional/feature/features/pro.py
new file mode 100644
index 000000000..7948bde8f
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/pro.py
@@ -0,0 +1,61 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for pro feature.
+
+Ubuntu Pro integrates subscription/entitlement with the deployment.
+Functionality is minimally validated via a generic OpenStack service call.
+"""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class ProTest(BaseFeatureTest):
+    """Test pro feature enablement/disablement."""
+
+    feature_name = "pro"
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def __init__(self, *args, **kwargs) -> None:
+        """Initialise Pro test with a token argument for enable.
+
+        The token is taken from the functional test configuration, if present.
+        If no token is configured, a dummy placeholder is used.
+        """
+        super().__init__(*args, **kwargs)
+        pro_cfg = self.config.get("pro", {}) if self.config is not None else {}
+        token = pro_cfg.get("token", "DUMMY-UBUNTU-PRO-TOKEN")
+        self.enable_args = ["--token", token]
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that OpenStack APIs remain reachable under Pro."""
+        logger.info("Verifying OpenStack service catalog for Ubuntu Pro...")
+        try:
+            result = subprocess.run(
+                ["openstack", "service", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except subprocess.CalledProcessError as exc:
+            logger.warning("Failed to list services: %s", exc.stderr)
+            raise AssertionError(
+                f"OpenStack service catalog not accessible: {exc.stderr}"
+            ) from exc
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying OpenStack services: %s", exc)
+            raise AssertionError(
+                f"Ubuntu Pro feature verification failed: {exc}"
+            ) from exc
+
+        if not result.stdout.strip():
+            raise AssertionError("Service list returned no data")
+
+        logger.info("OpenStack service catalog verified via `openstack service list`")
diff --git a/sunbeam-python/tests/functional/feature/features/resource_optimization.py b/sunbeam-python/tests/functional/feature/features/resource_optimization.py
index c6828d22b..8db74bbb2 100644
--- a/sunbeam-python/tests/functional/feature/features/resource_optimization.py
+++ b/sunbeam-python/tests/functional/feature/features/resource_optimization.py
@@ -19,7 +19,6 @@ class ResourceOptimizationTest(BaseFeatureTest):
     """Test resource-optimization feature enablement/disablement."""
 
     feature_name = "resource-optimization"
-    expected_units: list[str] = []
     expected_applications: list[str] = []
     timeout_seconds = 600
 
diff --git a/sunbeam-python/tests/functional/feature/features/secrets.py b/sunbeam-python/tests/functional/feature/features/secrets.py
new file mode 100644
index 000000000..49fdf12da
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/secrets.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for secrets feature."""
+
+import logging
+import subprocess
+
+import pytest
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class SecretsTest(BaseFeatureTest):
+    """Test secrets feature enablement/disablement."""
+
+    feature_name = "secrets"
+    expected_applications: list[str] = []
+    timeout_seconds = 600
+
+    def _ensure_vault_enabled(self) -> bool:
+        """Ensure the Vault feature is enabled before Secrets."""
+        logger.info("Ensuring 'vault' feature is enabled before 'secrets'...")
+        try:
+            self.sunbeam.enable_feature("vault")
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Failed to enable required dependency 'vault': %s", exc)
+            return False
+        return True
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the Secrets (Barbican) API is reachable."""
+        logger.info("Verifying Secrets (Barbican) service is available...")
+        try:
+            subprocess.run(
+                ["openstack", "secret", "list"],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying Secrets service: %s", exc)
+            raise AssertionError(f"Secrets service verification failed: {exc}") from exc
+
+        logger.info("Secrets service verified via `openstack secret list`")
+
+    def run_full_lifecycle(self) -> bool:
+        """Enable Vault first, then run the Secrets lifecycle."""
+        if not self._ensure_vault_enabled():
+            pytest.skip(
+                "Skipping Secrets feature test: dependency 'vault' not available"
+            )
+
+        return super().run_full_lifecycle()
diff --git a/sunbeam-python/tests/functional/feature/features/shared_filesystem.py b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py
index 66117213b..086cdbe41 100644
--- a/sunbeam-python/tests/functional/feature/features/shared_filesystem.py
+++ b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py
@@ -19,7 +19,6 @@ class SharedFilesystemTest(BaseFeatureTest):
     """Test shared-filesystem feature enablement/disablement."""
 
     feature_name = "shared-filesystem"
-    expected_units: list[str] = []
     expected_applications: list[str] = []
     timeout_seconds = 600
 
diff --git a/sunbeam-python/tests/functional/feature/features/telemetry.py b/sunbeam-python/tests/functional/feature/features/telemetry.py
index 6fbd79b81..191867230 100644
--- a/sunbeam-python/tests/functional/feature/features/telemetry.py
+++ b/sunbeam-python/tests/functional/feature/features/telemetry.py
@@ -20,7 +20,6 @@ class TelemetryTest(BaseFeatureTest):
 
     feature_name = "telemetry"
     expected_applications: list[str] = ["ceilometer", "gnocchi", "aodh"]
-    expected_units: list[str] = []
     timeout_seconds = 600
 
     def verify_validate_feature_behavior(self) -> None:
@@ -42,16 +41,3 @@ def verify_validate_feature_behavior(self) -> None:
         except Exception as e:
             logger.warning("Error checking telemetry services: %s", e)
             raise AssertionError(f"Telemetry service verification failed: {e}")
-
-    def run_full_lifecycle(self) -> bool:
-        """Enable telemetry, perform basic test, then disable it."""
-        logger.info("Starting lifecycle test for feature: '%s'", self.feature_name)
-
-        self.enable()
-        self.verify_validate_feature_behavior()
-
-        disable_success = self.disable()
-        if not disable_success:
-            logger.warning("Telemetry disable failed, but continuing test sequence")
-
-        return True
diff --git a/sunbeam-python/tests/functional/feature/features/tls.py b/sunbeam-python/tests/functional/feature/features/tls.py
index 17a856292..cfc8775d5 100644
--- a/sunbeam-python/tests/functional/feature/features/tls.py
+++ b/sunbeam-python/tests/functional/feature/features/tls.py
@@ -1,19 +1,12 @@
 # SPDX-FileCopyrightText: 2024 - Canonical Ltd
 # SPDX-License-Identifier: Apache-2.0
 
-"""Tests for tls feature (CA mode).
-
-TLS enablement has multiple methods in Sunbeam, but this functional test
-suite only exercises the TLS CA path:
-
-- TLS CA: `sunbeam enable tls ca` (requires CA certificates)
-"""
+"""Tests for tls feature (CA mode)."""
 
 import base64
 import logging
 import subprocess
 import tempfile
-import time
 from pathlib import Path
 from typing import Tuple
 
@@ -95,9 +88,6 @@ class TlsCaTest(BaseFeatureTest):
     expected_applications = [
         "manual-tls-certificates",
     ]
-    expected_units = [
-        "manual-tls-certificates/0",
-    ]
     timeout_seconds = 600
 
     def __init__(self, *args, **kwargs):
@@ -133,28 +123,3 @@ def disable(self) -> bool:
                 exc,
             )
             return False
-
-    def _ensure_tls_ca_disabled(self) -> bool:
-        """Ensure TLS CA is disabled before enabling (cleanup from previous runs)."""
-        if self.juju.has_application("manual-tls-certificates"):
-            logger.info("TLS CA is already enabled, disabling first...")
-            try:
-                self.disable()
-                # Wait a bit for cleanup
-                time.sleep(5)
-            except Exception as exc:  # noqa: BLE001
-                logger.warning("Failed to disable existing TLS CA: %s", exc)
-                return False
-        return True
-
-    def run_full_lifecycle(self) -> bool:
-        """Enable TLS CA, perform basic test, then disable it."""
-        if not self._ensure_tls_ca_disabled():
-            logger.warning("Could not ensure TLS CA is disabled, continuing anyway...")
-
-        self.enable()
-        disable_success = self.disable()
-        if not disable_success:
-            logger.warning("TLS CA disable failed, but continuing test sequence")
-
-        return True
diff --git a/sunbeam-python/tests/functional/feature/features/validation.py b/sunbeam-python/tests/functional/feature/features/validation.py
new file mode 100644
index 000000000..c752af5f9
--- /dev/null
+++ b/sunbeam-python/tests/functional/feature/features/validation.py
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: 2024 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Test for validation feature."""
+
+import logging
+import subprocess
+
+from .base import BaseFeatureTest
+
+logger = logging.getLogger(__name__)
+
+
+class ValidationTest(BaseFeatureTest):
+    """Test validation feature enablement/disablement."""
+
+    feature_name = "validation"
+    expected_applications: list[str] = []
+    timeout_seconds = 900
+
+    def verify_validate_feature_behavior(self) -> None:
+        """Validate that the validation CLI is usable."""
+        logger.info("Verifying validation feature via `sunbeam validation profiles`...")
+        try:
+            subprocess.run(
+                ["sunbeam", "validation", "profiles"],
+                capture_output=True,
+                text=True,
+                timeout=60,
+                check=True,
+            )
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Error while verifying validation feature: %s", exc)
+            raise AssertionError(
+                f"Validation feature verification failed: {exc}"
+            ) from exc
+
+        logger.info("Validation feature verified via `sunbeam validation profiles`")
diff --git a/sunbeam-python/tests/functional/feature/features/vault.py b/sunbeam-python/tests/functional/feature/features/vault.py
index 78bd44290..26b71a5d6 100644
--- a/sunbeam-python/tests/functional/feature/features/vault.py
+++ b/sunbeam-python/tests/functional/feature/features/vault.py
@@ -18,7 +18,6 @@ class VaultTest(BaseFeatureTest):
     """Test vault feature enablement/disablement."""
 
     feature_name = "vault"
-    expected_units: list[str] = []
     expected_applications: list[str] = []
     timeout_seconds = 600
 
diff --git a/sunbeam-python/tests/functional/feature/test_features.py b/sunbeam-python/tests/functional/feature/test_features.py
index 11b748907..d6522f627 100644
--- a/sunbeam-python/tests/functional/feature/test_features.py
+++ b/sunbeam-python/tests/functional/feature/test_features.py
@@ -11,17 +11,23 @@
 
 import pytest
 
+from .features.baremetal import BaremetalTest
 from .features.caas import CaaSTest
 from .features.dns import DnsTest
 from .features.images_sync import ImagesSyncTest
 from .features.instance_recovery import InstanceRecoveryTest
+from .features.ldap import LdapTest
 from .features.loadbalancer import LoadbalancerTest
+from .features.maintenance import MaintenanceTest
 from .features.observability import ObservabilityTest
 from .features.orchestration import OrchestrationTest
+from .features.pro import ProTest
 from .features.resource_optimization import ResourceOptimizationTest
+from .features.secrets import SecretsTest
 from .features.shared_filesystem import SharedFilesystemTest
 from .features.telemetry import TelemetryTest
 from .features.tls import TlsCaTest
+from .features.validation import ValidationTest
 from .features.vault import VaultTest
 
 logging.basicConfig(level=logging.INFO)
@@ -35,6 +41,19 @@ def test_instance_recovery(sunbeam_client, juju_client, test_config):
     assert feature_test.run_full_lifecycle(), "Instance recovery feature test failed"
 
 
+@pytest.mark.functional
+@pytest.mark.skip(
+    reason=(
+        "Baremetal feature test is present but intentionally disabled in the "
+        "current feature flow (enable later when ready)."
+    )
+)
+def test_baremetal(sunbeam_client, juju_client, test_config):
+    """Test baremetal feature lifecycle (enable/disable only)."""
+    feature_test = BaremetalTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Baremetal feature test failed"
+
+
 @pytest.mark.functional
 def test_caas(sunbeam_client, juju_client, test_config):
     """Test caas feature lifecycle (enable/disable only)."""
@@ -56,6 +75,19 @@ def test_images_sync(sunbeam_client, juju_client, test_config):
     assert feature_test.run_full_lifecycle(), "Images-sync feature test failed"
 
 
+@pytest.mark.functional
+@pytest.mark.skip(
+    reason=(
+        "LDAP feature test is present but intentionally disabled in the "
+        "current feature flow (enable later when ready)."
+    )
+)
+def test_ldap(sunbeam_client, juju_client, test_config):
+    """Test ldap feature lifecycle (enable/disable only)."""
+    feature_test = LdapTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "LDAP feature test failed"
+
+
 @pytest.mark.functional
 def test_loadbalancer(sunbeam_client, juju_client, test_config):
     """Test loadbalancer feature lifecycle (enable/disable only)."""
@@ -86,6 +118,13 @@ def test_shared_filesystem(sunbeam_client, juju_client, test_config):
     assert feature_test.run_full_lifecycle(), "Shared-filesystem feature test failed"
 
 
+@pytest.mark.functional
+def test_secrets(sunbeam_client, juju_client, test_config):
+    """Test secrets feature lifecycle (enable/disable only)."""
+    feature_test = SecretsTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Secrets feature test failed"
+
+
 @pytest.mark.functional
 def test_telemetry(sunbeam_client, juju_client, test_config):
     """Test telemetry feature lifecycle (enable/disable only)."""
@@ -101,15 +140,29 @@ def test_observability(sunbeam_client, juju_client, test_config):
 
 
 @pytest.mark.functional
+@pytest.mark.skip(
+    reason=(
+        "Maintenance feature test is present but intentionally disabled in the "
+        "current feature flow (enable later when ready)."
+    )
+)
 def test_maintenance(sunbeam_client, juju_client, test_config):
-    """Placeholder for maintenance feature test (not yet enabled)."""
-    pytest.skip("maintenance feature test not yet enabled in CI")
+    """Test maintenance feature lifecycle (enable/disable only)."""
+    feature_test = MaintenanceTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Maintenance feature test failed"
 
 
 @pytest.mark.functional
+@pytest.mark.skip(
+    reason=(
+        "Pro feature test is present but intentionally disabled in the "
+        "current feature flow (enable later when ready)."
+    )
+)
 def test_pro(sunbeam_client, juju_client, test_config):
-    """Placeholder for pro feature test (not yet enabled)."""
-    pytest.skip("pro feature test not yet enabled in CI")
+    """Test pro feature lifecycle (enable/disable only)."""
+    feature_test = ProTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Pro feature test failed"
 
 
 @pytest.mark.functional
@@ -124,3 +177,10 @@ def test_vault(sunbeam_client, juju_client, test_config):
     """Test vault feature lifecycle (enable/disable only)."""
     feature_test = VaultTest(sunbeam_client, juju_client, test_config)
     assert feature_test.run_full_lifecycle(), "Vault feature test failed"
+
+
+@pytest.mark.functional
+def test_validation(sunbeam_client, juju_client, test_config):
+    """Test validation feature lifecycle (enable/disable only)."""
+    feature_test = ValidationTest(sunbeam_client, juju_client, test_config)
+    assert feature_test.run_full_lifecycle(), "Validation feature test failed"
diff --git a/sunbeam-python/tests/functional/feature/utils/juju.py b/sunbeam-python/tests/functional/feature/utils/juju.py
index 384711460..448e85da2 100644
--- a/sunbeam-python/tests/functional/feature/utils/juju.py
+++ b/sunbeam-python/tests/functional/feature/utils/juju.py
@@ -133,13 +133,3 @@ def verify_applications_exist(
             results[app] = app in actual_applications
 
         return results
-
-    def verify_units_exist(self, expected_units: List[str]) -> Dict[str, bool]:
-        """Verify that expected units exist."""
-        actual_units = self.get_units()
-        results: Dict[str, bool] = {}
-
-        for unit in expected_units:
-            results[unit] = unit in actual_units
-
-        return results
diff --git a/sunbeam-python/tests/functional/feature/utils/sunbeam.py b/sunbeam-python/tests/functional/feature/utils/sunbeam.py
index 307b67b99..7982f3de4 100644
--- a/sunbeam-python/tests/functional/feature/utils/sunbeam.py
+++ b/sunbeam-python/tests/functional/feature/utils/sunbeam.py
@@ -27,7 +27,7 @@ def _run_command(self, command: List[str]) -> subprocess.CompletedProcess:
             capture_output=True,
             text=True,
             check=False,
-            timeout=1800,
+            timeout=3600,
         )
 
         if result.returncode != 0:

From 7e6cc1eb83c9f965051949cdd045bcfccaa1d24b Mon Sep 17 00:00:00 2001
From: Ahmad Hassan <ahmad.hassan@canonical.com>
Date: Tue, 3 Feb 2026 15:39:16 +0500
Subject: [PATCH 3/5] Add Chaos Mesh validation resilience tests and shared
 chaos helpers

---
 .../tests/functional/chaos/README.md          |  38 ++++
 .../tests/functional/chaos/__init__.py        |   4 +
 .../tests/functional/chaos/conftest.py        |  10 +
 .../tests/functional/chaos/utils.py           | 189 ++++++++++++++++++
 .../functional/chaos/validation/__init__.py   |   4 +
 .../test_validation_keystone_chaos.py         | 113 +++++++++++
 sunbeam-python/tox.ini                        |  11 +
 7 files changed, 369 insertions(+)
 create mode 100644 sunbeam-python/tests/functional/chaos/README.md
 create mode 100644 sunbeam-python/tests/functional/chaos/__init__.py
 create mode 100644 sunbeam-python/tests/functional/chaos/conftest.py
 create mode 100644 sunbeam-python/tests/functional/chaos/utils.py
 create mode 100644 sunbeam-python/tests/functional/chaos/validation/__init__.py
 create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py

diff --git a/sunbeam-python/tests/functional/chaos/README.md b/sunbeam-python/tests/functional/chaos/README.md
new file mode 100644
index 000000000..94457816e
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/README.md
@@ -0,0 +1,38 @@
+# Chaos Mesh functional tests
+
+Chaos Mesh-based resilience tests for Canonical OpenStack features.
+
+This directory is intentionally separate from the standard feature functional
+tests under `tests/functional/feature` so that chaos experiments can be run
+independently and expanded over time.
+
+## Prerequisites
+
+- A working Canonical OpenStack deployment (same requirements as the feature
+  functional tests).
+- `sunbeam`, `openstack` and `juju` CLIs configured for that deployment.
+- `kubectl` configured to talk to the Kubernetes cluster that backs the
+  OpenStack model.
+- Chaos Mesh installed and running, typically in the `chaos-mesh` namespace.
+
+## Layout
+
+- `validation/`: Chaos tests that target the **validation** feature.
+
+Additional feature-specific chaos tests can be added as new subdirectories
+alongside `validation/`.
+
+## Running the chaos tests
+
+From the `sunbeam-python` tree:
+
+```bash
+tox -e functional-chaos
+```
+
+You can also run individual chaos tests via `pytest`, for example:
+
+```bash
+python -m pytest -s -vv tests/functional/chaos/validation/test_validation_keystone_chaos.py
+```
+
diff --git a/sunbeam-python/tests/functional/chaos/__init__.py b/sunbeam-python/tests/functional/chaos/__init__.py
new file mode 100644
index 000000000..b9a36a80a
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/__init__.py
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Chaos Mesh-based functional tests."""
diff --git a/sunbeam-python/tests/functional/chaos/conftest.py b/sunbeam-python/tests/functional/chaos/conftest.py
new file mode 100644
index 000000000..a437f9031
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/conftest.py
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Functional fixtures for chaos tests."""
+
+from tests.functional.feature.conftest import (  # noqa: F401
+    juju_client,
+    sunbeam_client,
+    test_config,
+)
diff --git a/sunbeam-python/tests/functional/chaos/utils.py b/sunbeam-python/tests/functional/chaos/utils.py
new file mode 100644
index 000000000..ce12212ac
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/utils.py
@@ -0,0 +1,189 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Shared helpers for Chaos Mesh functional tests.
+
+These utilities centralise common operations so that multiple chaos scenarios
+can reuse the same logic for:
+
+- Enabling Sunbeam features.
+- Inspecting Juju status via Jubilant.
+- Waiting for units to become active again.
+- Applying and deleting Chaos Mesh PodChaos resources.
+"""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+import time
+from typing import List, Tuple
+
+import jubilant
+import pytest
+
+logger = logging.getLogger(__name__)
+
+
+def get_leader_and_non_leaders(
+    juju_client,
+    app_name: str,
+) -> Tuple[str, List[str]]:
+    """Return (leader_unit_name, [non_leader_unit_names]) for a Juju app."""
+    logger.info("Querying Juju status for application '%s' units...", app_name)
+
+    status: jubilant.Status = juju_client.juju.status()
+    app = status.apps[app_name]
+
+    leader_unit: str | None = None
+    non_leaders: List[str] = []
+    for unit_name, unit_data in app.units.items():
+        if getattr(unit_data, "leader", False):
+            leader_unit = unit_name
+        else:
+            non_leaders.append(unit_name)
+
+    if leader_unit is None:
+        pytest.skip(
+            f"No leader unit found for application '{app_name}' in Juju status."
+        )
+
+    return leader_unit, non_leaders
+
+
+def wait_for_unit_active(
+    juju_client,
+    app_name: str,
+    unit_name: str,
+    timeout: int = 600,
+) -> float:
+    """Wait until the given Juju unit's workload status is 'active'.
+
+    Returns the time (in seconds) taken for the unit to become active again.
+    Raises AssertionError if the timeout is exceeded or the app enters error.
+    """
+    logger.info(
+        "Waiting for unit %s (app '%s') to become active again...",
+        unit_name,
+        app_name,
+    )
+    start = time.time()
+
+    try:
+        juju_client.juju.wait(
+            lambda status: is_unit_active(status, app_name, unit_name),
+            error=lambda status: app_has_error(status, app_name),
+            _timeout=timeout,
+            _delay=5.0,
+        )
+    except jubilant.WaitError as exc:
+        raise AssertionError(
+            f"Application '{app_name}' entered error state while waiting for "
+            f"{unit_name} to recover."
+        ) from exc
+
+    elapsed = time.time() - start
+    logger.info(
+        "Unit %s (app '%s') is active again after %.1f seconds.",
+        unit_name,
+        app_name,
+        elapsed,
+    )
+    return elapsed
+
+
+def is_unit_active(
+    status: jubilant.Status,
+    app_name: str,
+    unit_name: str,
+) -> bool:
+    """Return True if the given unit's workload status is 'active'."""
+    units = status.get_units(app_name)
+    unit = units.get(unit_name)
+    if not unit:
+        return False
+    workload = getattr(getattr(unit, "workload_status", None), "current", None)
+    return workload == "active"
+
+
+def app_has_error(status: jubilant.Status, app_name: str) -> bool:
+    """Return True if any unit in the given app is in error."""
+    return jubilant.any_error(status, app_name)
+
+
+def unit_name_to_pod_name(unit_name: str) -> str:
+    """Map a Juju unit name (e.g. 'keystone/1') to a pod name (e.g. 'keystone-1').
+
+    For Kubernetes charms, Juju unit names and pod names follow this convention.
+    """
+    return unit_name.replace("/", "-")
+
+
+def pod_chaos_name_for_pod(app_name: str, pod_name: str) -> str:
+    """Return a deterministic PodChaos name for a given pod."""
+    return f"{app_name}-{pod_name}-pod-kill"
+
+
+def apply_pod_chaos_for_pod(
+    app_namespace: str,
+    pod_name: str,
+    chaos_namespace: str = "chaos-mesh",
+    *,
+    duration: str = "30s",
+    action: str = "pod-kill",
+) -> str:
+    """Create a PodChaos resource targeting a single pod.
+
+    Returns the name of the created PodChaos resource.
+    """
+    chaos_name = pod_chaos_name_for_pod(app_namespace, pod_name)
+    manifest = f"""
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: {chaos_name}
+  namespace: {chaos_namespace}
+spec:
+  action: {action}
+  mode: one
+  duration: "{duration}"
+  selector:
+    pods:
+      {app_namespace}:
+        - {pod_name}
+""".lstrip()
+    logger.info(
+        "Applying PodChaos for pod %s in namespace %s (resource: %s)",
+        pod_name,
+        app_namespace,
+        chaos_name,
+    )
+    subprocess.run(
+        ["kubectl", "apply", "-f", "-"],
+        input=manifest,
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    return chaos_name
+
+
+def delete_pod_chaos(chaos_name: str, chaos_namespace: str = "chaos-mesh") -> None:
+    """Delete a PodChaos resource by name."""
+    logger.info(
+        "Deleting PodChaos resource: %s (namespace: %s)", chaos_name, chaos_namespace
+    )
+    subprocess.run(
+        [
+            "kubectl",
+            "delete",
+            "podchaos",
+            chaos_name,
+            "-n",
+            chaos_namespace,
+            "--ignore-not-found=true",
+        ],
+        check=False,
+        capture_output=True,
+        text=True,
+    )
diff --git a/sunbeam-python/tests/functional/chaos/validation/__init__.py b/sunbeam-python/tests/functional/chaos/validation/__init__.py
new file mode 100644
index 000000000..15cff7ecb
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/validation/__init__.py
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Chaos tests for the validation feature."""
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
new file mode 100644
index 000000000..2b0d9c975
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
@@ -0,0 +1,113 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+
+"""Chaos Mesh tests for the validation feature.
+
+These tests exercise the validation feature (``sunbeam validation run``) while
+Chaos Mesh injects failures into **non-leader** Keystone pods, to assess how
+well validation behaves under control plane disruption.
+"""
+
+import logging
+import subprocess
+from typing import List
+
+import pytest
+
+from tests.functional.chaos.utils import (
+    apply_pod_chaos_for_pod,
+    delete_pod_chaos,
+    get_leader_and_non_leaders,
+    unit_name_to_pod_name,
+    wait_for_unit_active,
+)
+
+logger = logging.getLogger(__name__)
+
+OPENSTACK_NAMESPACE = "openstack"
+CHAOS_NAMESPACE = "chaos-mesh"
+KEYSTONE_APP = "keystone"
+
+
+@pytest.mark.functional
+def test_validation_resilient_to_non_leader_keystone_pod_kills(
+    sunbeam_client,
+    juju_client,
+) -> None:
+    """Validation 'smoke' profile should tolerate non-leader Keystone pod kills.
+
+    This test:
+
+    - Ensures the ``validation`` feature is enabled.
+    - Uses Jubilant status to discover the Keystone leader unit and its
+      non-leader units in the ``openstack`` model.
+    - Starts ``sunbeam validation run smoke``
+    - While validation is running, sequentially applies Chaos Mesh ``PodChaos``
+      resources that kill each **non-leader** Keystone pod in turn, waiting for
+      each unit to recover to ``workload-status: active``.
+    - Collects and logs the recovery time for each non-leader unit.
+
+    The expectation is that the validation smoke run completes successfully
+    despite transient failures of non-leader Keystone pods.
+    """
+    sunbeam_client.enable_feature("validation")
+    leader_unit, non_leader_units = get_leader_and_non_leaders(
+        juju_client,
+        KEYSTONE_APP,
+    )
+    logger.info(
+        "Keystone leader unit: %s; non-leaders: %s",
+        leader_unit,
+        non_leader_units,
+    )
+
+    # Start validation smoke tests in the background.
+    logger.info("Starting 'sunbeam validation run smoke'...")
+    validation_proc = subprocess.Popen(
+        ["sunbeam", "validation", "run", "smoke"],
+        text=True,
+    )
+
+    chaos_resources: List[str] = []
+    try:
+        for unit_name in non_leader_units:
+            pod_name = unit_name_to_pod_name(unit_name)
+            chaos_name = apply_pod_chaos_for_pod(
+                OPENSTACK_NAMESPACE,
+                pod_name,
+                chaos_namespace=CHAOS_NAMESPACE,
+                duration="30s",
+            )
+            chaos_resources.append(chaos_name)
+
+            # Wait for the affected unit to become active again.
+            wait_for_unit_active(
+                juju_client,
+                KEYSTONE_APP,
+                unit_name,
+                timeout=600,
+            )
+
+        # After injecting chaos to all non-leaders, wait for validation to finish.
+        logger.info("Waiting for validation smoke run to complete...")
+        try:
+            return_code = validation_proc.wait(timeout=3600)
+        except subprocess.TimeoutExpired:
+            validation_proc.kill()
+            raise AssertionError(
+                "sunbeam validation run smoke did not complete within the timeout."
+            )
+
+        assert return_code == 0, (
+            f"sunbeam validation run smoke failed with exit code {return_code}"
+        )
+    finally:
+        for chaos_name in chaos_resources:
+            try:
+                delete_pod_chaos(chaos_name, chaos_namespace=CHAOS_NAMESPACE)
+            except Exception as exc:  # noqa: BLE001
+                logger.warning("Failed to clean up PodChaos %s: %s", chaos_name, exc)
+
+        if validation_proc.poll() is None:
+            validation_proc.terminate()
diff --git a/sunbeam-python/tox.ini b/sunbeam-python/tox.ini
index 5666df3e7..2e89e14a8 100644
--- a/sunbeam-python/tox.ini
+++ b/sunbeam-python/tox.ini
@@ -45,6 +45,17 @@ commands = uv run {[vars]uv_flags} \
   --basetemp={env:HOME}/.local/share/openstack/tmp \
   {posargs}
 
+[testenv:functional-chaos]
+description = Sunbeam Chaos Mesh functional tests
+passenv = USER
+          LOGNAME
+          USERNAME
+          HOME
+commands = uv run {[vars]uv_flags} \
+  python -m pytest -s -vv tests/functional/chaos \
+  --basetemp={env:HOME}/.local/share/openstack/tmp \
+  {posargs}
+
 [testenv:fmt]
 description = Apply coding style standards to code
 deps =

From 2b7863619b936e5c77846d525c394ed06edba15a Mon Sep 17 00:00:00 2001
From: Ahmad Hassan <ahmad.hassan@canonical.com>
Date: Tue, 10 Feb 2026 17:48:11 +0500
Subject: [PATCH 4/5] Add chaos validation suite and fully automated TLS Vault
 feature flow

---
 .../tests/functional/chaos/conftest.py        | 117 +++++++++++-
 .../tests/functional/chaos/utils.py           | 170 ++++++++++++++++--
 .../test_validation_api_pod_chaos.py          |  42 +++++
 .../test_validation_db_router_chaos.py        |  48 +++++
 .../validation/test_validation_infra_chaos.py |  36 ++++
 .../test_validation_keystone_chaos.py         |  79 +-------
 6 files changed, 400 insertions(+), 92 deletions(-)
 create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py
 create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py
 create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py

diff --git a/sunbeam-python/tests/functional/chaos/conftest.py b/sunbeam-python/tests/functional/chaos/conftest.py
index a437f9031..52f2e67c0 100644
--- a/sunbeam-python/tests/functional/chaos/conftest.py
+++ b/sunbeam-python/tests/functional/chaos/conftest.py
@@ -1,10 +1,115 @@
 # SPDX-FileCopyrightText: 2026 - Canonical Ltd
 # SPDX-License-Identifier: Apache-2.0
 
-"""Functional fixtures for chaos tests."""
+"""Functional fixtures and config hooks for chaos tests."""
 
-from tests.functional.feature.conftest import (  # noqa: F401
-    juju_client,
-    sunbeam_client,
-    test_config,
-)
+import logging
+
+import pytest
+
+from tests.functional.chaos.utils import _kubectl_command
+from tests.functional.feature import conftest as feature_conftest  # noqa: F401
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.fixture(scope="session", autouse=True)
+def ensure_chaos_mesh_installed() -> None:
+    """Ensure Chaos Mesh is installed and ready for chaos tests.
+
+    This follows the documented Helm installation path, using:
+
+    - sudo snap install helm --classic
+    - helm repo add chaos-mesh https://charts.chaos-mesh.org
+    - helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh
+    """
+    import subprocess
+
+    def _has_chaos_mesh() -> bool:
+        try:
+            result = subprocess.run(
+                _kubectl_command(["get", "pods", "-n", "chaos-mesh"]),
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+        except subprocess.CalledProcessError:
+            return False
+        return "chaos-mesh" in result.stdout or "controller-manager" in result.stdout
+
+    if _has_chaos_mesh():
+        logger.info("Chaos Mesh already present in namespace 'chaos-mesh'.")
+        return
+
+    logger.info("Chaos Mesh not detected; attempting to install via Helm...")
+
+    try:
+        subprocess.run(
+            ["helm", "version"],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+    except subprocess.CalledProcessError:
+        logger.info("Helm not found or not working; installing helm snap...")
+        subprocess.run(
+            ["sudo", "snap", "install", "helm", "--classic"],
+            check=True,
+            text=True,
+        )
+
+    subprocess.run(
+        ["helm", "repo", "add", "chaos-mesh", "https://charts.chaos-mesh.org"],
+        check=False,
+        text=True,
+    )
+    subprocess.run(
+        ["helm", "repo", "update"],
+        check=False,
+        text=True,
+    )
+
+    subprocess.run(
+        [
+            "helm",
+            "upgrade",
+            "--install",
+            "chaos-mesh",
+            "chaos-mesh/chaos-mesh",
+            "--namespace",
+            "chaos-mesh",
+            "--create-namespace",
+        ],
+        check=True,
+        text=True,
+    )
+
+    if not _has_chaos_mesh():
+        raise RuntimeError(
+            "Chaos Mesh could not be verified as running in 'chaos-mesh' "
+            "namespace after attempted installation. "
+            "Please check Juju/Helm/kubectl connectivity."
+        )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def ensure_validation_enabled_once(
+    sunbeam_client,  # type: ignore[reportUnusedFunction]
+) -> None:
+    """Enable the validation feature once for all chaos tests.
+
+    Chaos scenarios assume that the validation feature is enabled and they
+    merely start ``sunbeam validation run smoke`` during fault injection.
+    """
+    import subprocess
+
+    logger.info("Ensuring 'validation' feature is enabled for chaos tests...")
+    try:
+        sunbeam_client.enable_feature("validation")
+    except (
+        subprocess.CalledProcessError
+    ) as exc:  # pragma: no cover - environment-specific
+        logger.warning(
+            "Validation feature could not be enabled; chaos tests may fail: %s",
+            exc,
+        )
diff --git a/sunbeam-python/tests/functional/chaos/utils.py b/sunbeam-python/tests/functional/chaos/utils.py
index ce12212ac..197ea8217 100644
--- a/sunbeam-python/tests/functional/chaos/utils.py
+++ b/sunbeam-python/tests/functional/chaos/utils.py
@@ -17,10 +17,9 @@
 import logging
 import subprocess
 import time
-from typing import List, Tuple
+from typing import List, Sequence, Tuple
 
 import jubilant
-import pytest
 
 logger = logging.getLogger(__name__)
 
@@ -44,7 +43,7 @@ def get_leader_and_non_leaders(
             non_leaders.append(unit_name)
 
     if leader_unit is None:
-        pytest.skip(
+        raise AssertionError(
             f"No leader unit found for application '{app_name}' in Juju status."
         )
 
@@ -111,6 +110,34 @@ def app_has_error(status: jubilant.Status, app_name: str) -> bool:
     return jubilant.any_error(status, app_name)
 
 
+def assert_apps_healthy(juju_client, app_names: List[str]) -> None:
+    """Assert that the given applications have no units in error.
+
+    If none of the applications are present in the model, this function logs
+    a warning and returns without failing the test. This allows the same test
+    suite to run against deployments that may not include all optional apps.
+    """
+    status: jubilant.Status = juju_client.juju.status()
+    present_apps = [name for name in app_names if name in status.apps]
+
+    if not present_apps:
+        logger.warning(
+            "None of the apps %s found in Juju model; "
+            "skipping health assertion for them.",
+            app_names,
+        )
+        return
+
+    for app_name in present_apps:
+        if jubilant.any_error(status, app_name):
+            raise AssertionError(
+                f"Application '{app_name}' has units in error state during chaos."
+            )
+        logger.info(
+            "Application '%s' is healthy during chaos (no units in error).", app_name
+        )
+
+
 def unit_name_to_pod_name(unit_name: str) -> str:
     """Map a Juju unit name (e.g. 'keystone/1') to a pod name (e.g. 'keystone-1').
 
@@ -124,6 +151,28 @@ def pod_chaos_name_for_pod(app_name: str, pod_name: str) -> str:
     return f"{app_name}-{pod_name}-pod-kill"
 
 
+def _kubectl_command(args: List[str]) -> List[str]:
+    """Build a kubectl command suitable for the environment.
+
+    ``juju exec --unit <unit> -m <model> -- sudo k8s kubectl ...``
+    """
+    k8s_unit = "k8s/0"
+    k8s_model = "openstack-machines"
+    return [
+        "juju",
+        "exec",
+        "--unit",
+        k8s_unit,
+        "-m",
+        k8s_model,
+        "--",
+        "sudo",
+        "k8s",
+        "kubectl",
+        *args,
+    ]
+
+
 def apply_pod_chaos_for_pod(
     app_namespace: str,
     pod_name: str,
@@ -159,7 +208,7 @@ def apply_pod_chaos_for_pod(
         chaos_name,
     )
     subprocess.run(
-        ["kubectl", "apply", "-f", "-"],
+        _kubectl_command(["apply", "-f", "-"]),
         input=manifest,
         check=True,
         capture_output=True,
@@ -174,16 +223,111 @@ def delete_pod_chaos(chaos_name: str, chaos_namespace: str = "chaos-mesh") -> No
         "Deleting PodChaos resource: %s (namespace: %s)", chaos_name, chaos_namespace
     )
     subprocess.run(
-        [
-            "kubectl",
-            "delete",
-            "podchaos",
-            chaos_name,
-            "-n",
-            chaos_namespace,
-            "--ignore-not-found=true",
-        ],
+        _kubectl_command(
+            [
+                "delete",
+                "podchaos",
+                chaos_name,
+                "-n",
+                chaos_namespace,
+                "--ignore-not-found=true",
+            ]
+        ),
         check=False,
         capture_output=True,
         text=True,
     )
+
+
+def run_validation_with_pod_chaos(
+    juju_client,
+    targets: Sequence[tuple[str, List[str]]],
+    *,
+    suite_name: str,
+    openstack_namespace: str = "openstack",
+    chaos_namespace: str = "chaos-mesh",
+    validation_timeout: int = 3600,
+) -> None:
+    """Run 'sunbeam validation run smoke' while injecting PodChaos for targets.
+
+    Each entry in ``targets`` is (application_name, dependent_applications).
+    For each target application, all non-leader units are killed one by one
+    using PodChaos, and we wait for them to return to active status while
+    asserting that dependent applications remain healthy.
+    """
+    logger.info(
+        "Starting 'sunbeam validation run smoke' for %s chaos suite...",
+        suite_name,
+    )
+    validation_proc = subprocess.Popen(
+        ["sunbeam", "validation", "run", "smoke"],
+        text=True,
+    )
+
+    chaos_resources: List[str] = []
+    try:
+        for app_name, dependent_apps in targets:
+            leader_unit, non_leader_units = get_leader_and_non_leaders(
+                juju_client,
+                app_name,
+            )
+
+            if not non_leader_units:
+                logger.info(
+                    "Application '%s' has no non-leader units; skipping chaos.",
+                    app_name,
+                )
+                continue
+
+            logger.info(
+                "%s leader unit: %s; non-leaders: %s",
+                app_name,
+                leader_unit,
+                non_leader_units,
+            )
+
+            for unit_name in non_leader_units:
+                pod_name = unit_name_to_pod_name(unit_name)
+                chaos_name = apply_pod_chaos_for_pod(
+                    openstack_namespace,
+                    pod_name,
+                    chaos_namespace=chaos_namespace,
+                    duration="30s",
+                )
+                chaos_resources.append(chaos_name)
+
+                wait_for_unit_active(
+                    juju_client,
+                    app_name,
+                    unit_name,
+                    timeout=600,
+                )
+
+                if dependent_apps:
+                    assert_apps_healthy(juju_client, dependent_apps)
+
+        logger.info(
+            "Waiting for validation smoke run to complete after %s chaos suite...",
+            suite_name,
+        )
+        try:
+            return_code = validation_proc.wait(timeout=validation_timeout)
+        except subprocess.TimeoutExpired:
+            validation_proc.kill()
+            raise AssertionError(
+                "sunbeam validation run smoke did not complete within the timeout."
+            )
+
+        assert return_code == 0, (
+            "sunbeam validation run smoke failed with exit code "
+            f"{return_code} during {suite_name} chaos suite."
+        )
+    finally:
+        for chaos_name in chaos_resources:
+            try:
+                delete_pod_chaos(chaos_name, chaos_namespace=chaos_namespace)
+            except Exception as exc:  # noqa: BLE001
+                logger.warning("Failed to clean up PodChaos %s: %s", chaos_name, exc)
+
+        if validation_proc.poll() is None:
+            validation_proc.terminate()
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py
new file mode 100644
index 000000000..70e698216
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py
@@ -0,0 +1,42 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Generic API control-plane pod loss chaos tests."""
+
+from __future__ import annotations
+
+import logging
+
+import pytest
+
+from tests.functional.chaos.utils import run_validation_with_pod_chaos
+
+logger = logging.getLogger(__name__)
+
+
+API_TARGETS: list[tuple[str, list[str]]] = [
+    ("nova", ["keystone", "traefik-public", "traefik-internal"]),
+    ("neutron", ["keystone", "traefik-public", "traefik-internal"]),
+    ("glance", ["keystone", "traefik-public", "traefik-internal"]),
+    ("cinder-k8s", ["keystone", "traefik-public", "traefik-internal"]),
+    ("placement", ["keystone", "traefik-public", "traefik-internal"]),
+    ("aodh", ["keystone", "traefik-public", "traefik-internal"]),
+    ("ceilometer", ["keystone", "traefik-public", "traefik-internal"]),
+    ("gnocchi", ["keystone", "traefik-public", "traefik-internal"]),
+    ("masakari", ["keystone", "traefik-public", "traefik-internal"]),
+    ("watcher", ["keystone", "traefik-public", "traefik-internal"]),
+    ("horizon", ["keystone", "traefik-public", "traefik-internal"]),
+]
+
+
+@pytest.mark.functional
+def test_validation_resilient_to_non_leader_api_pod_kills(
+    sunbeam_client,
+    juju_client,
+) -> None:
+    """Validation 'smoke' profile should tolerate non-leader API pod kills."""
+    run_validation_with_pod_chaos(
+        juju_client,
+        targets=API_TARGETS,
+        suite_name="API pod",
+    )
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py
new file mode 100644
index 000000000..13dcae539
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py
@@ -0,0 +1,48 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Chaos tests for database access-path degradation (mysql-router pods)."""
+
+from __future__ import annotations
+
+import logging
+
+import pytest
+
+from tests.functional.chaos.utils import run_validation_with_pod_chaos
+
+logger = logging.getLogger(__name__)
+
+DEPENDENT_APPS = ["keystone", "traefik-public", "traefik-internal"]
+
+
+ROUTER_APPS: list[str] = [
+    "nova-api-mysql-router",
+    "nova-cell-mysql-router",
+    "nova-mysql-router",
+    "cinder-mysql-router",
+    "cinder-volume-mysql-router",
+    "neutron-mysql-router",
+    "keystone-mysql-router",
+    "glance-mysql-router",
+    "placement-mysql-router",
+    "aodh-mysql-router",
+    "gnocchi-mysql-router",
+    "masakari-mysql-router",
+    "watcher-mysql-router",
+    "horizon-mysql-router",
+]
+
+
+@pytest.mark.functional
+def test_validation_resilient_to_mysql_router_pod_kills(
+    sunbeam_client,
+    juju_client,
+) -> None:
+    """Validation 'smoke' profile should tolerate mysql-router pod kills."""
+    targets = [(app, DEPENDENT_APPS) for app in ROUTER_APPS]
+    run_validation_with_pod_chaos(
+        juju_client,
+        targets=targets,
+        suite_name="mysql-router",
+    )
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py
new file mode 100644
index 000000000..78327d3ed
--- /dev/null
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: 2026 - Canonical Ltd
+# SPDX-License-Identifier: Apache-2.0
+
+"""Chaos tests for core infrastructure services (MySQL, RabbitMQ, Traefik)."""
+
+from __future__ import annotations
+
+import logging
+
+import pytest
+
+from tests.functional.chaos.utils import run_validation_with_pod_chaos
+
+logger = logging.getLogger(__name__)
+
+
+INFRA_TARGETS: list[tuple[str, list[str]]] = [
+    ("mysql", ["keystone", "traefik-public", "traefik-internal"]),
+    ("rabbitmq", ["keystone", "traefik-public", "traefik-internal"]),
+    ("traefik-public", ["keystone"]),
+    ("traefik", ["keystone"]),
+    ("traefik-rgw", ["keystone"]),
+]
+
+
+@pytest.mark.functional
+def test_validation_resilient_to_infra_pod_kills(
+    sunbeam_client,
+    juju_client,
+) -> None:
+    """Validation 'smoke' profile should tolerate infra pod/unit loss."""
+    run_validation_with_pod_chaos(
+        juju_client,
+        targets=INFRA_TARGETS,
+        suite_name="infra",
+    )
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
index 2b0d9c975..13b3b42e3 100644
--- a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
@@ -2,32 +2,20 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-"""Chaos Mesh tests for the validation feature.
-
-These tests exercise the validation feature (``sunbeam validation run``) while
-Chaos Mesh injects failures into **non-leader** Keystone pods, to assess how
-well validation behaves under control plane disruption.
-"""
+"""Keystone-specific chaos tests for the validation feature."""
 
 import logging
-import subprocess
-from typing import List
 
 import pytest
 
 from tests.functional.chaos.utils import (
-    apply_pod_chaos_for_pod,
-    delete_pod_chaos,
-    get_leader_and_non_leaders,
-    unit_name_to_pod_name,
-    wait_for_unit_active,
+    run_validation_with_pod_chaos,
 )
 
 logger = logging.getLogger(__name__)
 
-OPENSTACK_NAMESPACE = "openstack"
-CHAOS_NAMESPACE = "chaos-mesh"
 KEYSTONE_APP = "keystone"
+TRAEFIK_APPS = ["traefik-public", "traefik-internal"]
 
 
 @pytest.mark.functional
@@ -51,63 +39,8 @@ def test_validation_resilient_to_non_leader_keystone_pod_kills(
     The expectation is that the validation smoke run completes successfully
     despite transient failures of non-leader Keystone pods.
     """
-    sunbeam_client.enable_feature("validation")
-    leader_unit, non_leader_units = get_leader_and_non_leaders(
+    run_validation_with_pod_chaos(
         juju_client,
-        KEYSTONE_APP,
-    )
-    logger.info(
-        "Keystone leader unit: %s; non-leaders: %s",
-        leader_unit,
-        non_leader_units,
+        targets=[(KEYSTONE_APP, TRAEFIK_APPS)],
+        suite_name="Keystone API",
     )
-
-    # Start validation smoke tests in the background.
-    logger.info("Starting 'sunbeam validation run smoke'...")
-    validation_proc = subprocess.Popen(
-        ["sunbeam", "validation", "run", "smoke"],
-        text=True,
-    )
-
-    chaos_resources: List[str] = []
-    try:
-        for unit_name in non_leader_units:
-            pod_name = unit_name_to_pod_name(unit_name)
-            chaos_name = apply_pod_chaos_for_pod(
-                OPENSTACK_NAMESPACE,
-                pod_name,
-                chaos_namespace=CHAOS_NAMESPACE,
-                duration="30s",
-            )
-            chaos_resources.append(chaos_name)
-
-            # Wait for the affected unit to become active again.
-            wait_for_unit_active(
-                juju_client,
-                KEYSTONE_APP,
-                unit_name,
-                timeout=600,
-            )
-
-        # After injecting chaos to all non-leaders, wait for validation to finish.
-        logger.info("Waiting for validation smoke run to complete...")
-        try:
-            return_code = validation_proc.wait(timeout=3600)
-        except subprocess.TimeoutExpired:
-            validation_proc.kill()
-            raise AssertionError(
-                "sunbeam validation run smoke did not complete within the timeout."
-            )
-
-        assert return_code == 0, (
-            f"sunbeam validation run smoke failed with exit code {return_code}"
-        )
-    finally:
-        for chaos_name in chaos_resources:
-            try:
-                delete_pod_chaos(chaos_name, chaos_namespace=CHAOS_NAMESPACE)
-            except Exception as exc:  # noqa: BLE001
-                logger.warning("Failed to clean up PodChaos %s: %s", chaos_name, exc)
-
-        if validation_proc.poll() is None:
-            validation_proc.terminate()

From 750aa14758f678abc1d3c0eca6afa88d83e95df7 Mon Sep 17 00:00:00 2001
From: Ahmad Hassan <ahmad.hassan@canonical.com>
Date: Tue, 17 Feb 2026 13:51:15 +0500
Subject: [PATCH 5/5] Improved reporting metrics, Updated Readme

---
 .../tests/functional/chaos/README.md          |  49 ++-
 .../tests/functional/chaos/conftest.py        |  89 +++--
 .../tests/functional/chaos/utils.py           | 339 +++++++++++++++---
 .../test_validation_api_pod_chaos.py          |  31 +-
 .../test_validation_db_router_chaos.py        |  18 +-
 .../validation/test_validation_infra_chaos.py |  19 +-
 .../test_validation_keystone_chaos.py         |  28 +-
 7 files changed, 444 insertions(+), 129 deletions(-)

diff --git a/sunbeam-python/tests/functional/chaos/README.md b/sunbeam-python/tests/functional/chaos/README.md
index 94457816e..df84d69e1 100644
--- a/sunbeam-python/tests/functional/chaos/README.md
+++ b/sunbeam-python/tests/functional/chaos/README.md
@@ -2,27 +2,46 @@
 
 Chaos Mesh-based resilience tests for Canonical OpenStack features.
 
-This directory is intentionally separate from the standard feature functional
-tests under `tests/functional/feature` so that chaos experiments can be run
-independently and expanded over time.
+This directory is separate from the feature functional tests under
+`tests/functional/feature` so that chaos experiments can be run independently
+and expanded over time.
 
 ## Prerequisites
 
-- A working Canonical OpenStack deployment (same requirements as the feature
-  functional tests).
+- A working Canonical OpenStack deployment (same as feature functional tests).
 - `sunbeam`, `openstack` and `juju` CLIs configured for that deployment.
-- `kubectl` configured to talk to the Kubernetes cluster that backs the
-  OpenStack model.
-- Chaos Mesh installed and running, typically in the `chaos-mesh` namespace.
 
-## Layout
+Session-scoped fixtures automatically:
+
+- Enable the **validation** feature once per run.
+- Install or verify Chaos Mesh (Helm and `kubectl` are run via `juju exec` on
+  `k8s/0` in the `openstack-machines` model).
+
+## Run outcome and reports
+
+Each chaos test run is **SUCCESS** or **FAIL**:
+
+- **FAIL** if any unit does not return to `active` within the recovery timeout,
+  or if the post-chaos **quick** validation test fails.
+- **SUCCESS** only when all targeted units recover to `active` and the quick
+  test passes.
 
-- `validation/`: Chaos tests that target the **validation** feature.
+A JSON report is written to `tests/functional/chaos/reports/` for each run.
+Filenames include the outcome and a timestamp:
+
+- `SUCCESS_<test_name>_<YYYY-MM-DD_HH-MM-SS>.json`
+- `FAIL_<test_name>_<YYYY-MM-DD_HH-MM-SS>.json`
+
+Reports include test duration, smoke test output/status, per-unit recovery
+times and state sequences, and quick test output/status.
+
+## Layout
 
-Additional feature-specific chaos tests can be added as new subdirectories
-alongside `validation/`.
+- `validation/`: Chaos tests for the **validation** feature (Keystone, API pods,
+  DB routers, infra).
+- `reports/`: JSON reports from each run.
 
-## Running the chaos tests
+## Running the tests
 
 From the `sunbeam-python` tree:
 
@@ -30,9 +49,9 @@ From the `sunbeam-python` tree:
 tox -e functional-chaos
 ```
 
-You can also run individual chaos tests via `pytest`, for example:
+Or run a single test:
 
 ```bash
-python -m pytest -s -vv tests/functional/chaos/validation/test_validation_keystone_chaos.py
+python -m pytest -s -vv tests/functional/chaos/validation/test_validation_keystone_chaos.py --config tests/functional/feature/test_config.yaml
 ```
 
diff --git a/sunbeam-python/tests/functional/chaos/conftest.py b/sunbeam-python/tests/functional/chaos/conftest.py
index 52f2e67c0..f5aa33bc6 100644
--- a/sunbeam-python/tests/functional/chaos/conftest.py
+++ b/sunbeam-python/tests/functional/chaos/conftest.py
@@ -1,14 +1,20 @@
 # SPDX-FileCopyrightText: 2026 - Canonical Ltd
 # SPDX-License-Identifier: Apache-2.0
+# ruff: noqa: I001
 
 """Functional fixtures and config hooks for chaos tests."""
 
 import logging
+import subprocess
 
 import pytest
 
-from tests.functional.chaos.utils import _kubectl_command
-from tests.functional.feature import conftest as feature_conftest  # noqa: F401
+from tests.functional.chaos.utils import _helm_command, _kubectl_command
+from tests.functional.feature.conftest import (  # noqa: F401
+    juju_client,
+    sunbeam_client as _feature_sunbeam_client,
+    test_config,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -23,11 +29,10 @@ def ensure_chaos_mesh_installed() -> None:
     - helm repo add chaos-mesh https://charts.chaos-mesh.org
     - helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh
     """
-    import subprocess
 
     def _has_chaos_mesh() -> bool:
         try:
-            result = subprocess.run(
+            subprocess.run(
                 _kubectl_command(["get", "pods", "-n", "chaos-mesh"]),
                 check=True,
                 capture_output=True,
@@ -35,7 +40,8 @@ def _has_chaos_mesh() -> bool:
             )
         except subprocess.CalledProcessError:
             return False
-        return "chaos-mesh" in result.stdout or "controller-manager" in result.stdout
+
+        return True
 
     if _has_chaos_mesh():
         logger.info("Chaos Mesh already present in namespace 'chaos-mesh'.")
@@ -45,64 +51,93 @@ def _has_chaos_mesh() -> bool:
 
     try:
         subprocess.run(
-            ["helm", "version"],
+            _helm_command(["version"]),
             check=True,
             capture_output=True,
             text=True,
         )
     except subprocess.CalledProcessError:
-        logger.info("Helm not found or not working; installing helm snap...")
+        logger.info(
+            "Helm not found or not working in k8s/0@openstack-machines; "
+            "attempting to install helm snap in that unit...",
+        )
         subprocess.run(
-            ["sudo", "snap", "install", "helm", "--classic"],
+            [
+                "juju",
+                "exec",
+                "--unit",
+                "k8s/0",
+                "-m",
+                "openstack-machines",
+                "--",
+                "sudo",
+                "snap",
+                "install",
+                "helm",
+                "--classic",
+            ],
             check=True,
             text=True,
         )
+        # Re-check helm availability; if this fails, surface a clear error.
+        try:
+            subprocess.run(
+                _helm_command(["version"]),
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+        except subprocess.CalledProcessError as exc:  # pragma: no cover
+            msg = (
+                "Helm is still not available in k8s/0@openstack-machines after "
+                "attempted snap installation. Please log into that unit and "
+                "ensure 'helm' is installed and configured."
+            )
+            raise RuntimeError(msg) from exc
 
     subprocess.run(
-        ["helm", "repo", "add", "chaos-mesh", "https://charts.chaos-mesh.org"],
+        _helm_command(["repo", "add", "chaos-mesh", "https://charts.chaos-mesh.org"]),
         check=False,
         text=True,
     )
     subprocess.run(
-        ["helm", "repo", "update"],
+        _helm_command(["repo", "update"]),
         check=False,
         text=True,
     )
 
     subprocess.run(
-        [
-            "helm",
-            "upgrade",
-            "--install",
-            "chaos-mesh",
-            "chaos-mesh/chaos-mesh",
-            "--namespace",
-            "chaos-mesh",
-            "--create-namespace",
-        ],
+        _helm_command(
+            [
+                "upgrade",
+                "--install",
+                "chaos-mesh",
+                "chaos-mesh/chaos-mesh",
+                "--namespace",
+                "chaos-mesh",
+                "--create-namespace",
+            ],
+        ),
         check=True,
         text=True,
     )
 
     if not _has_chaos_mesh():
-        raise RuntimeError(
+        logger.warning(
             "Chaos Mesh could not be verified as running in 'chaos-mesh' "
             "namespace after attempted installation. "
-            "Please check Juju/Helm/kubectl connectivity."
+            "Continuing anyway; PodChaos operations may fail if Chaos Mesh "
+            "is not fully ready.",
         )
 
 
 @pytest.fixture(scope="session", autouse=True)
-def ensure_validation_enabled_once(
-    sunbeam_client,  # type: ignore[reportUnusedFunction]
-) -> None:
+def ensure_validation_enabled_once(sunbeam_client) -> None:
     """Enable the validation feature once for all chaos tests.
 
     Chaos scenarios assume that the validation feature is enabled and they
     merely start ``sunbeam validation run smoke`` during fault injection.
     """
-    import subprocess
-
     logger.info("Ensuring 'validation' feature is enabled for chaos tests...")
     try:
         sunbeam_client.enable_feature("validation")
diff --git a/sunbeam-python/tests/functional/chaos/utils.py b/sunbeam-python/tests/functional/chaos/utils.py
index 197ea8217..3abf54f1a 100644
--- a/sunbeam-python/tests/functional/chaos/utils.py
+++ b/sunbeam-python/tests/functional/chaos/utils.py
@@ -14,9 +14,13 @@
 
 from __future__ import annotations
 
+import base64
+import json
 import logging
 import subprocess
 import time
+from datetime import datetime, timezone
+from pathlib import Path
 from typing import List, Sequence, Tuple
 
 import jubilant
@@ -32,7 +36,14 @@ def get_leader_and_non_leaders(
     logger.info("Querying Juju status for application '%s' units...", app_name)
 
     status: jubilant.Status = juju_client.juju.status()
-    app = status.apps[app_name]
+    try:
+        app = status.apps[app_name]
+    except KeyError as exc:
+        available_apps = ", ".join(sorted(status.apps.keys()))
+        raise RuntimeError(
+            f"Application '{app_name}' not found in Juju status. "
+            f"Available applications: {available_apps}"
+        ) from exc
 
     leader_unit: str | None = None
     non_leaders: List[str] = []
@@ -72,8 +83,8 @@ def wait_for_unit_active(
         juju_client.juju.wait(
             lambda status: is_unit_active(status, app_name, unit_name),
             error=lambda status: app_has_error(status, app_name),
-            _timeout=timeout,
-            _delay=5.0,
+            timeout=timeout,
+            delay=5.0,
         )
     except jubilant.WaitError as exc:
         raise AssertionError(
@@ -91,18 +102,96 @@ def wait_for_unit_active(
     return elapsed
 
 
-def is_unit_active(
+def run_validation_command(
+    cmd: List[str],
+    timeout: int = 600,
+) -> Tuple[float, str, bool]:
+    """Run a validation command (e.g. sunbeam validation run quick).
+
+    Returns (duration_seconds, output, success).
+    """
+    start = time.time()
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        timeout=timeout,
+    )
+    duration = time.time() - start
+    output = (result.stdout or "") + (result.stderr or "")
+    return (round(duration, 1), output, result.returncode == 0)
+
+
+def wait_for_unit_active_with_tracking(
+    juju_client,
+    app_name: str,
+    unit_name: str,
+    timeout: int = 600,
+    poll_interval: int = 10,
+) -> Tuple[float | None, List[dict]]:
+    """Poll until unit is active or timeout.
+
+    Returns time_to_return_active_seconds (or None) and state_sequence.
+
+    state_sequence: list of {timestamp_iso, state, message} when not active.
+    """
+    state_sequence: List[dict] = []
+    poll_start = time.time()
+    left_active_at: float | None = None
+
+    while (time.time() - poll_start) < timeout:
+        status = juju_client.juju.status()
+        current, message = get_unit_workload_status(status, app_name, unit_name)
+        now = time.time()
+        ts_iso = datetime.fromtimestamp(now, tz=timezone.utc).isoformat()
+
+        if current != "active":
+            if left_active_at is None:
+                left_active_at = now
+            state_sequence.append(
+                {
+                    "timestamp": ts_iso,
+                    "state": current,
+                    "message": message,
+                }
+            )
+        else:
+            if left_active_at is not None:
+                return (round(now - left_active_at, 1), state_sequence)
+            return (0.0, state_sequence)
+
+        time.sleep(poll_interval)
+
+    return (None, state_sequence)
+
+
+def get_unit_workload_status(
     status: jubilant.Status,
     app_name: str,
     unit_name: str,
-) -> bool:
-    """Return True if the given unit's workload status is 'active'."""
+) -> Tuple[str, str]:
+    """Return (workload_status.current, workload_status.message) for the unit.
+
+    Returns ("unknown", "") if the unit or workload_status is missing.
+    """
     units = status.get_units(app_name)
     unit = units.get(unit_name)
     if not unit:
-        return False
-    workload = getattr(getattr(unit, "workload_status", None), "current", None)
-    return workload == "active"
+        return ("unknown", "")
+    workload = getattr(unit, "workload_status", None)
+    current = getattr(workload, "current", None) or "unknown"
+    message = getattr(workload, "message", None) or ""
+    return (str(current), str(message))
+
+
+def is_unit_active(
+    status: jubilant.Status,
+    app_name: str,
+    unit_name: str,
+) -> bool:
+    """Return True if the given unit's workload status is 'active'."""
+    current, _ = get_unit_workload_status(status, app_name, unit_name)
+    return current == "active"
 
 
 def app_has_error(status: jubilant.Status, app_name: str) -> bool:
@@ -110,6 +199,23 @@ def app_has_error(status: jubilant.Status, app_name: str) -> bool:
     return jubilant.any_error(status, app_name)
 
 
+def get_status_json_for_apps(juju_client, app_names: List[str]) -> dict:
+    """Return juju status as a dict restricted to the given application names.
+
+    Runs ``juju status --format json`` and returns a structure with only
+    the requested applications (for use in report snapshots).
+    """
+    try:
+        raw = juju_client.juju.cli("status", "--format", "json")
+        full = json.loads(raw) if isinstance(raw, str) else json.loads(raw.decode())
+    except (json.JSONDecodeError, TypeError) as exc:
+        logger.warning("Could not get juju status JSON: %s", exc)
+        return {"_error": str(exc)}
+
+    apps = full.get("applications") or {}
+    return {"applications": {name: apps[name] for name in app_names if name in apps}}
+
+
 def assert_apps_healthy(juju_client, app_names: List[str]) -> None:
     """Assert that the given applications have no units in error.
 
@@ -154,7 +260,7 @@ def pod_chaos_name_for_pod(app_name: str, pod_name: str) -> str:
 def _kubectl_command(args: List[str]) -> List[str]:
     """Build a kubectl command suitable for the environment.
 
-    ``juju exec --unit <unit> -m <model> -- sudo k8s kubectl ...``
+    ``juju exec --unit <unit> -m <model> --stdin -- sudo k8s kubectl ...``
     """
     k8s_unit = "k8s/0"
     k8s_model = "openstack-machines"
@@ -173,6 +279,27 @@ def _kubectl_command(args: List[str]) -> List[str]:
     ]
 
 
+def _helm_command(args: List[str]) -> List[str]:
+    """Build a helm command targeting the Sunbeam K8s cluster.
+
+    ``juju exec --unit <unit> -m <model> -- sudo helm ...``
+    """
+    k8s_unit = "k8s/0"
+    k8s_model = "openstack-machines"
+    return [
+        "juju",
+        "exec",
+        "--unit",
+        k8s_unit,
+        "-m",
+        k8s_model,
+        "--",
+        "sudo",
+        "helm",
+        *args,
+    ]
+
+
 def apply_pod_chaos_for_pod(
     app_namespace: str,
     pod_name: str,
@@ -207,13 +334,40 @@ def apply_pod_chaos_for_pod(
         app_namespace,
         chaos_name,
     )
-    subprocess.run(
-        _kubectl_command(["apply", "-f", "-"]),
-        input=manifest,
-        check=True,
+
+    manifest_b64 = base64.b64encode(manifest.encode("utf-8")).decode("ascii")
+    cmd = [
+        "juju",
+        "exec",
+        "--unit",
+        "k8s/0",
+        "-m",
+        "openstack-machines",
+        "--",
+        "bash",
+        "-c",
+        'echo "$1" | base64 -d | sudo k8s kubectl apply -f -',
+        "_",
+        manifest_b64,
+    ]
+    result = subprocess.run(
+        cmd,
+        check=False,
         capture_output=True,
         text=True,
     )
+    if result.returncode != 0:
+        logger.error(
+            "Failed to apply PodChaos %s (exit code %s).\nstdout:\n%s\nstderr:\n%s",
+            chaos_name,
+            result.returncode,
+            result.stdout,
+            result.stderr,
+        )
+        raise RuntimeError(
+            f"kubectl apply for PodChaos '{chaos_name}' failed with exit code "
+            f"{result.returncode}: {result.stderr.strip()}"
+        )
     return chaos_name
 
 
@@ -239,21 +393,35 @@ def delete_pod_chaos(chaos_name: str, chaos_namespace: str = "chaos-mesh") -> No
     )
 
 
-def run_validation_with_pod_chaos(
+def _write_chaos_json_report(report_name: str, data: dict) -> Path:
+    """Write a single JSON report file; name includes timestamp. Returns path."""
+    reports_dir = Path(__file__).parent / "reports"
+    reports_dir.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d_%H-%M-%S")
+    path = reports_dir / f"{report_name}_{timestamp}.json"
+    with path.open("w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2, sort_keys=True)
+    return path
+
+
+def run_validation_with_pod_chaos(  # noqa: C901
     juju_client,
     targets: Sequence[tuple[str, List[str]]],
     *,
     suite_name: str,
+    report_name: str | None = None,
     openstack_namespace: str = "openstack",
     chaos_namespace: str = "chaos-mesh",
     validation_timeout: int = 3600,
+    initial_delay: int = 60,
+    recovery_timeout: int = 600,
+    poll_interval: int = 10,
+    quick_test_timeout: int = 600,
 ) -> None:
-    """Run 'sunbeam validation run smoke' while injecting PodChaos for targets.
+    """Run validation with PodChaos and optional JSON reporting.
 
-    Each entry in ``targets`` is (application_name, dependent_applications).
-    For each target application, all non-leader units are killed one by one
-    using PodChaos, and we wait for them to return to active status while
-    asserting that dependent applications remain healthy.
+    Smoke runs in parallel with chaos; quick run and JSON report
+    are executed after chaos when report_name is provided.
     """
     logger.info(
         "Starting 'sunbeam validation run smoke' for %s chaos suite...",
@@ -262,23 +430,35 @@ def run_validation_with_pod_chaos(
     validation_proc = subprocess.Popen(
         ["sunbeam", "validation", "run", "smoke"],
         text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
     )
 
+    run_start_monotonic = time.time()
+    failed_recoveries: List[dict] = []
+    apps_in_error: List[dict] = []
+    recovery_per_unit: List[dict] = []
+    validation_return_code: int | None = None
+    error_summary: str | None = None
+    validation_output: str | None = None
+    smoke_duration: float | None = None
+
     chaos_resources: List[str] = []
+
     try:
+        if initial_delay > 0:
+            logger.info(
+                "Sleeping %s seconds before starting PodChaos injections to allow "
+                "Tempest discover-tempest-config/bootstrap to complete.",
+                initial_delay,
+            )
+            time.sleep(initial_delay)
+
         for app_name, dependent_apps in targets:
             leader_unit, non_leader_units = get_leader_and_non_leaders(
                 juju_client,
                 app_name,
             )
-
-            if not non_leader_units:
-                logger.info(
-                    "Application '%s' has no non-leader units; skipping chaos.",
-                    app_name,
-                )
-                continue
-
             logger.info(
                 "%s leader unit: %s; non-leaders: %s",
                 app_name,
@@ -296,13 +476,40 @@ def run_validation_with_pod_chaos(
                 )
                 chaos_resources.append(chaos_name)
 
-                wait_for_unit_active(
-                    juju_client,
-                    app_name,
-                    unit_name,
-                    timeout=600,
+                time_to_return_active_seconds, state_sequence = (
+                    wait_for_unit_active_with_tracking(
+                        juju_client,
+                        app_name,
+                        unit_name,
+                        timeout=recovery_timeout,
+                        poll_interval=poll_interval,
+                    )
                 )
-
+                recovery_per_unit.append(
+                    {
+                        "app": app_name,
+                        "unit": unit_name,
+                        "time_to_return_active_seconds": time_to_return_active_seconds,
+                    }
+                )
+                if state_sequence:
+                    apps_in_error.append(
+                        {
+                            "app": app_name,
+                            "unit": unit_name,
+                            "state_sequence": state_sequence,
+                        }
+                    )
+                if time_to_return_active_seconds is None:
+                    failed_recoveries.append(
+                        {
+                            "app": app_name,
+                            "unit": unit_name,
+                            "pod": pod_name,
+                            "error": "timeout",
+                        }
+                    )
+                    break
                 if dependent_apps:
                     assert_apps_healthy(juju_client, dependent_apps)
 
@@ -311,17 +518,69 @@ def run_validation_with_pod_chaos(
             suite_name,
         )
         try:
-            return_code = validation_proc.wait(timeout=validation_timeout)
+            stdout_data, _ = validation_proc.communicate(timeout=validation_timeout)
+            validation_output = stdout_data or ""
+            validation_return_code = validation_proc.returncode
+            smoke_duration = time.time() - run_start_monotonic
         except subprocess.TimeoutExpired:
             validation_proc.kill()
-            raise AssertionError(
+            stdout_data, _ = validation_proc.communicate()
+            validation_output = stdout_data or ""
+            smoke_duration = time.time() - run_start_monotonic
+            validation_return_code = None
+            error_summary = (
                 "sunbeam validation run smoke did not complete within the timeout."
             )
 
-        assert return_code == 0, (
-            "sunbeam validation run smoke failed with exit code "
-            f"{return_code} during {suite_name} chaos suite."
-        )
+        if report_name:
+            quick_duration, quick_output, quick_success = run_validation_command(
+                ["sunbeam", "validation", "run", "quick"],
+                timeout=quick_test_timeout,
+            )
+            test_duration = time.time() - run_start_monotonic
+            final_status = "SUCCESS"
+            if failed_recoveries or not quick_success:
+                final_status = "FAIL"
+            report_data = {
+                "status": final_status,
+                "test_duration_seconds": round(test_duration, 1),
+                "smoke_test": {
+                    "duration_seconds": round(smoke_duration or 0, 1),
+                    "output": (validation_output or "")[:10000],
+                    "success": validation_return_code == 0,
+                },
+                "apps_in_error": apps_in_error,
+                "recovery_per_unit": recovery_per_unit,
+                "quick_test": {
+                    "duration_seconds": quick_duration,
+                    "output": (quick_output or "")[:10000],
+                    "success": quick_success,
+                },
+            }
+            report_path = _write_chaos_json_report(
+                f"{final_status}_{report_name}",
+                report_data,
+            )
+            logger.info("Chaos report written to %s", report_path)
+
+            if not quick_success:
+                # Quick validation failure makes the chaos run a FAIL.
+                raise AssertionError(
+                    "Quick validation test failed after chaos. See reports/."
+                )
+
+        if failed_recoveries and error_summary is None:
+            failed_labels = ", ".join(
+                f"{fr['app']}/{fr['unit']}" for fr in failed_recoveries
+            )
+            error_summary = (
+                f"One or more chaos targets did not recover cleanly: {failed_labels}"
+            )
+            raise AssertionError(error_summary)
+    except Exception as exc:  # noqa: BLE001
+        if error_summary is None:
+            error_summary = repr(exc)
+        raise
     finally:
         for chaos_name in chaos_resources:
             try:
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py
index 70e698216..efeb43863 100644
--- a/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py
@@ -14,18 +14,18 @@
 logger = logging.getLogger(__name__)
 
 
-API_TARGETS: list[tuple[str, list[str]]] = [
-    ("nova", ["keystone", "traefik-public", "traefik-internal"]),
-    ("neutron", ["keystone", "traefik-public", "traefik-internal"]),
-    ("glance", ["keystone", "traefik-public", "traefik-internal"]),
-    ("cinder-k8s", ["keystone", "traefik-public", "traefik-internal"]),
-    ("placement", ["keystone", "traefik-public", "traefik-internal"]),
-    ("aodh", ["keystone", "traefik-public", "traefik-internal"]),
-    ("ceilometer", ["keystone", "traefik-public", "traefik-internal"]),
-    ("gnocchi", ["keystone", "traefik-public", "traefik-internal"]),
-    ("masakari", ["keystone", "traefik-public", "traefik-internal"]),
-    ("watcher", ["keystone", "traefik-public", "traefik-internal"]),
-    ("horizon", ["keystone", "traefik-public", "traefik-internal"]),
+API_APPS: list[str] = [
+    "nova",
+    "neutron",
+    "glance",
+    "cinder",
+    "placement",
+    # "aodh",
+    # "ceilometer",
+    # "gnocchi",
+    # "masakari",
+    # "watcher",
+    "horizon",
 ]
 
 
@@ -37,6 +37,11 @@ def test_validation_resilient_to_non_leader_api_pod_kills(
     """Validation 'smoke' profile should tolerate non-leader API pod kills."""
     run_validation_with_pod_chaos(
         juju_client,
-        targets=API_TARGETS,
+        targets=[(app, []) for app in API_APPS],
         suite_name="API pod",
+        report_name="test_validation_api_pod_chaos",
+        initial_delay=60,
+        recovery_timeout=1800,
+        poll_interval=10,
+        quick_test_timeout=1800,
     )
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py
index 13dcae539..797f7f467 100644
--- a/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py
@@ -13,8 +13,6 @@
 
 logger = logging.getLogger(__name__)
 
-DEPENDENT_APPS = ["keystone", "traefik-public", "traefik-internal"]
-
 
 ROUTER_APPS: list[str] = [
     "nova-api-mysql-router",
@@ -26,10 +24,10 @@
     "keystone-mysql-router",
     "glance-mysql-router",
     "placement-mysql-router",
-    "aodh-mysql-router",
-    "gnocchi-mysql-router",
-    "masakari-mysql-router",
-    "watcher-mysql-router",
+    # "aodh-mysql-router",
+    # "gnocchi-mysql-router",
+    # "masakari-mysql-router",
+    # "watcher-mysql-router",
     "horizon-mysql-router",
 ]
 
@@ -40,9 +38,13 @@ def test_validation_resilient_to_mysql_router_pod_kills(
     juju_client,
 ) -> None:
     """Validation 'smoke' profile should tolerate mysql-router pod kills."""
-    targets = [(app, DEPENDENT_APPS) for app in ROUTER_APPS]
     run_validation_with_pod_chaos(
         juju_client,
-        targets=targets,
+        targets=[(app, []) for app in ROUTER_APPS],
         suite_name="mysql-router",
+        report_name="test_validation_db_router_chaos",
+        initial_delay=60,
+        recovery_timeout=1800,
+        poll_interval=10,
+        quick_test_timeout=1800,
     )
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py
index 78327d3ed..0d5b9f262 100644
--- a/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py
@@ -14,12 +14,12 @@
 logger = logging.getLogger(__name__)
 
 
-INFRA_TARGETS: list[tuple[str, list[str]]] = [
-    ("mysql", ["keystone", "traefik-public", "traefik-internal"]),
-    ("rabbitmq", ["keystone", "traefik-public", "traefik-internal"]),
-    ("traefik-public", ["keystone"]),
-    ("traefik", ["keystone"]),
-    ("traefik-rgw", ["keystone"]),
+INFRA_APPS: list[str] = [
+    "mysql",
+    "rabbitmq",
+    "traefik-public",
+    "traefik",
+    "traefik-rgw",
 ]
 
 
@@ -31,6 +31,11 @@ def test_validation_resilient_to_infra_pod_kills(
     """Validation 'smoke' profile should tolerate infra pod/unit loss."""
     run_validation_with_pod_chaos(
         juju_client,
-        targets=INFRA_TARGETS,
+        targets=[(app, []) for app in INFRA_APPS],
         suite_name="infra",
+        report_name="test_validation_infra_chaos",
+        initial_delay=60,
+        recovery_timeout=1800,
+        poll_interval=10,
+        quick_test_timeout=1800,
     )
diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
index 13b3b42e3..32fcbf67a 100644
--- a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
+++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py
@@ -4,13 +4,13 @@
 
 """Keystone-specific chaos tests for the validation feature."""
 
+from __future__ import annotations
+
 import logging
 
 import pytest
 
-from tests.functional.chaos.utils import (
-    run_validation_with_pod_chaos,
-)
+from tests.functional.chaos.utils import run_validation_with_pod_chaos
 
 logger = logging.getLogger(__name__)
 
@@ -23,24 +23,14 @@ def test_validation_resilient_to_non_leader_keystone_pod_kills(
     sunbeam_client,
     juju_client,
 ) -> None:
-    """Validation 'smoke' profile should tolerate non-leader Keystone pod kills.
-
-    This test:
-
-    - Ensures the ``validation`` feature is enabled.
-    - Uses Jubilant status to discover the Keystone leader unit and its
-      non-leader units in the ``openstack`` model.
-    - Starts ``sunbeam validation run smoke``
-    - While validation is running, sequentially applies Chaos Mesh ``PodChaos``
-      resources that kill each **non-leader** Keystone pod in turn, waiting for
-      each unit to recover to ``workload-status: active``.
-    - Collects and logs the recovery time for each non-leader unit.
-
-    The expectation is that the validation smoke run completes successfully
-    despite transient failures of non-leader Keystone pods.
-    """
+    """Run smoke + quick validation around non-leader Keystone pod chaos."""
     run_validation_with_pod_chaos(
         juju_client,
         targets=[(KEYSTONE_APP, TRAEFIK_APPS)],
         suite_name="Keystone API",
+        report_name="test_validation_keystone_chaos",
+        initial_delay=60,
+        recovery_timeout=1800,
+        poll_interval=10,
+        quick_test_timeout=1800,
     )