From fabe182306cb15aba641273741c4609784da654b Mon Sep 17 00:00:00 2001 From: Ahmad Hassan Date: Tue, 27 Jan 2026 17:58:12 +0500 Subject: [PATCH 1/5] Add Sunbeam feature functional test suite --- .../tests/functional/feature/.gitignore | 21 ++ .../tests/functional/feature/README.md | 64 +++++ .../tests/functional/feature/__init__.py | 8 + .../tests/functional/feature/conftest.py | 67 ++++++ .../functional/feature/features/__init__.py | 4 + .../tests/functional/feature/features/base.py | 218 ++++++++++++++++++ .../tests/functional/feature/features/caas.py | 49 ++++ .../tests/functional/feature/features/dns.py | 54 +++++ .../feature/features/images_sync.py | 57 +++++ .../feature/features/instance_recovery.py | 56 +++++ .../feature/features/loadbalancer.py | 56 +++++ .../feature/features/observability.py | 52 +++++ .../feature/features/orchestration.py | 56 +++++ .../feature/features/resource_optimization.py | 51 ++++ .../feature/features/shared_filesystem.py | 49 ++++ .../functional/feature/features/telemetry.py | 57 +++++ .../tests/functional/feature/features/tls.py | 160 +++++++++++++ .../functional/feature/features/vault.py | 34 +++ .../tests/functional/feature/pytest.ini | 22 ++ .../tests/functional/feature/requirements.txt | 4 + .../feature/test_config.yaml.example | 13 ++ .../tests/functional/feature/test_features.py | 126 ++++++++++ .../functional/feature/utils/__init__.py | 4 + .../tests/functional/feature/utils/juju.py | 145 ++++++++++++ .../tests/functional/feature/utils/sunbeam.py | 87 +++++++ sunbeam-python/tox.ini | 25 +- 26 files changed, 1526 insertions(+), 13 deletions(-) create mode 100644 sunbeam-python/tests/functional/feature/.gitignore create mode 100644 sunbeam-python/tests/functional/feature/README.md create mode 100644 sunbeam-python/tests/functional/feature/__init__.py create mode 100644 sunbeam-python/tests/functional/feature/conftest.py create mode 100644 sunbeam-python/tests/functional/feature/features/__init__.py create mode 100644 
sunbeam-python/tests/functional/feature/features/base.py create mode 100644 sunbeam-python/tests/functional/feature/features/caas.py create mode 100644 sunbeam-python/tests/functional/feature/features/dns.py create mode 100644 sunbeam-python/tests/functional/feature/features/images_sync.py create mode 100644 sunbeam-python/tests/functional/feature/features/instance_recovery.py create mode 100644 sunbeam-python/tests/functional/feature/features/loadbalancer.py create mode 100644 sunbeam-python/tests/functional/feature/features/observability.py create mode 100644 sunbeam-python/tests/functional/feature/features/orchestration.py create mode 100644 sunbeam-python/tests/functional/feature/features/resource_optimization.py create mode 100644 sunbeam-python/tests/functional/feature/features/shared_filesystem.py create mode 100644 sunbeam-python/tests/functional/feature/features/telemetry.py create mode 100644 sunbeam-python/tests/functional/feature/features/tls.py create mode 100644 sunbeam-python/tests/functional/feature/features/vault.py create mode 100644 sunbeam-python/tests/functional/feature/pytest.ini create mode 100644 sunbeam-python/tests/functional/feature/requirements.txt create mode 100644 sunbeam-python/tests/functional/feature/test_config.yaml.example create mode 100644 sunbeam-python/tests/functional/feature/test_features.py create mode 100644 sunbeam-python/tests/functional/feature/utils/__init__.py create mode 100644 sunbeam-python/tests/functional/feature/utils/juju.py create mode 100644 sunbeam-python/tests/functional/feature/utils/sunbeam.py diff --git a/sunbeam-python/tests/functional/feature/.gitignore b/sunbeam-python/tests/functional/feature/.gitignore new file mode 100644 index 000000000..db362e0b5 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/.gitignore @@ -0,0 +1,21 @@ +test_config.yaml +features/adminrc +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python + +venv/ +env/ +ENV/ + +.vscode/ +.idea/ +*.swp +*.swo + +.pytest_cache/ 
+.coverage +htmlcov/ +*.log diff --git a/sunbeam-python/tests/functional/feature/README.md b/sunbeam-python/tests/functional/feature/README.md new file mode 100644 index 000000000..e6171e0b8 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/README.md @@ -0,0 +1,64 @@ +# Sunbeam Feature Functional Tests + +Functional tests for Sunbeam feature enablement/disablement. These tests +connect to an **existing Sunbeam deployment** and run the enable/verify/disable +lifecycle for each feature, logging timing and basic behaviour checks. + +The suite is designed to be run via `tox` from the `sunbeam-python` tree. + +## Prerequisites + +- **Existing Sunbeam deployment** already bootstrapped and reachable +- `sunbeam` CLI on `PATH` and configured to talk to that deployment + - e.g. `sunbeam deployment list` shows your deployment +- `openstack` CLI configured for that cloud + - e.g. `openstack endpoint list` works +- `juju` CLI installed and able to access the controller/model that backs the + Sunbeam deployment + +## Configuration + +Create a config file from the example: + +```bash +cd sunbeam-python +cp tests/functional/feature/test_config.yaml.example tests/functional/feature/test_config.yaml +``` + +Then edit `tests/functional/feature/test_config.yaml`: + +```yaml +sunbeam: + deployment_name: "ps6" # Name shown by `sunbeam deployment list` + +juju: + model: "openstack" # Juju model backing the cloud + # controller: "my-controller" # Optional; auto-detected if omitted +``` + +### Run the full feature functional suite + +```bash +tox -e functional-feature +``` + +### Run a single feature functional test + +You can pass standard `pytest` selectors through tox via `posargs`. 
For example: + +- **Instance Recovery**: + + ```bash + tox -e functional-feature -- tests/functional/feature/test_features.py::test_instance_recovery + ``` + +- **TLS CA**: + + ```bash + tox -e functional-feature -- tests/functional/feature/test_features.py::test_tls_ca + ``` + +## Notes + +- Disable failures are **logged and ignored** so that the suite continues + to the next feature, matching the behaviour of the original tests. diff --git a/sunbeam-python/tests/functional/feature/__init__.py b/sunbeam-python/tests/functional/feature/__init__.py new file mode 100644 index 000000000..f3adc6677 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Sunbeam feature functional test suite. + +These tests exercise `sunbeam enable/disable` for individual features +against an existing Sunbeam deployment. +""" diff --git a/sunbeam-python/tests/functional/feature/conftest.py b/sunbeam-python/tests/functional/feature/conftest.py new file mode 100644 index 000000000..9bde47e33 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/conftest.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Pytest configuration and fixtures for Sunbeam feature functional tests.""" + +from pathlib import Path + +import pytest +import yaml + +from .utils.juju import JujuClient +from .utils.sunbeam import SunbeamClient + + +def pytest_addoption(parser): + """Add custom command-line options.""" + parser.addoption( + "--config", + action="store", + default="test_config.yaml", + help="Path to test configuration file", + ) + + +@pytest.fixture(scope="session") +def test_config(request): + """Load test configuration from YAML file.""" + config_path = request.config.getoption("--config") + # Resolve relative to this feature functional directory + config_file = Path(__file__).parent / config_path + + if not 
config_file.exists(): + pytest.skip(f"Configuration file not found: {config_file}") + + with open(config_file, "r") as f: + config = yaml.safe_load(f) + + return config + + +@pytest.fixture(scope="session") +def sunbeam_client(test_config): + """Create Sunbeam client for test session.""" + deployment_name = test_config.get("sunbeam", {}).get("deployment_name") + if not deployment_name: + pytest.skip("deployment_name not configured in test_config.yaml") + + client = SunbeamClient(deployment_name) + + if not client.is_connected(): + pytest.skip(f"Cannot connect to Sunbeam deployment '{deployment_name}'.") + + return client + + +@pytest.fixture(scope="session") +def juju_client(test_config): + """Create Juju client for test session.""" + model = test_config.get("juju", {}).get("model", "openstack") + controller = test_config.get("juju", {}).get("controller") + + client = JujuClient(model=model, controller=controller) + + if not client.is_connected(): + pytest.skip(f"Cannot connect to Juju model '{model}'.") + + return client diff --git a/sunbeam-python/tests/functional/feature/features/__init__.py b/sunbeam-python/tests/functional/feature/features/__init__.py new file mode 100644 index 000000000..0f78a8aa4 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Feature test classes for Sunbeam feature functional tests.""" diff --git a/sunbeam-python/tests/functional/feature/features/base.py b/sunbeam-python/tests/functional/feature/features/base.py new file mode 100644 index 000000000..dcdc7c4b3 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/base.py @@ -0,0 +1,218 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Base class for Sunbeam feature functional tests.""" + +import logging +import os +import time +from pathlib import Path +from typing import Dict, List, 
Optional + +from ..utils.juju import JujuClient +from ..utils.sunbeam import SunbeamClient + +logger = logging.getLogger(__name__) + + +class BaseFeatureTest: + """Base class for testing Sunbeam features.""" + + feature_name: str = "" + expected_units: List[str] = [] + expected_applications: List[str] = [] + timeout_seconds: int = 300 + enable_args: List[str] = [] + disable_args: List[str] = [] + + def __init__( + self, + sunbeam_client: SunbeamClient, + juju_client: JujuClient, + config: Optional[Dict] = None, + ): + self.sunbeam = sunbeam_client + self.juju = juju_client + self.config = config or {} + + feature_config = self.config.get("features", {}).get(self.feature_name, {}) + self.expected_units = feature_config.get("expected_units", self.expected_units) + self.expected_applications = feature_config.get( + "expected_applications", + self.expected_applications, + ) + self.timeout_seconds = feature_config.get( + "timeout_seconds", + self.timeout_seconds, + ) + self.enable_args = feature_config.get("enable_args", self.enable_args) + self.disable_args = feature_config.get("disable_args", self.disable_args) + + self._ensure_openstack_env() + + def enable(self) -> bool: + """Enable the feature.""" + logger.info("Enabling feature: '%s'", self.feature_name) + return self.sunbeam.enable_feature( + self.feature_name, + extra_args=self.enable_args, + ) + + def disable(self) -> bool: + """Disable the feature. + + Returns True if successful, False otherwise. + """ + logger.info("Disabling feature: '%s'", self.feature_name) + try: + return self.sunbeam.disable_feature( + self.feature_name, + extra_args=self.disable_args, + ) + except Exception as exc: # noqa: BLE001 + logger.warning( + "Failed to disable feature '%s': %s", + self.feature_name, + exc, + ) + return False + + def run_full_lifecycle(self) -> bool: + """Run enable/disable lifecycle with timing. + + Disable failures are logged but do not fail the overall test. 
+ """ + logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) + + enable_start = time.time() + logger.info("[ENABLE] Starting enable for '%s'...", self.feature_name) + enable_success = self.enable() + enable_duration = time.time() - enable_start + if enable_success: + logger.info( + "[ENABLE] SUCCESS for '%s' - Time taken: %.2f seconds", + self.feature_name, + enable_duration, + ) + else: + logger.error( + "[ENABLE] FAILED for '%s' - Time taken: %.2f seconds", + self.feature_name, + enable_duration, + ) + return False + + try: + self.verify_validate_feature_behavior() + except Exception: # noqa: BLE001 + logger.exception( + "Validation failed for feature '%s' after enable", self.feature_name + ) + # Best-effort cleanup – if disable also fails, log and continue. + try: + self.disable() + except Exception: # noqa: BLE001 + logger.warning( + "Disable also failed while handling validation error for '%s'", + self.feature_name, + ) + return False + + disable_start = time.time() + logger.info("[DISABLE] Starting disable for '%s'...", self.feature_name) + disable_success = self.disable() + disable_duration = time.time() - disable_start + if disable_success: + logger.info( + "[DISABLE] SUCCESS for '%s' - Time taken: %.2f seconds", + self.feature_name, + disable_duration, + ) + else: + logger.warning( + "[DISABLE] FAILED for '%s' - Time taken: %.2f seconds (continuing anyway)", + self.feature_name, + disable_duration, + ) + + total_duration = time.time() - enable_start + logger.info( + "[SUMMARY] Feature '%s' - Enable: %.2fs, Disable: %.2fs (%s), Total: %.2fs", + self.feature_name, + enable_duration, + disable_duration, + "SUCCESS" if disable_success else "FAILED", + total_duration, + ) + return True + + def verify_enabled(self) -> None: + """Verify that expected applications and units are present. + + This is a boilerplate method for future use. Currently not called + by default, but can be overridden in subclasses to add verification. 
+ """ + pass + + def validate_feature_behavior(self) -> None: + """Validate that the feature is working correctly. + + This is a boilerplate method for future use. Currently not called + by default, but can be overridden in subclasses to add functionality tests. + """ + pass + + def verify_validate_feature_behavior(self) -> None: + """Simple verification that feature is enabled and basic check passes. + + This is a simple method that can be called after enable to verify + the feature is working. Override in subclasses for feature-specific checks. + """ + logger.info("Verifying feature '%s' is enabled...", self.feature_name) + if self.expected_applications: + for app in self.expected_applications: + if self.juju.has_application(app): + logger.info("Application '%s' found", app) + else: + logger.warning( + "Application '%s' not found (may still be deploying)", app + ) + logger.info("Basic verification completed for feature '%s'", self.feature_name) + + def _ensure_openstack_env(self) -> None: + """Load OpenStack credentials from adminrc if needed. + + This avoids repeating sourcing logic across tests and keeps credentials + out of the code. If OS_AUTH_URL is already set, this is a no-op. 
+ """ + if os.environ.get("OS_AUTH_URL"): + return + + adminrc_path = Path(__file__).resolve().parent / "adminrc" + if not adminrc_path.exists(): + logger.debug( + "adminrc file not found at %s; relying on existing environment", + adminrc_path, + ) + return + + try: + for line in adminrc_path.read_text().splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if not line.startswith("export "): + continue + _, rest = line.split("export ", 1) + if "=" not in rest: + continue + key, value = rest.split("=", 1) + key = key.strip() + value = value.strip().strip('"').strip("'") + os.environ.setdefault(key, value) + logger.info("Loaded OpenStack credentials from %s", adminrc_path) + except Exception: # noqa: BLE001 + logger.exception( + "Failed to load OpenStack credentials from %s", + adminrc_path, + ) diff --git a/sunbeam-python/tests/functional/feature/features/caas.py b/sunbeam-python/tests/functional/feature/features/caas.py new file mode 100644 index 000000000..51de55971 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/caas.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for caas feature. + +Container as a Service (Magnum) allows managing Kubernetes clusters via OpenStack. +Functionality is validated via the Magnum (COE) API. +""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class CaaSTest(BaseFeatureTest): + """Test caas feature enablement/disablement.""" + + feature_name = "caas" + expected_units: list[str] = [] + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Magnum (COE) API is reachable. + + We call `openstack coe cluster list` to confirm the API is up. 
+ """ + logger.info("Verifying CaaS (Magnum) service is available...") + try: + subprocess.run( + ["openstack", "coe", "cluster", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except subprocess.CalledProcessError as exc: + logger.warning("Failed to list COE clusters: %s", exc.stderr) + raise AssertionError( + f"CaaS (Magnum) service not accessible: {exc.stderr}" + ) from exc + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying CaaS service: %s", exc) + raise AssertionError(f"CaaS service verification failed: {exc}") from exc + + logger.info("CaaS (Magnum) service verified via `openstack coe cluster list`") diff --git a/sunbeam-python/tests/functional/feature/features/dns.py b/sunbeam-python/tests/functional/feature/features/dns.py new file mode 100644 index 000000000..597ab79f1 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/dns.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for dns feature. + +DNS requires nameservers as arguments, so we use dummy nameservers for testing. +DNS is a simple feature with no direct feature dependencies (besides the required +nameservers argument). Functionality is validated via the Designate (DNS) API. +""" + +import logging + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class DnsTest(BaseFeatureTest): + """Test dns feature enablement/disablement.""" + + feature_name = "dns" + # DNS requires nameservers argument - using dummy values for testing + enable_args: list[str] = ["ns1.example.com.", "ns2.example.com."] + expected_units: list[str] = [] + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that DNS as a Service is reachable. + + We call `sunbeam dns address` to confirm that the + Designate service is registered and accessible. 
+ """ + logger.info("Verifying DNS service endpoints are available...") + try: + self.sunbeam.run(["dns", "address"]) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying DNS service: %s", exc) + raise AssertionError(f"DNS service verification failed: {exc}") from exc + + logger.info("DNS service endpoints verified via `sunbeam dns address`") + + def run_full_lifecycle(self) -> bool: + """Enable dns, perform basic test, then disable it.""" + logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) + + self.enable() + self.verify_validate_feature_behavior() + + disable_success = self.disable() + if not disable_success: + logger.warning("DNS disable failed, but continuing test sequence") + + return True diff --git a/sunbeam-python/tests/functional/feature/features/images_sync.py b/sunbeam-python/tests/functional/feature/features/images_sync.py new file mode 100644 index 000000000..d4480ee7a --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/images_sync.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for images-sync feature. + +Images-sync is a simple feature with no dependencies. +Functionality is validated via the OpenStack Image API. +""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class ImagesSyncTest(BaseFeatureTest): + """Test images-sync feature enablement/disablement.""" + + feature_name = "images-sync" + expected_units: list[str] = [] + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Image service is reachable. + + We call `openstack image list` to confirm that Glance is responding. 
+ """ + logger.info("Verifying Image service (Glance) is available...") + try: + subprocess.run( + ["openstack", "image", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying Image service: %s", exc) + raise AssertionError(f"Image service verification failed: {exc}") from exc + + logger.info("Image service verified via `openstack image list`") + + def run_full_lifecycle(self) -> bool: + """Enable images-sync, perform basic test, then disable it.""" + logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) + + self.enable() + self.verify_validate_feature_behavior() + + disable_success = self.disable() + if not disable_success: + logger.warning("Images-sync disable failed, but continuing test sequence") + + return True diff --git a/sunbeam-python/tests/functional/feature/features/instance_recovery.py b/sunbeam-python/tests/functional/feature/features/instance_recovery.py new file mode 100644 index 000000000..6110c2614 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/instance_recovery.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for instance-recovery feature.""" + +import subprocess + +from .base import BaseFeatureTest + + +class InstanceRecoveryTest(BaseFeatureTest): + """Test instance-recovery feature enablement/disablement.""" + + # CLI feature name + feature_name = "instance-recovery" + expected_applications = [ + "masakari", + "masakari-mysql-router", + "consul-management", + "consul-storage", + "consul-tenant", + ] + expected_units = [ + "masakari/0", + "masakari-mysql-router/0", + "consul-management/0", + "consul-storage/0", + "consul-tenant/0", + ] + timeout_seconds = 900 + + def validate_feature_behavior(self) -> None: + """Run a small smoke test against the Masakari API. 
+ + We call `openstack segment list` to confirm Masakari is responding + and that the CLI can talk to the Instance Recovery control plane. + """ + cmd = [ + "openstack", + "segment", + "list", + "-c", + "name", + "-c", + "service_type", + ] + result = subprocess.run(cmd, check=True, capture_output=True, text=True) + if not result.stdout.strip(): + raise AssertionError("openstack segment list returned no data") + + def run_full_lifecycle(self) -> bool: + """Enable instance-recovery, verify resources and behavior, then disable it.""" + self.enable() + self.validate_feature_behavior() + self.disable() + return True diff --git a/sunbeam-python/tests/functional/feature/features/loadbalancer.py b/sunbeam-python/tests/functional/feature/features/loadbalancer.py new file mode 100644 index 000000000..a7e377f35 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/loadbalancer.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for loadbalancer feature. + +Loadbalancer is a simple feature with no dependencies. +Deploys Octavia, the OpenStack Load Balancer as a Service. 
+""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class LoadbalancerTest(BaseFeatureTest): + """Test loadbalancer feature enablement/disablement.""" + + feature_name = "loadbalancer" + expected_applications: list[str] = ["octavia"] + expected_units: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that loadbalancer service (Octavia) is working.""" + logger.info("Verifying loadbalancer service (Octavia) is available...") + + try: + result = subprocess.run( + ["openstack", "loadbalancer", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + logger.info("Loadbalancer service (Octavia) is accessible") + logger.debug("Loadbalancer list output: %s", result.stdout[:200]) + + except Exception as e: + logger.warning("Error checking loadbalancer service: %s", e) + raise AssertionError(f"Loadbalancer service verification failed: {e}") + + def run_full_lifecycle(self) -> bool: + """Enable loadbalancer, perform basic test, then disable it.""" + logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) + + self.enable() + self.verify_validate_feature_behavior() + + disable_success = self.disable() + if not disable_success: + logger.warning("Loadbalancer disable failed, but continuing test sequence") + + return True diff --git a/sunbeam-python/tests/functional/feature/features/observability.py b/sunbeam-python/tests/functional/feature/features/observability.py new file mode 100644 index 000000000..9bef407d0 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/observability.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for observability feature. + +Observability integrates Canonical OpenStack with COS. + +For this functional test we exercise the simple embedded workflow from the +documentation: + +1. 
`sunbeam enable observability embedded` +2. `sunbeam observability dashboard-url` +3. `sunbeam disable observability embedded` +""" + +import logging + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class ObservabilityTest(BaseFeatureTest): + """Test observability feature enablement/disablement.""" + + feature_name = "observability" + enable_args: list[str] = ["embedded"] + disable_args: list[str] = ["embedded"] + expected_units: list[str] = [] + expected_applications: list[str] = [] + timeout_seconds = 900 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the observability dashboard URL is available. + + This uses `sunbeam observability dashboard-url` from the docs to + confirm that the embedded COS deployment is responding. + """ + logger.info("Fetching observability dashboard URL...") + try: + result = self.sunbeam.run(["observability", "dashboard-url"]) + except Exception as exc: # noqa: BLE001 + logger.warning( + "Error while retrieving observability dashboard URL: %s", + exc, + ) + raise AssertionError( + f"Observability feature verification failed: {exc}" + ) from exc + + url = result.stdout.strip() + logger.info("Observability dashboard URL: %s", url) diff --git a/sunbeam-python/tests/functional/feature/features/orchestration.py b/sunbeam-python/tests/functional/feature/features/orchestration.py new file mode 100644 index 000000000..0f9a502e6 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/orchestration.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for orchestration feature. + +Orchestration is a simple feature with no dependencies. +Deploys Heat, the OpenStack Orchestration service. 
+""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class OrchestrationTest(BaseFeatureTest): + """Test orchestration feature enablement/disablement.""" + + feature_name = "orchestration" + expected_applications: list[str] = ["heat"] + expected_units: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that orchestration service (Heat) is working.""" + logger.info("Verifying orchestration service (Heat) is available...") + + try: + result = subprocess.run( + ["openstack", "stack", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + logger.info("Orchestration service (Heat) is accessible") + logger.debug("Stack list output: %s", result.stdout[:200]) + + except Exception as e: + logger.warning("Error checking orchestration service: %s", e) + raise AssertionError(f"Orchestration service verification failed: {e}") + + def run_full_lifecycle(self) -> bool: + """Enable orchestration, perform basic test, then disable it.""" + logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) + + self.enable() + self.verify_validate_feature_behavior() + + disable_success = self.disable() + if not disable_success: + logger.warning("Orchestration disable failed, but continuing test sequence") + + return True diff --git a/sunbeam-python/tests/functional/feature/features/resource_optimization.py b/sunbeam-python/tests/functional/feature/features/resource_optimization.py new file mode 100644 index 000000000..c6828d22b --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/resource_optimization.py @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for resource-optimization feature. + +Resource Optimization provides Watcher as a service. +Functionality is validated via the Watcher (optimize) API. 
+""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class ResourceOptimizationTest(BaseFeatureTest): + """Test resource-optimization feature enablement/disablement.""" + + feature_name = "resource-optimization" + expected_units: list[str] = [] + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Watcher (resource optimization) API is reachable. + + We call `openstack optimize goal list` to confirm the API is up. + """ + logger.info("Verifying Resource Optimization (Watcher) service is available...") + try: + subprocess.run( + ["openstack", "optimize", "goal", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning( + "Error while verifying Resource Optimization service: %s", + exc, + ) + raise AssertionError( + f"Resource Optimization service verification failed: {exc}" + ) from exc + + logger.info( + "Resource Optimization service verified via `openstack optimize goal list`" + ) diff --git a/sunbeam-python/tests/functional/feature/features/shared_filesystem.py b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py new file mode 100644 index 000000000..66117213b --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for shared-filesystem feature. + +Shared Filesystems provides Manila-based file share services. +Functionality is validated via the Manila API. 
+""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class SharedFilesystemTest(BaseFeatureTest): + """Test shared-filesystem feature enablement/disablement.""" + + feature_name = "shared-filesystem" + expected_units: list[str] = [] + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Shared Filesystems (Manila) API is reachable. + + We call `openstack share list` to confirm the API is up. + """ + logger.info("Verifying Shared Filesystems (Manila) service is available...") + try: + subprocess.run( + ["openstack", "share", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning( + "Error while verifying Shared Filesystems service: %s", + exc, + ) + raise AssertionError( + f"Shared Filesystems service verification failed: {exc}" + ) from exc + + logger.info("Shared Filesystems service verified via `openstack share list`") diff --git a/sunbeam-python/tests/functional/feature/features/telemetry.py b/sunbeam-python/tests/functional/feature/features/telemetry.py new file mode 100644 index 000000000..6fbd79b81 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/telemetry.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for telemetry feature. + +Telemetry is a simple feature with no dependencies. +Deploys Ceilometer, Aodh, Gnocchi, and OpenStack Exporter. 
+""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class TelemetryTest(BaseFeatureTest): + """Test telemetry feature enablement/disablement.""" + + feature_name = "telemetry" + expected_applications: list[str] = ["ceilometer", "gnocchi", "aodh"] + expected_units: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that telemetry services are working.""" + logger.info("Verifying telemetry services are available...") + + # Check if alarm service (Aodh) is accessible + try: + result = subprocess.run( + ["openstack", "alarm", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + logger.info("Telemetry alarm service (Aodh) is accessible") + logger.debug("Alarm list output: %s", result.stdout[:200]) + + except Exception as e: + logger.warning("Error checking telemetry services: %s", e) + raise AssertionError(f"Telemetry service verification failed: {e}") + + def run_full_lifecycle(self) -> bool: + """Enable telemetry, perform basic test, then disable it.""" + logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) + + self.enable() + self.verify_validate_feature_behavior() + + disable_success = self.disable() + if not disable_success: + logger.warning("Telemetry disable failed, but continuing test sequence") + + return True diff --git a/sunbeam-python/tests/functional/feature/features/tls.py b/sunbeam-python/tests/functional/feature/features/tls.py new file mode 100644 index 000000000..17a856292 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/tls.py @@ -0,0 +1,160 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for tls feature (CA mode). 
+ +TLS enablement has multiple methods in Sunbeam, but this functional test +suite only exercises the TLS CA path: + +- TLS CA: `sunbeam enable tls ca` (requires CA certificates) +""" + +import base64 +import logging +import subprocess +import tempfile +import time +from pathlib import Path +from typing import Tuple + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +def generate_self_signed_ca_certificate() -> Tuple[str, str]: + """Generate a self-signed CA certificate. + + Returns a tuple of (ca_cert_base64, ca_chain_base64). For a simple self-signed CA, + the chain is the same as the cert. TLS CA currently only uses the CA certificate. + """ + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + + key_path = tmp_path / "ca.key" + subprocess.run( + ["openssl", "genrsa", "-out", str(key_path), "4096"], + check=True, + capture_output=True, + ) + + cert_path = tmp_path / "ca.crt" + subprocess.run( + [ + "openssl", + "req", + "-new", + "-x509", + "-days", + "365", + "-key", + str(key_path), + "-out", + str(cert_path), + "-subj", + "/C=US/ST=State/L=City/O=TestOrg/CN=TestCA", + "-extensions", + "v3_ca", + "-config", + "/dev/stdin", + ], + input=b"""[req] +distinguished_name = req_distinguished_name +[req_distinguished_name] +[v3_ca] +basicConstraints = critical,CA:TRUE +keyUsage = critical,keyCertSign,cRLSign +subjectKeyIdentifier = hash +authorityKeyIdentifier = keyid:always,issuer +""", + check=True, + capture_output=True, + ) + + ca_cert = cert_path.read_text() + ca_cert_base64 = base64.b64encode(ca_cert.encode()).decode() + + ca_chain_base64 = ca_cert_base64 + + return (ca_cert_base64, ca_chain_base64) + + +class TlsCaTest(BaseFeatureTest): + """Test TLS CA mode enablement/disablement. + + TLS CA mode uses Certificate Authority certificates for TLS. 
+ This test verifies that: + - TLS CA can be enabled (with self-signed CA certificates) + - Endpoints are exposed over HTTPS (both public and internal) + - Basic OpenStack operations work (e.g., listing images) + """ + + feature_name = "tls" + enable_args: list[str] = [] + disable_args: list[str] = ["ca"] + expected_applications = [ + "manual-tls-certificates", + ] + expected_units = [ + "manual-tls-certificates/0", + ] + timeout_seconds = 600 + + def __init__(self, *args, **kwargs): + """Initialize and generate CA certificates.""" + super().__init__(*args, **kwargs) + self.ca_cert_base64, _ = generate_self_signed_ca_certificate() + self.enable_args = [ + "ca", + "--ca", + self.ca_cert_base64, + ] + + def enable(self) -> bool: + """Enable TLS CA feature (without --accept-defaults flag).""" + logger.info("Enabling feature: '%s'", self.feature_name) + return self.sunbeam.enable_feature( + self.feature_name, + extra_args=self.enable_args, + ) + + def disable(self) -> bool: + """Disable TLS CA feature (without --accept-defaults flag).""" + logger.info("Disabling feature: '%s'", self.feature_name) + try: + return self.sunbeam.disable_feature( + self.feature_name, + extra_args=self.disable_args, + ) + except Exception as exc: # noqa: BLE001 + logger.warning( + "Failed to disable feature '%s': %s", + self.feature_name, + exc, + ) + return False + + def _ensure_tls_ca_disabled(self) -> bool: + """Ensure TLS CA is disabled before enabling (cleanup from previous runs).""" + if self.juju.has_application("manual-tls-certificates"): + logger.info("TLS CA is already enabled, disabling first...") + try: + self.disable() + # Wait a bit for cleanup + time.sleep(5) + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to disable existing TLS CA: %s", exc) + return False + return True + + def run_full_lifecycle(self) -> bool: + """Enable TLS CA, perform basic test, then disable it.""" + if not self._ensure_tls_ca_disabled(): + logger.warning("Could not ensure TLS CA is 
disabled, continuing anyway...") + + self.enable() + disable_success = self.disable() + if not disable_success: + logger.warning("TLS CA disable failed, but continuing test sequence") + + return True diff --git a/sunbeam-python/tests/functional/feature/features/vault.py b/sunbeam-python/tests/functional/feature/features/vault.py new file mode 100644 index 000000000..78bd44290 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/vault.py @@ -0,0 +1,34 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for vault feature. + +Vault provides the HashiCorp Vault service used by other features. +Functionality is validated via the `sunbeam vault status` command. +""" + +import logging + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class VaultTest(BaseFeatureTest): + """Test vault feature enablement/disablement.""" + + feature_name = "vault" + expected_units: list[str] = [] + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that Vault is reachable via sunbeam.""" + logger.info("Verifying Vault status via `sunbeam vault status`...") + try: + self.sunbeam.run(["vault", "status"]) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying Vault service: %s", exc) + raise AssertionError(f"Vault service verification failed: {exc}") from exc + + logger.info("Vault service verified via `sunbeam vault status`") diff --git a/sunbeam-python/tests/functional/feature/pytest.ini b/sunbeam-python/tests/functional/feature/pytest.ini new file mode 100644 index 000000000..acba1481b --- /dev/null +++ b/sunbeam-python/tests/functional/feature/pytest.ini @@ -0,0 +1,22 @@ +[pytest] +# Pytest configuration for feature functional tests + +# Test discovery patterns +python_files = test_*.py +python_classes = Test* +python_functions = test_* + +markers = + functional: marks tests as functional 
(deselect with '-m "not functional"') + slow: marks tests as slow (deselect with '-m "not slow"') + +addopts = + -v + --tb=short + --strict-markers + +timeout = 1800 + +log_cli = true +log_cli_level = INFO + diff --git a/sunbeam-python/tests/functional/feature/requirements.txt b/sunbeam-python/tests/functional/feature/requirements.txt new file mode 100644 index 000000000..7ef618ac2 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/requirements.txt @@ -0,0 +1,4 @@ +pytest>=7.4.0 +pytest-timeout>=2.1.0 +pyyaml>=6.0 +jubilant>=1.0.0 diff --git a/sunbeam-python/tests/functional/feature/test_config.yaml.example b/sunbeam-python/tests/functional/feature/test_config.yaml.example new file mode 100644 index 000000000..2507a8535 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/test_config.yaml.example @@ -0,0 +1,13 @@ +# Sunbeam Feature Functional Test Configuration +# Copy this file to test_config.yaml and fill in your values + +sunbeam: + # Deployment name in Sunbeam (from `sunbeam deployment list`) + deployment_name: "ps6" + +juju: + # Juju model name (default: "openstack") + model: "openstack" + # Juju controller (auto-detected from sunbeam if not specified) + # controller: "your-controller" + diff --git a/sunbeam-python/tests/functional/feature/test_features.py b/sunbeam-python/tests/functional/feature/test_features.py new file mode 100644 index 000000000..11b748907 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/test_features.py @@ -0,0 +1,126 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Functional tests for Sunbeam features. + +These tests connect to an existing Sunbeam cluster and test feature +enablement/disablement lifecycle. 
+""" + +import logging + +import pytest + +from .features.caas import CaaSTest +from .features.dns import DnsTest +from .features.images_sync import ImagesSyncTest +from .features.instance_recovery import InstanceRecoveryTest +from .features.loadbalancer import LoadbalancerTest +from .features.observability import ObservabilityTest +from .features.orchestration import OrchestrationTest +from .features.resource_optimization import ResourceOptimizationTest +from .features.shared_filesystem import SharedFilesystemTest +from .features.telemetry import TelemetryTest +from .features.tls import TlsCaTest +from .features.vault import VaultTest + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@pytest.mark.functional +def test_instance_recovery(sunbeam_client, juju_client, test_config): + """Test instance-recovery feature lifecycle (enable/disable with verification).""" + feature_test = InstanceRecoveryTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Instance recovery feature test failed" + + +@pytest.mark.functional +def test_caas(sunbeam_client, juju_client, test_config): + """Test caas feature lifecycle (enable/disable only).""" + feature_test = CaaSTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "CaaS feature test failed" + + +@pytest.mark.functional +def test_dns(sunbeam_client, juju_client, test_config): + """Test dns feature lifecycle (enable/disable only).""" + feature_test = DnsTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "DNS feature test failed" + + +@pytest.mark.functional +def test_images_sync(sunbeam_client, juju_client, test_config): + """Test images-sync feature lifecycle (enable/disable only).""" + feature_test = ImagesSyncTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Images-sync feature test failed" + + +@pytest.mark.functional +def 
test_loadbalancer(sunbeam_client, juju_client, test_config): + """Test loadbalancer feature lifecycle (enable/disable only).""" + feature_test = LoadbalancerTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Loadbalancer feature test failed" + + +@pytest.mark.functional +def test_orchestration(sunbeam_client, juju_client, test_config): + """Test orchestration feature lifecycle (enable/disable only).""" + feature_test = OrchestrationTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Orchestration feature test failed" + + +@pytest.mark.functional +def test_resource_optimization(sunbeam_client, juju_client, test_config): + """Test resource-optimization feature lifecycle (enable/disable only).""" + feature_test = ResourceOptimizationTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), ( + "Resource-optimization feature test failed" + ) + + +@pytest.mark.functional +def test_shared_filesystem(sunbeam_client, juju_client, test_config): + """Test shared-filesystem feature lifecycle (enable/disable only).""" + feature_test = SharedFilesystemTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Shared-filesystem feature test failed" + + +@pytest.mark.functional +def test_telemetry(sunbeam_client, juju_client, test_config): + """Test telemetry feature lifecycle (enable/disable only).""" + feature_test = TelemetryTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Telemetry feature test failed" + + +@pytest.mark.functional +def test_observability(sunbeam_client, juju_client, test_config): + """Test observability feature lifecycle (enable/disable only).""" + feature_test = ObservabilityTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Observability feature test failed" + + +@pytest.mark.functional +def test_maintenance(sunbeam_client, 
juju_client, test_config): + """Placeholder for maintenance feature test (not yet enabled).""" + pytest.skip("maintenance feature test not yet enabled in CI") + + +@pytest.mark.functional +def test_pro(sunbeam_client, juju_client, test_config): + """Placeholder for pro feature test (not yet enabled).""" + pytest.skip("pro feature test not yet enabled in CI") + + +@pytest.mark.functional +def test_tls_ca(sunbeam_client, juju_client, test_config): + """Test TLS CA mode lifecycle (enable/disable with verification).""" + feature_test = TlsCaTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "TLS CA feature test failed" + + +@pytest.mark.functional +def test_vault(sunbeam_client, juju_client, test_config): + """Test vault feature lifecycle (enable/disable only).""" + feature_test = VaultTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Vault feature test failed" diff --git a/sunbeam-python/tests/functional/feature/utils/__init__.py b/sunbeam-python/tests/functional/feature/utils/__init__.py new file mode 100644 index 000000000..42e1068cd --- /dev/null +++ b/sunbeam-python/tests/functional/feature/utils/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Utility wrappers for Sunbeam feature functional tests.""" diff --git a/sunbeam-python/tests/functional/feature/utils/juju.py b/sunbeam-python/tests/functional/feature/utils/juju.py new file mode 100644 index 000000000..384711460 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/utils/juju.py @@ -0,0 +1,145 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Juju CLI wrapper using Jubilant library for feature functional tests.""" + +import json +import logging +from typing import Dict, List, Optional, Set + +from jubilant import Juju + +logger = logging.getLogger(__name__) + + +class JujuClient: + """Client for 
interacting with Juju using Jubilant.""" + + def __init__(self, model: str = "openstack", controller: Optional[str] = None): + self.model = model + self.controller = controller + self._juju: Optional[Juju] = None + + @property + def juju(self) -> Juju: + """Get or create Jubilant Juju instance.""" + if self._juju is None: + self._juju = Juju() + if self.model: + try: + self._juju.cli("switch", self.model) + except Exception as exc: # noqa: BLE001 + # Log but continue - the model might already be active + logger.debug("Could not switch to model %s: %s", self.model, exc) + return self._juju + + def is_connected(self) -> bool: + """Check if we can connect to Juju.""" + result = self.juju.cli("status", "--format", "json") + return bool(result) + + def get_applications(self) -> Set[str]: + """Get list of all applications in the model.""" + result_str = self.juju.cli("status", "--format", "json") + status = json.loads(result_str) + applications: Set[str] = set() + + if "applications" in status: + applications.update(status["applications"].keys()) + + return applications + + def get_units(self) -> Set[str]: + """Get list of all units in the model.""" + result_str = self.juju.cli("status", "--format", "json") + status = json.loads(result_str) + units: Set[str] = set() + + if "applications" in status: + for app_data in status["applications"].values(): + if "units" in app_data: + for unit_name in app_data["units"].keys(): + units.add(unit_name) + + return units + + def has_application(self, application_name: str) -> bool: + """Check if an application exists.""" + applications = self.get_applications() + return application_name in applications + + def has_unit(self, unit_name: str) -> bool: + """Check if a unit exists.""" + units = self.get_units() + return unit_name in units + + def wait_for_application(self, application_name: str, timeout: int = 300) -> bool: + """Wait for an application to appear using Jubilant's wait mechanism.""" + if 
self.has_application(application_name): + logger.info( + "Application '%s' already exists, skipping wait", + application_name, + ) + return True + + def app_exists(status) -> bool: + return hasattr(status, "apps") and application_name in status.apps + + self.juju.wait(app_exists, timeout=timeout, delay=1.0) + return True + + def wait_for_unit(self, unit_name: str, timeout: int = 300) -> bool: + """Wait for a unit to appear using Jubilant's wait mechanism.""" + if self.has_unit(unit_name): + logger.info("Unit '%s' already exists, skipping wait", unit_name) + return True + + def unit_exists(status) -> bool: + if not hasattr(status, "apps"): + return False + for app_data in status.apps.values(): + if hasattr(app_data, "units") and unit_name in app_data.units: + return True + return False + + self.juju.wait(unit_exists, timeout=timeout, delay=1.0) + return True + + def wait_for_application_ready( + self, + application_name: str, + timeout: int = 600, + ) -> bool: + """Wait for an application to be in 'active' state.""" + + def app_active(status) -> bool: + if not hasattr(status, "apps") or application_name not in status.apps: + return False + app = status.apps[application_name] + return hasattr(app, "app_status") and app.app_status.current == "active" + + self.juju.wait(app_active, timeout=timeout, delay=1.0) + return True + + def verify_applications_exist( + self, + expected_applications: List[str], + ) -> Dict[str, bool]: + """Verify that expected applications exist.""" + actual_applications = self.get_applications() + results: Dict[str, bool] = {} + + for app in expected_applications: + results[app] = app in actual_applications + + return results + + def verify_units_exist(self, expected_units: List[str]) -> Dict[str, bool]: + """Verify that expected units exist.""" + actual_units = self.get_units() + results: Dict[str, bool] = {} + + for unit in expected_units: + results[unit] = unit in actual_units + + return results diff --git 
a/sunbeam-python/tests/functional/feature/utils/sunbeam.py b/sunbeam-python/tests/functional/feature/utils/sunbeam.py new file mode 100644 index 000000000..307b67b99 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/utils/sunbeam.py @@ -0,0 +1,87 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Sunbeam CLI wrapper for feature functional tests.""" + +import logging +import subprocess +from typing import List, Optional + +logger = logging.getLogger(__name__) + + +class SunbeamClient: + """Client for interacting with Sunbeam CLI.""" + + def __init__(self, deployment_name: str): + self.deployment_name = deployment_name + self._sunbeam_cmd = "/snap/bin/sunbeam" + + def _run_command(self, command: List[str]) -> subprocess.CompletedProcess: + """Run a sunbeam command and return the result.""" + full_command = [self._sunbeam_cmd] + command + logger.debug("Running: %s", " ".join(full_command)) + + result = subprocess.run( + full_command, + capture_output=True, + text=True, + check=False, + timeout=1800, + ) + + if result.returncode != 0: + logger.error( + "Command failed with exit code %d: %s", + result.returncode, + " ".join(full_command), + ) + if result.stderr: + logger.error("stderr: %s", result.stderr) + if result.stdout: + logger.error("stdout: %s", result.stdout) + result.check_returncode() + + return result + + def run(self, command: List[str]) -> subprocess.CompletedProcess: + """Public helper to run arbitrary sunbeam subcommands.""" + return self._run_command(command) + + def is_connected(self) -> bool: + """Check if we can connect to the Sunbeam deployment.""" + result = subprocess.run( + ["sunbeam", "deployment", "list"], + capture_output=True, + text=True, + timeout=30, + ) + return result.returncode == 0 and self.deployment_name in result.stdout + + def enable_feature( + self, + feature_name: str, + extra_args: Optional[List[str]] = None, + ) -> bool: + """Enable a Sunbeam feature.""" + cmd: List[str] = 
["enable", feature_name] + if extra_args: + cmd.extend(extra_args) + + self._run_command(cmd) + logger.info("Feature '%s' enabled successfully", feature_name) + return True + + def disable_feature( + self, + feature_name: str, + extra_args: Optional[List[str]] = None, + ) -> bool: + """Disable a Sunbeam feature.""" + cmd: List[str] = ["disable", feature_name] + if extra_args: + cmd.extend(extra_args) + + self._run_command(cmd) + logger.info("Feature '%s' disabled successfully", feature_name) + return True diff --git a/sunbeam-python/tox.ini b/sunbeam-python/tox.ini index 8dc81acfa..5666df3e7 100644 --- a/sunbeam-python/tox.ini +++ b/sunbeam-python/tox.ini @@ -1,9 +1,6 @@ [tox] envlist = unit,pep8,mypy skipsdist = True -# Automatic envs (pyXX) will only use the python version appropriate to that -# env and ignore basepython inherited from [testenv] if we set -# ignore_basepython_conflict. ignore_basepython_conflict = True [vars] @@ -25,14 +22,8 @@ setenv = OS_STDOUT_CAPTURE=1 description = Sunbeam unit tests commands = uv run {[vars]uv_flags} python -m pytest -vv tests/unit {posargs} -# The functional tests may have specific hardware requirements and are currently -# skipped by default. [testenv:functional] description = Sunbeam functional tests -# The snap can't access /tmp, we'll need to place manifests and other temporary -# files in the home directory. At the same time, we need to expose USER/LOGNAME, -# otherwise the Sunbeam group won't be initialized correctly and the Sunbeam -# commands will fail due to missing privileges. 
passenv = USER LOGNAME USERNAME @@ -42,6 +33,18 @@ commands = uv run {[vars]uv_flags} \ --basetemp={env:HOME}/.local/share/openstack/tmp \ {posargs} +[testenv:functional-feature] +description = Sunbeam feature functional tests (existing deployment) +passenv = USER + LOGNAME + USERNAME + HOME +commands = uv run {[vars]uv_flags} \ + python -m pytest -s -vv tests/functional/feature \ + --config=test_config.yaml \ + --basetemp={env:HOME}/.local/share/openstack/tmp \ + {posargs} + [testenv:fmt] description = Apply coding style standards to code deps = @@ -60,9 +63,6 @@ commands = [testenv:mypy] commands = uv run {[vars]uv_flags} mypy {[vars]src_path}/sunbeam - # TODO: consider uncommenting the following line once - # the unit tests pass the mypy check. - # uv run {[vars]uv_flags} mypy {[vars]tst_path}/unit uv run {[vars]uv_flags} mypy {[vars]tst_path}/functional [testenv:cover] @@ -87,7 +87,6 @@ deps = commands = sphinx-build -a -E -W -d doc/build/doctrees -b html doc/source doc/build/html sphinx-build -a -E -W -d doc/build/doctrees -b man doc/source doc/build/man - # Validate redirects (must be done after the docs build whereto doc/build/html/.htaccess doc/test/redirect-tests.txt [testenv:releasenotes] From 0d8b6d0458a13bbdfac5a460bf42ed8c298e1c5f Mon Sep 17 00:00:00 2001 From: Ahmad Hassan Date: Mon, 2 Feb 2026 15:22:04 +0500 Subject: [PATCH 2/5] Refine Sunbeam feature functional tests --- .../tests/functional/feature/README.md | 42 ++++++++++++ .../tests/functional/feature/conftest.py | 7 +- .../functional/feature/features/baremetal.py | 42 ++++++++++++ .../tests/functional/feature/features/base.py | 5 +- .../tests/functional/feature/features/caas.py | 34 ++++++++++ .../tests/functional/feature/features/dns.py | 14 ---- .../feature/features/images_sync.py | 14 ---- .../feature/features/instance_recovery.py | 14 ---- .../tests/functional/feature/features/ldap.py | 40 +++++++++++ .../feature/features/loadbalancer.py | 14 ---- .../feature/features/maintenance.py | 38 
+++++++++++ .../feature/features/observability.py | 1 - .../feature/features/orchestration.py | 14 ---- .../tests/functional/feature/features/pro.py | 61 +++++++++++++++++ .../feature/features/resource_optimization.py | 1 - .../functional/feature/features/secrets.py | 57 ++++++++++++++++ .../feature/features/shared_filesystem.py | 1 - .../functional/feature/features/telemetry.py | 14 ---- .../tests/functional/feature/features/tls.py | 37 +--------- .../functional/feature/features/validation.py | 38 +++++++++++ .../functional/feature/features/vault.py | 1 - .../tests/functional/feature/test_features.py | 68 +++++++++++++++++-- .../tests/functional/feature/utils/juju.py | 10 --- .../tests/functional/feature/utils/sunbeam.py | 2 +- 24 files changed, 427 insertions(+), 142 deletions(-) create mode 100644 sunbeam-python/tests/functional/feature/features/baremetal.py create mode 100644 sunbeam-python/tests/functional/feature/features/ldap.py create mode 100644 sunbeam-python/tests/functional/feature/features/maintenance.py create mode 100644 sunbeam-python/tests/functional/feature/features/pro.py create mode 100644 sunbeam-python/tests/functional/feature/features/secrets.py create mode 100644 sunbeam-python/tests/functional/feature/features/validation.py diff --git a/sunbeam-python/tests/functional/feature/README.md b/sunbeam-python/tests/functional/feature/README.md index e6171e0b8..7befa7276 100644 --- a/sunbeam-python/tests/functional/feature/README.md +++ b/sunbeam-python/tests/functional/feature/README.md @@ -58,6 +58,48 @@ You can pass standard `pytest` selectors through tox via `posargs`. 
For example: tox -e functional-feature -- tests/functional/feature/test_features.py::test_tls_ca ``` +## Feature coverage and dependencies + +### Features in this suite + +- **Enabled in current flow** + - `instance-recovery` + - `caas` (Containers as a Service) + - `dns` + - `images-sync` + - `loadbalancer` + - `resource-optimization` + - `shared-filesystem` + - `telemetry` + - `observability` + - `tls` (CA mode) + - `vault` + - `validation` + - `secrets` + +- **Present but intentionally disabled for now** + - `baremetal` + - `ldap` + - `maintenance` + - `pro` + +### Feature dependencies + +Some features have explicit dependencies: + +- **CaaS (`caas`)** + - Depends on: **`secrets`**, **`loadbalancer`** + - The CaaS test ensures these dependencies are enabled before running. + +- **Secrets as a Service (`secrets`)** + - Depends on: **`vault`** + - The Secrets test ensures the Vault feature is enabled before running. + +- **TLS (Vault-backed)** + - TLS can also be deployed in a Vault-backed mode which implicitly depends on + the **`vault`** feature. This suite currently exercises only the TLS CA + mode (`test_tls_ca`). + ## Notes - Disable failures are **logged and ignored** so that the suite continues diff --git a/sunbeam-python/tests/functional/feature/conftest.py b/sunbeam-python/tests/functional/feature/conftest.py index 9bde47e33..ef5d4cc5e 100644 --- a/sunbeam-python/tests/functional/feature/conftest.py +++ b/sunbeam-python/tests/functional/feature/conftest.py @@ -30,7 +30,12 @@ def test_config(request): config_file = Path(__file__).parent / config_path if not config_file.exists(): - pytest.skip(f"Configuration file not found: {config_file}") + msg = ( + f"Configuration file not found: {config_file}. " + "Copy tests/functional/feature/test_config.yaml.example to " + "tests/functional/feature/test_config.yaml and set sunbeam.deployment_name, juju.model." 
+ ) + pytest.skip(msg) with open(config_file, "r") as f: config = yaml.safe_load(f) diff --git a/sunbeam-python/tests/functional/feature/features/baremetal.py b/sunbeam-python/tests/functional/feature/features/baremetal.py new file mode 100644 index 000000000..57d99bfa8 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/baremetal.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for baremetal feature. + +Baremetal provides Ironic-based bare metal provisioning. +Functionality is validated via the Ironic (baremetal) API. +""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class BaremetalTest(BaseFeatureTest): + """Test baremetal feature enablement/disablement.""" + + feature_name = "baremetal" + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Baremetal (Ironic) API is reachable.""" + logger.info("Verifying Baremetal (Ironic) service is available...") + try: + subprocess.run( + ["openstack", "baremetal", "driver", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying Baremetal service: %s", exc) + raise AssertionError( + f"Baremetal service verification failed: {exc}" + ) from exc + + logger.info("Baremetal service verified via `openstack baremetal driver list`") diff --git a/sunbeam-python/tests/functional/feature/features/base.py b/sunbeam-python/tests/functional/feature/features/base.py index dcdc7c4b3..87f647c53 100644 --- a/sunbeam-python/tests/functional/feature/features/base.py +++ b/sunbeam-python/tests/functional/feature/features/base.py @@ -19,7 +19,6 @@ class BaseFeatureTest: """Base class for testing Sunbeam features.""" feature_name: str = "" - expected_units: List[str] = [] expected_applications: 
List[str] = [] timeout_seconds: int = 300 enable_args: List[str] = [] @@ -36,7 +35,6 @@ def __init__( self.config = config or {} feature_config = self.config.get("features", {}).get(self.feature_name, {}) - self.expected_units = feature_config.get("expected_units", self.expected_units) self.expected_applications = feature_config.get( "expected_applications", self.expected_applications, @@ -167,8 +165,11 @@ def verify_validate_feature_behavior(self) -> None: This is a simple method that can be called after enable to verify the feature is working. Override in subclasses for feature-specific checks. + Subclasses can override validate_feature_behavior() for behavior checks; + that is invoked from here before the application presence checks. """ logger.info("Verifying feature '%s' is enabled...", self.feature_name) + self.validate_feature_behavior() if self.expected_applications: for app in self.expected_applications: if self.juju.has_application(app): diff --git a/sunbeam-python/tests/functional/feature/features/caas.py b/sunbeam-python/tests/functional/feature/features/caas.py index 51de55971..02dfa08f2 100644 --- a/sunbeam-python/tests/functional/feature/features/caas.py +++ b/sunbeam-python/tests/functional/feature/features/caas.py @@ -10,6 +10,8 @@ import logging import subprocess +import pytest + from .base import BaseFeatureTest logger = logging.getLogger(__name__) @@ -23,6 +25,24 @@ class CaaSTest(BaseFeatureTest): expected_applications: list[str] = [] timeout_seconds = 600 + def _ensure_dependency_enabled(self, feature: str) -> bool: + """Best-effort enable a required dependency feature. + + If enabling the dependency fails (for example, missing Vault for + Secrets), we treat this as an unsatisfied dependency and skip. 
+ """ + logger.info("Ensuring dependency feature '%s' is enabled for CaaS...", feature) + try: + self.sunbeam.enable_feature(feature) + except Exception as exc: # noqa: BLE001 + logger.warning( + "Failed to enable dependency '%s' required by CaaS: %s", + feature, + exc, + ) + return False + return True + def verify_validate_feature_behavior(self) -> None: """Validate that the Magnum (COE) API is reachable. @@ -47,3 +67,17 @@ def verify_validate_feature_behavior(self) -> None: raise AssertionError(f"CaaS service verification failed: {exc}") from exc logger.info("CaaS (Magnum) service verified via `openstack coe cluster list`") + + def run_full_lifecycle(self) -> bool: + """Ensure dependencies then run the standard enable/verify/disable flow. + + CaaS depends on the Secrets and Load Balancer features. + """ + for dep in ("secrets", "loadbalancer"): + if not self._ensure_dependency_enabled(dep): + pytest.skip( + f"Skipping CaaS feature test: dependency '{dep}' " + "could not be enabled" + ) + + return super().run_full_lifecycle() diff --git a/sunbeam-python/tests/functional/feature/features/dns.py b/sunbeam-python/tests/functional/feature/features/dns.py index 597ab79f1..6aa90802a 100644 --- a/sunbeam-python/tests/functional/feature/features/dns.py +++ b/sunbeam-python/tests/functional/feature/features/dns.py @@ -21,7 +21,6 @@ class DnsTest(BaseFeatureTest): feature_name = "dns" # DNS requires nameservers argument - using dummy values for testing enable_args: list[str] = ["ns1.example.com.", "ns2.example.com."] - expected_units: list[str] = [] expected_applications: list[str] = [] timeout_seconds = 600 @@ -39,16 +38,3 @@ def verify_validate_feature_behavior(self) -> None: raise AssertionError(f"DNS service verification failed: {exc}") from exc logger.info("DNS service endpoints verified via `sunbeam dns address`") - - def run_full_lifecycle(self) -> bool: - """Enable dns, perform basic test, then disable it.""" - logger.info("Starting lifecycle test for feature: 
'%s'", self.feature_name) - - self.enable() - self.verify_validate_feature_behavior() - - disable_success = self.disable() - if not disable_success: - logger.warning("DNS disable failed, but continuing test sequence") - - return True diff --git a/sunbeam-python/tests/functional/feature/features/images_sync.py b/sunbeam-python/tests/functional/feature/features/images_sync.py index d4480ee7a..288f8fece 100644 --- a/sunbeam-python/tests/functional/feature/features/images_sync.py +++ b/sunbeam-python/tests/functional/feature/features/images_sync.py @@ -19,7 +19,6 @@ class ImagesSyncTest(BaseFeatureTest): """Test images-sync feature enablement/disablement.""" feature_name = "images-sync" - expected_units: list[str] = [] expected_applications: list[str] = [] timeout_seconds = 600 @@ -42,16 +41,3 @@ def verify_validate_feature_behavior(self) -> None: raise AssertionError(f"Image service verification failed: {exc}") from exc logger.info("Image service verified via `openstack image list`") - - def run_full_lifecycle(self) -> bool: - """Enable images-sync, perform basic test, then disable it.""" - logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) - - self.enable() - self.verify_validate_feature_behavior() - - disable_success = self.disable() - if not disable_success: - logger.warning("Images-sync disable failed, but continuing test sequence") - - return True diff --git a/sunbeam-python/tests/functional/feature/features/instance_recovery.py b/sunbeam-python/tests/functional/feature/features/instance_recovery.py index 6110c2614..0f51f60ca 100644 --- a/sunbeam-python/tests/functional/feature/features/instance_recovery.py +++ b/sunbeam-python/tests/functional/feature/features/instance_recovery.py @@ -20,13 +20,6 @@ class InstanceRecoveryTest(BaseFeatureTest): "consul-storage", "consul-tenant", ] - expected_units = [ - "masakari/0", - "masakari-mysql-router/0", - "consul-management/0", - "consul-storage/0", - "consul-tenant/0", - ] timeout_seconds = 900 
def validate_feature_behavior(self) -> None: @@ -47,10 +40,3 @@ def validate_feature_behavior(self) -> None: result = subprocess.run(cmd, check=True, capture_output=True, text=True) if not result.stdout.strip(): raise AssertionError("openstack segment list returned no data") - - def run_full_lifecycle(self) -> bool: - """Enable instance-recovery, verify resources and behavior, then disable it.""" - self.enable() - self.validate_feature_behavior() - self.disable() - return True diff --git a/sunbeam-python/tests/functional/feature/features/ldap.py b/sunbeam-python/tests/functional/feature/features/ldap.py new file mode 100644 index 000000000..21aae49d5 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/ldap.py @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for ldap feature. + +LDAP integration configures Keystone to authenticate against LDAP. +Functionality is minimally validated via the Identity API. +""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class LdapTest(BaseFeatureTest): + """Test ldap feature enablement/disablement.""" + + feature_name = "ldap" + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Identity API is reachable.""" + logger.info("Verifying Identity (Keystone) service is available for LDAP...") + try: + subprocess.run( + ["openstack", "domain", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying Identity service: %s", exc) + raise AssertionError(f"LDAP feature verification failed: {exc}") from exc + + logger.info("Identity service verified via `openstack domain list`") diff --git a/sunbeam-python/tests/functional/feature/features/loadbalancer.py 
b/sunbeam-python/tests/functional/feature/features/loadbalancer.py index a7e377f35..1ff3549b9 100644 --- a/sunbeam-python/tests/functional/feature/features/loadbalancer.py +++ b/sunbeam-python/tests/functional/feature/features/loadbalancer.py @@ -20,7 +20,6 @@ class LoadbalancerTest(BaseFeatureTest): feature_name = "loadbalancer" expected_applications: list[str] = ["octavia"] - expected_units: list[str] = [] timeout_seconds = 600 def verify_validate_feature_behavior(self) -> None: @@ -41,16 +40,3 @@ def verify_validate_feature_behavior(self) -> None: except Exception as e: logger.warning("Error checking loadbalancer service: %s", e) raise AssertionError(f"Loadbalancer service verification failed: {e}") - - def run_full_lifecycle(self) -> bool: - """Enable loadbalancer, perform basic test, then disable it.""" - logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) - - self.enable() - self.verify_validate_feature_behavior() - - disable_success = self.disable() - if not disable_success: - logger.warning("Loadbalancer disable failed, but continuing test sequence") - - return True diff --git a/sunbeam-python/tests/functional/feature/features/maintenance.py b/sunbeam-python/tests/functional/feature/features/maintenance.py new file mode 100644 index 000000000..a2eeac6ee --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/maintenance.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for maintenance feature.""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class MaintenanceTest(BaseFeatureTest): + """Test maintenance feature enablement/disablement.""" + + feature_name = "maintenance" + expected_applications: list[str] = [] + timeout_seconds = 600 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Compute API is reachable.""" + logger.info("Verifying Compute service is 
available for maintenance...") + try: + subprocess.run( + ["openstack", "compute", "service", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying Compute service: %s", exc) + raise AssertionError( + f"Maintenance feature verification failed: {exc}" + ) from exc + + logger.info("Compute service verified via `openstack compute service list`") diff --git a/sunbeam-python/tests/functional/feature/features/observability.py b/sunbeam-python/tests/functional/feature/features/observability.py index 9bef407d0..9f6366cd3 100644 --- a/sunbeam-python/tests/functional/feature/features/observability.py +++ b/sunbeam-python/tests/functional/feature/features/observability.py @@ -26,7 +26,6 @@ class ObservabilityTest(BaseFeatureTest): feature_name = "observability" enable_args: list[str] = ["embedded"] disable_args: list[str] = ["embedded"] - expected_units: list[str] = [] expected_applications: list[str] = [] timeout_seconds = 900 diff --git a/sunbeam-python/tests/functional/feature/features/orchestration.py b/sunbeam-python/tests/functional/feature/features/orchestration.py index 0f9a502e6..b09601f56 100644 --- a/sunbeam-python/tests/functional/feature/features/orchestration.py +++ b/sunbeam-python/tests/functional/feature/features/orchestration.py @@ -20,7 +20,6 @@ class OrchestrationTest(BaseFeatureTest): feature_name = "orchestration" expected_applications: list[str] = ["heat"] - expected_units: list[str] = [] timeout_seconds = 600 def verify_validate_feature_behavior(self) -> None: @@ -41,16 +40,3 @@ def verify_validate_feature_behavior(self) -> None: except Exception as e: logger.warning("Error checking orchestration service: %s", e) raise AssertionError(f"Orchestration service verification failed: {e}") - - def run_full_lifecycle(self) -> bool: - """Enable orchestration, perform basic test, then disable it.""" - logger.info("Starting lifecycle test for feature: '%s'", 
self.feature_name) - - self.enable() - self.verify_validate_feature_behavior() - - disable_success = self.disable() - if not disable_success: - logger.warning("Orchestration disable failed, but continuing test sequence") - - return True diff --git a/sunbeam-python/tests/functional/feature/features/pro.py b/sunbeam-python/tests/functional/feature/features/pro.py new file mode 100644 index 000000000..7948bde8f --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/pro.py @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for pro feature. + +Ubuntu Pro integrates subscription/entitlement with the deployment. +Functionality is minimally validated via a generic OpenStack service call. +""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class ProTest(BaseFeatureTest): + """Test pro feature enablement/disablement.""" + + feature_name = "pro" + expected_applications: list[str] = [] + timeout_seconds = 600 + + def __init__(self, *args, **kwargs) -> None: + """Initialise Pro test with a token argument for enable. + + The token is taken from the functional test configuration, if present. + If no token is configured, a dummy placeholder is used. 
+ """ + super().__init__(*args, **kwargs) + pro_cfg = self.config.get("pro", {}) if self.config is not None else {} + token = pro_cfg.get("token", "DUMMY-UBUNTU-PRO-TOKEN") + self.enable_args = ["--token", token] + + def verify_validate_feature_behavior(self) -> None: + """Validate that OpenStack APIs remain reachable under Pro.""" + logger.info("Verifying OpenStack service catalog for Ubuntu Pro...") + try: + result = subprocess.run( + ["openstack", "service", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except subprocess.CalledProcessError as exc: + logger.warning("Failed to list services: %s", exc.stderr) + raise AssertionError( + f"OpenStack service catalog not accessible: {exc.stderr}" + ) from exc + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying OpenStack services: %s", exc) + raise AssertionError( + f"Ubuntu Pro feature verification failed: {exc}" + ) from exc + + if not result.stdout.strip(): + raise AssertionError("Service list returned no data") + + logger.info("OpenStack service catalog verified via `openstack service list`") diff --git a/sunbeam-python/tests/functional/feature/features/resource_optimization.py b/sunbeam-python/tests/functional/feature/features/resource_optimization.py index c6828d22b..8db74bbb2 100644 --- a/sunbeam-python/tests/functional/feature/features/resource_optimization.py +++ b/sunbeam-python/tests/functional/feature/features/resource_optimization.py @@ -19,7 +19,6 @@ class ResourceOptimizationTest(BaseFeatureTest): """Test resource-optimization feature enablement/disablement.""" feature_name = "resource-optimization" - expected_units: list[str] = [] expected_applications: list[str] = [] timeout_seconds = 600 diff --git a/sunbeam-python/tests/functional/feature/features/secrets.py b/sunbeam-python/tests/functional/feature/features/secrets.py new file mode 100644 index 000000000..49fdf12da --- /dev/null +++ 
b/sunbeam-python/tests/functional/feature/features/secrets.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for secrets feature.""" + +import logging +import subprocess + +import pytest + +from .base import BaseFeatureTest + +logger = logging.getLogger(__name__) + + +class SecretsTest(BaseFeatureTest): + """Test secrets feature enablement/disablement.""" + + feature_name = "secrets" + expected_applications: list[str] = [] + timeout_seconds = 600 + + def _ensure_vault_enabled(self) -> bool: + """Ensure the Vault feature is enabled before Secrets.""" + logger.info("Ensuring 'vault' feature is enabled before 'secrets'...") + try: + self.sunbeam.enable_feature("vault") + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to enable required dependency 'vault': %s", exc) + return False + return True + + def verify_validate_feature_behavior(self) -> None: + """Validate that the Secrets (Barbican) API is reachable.""" + logger.info("Verifying Secrets (Barbican) service is available...") + try: + subprocess.run( + ["openstack", "secret", "list"], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying Secrets service: %s", exc) + raise AssertionError(f"Secrets service verification failed: {exc}") from exc + + logger.info("Secrets service verified via `openstack secret list`") + + def run_full_lifecycle(self) -> bool: + """Enable Vault first, then run the Secrets lifecycle.""" + if not self._ensure_vault_enabled(): + pytest.skip( + "Skipping Secrets feature test: dependency 'vault' not available" + ) + + return super().run_full_lifecycle() diff --git a/sunbeam-python/tests/functional/feature/features/shared_filesystem.py b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py index 66117213b..086cdbe41 100644 --- a/sunbeam-python/tests/functional/feature/features/shared_filesystem.py 
+++ b/sunbeam-python/tests/functional/feature/features/shared_filesystem.py @@ -19,7 +19,6 @@ class SharedFilesystemTest(BaseFeatureTest): """Test shared-filesystem feature enablement/disablement.""" feature_name = "shared-filesystem" - expected_units: list[str] = [] expected_applications: list[str] = [] timeout_seconds = 600 diff --git a/sunbeam-python/tests/functional/feature/features/telemetry.py b/sunbeam-python/tests/functional/feature/features/telemetry.py index 6fbd79b81..191867230 100644 --- a/sunbeam-python/tests/functional/feature/features/telemetry.py +++ b/sunbeam-python/tests/functional/feature/features/telemetry.py @@ -20,7 +20,6 @@ class TelemetryTest(BaseFeatureTest): feature_name = "telemetry" expected_applications: list[str] = ["ceilometer", "gnocchi", "aodh"] - expected_units: list[str] = [] timeout_seconds = 600 def verify_validate_feature_behavior(self) -> None: @@ -42,16 +41,3 @@ def verify_validate_feature_behavior(self) -> None: except Exception as e: logger.warning("Error checking telemetry services: %s", e) raise AssertionError(f"Telemetry service verification failed: {e}") - - def run_full_lifecycle(self) -> bool: - """Enable telemetry, perform basic test, then disable it.""" - logger.info("Starting lifecycle test for feature: '%s'", self.feature_name) - - self.enable() - self.verify_validate_feature_behavior() - - disable_success = self.disable() - if not disable_success: - logger.warning("Telemetry disable failed, but continuing test sequence") - - return True diff --git a/sunbeam-python/tests/functional/feature/features/tls.py b/sunbeam-python/tests/functional/feature/features/tls.py index 17a856292..cfc8775d5 100644 --- a/sunbeam-python/tests/functional/feature/features/tls.py +++ b/sunbeam-python/tests/functional/feature/features/tls.py @@ -1,19 +1,12 @@ # SPDX-FileCopyrightText: 2024 - Canonical Ltd # SPDX-License-Identifier: Apache-2.0 -"""Tests for tls feature (CA mode). 
- -TLS enablement has multiple methods in Sunbeam, but this functional test -suite only exercises the TLS CA path: - -- TLS CA: `sunbeam enable tls ca` (requires CA certificates) -""" +"""Tests for tls feature (CA mode).""" import base64 import logging import subprocess import tempfile -import time from pathlib import Path from typing import Tuple @@ -95,9 +88,6 @@ class TlsCaTest(BaseFeatureTest): expected_applications = [ "manual-tls-certificates", ] - expected_units = [ - "manual-tls-certificates/0", - ] timeout_seconds = 600 def __init__(self, *args, **kwargs): @@ -133,28 +123,3 @@ def disable(self) -> bool: exc, ) return False - - def _ensure_tls_ca_disabled(self) -> bool: - """Ensure TLS CA is disabled before enabling (cleanup from previous runs).""" - if self.juju.has_application("manual-tls-certificates"): - logger.info("TLS CA is already enabled, disabling first...") - try: - self.disable() - # Wait a bit for cleanup - time.sleep(5) - except Exception as exc: # noqa: BLE001 - logger.warning("Failed to disable existing TLS CA: %s", exc) - return False - return True - - def run_full_lifecycle(self) -> bool: - """Enable TLS CA, perform basic test, then disable it.""" - if not self._ensure_tls_ca_disabled(): - logger.warning("Could not ensure TLS CA is disabled, continuing anyway...") - - self.enable() - disable_success = self.disable() - if not disable_success: - logger.warning("TLS CA disable failed, but continuing test sequence") - - return True diff --git a/sunbeam-python/tests/functional/feature/features/validation.py b/sunbeam-python/tests/functional/feature/features/validation.py new file mode 100644 index 000000000..c752af5f9 --- /dev/null +++ b/sunbeam-python/tests/functional/feature/features/validation.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Test for validation feature.""" + +import logging +import subprocess + +from .base import BaseFeatureTest + +logger = 
logging.getLogger(__name__) + + +class ValidationTest(BaseFeatureTest): + """Test validation feature enablement/disablement.""" + + feature_name = "validation" + expected_applications: list[str] = [] + timeout_seconds = 900 + + def verify_validate_feature_behavior(self) -> None: + """Validate that the validation CLI is usable.""" + logger.info("Verifying validation feature via `sunbeam validation profiles`...") + try: + subprocess.run( + ["sunbeam", "validation", "profiles"], + capture_output=True, + text=True, + timeout=60, + check=True, + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Error while verifying validation feature: %s", exc) + raise AssertionError( + f"Validation feature verification failed: {exc}" + ) from exc + + logger.info("Validation feature verified via `sunbeam validation profiles`") diff --git a/sunbeam-python/tests/functional/feature/features/vault.py b/sunbeam-python/tests/functional/feature/features/vault.py index 78bd44290..26b71a5d6 100644 --- a/sunbeam-python/tests/functional/feature/features/vault.py +++ b/sunbeam-python/tests/functional/feature/features/vault.py @@ -18,7 +18,6 @@ class VaultTest(BaseFeatureTest): """Test vault feature enablement/disablement.""" feature_name = "vault" - expected_units: list[str] = [] expected_applications: list[str] = [] timeout_seconds = 600 diff --git a/sunbeam-python/tests/functional/feature/test_features.py b/sunbeam-python/tests/functional/feature/test_features.py index 11b748907..d6522f627 100644 --- a/sunbeam-python/tests/functional/feature/test_features.py +++ b/sunbeam-python/tests/functional/feature/test_features.py @@ -11,17 +11,23 @@ import pytest +from .features.baremetal import BaremetalTest from .features.caas import CaaSTest from .features.dns import DnsTest from .features.images_sync import ImagesSyncTest from .features.instance_recovery import InstanceRecoveryTest +from .features.ldap import LdapTest from .features.loadbalancer import LoadbalancerTest +from 
.features.maintenance import MaintenanceTest from .features.observability import ObservabilityTest from .features.orchestration import OrchestrationTest +from .features.pro import ProTest from .features.resource_optimization import ResourceOptimizationTest +from .features.secrets import SecretsTest from .features.shared_filesystem import SharedFilesystemTest from .features.telemetry import TelemetryTest from .features.tls import TlsCaTest +from .features.validation import ValidationTest from .features.vault import VaultTest logging.basicConfig(level=logging.INFO) @@ -35,6 +41,19 @@ def test_instance_recovery(sunbeam_client, juju_client, test_config): assert feature_test.run_full_lifecycle(), "Instance recovery feature test failed" +@pytest.mark.functional +@pytest.mark.skip( + reason=( + "Baremetal feature test is present but intentionally disabled in the " + "current feature flow (enable later when ready)." + ) +) +def test_baremetal(sunbeam_client, juju_client, test_config): + """Test baremetal feature lifecycle (enable/disable only).""" + feature_test = BaremetalTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Baremetal feature test failed" + + @pytest.mark.functional def test_caas(sunbeam_client, juju_client, test_config): """Test caas feature lifecycle (enable/disable only).""" @@ -56,6 +75,19 @@ def test_images_sync(sunbeam_client, juju_client, test_config): assert feature_test.run_full_lifecycle(), "Images-sync feature test failed" +@pytest.mark.functional +@pytest.mark.skip( + reason=( + "LDAP feature test is present but intentionally disabled in the " + "current feature flow (enable later when ready)." 
+ ) +) +def test_ldap(sunbeam_client, juju_client, test_config): + """Test ldap feature lifecycle (enable/disable only).""" + feature_test = LdapTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "LDAP feature test failed" + + @pytest.mark.functional def test_loadbalancer(sunbeam_client, juju_client, test_config): """Test loadbalancer feature lifecycle (enable/disable only).""" @@ -86,6 +118,13 @@ def test_shared_filesystem(sunbeam_client, juju_client, test_config): assert feature_test.run_full_lifecycle(), "Shared-filesystem feature test failed" +@pytest.mark.functional +def test_secrets(sunbeam_client, juju_client, test_config): + """Test secrets feature lifecycle (enable/disable only).""" + feature_test = SecretsTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Secrets feature test failed" + + @pytest.mark.functional def test_telemetry(sunbeam_client, juju_client, test_config): """Test telemetry feature lifecycle (enable/disable only).""" @@ -101,15 +140,29 @@ def test_observability(sunbeam_client, juju_client, test_config): @pytest.mark.functional +@pytest.mark.skip( + reason=( + "Maintenance feature test is present but intentionally disabled in the " + "current feature flow (enable later when ready)." + ) +) def test_maintenance(sunbeam_client, juju_client, test_config): - """Placeholder for maintenance feature test (not yet enabled).""" - pytest.skip("maintenance feature test not yet enabled in CI") + """Test maintenance feature lifecycle (enable/disable only).""" + feature_test = MaintenanceTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Maintenance feature test failed" @pytest.mark.functional +@pytest.mark.skip( + reason=( + "Pro feature test is present but intentionally disabled in the " + "current feature flow (enable later when ready)." 
+ ) +) def test_pro(sunbeam_client, juju_client, test_config): - """Placeholder for pro feature test (not yet enabled).""" - pytest.skip("pro feature test not yet enabled in CI") + """Test pro feature lifecycle (enable/disable only).""" + feature_test = ProTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Pro feature test failed" @pytest.mark.functional @@ -124,3 +177,10 @@ def test_vault(sunbeam_client, juju_client, test_config): """Test vault feature lifecycle (enable/disable only).""" feature_test = VaultTest(sunbeam_client, juju_client, test_config) assert feature_test.run_full_lifecycle(), "Vault feature test failed" + + +@pytest.mark.functional +def test_validation(sunbeam_client, juju_client, test_config): + """Test validation feature lifecycle (enable/disable only).""" + feature_test = ValidationTest(sunbeam_client, juju_client, test_config) + assert feature_test.run_full_lifecycle(), "Validation feature test failed" diff --git a/sunbeam-python/tests/functional/feature/utils/juju.py b/sunbeam-python/tests/functional/feature/utils/juju.py index 384711460..448e85da2 100644 --- a/sunbeam-python/tests/functional/feature/utils/juju.py +++ b/sunbeam-python/tests/functional/feature/utils/juju.py @@ -133,13 +133,3 @@ def verify_applications_exist( results[app] = app in actual_applications return results - - def verify_units_exist(self, expected_units: List[str]) -> Dict[str, bool]: - """Verify that expected units exist.""" - actual_units = self.get_units() - results: Dict[str, bool] = {} - - for unit in expected_units: - results[unit] = unit in actual_units - - return results diff --git a/sunbeam-python/tests/functional/feature/utils/sunbeam.py b/sunbeam-python/tests/functional/feature/utils/sunbeam.py index 307b67b99..7982f3de4 100644 --- a/sunbeam-python/tests/functional/feature/utils/sunbeam.py +++ b/sunbeam-python/tests/functional/feature/utils/sunbeam.py @@ -27,7 +27,7 @@ def _run_command(self, command: List[str]) -> 
subprocess.CompletedProcess: capture_output=True, text=True, check=False, - timeout=1800, + timeout=3600, ) if result.returncode != 0: From 7e6cc1eb83c9f965051949cdd045bcfccaa1d24b Mon Sep 17 00:00:00 2001 From: Ahmad Hassan Date: Tue, 3 Feb 2026 15:39:16 +0500 Subject: [PATCH 3/5] Add Chaos Mesh validation resilience tests and shared chaos helpers --- .../tests/functional/chaos/README.md | 38 ++++ .../tests/functional/chaos/__init__.py | 4 + .../tests/functional/chaos/conftest.py | 10 + .../tests/functional/chaos/utils.py | 189 ++++++++++++++++++ .../functional/chaos/validation/__init__.py | 4 + .../test_validation_keystone_chaos.py | 113 +++++++++++ sunbeam-python/tox.ini | 11 + 7 files changed, 369 insertions(+) create mode 100644 sunbeam-python/tests/functional/chaos/README.md create mode 100644 sunbeam-python/tests/functional/chaos/__init__.py create mode 100644 sunbeam-python/tests/functional/chaos/conftest.py create mode 100644 sunbeam-python/tests/functional/chaos/utils.py create mode 100644 sunbeam-python/tests/functional/chaos/validation/__init__.py create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py diff --git a/sunbeam-python/tests/functional/chaos/README.md b/sunbeam-python/tests/functional/chaos/README.md new file mode 100644 index 000000000..94457816e --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/README.md @@ -0,0 +1,38 @@ +# Chaos Mesh functional tests + +Chaos Mesh-based resilience tests for Canonical OpenStack features. + +This directory is intentionally separate from the standard feature functional +tests under `tests/functional/feature` so that chaos experiments can be run +independently and expanded over time. + +## Prerequisites + +- A working Canonical OpenStack deployment (same requirements as the feature + functional tests). +- `sunbeam`, `openstack` and `juju` CLIs configured for that deployment. 
+- `kubectl` configured to talk to the Kubernetes cluster that backs the + OpenStack model. +- Chaos Mesh installed and running, typically in the `chaos-mesh` namespace. + +## Layout + +- `validation/`: Chaos tests that target the **validation** feature. + +Additional feature-specific chaos tests can be added as new subdirectories +alongside `validation/`. + +## Running the chaos tests + +From the `sunbeam-python` tree: + +```bash +tox -e functional-chaos +``` + +You can also run individual chaos tests via `pytest`, for example: + +```bash +python -m pytest -s -vv tests/functional/chaos/validation/test_validation_keystone_chaos.py +``` + diff --git a/sunbeam-python/tests/functional/chaos/__init__.py b/sunbeam-python/tests/functional/chaos/__init__.py new file mode 100644 index 000000000..b9a36a80a --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Chaos Mesh-based functional tests.""" diff --git a/sunbeam-python/tests/functional/chaos/conftest.py b/sunbeam-python/tests/functional/chaos/conftest.py new file mode 100644 index 000000000..a437f9031 --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/conftest.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Functional fixtures for chaos tests.""" + +from tests.functional.feature.conftest import ( # noqa: F401 + juju_client, + sunbeam_client, + test_config, +) diff --git a/sunbeam-python/tests/functional/chaos/utils.py b/sunbeam-python/tests/functional/chaos/utils.py new file mode 100644 index 000000000..ce12212ac --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/utils.py @@ -0,0 +1,189 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Shared helpers for Chaos Mesh functional tests. 
+
+These utilities centralise common operations so that multiple chaos scenarios
+can reuse the same logic for:
+
+- Mapping Juju unit names to Kubernetes pod names.
+- Inspecting Juju status via Jubilant.
+- Waiting for units to become active again.
+- Applying and deleting Chaos Mesh PodChaos resources.
+"""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+import time
+from typing import List, Tuple
+
+import jubilant
+import pytest
+
+logger = logging.getLogger(__name__)
+
+
+def get_leader_and_non_leaders(
+    juju_client,
+    app_name: str,
+) -> Tuple[str, List[str]]:
+    """Return (leader_unit_name, [non_leader_unit_names]) for a Juju app."""
+    logger.info("Querying Juju status for application '%s' units...", app_name)
+
+    status: jubilant.Status = juju_client.juju.status()
+    app = status.apps[app_name]
+
+    leader_unit: str | None = None
+    non_leaders: List[str] = []
+    for unit_name, unit_data in app.units.items():
+        if getattr(unit_data, "leader", False):
+            leader_unit = unit_name
+        else:
+            non_leaders.append(unit_name)
+
+    if leader_unit is None:
+        pytest.skip(
+            f"No leader unit found for application '{app_name}' in Juju status."
+        )
+
+    return leader_unit, non_leaders
+
+
+def wait_for_unit_active(
+    juju_client,
+    app_name: str,
+    unit_name: str,
+    timeout: int = 600,
+) -> float:
+    """Wait until the given Juju unit's workload status is 'active'.
+
+    Returns the time (in seconds) taken for the unit to become active again.
+    Raises AssertionError if the timeout is exceeded or the app enters error.
+    """
+    logger.info(
+        "Waiting for unit %s (app '%s') to become active again...",
+        unit_name,
+        app_name,
+    )
+    start = time.monotonic()
+
+    try:
+        juju_client.juju.wait(
+            lambda status: is_unit_active(status, app_name, unit_name),
+            error=lambda status: app_has_error(status, app_name),
+            timeout=timeout,
+            delay=5.0,
+        )
+    except (jubilant.WaitError, TimeoutError) as exc:
+        raise AssertionError(
+            f"Application '{app_name}' errored or timed out while waiting for "
+            f"{unit_name} to recover."
+        ) from exc
+
+    elapsed = time.monotonic() - start
+    logger.info(
+        "Unit %s (app '%s') is active again after %.1f seconds.",
+        unit_name,
+        app_name,
+        elapsed,
+    )
+    return elapsed
+
+
+def is_unit_active(
+    status: jubilant.Status,
+    app_name: str,
+    unit_name: str,
+) -> bool:
+    """Return True if the given unit's workload status is 'active'."""
+    units = status.get_units(app_name)
+    unit = units.get(unit_name)
+    if not unit:
+        return False
+    workload = getattr(getattr(unit, "workload_status", None), "current", None)
+    return workload == "active"
+
+
+def app_has_error(status: jubilant.Status, app_name: str) -> bool:
+    """Return True if any unit in the given app is in error."""
+    return jubilant.any_error(status, app_name)
+
+
+def unit_name_to_pod_name(unit_name: str) -> str:
+    """Map a Juju unit name (e.g. 'keystone/1') to a pod name (e.g. 'keystone-1').
+
+    For Kubernetes charms, Juju unit names and pod names follow this convention.
+    """
+    return unit_name.replace("/", "-")
+
+
+def pod_chaos_name_for_pod(app_name: str, pod_name: str) -> str:
+    """Return a deterministic PodChaos name for a given pod."""
+    return f"{app_name}-{pod_name}-pod-kill"
+
+
+def apply_pod_chaos_for_pod(
+    app_namespace: str,
+    pod_name: str,
+    chaos_namespace: str = "chaos-mesh",
+    *,
+    duration: str = "30s",
+    action: str = "pod-kill",
+) -> str:
+    """Create a PodChaos resource targeting a single pod.
+
+    Returns the name of the created PodChaos resource.
+ """ + chaos_name = pod_chaos_name_for_pod(app_namespace, pod_name) + manifest = f""" +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: {chaos_name} + namespace: {chaos_namespace} +spec: + action: {action} + mode: one + duration: "{duration}" + selector: + pods: + {app_namespace}: + - {pod_name} +""".lstrip() + logger.info( + "Applying PodChaos for pod %s in namespace %s (resource: %s)", + pod_name, + app_namespace, + chaos_name, + ) + subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=manifest, + check=True, + capture_output=True, + text=True, + ) + return chaos_name + + +def delete_pod_chaos(chaos_name: str, chaos_namespace: str = "chaos-mesh") -> None: + """Delete a PodChaos resource by name.""" + logger.info( + "Deleting PodChaos resource: %s (namespace: %s)", chaos_name, chaos_namespace + ) + subprocess.run( + [ + "kubectl", + "delete", + "podchaos", + chaos_name, + "-n", + chaos_namespace, + "--ignore-not-found=true", + ], + check=False, + capture_output=True, + text=True, + ) diff --git a/sunbeam-python/tests/functional/chaos/validation/__init__.py b/sunbeam-python/tests/functional/chaos/validation/__init__.py new file mode 100644 index 000000000..15cff7ecb --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/validation/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Chaos tests for the validation feature.""" diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py new file mode 100644 index 000000000..2b0d9c975 --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py @@ -0,0 +1,113 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + + +"""Chaos Mesh tests for the validation feature. 
+ +These tests exercise the validation feature (``sunbeam validation run``) while +Chaos Mesh injects failures into **non-leader** Keystone pods, to assess how +well validation behaves under control plane disruption. +""" + +import logging +import subprocess +from typing import List + +import pytest + +from tests.functional.chaos.utils import ( + apply_pod_chaos_for_pod, + delete_pod_chaos, + get_leader_and_non_leaders, + unit_name_to_pod_name, + wait_for_unit_active, +) + +logger = logging.getLogger(__name__) + +OPENSTACK_NAMESPACE = "openstack" +CHAOS_NAMESPACE = "chaos-mesh" +KEYSTONE_APP = "keystone" + + +@pytest.mark.functional +def test_validation_resilient_to_non_leader_keystone_pod_kills( + sunbeam_client, + juju_client, +) -> None: + """Validation 'smoke' profile should tolerate non-leader Keystone pod kills. + + This test: + + - Ensures the ``validation`` feature is enabled. + - Uses Jubilant status to discover the Keystone leader unit and its + non-leader units in the ``openstack`` model. + - Starts ``sunbeam validation run smoke`` + - While validation is running, sequentially applies Chaos Mesh ``PodChaos`` + resources that kill each **non-leader** Keystone pod in turn, waiting for + each unit to recover to ``workload-status: active``. + - Collects and logs the recovery time for each non-leader unit. + + The expectation is that the validation smoke run completes successfully + despite transient failures of non-leader Keystone pods. + """ + sunbeam_client.enable_feature("validation") + leader_unit, non_leader_units = get_leader_and_non_leaders( + juju_client, + KEYSTONE_APP, + ) + logger.info( + "Keystone leader unit: %s; non-leaders: %s", + leader_unit, + non_leader_units, + ) + + # Start validation smoke tests in the background. 
+ logger.info("Starting 'sunbeam validation run smoke'...") + validation_proc = subprocess.Popen( + ["sunbeam", "validation", "run", "smoke"], + text=True, + ) + + chaos_resources: List[str] = [] + try: + for unit_name in non_leader_units: + pod_name = unit_name_to_pod_name(unit_name) + chaos_name = apply_pod_chaos_for_pod( + OPENSTACK_NAMESPACE, + pod_name, + chaos_namespace=CHAOS_NAMESPACE, + duration="30s", + ) + chaos_resources.append(chaos_name) + + # Wait for the affected unit to become active again. + wait_for_unit_active( + juju_client, + KEYSTONE_APP, + unit_name, + timeout=600, + ) + + # After injecting chaos to all non-leaders, wait for validation to finish. + logger.info("Waiting for validation smoke run to complete...") + try: + return_code = validation_proc.wait(timeout=3600) + except subprocess.TimeoutExpired: + validation_proc.kill() + raise AssertionError( + "sunbeam validation run smoke did not complete within the timeout." + ) + + assert return_code == 0, ( + f"sunbeam validation run smoke failed with exit code {return_code}" + ) + finally: + for chaos_name in chaos_resources: + try: + delete_pod_chaos(chaos_name, chaos_namespace=CHAOS_NAMESPACE) + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to clean up PodChaos %s: %s", chaos_name, exc) + + if validation_proc.poll() is None: + validation_proc.terminate() diff --git a/sunbeam-python/tox.ini b/sunbeam-python/tox.ini index 5666df3e7..2e89e14a8 100644 --- a/sunbeam-python/tox.ini +++ b/sunbeam-python/tox.ini @@ -45,6 +45,17 @@ commands = uv run {[vars]uv_flags} \ --basetemp={env:HOME}/.local/share/openstack/tmp \ {posargs} +[testenv:functional-chaos] +description = Sunbeam Chaos Mesh functional tests +passenv = USER + LOGNAME + USERNAME + HOME +commands = uv run {[vars]uv_flags} \ + python -m pytest -s -vv tests/functional/chaos \ + --basetemp={env:HOME}/.local/share/openstack/tmp \ + {posargs} + [testenv:fmt] description = Apply coding style standards to code deps = From 
2b7863619b936e5c77846d525c394ed06edba15a Mon Sep 17 00:00:00 2001 From: Ahmad Hassan Date: Tue, 10 Feb 2026 17:48:11 +0500 Subject: [PATCH 4/5] Add chaos validation suite and fully automated TLS Vault feature flow --- .../tests/functional/chaos/conftest.py | 117 +++++++++++- .../tests/functional/chaos/utils.py | 170 ++++++++++++++++-- .../test_validation_api_pod_chaos.py | 42 +++++ .../test_validation_db_router_chaos.py | 48 +++++ .../validation/test_validation_infra_chaos.py | 36 ++++ .../test_validation_keystone_chaos.py | 79 +------- 6 files changed, 400 insertions(+), 92 deletions(-) create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py create mode 100644 sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py diff --git a/sunbeam-python/tests/functional/chaos/conftest.py b/sunbeam-python/tests/functional/chaos/conftest.py index a437f9031..52f2e67c0 100644 --- a/sunbeam-python/tests/functional/chaos/conftest.py +++ b/sunbeam-python/tests/functional/chaos/conftest.py @@ -1,10 +1,115 @@ # SPDX-FileCopyrightText: 2026 - Canonical Ltd # SPDX-License-Identifier: Apache-2.0 -"""Functional fixtures for chaos tests.""" +"""Functional fixtures and config hooks for chaos tests.""" -from tests.functional.feature.conftest import ( # noqa: F401 - juju_client, - sunbeam_client, - test_config, -) +import logging + +import pytest + +from tests.functional.chaos.utils import _kubectl_command +from tests.functional.feature import conftest as feature_conftest # noqa: F401 + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="session", autouse=True) +def ensure_chaos_mesh_installed() -> None: + """Ensure Chaos Mesh is installed and ready for chaos tests. 
+ + This follows the documented Helm installation path, using: + + - sudo snap install helm --classic + - helm repo add chaos-mesh https://charts.chaos-mesh.org + - helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh + """ + import subprocess + + def _has_chaos_mesh() -> bool: + try: + result = subprocess.run( + _kubectl_command(["get", "pods", "-n", "chaos-mesh"]), + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError: + return False + return "chaos-mesh" in result.stdout or "controller-manager" in result.stdout + + if _has_chaos_mesh(): + logger.info("Chaos Mesh already present in namespace 'chaos-mesh'.") + return + + logger.info("Chaos Mesh not detected; attempting to install via Helm...") + + try: + subprocess.run( + ["helm", "version"], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError: + logger.info("Helm not found or not working; installing helm snap...") + subprocess.run( + ["sudo", "snap", "install", "helm", "--classic"], + check=True, + text=True, + ) + + subprocess.run( + ["helm", "repo", "add", "chaos-mesh", "https://charts.chaos-mesh.org"], + check=False, + text=True, + ) + subprocess.run( + ["helm", "repo", "update"], + check=False, + text=True, + ) + + subprocess.run( + [ + "helm", + "upgrade", + "--install", + "chaos-mesh", + "chaos-mesh/chaos-mesh", + "--namespace", + "chaos-mesh", + "--create-namespace", + ], + check=True, + text=True, + ) + + if not _has_chaos_mesh(): + raise RuntimeError( + "Chaos Mesh could not be verified as running in 'chaos-mesh' " + "namespace after attempted installation. " + "Please check Juju/Helm/kubectl connectivity." + ) + + +@pytest.fixture(scope="session", autouse=True) +def ensure_validation_enabled_once( + sunbeam_client, # type: ignore[reportUnusedFunction] +) -> None: + """Enable the validation feature once for all chaos tests. 
+ + Chaos scenarios assume that the validation feature is enabled and they + merely start ``sunbeam validation run smoke`` during fault injection. + """ + import subprocess + + logger.info("Ensuring 'validation' feature is enabled for chaos tests...") + try: + sunbeam_client.enable_feature("validation") + except ( + subprocess.CalledProcessError + ) as exc: # pragma: no cover - environment-specific + logger.warning( + "Validation feature could not be enabled; chaos tests may fail: %s", + exc, + ) diff --git a/sunbeam-python/tests/functional/chaos/utils.py b/sunbeam-python/tests/functional/chaos/utils.py index ce12212ac..197ea8217 100644 --- a/sunbeam-python/tests/functional/chaos/utils.py +++ b/sunbeam-python/tests/functional/chaos/utils.py @@ -17,10 +17,9 @@ import logging import subprocess import time -from typing import List, Tuple +from typing import List, Sequence, Tuple import jubilant -import pytest logger = logging.getLogger(__name__) @@ -44,7 +43,7 @@ def get_leader_and_non_leaders( non_leaders.append(unit_name) if leader_unit is None: - pytest.skip( + raise AssertionError( f"No leader unit found for application '{app_name}' in Juju status." ) @@ -111,6 +110,34 @@ def app_has_error(status: jubilant.Status, app_name: str) -> bool: return jubilant.any_error(status, app_name) +def assert_apps_healthy(juju_client, app_names: List[str]) -> None: + """Assert that the given applications have no units in error. + + If none of the applications are present in the model, this function logs + a warning and returns without failing the test. This allows the same test + suite to run against deployments that may not include all optional apps. 
+ """ + status: jubilant.Status = juju_client.juju.status() + present_apps = [name for name in app_names if name in status.apps] + + if not present_apps: + logger.warning( + "None of the apps %s found in Juju model; " + "skipping health assertion for them.", + app_names, + ) + return + + for app_name in present_apps: + if jubilant.any_error(status, app_name): + raise AssertionError( + f"Application '{app_name}' has units in error state during chaos." + ) + logger.info( + "Application '%s' is healthy during chaos (no units in error).", app_name + ) + + def unit_name_to_pod_name(unit_name: str) -> str: """Map a Juju unit name (e.g. 'keystone/1') to a pod name (e.g. 'keystone-1'). @@ -124,6 +151,28 @@ def pod_chaos_name_for_pod(app_name: str, pod_name: str) -> str: return f"{app_name}-{pod_name}-pod-kill" +def _kubectl_command(args: List[str]) -> List[str]: + """Build a kubectl command suitable for the environment. + + ``juju exec --unit -m -- sudo k8s kubectl ...`` + """ + k8s_unit = "k8s/0" + k8s_model = "openstack-machines" + return [ + "juju", + "exec", + "--unit", + k8s_unit, + "-m", + k8s_model, + "--", + "sudo", + "k8s", + "kubectl", + *args, + ] + + def apply_pod_chaos_for_pod( app_namespace: str, pod_name: str, @@ -159,7 +208,7 @@ def apply_pod_chaos_for_pod( chaos_name, ) subprocess.run( - ["kubectl", "apply", "-f", "-"], + _kubectl_command(["apply", "-f", "-"]), input=manifest, check=True, capture_output=True, @@ -174,16 +223,111 @@ def delete_pod_chaos(chaos_name: str, chaos_namespace: str = "chaos-mesh") -> No "Deleting PodChaos resource: %s (namespace: %s)", chaos_name, chaos_namespace ) subprocess.run( - [ - "kubectl", - "delete", - "podchaos", - chaos_name, - "-n", - chaos_namespace, - "--ignore-not-found=true", - ], + _kubectl_command( + [ + "delete", + "podchaos", + chaos_name, + "-n", + chaos_namespace, + "--ignore-not-found=true", + ] + ), check=False, capture_output=True, text=True, ) + + +def run_validation_with_pod_chaos( + juju_client, + 
targets: Sequence[tuple[str, List[str]]], + *, + suite_name: str, + openstack_namespace: str = "openstack", + chaos_namespace: str = "chaos-mesh", + validation_timeout: int = 3600, +) -> None: + """Run 'sunbeam validation run smoke' while injecting PodChaos for targets. + + Each entry in ``targets`` is (application_name, dependent_applications). + For each target application, all non-leader units are killed one by one + using PodChaos, and we wait for them to return to active status while + asserting that dependent applications remain healthy. + """ + logger.info( + "Starting 'sunbeam validation run smoke' for %s chaos suite...", + suite_name, + ) + validation_proc = subprocess.Popen( + ["sunbeam", "validation", "run", "smoke"], + text=True, + ) + + chaos_resources: List[str] = [] + try: + for app_name, dependent_apps in targets: + leader_unit, non_leader_units = get_leader_and_non_leaders( + juju_client, + app_name, + ) + + if not non_leader_units: + logger.info( + "Application '%s' has no non-leader units; skipping chaos.", + app_name, + ) + continue + + logger.info( + "%s leader unit: %s; non-leaders: %s", + app_name, + leader_unit, + non_leader_units, + ) + + for unit_name in non_leader_units: + pod_name = unit_name_to_pod_name(unit_name) + chaos_name = apply_pod_chaos_for_pod( + openstack_namespace, + pod_name, + chaos_namespace=chaos_namespace, + duration="30s", + ) + chaos_resources.append(chaos_name) + + wait_for_unit_active( + juju_client, + app_name, + unit_name, + timeout=600, + ) + + if dependent_apps: + assert_apps_healthy(juju_client, dependent_apps) + + logger.info( + "Waiting for validation smoke run to complete after %s chaos suite...", + suite_name, + ) + try: + return_code = validation_proc.wait(timeout=validation_timeout) + except subprocess.TimeoutExpired: + validation_proc.kill() + raise AssertionError( + "sunbeam validation run smoke did not complete within the timeout." 
+ ) + + assert return_code == 0, ( + "sunbeam validation run smoke failed with exit code " + f"{return_code} during {suite_name} chaos suite." + ) + finally: + for chaos_name in chaos_resources: + try: + delete_pod_chaos(chaos_name, chaos_namespace=chaos_namespace) + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to clean up PodChaos %s: %s", chaos_name, exc) + + if validation_proc.poll() is None: + validation_proc.terminate() diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py new file mode 100644 index 000000000..70e698216 --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Generic API control-plane pod loss chaos tests.""" + +from __future__ import annotations + +import logging + +import pytest + +from tests.functional.chaos.utils import run_validation_with_pod_chaos + +logger = logging.getLogger(__name__) + + +API_TARGETS: list[tuple[str, list[str]]] = [ + ("nova", ["keystone", "traefik-public", "traefik-internal"]), + ("neutron", ["keystone", "traefik-public", "traefik-internal"]), + ("glance", ["keystone", "traefik-public", "traefik-internal"]), + ("cinder-k8s", ["keystone", "traefik-public", "traefik-internal"]), + ("placement", ["keystone", "traefik-public", "traefik-internal"]), + ("aodh", ["keystone", "traefik-public", "traefik-internal"]), + ("ceilometer", ["keystone", "traefik-public", "traefik-internal"]), + ("gnocchi", ["keystone", "traefik-public", "traefik-internal"]), + ("masakari", ["keystone", "traefik-public", "traefik-internal"]), + ("watcher", ["keystone", "traefik-public", "traefik-internal"]), + ("horizon", ["keystone", "traefik-public", "traefik-internal"]), +] + + +@pytest.mark.functional +def 
test_validation_resilient_to_non_leader_api_pod_kills( + sunbeam_client, + juju_client, +) -> None: + """Validation 'smoke' profile should tolerate non-leader API pod kills.""" + run_validation_with_pod_chaos( + juju_client, + targets=API_TARGETS, + suite_name="API pod", + ) diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py new file mode 100644 index 000000000..13dcae539 --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py @@ -0,0 +1,48 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Chaos tests for database access-path degradation (mysql-router pods).""" + +from __future__ import annotations + +import logging + +import pytest + +from tests.functional.chaos.utils import run_validation_with_pod_chaos + +logger = logging.getLogger(__name__) + +DEPENDENT_APPS = ["keystone", "traefik-public", "traefik-internal"] + + +ROUTER_APPS: list[str] = [ + "nova-api-mysql-router", + "nova-cell-mysql-router", + "nova-mysql-router", + "cinder-mysql-router", + "cinder-volume-mysql-router", + "neutron-mysql-router", + "keystone-mysql-router", + "glance-mysql-router", + "placement-mysql-router", + "aodh-mysql-router", + "gnocchi-mysql-router", + "masakari-mysql-router", + "watcher-mysql-router", + "horizon-mysql-router", +] + + +@pytest.mark.functional +def test_validation_resilient_to_mysql_router_pod_kills( + sunbeam_client, + juju_client, +) -> None: + """Validation 'smoke' profile should tolerate mysql-router pod kills.""" + targets = [(app, DEPENDENT_APPS) for app in ROUTER_APPS] + run_validation_with_pod_chaos( + juju_client, + targets=targets, + suite_name="mysql-router", + ) diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py 
new file mode 100644 index 000000000..78327d3ed --- /dev/null +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2026 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +"""Chaos tests for core infrastructure services (MySQL, RabbitMQ, Traefik).""" + +from __future__ import annotations + +import logging + +import pytest + +from tests.functional.chaos.utils import run_validation_with_pod_chaos + +logger = logging.getLogger(__name__) + + +INFRA_TARGETS: list[tuple[str, list[str]]] = [ + ("mysql", ["keystone", "traefik-public", "traefik-internal"]), + ("rabbitmq", ["keystone", "traefik-public", "traefik-internal"]), + ("traefik-public", ["keystone"]), + ("traefik", ["keystone"]), + ("traefik-rgw", ["keystone"]), +] + + +@pytest.mark.functional +def test_validation_resilient_to_infra_pod_kills( + sunbeam_client, + juju_client, +) -> None: + """Validation 'smoke' profile should tolerate infra pod/unit loss.""" + run_validation_with_pod_chaos( + juju_client, + targets=INFRA_TARGETS, + suite_name="infra", + ) diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py index 2b0d9c975..13b3b42e3 100644 --- a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py @@ -2,32 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 -"""Chaos Mesh tests for the validation feature. - -These tests exercise the validation feature (``sunbeam validation run``) while -Chaos Mesh injects failures into **non-leader** Keystone pods, to assess how -well validation behaves under control plane disruption. 
-""" +"""Keystone-specific chaos tests for the validation feature.""" import logging -import subprocess -from typing import List import pytest from tests.functional.chaos.utils import ( - apply_pod_chaos_for_pod, - delete_pod_chaos, - get_leader_and_non_leaders, - unit_name_to_pod_name, - wait_for_unit_active, + run_validation_with_pod_chaos, ) logger = logging.getLogger(__name__) -OPENSTACK_NAMESPACE = "openstack" -CHAOS_NAMESPACE = "chaos-mesh" KEYSTONE_APP = "keystone" +TRAEFIK_APPS = ["traefik-public", "traefik-internal"] @pytest.mark.functional @@ -51,63 +39,8 @@ def test_validation_resilient_to_non_leader_keystone_pod_kills( The expectation is that the validation smoke run completes successfully despite transient failures of non-leader Keystone pods. """ - sunbeam_client.enable_feature("validation") - leader_unit, non_leader_units = get_leader_and_non_leaders( + run_validation_with_pod_chaos( juju_client, - KEYSTONE_APP, - ) - logger.info( - "Keystone leader unit: %s; non-leaders: %s", - leader_unit, - non_leader_units, + targets=[(KEYSTONE_APP, TRAEFIK_APPS)], + suite_name="Keystone API", ) - - # Start validation smoke tests in the background. - logger.info("Starting 'sunbeam validation run smoke'...") - validation_proc = subprocess.Popen( - ["sunbeam", "validation", "run", "smoke"], - text=True, - ) - - chaos_resources: List[str] = [] - try: - for unit_name in non_leader_units: - pod_name = unit_name_to_pod_name(unit_name) - chaos_name = apply_pod_chaos_for_pod( - OPENSTACK_NAMESPACE, - pod_name, - chaos_namespace=CHAOS_NAMESPACE, - duration="30s", - ) - chaos_resources.append(chaos_name) - - # Wait for the affected unit to become active again. - wait_for_unit_active( - juju_client, - KEYSTONE_APP, - unit_name, - timeout=600, - ) - - # After injecting chaos to all non-leaders, wait for validation to finish. 
- logger.info("Waiting for validation smoke run to complete...") - try: - return_code = validation_proc.wait(timeout=3600) - except subprocess.TimeoutExpired: - validation_proc.kill() - raise AssertionError( - "sunbeam validation run smoke did not complete within the timeout." - ) - - assert return_code == 0, ( - f"sunbeam validation run smoke failed with exit code {return_code}" - ) - finally: - for chaos_name in chaos_resources: - try: - delete_pod_chaos(chaos_name, chaos_namespace=CHAOS_NAMESPACE) - except Exception as exc: # noqa: BLE001 - logger.warning("Failed to clean up PodChaos %s: %s", chaos_name, exc) - - if validation_proc.poll() is None: - validation_proc.terminate() From 750aa14758f678abc1d3c0eca6afa88d83e95df7 Mon Sep 17 00:00:00 2001 From: Ahmad Hassan Date: Tue, 17 Feb 2026 13:51:15 +0500 Subject: [PATCH 5/5] Improved reporting metrics, Updated Readme --- .../tests/functional/chaos/README.md | 49 ++- .../tests/functional/chaos/conftest.py | 89 +++-- .../tests/functional/chaos/utils.py | 339 +++++++++++++++--- .../test_validation_api_pod_chaos.py | 31 +- .../test_validation_db_router_chaos.py | 18 +- .../validation/test_validation_infra_chaos.py | 19 +- .../test_validation_keystone_chaos.py | 28 +- 7 files changed, 444 insertions(+), 129 deletions(-) diff --git a/sunbeam-python/tests/functional/chaos/README.md b/sunbeam-python/tests/functional/chaos/README.md index 94457816e..df84d69e1 100644 --- a/sunbeam-python/tests/functional/chaos/README.md +++ b/sunbeam-python/tests/functional/chaos/README.md @@ -2,27 +2,46 @@ Chaos Mesh-based resilience tests for Canonical OpenStack features. -This directory is intentionally separate from the standard feature functional -tests under `tests/functional/feature` so that chaos experiments can be run -independently and expanded over time. +This directory is separate from the feature functional tests under +`tests/functional/feature` so that chaos experiments can be run independently +and expanded over time. 
## Prerequisites -- A working Canonical OpenStack deployment (same requirements as the feature - functional tests). +- A working Canonical OpenStack deployment (same as feature functional tests). - `sunbeam`, `openstack` and `juju` CLIs configured for that deployment. -- `kubectl` configured to talk to the Kubernetes cluster that backs the - OpenStack model. -- Chaos Mesh installed and running, typically in the `chaos-mesh` namespace. -## Layout +Session-scoped fixtures automatically: + +- Enable the **validation** feature once per run. +- Install or verify Chaos Mesh (Helm and `kubectl` are run via `juju exec` on + `k8s/0` in the `openstack-machines` model). + +## Run outcome and reports + +Each chaos test run is **SUCCESS** or **FAIL**: + +- **FAIL** if any unit does not return to `active` within the recovery timeout, + or if the post-chaos **quick** validation test fails. +- **SUCCESS** only when all targeted units recover to `active` and the quick + test passes. -- `validation/`: Chaos tests that target the **validation** feature. +A JSON report is written to `tests/functional/chaos/reports/` for each run. +Filenames include the outcome and a timestamp: + +- `SUCCESS__.json` +- `FAIL__.json` + +Reports include test duration, smoke test output/status, per-unit recovery +times and state sequences, and quick test output/status. + +## Layout -Additional feature-specific chaos tests can be added as new subdirectories -alongside `validation/`. +- `validation/`: Chaos tests for the **validation** feature (Keystone, API pods, + DB routers, infra). +- `reports/`: JSON reports from each run. 
-## Running the chaos tests +## Running the tests From the `sunbeam-python` tree: @@ -30,9 +49,9 @@ From the `sunbeam-python` tree: tox -e functional-chaos ``` -You can also run individual chaos tests via `pytest`, for example: +Or run a single test: ```bash -python -m pytest -s -vv tests/functional/chaos/validation/test_validation_keystone_chaos.py +python -m pytest -s -vv tests/functional/chaos/validation/test_validation_keystone_chaos.py --config tests/functional/feature/test_config.yaml ``` diff --git a/sunbeam-python/tests/functional/chaos/conftest.py b/sunbeam-python/tests/functional/chaos/conftest.py index 52f2e67c0..f5aa33bc6 100644 --- a/sunbeam-python/tests/functional/chaos/conftest.py +++ b/sunbeam-python/tests/functional/chaos/conftest.py @@ -1,14 +1,20 @@ # SPDX-FileCopyrightText: 2026 - Canonical Ltd # SPDX-License-Identifier: Apache-2.0 +# ruff: noqa: I001 """Functional fixtures and config hooks for chaos tests.""" import logging +import subprocess import pytest -from tests.functional.chaos.utils import _kubectl_command -from tests.functional.feature import conftest as feature_conftest # noqa: F401 +from tests.functional.chaos.utils import _helm_command, _kubectl_command +from tests.functional.feature.conftest import ( # noqa: F401 + juju_client, + sunbeam_client as _feature_sunbeam_client, + test_config, +) logger = logging.getLogger(__name__) @@ -23,11 +29,10 @@ def ensure_chaos_mesh_installed() -> None: - helm repo add chaos-mesh https://charts.chaos-mesh.org - helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh """ - import subprocess def _has_chaos_mesh() -> bool: try: - result = subprocess.run( + subprocess.run( _kubectl_command(["get", "pods", "-n", "chaos-mesh"]), check=True, capture_output=True, @@ -35,7 +40,8 @@ def _has_chaos_mesh() -> bool: ) except subprocess.CalledProcessError: return False - return "chaos-mesh" in result.stdout or "controller-manager" in result.stdout + + return True if _has_chaos_mesh(): logger.info("Chaos Mesh 
already present in namespace 'chaos-mesh'.") @@ -45,64 +51,93 @@ def _has_chaos_mesh() -> bool: try: subprocess.run( - ["helm", "version"], + _helm_command(["version"]), check=True, capture_output=True, text=True, ) except subprocess.CalledProcessError: - logger.info("Helm not found or not working; installing helm snap...") + logger.info( + "Helm not found or not working in k8s/0@openstack-machines; " + "attempting to install helm snap in that unit...", + ) subprocess.run( - ["sudo", "snap", "install", "helm", "--classic"], + [ + "juju", + "exec", + "--unit", + "k8s/0", + "-m", + "openstack-machines", + "--", + "sudo", + "snap", + "install", + "helm", + "--classic", + ], check=True, text=True, ) + # Re-check helm availability; if this fails, surface a clear error. + try: + subprocess.run( + _helm_command(["version"]), + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as exc: # pragma: no cover + msg = ( + "Helm is still not available in k8s/0@openstack-machines after " + "attempted snap installation. Please log into that unit and " + "ensure 'helm' is installed and configured." + ) + raise RuntimeError(msg) from exc subprocess.run( - ["helm", "repo", "add", "chaos-mesh", "https://charts.chaos-mesh.org"], + _helm_command(["repo", "add", "chaos-mesh", "https://charts.chaos-mesh.org"]), check=False, text=True, ) subprocess.run( - ["helm", "repo", "update"], + _helm_command(["repo", "update"]), check=False, text=True, ) subprocess.run( - [ - "helm", - "upgrade", - "--install", - "chaos-mesh", - "chaos-mesh/chaos-mesh", - "--namespace", - "chaos-mesh", - "--create-namespace", - ], + _helm_command( + [ + "upgrade", + "--install", + "chaos-mesh", + "chaos-mesh/chaos-mesh", + "--namespace", + "chaos-mesh", + "--create-namespace", + ], + ), check=True, text=True, ) if not _has_chaos_mesh(): - raise RuntimeError( + logger.warning( "Chaos Mesh could not be verified as running in 'chaos-mesh' " "namespace after attempted installation. 
" - "Please check Juju/Helm/kubectl connectivity." + "Continuing anyway; PodChaos operations may fail if Chaos Mesh " + "is not fully ready.", ) @pytest.fixture(scope="session", autouse=True) -def ensure_validation_enabled_once( - sunbeam_client, # type: ignore[reportUnusedFunction] -) -> None: +def ensure_validation_enabled_once(sunbeam_client) -> None: """Enable the validation feature once for all chaos tests. Chaos scenarios assume that the validation feature is enabled and they merely start ``sunbeam validation run smoke`` during fault injection. """ - import subprocess - logger.info("Ensuring 'validation' feature is enabled for chaos tests...") try: sunbeam_client.enable_feature("validation") diff --git a/sunbeam-python/tests/functional/chaos/utils.py b/sunbeam-python/tests/functional/chaos/utils.py index 197ea8217..3abf54f1a 100644 --- a/sunbeam-python/tests/functional/chaos/utils.py +++ b/sunbeam-python/tests/functional/chaos/utils.py @@ -14,9 +14,13 @@ from __future__ import annotations +import base64 +import json import logging import subprocess import time +from datetime import datetime, timezone +from pathlib import Path from typing import List, Sequence, Tuple import jubilant @@ -32,7 +36,14 @@ def get_leader_and_non_leaders( logger.info("Querying Juju status for application '%s' units...", app_name) status: jubilant.Status = juju_client.juju.status() - app = status.apps[app_name] + try: + app = status.apps[app_name] + except KeyError as exc: + available_apps = ", ".join(sorted(status.apps.keys())) + raise RuntimeError( + f"Application '{app_name}' not found in Juju status. 
" + f"Available applications: {available_apps}" + ) from exc leader_unit: str | None = None non_leaders: List[str] = [] @@ -72,8 +83,8 @@ def wait_for_unit_active( juju_client.juju.wait( lambda status: is_unit_active(status, app_name, unit_name), error=lambda status: app_has_error(status, app_name), - _timeout=timeout, - _delay=5.0, + timeout=timeout, + delay=5.0, ) except jubilant.WaitError as exc: raise AssertionError( @@ -91,18 +102,96 @@ def wait_for_unit_active( return elapsed -def is_unit_active( +def run_validation_command( + cmd: List[str], + timeout: int = 600, +) -> Tuple[float, str, bool]: + """Run a validation command (e.g. sunbeam validation run quick). + + Returns (duration_seconds, output, success). + """ + start = time.time() + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + ) + duration = time.time() - start + output = (result.stdout or "") + (result.stderr or "") + return (round(duration, 1), output, result.returncode == 0) + + +def wait_for_unit_active_with_tracking( + juju_client, + app_name: str, + unit_name: str, + timeout: int = 600, + poll_interval: int = 10, +) -> Tuple[float | None, List[dict]]: + """Poll until unit is active or timeout. + + Returns time_to_return_active_seconds (or None) and state_sequence. + + state_sequence: list of {timestamp_iso, state, message} when not active. 
+ """ + state_sequence: List[dict] = [] + poll_start = time.time() + left_active_at: float | None = None + + while (time.time() - poll_start) < timeout: + status = juju_client.juju.status() + current, message = get_unit_workload_status(status, app_name, unit_name) + now = time.time() + ts_iso = datetime.fromtimestamp(now, tz=timezone.utc).isoformat() + + if current != "active": + if left_active_at is None: + left_active_at = now + state_sequence.append( + { + "timestamp": ts_iso, + "state": current, + "message": message, + } + ) + else: + if left_active_at is not None: + return (round(now - left_active_at, 1), state_sequence) + return (0.0, state_sequence) + + time.sleep(poll_interval) + + return (None, state_sequence) + + +def get_unit_workload_status( status: jubilant.Status, app_name: str, unit_name: str, -) -> bool: - """Return True if the given unit's workload status is 'active'.""" +) -> Tuple[str, str]: + """Return (workload_status.current, workload_status.message) for the unit. + + Returns ("unknown", "") if the unit or workload_status is missing. 
+ """ units = status.get_units(app_name) unit = units.get(unit_name) if not unit: - return False - workload = getattr(getattr(unit, "workload_status", None), "current", None) - return workload == "active" + return ("unknown", "") + workload = getattr(unit, "workload_status", None) + current = getattr(workload, "current", None) or "unknown" + message = getattr(workload, "message", None) or "" + return (str(current), str(message)) + + +def is_unit_active( + status: jubilant.Status, + app_name: str, + unit_name: str, +) -> bool: + """Return True if the given unit's workload status is 'active'.""" + current, _ = get_unit_workload_status(status, app_name, unit_name) + return current == "active" def app_has_error(status: jubilant.Status, app_name: str) -> bool: @@ -110,6 +199,23 @@ def app_has_error(status: jubilant.Status, app_name: str) -> bool: return jubilant.any_error(status, app_name) +def get_status_json_for_apps(juju_client, app_names: List[str]) -> dict: + """Return juju status as a dict restricted to the given application names. + + Runs ``juju status --format json`` and returns a structure with only + the requested applications (for use in report snapshots). + """ + try: + raw = juju_client.juju.cli("status", "--format", "json") + full = json.loads(raw) if isinstance(raw, str) else json.loads(raw.decode()) + except (json.JSONDecodeError, TypeError) as exc: + logger.warning("Could not get juju status JSON: %s", exc) + return {"_error": str(exc)} + + apps = full.get("applications") or {} + return {"applications": {name: apps[name] for name in app_names if name in apps}} + + def assert_apps_healthy(juju_client, app_names: List[str]) -> None: """Assert that the given applications have no units in error. @@ -154,7 +260,7 @@ def pod_chaos_name_for_pod(app_name: str, pod_name: str) -> str: def _kubectl_command(args: List[str]) -> List[str]: """Build a kubectl command suitable for the environment. 
- ``juju exec --unit -m -- sudo k8s kubectl ...`` + ``juju exec --unit -m --stdin -- sudo k8s kubectl ...`` """ k8s_unit = "k8s/0" k8s_model = "openstack-machines" @@ -173,6 +279,27 @@ def _kubectl_command(args: List[str]) -> List[str]: ] +def _helm_command(args: List[str]) -> List[str]: + """Build a helm command targeting the Sunbeam K8s cluster. + + ``juju exec --unit -m -- sudo helm ...`` + """ + k8s_unit = "k8s/0" + k8s_model = "openstack-machines" + return [ + "juju", + "exec", + "--unit", + k8s_unit, + "-m", + k8s_model, + "--", + "sudo", + "helm", + *args, + ] + + def apply_pod_chaos_for_pod( app_namespace: str, pod_name: str, @@ -207,13 +334,40 @@ def apply_pod_chaos_for_pod( app_namespace, chaos_name, ) - subprocess.run( - _kubectl_command(["apply", "-f", "-"]), - input=manifest, - check=True, + + manifest_b64 = base64.b64encode(manifest.encode("utf-8")).decode("ascii") + cmd = [ + "juju", + "exec", + "--unit", + "k8s/0", + "-m", + "openstack-machines", + "--", + "bash", + "-c", + 'echo "$1" | base64 -d | sudo k8s kubectl apply -f -', + "_", + manifest_b64, + ] + result = subprocess.run( + cmd, + check=False, capture_output=True, text=True, ) + if result.returncode != 0: + logger.error( + "Failed to apply PodChaos %s (exit code %s).\nstdout:\n%s\nstderr:\n%s", + chaos_name, + result.returncode, + result.stdout, + result.stderr, + ) + raise RuntimeError( + f"kubectl apply for PodChaos '{chaos_name}' failed with exit code " + f"{result.returncode}: {result.stderr.strip()}" + ) return chaos_name @@ -239,21 +393,35 @@ def delete_pod_chaos(chaos_name: str, chaos_namespace: str = "chaos-mesh") -> No ) -def run_validation_with_pod_chaos( +def _write_chaos_json_report(report_name: str, data: dict) -> Path: + """Write a single JSON report file; name includes timestamp. 
Returns path.""" + reports_dir = Path(__file__).parent / "reports" + reports_dir.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d_%H-%M-%S") + path = reports_dir / f"{report_name}_{timestamp}.json" + with path.open("w", encoding="utf-8") as f: + json.dump(data, f, indent=2, sort_keys=True) + return path + + +def run_validation_with_pod_chaos( # noqa: C901 juju_client, targets: Sequence[tuple[str, List[str]]], *, suite_name: str, + report_name: str | None = None, openstack_namespace: str = "openstack", chaos_namespace: str = "chaos-mesh", validation_timeout: int = 3600, + initial_delay: int = 60, + recovery_timeout: int = 600, + poll_interval: int = 10, + quick_test_timeout: int = 600, ) -> None: - """Run 'sunbeam validation run smoke' while injecting PodChaos for targets. + """Run validation with PodChaos and optional JSON reporting. - Each entry in ``targets`` is (application_name, dependent_applications). - For each target application, all non-leader units are killed one by one - using PodChaos, and we wait for them to return to active status while - asserting that dependent applications remain healthy. + Smoke runs in parallel with chaos; quick run and JSON report + are executed after chaos when report_name is provided. 
""" logger.info( "Starting 'sunbeam validation run smoke' for %s chaos suite...", @@ -262,23 +430,35 @@ def run_validation_with_pod_chaos( validation_proc = subprocess.Popen( ["sunbeam", "validation", "run", "smoke"], text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, ) + run_start_monotonic = time.time() + failed_recoveries: List[dict] = [] + apps_in_error: List[dict] = [] + recovery_per_unit: List[dict] = [] + validation_return_code: int | None = None + error_summary: str | None = None + validation_output: str | None = None + smoke_duration: float | None = None + chaos_resources: List[str] = [] + try: + if initial_delay > 0: + logger.info( + "Sleeping %s seconds before starting PodChaos injections to allow " + "Tempest discover-tempest-config/bootstrap to complete.", + initial_delay, + ) + time.sleep(initial_delay) + for app_name, dependent_apps in targets: leader_unit, non_leader_units = get_leader_and_non_leaders( juju_client, app_name, ) - - if not non_leader_units: - logger.info( - "Application '%s' has no non-leader units; skipping chaos.", - app_name, - ) - continue - logger.info( "%s leader unit: %s; non-leaders: %s", app_name, @@ -296,13 +476,40 @@ def run_validation_with_pod_chaos( ) chaos_resources.append(chaos_name) - wait_for_unit_active( - juju_client, - app_name, - unit_name, - timeout=600, + time_to_return_active_seconds, state_sequence = ( + wait_for_unit_active_with_tracking( + juju_client, + app_name, + unit_name, + timeout=recovery_timeout, + poll_interval=poll_interval, + ) ) - + recovery_per_unit.append( + { + "app": app_name, + "unit": unit_name, + "time_to_return_active_seconds": time_to_return_active_seconds, + } + ) + if state_sequence: + apps_in_error.append( + { + "app": app_name, + "unit": unit_name, + "state_sequence": state_sequence, + } + ) + if time_to_return_active_seconds is None: + failed_recoveries.append( + { + "app": app_name, + "unit": unit_name, + "pod": pod_name, + "error": "timeout", + } + ) + break if 
dependent_apps: assert_apps_healthy(juju_client, dependent_apps) @@ -311,17 +518,69 @@ def run_validation_with_pod_chaos( suite_name, ) try: - return_code = validation_proc.wait(timeout=validation_timeout) + stdout_data, _ = validation_proc.communicate(timeout=validation_timeout) + validation_output = stdout_data or "" + validation_return_code = validation_proc.returncode + smoke_duration = time.time() - run_start_monotonic except subprocess.TimeoutExpired: validation_proc.kill() - raise AssertionError( + stdout_data, _ = validation_proc.communicate() + validation_output = stdout_data or "" + smoke_duration = time.time() - run_start_monotonic + validation_return_code = None + error_summary = ( "sunbeam validation run smoke did not complete within the timeout." ) - assert return_code == 0, ( - "sunbeam validation run smoke failed with exit code " - f"{return_code} during {suite_name} chaos suite." - ) + if report_name: + quick_duration, quick_output, quick_success = run_validation_command( + ["sunbeam", "validation", "run", "quick"], + timeout=quick_test_timeout, + ) + test_duration = time.time() - run_start_monotonic + final_status = "SUCCESS" + if failed_recoveries or not quick_success: + final_status = "FAIL" + report_data = { + "status": final_status, + "test_duration_seconds": round(test_duration, 1), + "smoke_test": { + "duration_seconds": round(smoke_duration or 0, 1), + "output": (validation_output or "")[:10000], + "success": validation_return_code == 0, + }, + "apps_in_error": apps_in_error, + "recovery_per_unit": recovery_per_unit, + "quick_test": { + "duration_seconds": quick_duration, + "output": (quick_output or "")[:10000], + "success": quick_success, + }, + } + report_path = _write_chaos_json_report( + f"{final_status}_{report_name}", + report_data, + ) + logger.info("Chaos report written to %s", report_path) + + if not quick_success: + # Quick validation failure makes the chaos run a FAIL. 
+ raise AssertionError( + "Quick validation test failed after chaos. See reports/." + ) + + if failed_recoveries and error_summary is None: + failed_labels = ", ".join( + f"{fr['app']}/{fr['unit']}" for fr in failed_recoveries + ) + error_summary = ( + f"One or more chaos targets did not recover cleanly: {failed_labels}" + ) + raise AssertionError(error_summary) + except Exception as exc: # noqa: BLE001 + if error_summary is None: + error_summary = repr(exc) + raise finally: for chaos_name in chaos_resources: try: diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py index 70e698216..efeb43863 100644 --- a/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_api_pod_chaos.py @@ -14,18 +14,18 @@ logger = logging.getLogger(__name__) -API_TARGETS: list[tuple[str, list[str]]] = [ - ("nova", ["keystone", "traefik-public", "traefik-internal"]), - ("neutron", ["keystone", "traefik-public", "traefik-internal"]), - ("glance", ["keystone", "traefik-public", "traefik-internal"]), - ("cinder-k8s", ["keystone", "traefik-public", "traefik-internal"]), - ("placement", ["keystone", "traefik-public", "traefik-internal"]), - ("aodh", ["keystone", "traefik-public", "traefik-internal"]), - ("ceilometer", ["keystone", "traefik-public", "traefik-internal"]), - ("gnocchi", ["keystone", "traefik-public", "traefik-internal"]), - ("masakari", ["keystone", "traefik-public", "traefik-internal"]), - ("watcher", ["keystone", "traefik-public", "traefik-internal"]), - ("horizon", ["keystone", "traefik-public", "traefik-internal"]), +API_APPS: list[str] = [ + "nova", + "neutron", + "glance", + "cinder", + "placement", + # "aodh", + # "ceilometer", + # "gnocchi", + # "masakari", + # "watcher", + "horizon", ] @@ -37,6 +37,11 @@ def 
test_validation_resilient_to_non_leader_api_pod_kills( """Validation 'smoke' profile should tolerate non-leader API pod kills.""" run_validation_with_pod_chaos( juju_client, - targets=API_TARGETS, + targets=[(app, []) for app in API_APPS], suite_name="API pod", + report_name="test_validation_api_pod_chaos", + initial_delay=60, + recovery_timeout=1800, + poll_interval=10, + quick_test_timeout=1800, ) diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py index 13dcae539..797f7f467 100644 --- a/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_db_router_chaos.py @@ -13,8 +13,6 @@ logger = logging.getLogger(__name__) -DEPENDENT_APPS = ["keystone", "traefik-public", "traefik-internal"] - ROUTER_APPS: list[str] = [ "nova-api-mysql-router", @@ -26,10 +24,10 @@ "keystone-mysql-router", "glance-mysql-router", "placement-mysql-router", - "aodh-mysql-router", - "gnocchi-mysql-router", - "masakari-mysql-router", - "watcher-mysql-router", + # "aodh-mysql-router", + # "gnocchi-mysql-router", + # "masakari-mysql-router", + # "watcher-mysql-router", "horizon-mysql-router", ] @@ -40,9 +38,13 @@ def test_validation_resilient_to_mysql_router_pod_kills( juju_client, ) -> None: """Validation 'smoke' profile should tolerate mysql-router pod kills.""" - targets = [(app, DEPENDENT_APPS) for app in ROUTER_APPS] run_validation_with_pod_chaos( juju_client, - targets=targets, + targets=[(app, []) for app in ROUTER_APPS], suite_name="mysql-router", + report_name="test_validation_db_router_chaos", + initial_delay=60, + recovery_timeout=1800, + poll_interval=10, + quick_test_timeout=1800, ) diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py 
index 78327d3ed..0d5b9f262 100644 --- a/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_infra_chaos.py @@ -14,12 +14,12 @@ logger = logging.getLogger(__name__) -INFRA_TARGETS: list[tuple[str, list[str]]] = [ - ("mysql", ["keystone", "traefik-public", "traefik-internal"]), - ("rabbitmq", ["keystone", "traefik-public", "traefik-internal"]), - ("traefik-public", ["keystone"]), - ("traefik", ["keystone"]), - ("traefik-rgw", ["keystone"]), +INFRA_APPS: list[str] = [ + "mysql", + "rabbitmq", + "traefik-public", + "traefik", + "traefik-rgw", ] @@ -31,6 +31,11 @@ def test_validation_resilient_to_infra_pod_kills( """Validation 'smoke' profile should tolerate infra pod/unit loss.""" run_validation_with_pod_chaos( juju_client, - targets=INFRA_TARGETS, + targets=[(app, []) for app in INFRA_APPS], suite_name="infra", + report_name="test_validation_infra_chaos", + initial_delay=60, + recovery_timeout=1800, + poll_interval=10, + quick_test_timeout=1800, ) diff --git a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py index 13b3b42e3..32fcbf67a 100644 --- a/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py +++ b/sunbeam-python/tests/functional/chaos/validation/test_validation_keystone_chaos.py @@ -4,13 +4,13 @@ """Keystone-specific chaos tests for the validation feature.""" +from __future__ import annotations + import logging import pytest -from tests.functional.chaos.utils import ( - run_validation_with_pod_chaos, -) +from tests.functional.chaos.utils import run_validation_with_pod_chaos logger = logging.getLogger(__name__) @@ -23,24 +23,14 @@ def test_validation_resilient_to_non_leader_keystone_pod_kills( sunbeam_client, juju_client, ) -> None: - """Validation 'smoke' profile should tolerate non-leader Keystone pod kills. 
- - This test: - - - Ensures the ``validation`` feature is enabled. - - Uses Jubilant status to discover the Keystone leader unit and its - non-leader units in the ``openstack`` model. - - Starts ``sunbeam validation run smoke`` - - While validation is running, sequentially applies Chaos Mesh ``PodChaos`` - resources that kill each **non-leader** Keystone pod in turn, waiting for - each unit to recover to ``workload-status: active``. - - Collects and logs the recovery time for each non-leader unit. - - The expectation is that the validation smoke run completes successfully - despite transient failures of non-leader Keystone pods. - """ + """Run smoke + quick validation around non-leader Keystone pod chaos.""" run_validation_with_pod_chaos( juju_client, targets=[(KEYSTONE_APP, TRAEFIK_APPS)], suite_name="Keystone API", + report_name="test_validation_keystone_chaos", + initial_delay=60, + recovery_timeout=1800, + poll_interval=10, + quick_test_timeout=1800, )