diff --git a/sunbeam-python/sunbeam/steps/cinder_volume.py b/sunbeam-python/sunbeam/steps/cinder_volume.py index bb5db05b1..1e3b41ef3 100644 --- a/sunbeam-python/sunbeam/steps/cinder_volume.py +++ b/sunbeam-python/sunbeam/steps/cinder_volume.py @@ -8,6 +8,7 @@ from sunbeam import versions from sunbeam.clusterd.client import Client from sunbeam.clusterd.service import ( + ClusterServiceUnavailableException, NodeNotExistInClusterException, ) from sunbeam.core.common import BaseStep, Result, ResultType, Role, StepContext @@ -93,6 +94,36 @@ def __init__( self._optional_offers: dict[str, str | None] = {} self.override_tfvars: dict[str, Any] = extra_tfvars or {} + def is_skip(self, context: StepContext) -> Result: + """Skip deploying HA cinder-volume when only non-HA backends exist. + + If all registered storage backends use a non-HA principal application, + the HA cinder-volume deployment is unnecessary and should be skipped. + When no backends are registered yet (initial bootstrap), proceed + normally to deploy the HA application as the default. + """ + try: + backends = self.client.cluster.get_storage_backends() + if backends.root and all( + backend.principal != APPLICATION for backend in backends.root + ): + LOG.debug( + "All registered storage backends are non-HA;" + " skipping HA cinder-volume deployment." + ) + return Result( + ResultType.SKIPPED, + "All registered storage backends use non-HA principal;" + " HA cinder-volume deployment is not needed.", + ) + except ( + ClusterServiceUnavailableException, + NodeNotExistInClusterException, + ) as e: + LOG.debug(f"Failed to check storage backends: {e}") + + return Result(ResultType.COMPLETED) + def get_application_timeout(self) -> int: """Return application timeout in seconds.""" return CINDER_VOLUME_APP_TIMEOUT diff --git a/sunbeam-python/sunbeam/storage/steps.py b/sunbeam-python/sunbeam/storage/steps.py index 601971900..ce4471366 100644 --- a/sunbeam-python/sunbeam/storage/steps.py +++ b/sunbeam-python/sunbeam/storage/steps.py @@ -32,6 +32,7 @@ ) from sunbeam.core.deployment import Deployment, Networks from sunbeam.core.juju import ( + ApplicationNotFoundException, ControllerNotFoundException, ControllerNotReachableException, JujuException, @@ -424,6 +425,29 @@ def run(self, context: StepContext) -> Result: self.backend_name, validated_config, ) + + # If the HA cinder-volume application exists and this backend is + # non-HA, override the principal_application so the backend + # subordinates to the existing HA app. + actual_principal = self.backend_instance.principal_application + if actual_principal != APPLICATION: + try: + self.jhelper.get_application(APPLICATION, self.model) + LOG.debug( + f"Overriding principal application for backend" + f" {self.backend_name!r} from" + f" {actual_principal!r} to {APPLICATION!r}" + ) + backends[backend_key]["principal_application"] = APPLICATION + actual_principal = APPLICATION + except ApplicationNotFoundException: + pass + except JujuException as e: + LOG.debug( + f"Unable to confirm whether HA {APPLICATION} exists" + f" for backend {self.backend_name!r}: {e}" + ) + try: # Update Terraform variables and apply with merged map self.tfhelper.update_tfvars_and_apply_tf( @@ -447,7 +471,7 @@ def run(self, context: StepContext) -> Result: "name": self.backend_name, "backend_type": self.backend_instance.backend_type, "config": validated_config.model_dump(exclude_none=True, by_alias=True), - "principal": self.backend_instance.principal_application, + "principal": actual_principal, "model_uuid": model["model-uuid"], } try: @@ -663,6 +687,34 @@ def is_skip(self, context: StepContext) -> Result: " skipping specific cinder-volume deployment.", ) + # Skip deploying non-HA cinder-volume if the HA cinder-volume + # application already exists in the model. Non-HA backends should + # subordinate to the existing HA application instead. + try: + self.jhelper.get_application(APPLICATION, self.model) + LOG.debug( + "HA cinder-volume application already exists in model %r;" + " skipping non-HA cinder-volume deployment for backend %r.", + self.model, + self.backend_name, + ) + return Result( + ResultType.SKIPPED, + f"HA {APPLICATION} already exists in model {self.model!r};" + f" backend {self.backend_name} will use the existing application.", + ) + except ApplicationNotFoundException: + pass + except JujuException as exc: + LOG.debug( + "Unable to confirm whether HA %s exists in model %r for backend %r;" + " proceeding with specific cinder-volume deployment: %s", + APPLICATION, + self.model, + self.backend_name, + exc, + ) + return Result(ResultType.COMPLETED) def _get_offers(self): diff --git a/sunbeam-python/tests/unit/sunbeam/steps/test_cinder_volume.py b/sunbeam-python/tests/unit/sunbeam/steps/test_cinder_volume.py index 930c10ad9..b2a32394a 100644 --- a/sunbeam-python/tests/unit/sunbeam/steps/test_cinder_volume.py +++ b/sunbeam-python/tests/unit/sunbeam/steps/test_cinder_volume.py @@ -5,7 +5,9 @@ import pytest +from sunbeam.core.common import ResultType from sunbeam.steps.cinder_volume import ( + APPLICATION, CINDER_VOLUME_APP_TIMEOUT, CINDER_VOLUME_UNIT_TIMEOUT, DeployCinderVolumeApplicationStep, @@ -337,6 +339,53 @@ def test_extra_tfvars_telemetry_feature_disabled( deployment_with_tfhelpers, "telemetry" ) + def test_is_skip_no_backends_proceeds( + self, deploy_cinder_volume_step, basic_client, step_context + ): + basic_client.cluster.get_storage_backends.return_value = Mock(root=[]) + result = deploy_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.COMPLETED + + def test_is_skip_all_non_ha_backends_skips( + self, deploy_cinder_volume_step, basic_client, step_context + ): + non_ha_backend = Mock(principal="cinder-volume-noha") + basic_client.cluster.get_storage_backends.return_value = Mock( + root=[non_ha_backend] + ) + result = deploy_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.SKIPPED + + def test_is_skip_ha_backend_exists_proceeds( + self, deploy_cinder_volume_step, basic_client, step_context + ): + ha_backend = Mock(principal=APPLICATION) + basic_client.cluster.get_storage_backends.return_value = Mock(root=[ha_backend]) + result = deploy_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.COMPLETED + + def test_is_skip_mixed_ha_and_non_ha_backends_proceeds( + self, deploy_cinder_volume_step, basic_client, step_context + ): + ha_backend = Mock(principal=APPLICATION) + non_ha_backend = Mock(principal="cinder-volume-noha") + basic_client.cluster.get_storage_backends.return_value = Mock( + root=[ha_backend, non_ha_backend] + ) + result = deploy_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.COMPLETED + + def test_is_skip_exception_proceeds( + self, deploy_cinder_volume_step, basic_client, step_context + ): + from sunbeam.clusterd.service import ClusterServiceUnavailableException + + basic_client.cluster.get_storage_backends.side_effect = ( + ClusterServiceUnavailableException("err") + ) + result = deploy_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.COMPLETED + class TestRemoveCinderVolumeUnitsStep: @pytest.fixture diff --git a/sunbeam-python/tests/unit/sunbeam/storage/test_steps.py b/sunbeam-python/tests/unit/sunbeam/storage/test_steps.py index dbc412f12..cf079ca81 100644 --- a/sunbeam-python/tests/unit/sunbeam/storage/test_steps.py +++ b/sunbeam-python/tests/unit/sunbeam/storage/test_steps.py @@ -7,9 +7,13 @@ import pydantic import pytest +from sunbeam.clusterd.service import StorageBackendNotFoundException +from sunbeam.core.common import ResultType +from sunbeam.core.juju import ApplicationNotFoundException from sunbeam.core.questions import PasswordPromptQuestion, PromptQuestion from sunbeam.storage.models import SecretDictField from sunbeam.storage.steps import ( + BaseStorageBackendDeployStep, DeploySpecificCinderVolumeStep, basemodel_validator, generate_questions_from_config, @@ -356,3 +360,117 @@ def test_run_extra_tfvars_precedence( ] is True ) + + def test_is_skip_no_storage_nodes( + self, deploy_specific_cinder_volume_step, basic_client, step_context + ): + basic_client.cluster.list_nodes_by_role.return_value = [] + result = deploy_specific_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.FAILED + + def test_is_skip_ha_backend_skips( + self, deploy_specific_cinder_volume_step, basic_client, step_context + ): + basic_client.cluster.list_nodes_by_role.return_value = [{"machineid": "0"}] + deploy_specific_cinder_volume_step.backend_instance.principal_application = ( + "cinder-volume" + ) + result = deploy_specific_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.SKIPPED + + def test_is_skip_ha_app_already_exists( + self, + deploy_specific_cinder_volume_step, + basic_client, + basic_jhelper, + step_context, + ): + basic_client.cluster.list_nodes_by_role.return_value = [{"machineid": "0"}] + basic_jhelper.get_application.return_value = Mock() + result = deploy_specific_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.SKIPPED + + def test_is_skip_no_ha_app_proceeds( + self, + deploy_specific_cinder_volume_step, + basic_client, + basic_jhelper, + step_context, + ): + basic_client.cluster.list_nodes_by_role.return_value = [{"machineid": "0"}] + basic_jhelper.get_application.side_effect = ApplicationNotFoundException( + "not found" + ) + result = deploy_specific_cinder_volume_step.is_skip(step_context) + assert result.result_type == ResultType.COMPLETED + + +class TestBaseStorageBackendDeployStepPrincipalOverride: + """Test principal_application override when HA app exists.""" + + @patch("sunbeam.storage.steps.read_config") + @patch("sunbeam.storage.steps.write_answers") + def test_run_overrides_principal_when_ha_exists( + self, + mock_write_answers, + mock_read_config, + basic_deployment, + basic_client, + basic_tfhelper, + basic_jhelper, + basic_manifest, + test_model, + step_context, + ): + mock_read_config.return_value = {} + basic_jhelper.get_model.return_value = {"model-uuid": "test-uuid"} + basic_jhelper.get_application.return_value = Mock() # HA app exists + basic_jhelper.wait_application_ready = Mock() + basic_client.cluster.get_storage_backend.side_effect = ( + StorageBackendNotFoundException("not found") + ) + basic_deployment.reload_tfhelpers = Mock() + + backend_instance = Mock() + backend_instance.principal_application = "cinder-volume-noha" + backend_instance.supports_ha = False + backend_instance.display_name = "Test Backend" + backend_instance.backend_type = "test" + backend_instance.tfvar_config_key = "TerraformVarsStorageBackends" + backend_instance.config_key.return_value = "TestBackendConfig" + backend_instance.config_type.return_value = Mock( + model_validate=Mock( + return_value=Mock(model_dump=Mock(return_value={"san_ip": "1.2.3.4"})) + ) + ) + backend_instance.build_terraform_vars.return_value = { + "principal_application": "cinder-volume-noha", + } + backend_instance.enable_backend = Mock() + + step = BaseStorageBackendDeployStep( + basic_deployment, + basic_client, + basic_tfhelper, + basic_jhelper, + basic_manifest, + {}, + "my-backend", + backend_instance, + test_model, + ) + step.variables = {"san_ip": "1.2.3.4"} + + step.run(step_context) + + # Verify terraform vars have overridden principal + call_args = basic_tfhelper.update_tfvars_and_apply_tf.call_args + tfvars = call_args[1]["override_tfvars"] + assert ( + tfvars["backends"]["my-backend"]["principal_application"] == "cinder-volume" + ) + + # Verify clusterd record uses overridden principal + basic_client.cluster.add_storage_backend.assert_called_once() + add_call = basic_client.cluster.add_storage_backend.call_args + assert add_call[1]["principal"] == "cinder-volume"