diff --git a/sunbeam-python/sunbeam/core/juju.py b/sunbeam-python/sunbeam/core/juju.py index ec91a405e..e1f095f8d 100644 --- a/sunbeam-python/sunbeam/core/juju.py +++ b/sunbeam-python/sunbeam/core/juju.py @@ -1590,6 +1590,44 @@ def charm_trust(self, application_name: str, model: str) -> None: with self._model(model) as juju: juju.trust(application_name, scope="cluster") + def attach_resource( + self, + application_name: str, + model: str, + resource_name: str, + resource_path: str, + ) -> None: + """Upload a file resource to a deployed application. + + :param application_name: Name of the application + :param model: Name of the model + :param resource_name: Name of the resource as defined in the charm metadata + :param resource_path: Local path to the resource file to upload + """ + with self._model(model) as juju: + juju.cli( + "attach-resource", + application_name, + f"{resource_name}={resource_path}", + ) + + def get_application_resources( + self, + application_name: str, + model: str, + ) -> list[dict]: + """Return the resources defined for a deployed application. + + :param application_name: Name of the application + :param model: Name of the model + :returns: List of resource dicts sorted by name, each containing at + minimum the keys ``name``, ``type``, and ``description``. 
+ """ + with self._model(model) as juju: + raw = juju.cli("resources", "--format", "json", application_name) + data = json.loads(raw) + return sorted(data.get("resources", []), key=lambda r: r["name"]) + def charm_refresh( self, application_name: str, diff --git a/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/main.tf b/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/main.tf index 1eed9b6c7..8c44ffb97 100644 --- a/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/main.tf +++ b/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/main.tf @@ -54,6 +54,21 @@ resource "juju_integration" "observability-agent-integrations" { } } +resource "juju_integration" "observability-agent-integrations-juju-info" { + for_each = toset(var.observability-agent-integration-apps-juju-info) + model_uuid = data.juju_model.principal_application_model.uuid + + application { + name = juju_application.observability-agent.name + endpoint = "juju-info" + } + + application { + name = each.value + endpoint = "juju-info" + } +} + resource "juju_integration" "observability-agent-to-cos-prometheus" { count = var.receive-remote-write-offer-url != null ? 
1 : 0 model_uuid = data.juju_model.principal_application_model.uuid diff --git a/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/variables.tf b/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/variables.tf index 43793a79b..a55a04ae6 100644 --- a/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/variables.tf +++ b/sunbeam-python/sunbeam/features/observability/etc/deploy-grafana-agent/variables.tf @@ -4,7 +4,13 @@ # SPDX-License-Identifier: Apache-2.0 variable "observability-agent-integration-apps" { - description = "List of the deployed principal applications that integrate with opentelemetry collector" + description = "List of the deployed principal applications that integrate with opentelemetry collector via cos_agent interface" + type = list(string) + default = [] +} + +variable "observability-agent-integration-apps-juju-info" { + description = "List of the deployed principal applications that integrate with opentelemetry collector via juju-info interface" type = list(string) default = [] } diff --git a/sunbeam-python/sunbeam/features/observability/etc/deploy-hardware-observer/main.tf b/sunbeam-python/sunbeam/features/observability/etc/deploy-hardware-observer/main.tf new file mode 100644 index 000000000..d6fe6c810 --- /dev/null +++ b/sunbeam-python/sunbeam/features/observability/etc/deploy-hardware-observer/main.tf @@ -0,0 +1,62 @@ +# Terraform manifest for deployment of Hardware Observer +# +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +terraform { + required_providers { + juju = { + source = "juju/juju" + version = "= 1.3.1" + } + } +} + +provider "juju" {} + +data "juju_model" "principal_application_model" { + uuid = var.principal-application-model-uuid +} + +resource "juju_application" "hardware-observer" { + name = "hardware-observer" + model_uuid = data.juju_model.principal_application_model.uuid + + charm { + name = "hardware-observer" + channel = 
var.hardware-observer-channel + revision = var.hardware-observer-revision + base = var.hardware-observer-base + } + + config = var.hardware-observer-config +} + +resource "juju_integration" "hardware-observer-to-observability-agent" { + model_uuid = data.juju_model.principal_application_model.uuid + + application { + name = juju_application.hardware-observer.name + endpoint = "cos-agent" + } + + application { + name = var.observability-agent-app + endpoint = "cos-agent" + } +} + +resource "juju_integration" "hardware-observer-principal-integrations" { + for_each = toset(var.principal-applications) + model_uuid = data.juju_model.principal_application_model.uuid + + application { + name = juju_application.hardware-observer.name + endpoint = "general-info" + } + + application { + name = each.value + endpoint = "juju-info" + } +} diff --git a/sunbeam-python/sunbeam/features/observability/etc/deploy-hardware-observer/variables.tf b/sunbeam-python/sunbeam/features/observability/etc/deploy-hardware-observer/variables.tf new file mode 100644 index 000000000..a0f1bfccc --- /dev/null +++ b/sunbeam-python/sunbeam/features/observability/etc/deploy-hardware-observer/variables.tf @@ -0,0 +1,45 @@ +# Terraform manifest for deployment of Hardware Observer +# +# SPDX-FileCopyrightText: 2024 - Canonical Ltd +# SPDX-License-Identifier: Apache-2.0 + +variable "principal-application-model-uuid" { + description = "UUID of the Juju model principal application is deployed in" + type = string +} + +variable "principal-applications" { + description = "List of the deployed principal applications that hardware-observer integrates with via juju-info" + type = list(string) + default = [] +} + +variable "observability-agent-app" { + description = "Name of the observability agent application to integrate with" + type = string + default = "opentelemetry-collector" +} + +variable "hardware-observer-channel" { + description = "Channel to use when deploying hardware-observer charm" + type = string + 
default = "latest/stable" +} + +variable "hardware-observer-revision" { + description = "Channel revision to use when deploying hardware-observer charm" + type = number + default = null +} + +variable "hardware-observer-base" { + description = "Base to use when deploying hardware-observer charm" + type = string + default = "ubuntu@24.04" +} + +variable "hardware-observer-config" { + description = "Config to use when deploying hardware-observer charm" + type = map(string) + default = {} +} diff --git a/sunbeam-python/sunbeam/features/observability/feature.py b/sunbeam-python/sunbeam/features/observability/feature.py index 36024b2d2..dff560a21 100644 --- a/sunbeam-python/sunbeam/features/observability/feature.py +++ b/sunbeam-python/sunbeam/features/observability/feature.py @@ -10,6 +10,7 @@ import copy import enum +import json import logging import queue from pathlib import Path @@ -17,6 +18,7 @@ import click from packaging.version import Version from rich.console import Console +from rich.table import Table from sunbeam.clusterd.client import Client from sunbeam.clusterd.service import ( @@ -35,6 +37,7 @@ ResultType, StepContext, convert_proxy_to_model_configs, + get_step_message, read_config, run_plan, update_config, @@ -97,10 +100,12 @@ OBSERVABILITY_AGENT_K8S_DEPLOY_TIMEOUT = 1800 # 30 minutes COS_TFPLAN = "cos-plan" OBSERVABILITY_AGENT_TFPLAN = "grafana-agent-plan" +HARDWARE_OBSERVER_TFPLAN = "hardware-observer-plan" COS_CONFIG_KEY = "TerraformVarsFeatureObservabilityPlanCos" OBSERVABILITY_AGENT_CONFIG_KEY = ( "TerraformVarsFeatureObservabilityPlanObservabilityAgent" ) +HARDWARE_OBSERVER_CONFIG_KEY = "TerraformVarsFeatureObservabilityPlanHardwareObserver" COS_STORAGE_KEY = "ObservabilityStorage" @@ -119,7 +124,13 @@ "prometheus_remote_write", "loki_push_api", ] -INTEGRATION_APPS = ["openstack-hypervisor", "microceph", "k8s"] +OBSERVABILITY_AGENT_INTEGRATION_APPS = ["openstack-hypervisor", "microceph", "k8s"] +OBSERVABILITY_AGENT_APP = "opentelemetry-collector" 
+MICROOVN_APP = "microovn" +SUNBEAM_MACHINE_APP = "sunbeam-machine" + +HARDWARE_OBSERVER_APP = "hardware-observer" +HARDWARE_OBSERVER_CHANNEL = "latest/stable" class ProviderType(enum.Enum): @@ -383,9 +394,17 @@ def __init__( def run(self, context: StepContext) -> Result: """Execute configuration using terraform.""" + model_status = self.jhelper.get_model_status(self.model) + integration_apps = list(OBSERVABILITY_AGENT_INTEGRATION_APPS) + # network role is optional, only add microovn if it's deployed in the model + if MICROOVN_APP in model_status.apps: + integration_apps.append(MICROOVN_APP) extra_tfvars = { "principal-application-model-uuid": self.jhelper.get_model_uuid(self.model), - "observability-agent-integration-apps": INTEGRATION_APPS, + "observability-agent-integration-apps": integration_apps, + "observability-agent-integration-apps-juju-info": [SUNBEAM_MACHINE_APP], + # Workaround for https://github.com/canonical/opentelemetry-collector-k8s-operator/issues/265 + "opentelemetry-collector-config": {"tls_insecure_skip_verify": True}, } # Offer URLs from COS are added from feature extra_tfvars.update( @@ -421,6 +440,180 @@ def run(self, context: StepContext) -> Result: return Result(ResultType.COMPLETED) +class DeployHardwareObserverStep(BaseStep, JujuStepHelper): + """Deploy Hardware Observer using Terraform.""" + + _CONFIG = HARDWARE_OBSERVER_CONFIG_KEY + + def __init__( + self, + deployment: Deployment, + config: FeatureConfig, + feature: "ObservabilityFeature", + tfhelper: TerraformHelper, + jhelper: JujuHelper, + accepted_app_status: list[str] = ["active", "blocked"], + ): + super().__init__("Deploy Hardware Observer", "Deploy Hardware Observer") + self.deployment = deployment + self.config = config + self.feature = feature + self.tfhelper = tfhelper + self.jhelper = jhelper + self.manifest = self.feature.manifest + self.accepted_app_status = list(accepted_app_status) # copy shared default + self.client = self.deployment.get_client() + self.model = 
self.deployment.openstack_machines_model + + def run(self, context: StepContext) -> Result: + """Deploy hardware-observer and wait for it to settle.""" + extra_tfvars = { + "principal-application-model-uuid": self.jhelper.get_model_uuid(self.model), + "principal-applications": [SUNBEAM_MACHINE_APP], + "observability-agent-app": OBSERVABILITY_AGENT_APP, + } + + try: + self.update_status(context, "deploying services") + self.tfhelper.update_tfvars_and_apply_tf( + self.client, + self.manifest, + tfvar_config=self._CONFIG, + override_tfvars=extra_tfvars, + reporter=context.reporter, + ) + except (TerraformException, TerraformStateLockedException) as e: + LOG.exception("Error deploying hardware observer") + return Result(ResultType.FAILED, str(e)) + + LOG.debug("Application monitored for readiness: %s", HARDWARE_OBSERVER_APP) + try: + self.jhelper.wait_application_ready( + HARDWARE_OBSERVER_APP, + self.model, + accepted_status=self.accepted_app_status, + timeout=OBSERVABILITY_DEPLOY_TIMEOUT, + ) + except (JujuWaitException, TimeoutError) as e: + LOG.debug("Failed to deploy hardware observer", exc_info=True) + return Result(ResultType.FAILED, str(e)) + + return Result(ResultType.COMPLETED) + + +class RemoveHardwareObserverStep(BaseStep, JujuStepHelper): + """Remove Hardware Observer using Terraform.""" + + _CONFIG = HARDWARE_OBSERVER_CONFIG_KEY + + def __init__( + self, + deployment: Deployment, + feature: "ObservabilityFeature", + tfhelper: TerraformHelper, + jhelper: JujuHelper, + ): + super().__init__("Remove Hardware Observer", "Removing Hardware Observer") + self.deployment = deployment + self.feature = feature + self.tfhelper = tfhelper + self.jhelper = jhelper + self.manifest = self.feature.manifest + self.client = deployment.get_client() + self.model = deployment.openstack_machines_model + + def run(self, context: StepContext) -> Result: + """Destroy hardware-observer terraform plan and wait for app to be gone.""" + try: + 
self.tfhelper.destroy(reporter=context.reporter) + except TerraformException as e: + LOG.exception("Error destroying hardware observer") + return Result(ResultType.FAILED, str(e)) + + try: + self.jhelper.wait_application_gone( + [HARDWARE_OBSERVER_APP], + self.model, + timeout=OBSERVABILITY_DEPLOY_TIMEOUT, + ) + except TimeoutError as e: + LOG.debug("Failed to destroy hardware observer", exc_info=True) + return Result(ResultType.FAILED, str(e)) + + extra_tfvars = { + "principal-application-model-uuid": self.jhelper.get_model_uuid(self.model), + } + update_config(self.client, self._CONFIG, extra_tfvars) + + return Result(ResultType.COMPLETED) + + +class AttachHardwareObserverResourceStep(BaseStep): + """Attach a file resource to the hardware-observer application.""" + + def __init__( + self, + deployment: Deployment, + jhelper: JujuHelper, + resource_name: str, + resource_path: str, + ): + super().__init__( + "Attach Hardware Observer Resource", + f"Attaching resource {resource_name!r} to {HARDWARE_OBSERVER_APP}", + ) + self.deployment = deployment + self.jhelper = jhelper + self.resource_name = resource_name + self.resource_path = resource_path + self.model = deployment.openstack_machines_model + + def run(self, context: StepContext) -> Result: + """Upload a local file resource to the hardware-observer charm.""" + try: + self.jhelper.attach_resource( + HARDWARE_OBSERVER_APP, + self.model, + self.resource_name, + self.resource_path, + ) + except Exception as e: + LOG.exception("Error attaching resource to hardware observer") + return Result(ResultType.FAILED, str(e)) + + return Result(ResultType.COMPLETED) + + +class ListHardwareObserverResourcesStep(BaseStep): + """List resources defined for the hardware-observer application.""" + + def __init__( + self, + deployment: Deployment, + jhelper: JujuHelper, + ): + super().__init__( + "List Hardware Observer Resources", + f"Listing resources for {HARDWARE_OBSERVER_APP}", + ) + self.deployment = deployment + self.jhelper 
= jhelper + self.model = deployment.openstack_machines_model + + def run(self, context: StepContext) -> Result: + """Retrieve resource names from the hardware-observer application.""" + try: + resources = self.jhelper.get_application_resources( + HARDWARE_OBSERVER_APP, + self.model, + ) + except Exception as e: + LOG.exception("Error listing resources for hardware observer") + return Result(ResultType.FAILED, str(e)) + + return Result(ResultType.COMPLETED, json.dumps(resources)) + + class RemoveObservabilityStackStep(BaseStep, JujuStepHelper): """Remove Observability Stack using Terraform.""" @@ -700,6 +893,8 @@ def __init__(self) -> None: self.tfplan_observability_agent = OBSERVABILITY_AGENT_TFPLAN self.tfplan_observability_agent_dir = "deploy-grafana-agent" self.tfplan_observability_agent_k8s_dir = "deploy-grafana-agent-k8s" + self.tfplan_hardware_observer = HARDWARE_OBSERVER_TFPLAN + self.tfplan_hardware_observer_dir = "deploy-hardware-observer" self.prometheus_offer_url = "" self.grafana_offer_url = "" @@ -732,6 +927,7 @@ def default_software_overrides(self) -> SoftwareConfig: "opentelemetry-collector-k8s": CharmManifest( channel=OPENTELEMETRY_COLLECTOR_K8S_CHANNEL ), + "hardware-observer": CharmManifest(channel=HARDWARE_OBSERVER_CHANNEL), }, terraform={ self.tfplan_cos: TerraformManifest( @@ -742,6 +938,11 @@ def default_software_overrides(self) -> SoftwareConfig: / "etc" / self.tfplan_observability_agent_dir ), + self.tfplan_hardware_observer: TerraformManifest( + source=Path(__file__).parent + / "etc" + / self.tfplan_hardware_observer_dir + ), }, ) @@ -792,7 +993,16 @@ def manifest_attributes_tfvar_map(self) -> dict: "channel": "opentelemetry-collector-channel", "revision": "opentelemetry-collector-revision", "config": "opentelemetry-collector-config", - } + }, + } + }, + self.tfplan_hardware_observer: { + "charms": { + "hardware-observer": { + "channel": "hardware-observer-channel", + "revision": "hardware-observer-revision", + "config": 
"hardware-observer-config", + }, } }, self.tfplan: { @@ -829,6 +1039,8 @@ def set_tfvars_on_enable( """Set terraform variables to enable the application.""" tfvars = { "enable-observability": True, + # Workaround for https://github.com/canonical/opentelemetry-collector-k8s-operator/issues/265 + "opentelemetry-collector-config": {"tls_insecure_skip_verify": True}, } tfvars.update(self.get_cos_offer_urls(deployment)) return tfvars @@ -943,6 +1155,68 @@ def post_disable(self, deployment: Deployment, show_hints: bool) -> None: # ) # self.disable_feature(deployment, FeatureConfig()) + @click.command() + @click.argument("resource-name", type=str) + @click.argument("resource-path", type=click.Path(exists=True, dir_okay=False)) + @pass_method_obj + def attach_resource( + self, deployment: Deployment, resource_name: str, resource_path: str + ) -> None: + """Attach a file resource to the hardware-observer charm. + + RESOURCE_NAME is the name of the resource. + + RESOURCE_PATH is the local path to the resource file to attach. + + Use the `sunbeam observability list-resources` command to see the available + resource names. + """ + jhelper = JujuHelper(deployment.juju_controller) + + list_plan = [ListHardwareObserverResourcesStep(deployment, jhelper)] + list_results = run_plan(list_plan, console) + raw = get_step_message(list_results, ListHardwareObserverResourcesStep) + if raw is None: + raise click.ClickException("Failed to retrieve resource list.") + valid_names = [r["name"] for r in json.loads(raw)] + if resource_name not in valid_names: + raise click.ClickException( + f"Unknown resource {resource_name!r}. " + "Use the `sunbeam observability list-resources` command " + "to see valid names." 
+ ) + + plan = [ + AttachHardwareObserverResourceStep( + deployment, jhelper, resource_name, resource_path + ) + ] + run_plan(plan, console) + click.echo(f"Resource {resource_name!r} attached to {HARDWARE_OBSERVER_APP}.") + + @click.command() + @pass_method_obj + def list_resources(self, deployment: Deployment) -> None: + """List available resource names for the hardware-observer charm.""" + jhelper = JujuHelper(deployment.juju_controller) + plan = [ListHardwareObserverResourcesStep(deployment, jhelper)] + plan_results = run_plan(plan, console) + result = get_step_message(plan_results, ListHardwareObserverResourcesStep) + if result is None: + raise click.ClickException("Failed to retrieve resource list.") + resources = json.loads(result) + table = Table(title=f"{HARDWARE_OBSERVER_APP} resources") + table.add_column("Name") + table.add_column("Type") + table.add_column("Description") + for r in resources: + table.add_row( + r.get("name", ""), + r.get("type", ""), + r.get("description", ""), + ) + console.print(table) + @click.group() def observability_group(self): """Manage Observability.""" @@ -999,6 +1273,9 @@ def run_enable_plans( tfhelper_observability_agent = deployment.get_tfhelper( self.tfplan_observability_agent ) + tfhelper_hardware_observer = deployment.get_tfhelper( + self.tfplan_hardware_observer + ) client = deployment.get_client() plan = [] @@ -1035,10 +1312,18 @@ def run_enable_plans( ), ] + hardware_observer_plan = [ + TerraformInitStep(tfhelper_hardware_observer), + DeployHardwareObserverStep( + deployment, config, self, tfhelper_hardware_observer, jhelper + ), + ] + run_plan(plan, console, show_hints) run_plan(cos_plan, console, show_hints) run_plan(observability_agent_k8s_plan, console, show_hints) run_plan(observability_agent_plan, console, show_hints) + run_plan(hardware_observer_plan, console, show_hints) click.echo("Observability enabled.") @@ -1050,6 +1335,9 @@ def run_disable_plans(self, deployment: Deployment, show_hints: bool): 
tfhelper_observability_agent = deployment.get_tfhelper( self.tfplan_observability_agent ) + tfhelper_hardware_observer = deployment.get_tfhelper( + self.tfplan_hardware_observer + ) observability_agent_k8s_plan = [ TerraformInitStep(tfhelper), @@ -1059,6 +1347,13 @@ def run_disable_plans(self, deployment: Deployment, show_hints: bool): ), ] + hardware_observer_plan = [ + TerraformInitStep(tfhelper_hardware_observer), + RemoveHardwareObserverStep( + deployment, self, tfhelper_hardware_observer, jhelper + ), + ] + observability_agent_plan = [ TerraformInitStep(tfhelper_observability_agent), RemoveObservabilityAgentStep( @@ -1077,6 +1372,7 @@ def run_disable_plans(self, deployment: Deployment, show_hints: bool): ] run_plan(observability_agent_k8s_plan, console, show_hints) + run_plan(hardware_observer_plan, console, show_hints) run_plan(observability_agent_plan, console, show_hints) run_plan(cos_plan, console, show_hints) @@ -1136,7 +1432,9 @@ def enabled_commands(self) -> dict[str, list[dict]]: return { "init": [{"name": "observability", "command": self.observability_group}], "init.observability": [ - {"name": "dashboard-url", "command": self.dashboard_url} + {"name": "dashboard-url", "command": self.dashboard_url}, + {"name": "attach-resource", "command": self.attach_resource}, + {"name": "list-resources", "command": self.list_resources}, ], } @@ -1170,6 +1468,9 @@ def run_enable_plans( tfhelper_observability_agent = deployment.get_tfhelper( self.tfplan_observability_agent ) + tfhelper_hardware_observer = deployment.get_tfhelper( + self.tfplan_hardware_observer + ) client = deployment.get_client() plan = [] @@ -1200,6 +1501,13 @@ def run_enable_plans( ), ] + hardware_observer_plan = [ + TerraformInitStep(tfhelper_hardware_observer), + DeployHardwareObserverStep( + deployment, config, self, tfhelper_hardware_observer, jhelper + ), + ] + # Workaround as integrations are not handled in terraform plan # https://github.com/juju/terraform-provider-juju/issues/119 
observability_integrations_plan = [ @@ -1209,6 +1517,7 @@ def run_enable_plans( run_plan(plan, console, show_hints) run_plan(observability_agent_k8s_plan, console, show_hints) run_plan(observability_agent_plan, console, show_hints) + run_plan(hardware_observer_plan, console, show_hints) run_plan(observability_integrations_plan, console, show_hints) click.echo("Observability enabled.") @@ -1220,6 +1529,9 @@ def run_disable_plans(self, deployment: Deployment, show_hints: bool): tfhelper_observability_agent = deployment.get_tfhelper( self.tfplan_observability_agent ) + tfhelper_hardware_observer = deployment.get_tfhelper( + self.tfplan_hardware_observer + ) # Workaround as integrations are not handled in terraform plan # https://github.com/juju/terraform-provider-juju/issues/119 @@ -1237,6 +1549,13 @@ def run_disable_plans(self, deployment: Deployment, show_hints: bool): ), ] + hardware_observer_plan = [ + TerraformInitStep(tfhelper_hardware_observer), + RemoveHardwareObserverStep( + deployment, self, tfhelper_hardware_observer, jhelper + ), + ] + grafana_agent_plan = [ TerraformInitStep(tfhelper_observability_agent), RemoveObservabilityAgentStep( @@ -1251,6 +1570,7 @@ def run_disable_plans(self, deployment: Deployment, show_hints: bool): run_plan(observability_remove_offers_plan, console, show_hints) run_plan(observability_agent_k8s_plan, console, show_hints) + run_plan(hardware_observer_plan, console, show_hints) run_plan(grafana_agent_plan, console, show_hints) click.echo("Observability disabled.") @@ -1307,6 +1627,19 @@ def get_provider_type(self) -> ProviderType: """Return provide type external or embedded.""" return ProviderType.EXTERNAL + def enabled_commands(self) -> dict[str, list[dict]]: + """Dict of clickgroup along with commands. + + Return the commands available once the feature is enabled. 
+ """ + return { + "init": [{"name": "observability", "command": self.observability_group}], + "init.observability": [ + {"name": "attach-resource", "command": self.attach_resource}, + {"name": "list-resources", "command": self.list_resources}, + ], + } + def get_cos_offer_urls(self, deployment: Deployment) -> dict: """Return COS offer URLs.""" # Returning empty dict as integrations are not handled in terraform plan diff --git a/sunbeam-python/tests/unit/sunbeam/features/test_observability.py b/sunbeam-python/tests/unit/sunbeam/features/test_observability.py index aaf4efaf5..748f92790 100644 --- a/sunbeam-python/tests/unit/sunbeam/features/test_observability.py +++ b/sunbeam-python/tests/unit/sunbeam/features/test_observability.py @@ -1,8 +1,10 @@ # SPDX-FileCopyrightText: 2023 - Canonical Ltd # SPDX-License-Identifier: Apache-2.0 +import json from unittest.mock import MagicMock, Mock, patch +import click import pytest from sunbeam.clusterd.service import ConfigItemNotFoundException @@ -261,12 +263,128 @@ def test_run_waiting_timed_out( ) result = step.run(step_context) - tfhelper.destroy.assert_called_once() - jhelper.wait_model_gone.assert_called_once() assert result.result_type == ResultType.FAILED assert result.message == "timed out" +class TestAttachHardwareObserverResourceStep: + def test_run(self, deployment, jhelper, step_context): + """Happy path: attach_resource called with correct args.""" + step = observability_feature.AttachHardwareObserverResourceStep( + deployment, jhelper, "firmware", "/tmp/firmware.bin" + ) + result = step.run(step_context) + + jhelper.attach_resource.assert_called_once_with( + observability_feature.HARDWARE_OBSERVER_APP, + deployment.openstack_machines_model, + "firmware", + "/tmp/firmware.bin", + ) + assert result.result_type == ResultType.COMPLETED + + def test_run_attach_failed(self, deployment, jhelper, step_context): + """Exception from attach_resource returns FAILED.""" + jhelper.attach_resource.side_effect = 
Exception("attach failed") + + step = observability_feature.AttachHardwareObserverResourceStep( + deployment, jhelper, "firmware", "/tmp/firmware.bin" + ) + result = step.run(step_context) + + jhelper.attach_resource.assert_called_once() + assert result.result_type == ResultType.FAILED + assert result.message == "attach failed" + + +class TestListHardwareObserverResourcesStep: + def test_run(self, deployment, jhelper, step_context): + """Happy path: returns JSON-encoded list of resource dicts sorted by name.""" + resources = [ + {"name": "firmware", "type": "file", "description": "Firmware binary"}, + {"name": "storcli-amd64", "type": "file", "description": "StorCLI tool"}, + ] + jhelper.get_application_resources.return_value = resources + + step = observability_feature.ListHardwareObserverResourcesStep( + deployment, jhelper + ) + result = step.run(step_context) + + jhelper.get_application_resources.assert_called_once_with( + observability_feature.HARDWARE_OBSERVER_APP, + deployment.openstack_machines_model, + ) + assert result.result_type == ResultType.COMPLETED + assert json.loads(result.message) == resources + + def test_run_failed(self, deployment, jhelper, step_context): + """Exception from get_application_resources returns FAILED.""" + jhelper.get_application_resources.side_effect = Exception("list failed") + + step = observability_feature.ListHardwareObserverResourcesStep( + deployment, jhelper + ) + result = step.run(step_context) + + assert result.result_type == ResultType.FAILED + assert result.message == "list failed" + + +class TestAttachResourceCommand: + """Tests for the attach_resource CLI command (resource name validation).""" + + def _resources_json(self, names): + return json.dumps( + [{"name": n, "type": "file", "description": ""} for n in names] + ) + + def _make_feature(self): + return observability_feature.EmbeddedObservabilityFeature.__new__( + observability_feature.EmbeddedObservabilityFeature + ) + + def _call_attach(self, feature, 
deployment, resource_name, resource_path): + """Invoke attach_resource callback with an active Click context.""" + cmd = observability_feature.ObservabilityFeature.attach_resource + with click.Context(cmd, obj=deployment): + return feature.attach_resource.callback( + feature, resource_name, resource_path + ) + + def test_invalid_resource_name_raises( + self, deployment, run_plan_obs, juju_helper_obs + ): + """attach_resource raises ClickException when name is not in the list.""" + with patch( + "sunbeam.features.observability.feature.get_step_message", + return_value=self._resources_json(["firmware", "storcli-amd64"]), + ): + with pytest.raises(click.ClickException) as exc_info: + self._call_attach( + self._make_feature(), deployment, "bad-resource", "/tmp/file.bin" + ) + + msg = exc_info.value.format_message() + assert "bad-resource" in msg + assert "list-resources" in msg + + def test_valid_resource_name_proceeds( + self, deployment, run_plan_obs, juju_helper_obs + ): + """attach_resource proceeds to attach when name is valid.""" + with patch( + "sunbeam.features.observability.feature.get_step_message", + return_value=self._resources_json(["firmware"]), + ): + self._call_attach( + self._make_feature(), deployment, "firmware", "/tmp/file.bin" + ) + + # Two run_plan calls: one for list, one for attach + assert run_plan_obs.call_count == 2 + + class TestDeployObservabilityAgentStep: def test_run( self, deployment, tfhelper, jhelper, observabilityfeature, step_context @@ -280,6 +398,49 @@ def test_run( jhelper.wait_application_ready.assert_called_once() assert result.result_type == ResultType.COMPLETED + def test_run_includes_microovn_when_present( + self, deployment, tfhelper, jhelper, observabilityfeature, step_context + ): + """Microovn is added to integration-apps when it exists in the model.""" + jhelper.get_model_status.return_value = Mock( + apps={ + "openstack-hypervisor": Mock(), + "microovn": Mock(), + } + ) + + step = 
observability_feature.DeployObservabilityAgentStep( + deployment, Mock(), observabilityfeature, tfhelper, jhelper + ) + step.run(step_context) + + call_kwargs = tfhelper.update_tfvars_and_apply_tf.call_args.kwargs + integration_apps = call_kwargs["override_tfvars"][ + "observability-agent-integration-apps" + ] + assert "microovn" in integration_apps + + def test_run_excludes_microovn_when_absent( + self, deployment, tfhelper, jhelper, observabilityfeature, step_context + ): + """Microovn is not added when it does not exist in the model.""" + jhelper.get_model_status.return_value = Mock( + apps={ + "openstack-hypervisor": Mock(), + } + ) + + step = observability_feature.DeployObservabilityAgentStep( + deployment, Mock(), observabilityfeature, tfhelper, jhelper + ) + step.run(step_context) + + call_kwargs = tfhelper.update_tfvars_and_apply_tf.call_args.kwargs + integration_apps = call_kwargs["override_tfvars"][ + "observability-agent-integration-apps" + ] + assert "microovn" not in integration_apps + def test_run_tf_apply_failed( self, deployment, @@ -694,3 +855,121 @@ def test_post_enable_handles_grant_failure_gracefully( feature.post_enable(deployment, MagicMock(), show_hints=False) assert run_plan_obs.call_count == 3 + + +class TestDeployHardwareObserverStep: + def test_run( + self, deployment, tfhelper, jhelper, observabilityfeature, step_context + ): + """Happy path: terraform applies against sunbeam-machine, wait succeeds.""" + step = observability_feature.DeployHardwareObserverStep( + deployment, Mock(), observabilityfeature, tfhelper, jhelper + ) + result = step.run(step_context) + + tfhelper.update_tfvars_and_apply_tf.assert_called_once() + override = tfhelper.update_tfvars_and_apply_tf.call_args.kwargs[ + "override_tfvars" + ] + assert override["principal-applications"] == ["sunbeam-machine"] + jhelper.wait_application_ready.assert_called_once() + assert result.result_type == ResultType.COMPLETED + + def test_run_tf_apply_failed( + self, deployment, 
tfhelper, jhelper, observabilityfeature, step_context + ): + """Terraform failure returns FAILED without waiting.""" + tfhelper.update_tfvars_and_apply_tf.side_effect = TerraformException( + "apply failed..." + ) + + step = observability_feature.DeployHardwareObserverStep( + deployment, Mock(), observabilityfeature, tfhelper, jhelper + ) + result = step.run(step_context) + + tfhelper.update_tfvars_and_apply_tf.assert_called_once() + jhelper.wait_application_ready.assert_not_called() + assert result.result_type == ResultType.FAILED + assert result.message == "apply failed..." + + def test_run_waiting_timed_out( + self, deployment, tfhelper, jhelper, observabilityfeature, step_context + ): + """Timeout waiting for hardware-observer returns FAILED.""" + jhelper.wait_application_ready.side_effect = TimeoutError("timed out") + + step = observability_feature.DeployHardwareObserverStep( + deployment, Mock(), observabilityfeature, tfhelper, jhelper + ) + result = step.run(step_context) + + tfhelper.update_tfvars_and_apply_tf.assert_called_once() + jhelper.wait_application_ready.assert_called_once() + assert result.result_type == ResultType.FAILED + assert result.message == "timed out" + + def test_run_accepted_status_includes_blocked( + self, deployment, tfhelper, jhelper, observabilityfeature, step_context + ): + """Default accepted_app_status allows blocked so the step does not fail.""" + step = observability_feature.DeployHardwareObserverStep( + deployment, Mock(), observabilityfeature, tfhelper, jhelper + ) + assert "blocked" in step.accepted_app_status + assert "active" in step.accepted_app_status + + +class TestRemoveHardwareObserverStep: + def test_run( + self, + deployment, + tfhelper, + jhelper, + observabilityfeature, + update_config, + step_context, + ): + """Happy path: destroy succeeds, app gone, config cleared.""" + step = observability_feature.RemoveHardwareObserverStep( + deployment, observabilityfeature, tfhelper, jhelper + ) + result = 
step.run(step_context) + + tfhelper.destroy.assert_called_once() + jhelper.wait_application_gone.assert_called_once() + waited_apps = jhelper.wait_application_gone.call_args.args[0] + assert waited_apps == ["hardware-observer"] + assert result.result_type == ResultType.COMPLETED + + def test_run_tf_destroy_failed( + self, deployment, tfhelper, jhelper, observabilityfeature, step_context + ): + """Terraform destroy failure returns FAILED without waiting.""" + tfhelper.destroy.side_effect = TerraformException("destroy failed...") + + step = observability_feature.RemoveHardwareObserverStep( + deployment, observabilityfeature, tfhelper, jhelper + ) + result = step.run(step_context) + + tfhelper.destroy.assert_called_once() + jhelper.wait_application_gone.assert_not_called() + assert result.result_type == ResultType.FAILED + assert result.message == "destroy failed..." + + def test_run_waiting_timed_out( + self, deployment, tfhelper, jhelper, observabilityfeature, step_context + ): + """Timeout waiting for app to be gone returns FAILED.""" + jhelper.wait_application_gone.side_effect = TimeoutError("timed out") + + step = observability_feature.RemoveHardwareObserverStep( + deployment, observabilityfeature, tfhelper, jhelper + ) + result = step.run(step_context) + + tfhelper.destroy.assert_called_once() + jhelper.wait_application_gone.assert_called_once() + assert result.result_type == ResultType.FAILED + assert result.message == "timed out"