diff --git a/python/understack-workflows/tests/test_enroll_server.py b/python/understack-workflows/tests/test_enroll_server.py
index 17507b224..33a734135 100644
--- a/python/understack-workflows/tests/test_enroll_server.py
+++ b/python/understack-workflows/tests/test_enroll_server.py
@@ -213,8 +213,12 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker):
     ]
     fake_ironic, created_node = make_ironic_client(
         node_name="Dell-ABC123",
-        # OOB inspect, agent inspect, OOB inspect (post-RAID).
-        inspect_interfaces=["idrac-redfish", "idrac-redfish", "idrac-redfish"],
+        inspect_interfaces=[
+            "idrac-redfish",
+            "idrac-redfish",
+            "idrac-redfish",
+            "idrac-redfish",
+        ],
         inventory=inventory,
         ports=ports,
     )
@@ -227,7 +231,7 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker):
     )
     bmc_set_hostname = mocker.patch.object(enroll_server, "bmc_set_hostname")
     update_dell_bios_settings = mocker.patch.object(
-        enroll_server, "update_dell_bios_settings"
+        enroll_server, "update_dell_bios_settings", return_value={"changed": True}
     )
     mocker.patch(
         "understack_workflows.ironic.client.get_ironic_client",
@@ -294,6 +298,13 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker):
             runbook=None,
             disable_ramdisk=None,
         ),
+        call(
+            created_node.uuid,
+            "clean",
+            cleansteps=[{"interface": "management", "step": "clear_job_queue"}],
+            runbook=None,
+            disable_ramdisk=True,
+        ),
         call(
             created_node.uuid,
             "inspect",  # OOB redfish inspect for bios_name / basic info
@@ -303,7 +314,14 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker):
         ),
         call(
             created_node.uuid,
-            "inspect",  # agent inspect via virtual media
+            "inspect",  # agent inspect
+            cleansteps=None,
+            runbook=None,
+            disable_ramdisk=None,
+        ),
+        call(
+            created_node.uuid,
+            "inspect",  # second agent inspect to apply BIOS changes
             cleansteps=None,
             runbook=None,
             disable_ramdisk=None,
@@ -345,6 +363,9 @@ def test_enrol_happy_path_uses_virtual_media_inspect_and_flips_back(mocker):
         call(created_node.uuid, expected_ipxe_boot),
         call(created_node.uuid, expected_agent),
         call(created_node.uuid, expected_ipxe_boot),
+        call(created_node.uuid, expected_ipxe_boot),
+        call(created_node.uuid, expected_agent),
+        call(created_node.uuid, expected_ipxe_boot),
         call(created_node.uuid, expected_reset),  # Post-RAID OOB inspect prep
     ]
 
@@ -382,7 +403,9 @@ def test_enrol_existing_failed_node_recovers_and_updates(mocker):
     mocker.patch.object(enroll_server, "set_bmc_password")
     mocker.patch.object(enroll_server, "update_dell_drac_settings")
     mocker.patch.object(enroll_server, "bmc_set_hostname")
-    mocker.patch.object(enroll_server, "update_dell_bios_settings")
+    mocker.patch.object(
+        enroll_server, "update_dell_bios_settings", return_value={"changed": True}
+    )
     mocker.patch(
         "understack_workflows.ironic.client.get_ironic_client",
         return_value=fake_ironic,
@@ -407,6 +430,13 @@ def test_enrol_existing_failed_node_recovers_and_updates(mocker):
             runbook=None,
             disable_ramdisk=None,
         ),
+        call(
+            existing_node.uuid,
+            "clean",
+            cleansteps=[{"interface": "management", "step": "clear_job_queue"}],
+            runbook=None,
+            disable_ramdisk=True,
+        ),
         call(
             existing_node.uuid,
             "inspect",  # OOB inspect
@@ -416,7 +446,14 @@ def test_enrol_existing_failed_node_recovers_and_updates(mocker):
         ),
         call(
             existing_node.uuid,
-            "inspect",  # Agent inspect via virtual media
+            "inspect",  # Agent inspect
+            cleansteps=None,
+            runbook=None,
+            disable_ramdisk=None,
+        ),
+        call(
+            existing_node.uuid,
+            "inspect",  # second agent inspect to apply BIOS changes
             cleansteps=None,
             runbook=None,
             disable_ramdisk=None,
diff --git a/python/understack-workflows/understack_workflows/bmc_bios.py b/python/understack-workflows/understack_workflows/bmc_bios.py
index 2048531ba..85d08cb96 100644
--- a/python/understack-workflows/understack_workflows/bmc_bios.py
+++ b/python/understack-workflows/understack_workflows/bmc_bios.py
@@ -26,6 +26,10 @@ def required_bios_settings(pxe_interface: str | None) -> dict[str, str]:
         "SecureBoot": "Disabled",
         # PXE is enabled by default on DELL, but we don't use it:
         "PxeDev1EnDis": "Disabled",
+        "PxeDev2EnDis": "Disabled",
+        "PxeDev3EnDis": "Disabled",
+        "PxeDev4EnDis": "Disabled",
+        # Enable one HTTP port for booting:
         "HttpDev1EnDis": "Enabled",
         "HttpDev2EnDis": "Disabled",
         "HttpDev3EnDis": "Disabled",
diff --git a/python/understack-workflows/understack_workflows/ironic_node.py b/python/understack-workflows/understack_workflows/ironic_node.py
index 95821e3f4..b8ce8b230 100644
--- a/python/understack-workflows/understack_workflows/ironic_node.py
+++ b/python/understack-workflows/understack_workflows/ironic_node.py
@@ -162,6 +162,28 @@ def create_ironic_node(
     return client.create_node(node_data)
 
 
+def _run_management_clean_step(node: Node, step: str) -> None:
+    """Run one ``management`` clean step on a manageable node, without ramdisk."""
+    logger.info("%s performing %s clean step", node.uuid, step)
+    transition(
+        node,
+        target_state="clean",
+        expected_state="manageable",
+        clean_steps=[{"interface": "management", "step": step}],
+        disable_ramdisk=True,
+    )
+
+
+def clear_pending_idrac_jobs(node: Node) -> None:
+    """Clear the iDRAC job queue via the ``clear_job_queue`` clean step."""
+    _run_management_clean_step(node, "clear_job_queue")
+
+
+def reset_idrac_to_known_good_state(node: Node) -> None:
+    """Reset the iDRAC via the ``known_good_state`` clean step."""
+    _run_management_clean_step(node, "known_good_state")
+
+
 def _driver_for(manufacturer: str) -> tuple[str, str]:
     """Answer the (driver, inspect_interface) for this server."""
     if manufacturer.startswith("Dell"):
diff --git a/python/understack-workflows/understack_workflows/main/enroll_server.py b/python/understack-workflows/understack_workflows/main/enroll_server.py
index bff6865f5..9db3d3fef 100644
--- a/python/understack-workflows/understack_workflows/main/enroll_server.py
+++ b/python/understack-workflows/understack_workflows/main/enroll_server.py
@@ -66,6 +66,7 @@ def main() -> None:
         firmware_update=args.firmware_update,
         raid_configure=args.raid_configure,
         external_cmdb_id=args.external_cmdb_id,
+        reset_idrac=args.reset_idrac,
     )
 
 
@@ -75,6 +76,7 @@ def enroll(
     raid_configure: bool,
     old_password: str | None,
     external_cmdb_id: str | None = None,
+    reset_idrac: bool = False,
 ) -> None:
     logger.info("Starting enroll workflow for bmc_ip_address=%s", ip_address)
 
@@ -91,6 +93,13 @@ def enroll(
         external_cmdb_id=external_cmdb_id,
     )
 
+    # Clear stale iDRAC jobs before virtual-media inspection, or optionally
+    # reset the controller to a broader known-good state.
+    if reset_idrac:
+        ironic_node.reset_idrac_to_known_good_state(node)
+    else:
+        ironic_node.clear_pending_idrac_jobs(node)
+
     # Out-of-band redfish inspection populates data including baremetal ports.
     #
     # Our hooks augment the ironic baremetal port with the BMC-reported
@@ -124,7 +133,11 @@ def enroll(
     )
     logger.info("[node:%s] Selected PXE interface %s", node.uuid, pxe_interface)
 
-    update_dell_bios_settings(bmc, pxe_interface=pxe_interface)
+    # This sets the boot device to use for all future HTTP boots:
+    # NOTE(review): assumes a falsy return when nothing changed - confirm.
+    if update_dell_bios_settings(bmc, pxe_interface=pxe_interface):
+        logger.info("[node:%s] Second inspection to apply BIOS settings", node.uuid)
+        agent_inspection(node)
 
     if raid_configure:
         configure_raid(node, inventory)
@@ -219,6 +232,12 @@ def argument_parser():
         default=True,
         help="Configure RAID before inspection",
     )
+    parser.add_argument(
+        "--reset-idrac",
+        type=parse_bool,
+        default=False,
+        help="Reset iDRAC to known_good_state instead of clear_job_queue",
+    )
     parser.add_argument(
         "--external-cmdb-id",
         required=False,