diff --git a/doc/notification_samples/common_payloads/RequestSpecPayload.json b/doc/notification_samples/common_payloads/RequestSpecPayload.json index 3301c18c589..090af96d3e3 100644 --- a/doc/notification_samples/common_payloads/RequestSpecPayload.json +++ b/doc/notification_samples/common_payloads/RequestSpecPayload.json @@ -4,6 +4,7 @@ "availability_zone": null, "flavor": {"$ref": "FlavorPayload.json#"}, "ignore_hosts": null, + "ignore_nodes": null, "image": {"$ref": "ImageMetaPayload.json#"}, "instance_uuid": "d5e6a7b7-80e5-4166-85a3-cd6115201082", "num_instances": 1, diff --git a/nova/api/openstack/compute/hosts.py b/nova/api/openstack/compute/hosts.py index 14d1a2666a5..a1ad8f5d9bf 100644 --- a/nova/api/openstack/compute/hosts.py +++ b/nova/api/openstack/compute/hosts.py @@ -204,20 +204,26 @@ def reboot(self, req, id): return self._host_power_action(req, host_name=id, action="reboot") @staticmethod - def _get_total_resources(host_name, compute_node): + def _get_total_resources(host_name, compute_nodes): return {'resource': {'host': host_name, 'project': '(total)', - 'cpu': compute_node.vcpus, - 'memory_mb': compute_node.memory_mb, - 'disk_gb': compute_node.local_gb}} + 'cpu': sum(cn.vcpus + for cn in compute_nodes), + 'memory_mb': sum(cn.memory_mb + for cn in compute_nodes), + 'disk_gb': sum(cn.local_gb + for cn in compute_nodes)}} @staticmethod - def _get_used_now_resources(host_name, compute_node): + def _get_used_now_resources(host_name, compute_nodes): return {'resource': {'host': host_name, 'project': '(used_now)', - 'cpu': compute_node.vcpus_used, - 'memory_mb': compute_node.memory_mb_used, - 'disk_gb': compute_node.local_gb_used}} + 'cpu': sum(cn.vcpus_used + for cn in compute_nodes), + 'memory_mb': sum(cn.memory_mb_used + for cn in compute_nodes), + 'disk_gb': sum(cn.local_gb_used + for cn in compute_nodes)}} @staticmethod def _get_resource_totals_from_instances(host_name, instances): @@ -272,16 +278,15 @@ def show(self, req, id): try: mapping = objects.HostMapping.get_by_host(context, host_name) nova_context.set_target_cell(context, mapping.cell_mapping) - compute_node = ( - objects.ComputeNode.get_first_node_by_host_for_old_compat( - context, host_name)) + compute_nodes = objects.ComputeNodeList.get_all_by_host( + context, host_name) instances = self.api.instance_get_all_by_host(context, host_name) except (exception.ComputeHostNotFound, exception.HostMappingNotFound) as e: raise webob.exc.HTTPNotFound(explanation=e.format_message()) - resources = [self._get_total_resources(host_name, compute_node)] + resources = [self._get_total_resources(host_name, compute_nodes)] resources.append(self._get_used_now_resources(host_name, - compute_node)) + compute_nodes)) resources.append(self._get_resource_totals_from_instances(host_name, instances)) by_proj_resources = self._get_resources_by_project(host_name, diff --git a/nova/compute/api.py b/nova/compute/api.py index 368f04e8b35..4c277986b62 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -4015,14 +4015,13 @@ def _validate_host_for_cold_migrate( raise exception.ComputeHostNotFound(host=host_name) with nova_context.target_cell(context, hm.cell_mapping) as cctxt: - node = objects.ComputeNode.\ - get_first_node_by_host_for_old_compat( - cctxt, host_name, use_slave=True) + nodes = objects.ComputeNodeList.get_all_by_host( + cctxt, host_name, use_slave=True) else: - node = objects.ComputeNode.get_first_node_by_host_for_old_compat( + nodes = objects.ComputeNodeList.get_all_by_host( context, host_name, use_slave=True) - return node + return nodes # TODO(stephenfin): This logic would be so much easier to grok if we # finally split resize and cold migration into separate code paths @@ -4050,8 +4049,10 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True, context, instance) if host_name is not None: - node = self._validate_host_for_cold_migrate( + nodes = self._validate_host_for_cold_migrate( context, instance, host_name, allow_cross_cell_resize) + else: + nodes = None self._check_auto_disk_config( instance, auto_disk_config=auto_disk_config) @@ -4076,9 +4077,8 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True, if CONF.always_resize_on_same_host: LOG.info('Setting resize to the same host') host_name = instance.host - node = ( - objects.ComputeNode.get_first_node_by_host_for_old_compat( - context, host_name, use_slave=True)) + nodes = objects.ComputeNodeList.get_all_by_host( + context, host_name, use_slave=True) new_flavor = flavors.get_flavor_by_flavor_id( flavor_id, read_deleted="no") # NOTE(wenping): We use this instead of the 'block_accelerator' @@ -4189,20 +4189,16 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True, # which takes has filter_properties which in turn has # scheduler_hints (plural). - if host_name is None: - # If 'host_name' is not specified, - # clear the 'requested_destination' field of the RequestSpec - # except set the allow_cross_cell_move flag since conductor uses - # it prior to scheduling. - request_spec.requested_destination = objects.Destination( - allow_cross_cell_move=allow_cross_cell_resize) + if nodes and len(nodes) == 1: + node_name = nodes[0].hypervisor_hostname else: - # Set the host and the node so that the scheduler will - # validate them. - request_spec.requested_destination = objects.Destination( - host=node.host, node=node.hypervisor_hostname, - allow_cross_cell_move=allow_cross_cell_resize) + node_name = None + # Set the host and the node so that the scheduler will either + # validate them, or select one matching host and potentially node + request_spec.requested_destination = objects.Destination( + host=host_name, node=node_name, + allow_cross_cell_move=allow_cross_cell_resize) # Asynchronously RPC cast to conductor so the response is not blocked # during scheduling. If something fails the user can find out via # instance actions. @@ -5479,15 +5475,13 @@ def evacuate(self, context, instance, host, on_shared_storage, # the pre-v2.29 API microversion, which wouldn't set force if force is False and host: nodes = objects.ComputeNodeList.get_all_by_host(context, host) - # NOTE(sbauza): Unset the host to make sure we call the scheduler - host = None - # FIXME(sbauza): Since only Ironic driver uses more than one - # compute per service but doesn't support evacuations, - # let's provide the first one. - target = nodes[0] + if len(nodes) == 1: + node = nodes[0].hypervisor_hostname + else: + node = None destination = objects.Destination( - host=target.host, - node=target.hypervisor_hostname + host=host, + node=node ) request_spec.requested_destination = destination @@ -5501,7 +5495,8 @@ def evacuate(self, context, instance, host, on_shared_storage, bdms=None, recreate=True, on_shared_storage=on_shared_storage, - host=host, + # NOTE(sbauza): To make sure we call the scheduler + host=None, request_spec=request_spec, ) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index e89199cd4bf..70ff99f1653 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8000,9 +8000,15 @@ def _detach_interface(self, context, instance, port_id): action=fields.NotificationAction.INTERFACE_DETACH, phase=fields.NotificationPhase.END) - def _get_compute_info(self, context, host): - return objects.ComputeNode.get_first_node_by_host_for_old_compat( - context, host) + def _get_compute_info(self, host, nodename=None): + if not nodename: + nodes = objects.ComputeNodeList.get_all_by_host(self.context, host) + if len(nodes) != 1: + raise exception.ComputeHostNotFound(host=host) + return nodes[0] + + return objects.ComputeNode.get_by_host_and_nodename( + self.context, host, nodename) @wrap_exception() def check_instance_shared_storage(self, ctxt, data): @@ -8063,9 +8069,9 @@ def check_can_live_migrate_destination(self, ctxt, instance, raise exception.MigrationPreCheckError(reason=msg) src_compute_info = obj_base.obj_to_primitive( - self._get_compute_info(ctxt, instance.host)) + self._get_compute_info(ctxt, instance.host, instance.node)) dst_compute_info = obj_base.obj_to_primitive( - self._get_compute_info(ctxt, self.host)) + self._get_compute_info(ctxt, self.host, migration.dest_node)) dest_check_data = self.driver.check_can_live_migrate_destination(ctxt, instance, src_compute_info, dst_compute_info, block_migration, disk_over_commit) @@ -9019,12 +9025,19 @@ def post_live_migration_at_destination(self, context, instance, 'destination host.', instance=instance) finally: # Restore instance state and update host - current_power_state = self._get_power_state(instance) - node_name = None prev_host = instance.host try: - compute_node = self._get_compute_info(context, self.host) - node_name = compute_node.hypervisor_hostname + vm_info = self.driver.get_info(instance, use_cache=False) + current_power_state = vm_info.power_state + node_name = vm_info.node + except exception.InstanceNotFound: + current_power_state = power_state.NOSTATE + node_name = None + + try: + if not node_name: + compute_node = self._get_compute_info(context, self.host) + node_name = compute_node.hypervisor_hostname except exception.ComputeHostNotFound: LOG.exception('Failed to get compute_info for %s', self.host) finally: @@ -9832,11 +9845,17 @@ def _query_driver_power_state_and_sync(self, context, db_instance): try: vm_instance = self.driver.get_info(db_instance) vm_power_state = vm_instance.state + vm_node = vm_instance.node except exception.InstanceNotFound: vm_power_state = power_state.NOSTATE + vm_node = None # Note(maoy): the above get_info call might take a long time, # for example, because of a broken libvirt driver. try: + self._sync_instance_node(context, + db_instance, + vm_node, + use_slave=True) self._sync_instance_power_state(context, db_instance, vm_power_state, @@ -9884,6 +9903,65 @@ def _stop_unexpected_shutdown_instance(self, context, vm_state, LOG.exception("error during stop() in sync_power_state.", instance=db_instance) + def _sync_instance_node(self, context, instance, new_node, + use_slave=False): + """Align the instance node between the database and hypervisor + + If the instance is found on a different hypervisor the allocations + will be moved accordingly + """ + if not new_node: + return + + # We re-query the DB to get the latest instance info to minimize + # (not eliminate) race condition. + instance.refresh(use_slave=use_slave) + + if self.host != instance.host: + return + + source_node = instance.node + if new_node == instance.node: + return + + rt = self._get_resource_tracker() + rc = self.scheduler_client.reportclient + try: + source_cn_uuid = rt.get_node_uuid(source_node) + cn_uuid = rt.get_node_uuid(new_node) + + LOG.info("Moving instance from %s (%s) to %s (%s)", + source_node, source_cn_uuid, + new_node, cn_uuid, + instance=instance) + + allocs = rc.get_allocations_for_consumer(context, instance.uuid) + if not allocs: + LOG.warning("Failed to get existing resources") + return + + if cn_uuid in allocs: + LOG.info("Instance has already allocations for %s", + cn_uuid, instance=instance) + else: + LOG.debug(allocs, instance=instance) + resources = allocs.values()[0]['resources'] + res = rc.set_and_clear_allocations(context, cn_uuid, + instance.uuid, + resources, instance.project_id, + instance.user_id) + if not res: + LOG.warning("Failed to update allocations", + instance=instance) + return + LOG.debug("Updated allocations", instance=instance) + + instance.node = new_node + instance.save() + self._update_scheduler_instance_info(context, instance) + except Exception: + LOG.exception("Failed to move instance to new node") + def _sync_instance_power_state(self, context, db_instance, vm_power_state, use_slave=False): """Align instance power state between the database and hypervisor. diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index ae8acd3d5c2..bce66c35ff1 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -458,6 +458,7 @@ def live_migrate_instance(self, context, instance, scheduler_hint, def _live_migrate(self, context, instance, scheduler_hint, block_migration, disk_over_commit, request_spec): destination = scheduler_hint.get("host") + destination_node = scheduler_hint.get("node") def _set_vm_state(context, instance, ex, vm_state=None, task_state=None): @@ -474,10 +475,12 @@ def _set_vm_state(context, instance, ex, vm_state=None, migration = objects.Migration(context=context.elevated()) migration.dest_compute = destination + migration.dest_node = destination_node migration.status = 'accepted' migration.instance_uuid = instance.uuid migration.source_compute = instance.host migration.migration_type = fields.MigrationType.LIVE_MIGRATION + migration.source_node = instance.node if instance.obj_attr_is_set('flavor'): migration.old_instance_type_id = instance.flavor.id migration.new_instance_type_id = instance.flavor.id diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index 5e9d8235609..7b68ab4bab2 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -62,10 +62,12 @@ def __init__(self, context, instance, destination, request_spec=None): super(LiveMigrationTask, self).__init__(context, instance) self.destination = destination + self.dest_node = None self.block_migration = block_migration self.disk_over_commit = disk_over_commit self.migration = migration self.source = instance.host + self.source_node = instance.node self.migrate_data = None self.limits = None @@ -97,7 +99,8 @@ def _execute(self): # wants the scheduler to pick a destination host, or a host was # specified but is not forcing it, so they want the scheduler # filters to run on the specified host, like a scheduler hint. - self.destination, dest_node, self.limits = self._find_destination() + self.destination, self.dest_node, self.limits = \ + self._find_destination() else: # This is the case that the user specified the 'force' flag when # live migrating with a specific destination host so the scheduler @@ -108,7 +111,7 @@ def _execute(self): self._check_destination_has_enough_memory() source_node, dest_node = ( self._check_compatible_with_source_hypervisor( - self.destination)) + self.destination, self.dest_node)) # TODO(mriedem): Call select_destinations() with a # skip_filters=True flag so the scheduler does the work of claiming # resources on the destination in Placement but still bypass the @@ -315,7 +318,7 @@ def _check_destination_is_not_source(self): instance_id=self.instance.uuid, host=self.destination) def _check_destination_has_enough_memory(self): - compute = self._get_compute_info(self.destination) + compute = self._get_compute_info(self.destination, self.dest_node) free_ram_mb = compute.free_ram_mb total_ram_mb = compute.memory_mb mem_inst = self.instance.memory_mb @@ -336,13 +339,21 @@ def _check_destination_has_enough_memory(self): instance_uuid=instance_uuid, dest=dest, avail=avail, mem_inst=mem_inst)) - def _get_compute_info(self, host): - return objects.ComputeNode.get_first_node_by_host_for_old_compat( - self.context, host) + def _get_compute_info(self, host, nodename=None): + if not nodename: + nodes = objects.ComputeNodeList.get_all_by_host(self.context, host) + if len(nodes) != 1: + raise exception.ComputeHostNotFound(host=host) + return nodes[0] - def _check_compatible_with_source_hypervisor(self, destination): - source_info = self._get_compute_info(self.source) - destination_info = self._get_compute_info(destination) + return objects.ComputeNode.get_by_host_and_nodename( + self.context, host, nodename) + + def _check_compatible_with_source_hypervisor(self, dest_host, dest_node): + migration = self.migration + source_info = self._get_compute_info(migration.source_compute, + migration.source_node) + destination_info = self._get_compute_info(dest_host, dest_node) source_type = source_info.hypervisor_type destination_type = destination_info.hypervisor_type @@ -461,14 +472,12 @@ def _get_destination_cell_mapping(self): reason=(_('Unable to determine in which cell ' 'destination host %s lives.') % self.destination)) - def _get_request_spec_for_select_destinations(self, attempted_hosts=None): + def _get_request_spec_for_select_destinations(self): """Builds a RequestSpec that can be passed to select_destinations Used when calling the scheduler to pick a destination host for live migrating the instance. - :param attempted_hosts: List of host names to ignore in the scheduler. - This is generally at least seeded with the source host. :returns: nova.objects.RequestSpec object """ # NOTE(fwiesel): In order to check the compatibility @@ -522,14 +531,13 @@ def _get_request_spec_for_select_destinations(self, attempted_hosts=None): def _find_destination(self): # TODO(johngarbutt) this retry loop should be shared - attempted_hosts = [self.source] - request_spec = self._get_request_spec_for_select_destinations( - attempted_hosts) + attempted_nodes = [self.source_node] + request_spec = self._get_request_spec_for_select_destinations() host = None while host is None: - self._check_not_over_max_retries(attempted_hosts) - request_spec.ignore_hosts = attempted_hosts + self._check_not_over_max_retries(attempted_nodes) + request_spec.ignore_nodes = attempted_nodes try: selection_lists = self.query_client.select_destinations( self.context, request_spec, [self.instance.uuid], @@ -538,6 +546,7 @@ def _find_destination(self): # only one instance, and we don't care about any alternates. selection = selection_lists[0][0] host = selection.service_host + node = selection.nodename except messaging.RemoteError as ex: # TODO(ShaoHe Feng) There maybe multi-scheduler, and the # scheduling algorithm is R-R, we can let other scheduler try. @@ -560,17 +569,18 @@ def _find_destination(self): self.context, self.report_client, self.instance.pci_requests.requests, provider_mapping) try: - self._check_compatible_with_source_hypervisor(host) + self._check_compatible_with_source_hypervisor(host, node) self._call_livem_checks_on_host(host, provider_mapping) except (exception.Invalid, exception.MigrationPreCheckError) as e: - LOG.debug("Skipping host: %(host)s because: %(e)s", - {"host": host, "e": e}) - attempted_hosts.append(host) + LOG.debug("Skipping node: %(host)s/%(node)s because: %(e)s", + {"host": host, "node": node, "e": e}) + attempted_nodes.append(node) # The scheduler would have created allocations against the # selected destination host in Placement, so we need to remove # those before moving on. self._remove_host_allocations(selection.compute_node_uuid) host = None + node = None # TODO(artom) We should probably just return the whole selection object # at this point. return (selection.service_host, selection.nodename, selection.limits) @@ -587,11 +597,11 @@ def _remove_host_allocations(self, compute_node_uuid): self.report_client.remove_provider_tree_from_instance_allocation( self.context, self.instance.uuid, compute_node_uuid) - def _check_not_over_max_retries(self, attempted_hosts): + def _check_not_over_max_retries(self, attempted_nodes): if CONF.migrate_max_retries == -1: return - retries = len(attempted_hosts) - 1 + retries = len(attempted_nodes) - 1 if retries > CONF.migrate_max_retries: if self.migration: self.migration.status = 'failed' diff --git a/nova/conf/vmware.py b/nova/conf/vmware.py index ab87ab73fe6..1126cc13a48 100644 --- a/nova/conf/vmware.py +++ b/nova/conf/vmware.py @@ -451,6 +451,19 @@ Example: "vmx-13" for vSphere 6.5. Versions can be looked up here: https://kb.vmware.com/s/article/1003746 +"""), + cfg.StrOpt('hypervisor_mode', + default="cluster", + help=""" +Defines how the vsphere host is exposed to nova. + +Can be one of the following: + +cluster: The host has one hypervisor for the whole cluster. (Original) +esxi_to_cluster: Both will be exposed, but the ESXi resources are reserved, and + all instances will be moved over to the cluster +cluster_to_esxi: Both will be exposed, but the ESXi resources are reserved, and + all instances will be from the cluster to the ESXi-hosts """), ] diff --git a/nova/notifications/objects/request_spec.py b/nova/notifications/objects/request_spec.py index a46a71e8da6..8a748e6f625 100644 --- a/nova/notifications/objects/request_spec.py +++ b/nova/notifications/objects/request_spec.py @@ -26,10 +26,12 @@ class RequestSpecPayload(base.NotificationPayloadBase): # Version 1.1: Add force_hosts, force_nodes, ignore_hosts, image_meta, # instance_group, requested_destination, retry, # scheduler_hints and security_groups fields - VERSION = '1.1' + # Version 1.2: Add ignore_nodes field + VERSION = '1.2' SCHEMA = { 'ignore_hosts': ('request_spec', 'ignore_hosts'), + 'ignore_nodes': ('request_spec', 'ignore_nodes'), 'instance_uuid': ('request_spec', 'instance_uuid'), 'project_id': ('request_spec', 'project_id'), 'user_id': ('request_spec', 'user_id'), @@ -47,6 +49,7 @@ class RequestSpecPayload(base.NotificationPayloadBase): 'force_hosts': fields.StringField(nullable=True), 'force_nodes': fields.StringField(nullable=True), 'ignore_hosts': fields.ListOfStringsField(nullable=True), + 'ignore_nodes': fields.ListOfStringsField(nullable=True), 'image_meta': fields.ObjectField('ImageMetaPayload', nullable=True), 'instance_group': fields.ObjectField('ServerGroupPayload', nullable=True), diff --git a/nova/objects/request_spec.py b/nova/objects/request_spec.py index 67c9684057a..74f91047246 100644 --- a/nova/objects/request_spec.py +++ b/nova/objects/request_spec.py @@ -73,6 +73,8 @@ class RequestSpec(base.NovaObject): 'num_instances': fields.IntegerField(default=1), # NOTE(alex_xu): This field won't be persisted. 'ignore_hosts': fields.ListOfStringsField(nullable=True), + # NOTE(fabianw): This field won't be persisted + 'ignore_nodes': fields.ListOfStringsField(nullable=True), # NOTE(mriedem): In reality, you can only ever have one # host in the force_hosts list. The fact this is a list # is a mistake perpetuated over time. @@ -347,6 +349,7 @@ def from_primitives(cls, context, request_spec, filter_properties): spec._from_flavor(flavor) # Hydrate now from filter_properties spec.ignore_hosts = filter_properties.get('ignore_hosts') + spec.ignore_nodes = filter_properties.get('ignore_nodes') spec.force_hosts = filter_properties.get('force_hosts') spec.force_nodes = filter_properties.get('force_nodes') retry = filter_properties.get('retry', {}) @@ -460,6 +463,8 @@ def to_legacy_filter_properties_dict(self): filt_props = {} if self.obj_attr_is_set('ignore_hosts') and self.ignore_hosts: filt_props['ignore_hosts'] = self.ignore_hosts + if self.obj_attr_is_set('ignore_nodes') and self.ignore_nodes: + filt_props['ignore_nodes'] = self.ignore_nodes if self.obj_attr_is_set('force_hosts') and self.force_hosts: filt_props['force_hosts'] = self.force_hosts if self.obj_attr_is_set('force_nodes') and self.force_nodes: @@ -527,6 +532,7 @@ def from_components( spec_obj._from_instance_pci_requests(pci_requests) spec_obj._from_instance_numa_topology(numa_topology) spec_obj.ignore_hosts = filter_properties.get('ignore_hosts') + spec_obj.ignore_nodes = filter_properties.get('ignore_nodes') spec_obj.force_hosts = filter_properties.get('force_hosts') spec_obj.force_nodes = filter_properties.get('force_nodes') spec_obj._from_retry(filter_properties.get('retry', {})) @@ -619,10 +625,11 @@ def _from_db_object(context, spec, db_spec): # None and we'll lose what is set (but not persisted) on the # object. continue - elif key in ('retry', 'ignore_hosts'): + elif key in ('retry', 'ignore_hosts', 'ignore_nodes'): # NOTE(takashin): Do not override the 'retry' or 'ignore_hosts' # fields which are not persisted. They are not lazy-loadable # fields. If they are not set, set None. + # NOTE(fabianw): Same with 'ignore_nodes' if not spec.obj_attr_is_set(key): setattr(spec, key, None) elif key == "numa_topology": @@ -704,7 +711,8 @@ def _get_update_primitives(self): spec.instance_group.hosts = None # NOTE(mriedem): Don't persist these since they are per-request for excluded in ('retry', 'requested_destination', - 'requested_resources', 'ignore_hosts'): + 'requested_resources', 'ignore_hosts', + 'ignore_nodes'): if excluded in spec and getattr(spec, excluded): setattr(spec, excluded, None) # NOTE(stephenfin): Don't persist network metadata since we have diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py index 23d72140839..ef3027f8719 100644 --- a/nova/scheduler/host_manager.py +++ b/nova/scheduler/host_manager.py @@ -49,6 +49,7 @@ class ReadOnlyDict(IterableUserDict): """A read-only dict.""" + def __init__(self, source=None): self.data = {} if source: @@ -497,6 +498,16 @@ def _strip_ignore_hosts(host_map, hosts_to_ignore): ignored_hosts_str = ', '.join(ignored_hosts) LOG.info('Host filter ignoring hosts: %s', ignored_hosts_str) + def _strip_ignore_nodes(host_map, nodes_to_ignore): + ignored_nodes = [] + for node in nodes_to_ignore: + for (hostname, nodename) in list(host_map.keys()): + if node.lower() == nodename.lower(): + del host_map[(hostname, nodename)] + ignored_nodes.append(node) + ignored_nodes_str = ', '.join(ignored_nodes) + LOG.info('Host filter ignoring nodes: %s', ignored_nodes_str) + def _match_forced_hosts(host_map, hosts_to_force): forced_hosts = [] lowered_hosts_to_force = [host.lower() for host in hosts_to_force] @@ -567,6 +578,7 @@ def _get_hosts_matching_request(hosts, requested_destination): return iter(requested_nodes) ignore_hosts = spec_obj.ignore_hosts or [] + ignore_nodes = spec_obj.ignore_nodes or [] force_hosts = spec_obj.force_hosts or [] force_nodes = spec_obj.force_nodes or [] requested_node = spec_obj.requested_destination @@ -576,7 +588,7 @@ def _get_hosts_matching_request(hosts, requested_destination): # possible to any requested destination nodes before passing the # list to the filters hosts = _get_hosts_matching_request(hosts, requested_node) - if ignore_hosts or force_hosts or force_nodes: + if ignore_hosts or ignore_nodes or force_hosts or force_nodes: # NOTE(deva): we can't assume "host" is unique because # one host may have many nodes. name_to_cls_map = {(x.host, x.nodename): x for x in hosts} @@ -584,6 +596,10 @@ def _get_hosts_matching_request(hosts, requested_destination): _strip_ignore_hosts(name_to_cls_map, ignore_hosts) if not name_to_cls_map: return [] + if ignore_nodes: + _strip_ignore_nodes(name_to_cls_map, ignore_nodes) + if not name_to_cls_map: + return [] # NOTE(deva): allow force_hosts and force_nodes independently if force_hosts: _match_forced_hosts(name_to_cls_map, force_hosts) diff --git a/nova/tests/unit/notifications/objects/test_notification.py b/nova/tests/unit/notifications/objects/test_notification.py index b66d91283ad..6166fbe2a10 100644 --- a/nova/tests/unit/notifications/objects/test_notification.py +++ b/nova/tests/unit/notifications/objects/test_notification.py @@ -427,7 +427,7 @@ def test_payload_is_not_generated_if_notification_format_is_unversioned( 'MetricsNotification': '1.0-a73147b93b520ff0061865849d3dfa56', 'MetricsPayload': '1.0-65c69b15b4de5a8c01971cb5bb9ab650', 'NotificationPublisher': '2.2-ff8ef16673817ca7a3ea69c689e260c6', - 'RequestSpecPayload': '1.1-64d30723a2e381d0cd6a16a877002c64', + 'RequestSpecPayload': '1.2-6e4978f842a19991871904f126b97ecf', 'SchedulerRetriesPayload': '1.0-03a07d09575ef52cced5b1b24301d0b4', 'SelectDestinationsNotification': '1.0-a73147b93b520ff0061865849d3dfa56', 'ServerGroupNotification': '1.0-a73147b93b520ff0061865849d3dfa56', diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py index 87b483bc8f0..60b0dfe611f 100644 --- a/nova/virt/hardware.py +++ b/nova/virt/hardware.py @@ -240,15 +240,17 @@ def get_number_of_serial_ports(flavor, image_meta): class InstanceInfo(object): - def __init__(self, state, internal_id=None): + def __init__(self, state, internal_id=None, node=None): """Create a new Instance Info object :param state: Required. The running state, one of the power_state codes :param internal_id: Optional. A unique ID for the instance. Need not be related to the Instance.uuid. + :param node: Optional. The name of the compute node. See Instance.node """ self.state = state self.internal_id = internal_id + self.node = node def __eq__(self, other): return (self.__class__ == other.__class__ and diff --git a/nova/virt/vmwareapi/driver.py b/nova/virt/vmwareapi/driver.py index b7ca867f50b..66001899697 100644 --- a/nova/virt/vmwareapi/driver.py +++ b/nova/virt/vmwareapi/driver.py @@ -71,6 +71,7 @@ UUID_RE = re.compile(r'[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-' r'[a-fA-F0-9]{4}-[a-fA-F0-9]{12}') +HYPERVISOR_MODES = ('cluster', 'esxi_to_cluster', 'cluster_to_esxi') class VMwareVCDriver(driver.ComputeDriver): @@ -118,6 +119,12 @@ def __init__(self, virtapi, scheme="https"): raise Exception(_("Must specify host_ip, host_username and " "host_password to use vmwareapi.VMwareVCDriver")) + if CONF.vmware.hypervisor_mode not in HYPERVISOR_MODES: + raise Exception("Unknown hypervisor mode {}. Expecting {}".format( + CONF.vmware.hypervisor_mode, + HYPERVISOR_MODES, + )) + self._datastore_regex = None if CONF.vmware.datastore_regex: try: @@ -477,8 +484,6 @@ def get_cluster_metrics(self): def get_available_nodes(self, refresh=False): """Returns nodenames of all nodes managed by the compute service. - - This driver supports only one compute node. """ # get_available_nodes is called at the beginning of polling # the resources of all the nodes via @@ -486,10 +491,14 @@ def get_available_nodes(self, refresh=False): # We follow here the same pattern as in the ironic driver and use this # function call as an indicator of a new polling cycle and refresh # the host stats cached in _vc_state by calling... - self._vc_state.get_host_stats(refresh=True) + hosts = self._vc_state.get_host_stats(refresh=True) # In the following calls to get_inventory and get_available_resource # for each node, we then return the cached data - return [self._nodename] + + if CONF.vmware.hypervisor_mode == 'cluster': + return [self._nodename] + + return hosts.keys() def update_provider_tree(self, provider_tree, nodename, allocations=None): """Update a ProviderTree object with current resource provider, @@ -538,6 +547,13 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None): :raises: ReshapeFailed if the requested tree reshape fails for whatever reason. """ + if nodename == self._nodename: + # Either "cluster", or + normal = (CONF.vmware.hypervisor_mode in ('cluster', + 'esxi_to_cluster')) + else: + normal = (CONF.vmware.hypervisor_mode == 'cluster_to_esxi') + stats = self.get_available_resource(nodename) result = {} @@ -549,7 +565,7 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None): ratios = self._get_allocation_ratios(inv) local_gb = stats["local_gb"] - local_gb_max_free = stats.get("local_gb_max_free", "local_gb") + local_gb_max_free = stats.get("local_gb_max_free", local_gb) local_gb_max_unit_limit = CONF.vmware.resource_disk_gb_max_unit_limit if local_gb_max_unit_limit != -1: local_gb_max_free = min(local_gb_max_free, local_gb_max_unit_limit) @@ -558,7 +574,7 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None): CONF.reserved_host_disk_mb) result[orc.DISK_GB] = { 'total': local_gb, - 'reserved': reserved_disk_gb, + 'reserved': reserved_disk_gb if normal else local_gb, 'min_unit': 1, 'max_unit': local_gb_max_free, 'step_size': 1, @@ -571,7 +587,7 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None): reserved_vcpus = stats["vcpus_reserved"] result[orc.VCPU] = { 'total': vcpus, - 'reserved': reserved_vcpus, + 'reserved': reserved_vcpus if normal else vcpus, 'min_unit': 1, 'max_unit': max_vcpus, 'step_size': 1, @@ -584,7 +600,7 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None): if memory_mb > 0 and max_memory_mb > 0: result[orc.MEMORY_MB] = { 'total': memory_mb, - 'reserved': reserved_memory_mb, + 'reserved': reserved_memory_mb if normal else memory_mb, 'min_unit': 1, 'max_unit': max_memory_mb, 'step_size': 1, @@ -597,7 +613,8 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None): if available_memory_mb > 0: result[utils.MEMORY_RESERVABLE_MB_RESOURCE] = { 'total': available_memory_mb, - 'reserved': reserved_reservable_memory, + 'reserved': (reserved_reservable_memory + if normal else available_memory_mb), 'min_unit': 1, 'max_unit': max_memory_mb, 'step_size': 1, diff --git a/nova/virt/vmwareapi/host.py b/nova/virt/vmwareapi/host.py index ed1c89faceb..966a87124fc 100644 --- a/nova/virt/vmwareapi/host.py +++ b/nova/virt/vmwareapi/host.py @@ -22,6 +22,7 @@ from oslo_utils import units from oslo_utils import versionutils from oslo_vmware import exceptions as vexc +from oslo_vmware import vim_util as v_util import nova.conf from nova import context @@ -66,6 +67,7 @@ def __init__(self, session, cluster_node_name, cluster, datastore_regex): self._cluster = cluster self._datastore_regex = datastore_regex self._stats = {} + self._host_ref_to_name = {} ctx = context.get_admin_context() try: service = objects.Service.get_by_compute_host(ctx, CONF.host) @@ -80,6 +82,10 @@ def __init__(self, session, cluster_node_name, cluster, datastore_regex): str(about_info.version)) self.update_status() + @property + def cluster_node_name(self): + return self._cluster_node_name + def get_host_stats(self, refresh=False): """Return the current state of the cluster. If 'refresh' is True, run the update first. @@ -88,6 +94,11 @@ def get_host_stats(self, refresh=False): self.update_status() return self._stats + def get_host_name(self, host_ref): + if not self._host_ref_to_name: + self.update_status() + return self._host_ref_to_name[v_util.get_moref_value(host_ref)] + def update_status(self): """Update the current state of the cluster.""" data = {} @@ -125,6 +136,8 @@ def update_status(self): } for host, stats in per_host_stats.items(): + obj = stats.pop("obj") + self._host_ref_to_name[v_util.get_moref_value(obj)] = host data[host] = self._merge_stats(host, stats, defaults) cluster_stats = vm_util.aggregate_stats_from_cluster(per_host_stats) diff --git a/nova/virt/vmwareapi/vm_util.py b/nova/virt/vmwareapi/vm_util.py index c805d5c6723..81a73baa2ec 100644 --- a/nova/virt/vmwareapi/vm_util.py +++ b/nova/virt/vmwareapi/vm_util.py @@ -1734,6 +1734,7 @@ def _process_host_stats(obj, host_reservations_map): "memory_mb": mem_mb, "memory_mb_used": getattr(stats_summary, "overallMemoryUsage", 0), "cpu_info": _host_props_to_cpu_info(host_props), + "obj": obj.obj, } _set_hypervisor_type_and_version(stats, host_props) diff --git a/nova/virt/vmwareapi/vmops.py b/nova/virt/vmwareapi/vmops.py index 23f9c1c546f..a9523ea6895 100644 --- a/nova/virt/vmwareapi/vmops.py +++ b/nova/virt/vmwareapi/vmops.py @@ -3320,14 +3320,22 @@ def poll_rebooting_instances(self, timeout, instances): def get_info(self, instance, use_cache=True): """Return data about the VM instance.""" - powerstate_property = 'runtime.powerState' + lst_properties = ["runtime.powerState"] + if CONF.vmware.hypervisor_mode == 'cluster_to_esxi': + lst_properties.append("runtime.host") # if use_cache is true, then we are fine with possibly slightly # outdated values in favour of less api load, so no polling of updates # => skip_update=use_cache - vm_props = self._get_instance_props(instance, [powerstate_property], + vm_props = self._get_instance_props(instance, lst_properties, skip_update=use_cache) + if CONF.vmware.hypervisor_mode == 'cluster_to_esxi': + node = self._vc_state.get_host_name(vm_props["runtime.host"]) + else: + node = self._vc_state.cluster_node_name return hardware.InstanceInfo( - state=constants.POWER_STATES[vm_props[powerstate_property]]) + state=constants.POWER_STATES[vm_props["runtime.powerState"]], + node=node + ) def _get_diagnostics(self, instance): """Return data about VM diagnostics.""" @@ -4131,6 +4139,7 @@ def _get_vm_monitor_spec(self, vim): ["config.instanceUuid", "config.managedBy", "runtime.powerState", + "runtime.host", "summary.guest.toolsStatus", "summary.guest.toolsRunningStatus", ])