diff --git a/nova/compute/api.py b/nova/compute/api.py index 4633fb5f1d0..dba844e2861 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -3692,7 +3692,10 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True, request_spec.requested_destination = objects.Destination( host=node.host, node=node.hypervisor_hostname) - scheduler_hint['_nova_check_type'] = ['resize'] + if 'scheduler_hints' not in request_spec: + request_spec.scheduler_hints = {} + request_spec.scheduler_hints['_nova_check_type'] = ['resize'] + self.compute_task_api.resize_instance(context, instance, extra_instance_updates, scheduler_hint=scheduler_hint, flavor=new_instance_type, diff --git a/nova/compute/manager.py b/nova/compute/manager.py index f5746bdb75d..ab379b8e906 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -4506,6 +4506,16 @@ def _revert_allocation(self, context, instance, migration): # Get just the resources part of the one allocation we need below orig_alloc = orig_alloc[cn_uuid].get('resources', {}) + # NOTE(jkulik): If this was a same-host resize, we didn't copy all + # resources into the Migration, but only the ones necessary to make + # sure we have enough to revert. Therefore, we have to rebuild the + # resources from the Migration's original flavor. + if migration.is_same_host(): + # FIXME(jkulik): This method is flawed in that it assumes + # allocations against only one provider. + orig_alloc = scheduler_utils.resources_from_flavor( + instance, instance.flavor) + # FIXME(danms): This method is flawed in that it asssumes allocations # against only one provider. So, this may overwite allocations against # a shared provider, if we had one. diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py index 9835774f2fa..bdd3b8a340b 100644 --- a/nova/conductor/tasks/migrate.py +++ b/nova/conductor/tasks/migrate.py @@ -263,7 +263,8 @@ def _execute(self): host_available = scheduler_utils.claim_resources( elevated, self.reportclient, self.request_spec, self.instance.uuid, alloc_req, - selection.allocation_request_version) + selection.allocation_request_version, + host=selection.service_host) else: # Some deployments use different schedulers that do not # use Placement, so they will not have an diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 66bc62595c5..8aa8a67ad3a 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -1853,6 +1853,48 @@ def set_and_clear_allocations(self, context, rp_uuid, consumer_uuid, 'text': r.text}) return r.status_code == 204 + @retries + def set_multiple_allocations(self, context, consumer_alloc_requests, + project_id, user_id): + """Atomically update multiple allocations for multiple consumers + + :param context: The security context + :param consumer_alloc_requests: Dict, keyed by consumer UUID, of dicts, + keyed by resource class, of amounts to + consume. + :param project_id: The project_id associated with the allocations. + :param user_id: The user_id associated with the allocations. + :returns: True if the allocations were created, False otherwise. + :raises: Retry if the operation should be retried due to a concurrent + update. + """ + # we will modify it, so make sure the caller keeps its original + payload = copy.deepcopy(consumer_alloc_requests) + + for consumer_uuid in payload: + payload[consumer_uuid]['project_id'] = project_id + payload[consumer_uuid]['user_id'] = user_id + + r = self.post('/allocations', payload, + version=POST_ALLOCATIONS_API_VERSION, + global_request_id=context.global_id) + if r.status_code != 204: + # NOTE(jaypipes): Yes, it sucks doing string comparison like this + # but we have no error codes, only error messages. + if 'concurrently updated' in r.text: + reason = ('another process changed the resource providers ' + 'involved in our attempt to post allocations for ' + 'consumers %s' % consumer_alloc_requests.keys()) + raise Retry('set_and_clear_allocations', reason) + else: + LOG.warning( + 'Unable to post allocations for consumers ' + '%(uuids)s (%(code)i %(text)s)', + {'uuids': consumer_alloc_requests.keys(), + 'code': r.status_code, + 'text': r.text}) + return r.status_code == 204 + @safe_connect @retries def put_allocations(self, context, rp_uuid, consumer_uuid, alloc_data, diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py index 5013edcd6e7..cc8ee0c9438 100644 --- a/nova/scheduler/filter_scheduler.py +++ b/nova/scheduler/filter_scheduler.py @@ -229,7 +229,8 @@ def _schedule(self, context, spec_obj, instance_uuids, alloc_req = alloc_reqs[0] if utils.claim_resources(elevated, self.placement_client, spec_obj, instance_uuid, alloc_req, - allocation_request_version=allocation_request_version): + allocation_request_version=allocation_request_version, + host=host.host): claimed_host = host break diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index 2a2c5b5f411..dedcb96075f 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -135,6 +135,20 @@ def select_destinations(self, ctxt, request_spec=None, raise exception.NoValidHost(reason=e.message) resources = utils.resources_from_request_spec(spec_obj) + if utils.request_is_resize(spec_obj): + status = "pre-migrating" + try: + migration = objects.Migration.get_by_instance_and_status( + ctxt, instance_uuids[0], status) + except exception.MigrationNotFoundByStatus: + LOG.warning("Unable to find migration record with status " + "'%s' for instance %s on resize. Cannot " + "ignore Migration consumer.", + status, instance_uuids) + else: + LOG.debug("Ignoring consumers %s for resize of %s.", + migration.uuid, instance_uuids) + resources.ignore_consumers = [migration.uuid] res = self.placement_client.get_allocation_candidates(ctxt, resources) if res is None: diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index 146114ed362..b2da1e9d7bf 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -60,6 +60,9 @@ def __init__(self): # Default to the configured limit but _limit can be # set to None to indicate "no limit". self._limit = CONF.scheduler.max_placement_results + # List of consumer UUIDs that should be ignored by placement in the + # call to get allocation candiates + self._ignore_consumers = [] def __str__(self): return ', '.join(sorted( @@ -67,7 +70,8 @@ def __str__(self): def get_request_group(self, ident): if ident not in self._rg_by_id: - rq_grp = placement_lib.RequestGroup(use_same_provider=bool(ident)) + rq_grp = placement_lib.RequestGroup(use_same_provider=bool(ident), + ignore_consumers=self._ignore_consumers) self._rg_by_id[ident] = rq_grp return self._rg_by_id[ident] @@ -280,6 +284,9 @@ def to_queryparams(request_group, suffix): qparams = [] if self._group_policy is not None: qparams.append(('group_policy', self._group_policy)) + if self._ignore_consumers is not None: + for consumer_uuid in self._ignore_consumers: + qparams.append(('ignore_consumer', consumer_uuid)) for ident, rg in self._rg_by_id.items(): # [('resourcesN', 'rclass:amount,rclass:amount,...'), # ('requiredN', 'trait_name,!trait_name,...'), @@ -288,6 +295,19 @@ def to_queryparams(request_group, suffix): qparams.extend(to_queryparams(rg, ident or '')) return parse.urlencode(sorted(qparams)) + @property + def ignore_consumers(self): + return self._ignore_consumers + + @ignore_consumers.setter + def ignore_consumers(self, value): + self._ignore_consumers = value + # also update child request-groups, because this is a global setting + # all those groups adhere to and we want see it if we convert `self` to + # a string + for rg in self._rg_by_id.values(): + rg.ignore_consumers = value + def build_request_spec(image, instances, instance_type=None): """Build a request_spec for the scheduler. @@ -922,7 +942,7 @@ def request_is_resize(spec_obj): def claim_resources(ctx, client, spec_obj, instance_uuid, alloc_req, - allocation_request_version=None): + allocation_request_version=None, host=None): """Given an instance UUID (representing the consumer of resources) and the allocation_request JSON object returned from Placement, attempt to claim resources for the instance in the placement API. Returns True if the claim @@ -941,6 +961,8 @@ def claim_resources(ctx, client, spec_obj, instance_uuid, alloc_req, the instance :param allocation_request_version: The microversion used to request the allocations. + :param host: The name of the host the resources should be claimed on. This + is only used to find same-host resizes. """ if request_is_rebuild(spec_obj): # NOTE(danms): This is a rebuild-only scheduling request, so we should @@ -961,6 +983,73 @@ def claim_resources(ctx, client, spec_obj, instance_uuid, alloc_req, # the spec object? user_id = ctx.user_id + if request_is_resize(spec_obj): + status = 'pre-migrating' + try: + migration = objects.Migration.get_by_instance_and_status(ctx, + instance_uuid, status) + except exception.MigrationNotFoundByStatus: + LOG.warning("Unable to find migration record with status " + "'%s' for instance %s on resize. Cannot check for " + "same-host resize and thus not replace resources " + "in migration.", + status, instance_uuid) + else: + if migration.source_compute == host: + LOG.info("Replacing resources for same-host resize on " + "migration %s for instance %s.", + migration.uuid, instance_uuid) + + consumer_alloc_requests = {instance_uuid: alloc_req} + vm_allocs = alloc_req['allocations'] + + # get the allocations for migration consumer + mig_allocs = \ + client.get_allocations_for_consumer(ctx, migration.uuid) + + # set allocations for migration consumer to keep the RAM/CPU + # reserved. This means the difference on downsize and 0 on + # upsize. Disk is kept as-is, because while the instance is not + # powered on and thus doesn't consume RAM/CPU, it does consume + # disk with the copied disk. + for rp_uuid, data in mig_allocs.items(): + # if we don't use this resource-provider with the new + # allocation, we keep all resources reserved and thus don't + # need to modify the resources in mig_allocs + if rp_uuid not in vm_allocs: + continue + + mig_resources = data['resources'] + vm_resources = vm_allocs[rp_uuid]['resources'] + for name, value in mig_resources.items(): + # same as above: if the new allocation doesn't contain + # this resource, we need to keep it all reserved + if name not in vm_resources: + continue + + # we need to reserve the full disk because the disk is + # used even on a powered-off VM. + if name == 'DISK_GB': + continue + + # take the difference to the new allocation-request, + # but don't let it get negative. we only want to + # reserve the resources for a down-size, to be able to + # size up again on revert. + new_value = max(value - vm_resources[name], 0) + if new_value: + mig_resources[name] = new_value + else: + del mig_resources[name] + + if mig_allocs: + consumer_alloc_requests[migration.uuid] = \ + {'allocations': mig_allocs} + LOG.debug("Claim looks like %s", consumer_alloc_requests) + + return client.set_multiple_allocations(ctx, + consumer_alloc_requests, project_id, user_id) + return client.claim_resources(ctx, instance_uuid, alloc_req, project_id, user_id, allocation_request_version=allocation_request_version) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 49eb568928f..e9e4f1a3011 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -6341,7 +6341,8 @@ def gen_fake_bdms(obj, instance): mock_get_bdms.side_effect = gen_fake_bdms # start test - migration = objects.Migration(uuid=uuids.migration) + migration = objects.Migration(uuid=uuids.migration, + source_compute='src_host', dest_compute=dest_host) @mock.patch.object(self.compute.network_api, 'setup_networks_on_host') @mock.patch.object(self.compute, 'reportclient') diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index a021d9af5dd..b1f76f0dee6 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -2028,8 +2028,7 @@ def _check_mig(): fake_spec = None scheduler_hint = { - 'filter_properties': filter_properties, - '_nova_check_type': ['resize'], + 'filter_properties': filter_properties } if flavor_id_passed: @@ -2060,6 +2059,10 @@ def _check_mig(): self.assertEqual('hypervisor_host', fake_spec.requested_destination.node) + self.assertIn('_nova_check_type', fake_spec.scheduler_hints) + self.assertEqual('resize', + fake_spec.scheduler_hints['_nova_check_type'][0]) + if host_name: mock_get_all_by_host.assert_called_once_with( self.context, host_name, True) diff --git a/nova/tests/unit/scheduler/test_filter_scheduler.py b/nova/tests/unit/scheduler/test_filter_scheduler.py index aaf9fd3d5ad..d29ff69c571 100644 --- a/nova/tests/unit/scheduler/test_filter_scheduler.py +++ b/nova/tests/unit/scheduler/test_filter_scheduler.py @@ -232,7 +232,8 @@ def _test_schedule_successful_claim(self, mock_get_hosts, mock_claim.assert_called_once_with(ctx.elevated.return_value, self.placement_client, spec_obj, uuids.instance, alloc_reqs_by_rp_uuid[uuids.cn1][0], - allocation_request_version=None) + allocation_request_version=None, + host=host_state.host) self.assertEqual(len(selected_hosts), 1) self.assertEqual(expected_selection, selected_hosts) @@ -297,7 +298,8 @@ def test_schedule_unsuccessful_claim(self, mock_get_hosts, mock_claim.assert_called_once_with(ctx.elevated.return_value, self.placement_client, spec_obj, uuids.instance, alloc_reqs_by_rp_uuid[uuids.cn1][0], - allocation_request_version=fake_version) + allocation_request_version=fake_version, + host=host_state.host) mock_cleanup.assert_not_called() # Ensure that we have consumed the resources on the chosen host states @@ -547,11 +549,13 @@ def fake_get_sorted_hosts(_spec_obj, host_states, index): mock.call(ctx.elevated.return_value, self.placement_client, spec_obj, uuids.instance0, alloc_reqs_by_rp_uuid[uuids.cn2][0], - allocation_request_version=None), + allocation_request_version=None, + host=hs2.host), mock.call(ctx.elevated.return_value, self.placement_client, spec_obj, uuids.instance1, alloc_reqs_by_rp_uuid[uuids.cn1][0], - allocation_request_version=None), + allocation_request_version=None, + host=hs1.host), ] mock_claim.assert_has_calls(claim_calls) diff --git a/nova/tests/unit/scheduler/test_utils.py b/nova/tests/unit/scheduler/test_utils.py index fdd384ed41c..2bc6862a56b 100644 --- a/nova/tests/unit/scheduler/test_utils.py +++ b/nova/tests/unit/scheduler/test_utils.py @@ -465,6 +465,36 @@ def test_process_use_force_hosts(self): ) self.assertEqual(expected_querystring, resources.to_querystring()) + def test_process_use_ignore_consumers(self): + flavor = objects.Flavor(vcpus=1, + memory_mb=1024, + root_gb=15, + ephemeral_gb=0, + swap=0) + fake_spec = objects.RequestSpec(flavor=flavor, force_hosts=['test']) + expected = utils.ResourceRequest() + expected._rg_by_id[None] = plib.RequestGroup( + use_same_provider=False, + resources={ + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 15, + }, + ) + expected._limit = None + resources = utils.resources_from_request_spec(fake_spec) + self.assertResourceRequestsEqual(expected, resources) + # NOTE(jkulik): can't use uuids.fake here, because we expect a certain + # order in the comparison and the order of the query parameters depends + # on sorting the values which is random for random UUIDs + resources._ignore_consumers = ['foo1', 'foo2'] + expected_querystring = ( + 'ignore_consumer={}&ignore_consumer={}&' + 'resources=DISK_GB%3A15%2CMEMORY_MB%3A1024%2CVCPU%3A1' + .format('foo1', 'foo2') + ) + self.assertEqual(expected_querystring, resources.to_querystring()) + @ddt.data( # Test single hint that we are checking for. {'group': [uuids.fake]}, @@ -918,11 +948,14 @@ def test(mock_claim, mock_get_allocs): @mock.patch('nova.scheduler.client.report.SchedulerReportClient') @mock.patch('nova.scheduler.utils.request_is_rebuild') - def test_claim_resources(self, mock_is_rebuild, mock_client): + @mock.patch('nova.scheduler.utils.request_is_resize') + def test_claim_resources(self, mock_is_resize, mock_is_rebuild, + mock_client): """Tests that when claim_resources() is called, that we appropriately call the placement client to claim resources for the instance. """ mock_is_rebuild.return_value = False + mock_is_resize.return_value = False ctx = mock.Mock(user_id=uuids.user_id) spec_obj = mock.Mock(project_id=uuids.project_id) instance_uuid = uuids.instance @@ -948,3 +981,128 @@ def test_claim_resouces_for_policy_check(self, mock_is_rebuild, self.assertTrue(res) mock_is_rebuild.assert_called_once_with(mock.sentinel.spec_obj) self.assertFalse(mock_client.claim_resources.called) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient') + @mock.patch('nova.scheduler.utils.request_is_rebuild') + @mock.patch('nova.scheduler.utils.request_is_resize') + @mock.patch('nova.objects.Migration.get_by_instance_and_status') + def test_claim_resources_on_resize(self, mock_mig_get, mock_is_resize, + mock_is_rebuild, mock_client): + """Test that when claim_resources() is called, that we we check for + resize and go the normal path of claim-resources if the hosts differ. + """ + mock_is_rebuild.return_value = False + mock_is_resize.return_value = True + migration = objects.Migration( + context=self.context.elevated(), + id=1, + uuid=uuids.migration_uuid, + instance_uuid=uuids.instance, + new_instance_type_id=7, + dest_compute=None, + dest_node=None, + dest_host=None, + source_compute='source_compute', + source_node='source_node', + status='pre-migration') + mock_mig_get.return_value = migration + + ctx = mock.Mock(user_id=uuids.user_id) + spec_obj = mock.Mock(project_id=uuids.project_id) + instance_uuid = uuids.instance + alloc_req = mock.sentinel.alloc_req + mock_client.claim_resources.return_value = True + + res = utils.claim_resources(ctx, mock_client, spec_obj, instance_uuid, + alloc_req, host='dest_compute') + + mock_client.claim_resources.assert_called_once_with( + ctx, uuids.instance, mock.sentinel.alloc_req, uuids.project_id, + uuids.user_id, allocation_request_version=None) + self.assertTrue(res) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient') + @mock.patch('nova.scheduler.utils.request_is_rebuild') + @mock.patch('nova.scheduler.utils.request_is_resize') + @mock.patch('nova.objects.Migration.get_by_instance_and_status', + side_effect=exception.MigrationNotFoundByStatus( + instance_id=uuids.instance, status='running (post-copy)')) + def test_claim_resources_on_resize_no_migration(self, mock_mig_get, + mock_is_resize, mock_is_rebuild, mock_client): + """Test that when claim_resources() is called, that we we check for + resize and go the normal path of claim-resources if we cannot find a + Migration object for the instance. + """ + mock_is_rebuild.return_value = False + mock_is_resize.return_value = True + ctx = mock.Mock(user_id=uuids.user_id) + spec_obj = mock.Mock(project_id=uuids.project_id) + instance_uuid = uuids.instance + alloc_req = mock.sentinel.alloc_req + mock_client.claim_resources.return_value = True + + res = utils.claim_resources(ctx, mock_client, spec_obj, instance_uuid, + alloc_req) + + mock_client.claim_resources.assert_called_once_with( + ctx, uuids.instance, mock.sentinel.alloc_req, uuids.project_id, + uuids.user_id, allocation_request_version=None) + self.assertTrue(res) + + @ddt.data(({}, None), + ({uuids.rp2: {'CUSTOM_BLA': 2}, + uuids.rp1: {'DISK_GB': 15, 'VCPUS': 12, 'MEMORY_MB': 14}}, + {uuids.rp2: {'CUSTOM_BLA': 2}, + uuids.rp1: {'DISK_GB': 15, 'VCPUS': 4}})) + @ddt.unpack + @mock.patch('nova.scheduler.client.report.SchedulerReportClient') + @mock.patch('nova.scheduler.utils.request_is_rebuild') + @mock.patch('nova.scheduler.utils.request_is_resize') + @mock.patch('nova.objects.Migration.get_by_instance_and_status') + def test_claim_resources_on_resize_same_host(self, mig_allocs, + expected_mig_allocs, mock_mig_get, mock_is_resize, mock_is_rebuild, + mock_client): + """Test that when claim_resources() is called, that we we check for + resize and call placement client to claim the resources and update the + migration at the same time. + """ + mock_is_rebuild.return_value = False + mock_is_resize.return_value = True + migration = objects.Migration( + context=self.context.elevated(), + id=1, + uuid=uuids.migration_uuid, + instance_uuid=uuids.instance, + new_instance_type_id=7, + dest_compute=None, + dest_node=None, + dest_host=None, + source_compute='source_compute', + source_node='source_node', + status='pre-migration') + mock_mig_get.return_value = migration + + ctx = mock.Mock(user_id=uuids.user_id) + spec_obj = mock.Mock(project_id=uuids.project_id) + instance_uuid = uuids.instance + alloc_req = {'allocations': + {uuids.rp1: {'resources': + {'DISK_GB': 17, 'VCPUS': 8, + 'MEMORY_MB': 24, 'CUSTOM_2': 1}}}} + mock_client.claim_resources.return_value = True + + mig_allocs = {rp: {'resources': values} + for rp, values in mig_allocs.items()} + mock_client.get_allocations_for_consumer.return_value = mig_allocs + res = utils.claim_resources(ctx, mock_client, spec_obj, instance_uuid, + alloc_req, host='source_compute') + + expected_req = {instance_uuid: alloc_req} + if expected_mig_allocs is not None: + expected_mig_allocs = {'allocations': + {rp: {'resources': values} + for rp, values in expected_mig_allocs.items()}} + expected_req[uuids.migration_uuid] = expected_mig_allocs + mock_client.set_multiple_allocations.assert_called_once_with( + ctx, expected_req, uuids.project_id, uuids.user_id) + self.assertTrue(res)