diff --git a/lower-constraints.txt b/lower-constraints.txt index e664f179c..4fc945231 100644 --- a/lower-constraints.txt +++ b/lower-constraints.txt @@ -66,8 +66,10 @@ openstacksdk==0.12.0 os-api-ref==1.4.0 os-brick==2.2.0 os-client-config==1.29.0 +os-resource-classes==0.1.0 os-service-types==1.2.0 os-testr==1.0.0 +os-traits==0.15.0 os-win==4.0.0 osc-lib==1.10.0 oslo.cache==1.29.0 @@ -84,8 +86,8 @@ oslo.privsep==1.32.0 oslo.serialization==2.18.0 oslo.service==1.24.0 oslo.upgradecheck==0.1.0 -oslo.utils==3.33.0 -oslo.versionedobjects==1.31.2 +oslo.utils==3.37.0 +oslo.versionedobjects==1.35.0 oslo.rootwrap==5.8.0 oslotest==3.2.0 osprofiler==1.4.0 diff --git a/requirements.txt b/requirements.txt index 684ff06b0..5154b1598 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,13 +20,15 @@ oslo.policy>=1.30.0 # Apache-2.0 oslo.privsep>=1.32.0 # Apache-2.0 oslo.serialization!=2.19.1,>=2.18.0 # Apache-2.0 oslo.service!=1.28.1,>=1.24.0 # Apache-2.0 -oslo.versionedobjects>=1.31.2 # Apache-2.0 +oslo.versionedobjects>=1.35.0 # Apache-2.0 oslo.context>=2.19.2 # Apache-2.0 -oslo.utils>=3.33.0 # Apache-2.0 +oslo.utils>=3.37.0 # Apache-2.0 oslo.db>=4.27.0 # Apache-2.0 oslo.rootwrap>=5.8.0 # Apache-2.0 oslo.upgradecheck>=0.1.0 # Apache-2.0 os-brick>=2.2.0 # Apache-2.0 +os-resource-classes>=0.1.0 # Apache-2.0 +os-traits>=0.15.0 # Apache-2.0 six>=1.10.0 # MIT SQLAlchemy!=1.1.5,!=1.1.6,!=1.1.7,!=1.1.8,>=1.0.10 # MIT stevedore>=1.20.0 # Apache-2.0 diff --git a/zun/common/clients.py b/zun/common/clients.py index 6acd0ddcb..63d1fdaa7 100644 --- a/zun/common/clients.py +++ b/zun/common/clients.py @@ -14,6 +14,7 @@ from cinderclient import client as cinderclient from glanceclient import client as glanceclient +from keystoneauth1.loading import adapter as ka_adapter from neutronclient.v2_0 import client as neutronclient from zun.common import exception @@ -30,6 +31,8 @@ class OpenStackClients(object): self._glance = None self._neutron = None self._cinder = None + self._placement = None + self._placement_ks_filter = None def url_for(self, **kwargs): return self.keystone().session.get_endpoint(**kwargs) @@ -115,3 +118,26 @@ class OpenStackClients(object): **kwargs) return self._cinder + + @exception.wrap_keystone_exception + def placement(self): + if self._placement: + return self._placement, self._placement_ks_filter + + session = self.keystone().session + session.verify = \ + self._get_client_option('placement', 'ca_file') or True + if self._get_client_option('placement', 'insecure'): + session.verify = False + region_name = self._get_client_option('placement', 'region_name') + endpoint_type = self._get_client_option('placement', 'endpoint_type') + kwargs = { + 'session': self.keystone().session, + 'auth': self.keystone().auth, + } + self._placement_ks_filter = {'service_type': 'placement', + 'region_name': region_name, + 'interface': endpoint_type} + self._placement = ka_adapter.Adapter().load_from_options(**kwargs) + + return self._placement, self._placement_ks_filter diff --git a/zun/common/exception.py b/zun/common/exception.py index 8225af7f3..45c094704 100644 --- a/zun/common/exception.py +++ b/zun/common/exception.py @@ -423,10 +423,6 @@ class ZunServiceNotFound(HTTPNotFound): message = _("Zun service %(binary)s on host %(host)s could not be found.") -class ResourceProviderNotFound(HTTPNotFound): - message = _("Resource provider %(resource_provider)s could not be found.") - - class ResourceClassNotFound(HTTPNotFound): message = _("Resource class %(resource_class)s could not be found.") @@ -776,3 
+772,113 @@ class NameEmpty(InvalidReference): class NameTooLong(InvalidReference): message = _('repository name must not be more than %(length_max)s ' 'characters') + + +# An exception with this name is used on both sides of the placement/zun +# interaction. +class ResourceProviderInUse(ZunException): + message = _("Resource provider has allocations.") + + +class ResourceProviderRetrievalFailed(ZunException): + message = _("Failed to get resource provider with UUID %(uuid)s") + + +class ResourceProviderAggregateRetrievalFailed(ZunException): + message = _("Failed to get aggregates for resource provider with UUID" + " %(uuid)s") + + +class ResourceProviderTraitRetrievalFailed(ZunException): + message = _("Failed to get traits for resource provider with UUID" + " %(uuid)s") + + +class ResourceProviderCreationFailed(ZunException): + message = _("Failed to create resource provider %(name)s") + + +class ResourceProviderDeletionFailed(ZunException): + message = _("Failed to delete resource provider %(uuid)s") + + +class ResourceProviderUpdateFailed(ZunException): + message = _("Failed to update resource provider via URL %(url)s: " + "%(error)s") + + +class ResourceProviderNotFound(NotFound): + message = _("No such resource provider %(name_or_uuid)s.") + + +class ResourceProviderSyncFailed(ZunException): + message = _("Failed to synchronize the placement service with resource " + "provider information supplied by the compute host.") + + +class PlacementAPIConnectFailure(ZunException): + message = _("Unable to communicate with the Placement API.") + + +class PlacementAPIConflict(ZunException): + """Any 409 error from placement APIs should use (a subclass of) this + exception. + """ + message = _("A conflict was encountered attempting to invoke the " + "placement API at URL %(url)s: %(error)s") + + +class ResourceProviderUpdateConflict(PlacementAPIConflict): + """A 409 caused by generation mismatch from attempting to update an + existing provider record or its associated data (aggregates, traits, etc.). + """ + message = _("A conflict was encountered attempting to update resource " + "provider %(uuid)s (generation %(generation)d): %(error)s") + + +class InvalidResourceClass(Invalid): + message = _("Resource class '%(resource_class)s' invalid.") + + +class InvalidResourceAmount(Invalid): + message = _("Resource amounts must be integers. Received '%(amount)s'.") + + +class InvalidInventory(Invalid): + message = _("Inventory for '%(resource_class)s' on " + "resource provider '%(resource_provider)s' invalid.") + + +class UsagesRetrievalFailed(ZunException): + message = _("Failed to retrieve usages for project '%(project_id)s' and " + "user '%(user_id)s'.") + + +class AllocationUpdateFailed(ZunException): + message = _('Failed to update allocations for consumer %(consumer_uuid)s. ' + 'Error: %(error)s') + + +class ConsumerAllocationRetrievalFailed(ZunException): + message = _("Failed to retrieve allocations for consumer " + "%(consumer_uuid)s: %(error)s") + + +class TraitRetrievalFailed(ZunException): + message = _("Failed to get traits for resource provider with UUID" + " %(uuid)s") + + +class TraitCreationFailed(ZunException): + message = _("Failed to create trait %(name)s: %(error)s") + + +class AllocationMoveFailed(ZunException): + message = _('Failed to move allocations from consumer %(source_consumer)s ' + 'to consumer %(target_consumer)s. 
' + 'Error: %(error)s') + + +class ResourceProviderAllocationRetrievalFailed(ZunException): + message = _("Failed to retrieve allocations for resource provider " + "%(rp_uuid)s: %(error)s") diff --git a/zun/common/keystone.py b/zun/common/keystone.py index cf4e27541..74ae1b13d 100644 --- a/zun/common/keystone.py +++ b/zun/common/keystone.py @@ -33,6 +33,7 @@ class KeystoneClientV3(object): self.context = context self._client = None self._session = None + self._auth = None @property def auth_url(self): @@ -55,6 +56,14 @@ class KeystoneClientV3(object): self._session = session return session + @property + def auth(self): + if self._auth: + return self._auth + auth = self._get_auth() + self._auth = auth + return auth + def _get_session(self, auth): session = ka_loading.load_session_from_conf_options( CONF, ksconf.CFG_GROUP, auth=auth) diff --git a/zun/common/utils.py b/zun/common/utils.py index 0cb83d2d6..9b2163cf8 100644 --- a/zun/common/utils.py +++ b/zun/common/utils.py @@ -18,6 +18,7 @@ """Utilities and helper functions.""" import base64 import binascii +import contextlib import eventlet import functools import inspect @@ -716,3 +717,12 @@ def decode_file_data(data): def strtime(at): return at.strftime("%Y-%m-%dT%H:%M:%S.%f") + + +if six.PY2: + nested_contexts = contextlib.nested +else: + @contextlib.contextmanager + def nested_contexts(*contexts): + with contextlib.ExitStack() as stack: + yield [stack.enter_context(c) for c in contexts] diff --git a/zun/compute/api.py b/zun/compute/api.py index c6ea37f6f..d44848d7a 100644 --- a/zun/compute/api.py +++ b/zun/compute/api.py @@ -23,7 +23,7 @@ from zun.compute import container_actions from zun.compute import rpcapi import zun.conf from zun import objects -from zun.scheduler import client as scheduler_client +from zun.scheduler.client import query as scheduler_client CONF = zun.conf.CONF diff --git a/zun/compute/compute_node_tracker.py b/zun/compute/compute_node_tracker.py index 50457f55e..be9cae00b 100644 --- a/zun/compute/compute_node_tracker.py +++ b/zun/compute/compute_node_tracker.py @@ -24,7 +24,7 @@ import zun.conf from zun import objects from zun.objects import base as obj_base from zun.pci import manager as pci_manager -from zun.scheduler import client as scheduler_client +from zun.scheduler.client import query as scheduler_client CONF = zun.conf.CONF diff --git a/zun/compute/provider_tree.py b/zun/compute/provider_tree.py new file mode 100644 index 000000000..5a48dd575 --- /dev/null +++ b/zun/compute/provider_tree.py @@ -0,0 +1,680 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""An object describing a tree of resource providers and their inventories. + +This object is not stored in the Zun API; rather, this +object is constructed and used by the scheduler report client to track state +changes for resources on the container engine. As such, there are +no remoteable methods nor is there any interaction with the zun.db modules. 
+""" + +import collections +import copy + +import os_traits +from oslo_concurrency import lockutils +from oslo_log import log as logging +from oslo_utils import uuidutils + +from zun.common.i18n import _ + + +LOG = logging.getLogger(__name__) +_LOCK_NAME = 'provider-tree-lock' + +# Point-in-time representation of a resource provider in the tree. +# Note that, whereas namedtuple enforces read-only-ness of containers as a +# whole, nothing prevents modification of the internals of attributes of +# complex types (children/inventory/traits/aggregates). However, any such +# modifications still have no effect on the ProviderTree the container came +# from. Like, you can Sharpie a moustache on a Polaroid of my face, but that +# doesn't make a moustache appear on my actual face. +ProviderData = collections.namedtuple( + 'ProviderData', ['uuid', 'name', 'generation', 'parent_uuid', 'inventory', + 'traits', 'aggregates']) + + +class _Provider(object): + """Represents a resource provider in the tree. All operations against the + tree should be done using the ProviderTree interface, since it controls + thread-safety. + """ + def __init__(self, name, uuid=None, generation=None, parent_uuid=None): + if uuid is None: + uuid = uuidutils.generate_uuid() + self.uuid = uuid + self.name = name + self.generation = generation + self.parent_uuid = parent_uuid + # Contains a dict, keyed by uuid of child resource providers having + # this provider as a parent + self.children = {} + # dict of inventory records, keyed by resource class + self.inventory = {} + # Set of trait names + self.traits = set() + # Set of aggregate UUIDs + self.aggregates = set() + + @classmethod + def from_dict(cls, pdict): + """Factory method producing a _Provider based on a dict with + appropriate keys. + + :param pdict: Dictionary representing a provider, with keys 'name', + 'uuid', 'generation', 'parent_provider_uuid'. Of these, + only 'name' is mandatory. + """ + return cls(pdict['name'], uuid=pdict.get('uuid'), + generation=pdict.get('generation'), + parent_uuid=pdict.get('parent_provider_uuid')) + + def data(self): + inventory = copy.deepcopy(self.inventory) + traits = copy.copy(self.traits) + aggregates = copy.copy(self.aggregates) + return ProviderData( + self.uuid, self.name, self.generation, self.parent_uuid, + inventory, traits, aggregates) + + def get_provider_uuids(self): + """Returns a list, in top-down traversal order, of UUIDs of this + provider and all its descendants. + """ + ret = [self.uuid] + for child in self.children.values(): + ret.extend(child.get_provider_uuids()) + return ret + + def find(self, search): + if self.name == search or self.uuid == search: + return self + if search in self.children: + return self.children[search] + if self.children: + for child in self.children.values(): + # We already searched for the child by UUID above, so here we + # just check for a child name match + if child.name == search: + return child + subchild = child.find(search) + if subchild: + return subchild + return None + + def add_child(self, provider): + self.children[provider.uuid] = provider + + def remove_child(self, provider): + if provider.uuid in self.children: + del self.children[provider.uuid] + + def has_inventory(self): + """Returns whether the provider has any inventory records at all. 
""" + return self.inventory != {} + + def has_inventory_changed(self, new): + """Returns whether the inventory has changed for the provider.""" + cur = self.inventory + if set(cur) != set(new): + return True + for key, cur_rec in cur.items(): + new_rec = new[key] + # If the new record contains new fields (e.g. we're adding on + # `reserved` or `allocation_ratio`) we want to make sure to pick + # them up + if set(new_rec) - set(cur_rec): + return True + for rec_key, cur_val in cur_rec.items(): + if rec_key not in new_rec: + # Deliberately don't want to compare missing keys in the + # *new* inventory record. For instance, we will be passing + # in fields like allocation_ratio in the current dict but + # the resource tracker may only pass in the total field. We + # want to return that inventory didn't change when the + # total field values are the same even if the + # allocation_ratio field is missing from the new record. + continue + if new_rec[rec_key] != cur_val: + return True + return False + + def _update_generation(self, generation, operation): + if generation is not None and generation != self.generation: + msg_args = { + 'rp_uuid': self.uuid, + 'old': self.generation, + 'new': generation, + 'op': operation + } + LOG.debug("Updating resource provider %(rp_uuid)s generation " + "from %(old)s to %(new)s during operation: %(op)s", + msg_args) + self.generation = generation + + def update_inventory(self, inventory, generation): + """Update the stored inventory for the provider along with a resource + provider generation to set the provider to. The method returns whether + the inventory has changed. + """ + self._update_generation(generation, 'update_inventory') + if self.has_inventory_changed(inventory): + LOG.debug('Updating inventory in ProviderTree for provider %s ' + 'with inventory: %s', self.uuid, inventory) + self.inventory = copy.deepcopy(inventory) + return True + LOG.debug('Inventory has not changed in ProviderTree for provider: %s', + self.uuid) + return False + + def have_traits_changed(self, new): + """Returns whether the provider's traits have changed.""" + return set(new) != self.traits + + def update_traits(self, new, generation=None): + """Update the stored traits for the provider along with a resource + provider generation to set the provider to. The method returns whether + the traits have changed. + """ + self._update_generation(generation, 'update_traits') + if self.have_traits_changed(new): + self.traits = set(new) # create a copy of the new traits + return True + return False + + def has_traits(self, traits): + """Query whether the provider has certain traits. + + :param traits: Iterable of string trait names to look for. + :return: True if this provider has *all* of the specified traits; False + if any of the specified traits are absent. Returns True if + the traits parameter is empty. + """ + return not bool(set(traits) - self.traits) + + def have_aggregates_changed(self, new): + """Returns whether the provider's aggregates have changed.""" + return set(new) != self.aggregates + + def update_aggregates(self, new, generation=None): + """Update the stored aggregates for the provider along with a resource + provider generation to set the provider to. The method returns whether + the aggregates have changed. 
+ """ + self._update_generation(generation, 'update_aggregates') + if self.have_aggregates_changed(new): + self.aggregates = set(new) # create a copy of the new aggregates + return True + return False + + def in_aggregates(self, aggregates): + """Query whether the provider is a member of certain aggregates. + + :param aggregates: Iterable of string aggregate UUIDs to look for. + :return: True if this provider is a member of *all* of the specified + aggregates; False if any of the specified aggregates are + absent. Returns True if the aggregates parameter is empty. + """ + return not bool(set(aggregates) - self.aggregates) + + +class ProviderTree(object): + + def __init__(self): + """Create an empty provider tree.""" + self.lock = lockutils.internal_lock(_LOCK_NAME) + self.roots = [] + + def get_provider_uuids(self, name_or_uuid=None): + """Return a list, in top-down traversable order, of the UUIDs of all + providers (in a (sub)tree). + + :param name_or_uuid: Provider name or UUID representing the root of a + (sub)tree for which to return UUIDs. If not + specified, the method returns all UUIDs in the + ProviderTree. + """ + if name_or_uuid is not None: + with self.lock: + return self._find_with_lock(name_or_uuid).get_provider_uuids() + + # If no name_or_uuid, get UUIDs for all providers recursively. + ret = [] + with self.lock: + for root in self.roots: + ret.extend(root.get_provider_uuids()) + return ret + + def get_provider_uuids_in_tree(self, name_or_uuid): + """Returns a list, in top-down traversable order, of the UUIDs of all + providers in the whole tree of which the provider identified by + ``name_or_uuid`` is a member. + + :param name_or_uuid: Provider name or UUID representing any member of + whole tree for which to return UUIDs. + """ + with self.lock: + return self._find_with_lock( + name_or_uuid, return_root=True).get_provider_uuids() + + def populate_from_iterable(self, provider_dicts): + """Populates this ProviderTree from an iterable of provider dicts. + + This method will ADD providers to the tree if provider_dicts contains + providers that do not exist in the tree already and will REPLACE + providers in the tree if provider_dicts contains providers that are + already in the tree. This method will NOT remove providers from the + tree that are not in provider_dicts. But if a parent provider is in + provider_dicts and the descendents are not, this method will remove the + descendents from the tree. + + :param provider_dicts: An iterable of dicts of resource provider + information. If a provider is present in + provider_dicts, all its descendants must also be + present. + :raises: ValueError if any provider in provider_dicts has a parent that + is not in this ProviderTree or elsewhere in provider_dicts. + """ + if not provider_dicts: + return + + # Map of provider UUID to provider dict for the providers we're + # *adding* via this method. + to_add_by_uuid = {pd['uuid']: pd for pd in provider_dicts} + + with self.lock: + # Sanity check for orphans. Every parent UUID must either be None + # (the provider is a root), or be in the tree already, or exist as + # a key in to_add_by_uuid (we're adding it). + all_parents = set([None]) | set(to_add_by_uuid) + # NOTE(efried): Can't use get_provider_uuids directly because we're + # already under lock. 
+ for root in self.roots: + all_parents |= set(root.get_provider_uuids()) + missing_parents = set() + for pd in to_add_by_uuid.values(): + parent_uuid = pd.get('parent_provider_uuid') + if parent_uuid not in all_parents: + missing_parents.add(parent_uuid) + if missing_parents: + raise ValueError( + _("The following parents were not found: %s") % + ', '.join(missing_parents)) + + # Ready to do the work. + # Use to_add_by_uuid to keep track of which providers are left to + # be added. + while to_add_by_uuid: + # Find a provider that's suitable to inject. + for uuid, pd in to_add_by_uuid.items(): + # Roots are always okay to inject (None won't be a key in + # to_add_by_uuid). Otherwise, we have to make sure we + # already added the parent (and, by recursion, all + # ancestors) if present in the input. + parent_uuid = pd.get('parent_provider_uuid') + if parent_uuid not in to_add_by_uuid: + break + else: + # This should never happen - we already ensured all parents + # exist in the tree, which means we can't have any branches + # that don't wind up at the root, which means we can't have + # cycles. But to quell the paranoia... + raise ValueError( + _("Unexpectedly failed to find parents already in the " + "tree for any of the following: %s") % + ','.join(set(to_add_by_uuid))) + + # Add or replace the provider, either as a root or under its + # parent + try: + self._remove_with_lock(uuid) + except ValueError: + # Wasn't there in the first place - fine. + pass + + provider = _Provider.from_dict(pd) + if parent_uuid is None: + self.roots.append(provider) + else: + parent = self._find_with_lock(parent_uuid) + parent.add_child(provider) + + # Remove this entry to signify we're done with it. + to_add_by_uuid.pop(uuid) + + def _remove_with_lock(self, name_or_uuid): + found = self._find_with_lock(name_or_uuid) + if found.parent_uuid: + parent = self._find_with_lock(found.parent_uuid) + parent.remove_child(found) + else: + self.roots.remove(found) + + def remove(self, name_or_uuid): + """Safely removes the provider identified by the supplied name_or_uuid + parameter and all of its children from the tree. + + :raises ValueError if name_or_uuid points to a non-existing provider. + :param name_or_uuid: Either name or UUID of the resource provider to + remove from the tree. + """ + with self.lock: + self._remove_with_lock(name_or_uuid) + + def new_root(self, name, uuid, generation=None): + """Adds a new root provider to the tree, returning its UUID. + + :param name: The name of the new root provider + :param uuid: The UUID of the new root provider + :param generation: Generation to set for the new root provider + :returns: the UUID of the new provider + :raises: ValueError if a provider with the specified uuid already + exists in the tree. + """ + + with self.lock: + exists = True + try: + self._find_with_lock(uuid) + except ValueError: + exists = False + + if exists: + err = _("Provider %s already exists.") + raise ValueError(err % uuid) + + p = _Provider(name, uuid=uuid, generation=generation) + self.roots.append(p) + return p.uuid + + def _find_with_lock(self, name_or_uuid, return_root=False): + for root in self.roots: + found = root.find(name_or_uuid) + if found: + return root if return_root else found + raise ValueError(_("No such provider %s") % name_or_uuid) + + def data(self, name_or_uuid): + """Return a point-in-time copy of the specified provider's data. + + :param name_or_uuid: Either name or UUID of the resource provider whose + data is to be returned. 
+ :return: ProviderData object representing the specified provider. + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + """ + with self.lock: + return self._find_with_lock(name_or_uuid).data() + + def exists(self, name_or_uuid): + """Given either a name or a UUID, return True if the tree contains the + provider, False otherwise. + """ + with self.lock: + try: + self._find_with_lock(name_or_uuid) + return True + except ValueError: + return False + + def new_child(self, name, parent, uuid=None, generation=None): + """Creates a new child provider with the given name and uuid under the + given parent. + + :param name: The name of the new child provider + :param parent: Either name or UUID of the parent provider + :param uuid: The UUID of the new child provider + :param generation: Generation to set for the new child provider + :returns: the UUID of the new provider + + :raises ValueError if a provider with the specified uuid or name + already exists; or if parent_uuid points to a nonexistent + provider. + """ + with self.lock: + try: + self._find_with_lock(uuid or name) + except ValueError: + pass + else: + err = _("Provider %s already exists.") + raise ValueError(err % (uuid or name)) + + parent_node = self._find_with_lock(parent) + p = _Provider(name, uuid, generation, parent_node.uuid) + parent_node.add_child(p) + return p.uuid + + def has_inventory(self, name_or_uuid): + """Returns True if the provider identified by name_or_uuid has any + inventory records at all. + + :raises: ValueError if a provider with uuid was not found in the tree. + :param name_or_uuid: Either name or UUID of the resource provider + """ + with self.lock: + p = self._find_with_lock(name_or_uuid) + return p.has_inventory() + + def has_inventory_changed(self, name_or_uuid, inventory): + """Returns True if the supplied inventory is different for the provider + with the supplied name or UUID. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + :param name_or_uuid: Either name or UUID of the resource provider to + query inventory for. + :param inventory: dict, keyed by resource class, of inventory + information. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.has_inventory_changed(inventory) + + def update_inventory(self, name_or_uuid, inventory, generation=None): + """Given a name or UUID of a provider and a dict of inventory resource + records, update the provider's inventory and set the provider's + generation. + + :returns: True if the inventory has changed. + + :note: The provider's generation is always set to the supplied + generation, even if there were no changes to the inventory. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + :param name_or_uuid: Either name or UUID of the resource provider to + update inventory for. + :param inventory: dict, keyed by resource class, of inventory + information. + :param generation: The resource provider generation to set. If not + specified, the provider's generation is not changed. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.update_inventory(inventory, generation) + + def has_sharing_provider(self, resource_class): + """Returns whether the specified provider_tree contains any sharing + providers of inventory of the specified resource_class. 
+ """ + for rp_uuid in self.get_provider_uuids(): + pdata = self.data(rp_uuid) + has_rc = resource_class in pdata.inventory + is_sharing = os_traits.MISC_SHARES_VIA_AGGREGATE in pdata.traits + if has_rc and is_sharing: + return True + return False + + def has_traits(self, name_or_uuid, traits): + """Given a name or UUID of a provider, query whether that provider has + *all* of the specified traits. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + :param name_or_uuid: Either name or UUID of the resource provider to + query for traits. + :param traits: Iterable of string trait names to search for. + :return: True if this provider has *all* of the specified traits; False + if any of the specified traits are absent. Returns True if + the traits parameter is empty, even if the provider has no + traits. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.has_traits(traits) + + def have_traits_changed(self, name_or_uuid, traits): + """Returns True if the specified traits list is different for the + provider with the specified name or UUID. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + :param name_or_uuid: Either name or UUID of the resource provider to + query traits for. + :param traits: Iterable of string trait names to compare against the + provider's traits. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.have_traits_changed(traits) + + def update_traits(self, name_or_uuid, traits, generation=None): + """Given a name or UUID of a provider and an iterable of string trait + names, update the provider's traits and set the provider's generation. + + :returns: True if the traits list has changed. + + :note: The provider's generation is always set to the supplied + generation, even if there were no changes to the traits. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + :param name_or_uuid: Either name or UUID of the resource provider to + update traits for. + :param traits: Iterable of string trait names to set. + :param generation: The resource provider generation to set. If None, + the provider's generation is not changed. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.update_traits(traits, generation=generation) + + def add_traits(self, name_or_uuid, *traits): + """Set traits on a provider, without affecting existing traits. + + :param name_or_uuid: The name or UUID of the provider whose traits are + to be affected. + :param traits: String names of traits to be added. + """ + if not traits: + return + with self.lock: + provider = self._find_with_lock(name_or_uuid) + final_traits = provider.traits | set(traits) + provider.update_traits(final_traits) + + def remove_traits(self, name_or_uuid, *traits): + """Unset traits on a provider, without affecting other existing traits. + + :param name_or_uuid: The name or UUID of the provider whose traits are + to be affected. + :param traits: String names of traits to be removed. + """ + if not traits: + return + with self.lock: + provider = self._find_with_lock(name_or_uuid) + final_traits = provider.traits - set(traits) + provider.update_traits(final_traits) + + def in_aggregates(self, name_or_uuid, aggregates): + """Given a name or UUID of a provider, query whether that provider is a + member of *all* the specified aggregates. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. 
+ :param name_or_uuid: Either name or UUID of the resource provider to + query for aggregates. + :param aggregates: Iterable of string aggregate UUIDs to search for. + :return: True if this provider is associated with *all* of the + specified aggregates; False if any of the specified aggregates + are absent. Returns True if the aggregates parameter is + empty, even if the provider has no aggregate associations. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.in_aggregates(aggregates) + + def have_aggregates_changed(self, name_or_uuid, aggregates): + """Returns True if the specified aggregates list is different for the + provider with the specified name or UUID. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + :param name_or_uuid: Either name or UUID of the resource provider to + query aggregates for. + :param aggregates: Iterable of string aggregate UUIDs to compare + against the provider's aggregates. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.have_aggregates_changed(aggregates) + + def update_aggregates(self, name_or_uuid, aggregates, generation=None): + """Given a name or UUID of a provider and an iterable of string + aggregate UUIDs, update the provider's aggregates and set the + provider's generation. + + :returns: True if the aggregates list has changed. + + :note: The provider's generation is always set to the supplied + generation, even if there were no changes to the aggregates. + + :raises: ValueError if a provider with name_or_uuid was not found in + the tree. + :param name_or_uuid: Either name or UUID of the resource provider to + update aggregates for. + :param aggregates: Iterable of string aggregate UUIDs to set. + :param generation: The resource provider generation to set. If None, + the provider's generation is not changed. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + return provider.update_aggregates(aggregates, + generation=generation) + + def add_aggregates(self, name_or_uuid, *aggregates): + """Set aggregates on a provider, without affecting existing aggregates. + + :param name_or_uuid: The name or UUID of the provider whose aggregates + are to be affected. + :param aggregates: String UUIDs of aggregates to be added. + """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + final_aggs = provider.aggregates | set(aggregates) + provider.update_aggregates(final_aggs) + + def remove_aggregates(self, name_or_uuid, *aggregates): + """Unset aggregates on a provider, without affecting other existing + aggregates. + + :param name_or_uuid: The name or UUID of the provider whose aggregates + are to be affected. + :param aggregates: String UUIDs of aggregates to be removed. 
+ """ + with self.lock: + provider = self._find_with_lock(name_or_uuid) + final_aggs = provider.aggregates - set(aggregates) + provider.update_aggregates(final_aggs) diff --git a/zun/conf/__init__.py b/zun/conf/__init__.py index 93cba62fc..ba7807c01 100644 --- a/zun/conf/__init__.py +++ b/zun/conf/__init__.py @@ -29,6 +29,7 @@ from zun.conf import network from zun.conf import neutron_client from zun.conf import path from zun.conf import pci +from zun.conf import placement_client from zun.conf import profiler from zun.conf import quota from zun.conf import scheduler @@ -65,3 +66,4 @@ cinder_client.register_opts(CONF) netconf.register_opts(CONF) availability_zone.register_opts(CONF) utils.register_opts(CONF) +placement_client.register_opts(CONF) diff --git a/zun/conf/compute.py b/zun/conf/compute.py index 72a9894ef..ee4daadfd 100644 --- a/zun/conf/compute.py +++ b/zun/conf/compute.py @@ -29,6 +29,25 @@ compute_opts = [ 'enable_cpu_pinning', default=False, help='allow the container with cpu_policy is dedicated'), + cfg.IntOpt( + 'resource_provider_association_refresh', + default=300, + min=0, + mutable=True, + # TODO(efried): Provide more/better explanation of what this option is + # all about. Reference bug(s). Unless we're just going to remove it. + help=""" +Interval for updating zun-compute-side cache of the compute node resource +provider's inventories, aggregates, and traits. +This option specifies the number of seconds between attempts to update a +provider's inventories, aggregates and traits in the local cache of the compute +node. +A value of zero disables cache refresh completely. +The cache can be cleared manually at any time by sending SIGHUP to the compute +process, causing it to be repopulated the next time the data is accessed. +Possible values: +* Any positive integer in seconds, or zero to disable refresh. +"""), ] service_opts = [ diff --git a/zun/conf/placement_client.py b/zun/conf/placement_client.py new file mode 100644 index 000000000..f0566a265 --- /dev/null +++ b/zun/conf/placement_client.py @@ -0,0 +1,53 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from oslo_config import cfg + + +placement_group = cfg.OptGroup( + name='placement_client', + title='Placement Service Options', + help="Configuration options for connecting to the placement API service") + +common_security_opts = [ + cfg.StrOpt('ca_file', + help='Optional CA cert file to use in SSL connections.'), + cfg.StrOpt('cert_file', + help='Optional PEM-formatted certificate chain file.'), + cfg.StrOpt('key_file', + help='Optional PEM-formatted file that contains the ' + 'private key.'), + cfg.BoolOpt('insecure', + default=False, + help="If set, then the server's certificate will not " + "be verified.")] + +placement_client_opts = [ + cfg.StrOpt('region_name', + help='Region in Identity service catalog to use for ' + 'communication with the OpenStack service.'), + cfg.StrOpt('endpoint_type', + default='publicURL', + help='Type of endpoint in Identity service catalog to use ' + 'for communication with the OpenStack service.')] + + +ALL_OPTS = (placement_client_opts + common_security_opts) + + +def register_opts(conf): + conf.register_group(placement_group) + conf.register_opts(ALL_OPTS, group=placement_group) + + +def list_opts(): + return {placement_group: ALL_OPTS} diff --git a/zun/conf/scheduler.py b/zun/conf/scheduler.py index 366d7d4fa..75498d9f0 100644 --- a/zun/conf/scheduler.py +++ b/zun/conf/scheduler.py @@ -92,6 +92,25 @@ Related options: * All of the filters in this option *must* be present in the 'scheduler_available_filters' option, or a SchedulerHostFilterNotFound exception will be raised. +"""), + cfg.IntOpt("max_placement_results", + default=1000, + min=1, + help=""" +This setting determines the maximum limit on results received from the +placement service during a scheduling operation. It effectively limits +the number of hosts that may be considered for scheduling requests that +match a large number of candidates. + +A value of 1 (the minimum) will effectively defer scheduling to the placement +service strictly on "will it fit" grounds. A higher value will put an upper +cap on the number of results the scheduler will consider during the filtering +and weighing process. Large deployments may need to set this lower than the +total number of hosts available to limit memory consumption, network traffic, +etc. of the scheduler. + +This option is only used by the FilterScheduler; if you use a different +scheduler, this option has no effect. 
"""), ] diff --git a/zun/objects/__init__.py b/zun/objects/__init__.py index 906c57b32..710add21f 100644 --- a/zun/objects/__init__.py +++ b/zun/objects/__init__.py @@ -23,6 +23,7 @@ from zun.objects import pci_device_pool from zun.objects import quota from zun.objects import quota_class from zun.objects import registry +from zun.objects import request_group from zun.objects import resource_class from zun.objects import resource_provider from zun.objects import volume @@ -54,6 +55,7 @@ ContainerAction = container_action.ContainerAction ContainerActionEvent = container_action.ContainerActionEvent ExecInstance = exec_instance.ExecInstance Registry = registry.Registry +RequestGroup = request_group.RequestGroup __all__ = ( 'Container', @@ -79,4 +81,5 @@ __all__ = ( 'ContainerActionEvent', 'ExecInstance', 'Registry', + 'RequestGroup', ) diff --git a/zun/objects/fields.py b/zun/objects/fields.py index 936f81c3b..91f18490a 100644 --- a/zun/objects/fields.py +++ b/zun/objects/fields.py @@ -83,6 +83,14 @@ class JsonField(fields.AutoTypedField): AUTO_TYPE = Json() +class SetOfStringsField(fields.AutoTypedField): + AUTO_TYPE = fields.Set(fields.String()) + + +class ListOfListsOfStringsField(fields.AutoTypedField): + AUTO_TYPE = fields.List(fields.List(fields.String())) + + class ResourceClass(fields.Enum): ALL = consts.RESOURCE_CLASSES @@ -119,8 +127,8 @@ class PciDeviceType(BaseZunEnum): class PciDeviceTypeField(fields.BaseEnumField): - AUTO_TYPE = PciDeviceType() + AUTO_TYPE = PciDeviceType() class PciDeviceStatusField(fields.BaseEnumField): - AUTO_TYPE = PciDeviceStatus() + AUTO_TYPE = PciDeviceStatus() diff --git a/zun/objects/request_group.py b/zun/objects/request_group.py new file mode 100644 index 000000000..6c8299b3c --- /dev/null +++ b/zun/objects/request_group.py @@ -0,0 +1,48 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_versionedobjects import fields + +from zun.objects import base +from zun.objects import fields as zun_fields + + +@base.ZunObjectRegistry.register +class RequestGroup(base.ZunPersistentObject, base.ZunObject): + # Version 1.0: Initial version + VERSION = '1.0' + + fields = { + 'use_same_provider': fields.BooleanField(default=True), + 'resources': fields.DictOfIntegersField(default={}), + 'required_traits': zun_fields.SetOfStringsField(default=set()), + 'forbidden_traits': zun_fields.SetOfStringsField(default=set()), + # The aggregates field has a form of + # [[aggregate_UUID1], + # [aggregate_UUID2, aggregate_UUID3]] + # meaning that the request should be fulfilled from an RP that is a + # member of the aggregate aggregate_UUID1 and member of the aggregate + # aggregate_UUID2 or aggregate_UUID3 . + 'aggregates': zun_fields.ListOfListsOfStringsField(default=[]), + # The entity the request is coming from (e.g. the Neutron port uuid) + # which may not always be a UUID. 
+ 'requester_id': fields.StringField(nullable=True, default=None), + # The resource provider UUIDs that together fulfill the request + # NOTE(gibi): this can be more than one if this is the unnumbered + # request group (i.e. use_same_provider=False) + 'provider_uuids': fields.ListOfUUIDField(default=[]), + 'in_tree': fields.UUIDField(nullable=True, default=None), + } + + def __init__(self, context=None, **kwargs): + super(RequestGroup, self).__init__(context=context, **kwargs) + self.obj_set_defaults() diff --git a/zun/scheduler/client/__init__.py b/zun/scheduler/client/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/zun/scheduler/client.py b/zun/scheduler/client/query.py similarity index 100% rename from zun/scheduler/client.py rename to zun/scheduler/client/query.py diff --git a/zun/scheduler/client/report.py b/zun/scheduler/client/report.py new file mode 100644 index 000000000..a1bb59da9 --- /dev/null +++ b/zun/scheduler/client/report.py @@ -0,0 +1,2291 @@ +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import collections +import contextlib +import copy +import functools +import random +import time + +from keystoneauth1 import exceptions as ks_exc +import os_resource_classes as orc +import os_traits +from oslo_log import log as logging +from oslo_middleware import request_id +from oslo_utils import excutils +from oslo_utils import versionutils +import retrying +import six + +from zun.common import clients +from zun.common import exception +from zun.common.i18n import _ +from zun.compute import provider_tree +import zun.conf +from zun import objects + + +CONF = zun.conf.CONF +LOG = logging.getLogger(__name__) +WARN_EVERY = 10 +RESHAPER_VERSION = '1.30' +CONSUMER_GENERATION_VERSION = '1.28' +INTREE_AC_VERSION = '1.31' +ALLOW_RESERVED_EQUAL_TOTAL_INVENTORY_VERSION = '1.26' +POST_RPS_RETURNS_PAYLOAD_API_VERSION = '1.20' +AGGREGATE_GENERATION_VERSION = '1.19' +NESTED_PROVIDER_API_VERSION = '1.14' +POST_ALLOCATIONS_API_VERSION = '1.13' +GET_USAGES_VERSION = '1.9' + +AggInfo = collections.namedtuple('AggInfo', ['aggregates', 'generation']) +TraitInfo = collections.namedtuple('TraitInfo', ['traits', 'generation']) +ProviderAllocInfo = collections.namedtuple( + 'ProviderAllocInfo', ['allocations']) + + +def warn_limit(self, msg): + if self._warn_count: + self._warn_count -= 1 + else: + self._warn_count = WARN_EVERY + LOG.warning(msg) + + +class Retry(Exception): + def __init__(self, operation, reason): + self.operation = operation + self.reason = reason + + +def retries(f): + """Decorator to retry a call three times if it raises Retry + + Note that this returns the actual value of the inner call on success + or returns False if all the retries fail. 
+ """ + @functools.wraps(f) + def wrapper(self, *a, **k): + for retry in range(0, 4): + try: + sleep_time = random.uniform(0, retry * 2) + time.sleep(sleep_time) + return f(self, *a, **k) + except Retry as e: + LOG.debug( + 'Unable to %(op)s because %(reason)s; retrying...', + {'op': e.operation, 'reason': e.reason}) + LOG.error('Failed scheduler client operation %s: out of retries', + f.__name__) + return False + return wrapper + + +def _move_operation_alloc_request(source_allocs, dest_alloc_req): + """Given existing allocations for a source host and a new allocation + request for a destination host, return a new allocation_request that + contains resources claimed against both source and destination, accounting + for shared providers. + + This is expected to only be used during an evacuate operation. + + :param source_allocs: Dict, keyed by resource provider UUID, of resources + allocated on the source host + :param dest_alloc_req: The allocation_request for resources against the + destination host + """ + LOG.debug("Doubling-up allocation_request for move operation. Current " + "allocations: %s", source_allocs) + # Remove any allocations against resource providers that are + # already allocated against on the source host (like shared storage + # providers) + cur_rp_uuids = set(source_allocs.keys()) + new_rp_uuids = set(dest_alloc_req['allocations']) - cur_rp_uuids + + current_allocs = { + cur_rp_uuid: {'resources': alloc['resources']} + for cur_rp_uuid, alloc in source_allocs.items() + } + new_alloc_req = {'allocations': current_allocs} + for rp_uuid in dest_alloc_req['allocations']: + if rp_uuid in new_rp_uuids: + new_alloc_req['allocations'][rp_uuid] = dest_alloc_req[ + 'allocations'][rp_uuid] + + LOG.debug("New allocation_request containing both source and " + "destination hosts in move operation: %s", new_alloc_req) + return new_alloc_req + + +def get_placement_request_id(response): + if response is not None: + return response.headers.get(request_id.HTTP_RESP_HEADER_REQUEST_ID) + + +# TODO(mriedem): Consider making SchedulerReportClient a global singleton so +# that things like the compute API do not have to lazy-load it. That would +# likely require inspecting methods that use a ProviderTree cache to see if +# they need locks. +class SchedulerReportClient(object): + """Client class for updating the scheduler.""" + + def __init__(self, context=None, adapter=None): + """Initialize the report client. + + :param context: Security context + :param adapter: A prepared keystoneauth1 Adapter for API communication. + If unspecified, one is created based on config options in the + [placement_client] section. + """ + self._context = context + # An object that contains a zun-compute-side cache of resource + # provider and inventory information + self._provider_tree = None + # Track the last time we updated providers' aggregates and traits + self._association_refresh_time = None + self._client, self._ks_filter = self._create_client() + # NOTE(danms): Keep track of how naggy we've been + self._warn_count = 0 + + def clear_provider_cache(self, init=False): + if not init: + LOG.info("Clearing the report client's provider cache.") + self._provider_tree = provider_tree.ProviderTree() + self._association_refresh_time = {} + + def _clear_provider_cache_for_tree(self, rp_uuid): + """Clear the provider cache for only the tree containing rp_uuid. 
+ + This exists for situations where we encounter an error updating + placement, and therefore need to refresh the provider tree cache before + redriving the update. However, it would be wasteful and inefficient to + clear the *entire* cache, which may contain many separate trees (e.g. + nova/ironic nodes or sharing providers) which should be unaffected by + the error. + + :param rp_uuid: UUID of a resource provider, which may be anywhere in a + a tree hierarchy, i.e. need not be a root. For non-root + providers, we still clear the cache for the entire tree + including descendants, ancestors up to the root, + siblings/cousins and *their* ancestors/descendants. + """ + try: + uuids = self._provider_tree.get_provider_uuids_in_tree(rp_uuid) + except ValueError: + # If the provider isn't in the tree, it should also not be in the + # timer dict, so nothing to clear. + return + + # get_provider_uuids_in_tree returns UUIDs in top-down order, so the + # first one is the root; and .remove() is recursive. + self._provider_tree.remove(uuids[0]) + for uuid in uuids: + self._association_refresh_time.pop(uuid, None) + + def _create_client(self): + """Create the HTTP session accessing the placement service.""" + # Flush provider tree and associations so we start from a clean slate. + self.clear_provider_cache(init=True) + client, ks_filter = clients.OpenStackClients(self._context).placement() + + # Set accept header on every request to ensure we notify placement + # service of our response body media type preferences. + client.additional_headers = {'accept': 'application/json'} + return client, ks_filter + + def get(self, url, version=None, global_request_id=None): + headers = ({request_id.INBOUND_HEADER: global_request_id} + if global_request_id else {}) + return self._client.get(url, endpoint_filter=self._ks_filter, + microversion=version, headers=headers) + + def post(self, url, data, version=None, global_request_id=None): + headers = ({request_id.INBOUND_HEADER: global_request_id} + if global_request_id else {}) + # NOTE(sdague): using json= instead of data= sets the + # media type to application/json for us. Placement API is + # more sensitive to this than other APIs in the OpenStack + # ecosystem. + return self._client.post(url, endpoint_filter=self._ks_filter, + json=data, microversion=version, + headers=headers) + + def put(self, url, data, version=None, global_request_id=None): + # NOTE(sdague): using json= instead of data= sets the + # media type to application/json for us. Placement API is + # more sensitive to this than other APIs in the OpenStack + # ecosystem. + kwargs = {'microversion': version, + 'endpoint_filter': self._ks_filter, + 'headers': {request_id.INBOUND_HEADER: + global_request_id} if global_request_id else {}} + if data is not None: + kwargs['json'] = data + return self._client.put(url, **kwargs) + + def delete(self, url, version=None, global_request_id=None): + headers = ({request_id.INBOUND_HEADER: global_request_id} + if global_request_id else {}) + return self._client.delete(url, endpoint_filter=self._ks_filter, + microversion=version, headers=headers) + + def get_allocation_candidates(self, context, resources): + """Returns a tuple of (allocation_requests, provider_summaries, + allocation_request_version). + + The allocation_requests are a collection of potential JSON objects that + can be passed to the PUT /allocations/{consumer_uuid} Placement REST + API to claim resources against one or more resource providers that meet + the requested resource constraints. 
+ + The provider summaries is a dict, keyed by resource provider UUID, of + inventory and capacity information and traits for any resource + provider involved in the allocation_requests. + + :returns: A tuple with a list of allocation_request dicts, a dict of + provider information, and the microversion used to request + this data from placement, or (None, None, None) if the + request failed + + :param context: The security context + :param zun.scheduler.utils.ResourceRequest resources: + A ResourceRequest object representing the requested resources, + traits, and aggregates from the request spec. + + Example member_of (aggregates) value in resources: + + [('foo', 'bar'), ('baz',)] + + translates to: + + "Candidates are in either 'foo' or 'bar', but definitely in 'baz'" + + """ + # Note that claim_resources() will use this version as well to + # make allocations by `PUT /allocations/{consumer_uuid}` + version = INTREE_AC_VERSION + qparams = resources.to_querystring() + url = "/allocation_candidates?%s" % qparams + resp = self.get(url, version=version, + global_request_id=context.global_id) + if resp.status_code == 200: + data = resp.json() + return (data['allocation_requests'], data['provider_summaries'], + version) + + args = { + 'resource_request': str(resources), + 'status_code': resp.status_code, + 'err_text': resp.text, + } + msg = ("Failed to retrieve allocation candidates from placement " + "API for filters: %(resource_request)s\n" + "Got %(status_code)d: %(err_text)s.") + LOG.error(msg, args) + return None, None, None + + def _get_provider_aggregates(self, context, rp_uuid): + """Queries the placement API for a resource provider's aggregates. + + :param rp_uuid: UUID of the resource provider to grab aggregates for. + :return: A namedtuple comprising: + * .aggregates: A set() of string aggregate UUIDs, which may + be empty if the specified provider is associated with no + aggregates. + * .generation: The resource provider generation. + :raise: ResourceProviderAggregateRetrievalFailed on errors. In + particular, we raise this exception (as opposed to returning + None or the empty set()) if the specified resource provider + does not exist. + """ + resp = self.get("/resource_providers/%s/aggregates" % rp_uuid, + version=AGGREGATE_GENERATION_VERSION, + global_request_id=context.global_id) + if resp.status_code == 200: + data = resp.json() + return AggInfo(aggregates=set(data['aggregates']), + generation=data['resource_provider_generation']) + + placement_req_id = get_placement_request_id(resp) + msg = ("[%(placement_req_id)s] Failed to retrieve aggregates from " + "placement API for resource provider with UUID %(uuid)s. " + "Got %(status_code)d: %(err_text)s.") + args = { + 'placement_req_id': placement_req_id, + 'uuid': rp_uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + raise exception.ResourceProviderAggregateRetrievalFailed(uuid=rp_uuid) + + def get_provider_traits(self, context, rp_uuid): + """Queries the placement API for a resource provider's traits. + + :param context: The security context + :param rp_uuid: UUID of the resource provider to grab traits for. + :return: A namedtuple comprising: + * .traits: A set() of string trait names, which may be + empty if the specified provider has no traits. + * .generation: The resource provider generation. + :raise: ResourceProviderTraitRetrievalFailed on errors. 
In particular, + we raise this exception (as opposed to returning None or the + empty set()) if the specified resource provider does not exist. + :raise: keystoneauth1.exceptions.ClientException if placement API + communication fails. + """ + resp = self.get("/resource_providers/%s/traits" % rp_uuid, + version='1.6', global_request_id=context.global_id) + + if resp.status_code == 200: + json = resp.json() + return TraitInfo(traits=set(json['traits']), + generation=json['resource_provider_generation']) + + placement_req_id = get_placement_request_id(resp) + LOG.error( + "[%(placement_req_id)s] Failed to retrieve traits from " + "placement API for resource provider with UUID %(uuid)s. Got " + "%(status_code)d: %(err_text)s.", + {'placement_req_id': placement_req_id, 'uuid': rp_uuid, + 'status_code': resp.status_code, 'err_text': resp.text}) + raise exception.ResourceProviderTraitRetrievalFailed(uuid=rp_uuid) + + def get_resource_provider_name(self, context, uuid): + """Return the name of a RP. It tries to use the internal of RPs or + falls back to calling placement directly. + + :param context: The security context + :param uuid: UUID identifier for the resource provider to look up + :return: The name of the RP + :raise: ResourceProviderRetrievalFailed if the RP is not in the cache + and the communication with the placement is failed. + :raise: ResourceProviderNotFound if the RP does not exists. + """ + + try: + return self._provider_tree.data(uuid).name + except ValueError: + rsp = self._get_resource_provider(context, uuid) + if rsp is None: + raise exception.ResourceProviderNotFound(name_or_uuid=uuid) + else: + return rsp['name'] + + def _get_resource_provider(self, context, uuid): + """Queries the placement API for a resource provider record with the + supplied UUID. + + :param context: The security context + :param uuid: UUID identifier for the resource provider to look up + :return: A dict of resource provider information if found or None if no + such resource provider could be found. + :raise: ResourceProviderRetrievalFailed on error. + """ + resp = self.get("/resource_providers/%s" % uuid, + version=NESTED_PROVIDER_API_VERSION, + global_request_id=context.global_id) + if resp.status_code == 200: + data = resp.json() + return data + elif resp.status_code == 404: + return None + else: + placement_req_id = get_placement_request_id(resp) + msg = ("[%(placement_req_id)s] Failed to retrieve resource " + "provider record from placement API for UUID %(uuid)s. Got " + "%(status_code)d: %(err_text)s.") + args = { + 'uuid': uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': placement_req_id, + } + LOG.error(msg, args) + raise exception.ResourceProviderRetrievalFailed(uuid=uuid) + + def _get_sharing_providers(self, context, agg_uuids): + """Queries the placement API for a list of the resource providers + associated with any of the specified aggregates and possessing the + MISC_SHARES_VIA_AGGREGATE trait. + + :param context: The security context + :param agg_uuids: Iterable of string UUIDs of aggregates to filter on. + :return: A list of dicts of resource provider information, which may be + empty if no provider exists with the specified UUID. + :raise: ResourceProviderRetrievalFailed on error. 
+ """ + if not agg_uuids: + return [] + + aggs = ','.join(agg_uuids) + url = "/resource_providers?member_of=in:%s&required=%s" % ( + aggs, os_traits.MISC_SHARES_VIA_AGGREGATE) + resp = self.get(url, version='1.18', + global_request_id=context.global_id) + if resp.status_code == 200: + return resp.json()['resource_providers'] + + msg = _("[%(placement_req_id)s] Failed to retrieve sharing resource " + "providers associated with the following aggregates from " + "placement API: %(aggs)s. Got %(status_code)d: %(err_text)s.") + args = { + 'aggs': aggs, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': get_placement_request_id(resp), + } + LOG.error(msg, args) + raise exception.ResourceProviderRetrievalFailed(message=msg % args) + + def get_providers_in_tree(self, context, uuid): + """Queries the placement API for a list of the resource providers in + the tree associated with the specified UUID. + + :param context: The security context + :param uuid: UUID identifier for the resource provider to look up + :return: A list of dicts of resource provider information, which may be + empty if no provider exists with the specified UUID. + :raise: ResourceProviderRetrievalFailed on error. + :raise: keystoneauth1.exceptions.ClientException if placement API + communication fails. + """ + resp = self.get("/resource_providers?in_tree=%s" % uuid, + version=NESTED_PROVIDER_API_VERSION, + global_request_id=context.global_id) + + if resp.status_code == 200: + return resp.json()['resource_providers'] + + # Some unexpected error + placement_req_id = get_placement_request_id(resp) + msg = ("[%(placement_req_id)s] Failed to retrieve resource provider " + "tree from placement API for UUID %(uuid)s. Got " + "%(status_code)d: %(err_text)s.") + args = { + 'uuid': uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': placement_req_id, + } + LOG.error(msg, args) + raise exception.ResourceProviderRetrievalFailed(uuid=uuid) + + def _create_resource_provider(self, context, uuid, name, + parent_provider_uuid=None): + """Calls the placement API to create a new resource provider record. + + :param context: The security context + :param uuid: UUID of the new resource provider + :param name: Name of the resource provider + :param parent_provider_uuid: Optional UUID of the immediate parent + :return: A dict of resource provider information object representing + the newly-created resource provider. + :raise: ResourceProviderCreationFailed or + ResourceProviderRetrievalFailed on error. + """ + url = "/resource_providers" + payload = { + 'uuid': uuid, + 'name': name, + } + if parent_provider_uuid is not None: + payload['parent_provider_uuid'] = parent_provider_uuid + + # Bug #1746075: First try the microversion that returns the new + # provider's payload. + resp = self.post(url, payload, + version=POST_RPS_RETURNS_PAYLOAD_API_VERSION, + global_request_id=context.global_id) + + placement_req_id = get_placement_request_id(resp) + + if resp: + msg = ("[%(placement_req_id)s] Created resource provider record " + "via placement API for resource provider with UUID " + "%(uuid)s and name %(name)s.") + args = { + 'uuid': uuid, + 'name': name, + 'placement_req_id': placement_req_id, + } + LOG.info(msg, args) + return resp.json() + + # TODO(efried): Push error codes from placement, and use 'em. 
+ name_conflict = 'Conflicting resource provider name:' + if resp.status_code == 409 and name_conflict not in resp.text: + # Another thread concurrently created a resource provider with the + # same UUID. Log a warning and then just return the resource + # provider object from _get_resource_provider() + msg = ("[%(placement_req_id)s] Another thread already created a " + "resource provider with the UUID %(uuid)s. Grabbing that " + "record from the placement API.") + args = { + 'uuid': uuid, + 'placement_req_id': placement_req_id, + } + LOG.info(msg, args) + return self._get_resource_provider(context, uuid) + + # A provider with the same *name* already exists, or some other error. + msg = ("[%(placement_req_id)s] Failed to create resource provider " + "record in placement API for UUID %(uuid)s. Got " + "%(status_code)d: %(err_text)s.") + args = { + 'uuid': uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': placement_req_id, + } + LOG.error(msg, args) + raise exception.ResourceProviderCreationFailed(name=name) + + def _ensure_resource_provider(self, context, uuid, name=None, + parent_provider_uuid=None): + """Ensures that the placement API has a record of a resource provider + with the supplied UUID. If not, creates the resource provider record in + the placement API for the supplied UUID, passing in a name for the + resource provider. + + If found or created, the provider's UUID is returned from this method. + If the resource provider for the supplied uuid was not found and the + resource provider record could not be created in the placement API, an + exception is raised. + + If this method returns successfully, callers are assured that the + placement API contains a record of the provider; and that the local + cache of resource provider information contains a record of: + - The specified provider + - All providers in its tree + - All providers associated via aggregate with all providers in said + tree + and for each of those providers: + - The UUIDs of its aggregates + - The trait strings associated with the provider + + Note that if the provider did not exist prior to this call, the above + reduces to just the specified provider as a root, with no aggregates or + traits. + + :param context: The security context + :param uuid: UUID identifier for the resource provider to ensure exists + :param name: Optional name for the resource provider if the record + does not exist. If empty, the name is set to the UUID + value + :param parent_provider_uuid: Optional UUID of the immediate parent, + which must have been previously _ensured. + :raise ResourceProviderCreationFailed: If we expected to be creating + providers, but couldn't. + :raise: keystoneauth1.exceptions.ClientException if placement API + communication fails. + """ + # NOTE(efried): We currently have no code path where we need to set the + # parent_provider_uuid on a previously-parent-less provider - so we do + # NOT handle that scenario here. + + # If we already have the root provider in the cache, and it's not + # stale, don't refresh it; and use the cache to determine the + # descendants to (soft) refresh. + # NOTE(efried): This assumes the compute service only cares about + # providers it "owns". If that ever changes, we'll need a way to find + # out about out-of-band changes here. Options that have been + # brainstormed at this time: + # - Make this condition more frequently True + # - Some kind of notification subscription so a separate thread is + # alerted when . 
+ # - "Cascading generations" - i.e. a change to a leaf node percolates + # generation bump up the tree so that we bounce 409 the next time we + # try to update anything and have to refresh. + if (self._provider_tree.exists(uuid) and + not self._associations_stale(uuid)): + uuids_to_refresh = [ + u for u in self._provider_tree.get_provider_uuids(uuid) + if self._associations_stale(u)] + else: + # We either don't have it locally or it's stale. Pull or create it. + created_rp = None + rps_to_refresh = self.get_providers_in_tree(context, uuid) + if not rps_to_refresh: + try: + created_rp = self._create_resource_provider( + context, uuid, name or uuid, + parent_provider_uuid=parent_provider_uuid) + except (ks_exc.EndpointNotFound, + ks_exc.MissingAuthPlugin, + ks_exc.Unauthorized, + ks_exc.DiscoveryFailure, + ks_exc.ConnectFailure) as e: + warn_limit( + self, + 'cannot establish connection to placement: ' + str(e)) + # If we can't establish a connection to the + # placement service, like if placement isn't running or + # zun-compute is mis-configured for authentication, we'll + # get None back and need to treat it like we couldn't + # create the provider (because we couldn't). + raise exception.ResourceProviderCreationFailed( + name=name or uuid) + # Don't add the created_rp to rps_to_refresh. Since we just + # created it, it has no aggregates or traits. + # But do mark it as having just been "refreshed". + self._association_refresh_time[uuid] = time.time() + + self._provider_tree.populate_from_iterable( + rps_to_refresh or [created_rp]) + + uuids_to_refresh = [rp['uuid'] for rp in rps_to_refresh] + + # At this point, the whole tree exists in the local cache. + + for uuid_to_refresh in uuids_to_refresh: + self._refresh_associations(context, uuid_to_refresh, force=True) + + return uuid + + def _delete_provider(self, rp_uuid, global_request_id=None): + resp = self.delete('/resource_providers/%s' % rp_uuid, + global_request_id=global_request_id) + # Check for 404 since we don't need to warn/raise if we tried to delete + # something which doesn"t actually exist. + if resp or resp.status_code == 404: + if resp: + LOG.info("Deleted resource provider %s", rp_uuid) + # clean the caches + try: + self._provider_tree.remove(rp_uuid) + except ValueError: + pass + self._association_refresh_time.pop(rp_uuid, None) + return + + msg = ("[%(placement_req_id)s] Failed to delete resource provider " + "with UUID %(uuid)s from the placement API. Got " + "%(status_code)d: %(err_text)s.") + args = { + 'placement_req_id': get_placement_request_id(resp), + 'uuid': rp_uuid, + 'status_code': resp.status_code, + 'err_text': resp.text + } + LOG.error(msg, args) + # On conflict, the caller may wish to delete allocations and + # redrive. (Note that this is not the same as a + # PlacementAPIConflict case.) + if resp.status_code == 409: + raise exception.ResourceProviderInUse() + raise exception.ResourceProviderDeletionFailed(uuid=rp_uuid) + + def _get_inventory(self, context, rp_uuid): + url = '/resource_providers/%s/inventories' % rp_uuid + result = self.get(url, global_request_id=context.global_id) + if not result: + # TODO(efried): Log. + return None + return result.json() + + def _refresh_and_get_inventory(self, context, rp_uuid): + """Helper method that retrieves the current inventory for the supplied + resource provider according to the placement API. 
+ + If the cached generation of the resource provider is not the same as + the generation returned from the placement API, we update the cached + generation and attempt to update inventory if any exists, otherwise + return empty inventories. + """ + curr = self._get_inventory(context, rp_uuid) + if curr is None: + return None + + LOG.debug('Updating ProviderTree inventory for provider %s from ' + '_refresh_and_get_inventory using data: %s', rp_uuid, + curr['inventories']) + self._provider_tree.update_inventory( + rp_uuid, curr['inventories'], + generation=curr['resource_provider_generation']) + + return curr + + def _refresh_associations(self, context, rp_uuid, force=False, + refresh_sharing=True): + """Refresh inventories, aggregates, traits, and (optionally) aggregate- + associated sharing providers for the specified resource provider uuid. + + Only refresh if there has been no refresh during the lifetime of + this process, CONF.compute.resource_provider_association_refresh + seconds have passed, or the force arg has been set to True. + + :param context: The security context + :param rp_uuid: UUID of the resource provider to check for fresh + inventories, aggregates, and traits + :param force: If True, force the refresh + :param refresh_sharing: If True, fetch all the providers associated + by aggregate with the specified provider, + including their inventories, traits, and + aggregates (but not *their* sharing providers). + :raise: On various placement API errors, one of: + - ResourceProviderAggregateRetrievalFailed + - ResourceProviderTraitRetrievalFailed + - ResourceProviderRetrievalFailed + :raise: keystoneauth1.exceptions.ClientException if placement API + communication fails. + """ + if force or self._associations_stale(rp_uuid): + # Refresh inventories + msg = "Refreshing inventories for resource provider %s" + LOG.debug(msg, rp_uuid) + self._refresh_and_get_inventory(context, rp_uuid) + # Refresh aggregates + agg_info = self._get_provider_aggregates(context, rp_uuid) + aggs, generation = agg_info.aggregates, agg_info.generation + msg = ("Refreshing aggregate associations for resource provider " + "%s, aggregates: %s") + LOG.debug(msg, rp_uuid, ','.join(aggs or ['None'])) + + # NOTE(efried): This will blow up if called for a RP that doesn't + # exist in our _provider_tree. + self._provider_tree.update_aggregates( + rp_uuid, aggs, generation=generation) + + # Refresh traits + trait_info = self.get_provider_traits(context, rp_uuid) + traits, generation = trait_info.traits, trait_info.generation + msg = ("Refreshing trait associations for resource provider %s, " + "traits: %s") + LOG.debug(msg, rp_uuid, ','.join(traits or ['None'])) + # NOTE(efried): This will blow up if called for a RP that doesn't + # exist in our _provider_tree. + self._provider_tree.update_traits( + rp_uuid, traits, generation=generation) + + if refresh_sharing: + # Refresh providers associated by aggregate + for rp in self._get_sharing_providers(context, aggs): + if not self._provider_tree.exists(rp['uuid']): + # NOTE(efried): Right now sharing providers are always + # treated as roots. This is deliberate. From the + # context of this compute's RP, it doesn't matter if a + # sharing RP is part of a tree. + self._provider_tree.new_root( + rp['name'], rp['uuid'], + generation=rp['generation']) + # Now we have to (populate or) refresh that provider's + # traits, aggregates, and inventories (but not *its* + # aggregate-associated providers). 
No need to override + # force=True for newly-added providers - the missing + # timestamp will always trigger them to refresh. + self._refresh_associations(context, rp['uuid'], + force=force, + refresh_sharing=False) + self._association_refresh_time[rp_uuid] = time.time() + + def _associations_stale(self, uuid): + """Respond True if aggregates and traits have not been refreshed + "recently". + + Associations are stale if association_refresh_time for this uuid is not + set or is more than CONF.compute.resource_provider_association_refresh + seconds ago. + + Always False if CONF.compute.resource_provider_association_refresh is + zero. + """ + rpar = CONF.compute.resource_provider_association_refresh + refresh_time = self._association_refresh_time.get(uuid, 0) + # If refresh is disabled, associations are "never" stale. (But still + # load them if we haven't yet done so.) + if rpar == 0 and refresh_time != 0: + # TODO(efried): If refresh is disabled, we could avoid touching the + # _association_refresh_time dict anywhere, but that would take some + # nontrivial refactoring. + return False + return (time.time() - refresh_time) > rpar + + def get_provider_tree_and_ensure_root(self, context, rp_uuid, name=None, + parent_provider_uuid=None): + """Returns a fresh ProviderTree representing all providers which are in + the same tree or in the same aggregate as the specified provider, + including their aggregates, traits, and inventories. + + If the specified provider does not exist, it is created with the + specified UUID, name, and parent provider (which *must* already exist). + + :param context: The security context + :param rp_uuid: UUID of the resource provider for which to populate the + tree. (This doesn't need to be the UUID of the root.) + :param name: Optional name for the resource provider if the record + does not exist. If empty, the name is set to the UUID + value + :param parent_provider_uuid: Optional UUID of the immediate parent, + which must have been previously _ensured. + :return: A new ProviderTree object. + """ + # TODO(efried): We would like to have the caller handle create-and/or- + # cache-if-not-already, but the resource tracker is currently + # structured to handle initialization and update in a single path. At + # some point this should be refactored, and this method can *just* + # return a deep copy of the local _provider_tree cache. + # (Re)populate the local ProviderTree + self._ensure_resource_provider( + context, rp_uuid, name=name, + parent_provider_uuid=parent_provider_uuid) + # Return a *copy* of the tree. + return copy.deepcopy(self._provider_tree) + + def set_inventory_for_provider(self, context, rp_uuid, inv_data): + """Given the UUID of a provider, set the inventory records for the + provider to the supplied dict of resources. + + The provider must exist - this method does not attempt to create it. + + :param context: The security context + :param rp_uuid: The UUID of the provider whose inventory is to be + updated. + :param inv_data: Dict, keyed by resource class name, of inventory data + to set for the provider. Use None or the empty dict + to remove all inventory for the provider. + :raises: InventoryInUse if inv_data indicates removal of inventory in a + resource class which has active allocations for this provider. + :raises: InvalidResourceClass if inv_data contains a resource class + which cannot be created. + :raises: ResourceProviderUpdateConflict if the provider's generation + doesn't match the generation in the cache. 
Callers may choose + to retrieve the provider and its associations afresh and + redrive this operation. + :raises: ResourceProviderUpdateFailed on any other placement API + failure. + """ + def do_put(url, payload): + # NOTE(vdrok): in microversion 1.26 it is allowed to have inventory + # records with reserved value equal to total + return self.put( + url, payload, global_request_id=context.global_id, + version=ALLOW_RESERVED_EQUAL_TOTAL_INVENTORY_VERSION) + + # If not different from what we've got, short out + if not self._provider_tree.has_inventory_changed(rp_uuid, inv_data): + LOG.debug('Inventory has not changed for provider %s based ' + 'on inventory data: %s', rp_uuid, inv_data) + return + + # Ensure non-standard resource classes exist, creating them if needed. + self._ensure_resource_classes(context, set(inv_data)) + + url = '/resource_providers/%s/inventories' % rp_uuid + inv_data = inv_data or {} + generation = self._provider_tree.data(rp_uuid).generation + payload = { + 'resource_provider_generation': generation, + 'inventories': inv_data, + } + resp = do_put(url, payload) + + if resp.status_code == 200: + LOG.debug('Updated inventory for provider %s with generation %s ' + 'in Placement from set_inventory_for_provider using ' + 'data: %s', rp_uuid, generation, inv_data) + json = resp.json() + self._provider_tree.update_inventory( + rp_uuid, json['inventories'], + generation=json['resource_provider_generation']) + return + + # Some error occurred; log it + msg = ("[%(placement_req_id)s] Failed to update inventory to " + "[%(inv_data)s] for resource provider with UUID %(uuid)s. Got " + "%(status_code)d: %(err_text)s") + args = { + 'placement_req_id': get_placement_request_id(resp), + 'uuid': rp_uuid, + 'inv_data': str(inv_data), + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + + if resp.status_code == 409: + # If a conflict attempting to remove inventory in a resource class + # with active allocations, raise InventoryInUse + err = resp.json()['errors'][0] + # TODO(efried): If there's ever a lib exporting symbols for error + # codes, use it. + if err['code'] == 'placement.inventory.inuse': + # The error detail includes the resource class and provider. + raise exception.InventoryInUse(err['detail']) + # Other conflicts are generation mismatch: raise conflict exception + raise exception.ResourceProviderUpdateConflict( + uuid=rp_uuid, generation=generation, error=resp.text) + + # Otherwise, raise generic exception + raise exception.ResourceProviderUpdateFailed(url=url, error=resp.text) + + def _ensure_traits(self, context, traits): + """Make sure all specified traits exist in the placement service. + + :param context: The security context + :param traits: Iterable of trait strings to ensure exist. + :raises: TraitCreationFailed if traits contains a trait that did not + exist in placement, and couldn't be created. When this + exception is raised, it is possible that *some* of the + requested traits were created. + :raises: TraitRetrievalFailed if the initial query of existing traits + was unsuccessful. In this scenario, it is guaranteed that + no traits were created. + """ + if not traits: + return + + # Query for all the requested traits. Whichever ones we *don't* get + # back, we need to create. + # NOTE(efried): We don't attempt to filter based on our local idea of + # standard traits, which may not be in sync with what the placement + # service knows. 
If the caller tries to ensure a nonexistent + # "standard" trait, they deserve the TraitCreationFailed exception + # they'll get. + resp = self.get('/traits?name=in:' + ','.join(traits), version='1.6', + global_request_id=context.global_id) + if resp.status_code == 200: + traits_to_create = set(traits) - set(resp.json()['traits']) + # Might be neat to have a batch create. But creating multiple + # traits will generally happen once, at initial startup, if at all. + for trait in traits_to_create: + resp = self.put('/traits/' + trait, None, version='1.6', + global_request_id=context.global_id) + if not resp: + raise exception.TraitCreationFailed(name=trait, + error=resp.text) + return + + # The initial GET failed + msg = ("[%(placement_req_id)s] Failed to retrieve the list of traits. " + "Got %(status_code)d: %(err_text)s") + args = { + 'placement_req_id': get_placement_request_id(resp), + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + raise exception.TraitRetrievalFailed(error=resp.text) + + def set_traits_for_provider(self, context, rp_uuid, traits): + """Replace a provider's traits with those specified. + + The provider must exist - this method does not attempt to create it. + + :param context: The security context + :param rp_uuid: The UUID of the provider whose traits are to be updated + :param traits: Iterable of traits to set on the provider + :raises: ResourceProviderUpdateConflict if the provider's generation + doesn't match the generation in the cache. Callers may choose + to retrieve the provider and its associations afresh and + redrive this operation. + :raises: ResourceProviderUpdateFailed on any other placement API + failure. + :raises: TraitCreationFailed if traits contains a trait that did not + exist in placement, and couldn't be created. + :raises: TraitRetrievalFailed if the initial query of existing traits + was unsuccessful. + """ + # If not different from what we've got, short out + if not self._provider_tree.have_traits_changed(rp_uuid, traits): + return + + self._ensure_traits(context, traits) + + url = '/resource_providers/%s/traits' % rp_uuid + # NOTE(efried): Don't use the DELETE API when traits is empty, because + # that method doesn't return content, and we need to update the cached + # provider tree with the new generation. + traits = list(traits) if traits else [] + generation = self._provider_tree.data(rp_uuid).generation + payload = { + 'resource_provider_generation': generation, + 'traits': traits, + } + resp = self.put(url, payload, version='1.6', + global_request_id=context.global_id) + + if resp.status_code == 200: + json = resp.json() + self._provider_tree.update_traits( + rp_uuid, json['traits'], + generation=json['resource_provider_generation']) + return + + # Some error occurred; log it + msg = ("[%(placement_req_id)s] Failed to update traits to " + "[%(traits)s] for resource provider with UUID %(uuid)s. 
Got " + "%(status_code)d: %(err_text)s") + args = { + 'placement_req_id': get_placement_request_id(resp), + 'uuid': rp_uuid, + 'traits': ','.join(traits), + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + + # If a conflict, raise special conflict exception + if resp.status_code == 409: + raise exception.ResourceProviderUpdateConflict( + uuid=rp_uuid, generation=generation, error=resp.text) + + # Otherwise, raise generic exception + raise exception.ResourceProviderUpdateFailed(url=url, error=resp.text) + + def set_aggregates_for_provider(self, context, rp_uuid, aggregates, + use_cache=True, generation=None): + """Replace a provider's aggregates with those specified. + + The provider must exist - this method does not attempt to create it. + + :param context: The security context + :param rp_uuid: The UUID of the provider whose aggregates are to be + updated. + :param aggregates: Iterable of aggregates to set on the provider. + :param use_cache: If False, indicates not to update the cache of + resource providers. + :param generation: Resource provider generation. Required if use_cache + is False. + :raises: ResourceProviderUpdateConflict if the provider's generation + doesn't match the generation in the cache. Callers may choose + to retrieve the provider and its associations afresh and + redrive this operation. + :raises: ResourceProviderUpdateFailed on any other placement API + failure. + """ + # If a generation is specified, it trumps whatever's in the cache. + # Otherwise... + if generation is None: + if use_cache: + generation = self._provider_tree.data(rp_uuid).generation + else: + # Either cache or generation is required + raise ValueError( + _("generation is required with use_cache=False")) + + # Check whether aggregates need updating. We can only do this if we + # have a cache entry with a matching generation. + try: + if (self._provider_tree.data(rp_uuid).generation == generation and + not self._provider_tree.have_aggregates_changed( + rp_uuid, aggregates)): + return + except ValueError: + # Not found in the cache; proceed + pass + + url = '/resource_providers/%s/aggregates' % rp_uuid + aggregates = list(aggregates) if aggregates else [] + payload = {'aggregates': aggregates, + 'resource_provider_generation': generation} + resp = self.put(url, payload, version=AGGREGATE_GENERATION_VERSION, + global_request_id=context.global_id) + + if resp.status_code == 200: + # Try to update the cache regardless. If use_cache=False, ignore + # any failures. + try: + data = resp.json() + self._provider_tree.update_aggregates( + rp_uuid, data['aggregates'], + generation=data['resource_provider_generation']) + except ValueError: + if use_cache: + # The entry should've been there + raise + return + + # Some error occurred; log it + msg = ("[%(placement_req_id)s] Failed to update aggregates to " + "[%(aggs)s] for resource provider with UUID %(uuid)s. Got " + "%(status_code)d: %(err_text)s") + args = { + 'placement_req_id': get_placement_request_id(resp), + 'uuid': rp_uuid, + 'aggs': ','.join(aggregates), + 'status_code': resp.status_code, + 'err_text': resp.text, + } + + # If a conflict, invalidate the cache and raise special exception + if resp.status_code == 409: + # No reason to condition cache invalidation on use_cache - if we + # got a 409, the cache entry is still bogus if it exists; and the + # below is a no-op if it doesn't. 
+ try: + self._provider_tree.remove(rp_uuid) + except ValueError: + pass + self._association_refresh_time.pop(rp_uuid, None) + + LOG.warning(msg, args) + raise exception.ResourceProviderUpdateConflict( + uuid=rp_uuid, generation=generation, error=resp.text) + + # Otherwise, raise generic exception + LOG.error(msg, args) + raise exception.ResourceProviderUpdateFailed(url=url, error=resp.text) + + def _ensure_resource_classes(self, context, names): + """Make sure resource classes exist. + + :param context: The security context + :param names: Iterable of string names of the resource classes to + check/create. Must not be None. + :raises: exception.InvalidResourceClass if an attempt is made to create + an invalid resource class. + """ + # Placement API version that supports PUT /resource_classes/CUSTOM_* + # to create (or validate the existence of) a consumer-specified + # resource class. + version = '1.7' + to_ensure = set(n for n in names + if n.startswith(orc.CUSTOM_NAMESPACE)) + + for name in to_ensure: + # no payload on the put request + resp = self.put( + "/resource_classes/%s" % name, None, version=version, + global_request_id=context.global_id) + if not resp: + msg = ("Failed to ensure resource class record with placement " + "API for resource class %(rc_name)s. Got " + "%(status_code)d: %(err_text)s.") + args = { + 'rc_name': name, + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + raise exception.InvalidResourceClass(resource_class=name) + + def _reshape(self, context, inventories, allocations): + """Perform atomic inventory & allocation data migration. + + :param context: The security context + :param inventories: A dict, keyed by resource provider UUID, of: + { "inventories": { inventory dicts, keyed by resource class }, + "resource_provider_generation": $RP_GEN } + :param allocations: A dict, keyed by consumer UUID, of: + { "project_id": $PROJ_ID, + "user_id": $USER_ID, + "consumer_generation": $CONSUMER_GEN, + "allocations": { + $RP_UUID: { + "resources": { $RC: $AMOUNT, ... } + }, + ... + } + } + :return: The Response object representing a successful API call. + :raises: ReshapeFailed if the POST /reshaper request fails. + :raises: keystoneauth1.exceptions.ClientException if placement API + communication fails. + """ + # We have to make sure any new resource classes exist + for invs in inventories.values(): + self._ensure_resource_classes(context, list(invs['inventories'])) + payload = {"inventories": inventories, "allocations": allocations} + resp = self.post('/reshaper', payload, version=RESHAPER_VERSION, + global_request_id=context.global_id) + if not resp: + raise exception.ReshapeFailed(error=resp.text) + + return resp + + def _set_up_and_do_reshape(self, context, old_tree, new_tree, allocations): + LOG.info("Performing resource provider inventory and allocation " + "data migration.") + new_uuids = new_tree.get_provider_uuids() + inventories = {} + for rp_uuid in new_uuids: + data = new_tree.data(rp_uuid) + inventories[rp_uuid] = { + "inventories": data.inventory, + "resource_provider_generation": data.generation + } + # Even though we're going to delete them immediately, we still want + # to send "inventory changes" for to-be-removed providers in this + # reshape request so they're done atomically. This prevents races + # where the scheduler could allocate between here and when we + # delete the providers. 
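+ # For example (illustrative shape only), a provider being removed ends
+ # up in the payload as
+ # inventories[rp_uuid] = {
+ # "inventories": {},
+ # "resource_provider_generation": <its old generation>,
+ # }
+ # alongside the entries for surviving providers, so placement applies
+ # the whole migration in a single /reshaper call.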
+ to_remove = set(old_tree.get_provider_uuids()) - set(new_uuids) + for rp_uuid in to_remove: + inventories[rp_uuid] = { + "inventories": {}, + "resource_provider_generation": + old_tree.data(rp_uuid).generation + } + # Now we're ready to POST /reshaper. This can raise ReshapeFailed, + # but we also need to convert any other exception (including e.g. + # PlacementAPIConnectFailure) to ReshapeFailed because we want any + # failure here to be fatal to the caller. + try: + self._reshape(context, inventories, allocations) + except exception.ReshapeFailed: + raise + except Exception as e: + # Make sure the original stack trace gets logged. + LOG.exception('Reshape failed') + raise exception.ReshapeFailed(error=e) + + def update_from_provider_tree(self, context, new_tree, allocations=None): + """Flush changes from a specified ProviderTree back to placement. + + The specified ProviderTree is compared against the local cache. Any + changes are flushed back to the placement service. Upon successful + completion, the local cache should reflect the specified ProviderTree. + + This method is best-effort and not atomic. When exceptions are raised, + it is possible that some of the changes have been flushed back, leaving + the placement database in an inconsistent state. This should be + recoverable through subsequent calls. + + :param context: The security context + :param new_tree: A ProviderTree instance representing the desired state + of providers in placement. + :param allocations: A dict, keyed by consumer UUID, of allocation + records of the form returned by + GET /allocations/{consumer_uuid} representing the + comprehensive final picture of the allocations for + each consumer therein. A value of None indicates + that no reshape is being performed. + :raises: ResourceProviderUpdateConflict if a generation conflict was + encountered - i.e. we are attempting to update placement based + on a stale view of it. + :raises: ResourceProviderSyncFailed if any errors were encountered + attempting to perform the necessary API operations, except + reshape (see below). + :raises: ReshapeFailed if a reshape was signaled (allocations not None) + and it fails for any reason. + """ + # NOTE(efried): We currently do not handle the "rename" case. This is + # where new_tree contains a provider named Y whose UUID already exists + # but is named X. + + @contextlib.contextmanager + def catch_all(rp_uuid): + """Convert all "expected" exceptions from placement API helpers to + ResourceProviderSyncFailed* and invalidate the caches for the tree + around `rp_uuid`. + + * Except ResourceProviderUpdateConflict, which signals the caller + to redrive the operation; and ReshapeFailed, which triggers + special error handling behavior in the resource tracker and + compute manager. + """ + # TODO(efried): Make a base exception class from which all these + # can inherit. + helper_exceptions = ( + exception.InvalidResourceClass, + exception.InventoryInUse, + exception.ResourceProviderAggregateRetrievalFailed, + exception.ResourceProviderDeletionFailed, + exception.ResourceProviderInUse, + exception.ResourceProviderRetrievalFailed, + exception.ResourceProviderTraitRetrievalFailed, + exception.ResourceProviderUpdateFailed, + exception.TraitCreationFailed, + exception.TraitRetrievalFailed, + # NOTE(efried): We do not trap/convert ReshapeFailed - that one + # needs to bubble up right away and be handled specially. 
+ ) + try: + yield + except exception.ResourceProviderUpdateConflict: + # Invalidate the tree around the failing provider and reraise + # the conflict exception. This signals the resource tracker to + # redrive the update right away rather than waiting until the + # next periodic. + with excutils.save_and_reraise_exception(): + self._clear_provider_cache_for_tree(rp_uuid) + except helper_exceptions: + # Invalidate the relevant part of the cache. It gets rebuilt on + # the next pass. + self._clear_provider_cache_for_tree(rp_uuid) + raise exception.ResourceProviderSyncFailed() + + # Helper methods herein will be updating the local cache (this is + # intentional) so we need to grab up front any data we need to operate + # on in its "original" form. + old_tree = self._provider_tree + old_uuids = old_tree.get_provider_uuids() + new_uuids = new_tree.get_provider_uuids() + uuids_to_add = set(new_uuids) - set(old_uuids) + uuids_to_remove = set(old_uuids) - set(new_uuids) + + # In case a reshape is happening, we first have to create (or load) any + # "new" providers. + # We have to do additions in top-down order, so we don't error + # attempting to create a child before its parent exists. + for uuid in new_uuids: + if uuid not in uuids_to_add: + continue + provider = new_tree.data(uuid) + with catch_all(uuid): + self._ensure_resource_provider( + context, uuid, name=provider.name, + parent_provider_uuid=provider.parent_uuid) + # We have to stuff the freshly-created provider's generation + # into the new_tree so we don't get conflicts updating its + # inventories etc. later. + # TODO(efried): We don't have a good way to set the generation + # independently; this is a hack. + new_tree.update_inventory( + uuid, new_tree.data(uuid).inventory, + generation=self._provider_tree.data(uuid).generation) + + # If we need to reshape, do it here. + if allocations is not None: + # NOTE(efried): We do not catch_all here, because ReshapeFailed + # needs to bubble up right away and be handled specially. + self._set_up_and_do_reshape(context, old_tree, new_tree, + allocations) + # The reshape updated provider generations, so the ones we have in + # the cache are now stale. The inventory update below will short + # out, but we would still bounce with a provider generation + # conflict on the trait and aggregate updates. + for uuid in new_uuids: + # TODO(efried): GET /resource_providers?uuid=in:[list] would be + # handy here. Meanwhile, this is an already-written, if not + # obvious, way to refresh provider generations in the cache. + with catch_all(uuid): + self._refresh_and_get_inventory(context, uuid) + + # Now we can do provider deletions, because we should have moved any + # allocations off of them via reshape. + # We have to do deletions in bottom-up order, so we don't error + # attempting to delete a parent who still has children. (We get the + # UUIDs in bottom-up order by reversing old_uuids, which was given to + # us in top-down order per ProviderTree.get_provider_uuids().) + for uuid in reversed(old_uuids): + if uuid not in uuids_to_remove: + continue + with catch_all(uuid): + self._delete_provider(uuid) + + # At this point the local cache should have all the same providers as + # new_tree. Whether we added them or not, walk through and diff/flush + # inventories, traits, and aggregates as necessary. Note that, if we + # reshaped above, any inventory changes have already been done. 
But the + # helper methods are set up to check and short out when the relevant + # property does not differ from what's in the cache. + # If we encounter any error and remove a provider from the cache, all + # its descendants are also removed, and set_*_for_provider methods on + # it wouldn't be able to get started. Walking the tree in bottom-up + # order ensures we at least try to process all of the providers. (We + # get the UUIDs in bottom-up order by reversing new_uuids, which was + # given to us in top-down order per ProviderTree.get_provider_uuids().) + for uuid in reversed(new_uuids): + pd = new_tree.data(uuid) + with catch_all(pd.uuid): + self.set_inventory_for_provider( + context, pd.uuid, pd.inventory) + self.set_aggregates_for_provider( + context, pd.uuid, pd.aggregates) + self.set_traits_for_provider(context, pd.uuid, pd.traits) + + def get_allocs_for_consumer(self, context, consumer): + """Makes a GET /allocations/{consumer} call to Placement. + + :param context: The zun.context.RequestContext auth context + :param consumer: UUID of the consumer resource + :return: Dict of the form: + { "allocations": { + $RP_UUID: { + "generation": $RP_GEN, + "resources": { + $RESOURCE_CLASS: $AMOUNT + ... + }, + }, + ... + }, + "consumer_generation": $CONSUMER_GEN, + "project_id": $PROJ_ID, + "user_id": $USER_ID, + } + :raises: keystoneauth1.exceptions.base.ClientException on failure to + communicate with the placement API + :raises: ConsumerAllocationRetrievalFailed if the placement API call + fails + """ + resp = self.get('/allocations/%s' % consumer, + version=CONSUMER_GENERATION_VERSION, + global_request_id=context.global_id) + if not resp: + # TODO(efried): Use code/title/detail to make a better exception + raise exception.ConsumerAllocationRetrievalFailed( + consumer_uuid=consumer, error=resp.text) + + return resp.json() + + def get_allocations_for_consumer_by_provider(self, context, rp_uuid, + consumer): + """Return allocations for a consumer and a resource provider. + + :param context: The zun.context.RequestContext auth context + :param rp_uuid: UUID of the resource provider + :param consumer: UUID of the consumer + :return: the resources dict of the consumer's allocation keyed by + resource classes + """ + try: + # NOTE(cdent): This trims to just the allocations being + # used on this resource provider. In the future when there + # are shared resources there might be other providers. + allocations = self.get_allocations_for_consumer(context, consumer) + return allocations.get( + rp_uuid, {}).get('resources', {}) + except ks_exc.NotFound: + return {} + + # TODO(hongbin): revisit the commit below + # NOTE(jaypipes): Currently, this method is ONLY used in three places: + # 1. By the scheduler to allocate resources on the selected destination + # hosts. + # 2. By the conductor LiveMigrationTask to allocate resources on a forced + # destination host. In this case, the source node allocations have + # already been moved to the migration record so the instance should not + # have allocations and _move_operation_alloc_request will not be called. + # 3. By the conductor ComputeTaskManager to allocate resources on a forced + # destination host during evacuate. This case will call the + # _move_operation_alloc_request method. + # This method should not be called by the resource tracker. 
+ @retries + def claim_resources(self, context, consumer_uuid, alloc_request, + project_id, user_id, allocation_request_version, + consumer_generation=None): + """Creates allocation records for the supplied container UUID against + the supplied resource providers. + + We check to see if resources have already been claimed for this + consumer. If so, we assume that a move operation is underway and the + scheduler is attempting to claim resources against the new (destination + host). In order to prevent compute nodes currently performing move + operations from being scheduled to improperly, we create a "doubled-up" + allocation that consumes resources on *both* the source and the + destination host during the move operation. + + :param context: The security context + :param consumer_uuid: The container's UUID. + :param alloc_request: The JSON body of the request to make to the + placement's PUT /allocations API + :param project_id: The project_id associated with the allocations. + :param user_id: The user_id associated with the allocations. + :param allocation_request_version: The microversion used to request the + allocations. + :param consumer_generation: The expected generation of the consumer. + None if a new consumer is expected + :returns: True if the allocations were created, False otherwise. + :raise AllocationUpdateFailed: If consumer_generation in the + alloc_request does not match with the + placement view. + """ + # Ensure we don't change the supplied alloc request since it's used in + # a loop within the scheduler against multiple container claims + ar = copy.deepcopy(alloc_request) + + url = '/allocations/%s' % consumer_uuid + + payload = ar + + # We first need to determine if this is a move operation and if so + # create the "doubled-up" allocation that exists for the duration of + # the move operation against both the source and destination hosts + r = self.get(url, global_request_id=context.global_id, + version=CONSUMER_GENERATION_VERSION) + if r.status_code == 200: + body = r.json() + current_allocs = body['allocations'] + if current_allocs: + if 'consumer_generation' not in ar: + # this is non-forced evacuation. Evacuation does not use + # the migration.uuid to hold the source host allocation + # therefore when the scheduler calls claim_resources() then + # the two allocations need to be combined. Scheduler does + # not know that this is not a new consumer as it only sees + # allocation candidates. + # Therefore we need to use the consumer generation from + # the above GET. + # If between the GET and the PUT the consumer generation + # changes in placement then we raise + # AllocationUpdateFailed. + # NOTE(gibi): This only detect a small portion of possible + # cases when allocation is modified outside of the this + # code path. The rest can only be detected if zun would + # cache at least the consumer generation of the container. + consumer_generation = body['consumer_generation'] + else: + # this is forced evacuation and the caller + # claim_resources_on_destination() provides the consumer + # generation it sees in the conductor when it generates the + # request. 
+ consumer_generation = ar['consumer_generation'] + payload = _move_operation_alloc_request(current_allocs, ar) + + payload['project_id'] = project_id + payload['user_id'] = user_id + + if (versionutils.convert_version_to_tuple( + allocation_request_version) >= + versionutils.convert_version_to_tuple( + CONSUMER_GENERATION_VERSION)): + payload['consumer_generation'] = consumer_generation + + r = self._put_allocations( + context, + consumer_uuid, + payload, + version=allocation_request_version) + if r.status_code != 204: + err = r.json()['errors'][0] + if err['code'] == 'placement.concurrent_update': + # NOTE(jaypipes): Yes, it sucks doing string comparison like + # this but we have no error codes, only error messages. + # TODO(gibi): Use more granular error codes when available + if 'consumer generation conflict' in err['detail']: + reason = ('another process changed the consumer %s after ' + 'the report client read the consumer state ' + 'during the claim ' % consumer_uuid) + raise exception.AllocationUpdateFailed( + consumer_uuid=consumer_uuid, error=reason) + + # this is not a consumer generation conflict so it can only be + # a resource provider generation conflict. The caller does not + # provide resource provider generation so this is just a + # placement internal race. We can blindly retry locally. + reason = ('another process changed the resource providers ' + 'involved in our attempt to put allocations for ' + 'consumer %s' % consumer_uuid) + raise Retry('claim_resources', reason) + return r.status_code == 204 + + def remove_resources_from_container_allocation( + self, context, consumer_uuid, resources): + """Removes certain resources from the current allocation of the + consumer. + + :param context: the request context + :param consumer_uuid: the uuid of the consumer to update + :param resources: a dict of resources. E.g.: + { + : { + : amount + : amount + } + : { + : amount + } + } + :raises AllocationUpdateFailed: if the requested resource cannot be + removed from the current allocation (e.g. rp is missing from + the allocation) or there was multiple generation conflict and + we run out of retires. + :raises ConsumerAllocationRetrievalFailed: If the current allocation + cannot be read from placement. + :raises: keystoneauth1.exceptions.base.ClientException on failure to + communicate with the placement API + """ + + # NOTE(gibi): It is just a small wrapper to raise instead of return + # if we run out of retries. + if not self._remove_resources_from_container_allocation( + context, consumer_uuid, resources): + error_reason = _("Cannot remove resources %s from the allocation " + "due to multiple successive generation conflicts " + "in placement.") + raise exception.AllocationUpdateFailed( + consumer_uuid=consumer_uuid, + error=error_reason % resources) + + @retries + def _remove_resources_from_container_allocation( + self, context, consumer_uuid, resources): + if not resources: + # Nothing to remove so do not query or update allocation in + # placement. + # The True value is only here because the retry decorator returns + # False when runs out of retries. It would be nicer to raise in + # that case too. + return True + + current_allocs = self.get_allocs_for_consumer(context, consumer_uuid) + + if not current_allocs['allocations']: + error_reason = _("Cannot remove resources %(resources)s from " + "allocation %(allocations)s. 
The allocation is " + "empty.") + raise exception.AllocationUpdateFailed( + consumer_uuid=consumer_uuid, + error=error_reason % {'resources': resources, + 'allocations': current_allocs}) + + try: + for rp_uuid, resources_to_remove in resources.items(): + allocation_on_rp = current_allocs['allocations'][rp_uuid] + for rc, value in resources_to_remove.items(): + allocation_on_rp['resources'][rc] -= value + + if allocation_on_rp['resources'][rc] < 0: + error_reason = _( + "Cannot remove resources %(resources)s from " + "allocation %(allocations)s. There are not enough " + "allocated resources left on %(rp_uuid)s resource " + "provider to remove %(amount)d amount of " + "%(resource_class)s resources.") + raise exception.AllocationUpdateFailed( + consumer_uuid=consumer_uuid, + error=error_reason % { + 'resources': resources, + 'allocations': current_allocs, + 'rp_uuid': rp_uuid, + 'amount': value, + 'resource_class': rc}) + + if allocation_on_rp['resources'][rc] == 0: + # if no allocation left for this rc then remove it + # from the allocation + del allocation_on_rp['resources'][rc] + except KeyError as e: + error_reason = _("Cannot remove resources %(resources)s from " + "allocation %(allocations)s. Key %(missing_key)s " + "is missing from the allocation.") + # rp_uuid is missing from the allocation or resource class is + # missing from the allocation + raise exception.AllocationUpdateFailed( + consumer_uuid=consumer_uuid, + error=error_reason % { + 'resources': resources, + 'allocations': current_allocs, + 'missing_key': e}) + + # we have to remove the rps from the allocation that has no resources + # any more + current_allocs['allocations'] = { + rp_uuid: alloc + for rp_uuid, alloc in current_allocs['allocations'].items() + if alloc['resources']} + + r = self._put_allocations( + context, consumer_uuid, current_allocs) + + if r.status_code != 204: + err = r.json()['errors'][0] + if err['code'] == 'placement.concurrent_update': + reason = ('another process changed the resource providers or ' + 'the consumer involved in our attempt to update ' + 'allocations for consumer %s so we cannot remove ' + 'resources %s from the current allocation %s' % + (consumer_uuid, resources, current_allocs)) + # NOTE(gibi): automatic retry is meaningful if we can still + # remove the resources from the updated allocations. Retry + # works here as this function (re)queries the allocations. + raise Retry( + 'remove_resources_from_container_allocation', reason) + + # It is only here because the retry decorator returns False when runs + # out of retries. It would be nicer to raise in that case too. + return True + + def remove_provider_tree_from_container_allocation(self, context, + consumer_uuid, + root_rp_uuid): + """Removes every allocation from the consumer that is on the + specified provider tree. + + Note that this function does not try to remove allocations from sharing + providers. + + :param context: The security context + :param consumer_uuid: The UUID of the consumer to manipulate + :param root_rp_uuid: The root of the provider tree + :raises: keystoneauth1.exceptions.base.ClientException on failure to + communicate with the placement API + :raises: ConsumerAllocationRetrievalFailed if this call cannot read + the current state of the allocations from placement + :raises: ResourceProviderRetrievalFailed if it cannot collect the RPs + in the tree specified by root_rp_uuid. 
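+
+ For example (illustrative, with placeholder names): if the consumer has
+ allocations against <compute_rp> (inside the tree rooted at
+ root_rp_uuid) and <sharing_rp> (a sharing provider outside that tree),
+ only the <compute_rp> entry is removed and the remaining allocation
+ against <sharing_rp> is written back unchanged.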
+ """ + current_allocs = self.get_allocs_for_consumer(context, consumer_uuid) + if not current_allocs['allocations']: + LOG.error("Expected to find current allocations for %s, but " + "found none.", consumer_uuid) + # TODO(gibi): do not return False as none of the callers + # do anything with the return value except log + return False + + rps = self.get_providers_in_tree(context, root_rp_uuid) + rp_uuids = [rp['uuid'] for rp in rps] + + # go through the current allocations and remove every RP from it that + # belongs to the RP tree identified by the root_rp_uuid parameter + has_changes = False + for rp_uuid in rp_uuids: + changed = bool( + current_allocs['allocations'].pop(rp_uuid, None)) + has_changes = has_changes or changed + + # If nothing changed then don't do anything + if not has_changes: + LOG.warning( + "Expected to find allocations referencing resource " + "provider tree rooted at %s for %s, but found none.", + root_rp_uuid, consumer_uuid) + # TODO(gibi): do not return a value as none of the callers + # do anything with the return value except logging + return True + + r = self._put_allocations(context, consumer_uuid, current_allocs) + # TODO(gibi): do not return a value as none of the callers + # do anything with the return value except logging + return r.status_code == 204 + + def _put_allocations( + self, context, consumer_uuid, payload, + version=CONSUMER_GENERATION_VERSION): + url = '/allocations/%s' % consumer_uuid + r = self.put(url, payload, version=version, + global_request_id=context.global_id) + if r.status_code != 204: + LOG.warning("Failed to save allocation for %s. Got HTTP %s: %s", + consumer_uuid, r.status_code, r.text) + return r + + @retries + def move_allocations(self, context, source_consumer_uuid, + target_consumer_uuid): + """Move allocations from one consumer to the other + + Note that this call moves the current allocation from the source + consumer to the target consumer. If parallel update happens on either + consumer during this call then Placement will detect that and + this code will raise AllocationMoveFailed. If you want to move a known + piece of allocation from source to target then this function might not + be what you want as it always moves what source has in Placement. + + If the target consumer has allocations but the source consumer does + not, this method assumes the allocations were already moved and + returns True. + + :param context: The security context + :param source_consumer_uuid: the UUID of the consumer from which + allocations are moving + :param target_consumer_uuid: the UUID of the target consumer for the + allocations + :returns: True if the move was successful (or already done), + False otherwise. + :raises AllocationMoveFailed: If the source or the target consumer has + been modified while this call tries to + move allocations. + """ + source_alloc = self.get_allocs_for_consumer( + context, source_consumer_uuid) + target_alloc = self.get_allocs_for_consumer( + context, target_consumer_uuid) + + if target_alloc and target_alloc['allocations']: + # Check to see if the source allocations still exist because if + # they don't they might have already been moved to the target. 
+ if not (source_alloc and source_alloc['allocations']): + LOG.info('Allocations not found for consumer %s; assuming ' + 'they were already moved to consumer %s', + source_consumer_uuid, target_consumer_uuid) + return True + LOG.debug('Overwriting current allocation %(allocation)s on ' + 'consumer %(consumer)s', + {'allocation': target_alloc, + 'consumer': target_consumer_uuid}) + + new_allocs = { + source_consumer_uuid: { + # 'allocations': {} means we are removing the allocation from + # the source consumer + 'allocations': {}, + 'project_id': source_alloc['project_id'], + 'user_id': source_alloc['user_id'], + 'consumer_generation': source_alloc['consumer_generation']}, + target_consumer_uuid: { + 'allocations': source_alloc['allocations'], + # NOTE(gibi): Is there any case when we need to keep the + # project_id and user_id of the target allocation that we are + # about to overwrite? + 'project_id': source_alloc['project_id'], + 'user_id': source_alloc['user_id'], + 'consumer_generation': target_alloc.get('consumer_generation') + } + } + r = self.post('/allocations', new_allocs, + version=CONSUMER_GENERATION_VERSION, + global_request_id=context.global_id) + if r.status_code != 204: + err = r.json()['errors'][0] + if err['code'] == 'placement.concurrent_update': + # NOTE(jaypipes): Yes, it sucks doing string comparison like + # this but we have no error codes, only error messages. + # TODO(gibi): Use more granular error codes when available + if 'consumer generation conflict' in err['detail']: + raise exception.AllocationMoveFailed( + source_consumer=source_consumer_uuid, + target_consumer=target_consumer_uuid, + error=r.text) + + reason = ('another process changed the resource providers ' + 'involved in our attempt to post allocations for ' + 'consumer %s' % target_consumer_uuid) + raise Retry('move_allocations', reason) + else: + LOG.warning( + 'Unable to post allocations for consumer ' + '%(uuid)s (%(code)i %(text)s)', + {'uuid': target_consumer_uuid, + 'code': r.status_code, + 'text': r.text}) + return r.status_code == 204 + + @retries + def put_allocations(self, context, consumer_uuid, payload): + """Creates allocation records for the supplied consumer UUID based on + the provided allocation dict + + :param context: The security context + :param consumer_uuid: The container's UUID. + :param payload: Dict in the format expected by the placement + PUT /allocations/{consumer_uuid} API + :returns: True if the allocations were created, False otherwise. + :raises: Retry if the operation should be retried due to a concurrent + resource provider update. + :raises: AllocationUpdateFailed if placement returns a consumer + generation conflict + """ + + r = self._put_allocations(context, consumer_uuid, payload) + if r.status_code != 204: + err = r.json()['errors'][0] + # NOTE(jaypipes): Yes, it sucks doing string comparison like this + # but we have no error codes, only error messages. + # TODO(gibi): Use more granular error codes when available + if err['code'] == 'placement.concurrent_update': + if 'consumer generation conflict' in err['detail']: + raise exception.AllocationUpdateFailed( + consumer_uuid=consumer_uuid, error=err['detail']) + # this is not a consumer generation conflict so it can only be + # a resource provider generation conflict. The caller does not + # provide resource provider generation so this is just a + # placement internal race. We can blindly retry locally. 
+ reason = ('another process changed the resource providers ' + 'involved in our attempt to put allocations for ' + 'consumer %s' % consumer_uuid) + raise Retry('put_allocations', reason) + return r.status_code == 204 + + def delete_allocation_for_container(self, context, uuid, + consumer_type='container'): + """Delete the container allocation from placement + + :param context: The security context + :param uuid: the container or migration UUID which will be used + as the consumer UUID towards placement + :param consumer_type: The type of the consumer specified by uuid. + 'container' or 'migration' (Default: container) + :return: Returns True if the allocation is successfully deleted by this + call. Returns False if the allocation does not exist. + :raises AllocationDeleteFailed: If the allocation cannot be read from + placement or it is changed by another process while we tried to + delete it. + """ + url = '/allocations/%s' % uuid + # We read the consumer generation then try to put an empty allocation + # for that consumer. If between the GET and the PUT the consumer + # generation changes then we raise AllocationDeleteFailed. + # NOTE(gibi): This only detect a small portion of possible cases when + # allocation is modified outside of the delete code path. The rest can + # only be detected if zun would cache at least the consumer generation + # of the container. + # NOTE(gibi): placement does not return 404 for non-existing consumer + # but returns an empty consumer instead. Putting an empty allocation to + # that non-existing consumer won't be 404 or other error either. + r = self.get(url, global_request_id=context.global_id, + version=CONSUMER_GENERATION_VERSION) + if not r: + # at the moment there is no way placement returns a failure so we + # could even delete this code + LOG.warning('Unable to delete allocation for %(consumer_type)s ' + '%(uuid)s: (%(code)i %(text)s)', + {'consumer_type': consumer_type, + 'uuid': uuid, + 'code': r.status_code, + 'text': r.text}) + raise exception.AllocationDeleteFailed(consumer_uuid=uuid, + error=r.text) + allocations = r.json() + if allocations['allocations'] == {}: + # the consumer did not exist in the first place + LOG.debug('Cannot delete allocation for %s consumer in placement ' + 'as consumer does not exists', uuid) + return False + + # removing all resources from the allocation will auto delete the + # consumer in placement + allocations['allocations'] = {} + r = self.put(url, allocations, global_request_id=context.global_id, + version=CONSUMER_GENERATION_VERSION) + if r.status_code == 204: + LOG.info('Deleted allocation for %(consumer_type)s %(uuid)s', + {'consumer_type': consumer_type, + 'uuid': uuid}) + return True + else: + LOG.warning('Unable to delete allocation for %(consumer_type)s ' + '%(uuid)s: (%(code)i %(text)s)', + {'consumer_type': consumer_type, + 'uuid': uuid, + 'code': r.status_code, + 'text': r.text}) + raise exception.AllocationDeleteFailed(consumer_uuid=uuid, + error=r.text) + + def get_allocations_for_resource_provider(self, context, rp_uuid): + """Retrieves the allocations for a specific provider. + + :param context: The zun.context.RequestContext auth context + :param rp_uuid: The UUID of the provider. + :return: ProviderAllocInfo namedtuple. + :raises: keystoneauth1.exceptions.base.ClientException on failure to + communicate with the placement API + :raises: ResourceProviderAllocationRetrievalFailed if the placement API + call fails. 
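+
+        The allocations in the returned ProviderAllocInfo are keyed by
+        consumer UUID (illustrative shape only)::
+
+            { $CONSUMER_UUID: {"resources": {$RESOURCE_CLASS: $AMOUNT, ...}},
+              ... }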
+ """ + url = '/resource_providers/%s/allocations' % rp_uuid + resp = self.get(url, global_request_id=context.global_id) + if not resp: + raise exception.ResourceProviderAllocationRetrievalFailed( + rp_uuid=rp_uuid, error=resp.text) + + data = resp.json() + return ProviderAllocInfo(allocations=data['allocations']) + + def get_allocations_for_provider_tree(self, context, nodename): + """Retrieve allocation records associated with all providers in the + provider tree. + + This method uses the cache exclusively to discover providers. The + caller must ensure that the cache is populated. + + This method is (and should remain) used exclusively in the reshaper + flow by the resource tracker. + + Note that, in addition to allocations on providers in this compute + node's provider tree, this method will return allocations on sharing + providers if those allocations are associated with a consumer on this + compute node. This is intentional and desirable. But it may also return + allocations belonging to other hosts, e.g. if this is happening in the + middle of an evacuate. ComputeDriver.update_provider_tree is supposed + to ignore such allocations if they appear. + + :param context: The security context + :param nodename: The name of a node for whose tree we are getting + allocations. + :returns: A dict, keyed by consumer UUID, of allocation records: + { $CONSUMER_UUID: { + # The shape of each "allocations" dict below is identical + # to the return from GET /allocations/{consumer_uuid} + "allocations": { + $RP_UUID: { + "generation": $RP_GEN, + "resources": { + $RESOURCE_CLASS: $AMOUNT, + ... + }, + }, + ... + }, + "project_id": $PROJ_ID, + "user_id": $USER_ID, + "consumer_generation": $CONSUMER_GEN, + }, + ... + } + :raises: keystoneauth1.exceptions.ClientException if placement API + communication fails. + :raises: ResourceProviderAllocationRetrievalFailed if a placement API + call fails. + :raises: ValueError if there's no provider with the specified nodename. + """ + # NOTE(efried): Despite our best efforts, there are some scenarios + # (e.g. mid-evacuate) where we can still wind up returning allocations + # against providers belonging to other hosts. We count on the consumer + # of this information (i.e. the reshaper flow of a virt driver's + # update_provider_tree) to ignore allocations associated with any + # provider it is not reshaping - and it should never be reshaping + # providers belonging to other hosts. + + # We can't get *all* allocations for associated sharing providers + # because some of those will belong to consumers on other hosts. So we + # have to discover all the consumers associated with the providers in + # the "local" tree (we use the nodename to figure out which providers + # are "local"). + # All we want to do at this point is accumulate the set of consumers we + # care about. + consumers = set() + # TODO(efried): This could be more efficient if placement offered an + # operation like GET /allocations?rp_uuid=in: + for u in self._provider_tree.get_provider_uuids(name_or_uuid=nodename): + alloc_info = self.get_allocations_for_resource_provider(context, u) + # The allocations dict is keyed by consumer UUID + consumers.update(alloc_info.allocations) + + # Now get all the allocations for each of these consumers to build the + # result. This will include allocations on sharing providers, which is + # intentional and desirable. But it may also include allocations + # belonging to other hosts, e.g. if this is happening in the middle of + # an evacuate. 
ComputeDriver.update_provider_tree is supposed to ignore + # such allocations if they appear. + # TODO(efried): This could be more efficient if placement offered an + # operation like GET /allocations?consumer_uuid=in: + return {consumer: self.get_allocs_for_consumer(context, consumer) + for consumer in consumers} + + def delete_resource_provider(self, context, compute_node, cascade=False): + """Deletes the ResourceProvider record for the compute_node. + + :param context: The security context + :param compute_node: The zun.objects.ComputeNode object that is the + resource provider being deleted. + :param cascade: Boolean value that, when True, will first delete any + associated Allocation and Inventory records for the + compute node + """ + host = compute_node.hostname + rp_uuid = compute_node.uuid + if cascade: + # Delete any allocations for this resource provider. + # Since allocations are by consumer, we get the consumers on this + # host, which are its containers. + # TODO(mriedem): Optimize this up by adding an + # InstanceList.get_uuids_by_host_and_node method. + # Pass expected_attrs=[] to avoid joining on extra columns we + # don't use. + containers = objects.Container.list_by_host(context, host) + for container in containers: + self.delete_allocation_for_container(context, container.uuid) + try: + self._delete_provider(rp_uuid, global_request_id=context.global_id) + except (exception.ResourceProviderInUse, + exception.ResourceProviderDeletionFailed): + # TODO(efried): Raise these. Right now this is being left a no-op + # for backward compatibility. + pass + + def get_provider_by_name(self, context, name): + """Queries the placement API for resource provider information matching + a supplied name. + + :param context: The security context + :param name: Name of the resource provider to look up + :return: A dict of resource provider information including the + provider's UUID and generation + :raises: `exception.ResourceProviderNotFound` when no such provider was + found + :raises: PlacementAPIConnectFailure if there was an issue making the + API call to placement. + """ + try: + resp = self.get("/resource_providers?name=%s" % name, + global_request_id=context.global_id) + except ks_exc.ClientException as ex: + LOG.error('Failed to get resource provider by name: %s. Error: %s', + name, six.text_type(ex)) + raise exception.PlacementAPIConnectFailure() + + if resp.status_code == 200: + data = resp.json() + records = data['resource_providers'] + num_recs = len(records) + if num_recs == 1: + return records[0] + elif num_recs > 1: + msg = ("Found multiple resource provider records for resource " + "provider name %(rp_name)s: %(rp_uuids)s. " + "This should not happen.") + LOG.warning(msg, { + 'rp_name': name, + 'rp_uuids': ','.join([r['uuid'] for r in records]) + }) + elif resp.status_code != 404: + msg = ("Failed to retrieve resource provider information by name " + "for resource provider %s. Got %d: %s") + LOG.warning(msg, name, resp.status_code, resp.text) + + raise exception.ResourceProviderNotFound(name_or_uuid=name) + + @retrying.retry(stop_max_attempt_number=4, + retry_on_exception=lambda e: isinstance( + e, exception.ResourceProviderUpdateConflict)) + def aggregate_add_host(self, context, agg_uuid, host_name=None, + rp_uuid=None): + """Looks up a resource provider by the supplied host name, and adds the + aggregate with supplied UUID to that resource provider. + + :note: This method does NOT use the cached provider tree. 
It is only + called from the Compute API when a host aggregate is + modified + + :param context: The security context + :param agg_uuid: UUID of the aggregate being modified + :param host_name: Name of the zun-compute service worker to look up a + resource provider for. Either host_name or rp_uuid is + required. + :param rp_uuid: UUID of the resource provider to add to the aggregate. + Either host_name or rp_uuid is required. + :raises: `exceptions.ResourceProviderNotFound` if no resource provider + matching the host name could be found from the placement API + :raises: `exception.ResourceProviderAggregateRetrievalFailed` when + failing to get a provider's existing aggregates + :raises: `exception.ResourceProviderUpdateFailed` if there was a + failure attempting to save the provider aggregates + :raises: `exception.ResourceProviderUpdateConflict` if a concurrent + update to the provider was detected. + :raises: PlacementAPIConnectFailure if there was an issue making an + API call to placement. + """ + if host_name is None and rp_uuid is None: + raise ValueError(_("Either host_name or rp_uuid is required")) + if rp_uuid is None: + rp_uuid = self.get_provider_by_name(context, host_name)['uuid'] + + # Now attempt to add the aggregate to the resource provider. We don't + # want to overwrite any other aggregates the provider may be associated + # with, however, so we first grab the list of aggregates for this + # provider and add the aggregate to the list of aggregates it already + # has + agg_info = self._get_provider_aggregates(context, rp_uuid) + existing_aggs, gen = agg_info.aggregates, agg_info.generation + if agg_uuid in existing_aggs: + return + + new_aggs = existing_aggs | set([agg_uuid]) + self.set_aggregates_for_provider( + context, rp_uuid, new_aggs, use_cache=False, generation=gen) + + @retrying.retry(stop_max_attempt_number=4, + retry_on_exception=lambda e: isinstance( + e, exception.ResourceProviderUpdateConflict)) + def aggregate_remove_host(self, context, agg_uuid, host_name): + """Looks up a resource provider by the supplied host name, and removes + the aggregate with supplied UUID from that resource provider. + + :note: This method does NOT use the cached provider tree. It is only + called from the Compute API when a host aggregate is + modified + + :param context: The security context + :param agg_uuid: UUID of the aggregate being modified + :param host_name: Name of the zun-compute service worker to look up a + resource provider for + :raises: `exceptions.ResourceProviderNotFound` if no resource provider + matching the host name could be found from the placement API + :raises: `exception.ResourceProviderAggregateRetrievalFailed` when + failing to get a provider's existing aggregates + :raises: `exception.ResourceProviderUpdateFailed` if there was a + failure attempting to save the provider aggregates + :raises: `exception.ResourceProviderUpdateConflict` if a concurrent + update to the provider was detected. + :raises: PlacementAPIConnectFailure if there was an issue making an + API call to placement. + """ + rp_uuid = self.get_provider_by_name(context, host_name)['uuid'] + # Now attempt to remove the aggregate from the resource provider. 
We + # don't want to overwrite any other aggregates the provider may be + # associated with, however, so we first grab the list of aggregates for + # this provider and remove the aggregate from the list of aggregates it + # already has + agg_info = self._get_provider_aggregates(context, rp_uuid) + existing_aggs, gen = agg_info.aggregates, agg_info.generation + if agg_uuid not in existing_aggs: + return + + new_aggs = existing_aggs - set([agg_uuid]) + self.set_aggregates_for_provider( + context, rp_uuid, new_aggs, use_cache=False, generation=gen) + + @staticmethod + def _handle_usages_error_from_placement(resp, project_id, user_id=None): + msg = ('[%(placement_req_id)s] Failed to retrieve usages for project ' + '%(project_id)s and user %(user_id)s. Got %(status_code)d: ' + '%(err_text)s') + args = {'placement_req_id': get_placement_request_id(resp), + 'project_id': project_id, + 'user_id': user_id or 'N/A', + 'status_code': resp.status_code, + 'err_text': resp.text} + LOG.error(msg, args) + raise exception.UsagesRetrievalFailed(project_id=project_id, + user_id=user_id or 'N/A') + + @retrying.retry(stop_max_attempt_number=4, + retry_on_exception=lambda e: isinstance( + e, ks_exc.ConnectFailure)) + def _get_usages(self, context, project_id, user_id=None): + url = '/usages?project_id=%s' % project_id + if user_id: + url = ''.join([url, '&user_id=%s' % user_id]) + return self.get(url, version=GET_USAGES_VERSION, + global_request_id=context.global_id) + + def get_usages_counts_for_quota(self, context, project_id, user_id=None): + """Get the usages counts for the purpose of counting quota usage. + + :param context: The request context + :param project_id: The project_id to count across + :param user_id: The user_id to count across + :returns: A dict containing the project-scoped and user-scoped counts + if user_id is specified. For example: + {'project': {'cores': , + 'ram': }, + {'user': {'cores': , + 'ram': }, + :raises: `exception.UsagesRetrievalFailed` if a placement API call + fails + """ + total_counts = {'project': {}} + # First query counts across all users of a project + LOG.debug('Getting usages for project_id %s from placement', + project_id) + resp = self._get_usages(context, project_id) + if resp: + data = resp.json() + # The response from placement will not contain a resource class if + # there is no usage. We can consider a missing class to be 0 usage. + cores = data['usages'].get(orc.VCPU, 0) + ram = data['usages'].get(orc.MEMORY_MB, 0) + total_counts['project'] = {'cores': cores, 'ram': ram} + else: + self._handle_usages_error_from_placement(resp, project_id) + # If specified, second query counts across one user in the project + if user_id: + LOG.debug('Getting usages for project_id %s and user_id %s from ' + 'placement', project_id, user_id) + resp = self._get_usages(context, project_id, user_id=user_id) + if resp: + data = resp.json() + cores = data['usages'].get(orc.VCPU, 0) + ram = data['usages'].get(orc.MEMORY_MB, 0) + total_counts['user'] = {'cores': cores, 'ram': ram} + else: + self._handle_usages_error_from_placement(resp, project_id, + user_id=user_id) + return total_counts diff --git a/zun/scheduler/utils.py b/zun/scheduler/utils.py new file mode 100644 index 000000000..24d44461d --- /dev/null +++ b/zun/scheduler/utils.py @@ -0,0 +1,282 @@ +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Utility methods for scheduling.""" + +import collections +import re + +import os_resource_classes as orc +from oslo_log import log as logging +from six.moves.urllib import parse + +import zun.conf +from zun import objects + + +LOG = logging.getLogger(__name__) + +CONF = zun.conf.CONF + + +class ResourceRequest(object): + """Presents a granular resource request via RequestGroup instances.""" + # extra_specs-specific consts + XS_RES_PREFIX = 'resources' + XS_TRAIT_PREFIX = 'trait' + # Regex patterns for numbered or un-numbered resources/trait keys + XS_KEYPAT = re.compile(r"^(%s)([1-9][0-9]*)?:(.*)$" % + '|'.join((XS_RES_PREFIX, XS_TRAIT_PREFIX))) + + def __init__(self): + # { ident: RequestGroup } + self._rg_by_id = {} + self._group_policy = None + # Default to the configured limit but _limit can be + # set to None to indicate "no limit". + self._limit = CONF.scheduler.max_placement_results + + def __str__(self): + return ', '.join(sorted( + list(str(rg) for rg in list(self._rg_by_id.values())))) + + @property + def group_policy(self): + return self._group_policy + + @group_policy.setter + def group_policy(self, value): + self._group_policy = value + + def get_request_group(self, ident): + if ident not in self._rg_by_id: + rq_grp = objects.RequestGroup(use_same_provider=bool(ident)) + self._rg_by_id[ident] = rq_grp + return self._rg_by_id[ident] + + def add_request_group(self, request_group): + """Inserts the existing group with a unique integer id + + This function can ensure unique ids by using bigger + ids than the maximum of existing ids. + + :param request_group: the RequestGroup to be added + """ + # NOTE(gibi) [0] just here to always have a defined maximum + group_idents = [0] + [int(ident) for ident in self._rg_by_id if ident] + ident = max(group_idents) + 1 + self._rg_by_id[ident] = request_group + + def _add_resource(self, groupid, rclass, amount): + # Validate the class. + if not (rclass.startswith(orc.CUSTOM_NAMESPACE) or + rclass in orc.STANDARDS): + LOG.warning( + "Received an invalid ResourceClass '%(key)s' in extra_specs.", + {"key": rclass}) + return + # val represents the amount. Convert to int, or warn and skip. + try: + amount = int(amount) + if amount < 0: + raise ValueError() + except ValueError: + LOG.warning( + "Resource amounts must be nonnegative integers. Received " + "'%(val)s' for key resources%(groupid)s.", + {"groupid": groupid or '', "val": amount}) + return + self.get_request_group(groupid).resources[rclass] = amount + + def _add_trait(self, groupid, trait_name, trait_type): + # Currently the only valid values for a trait entry are 'required' + # and 'forbidden' + trait_vals = ('required', 'forbidden') + if trait_type == 'required': + self.get_request_group(groupid).required_traits.add(trait_name) + elif trait_type == 'forbidden': + self.get_request_group(groupid).forbidden_traits.add(trait_name) + else: + LOG.warning( + "Only (%(tvals)s) traits are supported. 
Received '%(val)s' " + "for key trait%(groupid)s.", + {"tvals": ', '.join(trait_vals), "groupid": groupid or '', + "val": trait_type}) + return + + def _add_group_policy(self, policy): + # The only valid values for group_policy are 'none' and 'isolate'. + if policy not in ('none', 'isolate'): + LOG.warning( + "Invalid group_policy '%s'. Valid values are 'none' and " + "'isolate'.", policy) + return + self._group_policy = policy + + @classmethod + def from_extra_specs(cls, extra_specs, req=None): + """Processes resources and traits in numbered groupings in extra_specs. + + Examines extra_specs for items of the following forms: + "resources:$RESOURCE_CLASS": $AMOUNT + "resources$N:$RESOURCE_CLASS": $AMOUNT + "trait:$TRAIT_NAME": "required" + "trait$N:$TRAIT_NAME": "required" + + Does *not* yet handle member_of[$N]. + + :param extra_specs: The extra_specs dict. + :param req: the ResourceRequest object to add the requirements to or + None to create a new ResourceRequest + :return: A ResourceRequest object representing the resources and + required traits in the extra_specs. + """ + # TODO(efried): Handle member_of[$N], which will need to be reconciled + # with destination.aggregates handling in resources_from_request_spec + + if req is not None: + ret = req + else: + ret = cls() + + for key, val in extra_specs.items(): + if key == 'group_policy': + ret._add_group_policy(val) + continue + + match = cls.XS_KEYPAT.match(key) + if not match: + continue + + # 'prefix' is 'resources' or 'trait' + # 'suffix' is $N or None + # 'name' is either the resource class name or the trait name. + prefix, suffix, name = match.groups() + + # Process "resources[$N]" + if prefix == cls.XS_RES_PREFIX: + ret._add_resource(suffix, name, val) + + # Process "trait[$N]" + elif prefix == cls.XS_TRAIT_PREFIX: + ret._add_trait(suffix, name, val) + + return ret + + def resource_groups(self): + for rg in self._rg_by_id.values(): + yield rg.resources + + def get_num_of_numbered_groups(self): + return len([ident for ident in self._rg_by_id.keys() + if ident is not None]) + + def merged_resources(self, resources=None): + """Returns a merge of {resource_class: amount} for all resource groups. + + Amounts of the same resource class from different groups are added + together. + + :param resources: A flat dict of {resource_class: amount}. If + specified, the resources therein are folded + into the return dict, such that any resource + in resources is included only if that + resource class does not exist elsewhere in the + merged ResourceRequest. + :return: A dict of the form {resource_class: amount} + """ + ret = collections.defaultdict(lambda: 0) + for resource_dict in self.resource_groups(): + for resource_class, amount in resource_dict.items(): + ret[resource_class] += amount + if resources: + for resource_class, amount in resources.items(): + # If it's in there - even if zero - ignore the one from the + # flavor. + if resource_class not in ret: + ret[resource_class] = amount + # Now strip zeros. 
This has to be done after the above - we can't + # use strip_zeros :( + ret = {rc: amt for rc, amt in ret.items() if amt} + return dict(ret) + + def _clean_empties(self): + """Get rid of any empty ResourceGroup instances.""" + for ident, rg in list(self._rg_by_id.items()): + if not any((rg.resources, rg.required_traits, + rg.forbidden_traits)): + self._rg_by_id.pop(ident) + + def strip_zeros(self): + """Remove any resources whose amounts are zero.""" + for resource_dict in self.resource_groups(): + for rclass in list(resource_dict): + if resource_dict[rclass] == 0: + resource_dict.pop(rclass) + self._clean_empties() + + def to_querystring(self): + """Produce a querystring of the form expected by + GET /allocation_candidates. + """ + # TODO(gibi): We have a RequestGroup OVO so we can move this to that + # class as a member function. + # NOTE(efried): The sorting herein is not necessary for the API; it is + # to make testing easier and logging/debugging predictable. + def to_queryparams(request_group, suffix): + res = request_group.resources + required_traits = request_group.required_traits + forbidden_traits = request_group.forbidden_traits + aggregates = request_group.aggregates + in_tree = request_group.in_tree + + resource_query = ",".join( + sorted("%s:%s" % (rc, amount) + for (rc, amount) in res.items())) + qs_params = [('resources%s' % suffix, resource_query)] + + # Assemble required and forbidden traits, allowing for either/both + # to be empty. + required_val = ','.join( + sorted(required_traits) + + ['!%s' % ft for ft in sorted(forbidden_traits)]) + if required_val: + qs_params.append(('required%s' % suffix, required_val)) + if aggregates: + aggs = [] + # member_ofN is a list of lists. We need a tuple of + # ('member_ofN', 'in:uuid,uuid,...') for each inner list. + for agglist in aggregates: + aggs.append(('member_of%s' % suffix, + 'in:' + ','.join(sorted(agglist)))) + qs_params.extend(sorted(aggs)) + if in_tree: + qs_params.append(('in_tree%s' % suffix, in_tree)) + return qs_params + + if self._limit is not None: + qparams = [('limit', self._limit)] + else: + qparams = [] + if self._group_policy is not None: + qparams.append(('group_policy', self._group_policy)) + + for ident, rg in self._rg_by_id.items(): + # [('resourcesN', 'rclass:amount,rclass:amount,...'), + # ('requiredN', 'trait_name,!trait_name,...'), + # ('member_ofN', 'in:uuid,uuid,...'), + # ('member_ofN', 'in:uuid,uuid,...')] + qparams.extend(to_queryparams(rg, ident or '')) + + return parse.urlencode(sorted(qparams)) diff --git a/zun/tests/unit/compute/test_provider_tree.py b/zun/tests/unit/compute/test_provider_tree.py new file mode 100644 index 000000000..5fb0d6e4a --- /dev/null +++ b/zun/tests/unit/compute/test_provider_tree.py @@ -0,0 +1,692 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+from oslo_utils.fixture import uuidsentinel as uuids + +from zun.common import context +from zun.compute import provider_tree +from zun.tests import base +from zun.tests.unit.objects import utils + + +class TestProviderTree(base.TestCase): + + def setUp(self): + super(TestProviderTree, self).setUp() + self.context = context.get_admin_context() + self.compute_node1 = utils.get_test_compute_node( + self.context, uuid=uuids.cn1, hostname='compute-node-1') + self.compute_node2 = utils.get_test_compute_node( + self.context, uuid=uuids.cn2, hostname='compute-node-2') + self.compute_nodes = [self.compute_node1, self.compute_node2] + + def _pt_with_cns(self): + pt = provider_tree.ProviderTree() + for cn in self.compute_nodes: + pt.new_root(cn.hostname, cn.uuid, generation=0) + return pt + + def test_tree_ops(self): + cn1 = self.compute_node1 + cn2 = self.compute_node2 + pt = self._pt_with_cns() + + self.assertRaises( + ValueError, + pt.new_root, + cn1.hostname, + cn1.uuid, + ) + + self.assertTrue(pt.exists(cn1.uuid)) + self.assertTrue(pt.exists(cn1.hostname)) + self.assertFalse(pt.exists(uuids.non_existing_rp)) + self.assertFalse(pt.exists('noexist')) + + self.assertEqual([cn1.uuid], + pt.get_provider_uuids(name_or_uuid=cn1.uuid)) + # Same with ..._in_tree + self.assertEqual([cn1.uuid], pt.get_provider_uuids_in_tree(cn1.uuid)) + self.assertEqual(set([cn1.uuid, cn2.uuid]), + set(pt.get_provider_uuids())) + + numa_cell0_uuid = pt.new_child('numa_cell0', cn1.uuid) + numa_cell1_uuid = pt.new_child('numa_cell1', cn1.hostname) + + self.assertEqual(cn1.uuid, pt.data(numa_cell1_uuid).parent_uuid) + + self.assertTrue(pt.exists(numa_cell0_uuid)) + self.assertTrue(pt.exists('numa_cell0')) + + self.assertTrue(pt.exists(numa_cell1_uuid)) + self.assertTrue(pt.exists('numa_cell1')) + + pf1_cell0_uuid = pt.new_child('pf1_cell0', numa_cell0_uuid) + self.assertTrue(pt.exists(pf1_cell0_uuid)) + self.assertTrue(pt.exists('pf1_cell0')) + + # Now we've got a 3-level tree under cn1 - check provider UUIDs again + all_cn1 = [cn1.uuid, numa_cell0_uuid, pf1_cell0_uuid, numa_cell1_uuid] + self.assertEqual( + set(all_cn1), + set(pt.get_provider_uuids(name_or_uuid=cn1.uuid))) + # Same with ..._in_tree if we're asking for the root + self.assertEqual( + set(all_cn1), + set(pt.get_provider_uuids_in_tree(cn1.uuid))) + # Asking for a subtree. + self.assertEqual( + [numa_cell0_uuid, pf1_cell0_uuid], + pt.get_provider_uuids(name_or_uuid=numa_cell0_uuid)) + # With ..._in_tree, get the whole tree no matter which we specify. + for node in all_cn1: + self.assertEqual(set(all_cn1), set(pt.get_provider_uuids_in_tree( + node))) + # With no provider specified, get everything + self.assertEqual( + set([cn1.uuid, cn2.uuid, numa_cell0_uuid, pf1_cell0_uuid, + numa_cell1_uuid]), + set(pt.get_provider_uuids())) + + self.assertRaises( + ValueError, + pt.new_child, + 'pf1_cell0', + uuids.non_existing_rp, + ) + + # Fail attempting to add a child that already exists in the tree + # Existing provider is a child; search by name + self.assertRaises(ValueError, pt.new_child, 'numa_cell0', cn1.uuid) + # Existing provider is a root; search by UUID + self.assertRaises(ValueError, pt.new_child, cn1.uuid, cn2.uuid) + + # Test data(). 
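+        # (Working assumption for the checks below: data() returns a
+        # read-only snapshot of the provider's fields rather than the live
+        # node.)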
+ # Root, by UUID + cn1_snap = pt.data(cn1.uuid) + # Fields were faithfully copied + self.assertEqual(cn1.uuid, cn1_snap.uuid) + self.assertEqual(cn1.hostname, cn1_snap.name) + self.assertIsNone(cn1_snap.parent_uuid) + self.assertEqual({}, cn1_snap.inventory) + self.assertEqual(set(), cn1_snap.traits) + self.assertEqual(set(), cn1_snap.aggregates) + # Validate read-only-ness + self.assertRaises(AttributeError, setattr, cn1_snap, 'name', 'foo') + + cn3 = utils.get_test_compute_node( + self.context, uuid=uuids.cn3, hostname='compute-node-3') + self.assertFalse(pt.exists(cn3.uuid)) + self.assertFalse(pt.exists(cn3.hostname)) + pt.new_root(cn3.hostname, cn3.uuid) + + self.assertTrue(pt.exists(cn3.uuid)) + self.assertTrue(pt.exists(cn3.hostname)) + + self.assertRaises( + ValueError, + pt.new_root, + cn3.hostname, + cn3.uuid, + ) + + self.assertRaises( + ValueError, + pt.remove, + uuids.non_existing_rp, + ) + + pt.remove(numa_cell1_uuid) + self.assertFalse(pt.exists(numa_cell1_uuid)) + self.assertTrue(pt.exists(pf1_cell0_uuid)) + self.assertTrue(pt.exists(numa_cell0_uuid)) + self.assertTrue(pt.exists(uuids.cn1)) + + # Now remove the root and check that children no longer exist + pt.remove(uuids.cn1) + self.assertFalse(pt.exists(pf1_cell0_uuid)) + self.assertFalse(pt.exists(numa_cell0_uuid)) + self.assertFalse(pt.exists(uuids.cn1)) + + def test_populate_from_iterable_empty(self): + pt = provider_tree.ProviderTree() + # Empty list is a no-op + pt.populate_from_iterable([]) + self.assertEqual([], pt.get_provider_uuids()) + + def test_populate_from_iterable_error_orphan_cycle(self): + pt = provider_tree.ProviderTree() + + # Error trying to populate with an orphan + grandchild1_1 = { + 'uuid': uuids.grandchild1_1, + 'name': 'grandchild1_1', + 'generation': 11, + 'parent_provider_uuid': uuids.child1, + } + + self.assertRaises(ValueError, + pt.populate_from_iterable, [grandchild1_1]) + + # Create a cycle so there are no orphans, but no path to a root + cycle = { + 'uuid': uuids.child1, + 'name': 'child1', + 'generation': 1, + # There's a country song about this + 'parent_provider_uuid': uuids.grandchild1_1, + } + + self.assertRaises(ValueError, + pt.populate_from_iterable, [grandchild1_1, cycle]) + + def test_populate_from_iterable_complex(self): + # root + # +-> child1 + # | +-> grandchild1_2 + # | +-> ggc1_2_1 + # | +-> ggc1_2_2 + # | +-> ggc1_2_3 + # +-> child2 + # another_root + pt = provider_tree.ProviderTree() + plist = [ + { + 'uuid': uuids.root, + 'name': 'root', + 'generation': 0, + }, + { + 'uuid': uuids.child1, + 'name': 'child1', + 'generation': 1, + 'parent_provider_uuid': uuids.root, + }, + { + 'uuid': uuids.child2, + 'name': 'child2', + 'generation': 2, + 'parent_provider_uuid': uuids.root, + }, + { + 'uuid': uuids.grandchild1_2, + 'name': 'grandchild1_2', + 'generation': 12, + 'parent_provider_uuid': uuids.child1, + }, + { + 'uuid': uuids.ggc1_2_1, + 'name': 'ggc1_2_1', + 'generation': 121, + 'parent_provider_uuid': uuids.grandchild1_2, + }, + { + 'uuid': uuids.ggc1_2_2, + 'name': 'ggc1_2_2', + 'generation': 122, + 'parent_provider_uuid': uuids.grandchild1_2, + }, + { + 'uuid': uuids.ggc1_2_3, + 'name': 'ggc1_2_3', + 'generation': 123, + 'parent_provider_uuid': uuids.grandchild1_2, + }, + { + 'uuid': uuids.another_root, + 'name': 'another_root', + 'generation': 911, + }, + ] + pt.populate_from_iterable(plist) + + def validate_root(expected_uuids): + # Make sure we have all and only the expected providers + self.assertEqual(expected_uuids, set(pt.get_provider_uuids())) + # Now make sure 
they're in the right hierarchy. Cheat: get the + # actual _Provider to make it easier to walk the tree (ProviderData + # doesn't include children). + root = pt._find_with_lock(uuids.root) + self.assertEqual(uuids.root, root.uuid) + self.assertEqual('root', root.name) + self.assertEqual(0, root.generation) + self.assertIsNone(root.parent_uuid) + self.assertEqual(2, len(list(root.children))) + for child in root.children.values(): + self.assertTrue(child.name.startswith('child')) + if child.name == 'child1': + if uuids.grandchild1_1 in expected_uuids: + self.assertEqual(2, len(list(child.children))) + else: + self.assertEqual(1, len(list(child.children))) + for grandchild in child.children.values(): + self.assertTrue(grandchild.name.startswith( + 'grandchild1_')) + if grandchild.name == 'grandchild1_1': + self.assertEqual(0, len(list(grandchild.children))) + if grandchild.name == 'grandchild1_2': + self.assertEqual(3, len(list(grandchild.children))) + for ggc in grandchild.children.values(): + self.assertTrue(ggc.name.startswith('ggc1_2_')) + another_root = pt._find_with_lock(uuids.another_root) + self.assertEqual(uuids.another_root, another_root.uuid) + self.assertEqual('another_root', another_root.name) + self.assertEqual(911, another_root.generation) + self.assertIsNone(another_root.parent_uuid) + self.assertEqual(0, len(list(another_root.children))) + if uuids.new_root in expected_uuids: + new_root = pt._find_with_lock(uuids.new_root) + self.assertEqual(uuids.new_root, new_root.uuid) + self.assertEqual('new_root', new_root.name) + self.assertEqual(42, new_root.generation) + self.assertIsNone(new_root.parent_uuid) + self.assertEqual(0, len(list(new_root.children))) + + expected_uuids = set([ + uuids.root, uuids.child1, uuids.child2, uuids.grandchild1_2, + uuids.ggc1_2_1, uuids.ggc1_2_2, uuids.ggc1_2_3, + uuids.another_root]) + + validate_root(expected_uuids) + + # Merge an orphan - still an error + orphan = { + 'uuid': uuids.orphan, + 'name': 'orphan', + 'generation': 86, + 'parent_provider_uuid': uuids.mystery, + } + self.assertRaises(ValueError, pt.populate_from_iterable, [orphan]) + + # And the tree didn't change + validate_root(expected_uuids) + + # Merge a list with a new grandchild and a new root + plist = [ + { + 'uuid': uuids.grandchild1_1, + 'name': 'grandchild1_1', + 'generation': 11, + 'parent_provider_uuid': uuids.child1, + }, + { + 'uuid': uuids.new_root, + 'name': 'new_root', + 'generation': 42, + }, + ] + pt.populate_from_iterable(plist) + + expected_uuids |= set([uuids.grandchild1_1, uuids.new_root]) + + validate_root(expected_uuids) + + # Merge an empty list - still a no-op + pt.populate_from_iterable([]) + validate_root(expected_uuids) + + # Since we have a complex tree, test the ordering of get_provider_uuids + # We can't predict the order of siblings, or where nephews will appear + # relative to their uncles, but we can guarantee that any given child + # always comes after its parent (and by extension, its ancestors too). 
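+        # For example, an ordering such as [root, child1, child2,
+        # grandchild1_2, ggc1_2_1, ..., another_root, new_root] would be
+        # acceptable, while any ordering that puts grandchild1_2 before
+        # child1 would not be.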
+ puuids = pt.get_provider_uuids() + for desc in (uuids.child1, uuids.child2): + self.assertGreater(puuids.index(desc), puuids.index(uuids.root)) + for desc in (uuids.grandchild1_1, uuids.grandchild1_2): + self.assertGreater(puuids.index(desc), puuids.index(uuids.child1)) + for desc in (uuids.ggc1_2_1, uuids.ggc1_2_2, uuids.ggc1_2_3): + self.assertGreater( + puuids.index(desc), puuids.index(uuids.grandchild1_2)) + + def test_populate_from_iterable_with_root_update(self): + # Ensure we can update hierarchies, including adding children, in a + # tree that's already populated. This tests the case where a given + # provider exists both in the tree and in the input. We must replace + # that provider *before* we inject its descendants; otherwise the + # descendants will be lost. Note that this test case is not 100% + # reliable, as we can't predict the order over which hashed values are + # iterated. + + pt = provider_tree.ProviderTree() + + # Let's create a root + plist = [ + { + 'uuid': uuids.root, + 'name': 'root', + 'generation': 0, + }, + ] + pt.populate_from_iterable(plist) + expected_uuids = [uuids.root] + self.assertEqual(expected_uuids, pt.get_provider_uuids()) + + # Let's add a child updating the name and generation for the root. + # root + # +-> child1 + plist = [ + { + 'uuid': uuids.root, + 'name': 'root_with_new_name', + 'generation': 1, + }, + { + 'uuid': uuids.child1, + 'name': 'child1', + 'generation': 1, + 'parent_provider_uuid': uuids.root, + }, + ] + pt.populate_from_iterable(plist) + expected_uuids = [uuids.root, uuids.child1] + self.assertEqual(expected_uuids, pt.get_provider_uuids()) + + def test_populate_from_iterable_disown_grandchild(self): + # Start with: + # root + # +-> child + # | +-> grandchild + # Then send in [child] and grandchild should disappear. 
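+        # (The expectation being exercised here: populate_from_iterable
+        # treats each supplied record as authoritative for the subtree rooted
+        # at it, so re-sending 'child' without its descendants prunes
+        # 'grandchild'.)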
+ child = { + 'uuid': uuids.child, + 'name': 'child', + 'generation': 1, + 'parent_provider_uuid': uuids.root, + } + pt = provider_tree.ProviderTree() + plist = [ + { + 'uuid': uuids.root, + 'name': 'root', + 'generation': 0, + }, + child, + { + 'uuid': uuids.grandchild, + 'name': 'grandchild', + 'generation': 2, + 'parent_provider_uuid': uuids.child, + }, + ] + pt.populate_from_iterable(plist) + self.assertEqual([uuids.root, uuids.child, uuids.grandchild], + pt.get_provider_uuids()) + self.assertTrue(pt.exists(uuids.grandchild)) + pt.populate_from_iterable([child]) + self.assertEqual([uuids.root, uuids.child], pt.get_provider_uuids()) + self.assertFalse(pt.exists(uuids.grandchild)) + + def test_has_inventory_changed_no_existing_rp(self): + pt = self._pt_with_cns() + self.assertRaises( + ValueError, + pt.has_inventory_changed, + uuids.non_existing_rp, + {} + ) + + def test_update_inventory_no_existing_rp(self): + pt = self._pt_with_cns() + self.assertRaises( + ValueError, + pt.update_inventory, + uuids.non_existing_rp, + {}, + ) + + def test_has_inventory_changed(self): + cn = self.compute_node1 + pt = self._pt_with_cns() + rp_gen = 1 + + cn_inv = { + 'VCPU': { + 'total': 8, + 'min_unit': 1, + 'max_unit': 8, + 'step_size': 1, + 'allocation_ratio': 16.0, + }, + 'MEMORY_MB': { + 'total': 1024, + 'reserved': 512, + 'min_unit': 64, + 'max_unit': 1024, + 'step_size': 64, + 'allocation_ratio': 1.5, + }, + 'DISK_GB': { + 'total': 1000, + 'reserved': 100, + 'min_unit': 10, + 'max_unit': 1000, + 'step_size': 10, + 'allocation_ratio': 1.0, + }, + } + self.assertTrue(pt.has_inventory_changed(cn.uuid, cn_inv)) + self.assertTrue(pt.update_inventory(cn.uuid, cn_inv, + generation=rp_gen)) + + # Updating with the same inventory info should return False + self.assertFalse(pt.has_inventory_changed(cn.uuid, cn_inv)) + self.assertFalse(pt.update_inventory(cn.uuid, cn_inv, + generation=rp_gen)) + + # A data-grab's inventory should be "equal" to the original + cndata = pt.data(cn.uuid) + self.assertFalse(pt.has_inventory_changed(cn.uuid, cndata.inventory)) + + cn_inv['VCPU']['total'] = 6 + self.assertTrue(pt.has_inventory_changed(cn.uuid, cn_inv)) + self.assertTrue(pt.update_inventory(cn.uuid, cn_inv, + generation=rp_gen)) + + # The data() result was not affected; now the tree's copy is different + self.assertTrue(pt.has_inventory_changed(cn.uuid, cndata.inventory)) + + self.assertFalse(pt.has_inventory_changed(cn.uuid, cn_inv)) + self.assertFalse(pt.update_inventory(cn.uuid, cn_inv, + generation=rp_gen)) + + # Deleting a key in the new record should NOT result in changes being + # recorded... 
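+        # (The expectation being exercised here: fields missing from a
+        # resource class record in the new inventory are ignored by the
+        # comparison, whereas dropping an entire resource class, as with
+        # MEMORY_MB below, does count as a change.)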
+ del cn_inv['VCPU']['allocation_ratio'] + self.assertFalse(pt.has_inventory_changed(cn.uuid, cn_inv)) + self.assertFalse(pt.update_inventory(cn.uuid, cn_inv, + generation=rp_gen)) + + del cn_inv['MEMORY_MB'] + self.assertTrue(pt.has_inventory_changed(cn.uuid, cn_inv)) + self.assertTrue(pt.update_inventory(cn.uuid, cn_inv, + generation=rp_gen)) + + # ...but *adding* a key in the new record *should* result in changes + # being recorded + cn_inv['VCPU']['reserved'] = 0 + self.assertTrue(pt.has_inventory_changed(cn.uuid, cn_inv)) + self.assertTrue(pt.update_inventory(cn.uuid, cn_inv, + generation=rp_gen)) + + def test_have_traits_changed_no_existing_rp(self): + pt = self._pt_with_cns() + self.assertRaises( + ValueError, pt.have_traits_changed, uuids.non_existing_rp, []) + + def test_update_traits_no_existing_rp(self): + pt = self._pt_with_cns() + self.assertRaises( + ValueError, pt.update_traits, uuids.non_existing_rp, []) + + def test_have_traits_changed(self): + cn = self.compute_node1 + pt = self._pt_with_cns() + rp_gen = 1 + + traits = [ + "HW_GPU_API_DIRECT3D_V7_0", + "HW_NIC_OFFLOAD_SG", + "HW_CPU_X86_AVX", + ] + self.assertTrue(pt.have_traits_changed(cn.uuid, traits)) + # A data-grab's traits are the same + cnsnap = pt.data(cn.uuid) + self.assertFalse(pt.have_traits_changed(cn.uuid, cnsnap.traits)) + self.assertTrue(pt.has_traits(cn.uuid, [])) + self.assertFalse(pt.has_traits(cn.uuid, traits)) + self.assertFalse(pt.has_traits(cn.uuid, traits[:1])) + self.assertTrue(pt.update_traits(cn.uuid, traits, generation=rp_gen)) + self.assertTrue(pt.has_traits(cn.uuid, traits)) + self.assertTrue(pt.has_traits(cn.uuid, traits[:1])) + + # Updating with the same traits info should return False + self.assertFalse(pt.have_traits_changed(cn.uuid, traits)) + # But the generation should get updated + rp_gen = 2 + self.assertFalse(pt.update_traits(cn.uuid, traits, generation=rp_gen)) + self.assertFalse(pt.have_traits_changed(cn.uuid, traits)) + self.assertEqual(rp_gen, pt.data(cn.uuid).generation) + self.assertTrue(pt.has_traits(cn.uuid, traits)) + self.assertTrue(pt.has_traits(cn.uuid, traits[:1])) + + # Make a change to the traits list + traits.append("HW_GPU_RESOLUTION_W800H600") + self.assertTrue(pt.have_traits_changed(cn.uuid, traits)) + # The previously-taken data now differs + self.assertTrue(pt.have_traits_changed(cn.uuid, cnsnap.traits)) + self.assertFalse(pt.has_traits(cn.uuid, traits[-1:])) + # Don't update the generation + self.assertTrue(pt.update_traits(cn.uuid, traits)) + self.assertEqual(rp_gen, pt.data(cn.uuid).generation) + self.assertTrue(pt.has_traits(cn.uuid, traits[-1:])) + + def test_add_remove_traits(self): + cn = self.compute_node1 + pt = self._pt_with_cns() + self.assertEqual(set([]), pt.data(cn.uuid).traits) + # Test adding with no trait provided for a bogus provider + pt.add_traits('bogus-uuid') + self.assertEqual( + set([]), + pt.data(cn.uuid).traits + ) + # Add a couple of traits + pt.add_traits(cn.uuid, "HW_GPU_API_DIRECT3D_V7_0", "HW_NIC_OFFLOAD_SG") + self.assertEqual( + set(["HW_GPU_API_DIRECT3D_V7_0", "HW_NIC_OFFLOAD_SG"]), + pt.data(cn.uuid).traits) + # set() behavior: add a trait that's already there, and one that's not. + # The unrelated one is unaffected. 
+ pt.add_traits(cn.uuid, "HW_GPU_API_DIRECT3D_V7_0", "HW_CPU_X86_AVX") + self.assertEqual( + set(["HW_GPU_API_DIRECT3D_V7_0", "HW_NIC_OFFLOAD_SG", + "HW_CPU_X86_AVX"]), + pt.data(cn.uuid).traits) + # Test removing with no trait provided for a bogus provider + pt.remove_traits('bogus-uuid') + self.assertEqual( + set(["HW_GPU_API_DIRECT3D_V7_0", "HW_NIC_OFFLOAD_SG", + "HW_CPU_X86_AVX"]), + pt.data(cn.uuid).traits) + # Now remove a trait + pt.remove_traits(cn.uuid, "HW_NIC_OFFLOAD_SG") + self.assertEqual( + set(["HW_GPU_API_DIRECT3D_V7_0", "HW_CPU_X86_AVX"]), + pt.data(cn.uuid).traits) + # set() behavior: remove a trait that's there, and one that's not. + # The unrelated one is unaffected. + pt.remove_traits(cn.uuid, + "HW_NIC_OFFLOAD_SG", "HW_GPU_API_DIRECT3D_V7_0") + self.assertEqual(set(["HW_CPU_X86_AVX"]), pt.data(cn.uuid).traits) + # Remove the last trait, and an unrelated one + pt.remove_traits(cn.uuid, "CUSTOM_FOO", "HW_CPU_X86_AVX") + self.assertEqual(set([]), pt.data(cn.uuid).traits) + + def test_have_aggregates_changed_no_existing_rp(self): + pt = self._pt_with_cns() + self.assertRaises( + ValueError, pt.have_aggregates_changed, uuids.non_existing_rp, []) + + def test_update_aggregates_no_existing_rp(self): + pt = self._pt_with_cns() + self.assertRaises( + ValueError, pt.update_aggregates, uuids.non_existing_rp, []) + + def test_have_aggregates_changed(self): + cn = self.compute_node1 + pt = self._pt_with_cns() + rp_gen = 1 + + aggregates = [ + uuids.agg1, + uuids.agg2, + ] + self.assertTrue(pt.have_aggregates_changed(cn.uuid, aggregates)) + self.assertTrue(pt.in_aggregates(cn.uuid, [])) + self.assertFalse(pt.in_aggregates(cn.uuid, aggregates)) + self.assertFalse(pt.in_aggregates(cn.uuid, aggregates[:1])) + self.assertTrue(pt.update_aggregates(cn.uuid, aggregates, + generation=rp_gen)) + self.assertTrue(pt.in_aggregates(cn.uuid, aggregates)) + self.assertTrue(pt.in_aggregates(cn.uuid, aggregates[:1])) + + # data() gets the same aggregates + cnsnap = pt.data(cn.uuid) + self.assertFalse( + pt.have_aggregates_changed(cn.uuid, cnsnap.aggregates)) + + # Updating with the same aggregates info should return False + self.assertFalse(pt.have_aggregates_changed(cn.uuid, aggregates)) + # But the generation should get updated + rp_gen = 2 + self.assertFalse(pt.update_aggregates(cn.uuid, aggregates, + generation=rp_gen)) + self.assertFalse(pt.have_aggregates_changed(cn.uuid, aggregates)) + self.assertEqual(rp_gen, pt.data(cn.uuid).generation) + self.assertTrue(pt.in_aggregates(cn.uuid, aggregates)) + self.assertTrue(pt.in_aggregates(cn.uuid, aggregates[:1])) + + # Make a change to the aggregates list + aggregates.append(uuids.agg3) + self.assertTrue(pt.have_aggregates_changed(cn.uuid, aggregates)) + self.assertFalse(pt.in_aggregates(cn.uuid, aggregates[-1:])) + # Don't update the generation + self.assertTrue(pt.update_aggregates(cn.uuid, aggregates)) + self.assertEqual(rp_gen, pt.data(cn.uuid).generation) + self.assertTrue(pt.in_aggregates(cn.uuid, aggregates[-1:])) + # Previously-taken data now differs + self.assertTrue(pt.have_aggregates_changed(cn.uuid, cnsnap.aggregates)) + + def test_add_remove_aggregates(self): + cn = self.compute_node1 + pt = self._pt_with_cns() + self.assertEqual(set([]), pt.data(cn.uuid).aggregates) + # Add a couple of aggregates + pt.add_aggregates(cn.uuid, uuids.agg1, uuids.agg2) + self.assertEqual( + set([uuids.agg1, uuids.agg2]), + pt.data(cn.uuid).aggregates) + # set() behavior: add an aggregate that's already there, and one that's + # not. 
The unrelated one is unaffected. + pt.add_aggregates(cn.uuid, uuids.agg1, uuids.agg3) + self.assertEqual(set([uuids.agg1, uuids.agg2, uuids.agg3]), + pt.data(cn.uuid).aggregates) + # Now remove an aggregate + pt.remove_aggregates(cn.uuid, uuids.agg2) + self.assertEqual(set([uuids.agg1, uuids.agg3]), + pt.data(cn.uuid).aggregates) + # set() behavior: remove an aggregate that's there, and one that's not. + # The unrelated one is unaffected. + pt.remove_aggregates(cn.uuid, uuids.agg2, uuids.agg3) + self.assertEqual(set([uuids.agg1]), pt.data(cn.uuid).aggregates) + # Remove the last aggregate, and an unrelated one + pt.remove_aggregates(cn.uuid, uuids.agg4, uuids.agg1) + self.assertEqual(set([]), pt.data(cn.uuid).aggregates) diff --git a/zun/tests/unit/fake_requests.py b/zun/tests/unit/fake_requests.py new file mode 100644 index 000000000..da2652201 --- /dev/null +++ b/zun/tests/unit/fake_requests.py @@ -0,0 +1,39 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +"""Fakes relating to the `requests` module.""" + +import requests + + +class FakeResponse(requests.Response): + def __init__(self, status_code, content=None, headers=None): + """A requests.Response that can be used as a mock return_value. + + A key feature is that the instance will evaluate to True or False like + a real Response, based on the status_code. + + Properties like ok, status_code, text, and content, and methods like + json(), work as expected based on the inputs. + + :param status_code: Integer HTTP response code (200, 404, etc.) + :param content: String supplying the payload content of the response. + Using a json-encoded string will make the json() method + behave as expected. + :param headers: Dict of HTTP header values to set. + """ + super(FakeResponse, self).__init__() + self.status_code = status_code + if content: + self._content = content.encode('utf-8') + self.encoding = 'utf-8' + if headers: + self.headers = headers diff --git a/zun/tests/unit/objects/test_objects.py b/zun/tests/unit/objects/test_objects.py index 6281c0fe7..44f750083 100644 --- a/zun/tests/unit/objects/test_objects.py +++ b/zun/tests/unit/objects/test_objects.py @@ -371,6 +371,7 @@ object_data = { 'Network': '1.1-26e8d37a54e5fc905ede657744a221d9', 'ExecInstance': '1.0-59464e7b96db847c0abb1e96d3cec30a', 'Registry': '1.0-36c2053fbc30e0021630e657dd1699c9', + 'RequestGroup': '1.0-5e08d68d0a63b729778340d608ec4eae', } diff --git a/zun/tests/unit/objects/utils.py b/zun/tests/unit/objects/utils.py index f788cd0ba..4ab997f6e 100644 --- a/zun/tests/unit/objects/utils.py +++ b/zun/tests/unit/objects/utils.py @@ -13,6 +13,7 @@ from zun import objects +from zun.objects.numa import NUMATopology from zun.tests.unit.db import utils as db_utils @@ -62,3 +63,20 @@ def get_test_registry(context, **kwargs): for key in db_registry: setattr(registry, key, db_registry[key]) return registry + + +def get_test_compute_node(context, **kwargs): + """Return a test compute node object with appropriate attributes. 
+ + NOTE: The object leaves the attributes marked as changed, such + that a create() could be used to commit it to the DB. + """ + db_compute_node = db_utils.get_test_compute_node(**kwargs) + compute_node = objects.ComputeNode(context) + for key in db_compute_node: + if key == 'numa_topology': + numa_obj = NUMATopology._from_dict(db_compute_node[key]) + compute_node.numa_topology = numa_obj + else: + setattr(compute_node, key, db_compute_node[key]) + return compute_node diff --git a/zun/tests/unit/scheduler/client/__init__.py b/zun/tests/unit/scheduler/client/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/zun/tests/unit/scheduler/test_client.py b/zun/tests/unit/scheduler/client/test_query.py similarity index 96% rename from zun/tests/unit/scheduler/test_client.py rename to zun/tests/unit/scheduler/client/test_query.py index 223cf107d..6363e56c6 100644 --- a/zun/tests/unit/scheduler/test_client.py +++ b/zun/tests/unit/scheduler/client/test_query.py @@ -14,7 +14,7 @@ import mock from oslo_config import cfg -from zun.scheduler import client as scheduler_client +from zun.scheduler.client import query as scheduler_client from zun.scheduler import filter_scheduler from zun.tests import base from zun.tests.unit.scheduler import fakes diff --git a/zun/tests/unit/scheduler/client/test_report.py b/zun/tests/unit/scheduler/client/test_report.py new file mode 100644 index 000000000..69fd84348 --- /dev/null +++ b/zun/tests/unit/scheduler/client/test_report.py @@ -0,0 +1,4010 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import copy +import time + +import fixtures +from keystoneauth1 import exceptions as ks_exc +import mock +import os_resource_classes as orc +from oslo_serialization import jsonutils +from oslo_utils.fixture import uuidsentinel as uuids +import six +from six.moves.urllib import parse + +from zun.common import context +from zun.common import exception +from zun.common import utils as zun_utils +import zun.conf +from zun.scheduler.client import report +from zun.scheduler import utils as scheduler_utils +from zun.tests import base +from zun.tests.unit import fake_requests +from zun.tests.unit.objects import utils + + +CONF = zun.conf.CONF + + +class SchedulerReportClientTestCase(base.TestCase): + + def setUp(self): + super(SchedulerReportClientTestCase, self).setUp() + self.context = context.get_admin_context() + self.ks_adap_mock = mock.Mock() + self.compute_node = utils.get_test_compute_node( + self.context, + uuid=uuids.compute_node, + hostname='foo', + vcpus=8, + cpu_allocation_ratio=16.0, + memory_mb=1024, + ram_allocation_ratio=1.5, + local_gb=10, + disk_allocation_ratio=1.0, + ) + + with zun_utils.nested_contexts( + mock.patch( + 'keystoneauth1.loading.adapter.Adapter.load_from_options', + return_value=self.ks_adap_mock), + mock.patch('zun.common.clients.OpenStackClients.keystone') + ): + self.client = report.SchedulerReportClient() + + def _init_provider_tree(self, generation_override=None, + resources_override=None): + cn = self.compute_node + resources = resources_override + if resources_override is None: + resources = { + 'VCPU': { + 'total': cn.vcpus, + 'reserved': 0, + 'min_unit': 1, + 'max_unit': cn.vcpus, + 'step_size': 1, + 'allocation_ratio': cn.cpu_allocation_ratio, + }, + 'MEMORY_MB': { + 'total': cn.memory_mb, + 'reserved': 512, + 'min_unit': 1, + 'max_unit': cn.memory_mb, + 'step_size': 1, + 'allocation_ratio': cn.ram_allocation_ratio, + }, + 'DISK_GB': { + 'total': cn.local_gb, + 'reserved': 0, + 'min_unit': 1, + 'max_unit': cn.local_gb, + 'step_size': 1, + 'allocation_ratio': cn.disk_allocation_ratio, + }, + } + generation = generation_override or 1 + rp_uuid = self.client._provider_tree.new_root( + cn.hypervisor_hostname, + cn.uuid, + generation=generation, + ) + self.client._provider_tree.update_inventory(rp_uuid, resources) + + def _validate_provider(self, name_or_uuid, **kwargs): + """Validates existence and values of a provider in this client's + _provider_tree. + + :param name_or_uuid: The name or UUID of the provider to validate. + :param kwargs: Optional keyword arguments of ProviderData attributes + whose values are to be validated. 
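+
+        Example (illustrative only; the attribute values are hypothetical)::
+
+            self._validate_provider(uuids.compute_node, name='foo',
+                                    generation=1)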
+ """ + found = self.client._provider_tree.data(name_or_uuid) + # If kwargs provided, their names indicate ProviderData attributes + for attr, expected in kwargs.items(): + try: + self.assertEqual(getattr(found, attr), expected) + except AttributeError: + self.fail("Provider with name or UUID %s doesn't have " + "attribute %s (expected value: %s)" % + (name_or_uuid, attr, expected)) + + +class TestPutAllocations(SchedulerReportClientTestCase): + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.put') + def test_put_allocations(self, mock_put): + mock_put.return_value.status_code = 204 + mock_put.return_value.text = "cool" + rp_uuid = mock.sentinel.rp + consumer_uuid = mock.sentinel.consumer + data = {"MEMORY_MB": 1024} + expected_url = "/allocations/%s" % consumer_uuid + payload = { + "allocations": { + rp_uuid: {"resources": data} + }, + "project_id": mock.sentinel.project_id, + "user_id": mock.sentinel.user_id, + "consumer_generation": mock.sentinel.consumer_generation + } + resp = self.client.put_allocations( + self.context, consumer_uuid, payload) + self.assertTrue(resp) + mock_put.assert_called_once_with( + expected_url, payload, version='1.28', + global_request_id=self.context.global_id) + + @mock.patch.object(report.LOG, 'warning') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.put') + def test_put_allocations_fail(self, mock_put, mock_warn): + mock_put.return_value.status_code = 400 + mock_put.return_value.text = "not cool" + rp_uuid = mock.sentinel.rp + consumer_uuid = mock.sentinel.consumer + data = {"MEMORY_MB": 1024} + expected_url = "/allocations/%s" % consumer_uuid + payload = { + "allocations": { + rp_uuid: {"resources": data} + }, + "project_id": mock.sentinel.project_id, + "user_id": mock.sentinel.user_id, + "consumer_generation": mock.sentinel.consumer_generation + } + resp = self.client.put_allocations( + self.context, consumer_uuid, payload) + + self.assertFalse(resp) + mock_put.assert_called_once_with( + expected_url, payload, version='1.28', + global_request_id=self.context.global_id) + log_msg = mock_warn.call_args[0][0] + self.assertIn("Failed to save allocation for", log_msg) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.put') + def test_put_allocations_fail_due_to_consumer_generation_conflict( + self, mock_put): + mock_put.return_value = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': 'consumer generation conflict'}]})) + + rp_uuid = mock.sentinel.rp + consumer_uuid = mock.sentinel.consumer + data = {"MEMORY_MB": 1024} + expected_url = "/allocations/%s" % consumer_uuid + payload = { + "allocations": { + rp_uuid: {"resources": data} + }, + "project_id": mock.sentinel.project_id, + "user_id": mock.sentinel.user_id, + "consumer_generation": mock.sentinel.consumer_generation + } + self.assertRaises(exception.AllocationUpdateFailed, + self.client.put_allocations, + self.context, consumer_uuid, payload) + + mock_put.assert_called_once_with( + expected_url, mock.ANY, version='1.28', + global_request_id=self.context.global_id) + + @mock.patch('time.sleep', new=mock.Mock()) + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.put') + def test_put_allocations_retries_conflict(self, mock_put): + failed = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': ''}]})) + + succeeded = mock.MagicMock() + succeeded.status_code = 204 + + 
mock_put.side_effect = (failed, succeeded) + + rp_uuid = mock.sentinel.rp + consumer_uuid = mock.sentinel.consumer + data = {"MEMORY_MB": 1024} + expected_url = "/allocations/%s" % consumer_uuid + payload = { + "allocations": { + rp_uuid: {"resources": data} + }, + "project_id": mock.sentinel.project_id, + "user_id": mock.sentinel.user_id, + "consumer_generation": mock.sentinel.consumer_generation + } + resp = self.client.put_allocations( + self.context, consumer_uuid, payload) + self.assertTrue(resp) + mock_put.assert_has_calls([ + mock.call(expected_url, payload, version='1.28', + global_request_id=self.context.global_id)] * 2) + + @mock.patch('time.sleep', new=mock.Mock()) + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.put') + def test_put_allocations_retry_gives_up(self, mock_put): + + failed = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': ''}]})) + + mock_put.return_value = failed + + rp_uuid = mock.sentinel.rp + consumer_uuid = mock.sentinel.consumer + data = {"MEMORY_MB": 1024} + expected_url = "/allocations/%s" % consumer_uuid + payload = { + "allocations": { + rp_uuid: {"resources": data} + }, + "project_id": mock.sentinel.project_id, + "user_id": mock.sentinel.user_id, + "consumer_generation": mock.sentinel.consumer_generation + } + resp = self.client.put_allocations( + self.context, consumer_uuid, payload) + self.assertFalse(resp) + mock_put.assert_has_calls([ + mock.call(expected_url, payload, version='1.28', + global_request_id=self.context.global_id)] * 3) + + def test_claim_resources_success(self): + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.return_value = { + 'allocations': {}, # build instance, not move + } + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mock = mock.Mock(status_code=204) + self.ks_adap_mock.put.return_value = resp_mock + consumer_uuid = uuids.consumer_uuid + alloc_req = { + 'allocations': { + uuids.cn1: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + } + }, + }, + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.12') + + expected_url = "/allocations/%s" % consumer_uuid + expected_payload = {'allocations': { + rp_uuid: alloc + for rp_uuid, alloc in alloc_req['allocations'].items()}} + expected_payload['project_id'] = project_id + expected_payload['user_id'] = user_id + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.12', json=expected_payload, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + self.assertTrue(res) + + def test_claim_resources_older_alloc_req(self): + """Test the case when a stale allocation request is sent to the report + client to claim + """ + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.return_value = { + 'allocations': {}, # build instance, not move + } + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mock = mock.Mock(status_code=204) + self.ks_adap_mock.put.return_value = resp_mock + consumer_uuid = uuids.consumer_uuid + alloc_req = { + 'allocations': { + uuids.cn1: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + } + }, + }, + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.12') + + 
expected_url = "/allocations/%s" % consumer_uuid + expected_payload = { + 'allocations': { + rp_uuid: res + for rp_uuid, res in alloc_req['allocations'].items()}, + # no consumer generation in the payload as the caller requested + # older microversion to be used + 'project_id': project_id, + 'user_id': user_id} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.12', json=expected_payload, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertTrue(res) + + def test_claim_resources_success_resize_to_same_host_no_shared(self): + """Tests resize to the same host operation. In this case allocation + exists against the same host RP but with the migration_uuid. + """ + get_current_allocations_resp_mock = mock.Mock(status_code=200) + # source host allocation held by the migration_uuid so it is not + # not returned to the claim code as that asks for the instance_uuid + # consumer + get_current_allocations_resp_mock.json.return_value = { + 'allocations': {}, + "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + self.ks_adap_mock.get.return_value = get_current_allocations_resp_mock + put_allocations_resp_mock = mock.Mock(status_code=204) + self.ks_adap_mock.put.return_value = put_allocations_resp_mock + consumer_uuid = uuids.consumer_uuid + # This is the resize-up allocation where VCPU, MEMORY_MB and DISK_GB + # are all being increased but on the same host. We also throw a custom + # resource class in the new allocation to make sure it's not lost + alloc_req = { + 'allocations': { + uuids.same_host: { + 'resources': { + 'VCPU': 2, + 'MEMORY_MB': 2048, + 'DISK_GB': 40, + 'CUSTOM_FOO': 1 + } + }, + }, + # this allocation request comes from the scheduler therefore it + # does not have consumer_generation in it. + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + expected_payload = { + 'allocations': { + uuids.same_host: { + 'resources': { + 'VCPU': 2, + 'MEMORY_MB': 2048, + 'DISK_GB': 40, + 'CUSTOM_FOO': 1 + } + }, + }, + # report client assumes a new consumer in this case + 'consumer_generation': None, + 'project_id': project_id, + 'user_id': user_id} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + + self.assertTrue(res) + + def test_claim_resources_success_resize_to_same_host_with_shared(self): + """Tests resize to the same host operation. In this case allocation + exists against the same host RP and the shared RP but with the + migration_uuid. 
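+
+        Because the GET for the instance consumer returns no allocations, the
+        report client is expected to treat this as a brand new consumer and
+        send consumer_generation=None in the PUT payload asserted below.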
+ """ + get_current_allocations_resp_mock = mock.Mock(status_code=200) + # source host allocation held by the migration_uuid so it is not + # not returned to the claim code as that asks for the instance_uuid + # consumer + get_current_allocations_resp_mock.json.return_value = { + 'allocations': {}, + "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + self.ks_adap_mock.get.return_value = get_current_allocations_resp_mock + put_allocations_resp_mock = mock.Mock(status_code=204) + self.ks_adap_mock.put.return_value = put_allocations_resp_mock + consumer_uuid = uuids.consumer_uuid + # This is the resize-up allocation where VCPU, MEMORY_MB and DISK_GB + # are all being increased but on the same host. We also throw a custom + # resource class in the new allocation to make sure it's not lost + alloc_req = { + 'allocations': { + uuids.same_host: { + 'resources': { + 'VCPU': 2, + 'MEMORY_MB': 2048, + 'CUSTOM_FOO': 1 + } + }, + uuids.shared_storage: { + 'resources': { + 'DISK_GB': 40, + } + }, + }, + # this allocation request comes from the scheduler therefore it + # does not have consumer_generation in it. + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + expected_payload = { + 'allocations': { + uuids.same_host: { + 'resources': { + 'VCPU': 2, + 'MEMORY_MB': 2048, + 'CUSTOM_FOO': 1 + } + }, + uuids.shared_storage: { + 'resources': { + 'DISK_GB': 40, + } + }, + }, + # report client assumes a new consumer in this case + 'consumer_generation': None, + 'project_id': project_id, + 'user_id': user_id} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + + self.assertTrue(res) + + def test_claim_resources_success_evacuate_no_shared(self): + """Tests non-forced evacuate. In this case both the source and the + dest allocation are held by the instance_uuid in placement. So the + claim code needs to merge allocations. The second claim comes from the + scheduler and therefore it does not have consumer_generation in it. + """ + # the source allocation is also held by the instance_uuid so report + # client will see it. 
+ current_allocs = { + 'allocations': { + uuids.source_host: { + 'generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20 + }, + }, + }, + "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + self.ks_adap_mock.get.return_value = fake_requests.FakeResponse( + status_code=200, + content=jsonutils.dumps(current_allocs)) + put_allocations_resp_mock = fake_requests.FakeResponse(status_code=204) + self.ks_adap_mock.put.return_value = put_allocations_resp_mock + consumer_uuid = uuids.consumer_uuid + # this is an evacuate so we have the same resources request towards the + # dest host + alloc_req = { + 'allocations': { + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20, + } + }, + }, + # this allocation request comes from the scheduler therefore it + # does not have consumer_generation in it. + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + # we expect that both the source and dest allocations are here + expected_payload = { + 'allocations': { + uuids.source_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20 + }, + }, + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20, + } + }, + }, + # report client uses the consumer_generation that it got from + # placement when asked for the existing allocations + 'consumer_generation': 1, + 'project_id': project_id, + 'user_id': user_id} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + + self.assertTrue(res) + + def test_claim_resources_success_evacuate_with_shared(self): + """Similar test that test_claim_resources_success_evacuate_no_shared + but adds shared disk into the mix. + """ + # the source allocation is also held by the instance_uuid so report + # client will see it. + current_allocs = { + 'allocations': { + uuids.source_host: { + 'generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.shared_storage: { + 'generation': 42, + 'resources': { + 'DISK_GB': 20, + }, + }, + }, + "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + self.ks_adap_mock.get.return_value = fake_requests.FakeResponse( + status_code=200, + content=jsonutils.dumps(current_allocs)) + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse( + status_code=204) + consumer_uuid = uuids.consumer_uuid + # this is an evacuate so we have the same resources request towards the + # dest host + alloc_req = { + 'allocations': { + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.shared_storage: { + 'generation': 42, + 'resources': { + 'DISK_GB': 20, + }, + }, + }, + # this allocation request comes from the scheduler therefore it + # does not have consumer_generation in it. 
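+            # (the shared storage provider appears both in the existing
+            # allocations and in this request; the merged payload asserted
+            # below is expected to keep a single DISK_GB entry against it)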
+ "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + # we expect that both the source and dest allocations are here plus the + # shared storage allocation + expected_payload = { + 'allocations': { + uuids.source_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + } + }, + uuids.shared_storage: { + 'resources': { + 'DISK_GB': 20, + }, + }, + }, + # report client uses the consumer_generation that got from + # placement when asked for the existing allocations + 'consumer_generation': 1, + 'project_id': project_id, + 'user_id': user_id} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + + self.assertTrue(res) + + def test_claim_resources_success_force_evacuate_no_shared(self): + """Tests forced evacuate. In this case both the source and the + dest allocation are held by the instance_uuid in placement. So the + claim code needs to merge allocations. The second claim comes from the + conductor and therefore it does have consumer_generation in it. + """ + # the source allocation is also held by the instance_uuid so report + # client will see it. + current_allocs = { + 'allocations': { + uuids.source_host: { + 'generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20 + }, + }, + }, + "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + self.ks_adap_mock.get.return_value = fake_requests.FakeResponse( + status_code=200, + content=jsonutils.dumps(current_allocs)) + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse( + status_code=204) + consumer_uuid = uuids.consumer_uuid + # this is an evacuate so we have the same resources request towards the + # dest host + alloc_req = { + 'allocations': { + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20, + } + }, + }, + # this allocation request comes from the conductor that read the + # allocation from placement therefore it has consumer_generation in + # it. 
+ "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + # we expect that both the source and dest allocations are here + expected_payload = { + 'allocations': { + uuids.source_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20 + }, + }, + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + 'DISK_GB': 20, + } + }, + }, + # report client uses the consumer_generation that it got in the + # allocation request + 'consumer_generation': 1, + 'project_id': project_id, + 'user_id': user_id} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + + self.assertTrue(res) + + def test_claim_resources_success_force_evacuate_with_shared(self): + """Similar test that + test_claim_resources_success_force_evacuate_no_shared but adds shared + disk into the mix. + """ + # the source allocation is also held by the instance_uuid so report + # client will see it. + current_allocs = { + 'allocations': { + uuids.source_host: { + 'generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.shared_storage: { + 'generation': 42, + 'resources': { + 'DISK_GB': 20, + }, + }, + }, + "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + self.ks_adap_mock.get.return_value = fake_requests.FakeResponse( + status_code=200, + content=jsonutils.dumps(current_allocs)) + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse( + status_code=204) + consumer_uuid = uuids.consumer_uuid + # this is an evacuate so we have the same resources request towards the + # dest host + alloc_req = { + 'allocations': { + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.shared_storage: { + 'generation': 42, + 'resources': { + 'DISK_GB': 20, + }, + }, + }, + # this allocation request comes from the conductor that read the + # allocation from placement therefore it has consumer_generation in + # it. 
+ "consumer_generation": 1, + "project_id": uuids.project_id, + "user_id": uuids.user_id + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + # we expect that both the source and dest allocations are here plus the + # shared storage allocation + expected_payload = { + 'allocations': { + uuids.source_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.dest_host: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + } + }, + uuids.shared_storage: { + 'resources': { + 'DISK_GB': 20, + }, + }, + }, + # report client uses the consumer_generation that it got in the + # allocation request + 'consumer_generation': 1, + 'project_id': project_id, + 'user_id': user_id} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + + self.assertTrue(res) + + @mock.patch('time.sleep', new=mock.Mock()) + def test_claim_resources_fail_due_to_rp_generation_retry_success(self): + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.return_value = { + 'allocations': {}, # build instance, not move + } + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mocks = [ + fake_requests.FakeResponse( + 409, + jsonutils.dumps( + {'errors': [ + {'code': 'placement.concurrent_update', + 'detail': ''}]})), + fake_requests.FakeResponse(204) + ] + self.ks_adap_mock.put.side_effect = resp_mocks + consumer_uuid = uuids.consumer_uuid + alloc_req = { + 'allocations': { + uuids.cn1: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + } + }, + }, + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + expected_payload = { + 'allocations': + {rp_uuid: res + for rp_uuid, res in alloc_req['allocations'].items()} + } + expected_payload['project_id'] = project_id + expected_payload['user_id'] = user_id + expected_payload['consumer_generation'] = None + # We should have exactly two calls to the placement API that look + # identical since we're retrying the same HTTP request + expected_calls = [ + mock.call(expected_url, microversion='1.28', json=expected_payload, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': + self.context.global_id})] * 2 + self.assertEqual(len(expected_calls), + self.ks_adap_mock.put.call_count) + self.ks_adap_mock.put.assert_has_calls(expected_calls) + + self.assertTrue(res) + + @mock.patch.object(report.LOG, 'warning') + def test_claim_resources_failure(self, mock_log): + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.return_value = { + 'allocations': {}, # build instance, not move + } + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mock = fake_requests.FakeResponse( + 409, + jsonutils.dumps( + {'errors': [ + {'code': 'something else', + 'detail': 'not cool'}]})) + + self.ks_adap_mock.put.return_value = resp_mock + consumer_uuid = uuids.consumer_uuid + 
alloc_req = { + 'allocations': { + uuids.cn1: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + } + }, + }, + } + + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.claim_resources(self.context, consumer_uuid, + alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + expected_payload = { + 'allocations': + {rp_uuid: res + for rp_uuid, res in alloc_req['allocations'].items()} + } + expected_payload['project_id'] = project_id + expected_payload['user_id'] = user_id + expected_payload['consumer_generation'] = None + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=expected_payload, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + self.assertFalse(res) + self.assertTrue(mock_log.called) + + def test_claim_resources_consumer_generation_failure(self): + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.return_value = { + 'allocations': {}, # build instance, not move + } + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mock = fake_requests.FakeResponse( + 409, + jsonutils.dumps( + {'errors': [ + {'code': 'placement.concurrent_update', + 'detail': 'consumer generation conflict'}]})) + + self.ks_adap_mock.put.return_value = resp_mock + consumer_uuid = uuids.consumer_uuid + alloc_req = { + 'allocations': { + uuids.cn1: { + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + } + }, + }, + } + + project_id = uuids.project_id + user_id = uuids.user_id + self.assertRaises(exception.AllocationUpdateFailed, + self.client.claim_resources, self.context, + consumer_uuid, alloc_req, project_id, user_id, + allocation_request_version='1.28') + + expected_url = "/allocations/%s" % consumer_uuid + expected_payload = { + 'allocations': { + rp_uuid: res + for rp_uuid, res in alloc_req['allocations'].items()}, + 'project_id': project_id, + 'user_id': user_id, + 'consumer_generation': None} + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=expected_payload, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + def test_remove_provider_from_inst_alloc_no_shared(self): + """Tests that the method which manipulates an existing doubled-up + allocation for a move operation to remove the source host results in + sending placement the proper payload to PUT + /allocations/{consumer_uuid} call. 
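+
+        The method is expected to first GET the consumer's current allocations
+        and the resource providers in the source tree, then PUT back only the
+        allocations that are not held against providers in that tree.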
+ """ + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.side_effect = [{ + 'allocations': { + uuids.source: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': uuids.project_id, + 'user_id': uuids.user_id, + }, + # the second get is for resource providers in the compute tree, + # return just the compute + { + "resource_providers": [ + { + "uuid": uuids.source, + }, + ] + }, + ] + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mock = mock.Mock(status_code=204) + self.ks_adap_mock.put.return_value = resp_mock + consumer_uuid = uuids.consumer_uuid + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.remove_provider_tree_from_container_allocation( + self.context, consumer_uuid, uuids.source) + + expected_url = "/allocations/%s" % consumer_uuid + # New allocations should only include the destination... + expected_payload = { + 'allocations': { + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': project_id, + 'user_id': user_id + } + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + self.assertTrue(res) + + def test_remove_provider_from_inst_alloc_with_shared(self): + """Tests that the method which manipulates an existing doubled-up + allocation with DISK_GB being consumed from a shared storage provider + for a move operation to remove the source host results in sending + placement the proper payload to PUT /allocations/{consumer_uuid} + call. + """ + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.side_effect = [{ + 'allocations': { + uuids.source: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.shared_storage: { + 'resource_provider_generation': 42, + 'resources': { + 'DISK_GB': 100, + }, + }, + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': uuids.project_id, + 'user_id': uuids.user_id, + }, + # the second get is for resource providers in the compute tree, + # return just the compute + { + "resource_providers": [ + { + "uuid": uuids.source, + }, + ] + }, + ] + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mock = mock.Mock(status_code=204) + self.ks_adap_mock.put.return_value = resp_mock + consumer_uuid = uuids.consumer_uuid + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.remove_provider_tree_from_container_allocation( + self.context, consumer_uuid, uuids.source) + + expected_url = "/allocations/%s" % consumer_uuid + # New allocations should only include the destination... 
+ expected_payload = { + 'allocations': { + uuids.shared_storage: { + 'resource_provider_generation': 42, + 'resources': { + 'DISK_GB': 100, + }, + }, + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': project_id, + 'user_id': user_id + } + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + self.assertTrue(res) + + def test_remove_provider_from_inst_alloc_no_source(self): + """Tests that if remove_provider_tree_from_container_allocation() fails + to find any allocations for the source host, it just returns True and + does not attempt to rewrite the allocation for the consumer. + """ + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.side_effect = [{ + # Act like the allocations already did not include the source host + # for some reason + 'allocations': { + uuids.shared_storage: { + 'resource_provider_generation': 42, + 'resources': { + 'DISK_GB': 100, + }, + }, + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': uuids.project_id, + 'user_id': uuids.user_id, + }, + # the second get is for resource providers in the compute tree, + # return just the compute + { + "resource_providers": [ + { + "uuid": uuids.source, + }, + ] + }, + ] + self.ks_adap_mock.get.return_value = get_resp_mock + consumer_uuid = uuids.consumer_uuid + res = self.client.remove_provider_tree_from_container_allocation( + self.context, consumer_uuid, uuids.source) + + self.ks_adap_mock.get.assert_called() + self.ks_adap_mock.put.assert_not_called() + + self.assertTrue(res) + + def test_remove_provider_from_inst_alloc_fail_get_allocs(self): + self.ks_adap_mock.get.return_value = fake_requests.FakeResponse( + status_code=500) + consumer_uuid = uuids.consumer_uuid + self.assertRaises( + exception.ConsumerAllocationRetrievalFailed, + self.client.remove_provider_tree_from_container_allocation, + self.context, consumer_uuid, uuids.source) + + self.ks_adap_mock.get.assert_called() + self.ks_adap_mock.put.assert_not_called() + + def test_remove_provider_from_inst_alloc_consumer_gen_conflict(self): + get_resp_mock = mock.Mock(status_code=200) + get_resp_mock.json.side_effect = [{ + 'allocations': { + uuids.source: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': uuids.project_id, + 'user_id': uuids.user_id, + }, + # the second get is for resource providers in the compute tree, + # return just the compute + { + "resource_providers": [ + { + "uuid": uuids.source, + }, + ] + }, + ] + self.ks_adap_mock.get.return_value = get_resp_mock + resp_mock = mock.Mock(status_code=409) + self.ks_adap_mock.put.return_value = resp_mock + consumer_uuid = uuids.consumer_uuid + res = self.client.remove_provider_tree_from_container_allocation( + self.context, consumer_uuid, uuids.source) + + 
self.assertFalse(res) + + def test_remove_provider_tree_from_inst_alloc_nested(self): + self.ks_adap_mock.get.side_effect = [ + fake_requests.FakeResponse( + status_code=200, + content=jsonutils.dumps( + { + 'allocations': { + uuids.source_compute: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + uuids.source_nested: { + 'resource_provider_generation': 42, + 'resources': { + 'CUSTOM_MAGIC': 1 + }, + }, + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': uuids.project_id, + 'user_id': uuids.user_id, + })), + # the second get is for resource providers in the compute tree, + # return both RPs in the source compute tree + fake_requests.FakeResponse( + status_code=200, + content=jsonutils.dumps( + { + "resource_providers": [ + { + "uuid": uuids.source_compute, + }, + { + "uuid": uuids.source_nested, + }, + ] + })) + ] + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse( + status_code=204) + consumer_uuid = uuids.consumer_uuid + project_id = uuids.project_id + user_id = uuids.user_id + res = self.client.remove_provider_tree_from_container_allocation( + self.context, consumer_uuid, uuids.source_compute) + + expected_url = "/allocations/%s" % consumer_uuid + # New allocations should only include the destination... + expected_payload = { + 'allocations': { + uuids.destination: { + 'resource_provider_generation': 42, + 'resources': { + 'VCPU': 1, + 'MEMORY_MB': 1024, + }, + }, + }, + 'consumer_generation': 1, + 'project_id': project_id, + 'user_id': user_id + } + + self.assertEqual( + [ + mock.call( + '/allocations/%s' % consumer_uuid, + endpoint_filter=mock.ANY, + headers=mock.ANY, + microversion='1.28' + ), + mock.call( + '/resource_providers?in_tree=%s' % uuids.source_compute, + headers=mock.ANY, + endpoint_filter=mock.ANY, + microversion='1.14' + ) + ], + self.ks_adap_mock.get.mock_calls) + + # We have to pull the json body from the mock call_args to validate + # it separately otherwise hash seed issues get in the way. + actual_payload = self.ks_adap_mock.put.call_args[1]['json'] + self.assertEqual(expected_payload, actual_payload) + self.ks_adap_mock.put.assert_called_once_with( + expected_url, microversion='1.28', json=mock.ANY, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + self.assertTrue(res) + + +class TestMoveAllocations(SchedulerReportClientTestCase): + + def setUp(self): + super(TestMoveAllocations, self).setUp() + # We want to reuse the mock throughout the class, but with + # different return values. 
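+        # move_allocations is expected to POST a single request that empties
+        # the source consumer and rewrites the target, roughly:
+        #     {target_consumer: {'allocations': <source's allocations>,
+        #                        'consumer_generation': <target's, or None>,
+        #                        'project_id': ..., 'user_id': ...},
+        #      source_consumer: {'allocations': {},
+        #                        'consumer_generation': <source's>,
+        #                        'project_id': ..., 'user_id': ...}}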
+ patcher = mock.patch( + 'zun.scheduler.client.report.SchedulerReportClient.post') + self.mock_post = patcher.start() + self.addCleanup(patcher.stop) + self.mock_post.return_value.status_code = 204 + self.rp_uuid = mock.sentinel.rp + self.consumer_uuid = mock.sentinel.consumer + self.data = {"MEMORY_MB": 1024} + patcher = mock.patch( + 'zun.scheduler.client.report.SchedulerReportClient.get') + self.mock_get = patcher.start() + self.addCleanup(patcher.stop) + + self.project_id = mock.sentinel.project_id + self.user_id = mock.sentinel.user_id + + self.mock_post.return_value.status_code = 204 + self.rp_uuid = mock.sentinel.rp + self.source_consumer_uuid = mock.sentinel.source_consumer + self.target_consumer_uuid = mock.sentinel.target_consumer + self.source_consumer_data = { + "allocations": { + self.rp_uuid: { + "generation": 1, + "resources": { + "MEMORY_MB": 1024 + } + } + }, + "consumer_generation": 2, + "project_id": self.project_id, + "user_id": self.user_id + } + self.source_rsp = mock.Mock() + self.source_rsp.json.return_value = self.source_consumer_data + self.target_consumer_data = { + "allocations": { + self.rp_uuid: { + "generation": 1, + "resources": { + "MEMORY_MB": 2048 + } + } + }, + "consumer_generation": 1, + "project_id": self.project_id, + "user_id": self.user_id + } + self.target_rsp = mock.Mock() + self.target_rsp.json.return_value = self.target_consumer_data + self.mock_get.side_effect = [self.source_rsp, self.target_rsp] + self.expected_url = '/allocations' + self.expected_microversion = '1.28' + + def test_url_microversion(self): + resp = self.client.move_allocations( + self.context, self.source_consumer_uuid, self.target_consumer_uuid) + + self.assertTrue(resp) + self.mock_post.assert_called_once_with( + self.expected_url, mock.ANY, + version=self.expected_microversion, + global_request_id=self.context.global_id) + + def test_move_to_empty_target(self): + self.target_consumer_data = {"allocations": {}} + target_rsp = mock.Mock() + target_rsp.json.return_value = self.target_consumer_data + self.mock_get.side_effect = [self.source_rsp, target_rsp] + + expected_payload = { + self.target_consumer_uuid: { + "allocations": { + self.rp_uuid: { + "resources": { + "MEMORY_MB": 1024 + }, + "generation": 1 + } + }, + "consumer_generation": None, + "project_id": self.project_id, + "user_id": self.user_id, + }, + self.source_consumer_uuid: { + "allocations": {}, + "consumer_generation": 2, + "project_id": self.project_id, + "user_id": self.user_id, + } + } + + resp = self.client.move_allocations( + self.context, self.source_consumer_uuid, self.target_consumer_uuid) + + self.assertTrue(resp) + self.mock_post.assert_called_once_with( + self.expected_url, expected_payload, + version=self.expected_microversion, + global_request_id=self.context.global_id) + + @mock.patch('zun.scheduler.client.report.LOG.info') + def test_move_from_empty_source(self, mock_info): + """Tests the case that the target has allocations but the source does + not so the move_allocations method assumes the allocations were already + moved and returns True without trying to POST /allocations. 
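+
+        The method should log 'Allocations not found for consumer', skip the
+        POST and still return True, which is what the assertions below verify.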
+ """ + source_consumer_data = {"allocations": {}} + source_rsp = mock.Mock() + source_rsp.json.return_value = source_consumer_data + self.mock_get.side_effect = [source_rsp, self.target_rsp] + + resp = self.client.move_allocations( + self.context, self.source_consumer_uuid, self.target_consumer_uuid) + + self.assertTrue(resp) + self.mock_post.assert_not_called() + mock_info.assert_called_once() + self.assertIn('Allocations not found for consumer', + mock_info.call_args[0][0]) + + def test_move_to_non_empty_target(self): + self.mock_get.side_effect = [self.source_rsp, self.target_rsp] + + expected_payload = { + self.target_consumer_uuid: { + "allocations": { + self.rp_uuid: { + "resources": { + "MEMORY_MB": 1024 + }, + "generation": 1 + } + }, + "consumer_generation": 1, + "project_id": self.project_id, + "user_id": self.user_id, + }, + self.source_consumer_uuid: { + "allocations": {}, + "consumer_generation": 2, + "project_id": self.project_id, + "user_id": self.user_id, + } + } + + with mock.patch('zun.scheduler.client.report.LOG') as mock_log: + resp = self.client.move_allocations( + self.context, self.source_consumer_uuid, + self.target_consumer_uuid) + + self.assertTrue(resp) + self.mock_post.assert_called_once_with( + self.expected_url, expected_payload, + version=self.expected_microversion, + global_request_id=self.context.global_id) + self.assertIn('Overwriting current allocation', + mock_log.debug.call_args_list[0][0][0]) + + @mock.patch('time.sleep') + def test_409_concurrent_provider_update(self, mock_sleep): + # there will be 1 normal call and 3 retries + self.mock_get.side_effect = [self.source_rsp, self.target_rsp, + self.source_rsp, self.target_rsp, + self.source_rsp, self.target_rsp, + self.source_rsp, self.target_rsp] + rsp = fake_requests.FakeResponse( + 409, + jsonutils.dumps( + {'errors': [ + {'code': 'placement.concurrent_update', + 'detail': ''}]})) + + self.mock_post.return_value = rsp + + resp = self.client.move_allocations( + self.context, self.source_consumer_uuid, self.target_consumer_uuid) + + self.assertFalse(resp) + # Post was attempted four times. + self.assertEqual(4, self.mock_post.call_count) + + @mock.patch('zun.scheduler.client.report.LOG.warning') + def test_not_409_failure(self, mock_log): + error_message = 'placement not there' + self.mock_post.return_value.status_code = 503 + self.mock_post.return_value.text = error_message + + resp = self.client.move_allocations( + self.context, self.source_consumer_uuid, self.target_consumer_uuid) + + self.assertFalse(resp) + args, kwargs = mock_log.call_args + log_message = args[0] + log_args = args[1] + self.assertIn('Unable to post allocations', log_message) + self.assertEqual(error_message, log_args['text']) + + def test_409_concurrent_consumer_update(self): + self.mock_post.return_value = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': 'consumer generation conflict'}]})) + + self.assertRaises( + exception.AllocationMoveFailed, + self.client.move_allocations, self.context, + self.source_consumer_uuid, self.target_consumer_uuid) + + +class TestProviderOperations(SchedulerReportClientTestCase): + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_create_resource_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_inventory') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' 
+ '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_traits') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_sharing_providers') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_providers_in_tree') + def test_ensure_resource_provider_get( + self, get_rpt_mock, get_shr_mock, get_trait_mock, get_agg_mock, + get_inv_mock, create_rp_mock): + # No resource provider exists in the client's cache, so validate that + # if we get the resource provider from the placement API that we don't + # try to create the resource provider. + get_rpt_mock.return_value = [{ + 'uuid': uuids.compute_node, + 'name': mock.sentinel.name, + 'generation': 1, + }] + + get_inv_mock.return_value = None + get_agg_mock.return_value = report.AggInfo( + aggregates=set([uuids.agg1]), generation=42) + get_trait_mock.return_value = report.TraitInfo( + traits=set(['CUSTOM_GOLD']), generation=43) + get_shr_mock.return_value = [] + + def assert_cache_contents(): + self.assertTrue( + self.client._provider_tree.exists(uuids.compute_node)) + self.assertTrue( + self.client._provider_tree.in_aggregates(uuids.compute_node, + [uuids.agg1])) + self.assertFalse( + self.client._provider_tree.in_aggregates(uuids.compute_node, + [uuids.agg2])) + self.assertTrue( + self.client._provider_tree.has_traits(uuids.compute_node, + ['CUSTOM_GOLD'])) + self.assertFalse( + self.client._provider_tree.has_traits(uuids.compute_node, + ['CUSTOM_SILVER'])) + data = self.client._provider_tree.data(uuids.compute_node) + self.assertEqual(43, data.generation) + + self.client._ensure_resource_provider(self.context, uuids.compute_node) + + assert_cache_contents() + get_rpt_mock.assert_called_once_with(self.context, uuids.compute_node) + get_agg_mock.assert_called_once_with(self.context, uuids.compute_node) + get_trait_mock.assert_called_once_with(self.context, + uuids.compute_node) + get_shr_mock.assert_called_once_with(self.context, set([uuids.agg1])) + self.assertFalse(create_rp_mock.called) + + # Now that the cache is populated, a subsequent call should be a no-op. + get_rpt_mock.reset_mock() + get_agg_mock.reset_mock() + get_trait_mock.reset_mock() + get_shr_mock.reset_mock() + + self.client._ensure_resource_provider(self.context, uuids.compute_node) + + assert_cache_contents() + get_rpt_mock.assert_not_called() + get_agg_mock.assert_not_called() + get_trait_mock.assert_not_called() + get_shr_mock.assert_not_called() + create_rp_mock.assert_not_called() + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_create_resource_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_refresh_associations') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_providers_in_tree') + def test_ensure_resource_provider_create_fail( + self, get_rpt_mock, refresh_mock, create_rp_mock): + # No resource provider exists in the client's cache, and + # _create_provider raises, indicating there was an error with the + # create call. 
Ensure we don't populate the resource provider cache + get_rpt_mock.return_value = [] + create_rp_mock.side_effect = exception.ResourceProviderCreationFailed( + name=uuids.compute_node) + + self.assertRaises( + exception.ResourceProviderCreationFailed, + self.client._ensure_resource_provider, self.context, + uuids.compute_node) + + get_rpt_mock.assert_called_once_with(self.context, uuids.compute_node) + create_rp_mock.assert_called_once_with( + self.context, uuids.compute_node, uuids.compute_node, + parent_provider_uuid=None) + self.assertFalse(self.client._provider_tree.exists(uuids.compute_node)) + self.assertFalse(refresh_mock.called) + self.assertRaises( + ValueError, + self.client._provider_tree.in_aggregates, uuids.compute_node, []) + self.assertRaises( + ValueError, + self.client._provider_tree.has_traits, uuids.compute_node, []) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_create_resource_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_refresh_associations') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_providers_in_tree') + def test_ensure_resource_provider_create_no_placement( + self, get_rpt_mock, refresh_mock, create_rp_mock): + create_rp_mock.side_effect = ks_exc.EndpointNotFound('test') + # No resource provider exists in the client's cache, and + # _create_resource_provider raise because + # Placement isn't running yet. Ensure we don't populate the resource + # provider cache. + get_rpt_mock.return_value = [] + + self.assertRaises( + exception.ResourceProviderCreationFailed, + self.client._ensure_resource_provider, self.context, + uuids.compute_node) + + get_rpt_mock.assert_called_once_with(self.context, uuids.compute_node) + create_rp_mock.assert_called_once_with( + self.context, uuids.compute_node, uuids.compute_node, + parent_provider_uuid=None) + self.assertFalse(self.client._provider_tree.exists(uuids.compute_node)) + refresh_mock.assert_not_called() + self.assertRaises( + ValueError, + self.client._provider_tree.in_aggregates, uuids.compute_node, []) + self.assertRaises( + ValueError, + self.client._provider_tree.has_traits, uuids.compute_node, []) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_create_resource_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_refresh_and_get_inventory') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_refresh_associations') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_providers_in_tree') + def test_ensure_resource_provider_create(self, get_rpt_mock, + refresh_inv_mock, + refresh_assoc_mock, + create_rp_mock): + # No resource provider exists in the client's cache and no resource + # provider was returned from the placement API, so verify that in this + # case we try to create the resource provider via the placement API. 
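+        # Expected flow (a sketch of what the assertions below check, not the
+        # actual implementation):
+        #     rps = get_providers_in_tree(context, uuid)        # returns []
+        #     rp = _create_resource_provider(context, uuid, name or uuid)
+        #     cache the new provider; skip the _refresh_* calls entirely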
+ get_rpt_mock.return_value = [] + create_rp_mock.return_value = { + 'uuid': uuids.compute_node, + 'name': 'compute-name', + 'generation': 1, + } + self.assertEqual( + uuids.compute_node, + self.client._ensure_resource_provider(self.context, + uuids.compute_node)) + self._validate_provider(uuids.compute_node, name='compute-name', + generation=1, parent_uuid=None, + aggregates=set(), traits=set()) + + # We don't refresh for a just-created provider + refresh_inv_mock.assert_not_called() + refresh_assoc_mock.assert_not_called() + get_rpt_mock.assert_called_once_with(self.context, uuids.compute_node) + create_rp_mock.assert_called_once_with( + self.context, + uuids.compute_node, + uuids.compute_node, # name param defaults to UUID if None + parent_provider_uuid=None, + ) + self.assertTrue(self.client._provider_tree.exists(uuids.compute_node)) + + create_rp_mock.reset_mock() + + # Validate the path where we specify a name (don't default to the UUID) + self.client._ensure_resource_provider( + self.context, uuids.cn2, 'a-name') + create_rp_mock.assert_called_once_with( + self.context, uuids.cn2, 'a-name', parent_provider_uuid=None) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_refresh_associations') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_create_resource_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_providers_in_tree') + def test_ensure_resource_provider_tree(self, get_rpt_mock, create_rp_mock, + refresh_mock): + """Test _ensure_resource_provider with a tree of providers.""" + def _create_resource_provider(context, uuid, name, + parent_provider_uuid=None): + """Mock side effect for creating the RP with the specified args.""" + return { + 'uuid': uuid, + 'name': name, + 'generation': 0, + 'parent_provider_uuid': parent_provider_uuid + } + create_rp_mock.side_effect = _create_resource_provider + + # We at least have to simulate the part of _refresh_associations that + # marks a provider as 'seen' + def mocked_refresh(context, rp_uuid, **kwargs): + self.client._association_refresh_time[rp_uuid] = time.time() + refresh_mock.side_effect = mocked_refresh + + # Not initially in the placement database, so we have to create it. + get_rpt_mock.return_value = [] + + # Create the root + root = self.client._ensure_resource_provider(self.context, uuids.root) + self.assertEqual(uuids.root, root) + + # Now create a child + child1 = self.client._ensure_resource_provider( + self.context, uuids.child1, name='junior', + parent_provider_uuid=uuids.root) + self.assertEqual(uuids.child1, child1) + + # If we re-ensure the child, we get the object from the tree, not a + # newly-created one - i.e. the early .find() works like it should. + self.assertIs(child1, + self.client._ensure_resource_provider(self.context, + uuids.child1)) + + # Make sure we can create a grandchild + grandchild = self.client._ensure_resource_provider( + self.context, uuids.grandchild, + parent_provider_uuid=uuids.child1) + self.assertEqual(uuids.grandchild, grandchild) + + # Now create a second child of the root and make sure it doesn't wind + # up in some crazy wrong place like under child1 or grandchild + child2 = self.client._ensure_resource_provider( + self.context, uuids.child2, parent_provider_uuid=uuids.root) + self.assertEqual(uuids.child2, child2) + + all_rp_uuids = [uuids.root, uuids.child1, uuids.child2, + uuids.grandchild] + + # At this point we should get all the providers. 
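+        # (the cached tree is root -> child1 -> grandchild, plus root ->
+        # child2)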
+ self.assertEqual( + set(all_rp_uuids), + set(self.client._provider_tree.get_provider_uuids())) + + # And now _ensure is a no-op because everything is cached + get_rpt_mock.reset_mock() + create_rp_mock.reset_mock() + refresh_mock.reset_mock() + + for rp_uuid in all_rp_uuids: + self.client._ensure_resource_provider(self.context, rp_uuid) + get_rpt_mock.assert_not_called() + create_rp_mock.assert_not_called() + refresh_mock.assert_not_called() + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_providers_in_tree') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_refresh_associations') + def test_ensure_resource_provider_refresh_fetch(self, mock_ref_assoc, + mock_gpit): + """Make sure refreshes are called with the appropriate UUIDs and flags + when we fetch the provider tree from placement. + """ + tree_uuids = set([uuids.root, uuids.one, uuids.two]) + mock_gpit.return_value = [{'uuid': u, 'name': u, 'generation': 42} + for u in tree_uuids] + self.assertEqual(uuids.root, + self.client._ensure_resource_provider(self.context, + uuids.root)) + mock_gpit.assert_called_once_with(self.context, uuids.root) + mock_ref_assoc.assert_has_calls( + [mock.call(self.context, uuid, force=True) + for uuid in tree_uuids]) + self.assertEqual(tree_uuids, + set(self.client._provider_tree.get_provider_uuids())) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_providers_in_tree') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_create_resource_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_refresh_associations') + def test_ensure_resource_provider_refresh_create( + self, mock_refresh, mock_create, mock_gpit): + """Make sure refresh is not called when we create the RP.""" + mock_gpit.return_value = [] + mock_create.return_value = {'name': 'cn', 'uuid': uuids.cn, + 'generation': 42} + self.assertEqual(uuids.root, + self.client._ensure_resource_provider(self.context, + uuids.root)) + mock_gpit.assert_called_once_with(self.context, uuids.root) + mock_create.assert_called_once_with(self.context, uuids.root, + uuids.root, + parent_provider_uuid=None) + mock_refresh.assert_not_called() + self.assertEqual([uuids.cn], + self.client._provider_tree.get_provider_uuids()) + + def test_get_allocation_candidates(self): + resp_mock = mock.Mock(status_code=200) + json_data = { + 'allocation_requests': mock.sentinel.alloc_reqs, + 'provider_summaries': mock.sentinel.p_sums, + } + resources = scheduler_utils.ResourceRequest.from_extra_specs({ + 'resources:VCPU': '1', + 'resources:MEMORY_MB': '1024', + 'trait:HW_CPU_X86_AVX': 'required', + 'trait:CUSTOM_TRAIT1': 'required', + 'trait:CUSTOM_TRAIT2': 'preferred', + 'trait:CUSTOM_TRAIT3': 'forbidden', + 'trait:CUSTOM_TRAIT4': 'forbidden', + 'resources1:DISK_GB': '30', + 'trait1:STORAGE_DISK_SSD': 'required', + 'resources2:VGPU': '2', + 'trait2:HW_GPU_RESOLUTION_W2560H1600': 'required', + 'trait2:HW_GPU_API_VULKAN': 'required', + 'resources3:SRIOV_NET_VF': '1', + 'resources3:CUSTOM_NET_EGRESS_BYTES_SEC': '125000', + 'group_policy': 'isolate', + # These are ignored because misspelled, bad value, etc. 
+ 'resources02:CUSTOM_WIDGET': '123', + 'trait:HW_NIC_OFFLOAD_LRO': 'preferred', + 'group_policy3': 'none', + }) + resources.get_request_group(None).aggregates = [ + ['agg1', 'agg2', 'agg3'], ['agg1', 'agg2']] + expected_path = '/allocation_candidates' + expected_query = [ + ('group_policy', 'isolate'), + ('limit', '1000'), + ('member_of', 'in:agg1,agg2'), + ('member_of', 'in:agg1,agg2,agg3'), + ('required', 'CUSTOM_TRAIT1,HW_CPU_X86_AVX,!CUSTOM_TRAIT3,' + '!CUSTOM_TRAIT4'), + ('required1', 'STORAGE_DISK_SSD'), + ('required2', 'HW_GPU_API_VULKAN,HW_GPU_RESOLUTION_W2560H1600'), + ('resources', 'MEMORY_MB:1024,VCPU:1'), + ('resources1', 'DISK_GB:30'), + ('resources2', 'VGPU:2'), + ('resources3', 'CUSTOM_NET_EGRESS_BYTES_SEC:125000,SRIOV_NET_VF:1') + ] + + resp_mock.json.return_value = json_data + self.ks_adap_mock.get.return_value = resp_mock + + alloc_reqs, p_sums, allocation_request_version = ( + self.client.get_allocation_candidates(self.context, resources)) + + url = self.ks_adap_mock.get.call_args[0][0] + split_url = parse.urlsplit(url) + query = parse.parse_qsl(split_url.query) + self.assertEqual(expected_path, split_url.path) + self.assertEqual(expected_query, query) + expected_url = '/allocation_candidates?%s' % parse.urlencode( + expected_query) + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.31', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs) + self.assertEqual(mock.sentinel.p_sums, p_sums) + + def test_get_ac_no_trait_bogus_group_policy_custom_limit(self): + CONF.set_override('max_placement_results', 42, group='scheduler') + resp_mock = mock.Mock(status_code=200) + json_data = { + 'allocation_requests': mock.sentinel.alloc_reqs, + 'provider_summaries': mock.sentinel.p_sums, + } + resources = scheduler_utils.ResourceRequest.from_extra_specs({ + 'resources:VCPU': '1', + 'resources:MEMORY_MB': '1024', + 'resources1:DISK_GB': '30', + 'group_policy': 'bogus', + }) + expected_path = '/allocation_candidates' + expected_query = [ + ('limit', '42'), + ('resources', 'MEMORY_MB:1024,VCPU:1'), + ('resources1', 'DISK_GB:30'), + ] + + resp_mock.json.return_value = json_data + self.ks_adap_mock.get.return_value = resp_mock + + alloc_reqs, p_sums, allocation_request_version = ( + self.client.get_allocation_candidates(self.context, resources)) + + url = self.ks_adap_mock.get.call_args[0][0] + split_url = parse.urlsplit(url) + query = parse.parse_qsl(split_url.query) + self.assertEqual(expected_path, split_url.path) + self.assertEqual(expected_query, query) + expected_url = '/allocation_candidates?%s' % parse.urlencode( + expected_query) + self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs) + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.31', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(mock.sentinel.p_sums, p_sums) + + def test_get_allocation_candidates_not_found(self): + # Ensure _get_resource_provider() just returns None when the placement + # API doesn't find a resource provider matching a UUID + resp_mock = mock.Mock(status_code=404) + self.ks_adap_mock.get.return_value = resp_mock + expected_path = '/allocation_candidates' + expected_query = {'resources': ['MEMORY_MB:1024'], + 'limit': ['100']} + + # Make sure we're also honoring the configured limit + CONF.set_override('max_placement_results', 100, group='scheduler') + + resources = 
scheduler_utils.ResourceRequest.from_extra_specs( + {'resources:MEMORY_MB': '1024'}) + + res = self.client.get_allocation_candidates(self.context, resources) + + self.ks_adap_mock.get.assert_called_once_with( + mock.ANY, microversion='1.31', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + url = self.ks_adap_mock.get.call_args[0][0] + split_url = parse.urlsplit(url) + query = parse.parse_qs(split_url.query) + self.assertEqual(expected_path, split_url.path) + self.assertEqual(expected_query, query) + self.assertIsNone(res[0]) + + def test_get_resource_provider_found(self): + # Ensure _get_resource_provider() returns a dict of resource provider + # if it finds a resource provider record from the placement API + uuid = uuids.compute_node + resp_mock = mock.Mock(status_code=200) + json_data = { + 'uuid': uuid, + 'name': uuid, + 'generation': 42, + 'parent_provider_uuid': None, + } + resp_mock.json.return_value = json_data + self.ks_adap_mock.get.return_value = resp_mock + + result = self.client._get_resource_provider(self.context, uuid) + + expected_provider_dict = dict( + uuid=uuid, + name=uuid, + generation=42, + parent_provider_uuid=None, + ) + expected_url = '/resource_providers/' + uuid + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.14', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(expected_provider_dict, result) + + def test_get_resource_provider_not_found(self): + # Ensure _get_resource_provider() just returns None when the placement + # API doesn't find a resource provider matching a UUID + resp_mock = mock.Mock(status_code=404) + self.ks_adap_mock.get.return_value = resp_mock + + uuid = uuids.compute_node + result = self.client._get_resource_provider(self.context, uuid) + + expected_url = '/resource_providers/' + uuid + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.14', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertIsNone(result) + + @mock.patch.object(report.LOG, 'error') + def test_get_resource_provider_error(self, logging_mock): + # Ensure _get_resource_provider() sets the error flag when trying to + # communicate with the placement API and not getting an error we can + # deal with + resp_mock = mock.Mock(status_code=503) + self.ks_adap_mock.get.return_value = resp_mock + self.ks_adap_mock.get.return_value.headers = { + 'x-openstack-request-id': uuids.request_id} + + uuid = uuids.compute_node + self.assertRaises( + exception.ResourceProviderRetrievalFailed, + self.client._get_resource_provider, self.context, uuid) + + expected_url = '/resource_providers/' + uuid + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.14', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # A 503 Service Unavailable should trigger an error log that + # includes the placement request id and return None + # from _get_resource_provider() + self.assertTrue(logging_mock.called) + self.assertEqual(uuids.request_id, + logging_mock.call_args[0][1]['placement_req_id']) + + def test_get_sharing_providers(self): + resp_mock = mock.Mock(status_code=200) + rpjson = [ + { + 'uuid': uuids.sharing1, + 'name': 'bandwidth_provider', + 'generation': 42, + 'parent_provider_uuid': None, + 'root_provider_uuid': None, + 'links': [], + }, + { + 'uuid': uuids.sharing2, + 'name': 'storage_provider', + 'generation': 42, + 
'parent_provider_uuid': None, + 'root_provider_uuid': None, + 'links': [], + }, + ] + resp_mock.json.return_value = {'resource_providers': rpjson} + self.ks_adap_mock.get.return_value = resp_mock + + result = self.client._get_sharing_providers( + self.context, [uuids.agg1, uuids.agg2]) + + expected_url = ('/resource_providers?member_of=in:' + + ','.join((uuids.agg1, uuids.agg2)) + + '&required=MISC_SHARES_VIA_AGGREGATE') + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.18', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(rpjson, result) + + def test_get_sharing_providers_emptylist(self): + self.assertEqual( + [], self.client._get_sharing_providers(self.context, [])) + self.ks_adap_mock.get.assert_not_called() + + @mock.patch.object(report.LOG, 'error') + def test_get_sharing_providers_error(self, logging_mock): + # Ensure _get_sharing_providers() logs an error and raises if the + # placement API call doesn't respond 200 + resp_mock = mock.Mock(status_code=503) + self.ks_adap_mock.get.return_value = resp_mock + self.ks_adap_mock.get.return_value.headers = { + 'x-openstack-request-id': uuids.request_id} + + uuid = uuids.agg + self.assertRaises(exception.ResourceProviderRetrievalFailed, + self.client._get_sharing_providers, + self.context, [uuid]) + + expected_url = ('/resource_providers?member_of=in:' + uuid + + '&required=MISC_SHARES_VIA_AGGREGATE') + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.18', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # A 503 Service Unavailable should trigger an error log that + # includes the placement request id + self.assertTrue(logging_mock.called) + self.assertEqual(uuids.request_id, + logging_mock.call_args[0][1]['placement_req_id']) + + def test_get_providers_in_tree(self): + # Ensure get_providers_in_tree() returns a list of resource + # provider dicts if it finds a resource provider record from the + # placement API + root = uuids.compute_node + child = uuids.child + resp_mock = mock.Mock(status_code=200) + rpjson = [ + { + 'uuid': root, + 'name': 'daddy', 'generation': 42, + 'parent_provider_uuid': None, + }, + { + 'uuid': child, + 'name': 'junior', + 'generation': 42, + 'parent_provider_uuid': root, + }, + ] + resp_mock.json.return_value = {'resource_providers': rpjson} + self.ks_adap_mock.get.return_value = resp_mock + + result = self.client.get_providers_in_tree(self.context, root) + + expected_url = '/resource_providers?in_tree=' + root + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.14', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(rpjson, result) + + @mock.patch.object(report.LOG, 'error') + def test_get_providers_in_tree_error(self, logging_mock): + # Ensure get_providers_in_tree() logs an error and raises if the + # placement API call doesn't respond 200 + resp_mock = mock.Mock(status_code=503) + self.ks_adap_mock.get.return_value = resp_mock + self.ks_adap_mock.get.return_value.headers = { + 'x-openstack-request-id': 'req-' + uuids.request_id} + + uuid = uuids.compute_node + self.assertRaises(exception.ResourceProviderRetrievalFailed, + self.client.get_providers_in_tree, self.context, + uuid) + + expected_url = '/resource_providers?in_tree=' + uuid + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.14', endpoint_filter=mock.ANY, + 
headers={'X-Openstack-Request-Id': self.context.global_id}) + # A 503 Service Unavailable should trigger an error log that includes + # the placement request id + self.assertTrue(logging_mock.called) + self.assertEqual('req-' + uuids.request_id, + logging_mock.call_args[0][1]['placement_req_id']) + + def test_get_providers_in_tree_ksa_exc(self): + self.ks_adap_mock.get.side_effect = ks_exc.EndpointNotFound() + self.assertRaises( + ks_exc.ClientException, + self.client.get_providers_in_tree, self.context, uuids.whatever) + + def test_create_resource_provider(self): + """Test that _create_resource_provider() sends a dict of resource + provider information without a parent provider UUID. + """ + uuid = uuids.compute_node + name = 'computehost' + resp_mock = mock.Mock(status_code=200) + self.ks_adap_mock.post.return_value = resp_mock + + self.assertEqual( + resp_mock.json.return_value, + self.client._create_resource_provider(self.context, uuid, name)) + + expected_payload = { + 'uuid': uuid, + 'name': name, + } + + expected_url = '/resource_providers' + self.ks_adap_mock.post.assert_called_once_with( + expected_url, json=expected_payload, microversion='1.20', + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + def test_create_resource_provider_with_parent(self): + """Test that when specifying a parent provider UUID, that the + parent_provider_uuid part of the payload is properly specified. + """ + parent_uuid = uuids.parent + uuid = uuids.compute_node + name = 'computehost' + resp_mock = mock.Mock(status_code=200) + self.ks_adap_mock.post.return_value = resp_mock + + self.assertEqual( + resp_mock.json.return_value, + self.client._create_resource_provider( + self.context, + uuid, + name, + parent_provider_uuid=parent_uuid, + ) + ) + + expected_payload = { + 'uuid': uuid, + 'name': name, + 'parent_provider_uuid': parent_uuid, + } + expected_url = '/resource_providers' + self.ks_adap_mock.post.assert_called_once_with( + expected_url, json=expected_payload, microversion='1.20', + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + + @mock.patch.object(report.LOG, 'info') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_resource_provider') + def test_create_resource_provider_concurrent_create(self, get_rp_mock, + logging_mock): + # Ensure _create_resource_provider() returns a dict of resource + # provider gotten from _get_resource_provider() if the call to create + # the resource provider in the placement API returned a 409 Conflict, + # indicating another thread concurrently created the resource provider + # record. + uuid = uuids.compute_node + name = 'computehost' + self.ks_adap_mock.post.return_value = fake_requests.FakeResponse( + 409, content='not a name conflict', + headers={'x-openstack-request-id': uuids.request_id}) + + get_rp_mock.return_value = mock.sentinel.get_rp + + result = self.client._create_resource_provider(self.context, uuid, + name) + + expected_payload = { + 'uuid': uuid, + 'name': name, + } + expected_url = '/resource_providers' + self.ks_adap_mock.post.assert_called_once_with( + expected_url, json=expected_payload, microversion='1.20', + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(mock.sentinel.get_rp, result) + # The 409 response will produce a message to the info log. 
+        self.assertTrue(logging_mock.called)
+        self.assertEqual(uuids.request_id,
+                         logging_mock.call_args[0][1]['placement_req_id'])
+
+    def test_create_resource_provider_name_conflict(self):
+        # When the API call to create the resource provider fails 409 with a
+        # name conflict, we raise an exception.
+        self.ks_adap_mock.post.return_value = fake_requests.FakeResponse(
+            409, content='Conflicting resource provider name: foo '
+                         'already exists.')
+
+        self.assertRaises(
+            exception.ResourceProviderCreationFailed,
+            self.client._create_resource_provider, self.context,
+            uuids.compute_node, 'foo')
+
+    @mock.patch.object(report.LOG, 'error')
+    def test_create_resource_provider_error(self, logging_mock):
+        # Ensure _create_resource_provider() raises
+        # ResourceProviderCreationFailed when the placement API returns an
+        # error we cannot deal with
+        uuid = uuids.compute_node
+        name = 'computehost'
+        self.ks_adap_mock.post.return_value = fake_requests.FakeResponse(
+            503, headers={'x-openstack-request-id': uuids.request_id})
+
+        self.assertRaises(
+            exception.ResourceProviderCreationFailed,
+            self.client._create_resource_provider, self.context, uuid, name)
+
+        expected_payload = {
+            'uuid': uuid,
+            'name': name,
+        }
+        expected_url = '/resource_providers'
+        self.ks_adap_mock.post.assert_called_once_with(
+            expected_url, json=expected_payload, microversion='1.20',
+            endpoint_filter=mock.ANY,
+            headers={'X-Openstack-Request-Id': self.context.global_id})
+        # A 503 Service Unavailable should log an error that includes the
+        # placement request id, and _create_resource_provider() should raise
+        # ResourceProviderCreationFailed
+        self.assertTrue(logging_mock.called)
+        self.assertEqual(uuids.request_id,
+                         logging_mock.call_args[0][1]['placement_req_id'])
+
+    def test_put_empty(self):
+        # A simple put with an empty (not None) payload should send the empty
+        # payload through.
+ # Bug #1744786 + url = '/resource_providers/%s/aggregates' % uuids.foo + self.client.put(url, []) + self.ks_adap_mock.put.assert_called_once_with( + url, json=[], microversion=None, endpoint_filter=mock.ANY, + headers={}) + + def test_delete_provider(self): + delete_mock = fake_requests.FakeResponse(None) + self.ks_adap_mock.delete.return_value = delete_mock + + for status_code in (204, 404): + delete_mock.status_code = status_code + # Seed the caches + self.client._provider_tree.new_root('compute', uuids.root, + generation=0) + self.client._association_refresh_time[uuids.root] = 1234 + + self.client._delete_provider(uuids.root, global_request_id='gri') + + self.ks_adap_mock.delete.assert_called_once_with( + '/resource_providers/' + uuids.root, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': 'gri'}, microversion=None) + self.assertFalse(self.client._provider_tree.exists(uuids.root)) + self.assertNotIn(uuids.root, self.client._association_refresh_time) + + self.ks_adap_mock.delete.reset_mock() + + def test_delete_provider_fail(self): + delete_mock = fake_requests.FakeResponse(None) + self.ks_adap_mock.delete.return_value = delete_mock + resp_exc_map = {409: exception.ResourceProviderInUse, + 503: exception.ResourceProviderDeletionFailed} + + for status_code, exc in resp_exc_map.items(): + delete_mock.status_code = status_code + self.assertRaises(exc, self.client._delete_provider, uuids.root) + self.ks_adap_mock.delete.assert_called_once_with( + '/resource_providers/' + uuids.root, microversion=None, + endpoint_filter=mock.ANY, headers={}) + + self.ks_adap_mock.delete.reset_mock() + + def test_set_aggregates_for_provider(self): + aggs = [uuids.agg1, uuids.agg2] + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse( + 200, content=jsonutils.dumps({ + 'aggregates': aggs, + 'resource_provider_generation': 1})) + + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, generation=0) + self.assertEqual(set(), + self.client._provider_tree.data(uuids.rp).aggregates) + + self.client.set_aggregates_for_provider(self.context, uuids.rp, aggs) + + exp_payload = {'aggregates': aggs, + 'resource_provider_generation': 0} + self.ks_adap_mock.put.assert_called_once_with( + '/resource_providers/%s/aggregates' % uuids.rp, json=exp_payload, + microversion='1.19', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # Cache was updated + ptree_data = self.client._provider_tree.data(uuids.rp) + self.assertEqual(set(aggs), ptree_data.aggregates) + self.assertEqual(1, ptree_data.generation) + + def test_set_aggregates_for_provider_bad_args(self): + self.assertRaises(ValueError, self.client.set_aggregates_for_provider, + self.context, uuids.rp, {}, use_cache=False) + self.assertRaises(ValueError, self.client.set_aggregates_for_provider, + self.context, uuids.rp, {}, use_cache=False, + generation=None) + + def test_set_aggregates_for_provider_fail(self): + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse(503) + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, generation=0) + self.assertRaises( + exception.ResourceProviderUpdateFailed, + self.client.set_aggregates_for_provider, + self.context, uuids.rp, [uuids.agg]) + # The cache wasn't updated + self.assertEqual(set(), + self.client._provider_tree.data(uuids.rp).aggregates) + + def test_set_aggregates_for_provider_conflict(self): + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, 
generation=0) + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse(409) + self.assertRaises( + exception.ResourceProviderUpdateConflict, + self.client.set_aggregates_for_provider, + self.context, uuids.rp, [uuids.agg]) + # The cache was invalidated + self.assertNotIn(uuids.rp, + self.client._provider_tree.get_provider_uuids()) + self.assertNotIn(uuids.rp, self.client._association_refresh_time) + + def test_set_aggregates_for_provider_short_circuit(self): + """No-op when aggregates have not changed.""" + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, generation=7) + self.client.set_aggregates_for_provider(self.context, uuids.rp, []) + self.ks_adap_mock.put.assert_not_called() + + def test_set_aggregates_for_provider_no_short_circuit(self): + """Don't short-circuit if generation doesn't match, even if aggs have + not changed. + """ + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, generation=2) + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse( + 200, content=jsonutils.dumps({ + 'aggregates': [], + 'resource_provider_generation': 5})) + self.client.set_aggregates_for_provider(self.context, uuids.rp, [], + generation=4) + exp_payload = {'aggregates': [], + 'resource_provider_generation': 4} + self.ks_adap_mock.put.assert_called_once_with( + '/resource_providers/%s/aggregates' % uuids.rp, json=exp_payload, + microversion='1.19', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + # Cache was updated + ptree_data = self.client._provider_tree.data(uuids.rp) + self.assertEqual(set(), ptree_data.aggregates) + self.assertEqual(5, ptree_data.generation) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_resource_provider', return_value=mock.NonCallableMock) + def test_get_resource_provider_name_from_cache(self, mock_placement_get): + expected_name = 'rp' + self.client._provider_tree.new_root( + expected_name, uuids.rp, generation=0) + + actual_name = self.client.get_resource_provider_name( + self.context, uuids.rp) + + self.assertEqual(expected_name, actual_name) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_resource_provider') + def test_get_resource_provider_name_from_placement( + self, mock_placement_get): + expected_name = 'rp' + mock_placement_get.return_value = { + 'uuid': uuids.rp, + 'name': expected_name + } + + actual_name = self.client.get_resource_provider_name( + self.context, uuids.rp) + + self.assertEqual(expected_name, actual_name) + mock_placement_get.assert_called_once_with(self.context, uuids.rp) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_resource_provider') + def test_get_resource_provider_name_rp_not_found_in_placement( + self, mock_placement_get): + mock_placement_get.side_effect = \ + exception.ResourceProviderNotFound(uuids.rp) + + self.assertRaises( + exception.ResourceProviderNotFound, + self.client.get_resource_provider_name, + self.context, uuids.rp) + + mock_placement_get.assert_called_once_with(self.context, uuids.rp) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' 
+ '_get_resource_provider') + def test_get_resource_provider_name_placement_unavailable( + self, mock_placement_get): + mock_placement_get.side_effect = \ + exception.ResourceProviderRetrievalFailed(uuid=uuids.rp) + + self.assertRaises( + exception.ResourceProviderRetrievalFailed, + self.client.get_resource_provider_name, + self.context, uuids.rp) + + +class TestAggregates(SchedulerReportClientTestCase): + def test_get_provider_aggregates_found(self): + uuid = uuids.compute_node + resp_mock = mock.Mock(status_code=200) + aggs = [ + uuids.agg1, + uuids.agg2, + ] + resp_mock.json.return_value = {'aggregates': aggs, + 'resource_provider_generation': 42} + self.ks_adap_mock.get.return_value = resp_mock + + result, gen = self.client._get_provider_aggregates(self.context, uuid) + + expected_url = '/resource_providers/' + uuid + '/aggregates' + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.19', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(set(aggs), result) + self.assertEqual(42, gen) + + @mock.patch.object(report.LOG, 'error') + def test_get_provider_aggregates_error(self, log_mock): + """Test that when the placement API returns any error when looking up a + provider's aggregates, we raise an exception. + """ + uuid = uuids.compute_node + resp_mock = mock.Mock(headers={ + 'x-openstack-request-id': uuids.request_id}) + self.ks_adap_mock.get.return_value = resp_mock + + for status_code in (400, 404, 503): + resp_mock.status_code = status_code + self.assertRaises( + exception.ResourceProviderAggregateRetrievalFailed, + self.client._get_provider_aggregates, self.context, uuid) + + expected_url = '/resource_providers/' + uuid + '/aggregates' + self.ks_adap_mock.get.assert_called_once_with( + expected_url, microversion='1.19', endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertTrue(log_mock.called) + self.assertEqual(uuids.request_id, + log_mock.call_args[0][1]['placement_req_id']) + self.ks_adap_mock.get.reset_mock() + log_mock.reset_mock() + + +class TestTraits(SchedulerReportClientTestCase): + trait_api_kwargs = {'microversion': '1.6'} + + def test_get_provider_traits_found(self): + uuid = uuids.compute_node + resp_mock = mock.Mock(status_code=200) + traits = [ + 'CUSTOM_GOLD', + 'CUSTOM_SILVER', + ] + resp_mock.json.return_value = {'traits': traits, + 'resource_provider_generation': 42} + self.ks_adap_mock.get.return_value = resp_mock + + result, gen = self.client.get_provider_traits(self.context, uuid) + + expected_url = '/resource_providers/' + uuid + '/traits' + self.ks_adap_mock.get.assert_called_once_with( + expected_url, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + self.assertEqual(set(traits), result) + self.assertEqual(42, gen) + + @mock.patch.object(report.LOG, 'error') + def test_get_provider_traits_error(self, log_mock): + """Test that when the placement API returns any error when looking up a + provider's traits, we raise an exception. 
+ """ + uuid = uuids.compute_node + resp_mock = mock.Mock(headers={ + 'x-openstack-request-id': uuids.request_id}) + self.ks_adap_mock.get.return_value = resp_mock + + for status_code in (400, 404, 503): + resp_mock.status_code = status_code + self.assertRaises( + exception.ResourceProviderTraitRetrievalFailed, + self.client.get_provider_traits, self.context, uuid) + + expected_url = '/resource_providers/' + uuid + '/traits' + self.ks_adap_mock.get.assert_called_once_with( + expected_url, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + self.assertTrue(log_mock.called) + self.assertEqual(uuids.request_id, + log_mock.call_args[0][1]['placement_req_id']) + self.ks_adap_mock.get.reset_mock() + log_mock.reset_mock() + + def test_get_provider_traits_placement_comm_error(self): + """ksa ClientException raises through.""" + uuid = uuids.compute_node + self.ks_adap_mock.get.side_effect = ks_exc.EndpointNotFound() + self.assertRaises(ks_exc.ClientException, + self.client.get_provider_traits, self.context, uuid) + expected_url = '/resource_providers/' + uuid + '/traits' + self.ks_adap_mock.get.assert_called_once_with( + expected_url, + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + + def test_ensure_traits(self): + """Successful paths, various permutations of traits existing or needing + to be created. + """ + standard_traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA'] + custom_traits = ['CUSTOM_GOLD', 'CUSTOM_SILVER'] + all_traits = standard_traits + custom_traits + + get_mock = mock.Mock(status_code=200) + self.ks_adap_mock.get.return_value = get_mock + + # Request all traits; custom traits need to be created + get_mock.json.return_value = {'traits': standard_traits} + self.client._ensure_traits(self.context, all_traits) + self.ks_adap_mock.get.assert_called_once_with( + '/traits?name=in:' + ','.join(all_traits), + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + self.ks_adap_mock.put.assert_has_calls( + [mock.call('/traits/' + trait, + endpoint_filter=mock.ANY, + headers={ + 'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + for trait in custom_traits], any_order=True) + + self.ks_adap_mock.reset_mock() + + # Request standard traits; no traits need to be created + get_mock.json.return_value = {'traits': standard_traits} + self.client._ensure_traits(self.context, standard_traits) + self.ks_adap_mock.get.assert_called_once_with( + '/traits?name=in:' + ','.join(standard_traits), + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + self.ks_adap_mock.put.assert_not_called() + + self.ks_adap_mock.reset_mock() + + # Request no traits - short circuit + self.client._ensure_traits(self.context, None) + self.client._ensure_traits(self.context, []) + self.ks_adap_mock.get.assert_not_called() + self.ks_adap_mock.put.assert_not_called() + + def test_ensure_traits_fail_retrieval(self): + self.ks_adap_mock.get.return_value = mock.Mock(status_code=400) + + self.assertRaises(exception.TraitRetrievalFailed, + self.client._ensure_traits, + self.context, ['FOO']) + + self.ks_adap_mock.get.assert_called_once_with( + '/traits?name=in:FOO', + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + self.ks_adap_mock.put.assert_not_called() + + def 
test_ensure_traits_fail_creation(self): + get_mock = mock.Mock(status_code=200) + get_mock.json.return_value = {'traits': []} + self.ks_adap_mock.get.return_value = get_mock + self.ks_adap_mock.put.return_value = fake_requests.FakeResponse(400) + + self.assertRaises(exception.TraitCreationFailed, + self.client._ensure_traits, + self.context, ['FOO']) + + self.ks_adap_mock.get.assert_called_once_with( + '/traits?name=in:FOO', + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + self.ks_adap_mock.put.assert_called_once_with( + '/traits/FOO', + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + + def test_set_traits_for_provider(self): + traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA'] + + # Make _ensure_traits succeed without PUTting + get_mock = mock.Mock(status_code=200) + get_mock.json.return_value = {'traits': traits} + self.ks_adap_mock.get.return_value = get_mock + + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, generation=0) + + # Mock the /rp/{u}/traits PUT to succeed + put_mock = mock.Mock(status_code=200) + put_mock.json.return_value = {'traits': traits, + 'resource_provider_generation': 1} + self.ks_adap_mock.put.return_value = put_mock + + # Invoke + self.client.set_traits_for_provider(self.context, uuids.rp, traits) + + # Verify API calls + self.ks_adap_mock.get.assert_called_once_with( + '/traits?name=in:' + ','.join(traits), + endpoint_filter=mock.ANY, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + self.ks_adap_mock.put.assert_called_once_with( + '/resource_providers/%s/traits' % uuids.rp, + endpoint_filter=mock.ANY, + json={'traits': traits, 'resource_provider_generation': 0}, + headers={'X-Openstack-Request-Id': self.context.global_id}, + **self.trait_api_kwargs) + + # And ensure the provider tree cache was updated appropriately + self.assertFalse( + self.client._provider_tree.have_traits_changed(uuids.rp, traits)) + # Validate the generation + self.assertEqual( + 1, self.client._provider_tree.data(uuids.rp).generation) + + def test_set_traits_for_provider_fail(self): + traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA'] + get_mock = mock.Mock() + self.ks_adap_mock.get.return_value = get_mock + + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, generation=0) + + # _ensure_traits exception bubbles up + get_mock.status_code = 400 + self.assertRaises( + exception.TraitRetrievalFailed, + self.client.set_traits_for_provider, + self.context, uuids.rp, traits) + self.ks_adap_mock.put.assert_not_called() + + get_mock.status_code = 200 + get_mock.json.return_value = {'traits': traits} + + # Conflict + self.ks_adap_mock.put.return_value = mock.Mock(status_code=409) + self.assertRaises( + exception.ResourceProviderUpdateConflict, + self.client.set_traits_for_provider, + self.context, uuids.rp, traits) + + # Other error + self.ks_adap_mock.put.return_value = mock.Mock(status_code=503) + self.assertRaises( + exception.ResourceProviderUpdateFailed, + self.client.set_traits_for_provider, + self.context, uuids.rp, traits) + + +class TestAssociations(SchedulerReportClientTestCase): + def setUp(self): + super(TestAssociations, self).setUp() + + self.mock_get_inv = self.useFixture(fixtures.MockPatch( + 'zun.scheduler.client.report.SchedulerReportClient.' 
+ '_get_inventory')).mock + self.inv = { + 'VCPU': {'total': 16}, + 'MEMORY_MB': {'total': 1024}, + 'DISK_GB': {'total': 10}, + } + self.mock_get_inv.return_value = { + 'resource_provider_generation': 41, + 'inventories': self.inv, + } + + self.mock_get_aggs = self.useFixture(fixtures.MockPatch( + 'zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates')).mock + self.mock_get_aggs.return_value = report.AggInfo( + aggregates=set([uuids.agg1]), generation=42) + + self.mock_get_traits = self.useFixture(fixtures.MockPatch( + 'zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_traits')).mock + self.mock_get_traits.return_value = report.TraitInfo( + traits=set(['CUSTOM_GOLD']), generation=43) + + self.mock_get_sharing = self.useFixture(fixtures.MockPatch( + 'zun.scheduler.client.report.SchedulerReportClient.' + '_get_sharing_providers')).mock + + def assert_getters_were_called(self, uuid, sharing=True): + self.mock_get_inv.assert_called_once_with(self.context, uuid) + self.mock_get_aggs.assert_called_once_with(self.context, uuid) + self.mock_get_traits.assert_called_once_with(self.context, uuid) + if sharing: + self.mock_get_sharing.assert_called_once_with( + self.context, self.mock_get_aggs.return_value[0]) + self.assertIn(uuid, self.client._association_refresh_time) + self.assertFalse( + self.client._provider_tree.has_inventory_changed(uuid, self.inv)) + self.assertTrue( + self.client._provider_tree.in_aggregates(uuid, [uuids.agg1])) + self.assertFalse( + self.client._provider_tree.in_aggregates(uuid, [uuids.agg2])) + self.assertTrue( + self.client._provider_tree.has_traits(uuid, ['CUSTOM_GOLD'])) + self.assertFalse( + self.client._provider_tree.has_traits(uuid, ['CUSTOM_SILVER'])) + self.assertEqual(43, self.client._provider_tree.data(uuid).generation) + + def assert_getters_not_called(self, timer_entry=None): + self.mock_get_inv.assert_not_called() + self.mock_get_aggs.assert_not_called() + self.mock_get_traits.assert_not_called() + self.mock_get_sharing.assert_not_called() + if timer_entry is None: + self.assertFalse(self.client._association_refresh_time) + else: + self.assertIn(timer_entry, self.client._association_refresh_time) + + def reset_getter_mocks(self): + self.mock_get_inv.reset_mock() + self.mock_get_aggs.reset_mock() + self.mock_get_traits.reset_mock() + self.mock_get_sharing.reset_mock() + + def test_refresh_associations_no_last(self): + """Test that associations are refreshed when stale.""" + uuid = uuids.compute_node + # Seed the provider tree so _refresh_associations finds the provider + self.client._provider_tree.new_root('compute', uuid, generation=1) + self.client._refresh_associations(self.context, uuid) + self.assert_getters_were_called(uuid) + + def test_refresh_associations_no_refresh_sharing(self): + """Test refresh_sharing=False.""" + uuid = uuids.compute_node + # Seed the provider tree so _refresh_associations finds the provider + self.client._provider_tree.new_root('compute', uuid, generation=1) + self.client._refresh_associations(self.context, uuid, + refresh_sharing=False) + self.assert_getters_were_called(uuid, sharing=False) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_associations_stale') + def test_refresh_associations_not_stale(self, mock_stale): + """Test that refresh associations is not called when the map is + not stale. 
+ """ + mock_stale.return_value = False + uuid = uuids.compute_node + self.client._refresh_associations(self.context, uuid) + self.assert_getters_not_called() + + @mock.patch.object(report.LOG, 'debug') + def test_refresh_associations_time(self, log_mock): + """Test that refresh associations is called when the map is stale.""" + uuid = uuids.compute_node + # Seed the provider tree so _refresh_associations finds the provider + self.client._provider_tree.new_root('compute', uuid, generation=1) + + # Called a first time because association_refresh_time is empty. + now = time.time() + self.client._refresh_associations(self.context, uuid) + self.assert_getters_were_called(uuid) + log_mock.assert_has_calls([ + mock.call('Refreshing inventories for resource provider %s', uuid), + mock.call('Updating ProviderTree inventory for provider %s from ' + '_refresh_and_get_inventory using data: %s', + uuid, self.inv), + mock.call('Refreshing aggregate associations for resource ' + 'provider %s, aggregates: %s', uuid, uuids.agg1), + mock.call('Refreshing trait associations for resource ' + 'provider %s, traits: %s', uuid, 'CUSTOM_GOLD') + ]) + + # Clear call count. + self.reset_getter_mocks() + + with mock.patch('time.time') as mock_future: + # Not called a second time because not enough time has passed. + mock_future.return_value = ( + now + CONF.compute.resource_provider_association_refresh / 2) + self.client._refresh_associations(self.context, uuid) + self.assert_getters_not_called(timer_entry=uuid) + + # Called because time has passed. + mock_future.return_value = ( + now + CONF.compute.resource_provider_association_refresh + 1) + self.client._refresh_associations(self.context, uuid) + self.assert_getters_were_called(uuid) + + def test_refresh_associations_disabled(self): + """Test that refresh associations can be disabled.""" + CONF.set_override('resource_provider_association_refresh', 0, + group='compute') + uuid = uuids.compute_node + # Seed the provider tree so _refresh_associations finds the provider + self.client._provider_tree.new_root('compute', uuid, generation=1) + + # Called a first time because association_refresh_time is empty. + now = time.time() + self.client._refresh_associations(self.context, uuid) + self.assert_getters_were_called(uuid) + + # Clear call count. + self.reset_getter_mocks() + + with mock.patch('time.time') as mock_future: + # A lot of time passes + mock_future.return_value = now + 10000000000000 + self.client._refresh_associations(self.context, uuid) + self.assert_getters_not_called(timer_entry=uuid) + + self.reset_getter_mocks() + + # Forever passes + mock_future.return_value = float('inf') + self.client._refresh_associations(self.context, uuid) + self.assert_getters_not_called(timer_entry=uuid) + + self.reset_getter_mocks() + + # Even if no time passes, clearing the counter triggers refresh + mock_future.return_value = now + del self.client._association_refresh_time[uuid] + self.client._refresh_associations(self.context, uuid) + self.assert_getters_were_called(uuid) + + +class TestAllocations(SchedulerReportClientTestCase): + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient." + "delete") + @mock.patch("zun.scheduler.client.report.SchedulerReportClient." 
+ "delete_allocation_for_container") + @mock.patch("zun.objects.Container.list_by_host") + def test_delete_resource_provider_cascade( + self, mock_by_host, mock_del_alloc, mock_delete): + self.client._provider_tree.new_root(uuids.cn, uuids.cn, generation=1) + cn = utils.get_test_compute_node(self.context, uuid=uuids.cn, + hostname="fake_hostname") + cont1 = utils.get_test_container(self.context, uuid=uuids.inst1) + cont2 = utils.get_test_container(self.context, uuid=uuids.inst2) + mock_by_host.return_value = [cont1, cont2] + resp_mock = mock.Mock(status_code=204) + mock_delete.return_value = resp_mock + self.client.delete_resource_provider(self.context, cn, cascade=True) + mock_by_host.assert_called_once_with(self.context, cn.hostname) + self.assertEqual(2, mock_del_alloc.call_count) + exp_url = "/resource_providers/%s" % uuids.cn + mock_delete.assert_called_once_with( + exp_url, global_request_id=self.context.global_id) + self.assertFalse(self.client._provider_tree.exists(uuids.cn)) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient." + "delete") + @mock.patch("zun.scheduler.client.report.SchedulerReportClient." + "delete_allocation_for_container") + @mock.patch("zun.objects.Container.list_by_host") + def test_delete_resource_provider_no_cascade(self, mock_by_host, + mock_del_alloc, mock_delete): + self.client._provider_tree.new_root(uuids.cn, uuids.cn, generation=1) + self.client._association_refresh_time[uuids.cn] = mock.Mock() + cn = utils.get_test_compute_node(self.context, uuid=uuids.cn, + hostname="fake_hostname") + cont1 = utils.get_test_container(self.context, uuid=uuids.inst1) + cont2 = utils.get_test_container(self.context, uuid=uuids.inst2) + mock_by_host.return_value = [cont1, cont2] + resp_mock = mock.Mock(status_code=204) + mock_delete.return_value = resp_mock + self.client.delete_resource_provider(self.context, cn) + mock_del_alloc.assert_not_called() + exp_url = "/resource_providers/%s" % uuids.cn + mock_delete.assert_called_once_with( + exp_url, global_request_id=self.context.global_id) + self.assertNotIn(uuids.cn, self.client._association_refresh_time) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient." 
+ "delete") + @mock.patch('zun.scheduler.client.report.LOG') + def test_delete_resource_provider_log_calls(self, mock_log, mock_delete): + # First, check a successful call + self.client._provider_tree.new_root(uuids.cn, uuids.cn, generation=1) + cn = utils.get_test_compute_node(self.context, uuid=uuids.cn, + hostname="fake_hostname") + resp_mock = fake_requests.FakeResponse(204) + mock_delete.return_value = resp_mock + self.client.delete_resource_provider(self.context, cn) + # With a 204, only the info should be called + self.assertEqual(1, mock_log.info.call_count) + self.assertEqual(0, mock_log.warning.call_count) + + # Now check a 404 response + mock_log.reset_mock() + resp_mock.status_code = 404 + self.client.delete_resource_provider(self.context, cn) + # With a 404, neither log message should be called + self.assertEqual(0, mock_log.info.call_count) + self.assertEqual(0, mock_log.warning.call_count) + + # Finally, check a 409 response + mock_log.reset_mock() + resp_mock.status_code = 409 + self.client.delete_resource_provider(self.context, cn) + # With a 409, only the error should be called + self.assertEqual(0, mock_log.info.call_count) + self.assertEqual(1, mock_log.error.call_count) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_get_allocations_for_resource_provider(self, mock_get): + mock_get.return_value = fake_requests.FakeResponse( + 200, content=jsonutils.dumps( + {'allocations': 'fake', 'resource_provider_generation': 42})) + ret = self.client.get_allocations_for_resource_provider( + self.context, 'rpuuid') + self.assertEqual('fake', ret.allocations) + mock_get.assert_called_once_with( + '/resource_providers/rpuuid/allocations', + global_request_id=self.context.global_id) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_get_allocations_for_resource_provider_fail(self, mock_get): + mock_get.return_value = fake_requests.FakeResponse(400, content="ouch") + self.assertRaises(exception.ResourceProviderAllocationRetrievalFailed, + self.client.get_allocations_for_resource_provider, + self.context, 'rpuuid') + mock_get.assert_called_once_with( + '/resource_providers/rpuuid/allocations', + global_request_id=self.context.global_id) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_get_allocs_for_consumer(self, mock_get): + mock_get.return_value = fake_requests.FakeResponse( + 200, content=jsonutils.dumps({'foo': 'bar'})) + ret = self.client.get_allocs_for_consumer(self.context, 'consumer') + self.assertEqual({'foo': 'bar'}, ret) + mock_get.assert_called_once_with( + '/allocations/consumer', version='1.28', + global_request_id=self.context.global_id) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_get_allocs_for_consumer_fail(self, mock_get): + mock_get.return_value = fake_requests.FakeResponse(400, content='err') + self.assertRaises(exception.ConsumerAllocationRetrievalFailed, + self.client.get_allocs_for_consumer, + self.context, 'consumer') + mock_get.assert_called_once_with( + '/allocations/consumer', version='1.28', + global_request_id=self.context.global_id) + + def _test_remove_res_from_alloc( + self, current_allocations, resources_to_remove, + updated_allocations): + + with zun_utils.nested_contexts( + mock.patch( + "zun.scheduler.client.report.SchedulerReportClient.get"), + mock.patch( + "zun.scheduler.client.report.SchedulerReportClient.put") + ) as (mock_get, mock_put): + mock_get.return_value = fake_requests.FakeResponse( + 200, 
content=jsonutils.dumps(current_allocations)) + + self.client.remove_resources_from_container_allocation( + self.context, uuids.consumer_uuid, resources_to_remove) + + mock_get.assert_called_once_with( + '/allocations/%s' % uuids.consumer_uuid, version='1.28', + global_request_id=self.context.global_id) + mock_put.assert_called_once_with( + '/allocations/%s' % uuids.consumer_uuid, updated_allocations, + version='1.28', global_request_id=self.context.global_id) + + def test_remove_res_from_alloc(self): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 13, + "resources": { + 'VCPU': 10, + 'MEMORY_MB': 4096, + }, + }, + uuids.rp2: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + 'NET_BW_IGR_KILOBIT_PER_SEC': 300, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + resources_to_remove = { + uuids.rp1: { + 'VCPU': 1 + }, + uuids.rp2: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 100, + 'NET_BW_IGR_KILOBIT_PER_SEC': 200, + } + } + updated_allocations = { + "allocations": { + uuids.rp1: { + "generation": 13, + "resources": { + 'VCPU': 9, + 'MEMORY_MB': 4096, + }, + }, + uuids.rp2: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 100, + 'NET_BW_IGR_KILOBIT_PER_SEC': 100, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + + self._test_remove_res_from_alloc( + current_allocations, resources_to_remove, updated_allocations) + + def test_remove_res_from_alloc_remove_rc_when_value_dropped_to_zero(self): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + 'NET_BW_IGR_KILOBIT_PER_SEC': 300, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + # this will remove all of NET_BW_EGR_KILOBIT_PER_SEC resources from + # the allocation so the whole resource class will be removed + resources_to_remove = { + uuids.rp1: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + 'NET_BW_IGR_KILOBIT_PER_SEC': 200, + } + } + updated_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_IGR_KILOBIT_PER_SEC': 100, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + + self._test_remove_res_from_alloc( + current_allocations, resources_to_remove, updated_allocations) + + def test_remove_res_from_alloc_remove_rp_when_all_rc_removed(self): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + 'NET_BW_IGR_KILOBIT_PER_SEC': 300, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + resources_to_remove = { + uuids.rp1: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + 'NET_BW_IGR_KILOBIT_PER_SEC': 300, + } + } + updated_allocations = { + "allocations": {}, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + + self._test_remove_res_from_alloc( + current_allocations, resources_to_remove, updated_allocations) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_remove_res_from_alloc_failed_to_get_alloc( + self, mock_get): + mock_get.side_effect = ks_exc.EndpointNotFound() + resources_to_remove = { + uuids.rp1: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + 'NET_BW_IGR_KILOBIT_PER_SEC': 200, + } + } + + self.assertRaises( + 
ks_exc.ClientException, + self.client.remove_resources_from_container_allocation, + self.context, uuids.consumer_uuid, resources_to_remove) + + def test_remove_res_from_alloc_empty_alloc(self): + resources_to_remove = { + uuids.rp1: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + 'NET_BW_IGR_KILOBIT_PER_SEC': 200, + } + } + current_allocations = { + "allocations": {}, + "consumer_generation": 0, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + ex = self.assertRaises( + exception.AllocationUpdateFailed, + self._test_remove_res_from_alloc, current_allocations, + resources_to_remove, None) + self.assertIn('The allocation is empty', six.text_type(ex)) + + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.put") + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_remove_res_from_alloc_no_resource_to_remove( + self, mock_get, mock_put): + self.client.remove_resources_from_container_allocation( + self.context, uuids.consumer_uuid, {}) + + mock_get.assert_not_called() + mock_put.assert_not_called() + + def test_remove_res_from_alloc_missing_rc(self): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + resources_to_remove = { + uuids.rp1: { + 'VCPU': 1, + } + } + + ex = self.assertRaises( + exception.AllocationUpdateFailed, self._test_remove_res_from_alloc, + current_allocations, resources_to_remove, None) + self.assertIn( + "Key 'VCPU' is missing from the allocation", + six.text_type(ex)) + + def test_remove_res_from_alloc_missing_rp(self): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + resources_to_remove = { + uuids.other_rp: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + } + } + + ex = self.assertRaises( + exception.AllocationUpdateFailed, self._test_remove_res_from_alloc, + current_allocations, resources_to_remove, None) + self.assertIn( + "Key '%s' is missing from the allocation" % uuids.other_rp, + six.text_type(ex)) + + def test_remove_res_from_alloc_not_enough_resource_to_remove(self): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + resources_to_remove = { + uuids.rp1: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 400, + } + } + + ex = self.assertRaises( + exception.AllocationUpdateFailed, self._test_remove_res_from_alloc, + current_allocations, resources_to_remove, None) + self.assertIn( + 'There are not enough allocated resources left on %s resource ' + 'provider to remove 400 amount of NET_BW_EGR_KILOBIT_PER_SEC ' + 'resources' % + uuids.rp1, + six.text_type(ex)) + + @mock.patch('time.sleep', new=mock.Mock()) + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.put") + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_remove_res_from_alloc_retry_succeed( + self, mock_get, mock_put): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + 
current_allocations_2 = copy.deepcopy(current_allocations) + current_allocations_2['consumer_generation'] = 3 + resources_to_remove = { + uuids.rp1: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + } + } + updated_allocations = { + "allocations": {}, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + updated_allocations_2 = copy.deepcopy(updated_allocations) + updated_allocations_2['consumer_generation'] = 3 + mock_get.side_effect = [ + fake_requests.FakeResponse( + 200, content=jsonutils.dumps(current_allocations)), + fake_requests.FakeResponse( + 200, content=jsonutils.dumps(current_allocations_2)) + ] + + mock_put.side_effect = [ + fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps( + {'errors': [{'code': 'placement.concurrent_update', + 'detail': ''}]})), + fake_requests.FakeResponse( + status_code=204) + ] + + self.client.remove_resources_from_container_allocation( + self.context, uuids.consumer_uuid, resources_to_remove) + + self.assertEqual( + [ + mock.call( + '/allocations/%s' % uuids.consumer_uuid, version='1.28', + global_request_id=self.context.global_id), + mock.call( + '/allocations/%s' % uuids.consumer_uuid, version='1.28', + global_request_id=self.context.global_id) + ], + mock_get.mock_calls) + + self.assertEqual( + [ + mock.call( + '/allocations/%s' % uuids.consumer_uuid, + updated_allocations, version='1.28', + global_request_id=self.context.global_id), + mock.call( + '/allocations/%s' % uuids.consumer_uuid, + updated_allocations_2, version='1.28', + global_request_id=self.context.global_id), + ], + mock_put.mock_calls) + + @mock.patch('time.sleep', new=mock.Mock()) + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.put") + @mock.patch("zun.scheduler.client.report.SchedulerReportClient.get") + def test_remove_res_from_alloc_run_out_of_retries( + self, mock_get, mock_put): + current_allocations = { + "allocations": { + uuids.rp1: { + "generation": 42, + "resources": { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + }, + }, + }, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + resources_to_remove = { + uuids.rp1: { + 'NET_BW_EGR_KILOBIT_PER_SEC': 200, + } + } + updated_allocations = { + "allocations": {}, + "consumer_generation": 2, + "project_id": uuids.project_id, + "user_id": uuids.user_id, + } + + get_rsp = fake_requests.FakeResponse( + 200, content=jsonutils.dumps(current_allocations)) + + mock_get.side_effect = [get_rsp] * 4 + + put_rsp = fake_requests.FakeResponse( + status_code=409, + content=jsonutils.dumps({ + 'errors': [{'code': 'placement.concurrent_update', + 'detail': ''}]})) + + mock_put.side_effect = [put_rsp] * 4 + + ex = self.assertRaises( + exception.AllocationUpdateFailed, + self.client.remove_resources_from_container_allocation, + self.context, uuids.consumer_uuid, resources_to_remove) + self.assertIn( + 'due to multiple successive generation conflicts', + six.text_type(ex)) + + get_call = mock.call( + '/allocations/%s' % uuids.consumer_uuid, version='1.28', + global_request_id=self.context.global_id) + + mock_get.assert_has_calls([get_call] * 4) + + put_call = mock.call( + '/allocations/%s' % uuids.consumer_uuid, updated_allocations, + version='1.28', global_request_id=self.context.global_id) + + mock_put.assert_has_calls([put_call] * 4) + + +class TestResourceClass(SchedulerReportClientTestCase): + def setUp(self): + super(TestResourceClass, self).setUp() + _put_patch = mock.patch( + "zun.scheduler.client.report.SchedulerReportClient.put") + 
self.addCleanup(_put_patch.stop) + self.mock_put = _put_patch.start() + + def test_ensure_resource_classes(self): + rcs = ['VCPU', 'CUSTOM_FOO', 'MEMORY_MB', 'CUSTOM_BAR'] + self.client._ensure_resource_classes(self.context, rcs) + self.mock_put.assert_has_calls([ + mock.call('/resource_classes/%s' % rc, None, version='1.7', + global_request_id=self.context.global_id) + for rc in ('CUSTOM_FOO', 'CUSTOM_BAR') + ], any_order=True) + + def test_ensure_resource_classes_none(self): + for empty in ([], (), set(), {}): + self.client._ensure_resource_classes(self.context, empty) + self.mock_put.assert_not_called() + + def test_ensure_resource_classes_put_fail(self): + self.mock_put.return_value = fake_requests.FakeResponse(503) + rcs = ['VCPU', 'MEMORY_MB', 'CUSTOM_BAD'] + self.assertRaises( + exception.InvalidResourceClass, + self.client._ensure_resource_classes, self.context, rcs) + # Only called with the "bad" one + self.mock_put.assert_called_once_with( + '/resource_classes/CUSTOM_BAD', None, version='1.7', + global_request_id=self.context.global_id) + + +class TestAggregateAddRemoveHost(SchedulerReportClientTestCase): + """Unit tests for the methods of the report client which look up providers + by name and add/remove host aggregates to providers. These methods do not + access the SchedulerReportClient provider_tree attribute and are called + from the zun API, not the zun compute manager/resource tracker. + """ + def setUp(self): + super(TestAggregateAddRemoveHost, self).setUp() + self.mock_get = self.useFixture( + fixtures.MockPatch('zun.scheduler.client.report.' + 'SchedulerReportClient.get')).mock + self.mock_put = self.useFixture( + fixtures.MockPatch('zun.scheduler.client.report.' + 'SchedulerReportClient.put')).mock + + def test_get_provider_by_name_success(self): + get_resp = mock.Mock() + get_resp.status_code = 200 + get_resp.json.return_value = { + "resource_providers": [ + mock.sentinel.expected, + ] + } + self.mock_get.return_value = get_resp + name = 'cn1' + res = self.client.get_provider_by_name(self.context, name) + + exp_url = "/resource_providers?name=%s" % name + self.mock_get.assert_called_once_with( + exp_url, global_request_id=self.context.global_id) + self.assertEqual(mock.sentinel.expected, res) + + @mock.patch.object(report.LOG, 'warning') + def test_get_provider_by_name_multiple_results(self, mock_log): + """Test that if we find multiple resource providers with the same name, + that a ResourceProviderNotFound is raised (the reason being that >1 + resource provider with a name should never happen...) 
+ """ + get_resp = mock.Mock() + get_resp.status_code = 200 + get_resp.json.return_value = { + "resource_providers": [ + {'uuid': uuids.cn1a}, + {'uuid': uuids.cn1b}, + ] + } + self.mock_get.return_value = get_resp + name = 'cn1' + self.assertRaises( + exception.ResourceProviderNotFound, + self.client.get_provider_by_name, self.context, name) + mock_log.assert_called_once() + + @mock.patch.object(report.LOG, 'warning') + def test_get_provider_by_name_500(self, mock_log): + get_resp = mock.Mock() + get_resp.status_code = 500 + self.mock_get.return_value = get_resp + name = 'cn1' + self.assertRaises( + exception.ResourceProviderNotFound, + self.client.get_provider_by_name, self.context, name) + mock_log.assert_called_once() + + @mock.patch.object(report.LOG, 'warning') + def test_get_provider_by_name_404(self, mock_log): + get_resp = mock.Mock() + get_resp.status_code = 404 + self.mock_get.return_value = get_resp + name = 'cn1' + self.assertRaises( + exception.ResourceProviderNotFound, + self.client.get_provider_by_name, self.context, name) + mock_log.assert_not_called() + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name') + def test_aggregate_add_host_success_no_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + agg_uuid = uuids.agg1 + mock_get_aggs.return_value = report.AggInfo(aggregates=set([]), + generation=42) + name = 'cn1' + self.client.aggregate_add_host(self.context, agg_uuid, host_name=name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([agg_uuid]), use_cache=False, + generation=42) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name', new=mock.NonCallableMock()) + def test_aggregate_add_host_rp_uuid(self, mock_get_aggs, mock_set_aggs): + mock_get_aggs.return_value = report.AggInfo( + aggregates=set([]), generation=42) + self.client.aggregate_add_host( + self.context, uuids.agg1, rp_uuid=uuids.cn1) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([uuids.agg1]), use_cache=False, + generation=42) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' 
+ 'get_provider_by_name') + def test_aggregate_add_host_success_already_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + agg1_uuid = uuids.agg1 + agg2_uuid = uuids.agg2 + agg3_uuid = uuids.agg3 + mock_get_aggs.return_value = report.AggInfo( + aggregates=set([agg1_uuid]), generation=42) + name = 'cn1' + self.client.aggregate_add_host(self.context, agg1_uuid, host_name=name) + mock_set_aggs.assert_not_called() + mock_get_aggs.reset_mock() + mock_set_aggs.reset_mock() + mock_get_aggs.return_value = report.AggInfo( + aggregates=set([agg1_uuid, agg3_uuid]), generation=43) + self.client.aggregate_add_host(self.context, agg2_uuid, host_name=name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([agg1_uuid, agg2_uuid, agg3_uuid]), + use_cache=False, generation=43) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name', + side_effect=exception.PlacementAPIConnectFailure) + def test_aggregate_add_host_no_placement(self, mock_get_by_name): + """Tests that PlacementAPIConnectFailure will be raised up from + aggregate_add_host if get_provider_by_name raises that error. + """ + name = 'cn1' + agg_uuid = uuids.agg1 + self.assertRaises( + exception.PlacementAPIConnectFailure, + self.client.aggregate_add_host, self.context, agg_uuid, + host_name=name) + self.mock_get.assert_not_called() + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name') + def test_aggregate_add_host_retry_success( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + gens = (42, 43, 44) + mock_get_aggs.side_effect = ( + report.AggInfo(aggregates=set([]), generation=gen) for gen in gens) + mock_set_aggs.side_effect = ( + exception.ResourceProviderUpdateConflict( + uuid='uuid', generation=42, error='error'), + exception.ResourceProviderUpdateConflict( + uuid='uuid', generation=43, error='error'), + None, + ) + self.client.aggregate_add_host(self.context, uuids.agg1, + host_name='cn1') + mock_set_aggs.assert_has_calls([mock.call( + self.context, uuids.cn1, set([uuids.agg1]), use_cache=False, + generation=gen) for gen in gens]) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' 
+ 'get_provider_by_name') + def test_aggregate_add_host_retry_raises( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + gens = (42, 43, 44, 45) + mock_get_aggs.side_effect = ( + report.AggInfo(aggregates=set([]), generation=gen) for gen in gens) + mock_set_aggs.side_effect = ( + exception.ResourceProviderUpdateConflict( + uuid='uuid', generation=gen, error='error') for gen in gens) + self.assertRaises( + exception.ResourceProviderUpdateConflict, + self.client.aggregate_add_host, self.context, uuids.agg1, + host_name='cn1') + mock_set_aggs.assert_has_calls([mock.call( + self.context, uuids.cn1, set([uuids.agg1]), use_cache=False, + generation=gen) for gen in gens]) + + def test_aggregate_add_host_no_host_name_or_rp_uuid(self): + self.assertRaises( + ValueError, + self.client.aggregate_add_host, self.context, uuids.agg1) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name', + side_effect=exception.PlacementAPIConnectFailure) + def test_aggregate_remove_host_no_placement(self, mock_get_by_name): + """Tests that PlacementAPIConnectFailure will be raised up from + aggregate_remove_host if get_provider_by_name raises that error. + """ + name = 'cn1' + agg_uuid = uuids.agg1 + self.assertRaises( + exception.PlacementAPIConnectFailure, + self.client.aggregate_remove_host, self.context, agg_uuid, name) + self.mock_get.assert_not_called() + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name') + def test_aggregate_remove_host_success_already_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + agg_uuid = uuids.agg1 + mock_get_aggs.return_value = report.AggInfo(aggregates=set([agg_uuid]), + generation=42) + name = 'cn1' + self.client.aggregate_remove_host(self.context, agg_uuid, name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([]), use_cache=False, generation=42) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name') + def test_aggregate_remove_host_success_no_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + agg1_uuid = uuids.agg1 + agg2_uuid = uuids.agg2 + agg3_uuid = uuids.agg3 + mock_get_aggs.return_value = report.AggInfo(aggregates=set([]), + generation=42) + name = 'cn1' + self.client.aggregate_remove_host(self.context, agg2_uuid, name) + mock_set_aggs.assert_not_called() + mock_get_aggs.reset_mock() + mock_set_aggs.reset_mock() + mock_get_aggs.return_value = report.AggInfo( + aggregates=set([agg1_uuid, agg2_uuid, agg3_uuid]), generation=43) + self.client.aggregate_remove_host(self.context, agg2_uuid, name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([agg1_uuid, agg3_uuid]), + use_cache=False, generation=43) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' 
+ 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name') + def test_aggregate_remove_host_retry_success( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + gens = (42, 43, 44) + mock_get_aggs.side_effect = ( + report.AggInfo(aggregates=set([uuids.agg1]), generation=gen) + for gen in gens) + mock_set_aggs.side_effect = ( + exception.ResourceProviderUpdateConflict( + uuid='uuid', generation=42, error='error'), + exception.ResourceProviderUpdateConflict( + uuid='uuid', generation=43, error='error'), + None, + ) + self.client.aggregate_remove_host(self.context, uuids.agg1, 'cn1') + mock_set_aggs.assert_has_calls([mock.call( + self.context, uuids.cn1, set([]), use_cache=False, + generation=gen) for gen in gens]) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.' + 'get_provider_by_name') + def test_aggregate_remove_host_retry_raises( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + 'generation': 1, + } + gens = (42, 43, 44, 45) + mock_get_aggs.side_effect = ( + report.AggInfo(aggregates=set([uuids.agg1]), generation=gen) + for gen in gens) + mock_set_aggs.side_effect = ( + exception.ResourceProviderUpdateConflict( + uuid='uuid', generation=gen, error='error') for gen in gens) + self.assertRaises( + exception.ResourceProviderUpdateConflict, + self.client.aggregate_remove_host, self.context, uuids.agg1, 'cn1') + mock_set_aggs.assert_has_calls([mock.call( + self.context, uuids.cn1, set([]), use_cache=False, + generation=gen) for gen in gens]) + + +class TestUsages(SchedulerReportClientTestCase): + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.get') + def test_get_usages_counts_for_quota_fail(self, mock_get): + # First call with project fails + mock_get.return_value = fake_requests.FakeResponse(500, content='err') + self.assertRaises(exception.UsagesRetrievalFailed, + self.client.get_usages_counts_for_quota, + self.context, 'fake-project') + mock_get.assert_called_once_with( + '/usages?project_id=fake-project', version='1.9', + global_request_id=self.context.global_id) + # Second call with project + user fails + mock_get.reset_mock() + fake_good_response = fake_requests.FakeResponse( + 200, content=jsonutils.dumps( + {'usages': {orc.VCPU: 2, + orc.MEMORY_MB: 512}})) + mock_get.side_effect = [fake_good_response, + fake_requests.FakeResponse(500, content='err')] + self.assertRaises(exception.UsagesRetrievalFailed, + self.client.get_usages_counts_for_quota, + self.context, 'fake-project', user_id='fake-user') + self.assertEqual(2, mock_get.call_count) + call1 = mock.call( + '/usages?project_id=fake-project', version='1.9', + global_request_id=self.context.global_id) + call2 = mock.call( + '/usages?project_id=fake-project&user_id=fake-user', version='1.9', + global_request_id=self.context.global_id) + mock_get.assert_has_calls([call1, call2]) + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.get') + def test_get_usages_counts_for_quota_retries(self, mock_get): + # Two attempts have a ConnectFailure and the third succeeds + 
fake_project_response = fake_requests.FakeResponse( + 200, content=jsonutils.dumps( + {'usages': {orc.VCPU: 2, + orc.MEMORY_MB: 512}})) + mock_get.side_effect = [ks_exc.ConnectFailure, + ks_exc.ConnectFailure, + fake_project_response] + counts = self.client.get_usages_counts_for_quota(self.context, + 'fake-project') + self.assertEqual(3, mock_get.call_count) + expected = {'project': {'cores': 2, 'ram': 512}} + self.assertDictEqual(expected, counts) + + # Project query succeeds, first project + user query has a + # ConnectFailure, second project + user query succeeds + mock_get.reset_mock() + fake_user_response = fake_requests.FakeResponse( + 200, content=jsonutils.dumps( + {'usages': {orc.VCPU: 1, + orc.MEMORY_MB: 256}})) + mock_get.side_effect = [fake_project_response, + ks_exc.ConnectFailure, + fake_user_response] + counts = self.client.get_usages_counts_for_quota( + self.context, 'fake-project', user_id='fake-user') + self.assertEqual(3, mock_get.call_count) + expected['user'] = {'cores': 1, 'ram': 256} + self.assertDictEqual(expected, counts) + + # Three attempts in a row have a ConnectFailure + mock_get.reset_mock() + mock_get.side_effect = [ks_exc.ConnectFailure] * 4 + self.assertRaises(ks_exc.ConnectFailure, + self.client.get_usages_counts_for_quota, + self.context, 'fake-project') + + @mock.patch('zun.scheduler.client.report.SchedulerReportClient.get') + def test_get_usages_counts_default_zero(self, mock_get): + # A project and user are not yet consuming any resources. + fake_response = fake_requests.FakeResponse( + 200, content=jsonutils.dumps({'usages': {}})) + mock_get.side_effect = [fake_response, fake_response] + + counts = self.client.get_usages_counts_for_quota( + self.context, 'fake-project', user_id='fake-user') + + self.assertEqual(2, mock_get.call_count) + expected = {'project': {'cores': 0, 'ram': 0}, + 'user': {'cores': 0, 'ram': 0}} + self.assertDictEqual(expected, counts)
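
The TestUsages cases above pin down the contract of get_usages_counts_for_quota fairly precisely: a GET to placement's /usages endpoint at microversion 1.9 per project (and a second GET per project plus user when a user_id is supplied), VCPU usage reported as 'cores' and MEMORY_MB as 'ram' with absent resource classes defaulting to zero, UsagesRetrievalFailed raised on a non-200 response, and keystoneauth ConnectFailure retried a bounded number of times. The sketch below only illustrates that contract; it is not the code added by this patch. It is written as a standalone helper that takes the report client and request context as parameters (a sketch convenience), assumes client.get() returns a requests-style response object, and omits the retry wrapper exercised by test_get_usages_counts_for_quota_retries.

    # Illustrative sketch of the behaviour asserted by TestUsages; not the
    # patched SchedulerReportClient implementation. The standalone-function
    # shape and the _usages_to_counts helper are sketch conveniences.
    import os_resource_classes as orc

    from zun.common import exception


    def _usages_to_counts(usages):
        # Absent resource classes count as zero usage
        # (see test_get_usages_counts_default_zero).
        return {'cores': usages.get(orc.VCPU, 0),
                'ram': usages.get(orc.MEMORY_MB, 0)}


    def get_usages_counts_for_quota(client, context, project_id,
                                    user_id=None):
        """Return usage counts keyed by 'project' and, optionally, 'user'."""
        counts = {}
        url = '/usages?project_id=%s' % project_id
        resp = client.get(url, version='1.9',
                          global_request_id=context.global_id)
        if resp.status_code != 200:
            raise exception.UsagesRetrievalFailed(project_id=project_id,
                                                  user_id=user_id)
        counts['project'] = _usages_to_counts(resp.json()['usages'])
        if user_id is not None:
            resp = client.get('%s&user_id=%s' % (url, user_id),
                              version='1.9',
                              global_request_id=context.global_id)
            if resp.status_code != 200:
                raise exception.UsagesRetrievalFailed(project_id=project_id,
                                                      user_id=user_id)
            counts['user'] = _usages_to_counts(resp.json()['usages'])
        return counts

The ConnectFailure handling covered by test_get_usages_counts_for_quota_retries would wrap calls like these so that ks_exc.ConnectFailure is retried a fixed number of times before being re-raised; the exact retry mechanism used by the patch is not part of this sketch.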