
This patch improves the NUMA objects by adding mem_available and mem_total. It adds a parameter named "cpu_policy" to the Zun API to tag the mode of a container. It adds node resource management methods for NUMA. It also introduces a filter for cpuset. Co-Authored-By: Hongbin Lu <hongbin034@gmail.com> Change-Id: Ic65b86ceb6f7d37c2b4ef545425e4edd20328282 Implements: blueprint cpuset-container
281 lines
9.6 KiB
Python
281 lines
9.6 KiB
Python
# Copyright (c) 2017 OpenStack Foundation
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""
|
|
Claim objects for use with resource tracking.
|
|
"""
|
|
|
|
from oslo_log import log as logging
|
|
import random
|
|
|
|
from zun.common import exception
|
|
from zun.common.i18n import _
|
|
from zun import objects
|
|
|
|
|
|
# Module-level logger used by the claim classes in this file.
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class NopClaim(object):
    """For use with compute drivers that do not support resource tracking.

    Every resource amount is reported as zero and aborting is a no-op. The
    object can be used as a context manager: leaving the ``with`` block
    because of an exception calls ``abort``.
    """

    def __init__(self, *args, **kwargs):
        # Any arguments are accepted and ignored.
        self.claimed_numa_topology = None

    @property
    def memory(self):
        """Amount of memory claimed; always zero here."""
        return 0

    @property
    def cpu(self):
        """Amount of CPU claimed; always zero here."""
        return 0

    @property
    def disk(self):
        """Amount of disk claimed; always zero here."""
        return 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # A clean exit keeps the claim; only a failing block aborts it.
        # Nothing is returned, so the exception keeps propagating.
        if exc_type is None:
            return
        self.abort()

    def abort(self):
        """Release the claim; there is nothing to release for a no-op."""
        pass

    def __str__(self):
        template = "[Claim: %s memory, %.2f VCPU]"
        return template % (self.memory, self.cpu)
|
|
|
|
|
|
class Claim(NopClaim):
    """A declaration that a compute host operation will require free resources.

    Claims serve as marker objects that resources are being held until the
    update_available_resource audit process runs to do a full reconciliation
    of resource usage.

    This information will be used to help keep the local compute host's
    ComputeNode model in sync to aid the scheduler in making efficient / more
    correct decisions with respect to host selection.
    """

    def __init__(self, context, container, tracker, resources, pci_requests,
                 limits=None):
        """Test the claim and, for a dedicated CPU policy, pin a cpuset.

        :param context: request context, handed back to the tracker on abort
        :param container: container being claimed for; a clone is stored on
                          the claim, while the cpuset (dedicated policy only)
                          is written to the object passed in
        :param tracker: resource tracker used for PCI stats and for abort
        :param resources: available local compute node resources
        :param pci_requests: PCI requests to check against the tracker
        :param limits: optional dict of per-resource limits
        :raises ResourcesUnavailable: if any resource test fails, or if the
                                      container asks for dedicated CPUs but
                                      no cpuset limits were supplied
        """
        super(Claim, self).__init__()
        # Stash a copy of the container at the current point of time
        self.container = container.obj_clone()
        self._numa_topology_loaded = False
        self.tracker = tracker
        self.context = context
        self._pci_requests = pci_requests

        # Test the claim in the constructor so that callers do not need a
        # separate checking step.
        # Raises ResourcesUnavailable if the claim cannot be satisfied.
        self._claim_test(resources, limits)
        if container.cpu_policy == 'dedicated':
            # Without cpuset limits the cpuset tests above were skipped and
            # there is no CPU pool to pin from; fail with the claim error
            # instead of crashing on the missing dictionary.
            if not (limits and limits.get('cpuset')):
                raise exception.ResourcesUnavailable(
                    reason=_('No cpuset limits available for dedicated '
                             'cpu policy'))
            container.cpuset = objects.container.Cpuset()
            self.claim_cpuset_cpu_for_container(container, limits)
            self.claim_cpuset_mem_for_container(container, limits)

    @property
    def memory(self):
        """Memory requested by the container as an int (0 if unset)."""
        return int(self.container.memory or "0")

    @property
    def cpu(self):
        """CPU requested by the container (0 if unset)."""
        return self.container.cpu or 0

    @property
    def disk(self):
        """Disk requested by the container (0 if unset)."""
        return self.container.disk or 0

    def abort(self):
        """Requiring claimed resources has failed or been aborted."""
        LOG.debug("Aborting claim: %s", self)
        self.tracker.abort_container_claim(self.context, self.container)

    def claim_cpuset_cpu_for_container(self, container, limits):
        """Pin randomly chosen unpinned CPUs to the container.

        The candidates are the CPUs in ``limits['cpuset']['cpuset_cpu']``
        that are not in ``limits['cpuset']['cpuset_cpu_pinned']``, and
        ``int(self.cpu)`` of them are picked.

        :param container: container whose ``cpuset.cpuset_cpus`` is set
        :param limits: limits dict holding a ``'cpuset'`` entry
        :raises ValueError: from ``random.sample`` if the requested count is
                            negative or larger than the candidate pool
        """
        cpuset_limits = limits['cpuset']
        available_cpus = list(set(cpuset_limits['cpuset_cpu']) -
                              set(cpuset_limits['cpuset_cpu_pinned']))
        chosen_cpus = random.sample(available_cpus, int(self.cpu))
        container.cpuset.cpuset_cpus = set(chosen_cpus)

    def claim_cpuset_mem_for_container(self, container, limits):
        """Bind the container's memory to the node named in the limits.

        :param container: container whose ``cpuset.cpuset_mems`` is set
        :param limits: limits dict holding ``limits['cpuset']['node']``
        """
        node = limits['cpuset']['node']
        # _test_cpuset_mem uses this value as a single index into the NUMA
        # nodes, so it is a scalar: set(node) would raise TypeError for an
        # int and split a string into characters. Collections are still
        # accepted as before.
        if isinstance(node, (list, tuple, set, frozenset)):
            container.cpuset.cpuset_mems = set(node)
        else:
            container.cpuset.cpuset_mems = {node}

    def _claim_test(self, resources, limits=None):
        """Test if this claim can be satisfied.

        With given available resources and optional oversubscription limits

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param resources: available local compute node resources
        :param limits: optional dict of per-resource limits
        :returns: None when every resource test passes
        :raises ResourcesUnavailable: if at least one test fails; the reason
                                      joins every failure message
        """
        if not limits:
            limits = {}

        # If an individual limit is None, the resource will be considered
        # unlimited:
        memory_limit = limits.get('memory')
        cpu_limit = limits.get('cpu')
        disk_limit = limits.get('disk')
        cpuset_limit = limits.get('cpuset')

        LOG.info('Attempting claim: memory %(memory)s, '
                 'cpu %(cpu).02f CPU, disk %(disk)s',
                 {'memory': self.memory, 'cpu': self.cpu, 'disk': self.disk})

        results = [self._test_memory(resources, memory_limit),
                   self._test_cpu(resources, cpu_limit),
                   self._test_disk(resources, disk_limit),
                   self._test_pci(),
                   self._test_cpuset_cpu(resources, cpuset_limit),
                   self._test_cpuset_mem(resources, cpuset_limit)]
        # Each test returns None on success or a message on failure.
        reasons = [r for r in results if r is not None]
        if reasons:
            raise exception.ResourcesUnavailable(reason="; ".join(reasons))

        LOG.info('Claim successful')

    def _test_pci(self):
        """Return a failure message if the PCI requests are unsupported."""
        pci_requests = self._pci_requests
        if pci_requests and pci_requests.requests:
            stats = self.tracker.pci_tracker.stats
            if not stats.support_requests(pci_requests.requests):
                return _('Claim pci failed')

    def _test_memory(self, resources, limit):
        """Test the requested memory against the memory limit."""
        type_ = _("memory")
        unit = "MB"
        total = resources.mem_total
        used = resources.mem_used
        requested = self.memory

        return self._test(type_, unit, total, used, requested, limit)

    def _test_cpu(self, resources, limit):
        """Test the requested CPU against the CPU limit."""
        type_ = _("vcpu")
        unit = "VCPU"
        total = resources.cpus
        used = resources.cpu_used
        requested = self.cpu

        return self._test(type_, unit, total, used, requested, limit)

    def _test_cpuset_cpu(self, resources, limit):
        """Test the requested CPU against the unpinned CPUs in the limit.

        The test is skipped (returns None) when no cpuset limit is given.
        """
        if not limit:
            return

        type_ = _("cpuset_cpu")
        unit = "core"
        total = len(limit['cpuset_cpu'])
        used = len(limit['cpuset_cpu_pinned'])
        requested = self.cpu

        # The cap is the size of the CPU pool itself.
        return self._test(type_, unit, total, used, requested,
                          len(limit['cpuset_cpu']))

    def _test_cpuset_mem(self, resources, limit):
        """Test the requested memory against ``limit['cpuset_mem']``.

        The test is skipped (returns None) when no cpuset limit is given.
        """
        if not limit:
            return

        type_ = _("cpuset_mem")
        unit = "M"
        total = resources.numa_topology.nodes[limit['node']].mem_total
        # Usage is passed as zero, so the request is compared directly
        # with limit['cpuset_mem'].
        used = 0
        requested = self.memory

        return self._test(type_, unit, total, used, requested,
                          limit['cpuset_mem'])

    def _test_disk(self, resources, limit):
        """Test the requested disk against the disk limit."""
        type_ = _("disk")
        unit = "GB"
        total = resources.disk_total
        used = resources.disk_used
        requested = self.disk
        return self._test(type_, unit, total, used, requested, limit)

    def _test(self, type_, unit, total, used, requested, limit):
        """Test if the type resource needed for a claim can be allocated.

        :param type_: resource name used in log and failure messages
        :param unit: unit label used in log and failure messages
        :param total: total amount of the resource (only logged)
        :param used: amount already in use
        :param requested: amount this claim asks for
        :param limit: cap on usage, or None for unlimited
        :returns: None if the request fits or the limit is None, otherwise
                  a message describing the shortfall
        """

        LOG.info('Total %(type)s: %(total)d %(unit)s, used: %(used).02f '
                 '%(unit)s',
                 {'type': type_, 'total': total, 'unit': unit, 'used': used})

        if limit is None:
            # treat resource as unlimited:
            LOG.info('%(type)s limit not specified, defaulting to '
                     'unlimited', {'type': type_})
            return

        free = limit - used

        # Oversubscribed resource policy info:
        LOG.info('%(type)s limit: %(limit).02f %(unit)s, '
                 'free: %(free).02f %(unit)s',
                 {'type': type_, 'limit': limit, 'free': free, 'unit': unit})

        if requested > free:
            return (_('Free %(type)s %(free).02f '
                      '%(unit)s < requested %(requested)s %(unit)s') %
                    {'type': type_, 'free': free, 'unit': unit,
                     'requested': requested})
|
|
|
|
|
|
class UpdateClaim(Claim):
    """A claim for changing the resources of an existing container.

    Clones of both the new and the old container are kept on the claim. The
    ``memory`` and ``cpu`` amounts are the difference between the new and the
    old container, and aborting hands both clones back to the tracker.
    """

    def __init__(self, context, new_container, old_container, tracker,
                 resources, limits=None):
        # Both snapshots must exist before the parent constructor runs: it
        # tests the claim, which reads the ``memory`` and ``cpu`` properties.
        self.new_container = new_container.obj_clone()
        self.old_container = old_container.obj_clone()
        # The claim is tested with an empty list of PCI requests.
        empty_pci_requests = objects.ContainerPCIRequests(requests=[])
        super(UpdateClaim, self).__init__(
            context, new_container, tracker, resources,
            empty_pci_requests, limits)

    @property
    def memory(self):
        """Memory delta (new minus old); unset values count as zero."""
        requested = int(self.new_container.memory or "0")
        current = int(self.old_container.memory or "0")
        return requested - current

    @property
    def cpu(self):
        """CPU delta (new minus old); unset values count as zero."""
        return (self.new_container.cpu or 0) - (self.old_container.cpu or 0)

    def abort(self):
        """Requiring claimed resources has failed or been aborted."""
        LOG.debug("Aborting claim: %s", self)
        tracker = self.tracker
        tracker.abort_container_update_claim(
            self.context, self.new_container, self.old_container)
|