# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. import abc import json import logging import random import six from os_faults.ansible import executor from os_faults.api import cloud_management from os_faults.api import error from os_faults.api import node_collection from os_faults.api import service class FuelNodeCollection(node_collection.NodeCollection): def __init__(self, cloud_management=None, power_management=None, hosts=None): self.cloud_management = cloud_management self.power_management = power_management self.hosts = hosts def __repr__(self): return ('FuelNodeCollection(%s)' % [dict(ip=h['ip'], mac=h['mac']) for h in self.hosts]) def __len__(self): return len(self.hosts) def get_ips(self): return [n['ip'] for n in self.hosts] def get_macs(self): return [n['mac'] for n in self.hosts] def iterate_hosts(self): for host in self.hosts: yield host def filter(self, role): hosts = [h for h in self.hosts if role in h['roles']] return FuelNodeCollection(cloud_management=self.cloud_management, power_management=self.power_management, hosts=hosts) def pick(self, count=1): if count > len(self.hosts): msg = 'Cannot pick {} from {} node(s)'.format( count, len(self.hosts)) raise error.NodeCollectionError(msg) return FuelNodeCollection(cloud_management=self.cloud_management, power_management=self.power_management, hosts=random.sample(self.hosts, count)) def reboot(self): raise NotImplementedError def oom(self): raise NotImplementedError def poweroff(self): self.power_management.poweroff(self.get_macs()) def poweron(self): self.power_management.poweron(self.get_macs()) def reset(self): logging.info('Reset nodes: %s', self) self.power_management.reset(self.get_macs()) def enable_network(self, network_name): logging.info('Enable network: %s on nodes: %s', network_name, self) task = {'fuel_network_mgmt': { 'network_name': network_name, 'operation': 'up', }} self.cloud_management.execute_on_cloud(self.get_ips(), task) def disable_network(self, network_name): logging.info('Disable network: %s on nodes: %s', network_name, self) task = {'fuel_network_mgmt': { 'network_name': network_name, 'operation': 'down', }} self.cloud_management.execute_on_cloud(self.get_ips(), task) @six.add_metaclass(abc.ABCMeta) class FuelService(service.Service): def __init__(self, cloud_management=None, power_management=None): self.cloud_management = cloud_management self.power_management = power_management def __repr__(self): return str(type(self)) def _run_task(self, task, nodes=None): nodes = nodes or self.get_nodes() ips = nodes.get_ips() if not ips: raise error.ServiceError('Node collection is empty') results = self.cloud_management.execute_on_cloud(ips, task) err = False for result in results: if result.status != executor.STATUS_OK: logging.error( 'Task {} failed on node {}'.format(task, result.host)) err = True if err: raise error.ServiceError('Task failed on some nodes') return results def get_nodes(self): nodes = self.cloud_management.get_nodes() ips = nodes.get_ips() results = self.cloud_management.execute_on_cloud( ips, {'command': self.GET_NODES_CMD}, False) success_ips = [r.host for r in results if r.status == executor.STATUS_OK] hosts = [h for h in nodes.hosts if h['ip'] in success_ips] return FuelNodeCollection(cloud_management=self.cloud_management, power_management=self.power_management, hosts=hosts) def restart(self, nodes=None): if not getattr(self, 'RESTART_CMD'): raise NotImplementedError('RESTART_CMD is undefined') nodes = nodes or self.get_nodes() task_result = self._run_task({'command': self.RESTART_CMD}, nodes) logging.info('Restart %s, result: %s', str(self.__class__), task_result) def kill(self, nodes=None): nodes = nodes or self.get_nodes() task_result = self._run_task({'command': self.KILL_CMD}, nodes) logging.info('SIGKILL %s, result: %s', str(self.__class__), task_result) def freeze(self, nodes=None, sec=None): nodes = nodes or self.get_nodes() cmd = self.FREEZE_SEC_CMD.format(sec) if sec else self.FREEZE_CMD task_result = self._run_task({'command': cmd}, nodes) logging.info('FREEZE({0}) {1}, result: {2}'.format(sec or '', self.__class__, task_result)) def unfreeze(self, nodes=None): nodes = nodes or self.get_nodes() task_result = self._run_task({'command': self.UNFREEZE_CMD}, nodes) logging.info('UNFREEZE %s, result: %s', str(self.__class__), task_result) class KeystoneService(FuelService): GET_NODES_CMD = 'bash -c "ps ax | grep \'[k]eystone-main\'"' KILL_CMD = ('bash -c "ps ax | grep [k]eystone' ' | awk {\'print $1\'} | xargs kill -9"') RESTART_CMD = 'service apache2 restart' FREEZE_CMD = ('bash -c "ps ax | grep [k]eystone' ' | awk {\'print $1\'} | xargs kill -19"') FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);' 'echo -n \'#!\' > $tf; ' 'echo -en \'/bin/bash\\npids=`ps ax | ' 'grep [k]eystone | awk {{\\047print $1\\047}}`; ' 'echo $pids | xargs kill -19; sleep {0}; ' 'echo $pids | xargs kill -18; rm \' >> $tf; ' 'echo -n $tf >> $tf; ' 'chmod 770 $tf; nohup $tf &"') UNFREEZE_CMD = ('bash -c "ps ax | grep [k]eystone' ' | awk {\'print $1\'} | xargs kill -18"') class MySQLService(FuelService): GET_NODES_CMD = 'bash -c "netstat -tap | grep \'.*LISTEN.*mysqld\'"' KILL_CMD = ('bash -c "ps ax | grep [m]ysqld' ' | awk {\'print $1\'} | xargs kill -9"') FREEZE_CMD = ('bash -c "ps ax | grep [m]ysqld' ' | awk {\'print $1\'} | xargs kill -19"') FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);' 'echo -n \'#!\' > $tf; ' 'echo -en \'/bin/bash\\npids=`ps ax | ' 'grep [m]ysqld | awk {{\\047print $1\\047}}`; ' 'echo $pids | xargs kill -19; sleep {0}; ' 'echo $pids | xargs kill -18; rm \' >> $tf; ' 'echo -n $tf >> $tf; ' 'chmod 770 $tf; nohup $tf &"') UNFREEZE_CMD = ('bash -c "ps ax | grep [m]ysqld' ' | awk {\'print $1\'} | xargs kill -18"') class RabbitMQService(FuelService): GET_NODES_CMD = 'bash -c "rabbitmqctl status | grep \'pid,\'"' KILL_CMD = ('bash -c "ps ax | grep [r]abbitmq-server' ' | awk {\'print $1\'} | xargs kill -9"') FREEZE_CMD = ('bash -c "ps ax | grep [r]abbitmq-server' ' | awk {\'print $1\'} | xargs kill -19"') FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);' 'echo -n \'#!\' > $tf; ' 'echo -en \'/bin/bash\\npids=`ps ax | ' 'grep [r]abbitmq-server | awk {{\\047print $1\\047}}`; ' 'echo $pids | xargs kill -19; sleep {0}; ' 'echo $pids | xargs kill -18; rm \' >> $tf; ' 'echo -n $tf >> $tf; ' 'chmod 770 $tf; nohup $tf &"') UNFREEZE_CMD = ('bash -c "ps ax | grep [r]abbitmq-server' ' | awk {\'print $1\'} | xargs kill -18"') class NovaAPIService(FuelService): GET_NODES_CMD = 'bash -c "ps ax | grep \'[n]ova-api\'"' KILL_CMD = ('bash -c "ps ax | grep [n]ova-api' ' | awk {\'print $1\'} | xargs kill -9"') FREEZE_CMD = ('bash -c "ps ax | grep [n]ova-api' ' | awk {\'print $1\'} | xargs kill -19"') FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);' 'echo -n \'#!\' > $tf; ' 'echo -en \'/bin/bash\\npids=`ps ax | ' 'grep [n]ova-api | awk {{\\047print $1\\047}}`; ' 'echo $pids | xargs kill -19; sleep {0}; ' 'echo $pids | xargs kill -18; rm \' >> $tf; ' 'echo -n $tf >> $tf; ' 'chmod 770 $tf; nohup $tf &"') UNFREEZE_CMD = ('bash -c "ps ax | grep [n]ova-api' ' | awk {\'print $1\'} | xargs kill -18"') class GlanceAPIService(FuelService): GET_NODES_CMD = 'bash -c "ps ax | grep \'[g]lance-api\'"' KILL_CMD = ('bash -c "ps ax | grep [g]lance-api' ' | awk {\'print $1\'} | xargs kill -9"') FREEZE_CMD = ('bash -c "ps ax | grep [g]lance-api' ' | awk {\'print $1\'} | xargs kill -19"') FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);' 'echo -n \'#!\' > $tf; ' 'echo -en \'/bin/bash\\npids=`ps ax | ' 'grep [g]lance-api | awk {{\\047print $1\\047}}`; ' 'echo $pids | xargs kill -19; sleep {0}; ' 'echo $pids | xargs kill -18; rm \' >> $tf; ' 'echo -n $tf >> $tf; ' 'chmod 770 $tf; nohup $tf &"') UNFREEZE_CMD = ('bash -c "ps ax | grep [g]lance-api' ' | awk {\'print $1\'} | xargs kill -18"') SERVICE_NAME_TO_CLASS = { 'keystone': KeystoneService, 'mysql': MySQLService, 'rabbitmq': RabbitMQService, 'nova-api': NovaAPIService, 'glance-api': GlanceAPIService, } class FuelManagement(cloud_management.CloudManagement): def __init__(self, cloud_management_params): super(FuelManagement, self).__init__() self.master_node_address = cloud_management_params['address'] self.username = cloud_management_params['username'] self.master_node_executor = executor.AnsibleRunner( remote_user=self.username) self.cloud_executor = executor.AnsibleRunner( remote_user=self.username, ssh_common_args='-o ProxyCommand="ssh -W %%h:%%p %s@%s"' % (self.username, self.master_node_address)) self.cached_cloud_hosts = None self.fqdn_to_hosts = dict() def verify(self): """Verify connection to the cloud.""" hosts = self._get_cloud_hosts() logging.debug('Cloud hosts: %s', hosts) task = {'command': 'hostname'} host_addrs = [n['ip'] for n in hosts] logging.debug('Cloud nodes hostnames: %s', self.execute_on_cloud(host_addrs, task)) logging.info('Connected to cloud successfully') def _get_cloud_hosts(self): if not self.cached_cloud_hosts: task = {'command': 'fuel2 node list -f json'} r = self.execute_on_master_node(task) self.cached_cloud_hosts = json.loads(r[0].payload['stdout']) return self.cached_cloud_hosts def execute_on_master_node(self, task): """Execute task on Fuel master node. :param task: Ansible task :return: Ansible execution result (list of records) """ return self.master_node_executor.execute( [self.master_node_address], task) def execute_on_cloud(self, hosts, task, raise_on_error=True): """Execute task on specified hosts within the cloud. :param hosts: List of host FQDNs :param task: Ansible task :return: Ansible execution result (list of records) """ if raise_on_error: return self.cloud_executor.execute(hosts, task) else: return self.cloud_executor.execute(hosts, task, []) def _retrieve_hosts_fqdn(self): for host in self._get_cloud_hosts(): task = {'command': 'fuel2 node show %s -f json' % host['id']} r = self.execute_on_master_node(task) host_ext = json.loads(r[0].payload['stdout']) self.fqdn_to_hosts[host_ext['fqdn']] = host_ext def get_nodes(self, fqdns=None): """Get nodes in the cloud This function returns NodesCollection representing all nodes in the cloud or only those that has specified FQDNs. :param fqdns: list of FQDNs or None to retrieve all nodes :return: NodesCollection """ if not fqdns: # return all hosts hosts = self._get_cloud_hosts() return FuelNodeCollection(cloud_management=self, power_management=self.power_management, hosts=hosts) # return only specified if not self.fqdn_to_hosts: self._retrieve_hosts_fqdn() hosts = [self.fqdn_to_hosts[k] for k in fqdns] return FuelNodeCollection(cloud_management=self, power_management=self.power_management, hosts=hosts) def get_service(self, name): """Get service with specified name :param name: name of the serives :return: Service """ if name in SERVICE_NAME_TO_CLASS: klazz = SERVICE_NAME_TO_CLASS[name] return klazz(cloud_management=self, power_management=self.power_management)