2016-10-10 10:14:25 +00:00

544 lines
23 KiB
Python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import json
import logging
import random
import six
from os_faults.ansible import executor
from os_faults.api import cloud_management
from os_faults.api import error
from os_faults.api import node_collection
from os_faults.api import service
class FuelNodeCollection(node_collection.NodeCollection):
    """A group of Fuel-managed nodes that faults can be applied to.

    Every entry of ``hosts`` is a dict that carries at least the ``'ip'``
    and ``'mac'`` keys. Remote commands go through
    ``cloud_management.execute_on_cloud`` (addressed by IP) and power
    actions are delegated to ``power_management`` (addressed by MAC).
    """

    def __init__(self, cloud_management=None, power_management=None,
                 hosts=None):
        self.hosts = hosts
        self.power_management = power_management
        self.cloud_management = cloud_management

    def __repr__(self):
        summary = [dict(ip=h['ip'], mac=h['mac']) for h in self.hosts]
        return 'FuelNodeCollection(%s)' % summary

    def __len__(self):
        return len(self.hosts)

    def get_ips(self):
        """Return the IP address of every host, in collection order."""
        return [entry['ip'] for entry in self.hosts]

    def get_macs(self):
        """Return the MAC address of every host, in collection order."""
        return [entry['mac'] for entry in self.hosts]

    def iterate_hosts(self):
        """Yield the host dicts one by one."""
        for entry in self.hosts:
            yield entry

    def pick(self, count=1):
        """Return a new collection of ``count`` randomly sampled hosts.

        :param count: number of hosts to pick
        :raises error.NodeCollectionError: when ``count`` exceeds the
            number of hosts in this collection
        """
        available = len(self.hosts)
        if count > available:
            raise error.NodeCollectionError(
                'Cannot pick {} from {} node(s)'.format(count, available))
        chosen = random.sample(self.hosts, count)
        return FuelNodeCollection(cloud_management=self.cloud_management,
                                  power_management=self.power_management,
                                  hosts=chosen)

    def reboot(self):
        """Run ``reboot now`` on every node of the collection."""
        logging.info('Reboot nodes: %s', self)
        self.cloud_management.execute_on_cloud(
            self.get_ips(), {'command': 'reboot now'})

    def oom(self):
        """Not supported by this driver."""
        raise NotImplementedError

    def poweroff(self):
        """Power the nodes off through the power management driver."""
        logging.info('Power off nodes: %s', self)
        macs = self.get_macs()
        self.power_management.poweroff(macs)

    def poweron(self):
        """Power the nodes on through the power management driver."""
        logging.info('Power on nodes: %s', self)
        macs = self.get_macs()
        self.power_management.poweron(macs)

    def reset(self):
        """Reset the nodes through the power management driver."""
        logging.info('Reset nodes: %s', self)
        macs = self.get_macs()
        self.power_management.reset(macs)

    @staticmethod
    def _network_task(network_name, operation):
        # Build the 'fuel_network_mgmt' task shared by connect/disconnect.
        return {'fuel_network_mgmt': {
            'network_name': network_name,
            'operation': operation,
        }}

    def connect(self, network_name):
        """Bring network ``network_name`` up on every node."""
        logging.info("Connect network '%s' on nodes: %s", network_name, self)
        self.cloud_management.execute_on_cloud(
            self.get_ips(), self._network_task(network_name, 'up'))

    def disconnect(self, network_name):
        """Bring network ``network_name`` down on every node."""
        logging.info("Disconnect network '%s' on nodes: %s",
                     network_name, self)
        self.cloud_management.execute_on_cloud(
            self.get_ips(), self._network_task(network_name, 'down'))
@six.add_metaclass(abc.ABCMeta)
class FuelService(service.Service):
    """Base class for services running on Fuel-managed nodes.

    Concrete services are configured through class attributes that hold
    the commands to run on the nodes: ``SERVICE_NAME``, ``GET_NODES_CMD``,
    ``KILL_CMD``, ``FREEZE_CMD``, ``FREEZE_SEC_CMD`` and ``UNFREEZE_CMD``.
    ``RESTART_CMD`` is optional, and ``PORT`` / ``PLUG_CMD`` /
    ``UNPLUG_CMD`` are only read by :meth:`plug` and :meth:`unplug`.

    Every action accepts an optional ``nodes`` collection; when it is
    ``None`` the action targets the nodes returned by :meth:`get_nodes`.
    """

    def __init__(self, cloud_management=None, power_management=None):
        self.cloud_management = cloud_management
        self.power_management = power_management

    def __repr__(self):
        return str(type(self))

    def _run_task(self, task, nodes):
        """Execute ``task`` on ``nodes`` and fail if any node fails.

        :param task: Ansible task dict
        :param nodes: node collection providing ``get_ips()``
        :return: the execution results
        :raises error.ServiceError: when the collection has no IPs or when
            at least one result has a status other than ``STATUS_OK``
        """
        ips = nodes.get_ips()
        if not ips:
            raise error.ServiceError('Node collection is empty')

        results = self.cloud_management.execute_on_cloud(ips, task)
        failed = False
        for result in results:
            if result.status != executor.STATUS_OK:
                # Log every failing node before raising, not only the first.
                logging.error('Task %s failed on node %s', task, result.host)
                failed = True
        if failed:
            raise error.ServiceError('Task failed on some nodes')
        return results

    def get_nodes(self):
        """Return the nodes on which ``GET_NODES_CMD`` succeeds.

        The command is run on every cloud node with error raising disabled;
        nodes whose result status is ``STATUS_OK`` are kept.
        """
        nodes = self.cloud_management.get_nodes()
        results = self.cloud_management.execute_on_cloud(
            nodes.get_ips(), {'command': self.GET_NODES_CMD}, False)
        success_ips = {r.host for r in results
                       if r.status == executor.STATUS_OK}
        hosts = [h for h in nodes.hosts if h['ip'] in success_ips]
        return FuelNodeCollection(cloud_management=self.cloud_management,
                                  power_management=self.power_management,
                                  hosts=hosts)

    def restart(self, nodes=None):
        """Restart the service using ``RESTART_CMD``.

        :raises NotImplementedError: when the service does not define
            ``RESTART_CMD`` (or defines it empty)
        """
        # The default is essential: without it getattr() raises
        # AttributeError for services that lack RESTART_CMD and the
        # NotImplementedError below can never be reached.
        if not getattr(self, 'RESTART_CMD', None):
            raise NotImplementedError('RESTART_CMD is undefined')
        nodes = nodes if nodes is not None else self.get_nodes()
        logging.info("Restart '%s' service on nodes: %s", self.SERVICE_NAME,
                     nodes.get_ips())
        self._run_task({'command': self.RESTART_CMD}, nodes)

    def kill(self, nodes=None):
        """Kill the service processes using ``KILL_CMD``."""
        nodes = nodes if nodes is not None else self.get_nodes()
        logging.info("Kill '%s' service on nodes: %s", self.SERVICE_NAME,
                     nodes.get_ips())
        self._run_task({'command': self.KILL_CMD}, nodes)

    def freeze(self, nodes=None, sec=None):
        """Freeze the service, optionally for a limited time.

        :param sec: when truthy, ``FREEZE_SEC_CMD`` formatted with this
            value is used; otherwise (including ``0``) ``FREEZE_CMD`` is
            used
        """
        nodes = nodes if nodes is not None else self.get_nodes()
        cmd = self.FREEZE_SEC_CMD.format(sec) if sec else self.FREEZE_CMD
        logging.info("Freeze '%s' service %son nodes: %s", self.SERVICE_NAME,
                     ('for %s sec ' % sec) if sec else '', nodes.get_ips())
        self._run_task({'command': cmd}, nodes)

    def unfreeze(self, nodes=None):
        """Resume a frozen service using ``UNFREEZE_CMD``."""
        nodes = nodes if nodes is not None else self.get_nodes()
        logging.info("Unfreeze '%s' service on nodes: %s", self.SERVICE_NAME,
                     nodes.get_ips())
        self._run_task({'command': self.UNFREEZE_CMD}, nodes)

    def plug(self, nodes=None):
        """Run ``PLUG_CMD`` formatted with ``PORT`` (logged as open port)."""
        nodes = nodes if nodes is not None else self.get_nodes()
        logging.info("Open port %d for '%s' service on nodes: %s", self.PORT,
                     self.SERVICE_NAME, nodes.get_ips())
        self._run_task({'command': self.PLUG_CMD.format(self.PORT)}, nodes)

    def unplug(self, nodes=None):
        """Run ``UNPLUG_CMD`` formatted with ``PORT`` (logged as close port)."""
        nodes = nodes if nodes is not None else self.get_nodes()
        logging.info("Close port %d for '%s' service on nodes: %s",
                     self.PORT, self.SERVICE_NAME, nodes.get_ips())
        self._run_task({'command': self.UNPLUG_CMD.format(self.PORT)}, nodes)
class KeystoneService(FuelService):
    """Command set for the 'keystone' service.

    Processes are selected with ``ps ax | grep [k]eystone``; the bracketed
    first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'keystone'

    # Node discovery: grep exits non-zero where no 'keystone-main' process
    # is listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[k]eystone-main\'"'

    # Restart goes through the apache2 service.
    RESTART_CMD = 'service apache2 restart'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [k]eystone'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [k]eystone'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [k]eystone'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [k]eystone | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class MemcachedService(FuelService):
    """Command set for the 'memcached' service.

    Processes are selected with ``ps ax | grep [m]emcached``; the bracketed
    first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'memcached'

    # Node discovery: grep exits non-zero where no memcached process is
    # listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[m]emcached\'"'

    RESTART_CMD = 'service memcached restart'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [m]emcached'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [m]emcached'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [m]emcached'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [m]emcached | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class MySQLService(FuelService):
    """Command set for the 'mysql' service.

    No ``RESTART_CMD`` is declared for this service. It is the only
    service in this module that declares ``PORT``, ``PLUG_CMD`` and
    ``UNPLUG_CMD``.
    """

    SERVICE_NAME = 'mysql'

    # Node discovery: nodes where netstat lists a listening mysqld socket.
    GET_NODES_CMD = 'bash -c "netstat -tap | grep \'.*LISTEN.*mysqld\'"'

    # Signal 9 is sent to every pid matched by 'grep [m]ysqld'.
    KILL_CMD = ('bash -c "ps ax | grep [m]ysqld'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [m]ysqld'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [m]ysqld'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [m]ysqld | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')

    # Port substituted for '{0}' in the two iptables commands below.
    PORT = 3307

    # UNPLUG_CMD inserts, at position 1 of INPUT, a DROP rule for the port
    # tagged with the comment 'MySQL_temporary_DROP'.
    UNPLUG_CMD = ('bash -c "iptables -I INPUT 1 -p tcp --dport {0} -j DROP '
                  '-m comment --comment \"MySQL_temporary_DROP\""')

    # PLUG_CMD deletes that tagged DROP rule, once per INPUT line that
    # mentions the comment.
    PLUG_CMD = ('bash -c "rule=`iptables -L INPUT -n --line-numbers | '
                'grep \"MySQL_temporary_DROP\" | cut -d \' \' -f1`; '
                'for arg in $rule; do iptables -D INPUT -p tcp --dport {0} '
                '-j DROP -m comment --comment "MySQL_temporary_DROP"; done"')
class RabbitMQService(FuelService):
    """Command set for the 'rabbitmq' service.

    Processes are selected with ``ps ax | grep '[r]abbit tcp_listeners'``.
    No ``RESTART_CMD`` is declared for this service.
    """

    SERVICE_NAME = 'rabbitmq'

    # Node discovery: nodes where 'rabbitmqctl status' output contains
    # 'pid,'.
    GET_NODES_CMD = 'bash -c "rabbitmqctl status | grep \'pid,\'"'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep \'[r]abbit tcp_listeners\''
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep \'[r]abbit tcp_listeners\''
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep \'[r]abbit tcp_listeners\''
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep \\047[r]abbit tcp_listeners\\047 | '
                      'awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class NovaAPIService(FuelService):
    """Command set for the 'nova-api' service.

    Processes are selected with ``ps ax | grep [n]ova-api``; the bracketed
    first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'nova-api'

    # Node discovery: grep exits non-zero where no nova-api process is
    # listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[n]ova-api\'"'

    RESTART_CMD = 'service nova-api restart'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [n]ova-api'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [n]ova-api'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [n]ova-api'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [n]ova-api | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class GlanceAPIService(FuelService):
    """Command set for the 'glance-api' service.

    Processes are selected with ``ps ax | grep [g]lance-api``; the
    bracketed first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'glance-api'

    # Node discovery: grep exits non-zero where no glance-api process is
    # listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[g]lance-api\'"'

    RESTART_CMD = 'service glance-api restart'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [g]lance-api'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [g]lance-api'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [g]lance-api'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [g]lance-api | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class NovaComputeService(FuelService):
    """Command set for the 'nova-compute' service.

    Processes are selected with ``ps ax | grep [n]ova-compute``; the
    bracketed first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'nova-compute'

    # Node discovery: grep exits non-zero where no nova-compute process is
    # listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[n]ova-compute\'"'

    RESTART_CMD = 'service nova-compute restart'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [n]ova-compute'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [n]ova-compute'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [n]ova-compute'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [n]ova-compute | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class NovaSchedulerService(FuelService):
    """Command set for the 'nova-scheduler' service.

    Processes are selected with ``ps ax | grep [n]ova-scheduler``; the
    bracketed first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'nova-scheduler'

    # Node discovery: grep exits non-zero where no nova-scheduler process
    # is listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[n]ova-scheduler\'"'

    RESTART_CMD = 'service nova-scheduler restart'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [n]ova-scheduler'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [n]ova-scheduler'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [n]ova-scheduler'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [n]ova-scheduler | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class NeutronOpenvswitchAgentService(FuelService):
    """Command set for the 'neutron-openvswitch-agent' service.

    Processes are selected with
    ``ps ax | grep [n]eutron-openvswitch-agent``; the bracketed first
    letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'neutron-openvswitch-agent'

    # Node discovery: grep exits non-zero where no agent process is listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[n]eutron-openvswitch-agent\'"'

    # Restart via pcs when 'pcs resource show' knows the resource,
    # otherwise via the plain service command.
    RESTART_CMD = ('bash -c "if pcs resource show neutron-openvswitch-agent; '
                   'then pcs resource restart neutron-openvswitch-agent; '
                   'else service neutron-openvswitch-agent restart; fi"')

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [n]eutron-openvswitch-agent'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [n]eutron-openvswitch-agent'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [n]eutron-openvswitch-agent'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [n]eutron-openvswitch-agent'
                      ' | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class NeutronL3AgentService(FuelService):
    """Command set for the 'neutron-l3-agent' service.

    Processes are selected with ``ps ax | grep [n]eutron-l3-agent``; the
    bracketed first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'neutron-l3-agent'

    # Node discovery: grep exits non-zero where no agent process is listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[n]eutron-l3-agent\'"'

    # Restart via pcs when 'pcs resource show' knows the resource,
    # otherwise via the plain service command.
    RESTART_CMD = ('bash -c "if pcs resource show neutron-l3-agent; '
                   'then pcs resource restart neutron-l3-agent; '
                   'else service neutron-l3-agent restart; fi"')

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [n]eutron-l3-agent'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [n]eutron-l3-agent'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [n]eutron-l3-agent'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [n]eutron-l3-agent | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class HeatAPIService(FuelService):
    """Command set for the 'heat-api' service.

    Processes are selected with ``ps ax | grep '[h]eat-api '``. The
    pattern ends with a space, so only command lines where 'heat-api' is
    followed by a space match.
    """

    SERVICE_NAME = 'heat-api'

    # Node discovery: grep exits non-zero where no matching process is
    # listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep \'[h]eat-api \'"'

    RESTART_CMD = 'service heat-api restart'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep \'[h]eat-api \''
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep \'[h]eat-api \''
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep \'[h]eat-api \''
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep \\047[h]eat-api \\047'
                      ' | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
class HeatEngineService(FuelService):
    """Command set for the 'heat-engine' service.

    Processes are selected with ``ps ax | grep [h]eat-engine``; the
    bracketed first letter keeps the grep process itself out of the match.
    """

    SERVICE_NAME = 'heat-engine'

    # Node discovery: grep exits non-zero where no heat-engine process is
    # listed.
    GET_NODES_CMD = 'bash -c "ps ax | grep [h]eat-engine"'

    # Restart goes through pcs, using the 'p_heat-engine' resource.
    RESTART_CMD = 'pcs resource restart p_heat-engine'

    # Signal 9 is sent to every matching pid.
    KILL_CMD = ('bash -c "ps ax | grep [h]eat-engine'
                ' | awk {\'print $1\'} | xargs kill -9"')

    # Freeze / unfreeze send signals 19 and 18 to the matching processes
    # (SIGSTOP / SIGCONT on typical Linux targets).
    FREEZE_CMD = ('bash -c "ps ax | grep [h]eat-engine'
                  ' | awk {\'print $1\'} | xargs kill -19"')
    UNFREEZE_CMD = ('bash -c "ps ax | grep [h]eat-engine'
                    ' | awk {\'print $1\'} | xargs kill -18"')

    # Timed freeze; '{0}' receives the number of seconds. The command
    # writes a helper script to a mktemp file and launches it with nohup in
    # the background. The script records the matching pids, sends signal
    # 19, sleeps, sends signal 18 and finally removes its own file.
    # '{{' / '}}' are braces escaped for str.format; \047 is the octal form
    # of a single quote, expanded by 'echo -e'.
    FREEZE_SEC_CMD = ('bash -c "tf=$(mktemp /tmp/script.XXXXXX);'
                      'echo -n \'#!\' > $tf; '
                      'echo -en \'/bin/bash\\npids=`ps ax | '
                      'grep [h]eat-engine | awk {{\\047print $1\\047}}`; '
                      'echo $pids | xargs kill -19; sleep {0}; '
                      'echo $pids | xargs kill -18; rm \' >> $tf; '
                      'echo -n $tf >> $tf; '
                      'chmod 770 $tf; nohup $tf &"')
# Registry of the supported services, keyed by the SERVICE_NAME that each
# class declares (e.g. 'keystone' -> KeystoneService).
SERVICE_NAME_TO_CLASS = {
    service_cls.SERVICE_NAME: service_cls
    for service_cls in (
        KeystoneService,
        MemcachedService,
        MySQLService,
        RabbitMQService,
        NovaAPIService,
        GlanceAPIService,
        NovaComputeService,
        NovaSchedulerService,
        NeutronOpenvswitchAgentService,
        NeutronL3AgentService,
        HeatAPIService,
        HeatEngineService,
    )
}
class FuelManagement(cloud_management.CloudManagement):
    """Cloud management driver that works through a Fuel master node.

    Two Ansible runners are created: one for the master node and one for
    the cloud nodes, the latter using the master node as a jump host. The
    node list is fetched once with ``fuel node --json`` and then cached.

    ``self.power_management`` is read by this class but not assigned here;
    presumably the base class provides it (to be confirmed against
    ``CloudManagement``).
    """

    NAME = 'fuel'

    def __init__(self, cloud_management_params):
        """Set up the executors.

        :param cloud_management_params: dict with the mandatory keys
            ``'address'`` and ``'username'`` and the optional key
            ``'private_key_file'``
        """
        super(FuelManagement, self).__init__()

        params = cloud_management_params
        self.master_node_address = params['address']
        self.username = params['username']
        self.private_key_file = params.get('private_key_file')

        self.master_node_executor = executor.AnsibleRunner(
            remote_user=self.username,
            private_key_file=self.private_key_file)
        self.cloud_executor = executor.AnsibleRunner(
            remote_user=self.username,
            private_key_file=self.private_key_file,
            jump_host=self.master_node_address)

        # Filled lazily by _get_cloud_hosts().
        self.cached_cloud_hosts = list()
        self.fqdn_to_hosts = dict()

    def verify(self):
        """Verify connection to the cloud.

        Runs ``hostname`` on every known cloud node and logs the output.
        """
        cloud_hosts = self._get_cloud_hosts()
        logging.debug('Cloud nodes: %s', cloud_hosts)

        addresses = [entry['ip'] for entry in cloud_hosts]
        outcome = self.execute_on_cloud(addresses, {'command': 'hostname'})
        hostnames = [record.payload['stdout'] for record in outcome]
        logging.debug('Hostnames of cloud nodes: %s', hostnames)
        logging.info('Connected to cloud successfully!')

    def _get_cloud_hosts(self):
        # Return the cached host list, querying the master node with
        # 'fuel node --json' only while the cache is still empty.
        if self.cached_cloud_hosts:
            return self.cached_cloud_hosts

        result = self.execute_on_master_node({'command': 'fuel node --json'})
        for record in json.loads(result[0].payload['stdout']):
            host = {'ip': record['ip'],
                    'mac': record['mac'],
                    'fqdn': record['fqdn']}
            self.cached_cloud_hosts.append(host)
            self.fqdn_to_hosts[host['fqdn']] = host
        return self.cached_cloud_hosts

    def execute_on_master_node(self, task):
        """Execute task on Fuel master node.

        :param task: Ansible task
        :return: Ansible execution result (list of records)
        """
        targets = [self.master_node_address]
        return self.master_node_executor.execute(targets, task)

    def execute_on_cloud(self, hosts, task, raise_on_error=True):
        """Execute task on specified hosts within the cloud.

        :param hosts: List of host addresses (callers in this module pass
            node IPs)
        :param task: Ansible task
        :param raise_on_error: when false, an empty list is handed to the
            executor as a third argument; presumably that stops it from
            raising on failed statuses (confirm against
            ``AnsibleRunner.execute``)
        :return: Ansible execution result (list of records)
        """
        if not raise_on_error:
            return self.cloud_executor.execute(hosts, task, [])
        return self.cloud_executor.execute(hosts, task)

    def get_nodes(self, fqdns=None):
        """Get nodes in the cloud

        Returns a collection holding either all nodes of the cloud or only
        the nodes named by ``fqdns``.

        :param fqdns: list of FQDNs, or None/empty to retrieve all nodes
        :return: FuelNodeCollection
        :raises error.NodeCollectionError: when a requested FQDN is unknown
        """
        # Always called first: it also populates fqdn_to_hosts.
        hosts = self._get_cloud_hosts()

        if fqdns:
            logging.debug('Trying to find nodes with FQDNs: %s', fqdns)
            hosts = list()
            for fqdn in fqdns:
                if fqdn not in self.fqdn_to_hosts:
                    raise error.NodeCollectionError(
                        'Node with FQDN \'%s\' not found!' % fqdn)
                hosts.append(self.fqdn_to_hosts[fqdn])
            logging.debug('The following nodes were found: %s', hosts)

        return FuelNodeCollection(cloud_management=self,
                                  power_management=self.power_management,
                                  hosts=hosts)

    def get_service(self, name):
        """Get service with specified name

        :param name: name of the service
        :return: Service instance, or None when the name is not registered
            in SERVICE_NAME_TO_CLASS
        """
        service_cls = SERVICE_NAME_TO_CLASS.get(name)
        if service_cls is None:
            return None
        return service_cls(cloud_management=self,
                           power_management=self.power_management)