ceph: mgr-restful-plugin restarts on controller unlock

When the standby controller is unlocked, its mgr-restful-plugin
service starts and generates node-specific self-signed
certificates to be used by the restful plugin. This operation
triggers a restart of the "active" mgr restful plugin,
which in turn causes Ceph REST API requests to fail.

This failure is handled on the active controller by
restarting the service. The restart happens while stx-openstack
is being reapplied and is the reason why the mariadb pod fails to start.

Change the ceph-mgr and restful plugin configuration and startup
procedure so that a secondary ceph-mgr service doesn't disrupt
the active one.

Closes-Bug: 1837581
Change-Id: Id8e5e56d48669498202ed319a9aad68365b51f23
Signed-off-by: Daniel Badea <daniel.badea@windriver.com>
This commit is contained in:
Daniel Badea 2019-07-31 12:47:33 +00:00 committed by dbadea
parent 56a91fec13
commit d409d78cc1

View File

@ -25,10 +25,12 @@ import json
import logging import logging
import multiprocessing import multiprocessing
import os import os
import shutil
import signal import signal
import socket import socket
import subprocess import subprocess
import sys import sys
import tempfile
import time import time
import daemon import daemon
@ -73,6 +75,7 @@ class Config(object):
self.ceph_mgr_service = '/usr/bin/ceph-mgr' self.ceph_mgr_service = '/usr/bin/ceph-mgr'
self.ceph_mgr_cluster = 'ceph' self.ceph_mgr_cluster = 'ceph'
self.ceph_mgr_rundir = '/var/run/ceph/mgr' self.ceph_mgr_rundir = '/var/run/ceph/mgr'
self.ceph_mgr_confdir = '/var/lib/ceph/mgr'
self.ceph_mgr_identity = socket.gethostname() self.ceph_mgr_identity = socket.gethostname()
self.service_name = 'mgr-restful-plugin' self.service_name = 'mgr-restful-plugin'
@ -555,10 +558,10 @@ class ServiceMonitor(object):
# steps to configure/start ceph-mgr and restful plugin # steps to configure/start ceph-mgr and restful plugin
self.ceph_fsid_get() self.ceph_fsid_get()
self.ceph_mgr_auth_create() self.ceph_mgr_auth_create()
self.ceph_mgr_start()
self.restful_plugin_set_server_port() self.restful_plugin_set_server_port()
self.restful_plugin_enable()
self.restful_plugin_create_certificate() self.restful_plugin_create_certificate()
self.ceph_mgr_start()
self.restful_plugin_enable()
self.restful_plugin_create_admin_key() self.restful_plugin_create_admin_key()
self.restful_plugin_get_url() self.restful_plugin_get_url()
self.restful_plugin_get_certificate() self.restful_plugin_get_certificate()
@ -651,7 +654,7 @@ class ServiceMonitor(object):
def ceph_mgr_has_auth(self): def ceph_mgr_has_auth(self):
path = '{}/ceph-{}'.format( path = '{}/ceph-{}'.format(
CONFIG.ceph_mgr_rundir, CONFIG.ceph_mgr_identity) CONFIG.ceph_mgr_confdir, CONFIG.ceph_mgr_identity)
try: try:
os.makedirs(path) os.makedirs(path)
except OSError as err: except OSError as err:
@ -718,7 +721,7 @@ class ServiceMonitor(object):
with open(os.devnull, 'wb') as null: with open(os.devnull, 'wb') as null:
out = self.run_with_timeout( out = self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'get', ['/usr/bin/ceph', 'config-key', 'get',
'config/mgr/mgr/restful/server_port'], 'mgr/restful/server_port'],
CONFIG.ceph_cli_timeout_sec, stderr=null) CONFIG.ceph_cli_timeout_sec, stderr=null)
if out == str(CONFIG.restful_plugin_port): if out == str(CONFIG.restful_plugin_port):
return True return True
@ -735,7 +738,7 @@ class ServiceMonitor(object):
return return
LOG.info('Set restful plugin port=%d', CONFIG.restful_plugin_port) LOG.info('Set restful plugin port=%d', CONFIG.restful_plugin_port)
self.run_with_timeout( self.run_with_timeout(
['/usr/bin/ceph', 'config', 'set', 'mgr', ['/usr/bin/ceph', 'config-key', 'set',
'mgr/restful/server_port', str(CONFIG.restful_plugin_port)], 'mgr/restful/server_port', str(CONFIG.restful_plugin_port)],
CONFIG.ceph_cli_timeout_sec) CONFIG.ceph_cli_timeout_sec)
@ -761,10 +764,22 @@ class ServiceMonitor(object):
def restful_plugin_has_certificate(self): def restful_plugin_has_certificate(self):
try: try:
self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'get',
'config/mgr/restful/{}/crt'.format(CONFIG.ceph_mgr_identity)],
CONFIG.ceph_cli_timeout_sec)
self.run_with_timeout( self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'get', ['/usr/bin/ceph', 'config-key', 'get',
'mgr/restful/{}/crt'.format(CONFIG.ceph_mgr_identity)], 'mgr/restful/{}/crt'.format(CONFIG.ceph_mgr_identity)],
CONFIG.ceph_cli_timeout_sec) CONFIG.ceph_cli_timeout_sec)
self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'get',
'config/mgr/restful/{}/key'.format(CONFIG.ceph_mgr_identity)],
CONFIG.ceph_cli_timeout_sec)
self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'get',
'/mgr/restful/{}/key'.format(CONFIG.ceph_mgr_identity)],
CONFIG.ceph_cli_timeout_sec)
return True return True
except CommandFailed: except CommandFailed:
pass pass
@ -774,10 +789,42 @@ class ServiceMonitor(object):
if self.restful_plugin_has_certificate(): if self.restful_plugin_has_certificate():
return return
LOG.info('Create restful plugin self signed certificate') LOG.info('Create restful plugin self signed certificate')
self.run_with_timeout( path = tempfile.mkdtemp()
['/usr/bin/ceph', 'restful', try:
'create-self-signed-cert'], try:
CONFIG.ceph_cli_timeout_sec) subprocess.check_call([
'/usr/bin/openssl', 'req', '-new', '-nodes', '-x509',
'-subj', '/O=IT/CN=ceph-restful', '-days', '3650',
'-out', os.path.join(path, 'crt'),
'-keyout', os.path.join(path, 'key'),
'-extensions', 'v3_ca'])
except subprocess.CalledProcessError as err:
raise CommandFailed(
command=' '.join(err.cmd),
reason='failed to generate self-signed certificate: {}'.format(str(err)),
out=err.output)
self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'set',
'config/mgr/restful/{}/crt'.format(CONFIG.ceph_mgr_identity),
'-i', os.path.join(path, 'crt')],
CONFIG.ceph_cli_timeout_sec)
self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'set',
'mgr/restful/{}/crt'.format(CONFIG.ceph_mgr_identity),
'-i', os.path.join(path, 'crt')],
CONFIG.ceph_cli_timeout_sec)
self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'set',
'config/mgr/restful/{}/key'.format(CONFIG.ceph_mgr_identity),
'-i', os.path.join(path, 'key')],
CONFIG.ceph_cli_timeout_sec)
self.run_with_timeout(
['/usr/bin/ceph', 'config-key', 'set',
'mgr/restful/{}/key'.format(CONFIG.ceph_mgr_identity),
'-i', os.path.join(path, 'key')],
CONFIG.ceph_cli_timeout_sec)
finally:
shutil.rmtree(path)
def restful_plugin_is_enabled(self): def restful_plugin_is_enabled(self):
command = ['/usr/bin/ceph', 'mgr', 'module', 'ls', command = ['/usr/bin/ceph', 'mgr', 'module', 'ls',
@ -825,7 +872,7 @@ class ServiceMonitor(object):
def restful_plugin_get_certificate(self): def restful_plugin_get_certificate(self):
command = ['/usr/bin/ceph', 'config-key', 'get', command = ['/usr/bin/ceph', 'config-key', 'get',
'mgr/restful/controller-0/crt'] 'config/mgr/restful/{}/crt'.format(CONFIG.ceph_mgr_identity)]
with open(os.devnull, 'wb') as null: with open(os.devnull, 'wb') as null:
certificate = self.run_with_timeout( certificate = self.run_with_timeout(
command, CONFIG.ceph_cli_timeout_sec, stderr=null) command, CONFIG.ceph_cli_timeout_sec, stderr=null)