
This change implements the major release deploy activate operation. The
deploy activate scripts are executed in a separate sub-process after all
required state for the major release deploy activate has been verified.
The deploy state is updated to "activate-done" when the activate
operation completes successfully, and to "activate-failed" otherwise.

TCs:
Passed: USM major release deploy activate completed successfully
Passed: USM major release deploy activate failed
Passed: USM major release deploy activate after activate failed

Story: 2010676
Task: 50082
Change-Id: I8810674cdc3c67700ff7c419528704c4b8905f51
Signed-off-by: Bin Qian <bin.qian@windriver.com>
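
A minimal sketch of the flow described above (the function and
state-transition names here are illustrative assumptions, not the actual
implementation):

    import subprocess

    def run_deploy_activate(activate_script, deploy_state):
        # Run the activate script in a separate sub-process so the
        # controller keeps servicing requests while activation runs.
        proc = subprocess.Popen([activate_script])
        if proc.wait() == 0:
            deploy_state.activate_done()    # deploy state -> "activate-done"
        else:
            deploy_state.activate_failed()  # deploy state -> "activate-failed"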
"""
|
|
Copyright (c) 2023-2024 Wind River Systems, Inc.
|
|
|
|
SPDX-License-Identifier: Apache-2.0
|
|
|
|
"""
|
|
import sys
|
|
|
|
# prevent software_controller from importing osprofiler
|
|
sys.modules['osprofiler'] = None
|
|
|
|
import configparser
|
|
import gc
|
|
import json
|
|
import os
|
|
from packaging import version
|
|
import select
|
|
import sh
|
|
import shutil
|
|
import socket
|
|
import subprocess
|
|
import tempfile
|
|
import threading
|
|
import time
|
|
from wsgiref import simple_server
|
|
|
|
from fm_api import fm_api
|
|
from fm_api import constants as fm_constants
|
|
|
|
|
|
from oslo_config import cfg as oslo_cfg
|
|
|
|
import software.apt_utils as apt_utils
|
|
import software.ostree_utils as ostree_utils
|
|
from software.api import app
|
|
from software.authapi import app as auth_app
|
|
from software.states import DEPLOY_STATES
|
|
from software.base import PatchService
|
|
from software.dc_utils import get_subcloud_groupby_version
|
|
from software.deploy_state import require_deploy_state
|
|
from software.exceptions import APTOSTreeCommandFail
|
|
from software.exceptions import HostNotFound
|
|
from software.exceptions import InternalError
|
|
from software.exceptions import MetadataFail
|
|
from software.exceptions import UpgradeNotSupported
|
|
from software.exceptions import OSTreeCommandFail
|
|
from software.exceptions import OSTreeTarFail
|
|
from software.exceptions import SoftwareError
|
|
from software.exceptions import SoftwareFail
|
|
from software.exceptions import ReleaseInvalidRequest
|
|
from software.exceptions import ReleaseValidationFailure
|
|
from software.exceptions import ReleaseIsoDeleteFailure
|
|
from software.exceptions import SoftwareServiceError
|
|
from software.exceptions import InvalidOperation
|
|
from software.release_data import reload_release_data
|
|
from software.release_data import get_SWReleaseCollection
|
|
from software.software_functions import collect_current_load_for_hosts
|
|
from software.software_functions import create_deploy_hosts
|
|
from software.software_functions import deploy_host_validations
|
|
from software.software_functions import parse_release_metadata
|
|
from software.software_functions import configure_logging
|
|
from software.software_functions import mount_iso_load
|
|
from software.software_functions import unmount_iso_load
|
|
from software.software_functions import read_upgrade_support_versions
|
|
from software.software_functions import BasePackageData
|
|
from software.software_functions import PatchFile
|
|
from software.software_functions import package_dir
|
|
from software.software_functions import repo_dir
|
|
from software.software_functions import root_scripts_dir
|
|
from software.software_functions import set_host_target_load
|
|
from software.software_functions import SW_VERSION
|
|
from software.software_functions import LOG
|
|
from software.software_functions import audit_log_info
|
|
from software.software_functions import repo_root_dir
|
|
from software.software_functions import is_deploy_state_in_sync
|
|
from software.software_functions import is_deployment_in_progress
|
|
from software.software_functions import get_release_from_patch
|
|
from software.release_state import ReleaseState
|
|
from software.deploy_host_state import DeployHostState
|
|
from software.deploy_state import DeployState
|
|
from software.release_verify import verify_files
|
|
import software.config as cfg
|
|
import software.utils as utils
|
|
from software.sysinv_utils import get_k8s_ver
|
|
from software.sysinv_utils import is_system_controller
|
|
|
|
from software.db.api import get_instance
|
|
|
|
import software.messages as messages
|
|
import software.constants as constants
|
|
from software import states
|
|
|
|
from tsconfig.tsconfig import INITIAL_CONFIG_COMPLETE_FLAG
|
|
from tsconfig.tsconfig import INITIAL_CONTROLLER_CONFIG_COMPLETE
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
CONF = oslo_cfg.CONF
|
|
|
|
pidfile_path = "/var/run/patch_controller.pid"
|
|
|
|
sc = None
|
|
state_file = "%s/.controller.state" % constants.SOFTWARE_STORAGE_DIR
|
|
app_dependency_basename = "app_dependencies.json"
|
|
app_dependency_filename = "%s/%s" % (constants.SOFTWARE_STORAGE_DIR, app_dependency_basename)
|
|
|
|
insvc_patch_restart_controller = "/run/software/.restart.software-controller"
|
|
|
|
ETC_HOSTS_FILE_PATH = "/etc/hosts"
|
|
ETC_HOSTS_BACKUP_FILE_PATH = "/etc/hosts.patchbak"
|
|
|
|
stale_hosts = []
|
|
pending_queries = []
|
|
|
|
thread_death = None
|
|
keep_running = True
|
|
|
|
# Limit socket blocking to 5 seconds to allow for thread to shutdown
|
|
api_socket_timeout = 5.0
|
|
|
|
|
|


class ControllerNeighbour(object):
    def __init__(self):
        self.last_ack = 0
        self.synced = False

    def rx_ack(self):
        self.last_ack = time.time()

    def get_age(self):
        return int(time.time() - self.last_ack)

    def rx_synced(self):
        self.synced = True

    def clear_synced(self):
        self.synced = False

    def get_synced(self):
        return self.synced


class AgentNeighbour(object):
    def __init__(self, ip):
        self.ip = ip
        self.last_ack = 0
        self.last_query_id = 0
        self.out_of_date = False
        self.hostname = "n/a"
        self.requires_reboot = False
        self.patch_failed = False
        self.stale = False
        self.pending_query = False
        self.latest_sysroot_commit = None
        self.nodetype = None
        self.sw_version = "unknown"
        self.subfunctions = []
        self.state = None

    def rx_ack(self,
               hostname,
               out_of_date,
               requires_reboot,
               query_id,
               patch_failed,
               sw_version,
               state):
        self.last_ack = time.time()
        self.hostname = hostname
        self.patch_failed = patch_failed
        self.sw_version = sw_version
        self.state = state

        if out_of_date != self.out_of_date or requires_reboot != self.requires_reboot:
            self.out_of_date = out_of_date
            self.requires_reboot = requires_reboot
            LOG.info("Agent %s (%s) reporting out_of_date=%s, requires_reboot=%s",
                     self.hostname,
                     self.ip,
                     self.out_of_date,
                     self.requires_reboot)

        if self.last_query_id != query_id:
            self.last_query_id = query_id
            self.stale = True
            if self.ip not in stale_hosts and self.ip not in pending_queries:
                stale_hosts.append(self.ip)

    def get_age(self):
        return int(time.time() - self.last_ack)

    def handle_query_detailed_resp(self,
                                   latest_sysroot_commit,
                                   nodetype,
                                   sw_version,
                                   subfunctions,
                                   state):
        self.latest_sysroot_commit = latest_sysroot_commit
        self.nodetype = nodetype
        self.stale = False
        self.pending_query = False
        self.sw_version = sw_version
        self.subfunctions = subfunctions
        self.state = state

        if self.ip in pending_queries:
            pending_queries.remove(self.ip)

        if self.ip in stale_hosts:
            stale_hosts.remove(self.ip)

    def get_dict(self):
        d = {"ip": self.ip,
             "hostname": self.hostname,
             "deployed": not self.out_of_date,
             "secs_since_ack": self.get_age(),
             "patch_failed": self.patch_failed,
             "stale_details": self.stale,
             "latest_sysroot_commit": self.latest_sysroot_commit,
             "nodetype": self.nodetype,
             "subfunctions": self.subfunctions,
             "sw_version": self.sw_version,
             "state": self.state}

        global sc
        if self.out_of_date and not sc.allow_insvc_patching:
            d["requires_reboot"] = True
        else:
            d["requires_reboot"] = self.requires_reboot

        # Included for future enhancement, to allow per-node determination
        # of in-service patching
        d["allow_insvc_patching"] = sc.allow_insvc_patching

        return d


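# Controller and agent messages are JSON datagrams sent over UDP. An
# illustrative hello payload (the msgtype value shown is an assumption;
# the msgtype/msgversion keys are set by software.messages):
#   {"msgtype": "hello", "msgversion": 1, "patch_op_counter": 3}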
class PatchMessageHello(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO)
        self.patch_op_counter = 0

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'patch_op_counter' in data:
            self.patch_op_counter = data['patch_op_counter']

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['patch_op_counter'] = sc.patch_op_counter

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        # Send response
        if self.patch_op_counter > 0:
            sc.handle_nbr_patch_op_counter(host, self.patch_op_counter)

        resp = PatchMessageHelloAck()
        resp.send(sock)

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageHelloAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_ACK)

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc

        sc.controller_neighbours_lock.acquire()
        if not addr[0] in sc.controller_neighbours:
            sc.controller_neighbours[addr[0]] = ControllerNeighbour()

        sc.controller_neighbours[addr[0]].rx_ack()
        sc.controller_neighbours_lock.release()

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageSyncReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SYNC_REQ)

    def encode(self):
        # Nothing to add to the SYNC_REQ, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        # We may need to do this in a separate thread, so that we continue to process hellos
        LOG.info("Handling sync req")

        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
        # need to think of reattempt to deal w/ the potential failure.
        sc.sync_from_nbr(host)

        resp = PatchMessageSyncComplete()
        resp.send(sock)

    def send(self, sock):
        global sc
        LOG.info("sending sync req")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageSyncComplete(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SYNC_COMPLETE)

    def encode(self):
        # Nothing to add to the SYNC_COMPLETE, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc
        LOG.info("Handling sync complete")

        sc.controller_neighbours_lock.acquire()
        if not addr[0] in sc.controller_neighbours:
            sc.controller_neighbours[addr[0]] = ControllerNeighbour()

        sc.controller_neighbours[addr[0]].rx_synced()
        sc.controller_neighbours_lock.release()

    def send(self, sock):
        global sc
        LOG.info("sending sync complete")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageHelloAgent(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_AGENT)

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['patch_op_counter'] = sc.patch_op_counter

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        local_hostname = utils.ip_to_versioned_localhost(cfg.agent_mcast_group)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))
        sock.sendto(str.encode(message), (local_hostname, cfg.agent_port))


class PatchMessageSendLatestFeedCommit(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SEND_LATEST_FEED_COMMIT)

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['latest_feed_commit'] = sc.latest_feed_commit

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        local_hostname = utils.ip_to_versioned_localhost(cfg.agent_mcast_group)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))
        sock.sendto(str.encode(message), (local_hostname, cfg.agent_port))


class PatchMessageHelloAgentAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_AGENT_ACK)
        self.query_id = 0
        self.agent_out_of_date = False
        self.agent_hostname = "n/a"
        self.agent_requires_reboot = False
        self.agent_patch_failed = False
        self.agent_sw_version = "unknown"
        self.agent_state = "unknown"

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'query_id' in data:
            self.query_id = data['query_id']
        if 'out_of_date' in data:
            self.agent_out_of_date = data['out_of_date']
        if 'hostname' in data:
            self.agent_hostname = data['hostname']
        if 'requires_reboot' in data:
            self.agent_requires_reboot = data['requires_reboot']
        if 'patch_failed' in data:
            self.agent_patch_failed = data['patch_failed']
        if 'sw_version' in data:
            self.agent_sw_version = data['sw_version']
        if 'state' in data:
            self.agent_state = data['state']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc

        sc.hosts_lock.acquire()
        if not addr[0] in sc.hosts:
            sc.hosts[addr[0]] = AgentNeighbour(addr[0])

        sc.hosts[addr[0]].rx_ack(self.agent_hostname,
                                 self.agent_out_of_date,
                                 self.agent_requires_reboot,
                                 self.query_id,
                                 self.agent_patch_failed,
                                 self.agent_sw_version,
                                 self.agent_state)
        sc.hosts_lock.release()

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageQueryDetailed(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_QUERY_DETAILED)

    def encode(self):
        # Nothing to add to the message, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        self.encode()
        message = json.dumps(self.message)
        sock.sendall(str.encode(message))


class PatchMessageQueryDetailedResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_QUERY_DETAILED_RESP)
        self.agent_sw_version = "unknown"
        self.latest_sysroot_commit = "unknown"
        self.subfunctions = []
        self.nodetype = "unknown"
        self.agent_state = "unknown"

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'latest_sysroot_commit' in data:
            self.latest_sysroot_commit = data['latest_sysroot_commit']
        if 'nodetype' in data:
            self.nodetype = data['nodetype']
        if 'sw_version' in data:
            self.agent_sw_version = data['sw_version']
        if 'subfunctions' in data:
            self.subfunctions = data['subfunctions']
        if 'state' in data:
            self.agent_state = data['state']

    def encode(self):
        LOG.error("Should not get here")

    def handle(self, sock, addr):
        global sc

        ip = addr[0]
        sc.hosts_lock.acquire()
        if ip in sc.hosts:
            sc.hosts[ip].handle_query_detailed_resp(self.latest_sysroot_commit,
                                                    self.nodetype,
                                                    self.agent_sw_version,
                                                    self.subfunctions,
                                                    self.agent_state)
            for patch_id in list(sc.interim_state):
                if ip in sc.interim_state[patch_id]:
                    sc.interim_state[patch_id].remove(ip)
                    if len(sc.interim_state[patch_id]) == 0:
                        del sc.interim_state[patch_id]
            sc.hosts_lock.release()
            sc.check_patch_states()
        else:
            sc.hosts_lock.release()

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageAgentInstallReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_AGENT_INSTALL_REQ)
        self.ip = None
        self.force = False
        self.major_release = None
        self.commit_id = None

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['force'] = self.force
        self.message['major_release'] = self.major_release
        self.message['commit_id'] = self.commit_id

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        LOG.info("sending install request to node: %s", self.ip)
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (self.ip, cfg.agent_port))


class PatchMessageAgentInstallResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_AGENT_INSTALL_RESP)
        self.status = False
        self.reject_reason = None
        self.reboot_required = False

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'status' in data:
            self.status = data['status']
        if 'reject_reason' in data:
            self.reject_reason = data['reject_reason']
        if 'reboot_required' in data:
            self.reboot_required = data['reboot_required']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.info("Handling install resp from %s", addr[0])
        global sc

        sc.hosts_lock.acquire()
        try:
            # NOTE(bqian) This appears to tolerate the case where a host is
            # directed to install a patch but software-controller-daemon is
            # restarted during the installation. The in-memory, volatile
            # sc.hosts cache should be removed and replaced with the
            # persistent deploy-host entity.
            ip = addr[0]
            if ip not in sc.hosts:
                sc.hosts[ip] = AgentNeighbour(ip)

            sc.hosts[ip].install_status = self.status
            sc.hosts[ip].install_pending = False
            sc.hosts[ip].install_reject_reason = self.reject_reason
            hostname = sc.hosts[ip].hostname
        finally:
            sc.hosts_lock.release()

        deploy_host_state = DeployHostState(hostname)
        if self.status:
            deploy_host_state.deployed()
            if self.reboot_required:
                sc.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_SUCCESS_RR,
                                         fm_constants.FM_ALARM_STATE_SET,
                                         "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname))
        else:
            deploy_host_state.deploy_failed()
            sc.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
                                     fm_constants.FM_ALARM_STATE_SET,
                                     "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname))

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageDropHostReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DROP_HOST_REQ)
        self.ip = None

    def encode(self):
        messages.PatchMessage.encode(self)
        self.message['ip'] = self.ip

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'ip' in data:
            self.ip = data['ip']

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        if self.ip is None:
            LOG.error("Received PATCHMSG_DROP_HOST_REQ with no ip: %s", json.dumps(self.data))
            return

        sc.drop_host(self.ip, sync_nbr=False)
        return

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SoftwareMessageDeployStateUpdate(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_UPDATE)

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        filesystem_data = utils.get_software_filesystem_data()
        deploys_state = {"deploy_host": filesystem_data.get("deploy_host", {}),
                         "deploy": filesystem_data.get("deploy", {})}
        self.message["deploy_state"] = deploys_state

    def handle(self, sock, addr):  # pylint: disable=unused-argument
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))
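
# Illustrative payload for the message above (inner shapes elided; the
# "deploy" and "deploy_host" keys mirror the software filesystem data):
#   {"msgtype": ..., "msgversion": 1,
#    "deploy_state": {"deploy": {...}, "deploy_host": {...}}}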


class SoftwareMessageDeployStateUpdateAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_UPDATE_ACK)
        self.peer_state_data = {}

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        self.peer_state_data = data

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc
        if self.peer_state_data["result"] == messages.MSG_ACK_SUCCESS:
            LOG.debug("Peer controller is synced with value: %s",
                      self.peer_state_data["deploy_state"])
        else:
            LOG.error("Peer controller deploy state has diverged.")


class SWMessageDeployStateChanged(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_CHANGED)
        self.valid = False
        self.agent = None
        self.deploy_state = None
        self.hostname = None
        self.host_state = None

    def decode(self, data):
        """
        The message is a serialized json object:
        {
            "msgtype": "deploy-state-changed",
            "msgversion": 1,
            "agent": "<a valid agent>",
            "deploy-state": "<deploy-state>",
            "hostname": "<hostname>",
            "host-state": "<host-deploy-substate>"
        }
        """

        messages.PatchMessage.decode(self, data)

        self.valid = True
        self.agent = None

        valid_agents = ['deploy-start', 'deploy-activate']
        if 'agent' in data:
            self.agent = data['agent']
        else:
            self.agent = 'unknown'

        if self.agent not in valid_agents:
            # ignore msg from unknown senders
            LOG.info("%s received from unknown agent %s" %
                     (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent))
            self.valid = False

        valid_state = {
            DEPLOY_STATES.START_DONE.value: DEPLOY_STATES.START_DONE,
            DEPLOY_STATES.START_FAILED.value: DEPLOY_STATES.START_FAILED,
            DEPLOY_STATES.ACTIVATE_FAILED.value: DEPLOY_STATES.ACTIVATE_FAILED,
            DEPLOY_STATES.ACTIVATE_DONE.value: DEPLOY_STATES.ACTIVATE_DONE
        }
        if 'deploy-state' in data and data['deploy-state']:
            deploy_state = data['deploy-state']
            if deploy_state in valid_state:
                self.deploy_state = valid_state[deploy_state]
                LOG.info("%s received from %s with deploy-state %s" %
                         (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))
            else:
                self.valid = False
                LOG.error("%s received from %s with invalid deploy-state %s" %
                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))

        if 'hostname' in data and data['hostname']:
            self.hostname = data['hostname']

        if 'host-state' in data and data['host-state']:
            host_state = data['host-state']
            if host_state not in states.VALID_HOST_DEPLOY_STATE:
                LOG.error("%s received from %s with invalid host-state %s" %
                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, host_state))
                self.valid = False
            else:
                self.host_state = host_state

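        # A valid message carries exactly one of: a deploy-state, or a
        # hostname plus host-state. The XOR below rejects messages that
        # carry both or neither.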
        if self.valid:
            self.valid = (bool(self.host_state and self.hostname) != bool(self.deploy_state))

        if not self.valid:
            LOG.error("%s received from %s as invalid %s" %
                      (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, data))

    def handle(self, sock, addr):
        global sc
        if not self.valid:
            # nothing to do
            return

        if self.deploy_state:
            LOG.info("Received deploy state changed to %s, agent %s" %
                     (self.deploy_state, self.agent))
            sc.deploy_state_changed(self.deploy_state)
        else:
            LOG.info("Received %s deploy host state changed to %s, agent %s" %
                     (self.hostname, self.host_state, self.agent))
            sc.host_deploy_state_changed(self.hostname, self.host_state)

        sock.sendto(str.encode("OK"), addr)

    def send(self, sock):
        global sc
        LOG.info("sending deploy state changed message")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchController(PatchService):
    def __init__(self):
        PatchService.__init__(self)

        # Locks
        self.socket_lock = threading.RLock()
        self.controller_neighbours_lock = threading.RLock()
        self.hosts_lock = threading.RLock()

        self.hosts = {}
        self.controller_neighbours = {}

        self.db_api_instance = get_instance()

        # interim_state is used to track hosts that have not responded
        # with fresh queries since a patch was applied or removed, on
        # a per-patch basis. This allows the patch controller to move
        # patches immediately into a "Partial" state until all nodes
        # have responded.
        #
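        # Illustrative shape (hypothetical patch ID and addresses):
        #   {"PATCH_0001": ["192.168.204.3", "192.168.204.4"]}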
        self.interim_state = {}

        self.sock_out = None
        self.sock_in = None
        self.controller_address = None
        self.agent_address = None
        self.patch_op_counter = 1
        reload_release_data()
        try:
            self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION)
        except OSTreeCommandFail:
            LOG.exception("Failure to fetch the feed ostree latest log while "
                          "initializing Patch Controller")
            self.latest_feed_commit = None

        self.check_patch_states()
        self.base_pkgdata = BasePackageData()

        # This is the alarm cache. It is used to store the last raised alarm id
        self.usm_alarm = {constants.LAST_IN_SYNC: False}
        self.hostname = socket.gethostname()
        self.fm_api = fm_api.FaultAPIs()

        self.allow_insvc_patching = True

        if os.path.exists(app_dependency_filename):
            try:
                with open(app_dependency_filename, 'r') as f:
                    self.app_dependencies = json.loads(f.read())
            except Exception:
                LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
        else:
            self.app_dependencies = {}

        if os.path.isfile(state_file):
            self.read_state_file()
        else:
            self.write_state_file()

        system_mode = utils.get_platform_conf("system_mode")
        if system_mode == constants.SYSTEM_MODE_SIMPLEX:
            self.standby_controller = "controller-0"
        elif system_mode == constants.SYSTEM_MODE_DUPLEX:
            self.standby_controller = "controller-0" \
                if self.hostname == "controller-1" \
                else "controller-1"

        DeployHostState.register_event_listener(DeployState.host_deploy_updated)
        DeployState.register_event_listener(ReleaseState.deploy_updated)
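
        # As wired above, a host-level deploy state change propagates to the
        # DeployState, which in turn updates the ReleaseState.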

    @property
    def release_collection(self):
        swrc = get_SWReleaseCollection()
        return swrc

    def update_config(self):
        cfg.read_config()

        if self.port != cfg.controller_port:
            self.port = cfg.controller_port

        # The loopback interface does not support multicast messaging, therefore
        # revert to using unicast messaging when configured against the
        # loopback device
        if cfg.get_mgmt_iface() == constants.LOOPBACK_INTERFACE_NAME:
            mgmt_ip = cfg.get_mgmt_ip()
            self.mcast_addr = None
            self.controller_address = mgmt_ip
            self.agent_address = mgmt_ip
        else:
            self.mcast_addr = cfg.controller_mcast_group
            self.controller_address = cfg.controller_mcast_group
            self.agent_address = cfg.agent_mcast_group

    def socket_lock_acquire(self):
        self.socket_lock.acquire()

    def socket_lock_release(self):
        try:
            self.socket_lock.release()
        except Exception:
            pass

    def write_state_file(self):
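        # The resulting file is a small INI document, e.g. (illustrative):
        #   [runtime]
        #   patch_op_counter = 7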
        config = configparser.ConfigParser(strict=False)

        cfgfile = open(state_file, 'w')

        config.add_section('runtime')
        config.set('runtime', 'patch_op_counter', str(self.patch_op_counter))
        config.write(cfgfile)
        cfgfile.close()

    def read_state_file(self):
        config = configparser.ConfigParser(strict=False)

        config.read(state_file)

        try:
            counter = config.getint('runtime', 'patch_op_counter')
            self.patch_op_counter = counter

            LOG.info("patch_op_counter is: %d", self.patch_op_counter)
        except configparser.Error:
            LOG.exception("Failed to read state info")

    def handle_nbr_patch_op_counter(self, host, nbr_patch_op_counter):
        if self.patch_op_counter >= nbr_patch_op_counter:
            return

        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
        # need to think of reattempt to deal w/ the potential failure.
        self.sync_from_nbr(host)

    def sync_from_nbr(self, host):
        # Sync the software repo
        host_url = utils.ip_to_url(host)
        try:
            output = subprocess.check_output(["rsync",
                                              "-acv",
                                              "--delete",
                                              "--exclude", "tmp",
                                              "rsync://%s/software/" % host_url,
                                              "%s/" % constants.SOFTWARE_STORAGE_DIR],
                                             stderr=subprocess.STDOUT)
            LOG.info("Synced to mate software via rsync: %s", output)
        except subprocess.CalledProcessError as e:
            LOG.error("Failed to rsync: %s", e.output)
            return False

        try:
            output = subprocess.check_output(["rsync",
                                              "-acv",
                                              "--delete",
                                              "rsync://%s/repo/" % host_url,
                                              "%s/" % repo_root_dir],
                                             stderr=subprocess.STDOUT)
            LOG.info("Synced to mate repo via rsync: %s", output)
        except subprocess.CalledProcessError as e:
            LOG.error("Failed to rsync: %s", e.output)
            return False

        try:
            for neighbour in list(self.hosts):
                if (self.hosts[neighbour].nodetype == "controller" and
                        self.hosts[neighbour].ip == host):
                    LOG.info("Starting feed sync")
                    # The output is a string that lists the directories
                    # Example output:
                    # >>> dir_names = sh.ls("/var/www/pages/feed/")
                    # >>> dir_names.stdout
                    # b'rel-22.12 rel-22.5\n'
                    dir_names = sh.ls(constants.FEED_OSTREE_BASE_DIR)

                    # Convert the output above into a list that can be iterated
                    # >>> list_of_dirs = dir_names.stdout.decode().rstrip().split()
                    # >>> print(list_of_dirs)
                    # ['rel-22.12', 'rel-22.5']
                    list_of_dirs = dir_names.stdout.decode("utf-8").rstrip().split()

                    for rel_dir in list_of_dirs:
                        feed_repo = "%s/%s/ostree_repo/" % (constants.FEED_OSTREE_BASE_DIR, rel_dir)
                        if not os.path.isdir(feed_repo):
                            LOG.info("Skipping feed dir %s", feed_repo)
                            continue
                        LOG.info("Syncing %s", feed_repo)
                        output = subprocess.check_output(["ostree",
                                                          "--repo=%s" % feed_repo,
                                                          "pull",
                                                          "--depth=-1",
                                                          "--mirror",
                                                          "starlingx"],
                                                         stderr=subprocess.STDOUT)
                        output = subprocess.check_output(["ostree",
                                                          "summary",
                                                          "--update",
                                                          "--repo=%s" % feed_repo],
                                                         stderr=subprocess.STDOUT)
                    LOG.info("Synced to mate feed via ostree pull: %s", output)
        except subprocess.CalledProcessError as e:
            LOG.error("Failed to sync feed repo between controllers: %s", e.output)
            return False

        self.read_state_file()

        self.interim_state = {}
        reload_release_data()
        self.check_patch_states()

        if os.path.exists(app_dependency_filename):
            try:
                with open(app_dependency_filename, 'r') as f:
                    self.app_dependencies = json.loads(f.read())
            except Exception:
                LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
        else:
            self.app_dependencies = {}

        return True

    def inc_patch_op_counter(self):
        self.patch_op_counter += 1
        self.write_state_file()

    def check_patch_states(self):
        # Default to allowing in-service patching
        self.allow_insvc_patching = True

        # NOTE(bqian) How is this loop relevant?
        # allow_insvc_patching equals not reboot_required in the deploy entity;
        # see software_entity.
        for ip in (ip for ip in list(self.hosts) if self.hosts[ip].out_of_date):
            for release in self.release_collection.iterate_releases():
                # NOTE(bqian) The check below consolidates DEPLOYING_START into
                # DEPLOYING. Also, checking the release state here is apparently
                # a bug: it ends up returning the default (True) whenever the
                # state is not DEPLOYING, for example when checking during a
                # removal.
                if release.reboot_required and release.state == states.DEPLOYING:
                    self.allow_insvc_patching = False
        # NOTE(bqian) this function looks very buggy, should probably be rewritten

    def get_release_dependency_list(self, release_id):
        """
        Returns a list of software releases that are required by this
        release.
        Example: If R3 requires R2 and R2 requires R1,
        then this method will return ['R2', 'R1'] for
        input param release_id='R3'
        :param release_id: The software release id
        """

        # TODO(bqian): this algorithm will fail if the dependency is not sequential,
        # i.e. if R5 requires R4 and R1, R4 requires R3 and R1, and R3 requires R1,
        # this relation will bring R1 before R3.
        # The change below does not fix the algorithm; it converts direct use of
        # release_data to the release_collection wrapper class.
        release = self.release_collection.get_release_by_id(release_id)
        if release is None:
            error = f"Not all required releases are uploaded, missing {release_id}"
            raise SoftwareServiceError(error=error)

        release_dependency_list = []
        for req_release in release.requires_release_ids:
            release_dependency_list.append(req_release)
            release_dependency_list = release_dependency_list + \
                self.get_release_dependency_list(req_release)
        return release_dependency_list

    def get_release_required_by_list(self, release_id):
        """
        Returns a list of software releases that require this
        release.
        Example: If R3 requires R2 and R2 requires R1,
        then this method will return ['R3', 'R2'] for
        input param release_id='R1'
        :param release_id: The software release id
        """
        release_required_by_list = []
        # NOTE(bqian) not sure why the check is needed. release_id is always
        # from the release_data collection.
        if self.release_collection.get_release_by_id(release_id):
            for req_release in self.release_collection.iterate_releases():
                if release_id in req_release.requires_release_ids:
                    release_required_by_list.append(req_release.id)
                    release_required_by_list = release_required_by_list + \
                        self.get_release_required_by_list(req_release.id)

        return release_required_by_list

    def get_ostree_tar_filename(self, patch_sw_version, patch_id):
        '''
        Returns the path of the ostree tarball
        :param patch_sw_version: sw version this patch must be applied to
        :param patch_id: The patch ID
        '''
        ostree_tar_dir = package_dir[patch_sw_version]
        ostree_tar_filename = "%s/%s-software.tar" % (ostree_tar_dir, patch_id)
        return ostree_tar_filename

    def delete_restart_script(self, patch_id):
        '''
        Deletes the restart script (if any) associated with the patch
        :param patch_id: The patch ID
        '''
        release = self.release_collection.get_release_by_id(patch_id)
        restart_script = release.restart_script
        if not restart_script:
            return

        restart_script_path = "%s/%s" % (root_scripts_dir, restart_script)
        try:
            # Delete the restart script
            os.remove(restart_script_path)
        except OSError:
            msg = "Failed to remove restart script for %s" % patch_id
            LOG.exception(msg)
            raise SoftwareError(msg)

    def run_semantic_check(self, action, patch_list):
        if not os.path.exists(INITIAL_CONFIG_COMPLETE_FLAG):
            # Skip semantic checks if initial configuration isn't complete
            return

        # Pass the current patch state to the semantic check as a series of args
        patch_state_args = []
        for release in self.release_collection.iterate_releases():
            patch_state = '%s=%s' % (release.id, release.state)
            patch_state_args += ['-p', patch_state]

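        # Each check script is invoked as (illustrative path and states):
        #   <SEMANTICS_DIR>/<action>/<patch_id> -p REL_1=deployed -p REL_2=available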
        # Run semantic checks, if any
        for patch_id in patch_list:
            semchk = os.path.join(constants.SEMANTICS_DIR, action, patch_id)

            if os.path.exists(semchk):
                try:
                    LOG.info("Running semantic check: %s", semchk)
                    subprocess.check_output([semchk] + patch_state_args,
                                            stderr=subprocess.STDOUT)
                    LOG.info("Semantic check %s passed", semchk)
                except subprocess.CalledProcessError as e:
                    msg = "Semantic check failed for %s:\n%s" % (patch_id, e.output)
                    LOG.exception(msg)
                    raise SoftwareFail(msg)

    def software_install_local_api(self):
        """
        Trigger patch installation prior to configuration
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        # Check to see if initial configuration has completed
        if os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE):
            # Disallow the install
            msg = "This command can only be used before initial system configuration."
            LOG.exception(msg)
            raise SoftwareServiceError(error=msg)

        update_hosts_file = False

        # Check to see if the controller hostname is already known.
        if not utils.gethostbyname(constants.CONTROLLER_FLOATING_HOSTNAME):
            update_hosts_file = True

        # To allow software installation to occur before configuration, we need
        # to alias controller to localhost.
        # There is a HOSTALIASES feature that would be preferred here, but it
        # unfortunately requires dnsmasq to be running, which it is not at this point.

        if update_hosts_file:
            # Make a backup of /etc/hosts
            try:
                shutil.copy2(ETC_HOSTS_FILE_PATH, ETC_HOSTS_BACKUP_FILE_PATH)
            except Exception:
                msg = f"Error occurred while copying {ETC_HOSTS_FILE_PATH}."
                LOG.exception(msg)
                raise SoftwareFail(msg)

            # Update /etc/hosts
            with open(ETC_HOSTS_FILE_PATH, 'a') as f:
                f.write("127.0.0.1 controller\n")

        # Run the software install
        try:
            # Use the restart option of the sw-patch init script, which will
            # install patches but won't automatically reboot if the RR flag is set
            subprocess.check_output(['/etc/init.d/sw-patch', 'restart'])
        except subprocess.CalledProcessError:
            msg = "Failed to install patches."
            LOG.exception(msg)
            raise SoftwareFail(msg)

        if update_hosts_file:
            # Restore /etc/hosts
            os.rename(ETC_HOSTS_BACKUP_FILE_PATH, ETC_HOSTS_FILE_PATH)

        for release in self.release_collection.iterate_releases():
            if release.state == states.DEPLOYING:
                release.update_state(states.DEPLOYED)
            elif release.state == states.REMOVING:
                release.update_state(states.AVAILABLE)

        msg_info += "Software installation is complete.\n"
        msg_info += "Please reboot before continuing with configuration."

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def major_release_upload_check(self):
        """
        major release upload semantic check
        """
        valid_controllers = ['controller-0']
        if socket.gethostname() not in valid_controllers:
            msg = f"Upload rejected, major release must be uploaded to {valid_controllers}"
            LOG.info(msg)
            raise SoftwareServiceError(error=msg)

        max_major_releases = 2
        major_releases = []
        for rel in self.release_collection.iterate_releases():
            major_rel = rel.sw_version
            if major_rel not in major_releases:
                major_releases.append(major_rel)

        if len(major_releases) >= max_major_releases:
            msg = f"Major releases {major_releases} have already been uploaded. " + \
                  f"Max major releases is {max_major_releases}"
            LOG.info(msg)
            raise SoftwareServiceError(error=msg)

    def _process_upload_upgrade_files(self, upgrade_files):
        """
        Process the uploaded upgrade files
        :param upgrade_files: dict of upgrade files
        :return: info, warning, error messages
        """
        local_info = ""
        local_warning = ""
        local_error = ""
        release_meta_info = {}

        # validate this major release upload
        self.major_release_upload_check()

        to_release = None
        iso_mount_dir = None
        all_good = True
        try:
            iso = upgrade_files[constants.ISO_EXTENSION]
            sig = upgrade_files[constants.SIG_EXTENSION]
            if not verify_files([iso], sig):
                msg = "Software %s:%s signature validation failed" % (iso, sig)
                raise ReleaseValidationFailure(error=msg)

            LOG.info("iso and signature files upload completed. "
                     "Importing iso is in progress")

            iso_file = upgrade_files.get(constants.ISO_EXTENSION)

            # Mount the iso file after signature verification
            iso_mount_dir = mount_iso_load(iso_file, constants.TMP_DIR)
            LOG.info("Mounted iso file %s to %s", iso_file, iso_mount_dir)

            # Read the metadata from the iso file
            to_release, supported_from_releases = read_upgrade_support_versions(iso_mount_dir)
            LOG.info("Reading metadata from iso file %s completed", iso_file)
            # Validate that the current release is supported to upgrade to the new release
            supported_versions = [v.get("version") for v in supported_from_releases]
            if SW_VERSION not in supported_versions:
                raise UpgradeNotSupported("Current release %s not supported to upgrade to %s"
                                          % (SW_VERSION, to_release))

            # Copy iso /upgrades/software-deploy/ to /opt/software/rel-<rel>/bin/
            to_release_bin_dir = os.path.join(
                constants.SOFTWARE_STORAGE_DIR, ("rel-%s" % to_release), "bin")
            if os.path.exists(to_release_bin_dir):
                shutil.rmtree(to_release_bin_dir)
            shutil.copytree(os.path.join(iso_mount_dir, "upgrades",
                                         constants.SOFTWARE_DEPLOY_FOLDER), to_release_bin_dir)

            # Run usm_load_import script
            LOG.info("Starting load import from %s", iso_file)
            import_script = os.path.join(to_release_bin_dir, 'usm_load_import')
            load_import_cmd = [import_script,
                               "--from-release=%s" % SW_VERSION,
                               "--to-release=%s" % to_release,
                               "--iso-dir=%s" % iso_mount_dir]
            LOG.info("Running load import command: %s", " ".join(load_import_cmd))
            load_import_return = subprocess.run(load_import_cmd,
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.STDOUT,
                                                check=True,
                                                text=True)
            if load_import_return.returncode != 0:
                local_error += load_import_return.stdout
            else:
                local_info += load_import_return.stdout

            # Copy metadata.xml to /opt/software/rel-<rel>/
            to_file = os.path.join(constants.SOFTWARE_STORAGE_DIR, ("rel-%s" % to_release), "metadata.xml")
            metadata_file = os.path.join(iso_mount_dir, "upgrades", "metadata.xml")
            shutil.copyfile(metadata_file, to_file)

            # Update the release metadata
            # metadata files have been copied over to the metadata/available directory
            reload_release_data()
            LOG.info("Updated release metadata for %s", to_release)

            # Get release metadata
            # NOTE(bqian) to_release is sw_version (MM.mm), so the path isn't correct.
            # A prepatched iso also needs to be handled; we should go through the
            # release_data to find the latest release of the major release to_release.
            abs_stx_release_metadata_file = os.path.join(
                iso_mount_dir, 'upgrades', f"{constants.RELEASE_GA_NAME % to_release}-metadata.xml")
            all_release_meta_info = parse_release_metadata(abs_stx_release_metadata_file)
            release_meta_info = {
                os.path.basename(upgrade_files[constants.ISO_EXTENSION]): {
                    "id": all_release_meta_info.get("id"),
                    "sw_version": all_release_meta_info.get("sw_version"),
                },
                os.path.basename(upgrade_files[constants.SIG_EXTENSION]): {
                    "id": None,
                    "sw_version": None,
                }
            }
        except Exception:
            all_good = False
            raise
        finally:
            # Unmount the iso file
            if iso_mount_dir:
                unmount_iso_load(iso_mount_dir)
                LOG.info("Unmounted iso file %s", iso_file)

            # remove upload leftover in case of failure
            if not all_good and to_release:
                to_release_dir = os.path.join(constants.SOFTWARE_STORAGE_DIR, "rel-%s" % to_release)
                shutil.rmtree(to_release_dir, ignore_errors=True)

        return local_info, local_warning, local_error, release_meta_info

    def _process_upload_patch_files(self, patch_files):
        """
        Process the uploaded patch files
        :param patch_files: list of patch files
        :return: info, warning, error messages
        """

        local_info = ""
        local_warning = ""
        local_error = ""
        upload_patch_info = []
        try:
            # Create the directories
            for state_dir in states.DEPLOY_STATE_METADATA_DIR:
                os.makedirs(state_dir, exist_ok=True)
        except os.error:
            msg = "Failed to create directories"
            LOG.exception(msg)
            raise SoftwareFail(msg)

        for patch_file in patch_files:

            base_patch_filename = os.path.basename(patch_file)

            # Get the release_id from the patch's metadata
            # and check to see if it's already uploaded
            release_id = get_release_from_patch(patch_file, 'id')

            release = self.release_collection.get_release_by_id(release_id)

            if release:
                if release.state == states.COMMITTED:
                    msg = "%s is committed. Metadata not updated" % release_id
                    LOG.info(msg)
                    local_info += msg + "\n"
                elif release.state != states.AVAILABLE:
                    msg = "%s is not currently in available state to be deployed." % release_id
                    LOG.info(msg)
                    local_info += msg + "\n"
                else:
                    try:
                        # todo(abailey) PatchFile / extract_patch should be renamed
                        PatchFile.extract_patch(patch_file,
                                                metadata_dir=states.AVAILABLE_DIR,
                                                metadata_only=True,
                                                existing_content=release.contents,
                                                base_pkgdata=self.base_pkgdata)
                        PatchFile.unpack_patch(patch_file)
                        reload_release_data()
                        msg = "%s is already uploaded. Updated metadata only" % release_id
                        LOG.info(msg)
                        local_info += msg + "\n"
                    except SoftwareFail:
                        msg = "Failed to upload release %s" % release_id
                        LOG.exception(msg)
                        local_error += msg + "\n"
            else:
                try:
                    PatchFile.extract_patch(patch_file,
                                            metadata_dir=states.AVAILABLE_DIR,
                                            base_pkgdata=self.base_pkgdata)
                    PatchFile.unpack_patch(patch_file)
                    local_info += "%s is now uploaded\n" % release_id
                    reload_release_data()

                    # NOTE(bqian) The check and exception raised below should be
                    # revisited; if applicable, it should be applied at the
                    # beginning of all requests.
                    if len(self.hosts) == 0:
                        msg = "service is running in incorrect state. No registered host"
                        raise InternalError(msg)
                except SoftwareFail:
                    msg = "Failed to upload release %s" % release_id
                    LOG.exception(msg)
                    local_error += msg + "\n"
                    continue

            release = self.release_collection.get_release_by_id(release_id)
            if release:
                upload_patch_info.append({
                    base_patch_filename: {
                        "id": release_id,
                        "sw_release": release.sw_release,  # MM.mm.pp release version
                    }
                })

        # create versioned precheck for uploaded patches
        for patch in upload_patch_info:
            filename, values = list(patch.items())[0]
            LOG.info("Creating precheck for release %s..." % values.get("id"))
            for pf in patch_files:
                if filename in pf:
                    patch_file = pf

            sw_release = values.get("sw_release")

            required_patches = []
            for dep_id in self.release_collection.get_release_by_id(values.get("id")).requires_release_ids:
                required_patches.append(version.parse(dep_id))

            # sort the required patches list and get the latest, if available
            req_patch_version = None
            if len(required_patches) > 0:
                req_patch = str(sorted(required_patches)[-1])
                _, req_patch_version, _, _ = utils.get_component_and_versions(req_patch)
                if self.release_collection.get_release_by_id(req_patch) is None:
                    LOG.warning("Required patch '%s' is not uploaded." % req_patch)

            PatchFile.create_versioned_precheck(patch_file, sw_release, req_patch_version=req_patch_version)

        return local_info, local_warning, local_error, upload_patch_info

    def software_release_upload(self, release_files):
        """
        Upload software release files
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        upload_info = []

        # Refresh data, if needed
        self.base_pkgdata.loaddirs()

        msg = "Uploading files: %s" % ",".join(release_files)
        audit_log_info(msg)

        # We now need to put the files in the category (patch or upgrade)
        patch_files = []
        upgrade_files = {}

        for uploaded_file in release_files:
            (_, ext) = os.path.splitext(uploaded_file)
            if ext in [constants.PATCH_EXTENSION]:
                patch_files.append(uploaded_file)
            elif ext == constants.ISO_EXTENSION:
                upgrade_files[constants.ISO_EXTENSION] = uploaded_file
            elif ext == constants.SIG_EXTENSION:
                upgrade_files[constants.SIG_EXTENSION] = uploaded_file
            else:
                LOG.exception(
                    "The file extension is not supported. Supported extensions include .patch, .iso and .sig")

        if len(upgrade_files) == 1:  # Only one upgrade file uploaded
            msg = "Missing upgrade file or signature file"
            LOG.error(msg)
            msg_error += msg + "\n"
        elif len(upgrade_files) == 2:  # Two upgrade files uploaded
            tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_upload_upgrade_files(upgrade_files)
            msg_info += tmp_info
            msg_warning += tmp_warning
            msg_error += tmp_error
            upload_info.append(tmp_release_meta_info)

        if len(patch_files) > 0:
            tmp_info, tmp_warning, tmp_error, tmp_patch_meta_info = self._process_upload_patch_files(
                patch_files)
            msg_info += tmp_info
            msg_warning += tmp_warning
            msg_error += tmp_error
            upload_info += tmp_patch_meta_info

        reload_release_data()

        return dict(info=msg_info, warning=msg_warning, error=msg_error, upload_info=upload_info)

    def release_apply_remove_order(self, release_id, running_sw_version, reverse=False):

        # If R4 requires R3, R3 requires R2 and R2 requires R1,
        # then release_order = ['R4', 'R3', 'R2', 'R1']

        if reverse:
            release_order = [release_id] + self.get_release_dependency_list(release_id)
            # If release_order = ['R4', 'R3', 'R2', 'R1']
            # and running_sw_version is the sw_version for R2,
            # then after the operation below, release_order = ['R4', 'R3']
            for i, rel in enumerate(release_order):
                release = self.release_collection.get_release_by_id(rel)
                if release.sw_release == running_sw_version:
                    val = i - len(release_order) + 1
                    while val >= 0:
                        release_order.pop()
                        val = val - 1
                    break

        else:
            release_order = [release_id] + self.get_release_required_by_list(release_id)

        # reverse = True is for the apply operation;
        # in that case, release_order = ['R3', 'R4']
        # reverse = False is for the remove operation;
        # in that case, release_order = ['R3']
        if reverse:
            release_order.reverse()
        else:
            # NOTE(bqian) this pop is questionable; wouldn't the specified
            # release then not be removed?
            release_order.pop(0)

        return release_order

    def software_release_delete_api(self, release_ids):
        """
        Delete release(s)
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        # Protect against duplications
        full_list = sorted(list(set(release_ids)))

        not_founds = []
        cannot_del = []
        used_by_subcloud = []
        release_list = []
        for rel_id in full_list:
            rel = self.release_collection.get_release_by_id(rel_id)
            if rel is None:
                not_founds.append(rel_id)
            else:
                if not rel.is_deletable:
                    cannot_del.append(rel_id)
                elif rel.is_ga_release and is_system_controller():
                    subcloud_by_sw_version = get_subcloud_groupby_version()
                    if rel.sw_version in subcloud_by_sw_version:
                        used_by_subcloud.append(rel_id)
                    else:
                        release_list.append(rel_id)
                else:
                    release_list.append(rel_id)

        err_msg = ""
        if len(not_founds) > 0:
            list_str = ','.join(not_founds)
            err_msg = f"Releases {list_str} cannot be found\n"

        if len(cannot_del) > 0:
            list_str = ','.join(cannot_del)
            err_msg = err_msg + f"Releases {list_str} are not ready to delete\n"

        if len(used_by_subcloud) > 0:
            list_str = ','.join(used_by_subcloud)
            err_msg = err_msg + f"Releases {list_str} are still used by subcloud(s)"

        if len(err_msg) > 0:
            raise SoftwareServiceError(error=err_msg)

        msg = "Deleting releases: %s" % ",".join(release_list)
        LOG.info(msg)
        audit_log_info(msg)

        # Handle operation
        for release_id in release_list:
            release = self.release_collection.get_release_by_id(release_id)
            release_sw_version = release.sw_version

            # Delete ostree content if it exists.
            # RPM based patches (from upgrades) will not have ostree contents
            ostree_tar_filename = self.get_ostree_tar_filename(release_sw_version, release_id)
            if os.path.isfile(ostree_tar_filename):
                try:
                    os.remove(ostree_tar_filename)
                except OSError:
                    msg = "Failed to remove ostree tarball %s" % ostree_tar_filename
                    LOG.exception(msg)
                    raise OSTreeTarFail(msg)

            package_repo_dir = "%s/rel-%s" % (constants.PACKAGE_FEED_DIR, release_sw_version)
            packages = [pkg.split("_")[0] for pkg in release.packages]
            if packages:
                apt_utils.package_remove(package_repo_dir, packages)

            # Delete upgrade iso file in folder
            # TODO(heitormatsui): treat the prepatched iso scenario
            metadata_file = "%s-metadata.xml" % release_id
            delete_feed = False
            to_release_iso_dir = os.path.join(constants.FEED_OSTREE_BASE_DIR, ("rel-%s" % release_sw_version))
            if os.path.isdir(to_release_iso_dir):
                # check if the release being deleted is related to this feed
                if os.path.isfile("%s/upgrades/%s" % (to_release_iso_dir, metadata_file)):
                    delete_feed = True
            if delete_feed:
                try:
                    shutil.rmtree(to_release_iso_dir)
                except OSError:
                    msg = "Failed to remove release iso %s folder" % to_release_iso_dir
                    LOG.exception(msg)
                    raise ReleaseIsoDeleteFailure(msg)
                msg = "Deleted feed directory %s" % to_release_iso_dir
                LOG.info(msg)
                msg_info += msg + "\n"

            # TODO(lbonatti): treat the upcoming versioning changes
            PatchFile.delete_versioned_directory(release.sw_release)

            try:
                # Delete the metadata
                deploystate = release.state
                metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate]
                os.remove("%s/%s" % (metadata_dir, metadata_file))
            except OSError:
                msg = "Failed to remove metadata for %s" % release_id
                LOG.exception(msg)
                raise MetadataFail(msg)

            self.delete_restart_script(release_id)
            reload_release_data()
            msg = "%s has been deleted" % release_id
            LOG.info(msg)
            msg_info += msg + "\n"

        # Refresh data, if needed
        self.base_pkgdata.loaddirs()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def in_sync_controller_api(self):
        """
        Check if both controllers are in sync
        by checking the database JSON file
        """
        is_in_sync = False

        does_synced_software_exist = os.path.isfile(constants.SYNCED_SOFTWARE_JSON_FILE)
        does_software_exist = os.path.isfile(constants.SOFTWARE_JSON_FILE)

        if does_synced_software_exist and does_software_exist:
            # both files exist, compare them
            with open(constants.SYNCED_SOFTWARE_JSON_FILE, 'r') as f:
                synced_software = json.load(f)
            with open(constants.SOFTWARE_JSON_FILE, 'r') as f:
                software = json.load(f)
            LOG.debug("Synced software: %s", synced_software)
            LOG.debug("Software: %s", software)

            is_in_sync = synced_software == software
        elif not does_synced_software_exist and not does_software_exist:
            # neither file exists, so no deployment is in progress
            is_in_sync = True
        else:
            # only one file exists, so a deployment is in progress
            is_in_sync = False

        return {"in_sync": is_in_sync}
    def patch_init_release_api(self, release_id):
        """
        Create an empty repo for a new release_id
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        msg = "Initializing repo for: %s" % release_id
        LOG.info(msg)
        audit_log_info(msg)

        if release_id == SW_VERSION:
            msg = "Rejected: Requested release %s is the running release" % release_id
            msg_error += msg + "\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # Refresh data
        self.base_pkgdata.loaddirs()

        reload_release_data()

        repo_dir[release_id] = "%s/rel-%s" % (repo_root_dir, release_id)

        # Verify the release doesn't already exist
        if os.path.exists(repo_dir[release_id]):
            msg = "Patch repository for %s already exists" % release_id
            msg_info += msg + "\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # Create the repo
        try:
            # todo(jcasteli) determine if ostree change needs a createrepo equivalent
            output = "UNDER CONSTRUCTION for OSTREE"
            LOG.info("Repo[%s] updated:\n%s", release_id, output)
        except Exception:
            msg = "Failed to update the repo for %s" % release_id
            LOG.exception(msg)

            # Wipe out what was created
            shutil.rmtree(repo_dir[release_id])
            del repo_dir[release_id]

            raise SoftwareFail(msg)

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def patch_query_what_requires(self, patch_ids):
        """
        Query the known patches to see which have dependencies on the specified patches
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        msg = "Querying what requires patches: %s" % ",".join(patch_ids)
        LOG.info(msg)
        audit_log_info(msg)

        # First, verify that all specified patches exist
        id_verification = True
        for patch_id in patch_ids:
            release = self.release_collection.get_release_by_id(patch_id)
            if release is None:
                msg = "Patch %s does not exist" % patch_id
                LOG.error(msg)
                msg_error += msg + "\n"
                id_verification = False

        if not id_verification:
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        required_patches = {}
        for release in self.release_collection.iterate_releases():
            for req_patch in release.requires_release_ids:
                if req_patch not in patch_ids:
                    continue

                if req_patch not in required_patches:
                    required_patches[req_patch] = []

                required_patches[req_patch].append(release.id)

        for patch_id in patch_ids:
            if patch_id in required_patches:
                iter_patch_list = required_patches[patch_id]
                msg_info += "%s is required by: %s\n" % (patch_id, ", ".join(sorted(iter_patch_list)))
            else:
                msg_info += "%s is not required by any patches.\n" % patch_id

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

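    # Illustrative sketch (hypothetical ids, not part of the original source):
    # if PATCH_0002 lists PATCH_0001 among its requires, then
    #     patch_query_what_requires(["PATCH_0001"])
    # reports "PATCH_0001 is required by: PATCH_0002" in the info message.
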
    def send_latest_feed_commit_to_agent(self):
        """
        Notify the patch agent that the latest commit on the feed
        repo has been updated
        """
        # Skip sending messages if host not yet provisioned
        if self.sock_out is None:
            LOG.info("Skipping send feed commit to agent")
            return

        send_commit_to_agent = PatchMessageSendLatestFeedCommit()
        self.socket_lock.acquire()
        send_commit_to_agent.send(self.sock_out)
        self.socket_lock.release()

    def software_sync(self):
        # Increment the software_op_counter here
        self.inc_patch_op_counter()

        self.check_patch_states()

        if self.sock_out is None:
            return True

        # Send the sync requests

        self.controller_neighbours_lock.acquire()
        for n in self.controller_neighbours:
            self.controller_neighbours[n].clear_synced()
        self.controller_neighbours_lock.release()

        msg = PatchMessageSyncReq()
        self.socket_lock.acquire()
        msg.send(self.sock_out)
        self.socket_lock.release()

        # Now we wait, up to two minutes. Future enhancement: wait on a condition
        my_ip = cfg.get_mgmt_ip()
        sync_rc = False
        max_time = time.time() + 120
        while time.time() < max_time:
            all_done = True
            self.controller_neighbours_lock.acquire()
            for n in self.controller_neighbours:
                if n != my_ip and not self.controller_neighbours[n].get_synced():
                    all_done = False
            self.controller_neighbours_lock.release()

            if all_done:
                LOG.info("Sync complete")
                sync_rc = True
                break

            time.sleep(0.5)

        # Send hellos to the hosts now, to get queries performed
        hello_agent = PatchMessageHelloAgent()
        self.socket_lock.acquire()
        hello_agent.send(self.sock_out)
        self.socket_lock.release()

        if not sync_rc:
            LOG.info("Timed out waiting for sync completion")
        return sync_rc

    def software_release_query_cached(self, **kwargs):
        query_state = None
        if "show" in kwargs:
            valid_query_states = [
                states.AVAILABLE,
                states.UNAVAILABLE,
                states.DEPLOYED,
                states.REMOVING,
                states.COMMITTED,
                states.DEPLOYING
            ]
            if kwargs["show"] in valid_query_states:
                query_state = kwargs["show"]

        query_release = None
        if "release" in kwargs:
            query_release = kwargs["release"]

        results = []

        def filter_by_version():
            for r in self.release_collection.iterate_releases():
                if r.sw_version in query_release:
                    yield r

        def filter_by_state():
            for rel in self.release_collection.iterate_releases_by_state(query_state):
                yield rel

        if query_state is not None:
            iterator = filter_by_state
        elif query_release is not None:
            iterator = filter_by_version
        else:
            iterator = self.release_collection.iterate_releases

        for i in iterator():
            data = i.to_query_dict()
            results.append(data)

        return results

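    # Illustrative usage (hypothetical values, not part of the original source):
    #     software_release_query_cached(show="available")   # filter by state
    #     software_release_query_cached(release="24.09")    # filter by version
    # When both filters are supplied, the state filter wins, since the
    # iterator selection above checks query_state first.
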
    def software_release_query_specific_cached(self, release_ids):
        LOG.info("software release show")

        results = []

        for release_id in release_ids:
            release = self.release_collection.get_release_by_id(release_id)
            if release is not None:
                results.append(release.to_query_dict())

        return results

    def get_dependencies(self, patch_ids, recursive):
        dependencies = set()
        patch_added = False

        # Add patches to workset
        for patch_id in sorted(patch_ids):
            dependencies.add(patch_id)
            patch_added = True

        while patch_added:
            patch_added = False
            for patch_id in sorted(dependencies):
                release = self.release_collection.get_release_by_id(patch_id)
                for req in release.requires:
                    if req not in dependencies:
                        dependencies.add(req)
                        patch_added = recursive

        return sorted(dependencies)

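    # Illustrative sketch (hypothetical ids, not part of the original source):
    # if REL_3 requires REL_2 and REL_2 requires REL_1, then
    #     get_dependencies(["REL_3"], recursive=True)   # ["REL_1", "REL_2", "REL_3"]
    #     get_dependencies(["REL_3"], recursive=False)  # ["REL_2", "REL_3"]
    # because a non-recursive pass only picks up direct requirements.
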
    def patch_query_dependencies(self, patch_ids, **kwargs):
        msg = "Patch query-dependencies %s" % patch_ids
        LOG.info(msg)
        audit_log_info(msg)

        failure = False

        results = {"patches": [],
                   "error": ""}

        recursive = False
        if kwargs.get("recursive") == "yes":
            recursive = True

        # Verify patch IDs
        for patch_id in sorted(patch_ids):
            release = self.release_collection.get_release_by_id(patch_id)
            if release is None:
                errormsg = "%s is unrecognized\n" % patch_id
                LOG.info("patch_query_dependencies: %s", errormsg)
                results["error"] += errormsg
                failure = True

        if failure:
            LOG.info("patch_query_dependencies failed")
            return results

        results["patches"] = self.get_dependencies(patch_ids, recursive)

        return results

    def patch_commit(self, patch_ids, dry_run=False):
        msg = "Patch commit %s" % patch_ids
        LOG.info(msg)
        audit_log_info(msg)

        try:
            if not os.path.exists(states.COMMITTED_DIR):
                os.makedirs(states.COMMITTED_DIR)
        except os.error:
            msg = "Failed to create %s" % states.COMMITTED_DIR
            LOG.exception(msg)
            raise SoftwareFail(msg)

        failure = False
        recursive = True
        cleanup_files = set()
        results = {"info": "",
                   "error": ""}

        # Ensure there are only REL patches
        non_rel_list = []
        for release in self.release_collection.iterate_releases():
            if release.status != constants.STATUS_RELEASED:
                non_rel_list.append(release.id)

        if len(non_rel_list) > 0:
            errormsg = "A commit cannot be performed with non-REL status patches in the system:\n"
            for patch_id in non_rel_list:
                errormsg += " %s\n" % patch_id
            LOG.info("patch_commit rejected: %s", errormsg)
            results["error"] += errormsg
            return results

        # Verify Release IDs
        for patch_id in sorted(patch_ids):
            release = self.release_collection.get_release_by_id(patch_id)
            if release is None:
                errormsg = "%s is unrecognized\n" % patch_id
                LOG.info("patch_commit: %s", errormsg)
                results["error"] += errormsg
                failure = True

        if failure:
            LOG.info("patch_commit: Failed patch ID check")
            return results

        commit_list = self.get_dependencies(patch_ids, recursive)

        # Check patch states
        avail_list = []
        for patch_id in commit_list:
            release = self.release_collection.get_release_by_id(patch_id)
            if release.state not in [states.DEPLOYED, states.COMMITTED]:
                avail_list.append(patch_id)

        if len(avail_list) > 0:
            errormsg = "The following patches are not applied and cannot be committed:\n"
            for patch_id in avail_list:
                errormsg += " %s\n" % patch_id
            LOG.info("patch_commit rejected: %s", errormsg)
            results["error"] += errormsg
            return results

        for patch_id in commit_list:
            release = self.release_collection.get_release_by_id(patch_id)
            # Fetch file paths that need to be cleaned up to
            # free patch storage disk space
            if release.restart_script:
                restart_script_path = "%s/%s" % \
                    (root_scripts_dir,
                     release.restart_script)
                if os.path.exists(restart_script_path):
                    cleanup_files.add(restart_script_path)
            patch_sw_version = release.sw_release
            abs_ostree_tar_dir = package_dir[patch_sw_version]
            software_tar_path = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id)
            if os.path.exists(software_tar_path):
                cleanup_files.add(software_tar_path)

        # Calculate disk space
        disk_space = 0
        for file in cleanup_files:
            statinfo = os.stat(file)
            disk_space += statinfo.st_size

        if dry_run:
            results["info"] = "This commit operation would free %0.2f MiB" % (disk_space / (1024.0 * 1024.0))
            return results

        # Do the commit

        # Move the metadata to the committed dir
        for patch_id in commit_list:
            metadata_fname = "%s-metadata.xml" % patch_id
            deployed_fname = os.path.join(states.DEPLOYED_DIR, metadata_fname)
            committed_fname = os.path.join(states.COMMITTED_DIR, metadata_fname)
            if os.path.exists(deployed_fname):
                try:
                    shutil.move(deployed_fname, committed_fname)
                except shutil.Error:
                    msg = "Failed to move the metadata for %s" % patch_id
                    LOG.exception(msg)
                    raise MetadataFail(msg)

        # Delete the files
        for file in cleanup_files:
            try:
                os.remove(file)
            except OSError:
                msg = "Failed to remove: %s" % file
                LOG.exception(msg)
                raise MetadataFail(msg)

        reload_release_data()

        results["info"] = "The releases have been committed."
        return results

    def query_host_cache(self):
        output = []

        self.hosts_lock.acquire()
        for nbr in list(self.hosts):
            host = self.hosts[nbr].get_dict()
            host["interim_state"] = False
            for patch_id in list(sc.interim_state):
                if nbr in sc.interim_state[patch_id]:
                    host["interim_state"] = True

            output.append(host)

        self.hosts_lock.release()

        return output

    def any_patch_host_installing(self):
        rc = False

        with self.hosts_lock:
            for host in self.hosts.values():
                if host.state == constants.PATCH_AGENT_STATE_INSTALLING:
                    rc = True
                    break
        return rc

    def copy_restart_scripts(self):
        applying_states = [states.DEPLOYING, states.REMOVING]
        for release in self.release_collection.iterate_releases():
            if release.restart_script:
                if release.state in applying_states:
                    try:
                        restart_script_name = release.restart_script
                        restart_script_path = "%s/%s" \
                            % (root_scripts_dir, restart_script_name)
                        dest_path = constants.PATCH_SCRIPTS_STAGING_DIR
                        dest_script_file = "%s/%s" \
                            % (constants.PATCH_SCRIPTS_STAGING_DIR, restart_script_name)
                        if not os.path.exists(dest_path):
                            os.makedirs(dest_path, 0o700)
                        shutil.copyfile(restart_script_path, dest_script_file)
                        os.chmod(dest_script_file, 0o700)
                        msg = "Creating restart script for %s" % release.id
                        LOG.info(msg)
                    except shutil.Error:
                        msg = "Failed to copy the restart script for %s" % release.id
                        LOG.exception(msg)
                        raise SoftwareError(msg)
                else:
                    try:
                        restart_script_name = release.restart_script
                        restart_script_path = "%s/%s" \
                            % (constants.PATCH_SCRIPTS_STAGING_DIR, restart_script_name)
                        if os.path.exists(restart_script_path):
                            os.remove(restart_script_path)
                            msg = "Removing restart script for %s" % release.id
                            LOG.info(msg)
                    except OSError:
                        msg = "Failed to delete the restart script for %s" % release.id
                        LOG.exception(msg)

    def _update_state_to_peer(self):
        state_update_msg = SoftwareMessageDeployStateUpdate()
        self.socket_lock.acquire()
        try:
            state_update_msg.send(self.sock_out)
        finally:
            self.socket_lock.release()

    def _release_basic_checks(self, deployment):
        """
        Perform basic sanity checks on the release data
        :param deployment: release to be checked
        :return: the release object if it exists; raises
                 SoftwareServiceError otherwise
        """

        # We need to verify that the software release exists
        release = self.release_collection.get_release_by_id(deployment)
        if not release:
            msg = "Software release version corresponding to the specified release " \
                  "%s does not exist." % deployment
            LOG.error(msg)
            msg = msg + " Try deleting and re-uploading the software for recovery."
            raise SoftwareServiceError(error=msg)

        return release

    def _deploy_precheck(self, release_version: str, force: bool = False,
                         region_name: str = "RegionOne", patch: bool = False) -> dict:
        """
        Verify if the system satisfies the prerequisites to upgrade to a
        specified deployment.
        :param release_version: full release name, e.g. starlingx-MM.mm.pp
        :param force: if True will ignore minor alarms during precheck
        :param region_name: region_name
        :param patch: if True then indicate precheck is for patch release
        :return: dict of info, warning and error messages
        """

        msg_info = ""
        msg_warning = ""
        msg_error = ""

        precheck_script = utils.get_precheck_script(release_version)

        if not os.path.isfile(precheck_script):
            msg = "Release files for deployment %s are not present on the system, " \
                  "cannot proceed with the precheck." % release_version
            LOG.error(msg)
            msg_error = "Failed to perform deploy precheck. " \
                        "The uploaded release may have been damaged. " \
                        "Try deleting and re-uploading the release.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # parse local config file to pass parameters to precheck script
        try:
            cp = configparser.ConfigParser()
            cp.read(constants.SOFTWARE_CONFIG_FILE_LOCAL)
            ks_section = cp["keystone_authtoken"]
            auth_url = ks_section["auth_url"]
            username = ks_section["username"]
            password = ks_section["password"]
            project_name = ks_section["project_name"]
            user_domain_name = ks_section["user_domain_name"]
            project_domain_name = ks_section["project_domain_name"]
        except Exception as e:
            msg = "Error parsing config file: %s." % str(e)
            LOG.error(msg)
            msg_error = "Failed to perform deploy precheck. An internal error has occurred. " \
                        "Try locking and unlocking the controller for recovery.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # TODO(heitormatsui) if different region was passed as parameter then
        # need to discover the subcloud auth_url to pass to precheck script
        if region_name != "RegionOne":
            pass

        cmd = [precheck_script,
               "--auth_url=%s" % auth_url,
               "--username=%s" % username,
               "--password=%s" % password,
               "--project_name=%s" % project_name,
               "--user_domain_name=%s" % user_domain_name,
               "--project_domain_name=%s" % project_domain_name,
               "--region_name=%s" % region_name]
        if force:
            cmd.append("--force")
        if patch:
            cmd.append("--patch")

        # Call precheck from the deployment files
        precheck_return = subprocess.run(
            cmd,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            check=False,
            text=True,
        )
        system_healthy = None
        if precheck_return.returncode in [constants.RC_SUCCESS, constants.RC_UNHEALTHY]:
            system_healthy = precheck_return.returncode == constants.RC_SUCCESS
            msg_info += precheck_return.stdout
        else:
            msg_error += precheck_return.stdout

        return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=system_healthy)

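    # Illustrative sketch (hypothetical values, not part of the original source):
    # the resulting invocation resembles
    #     <precheck_script> --auth_url=http://... --username=admin ... --region_name=RegionOne [--force] [--patch]
    # and the returned dict resembles
    #     {"info": "...", "warning": "", "error": "", "system_healthy": True}
    # where system_healthy is None when the script exits with an unexpected code.
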
    def software_deploy_precheck_api(self, deployment: str, force: bool = False, region_name=None) -> dict:
        """
        Verify if the system satisfies the prerequisites to upgrade to a
        specified deployment.
        :param deployment: full release name, e.g. starlingx-MM.mm.pp
        :param force: if True will ignore minor alarms during precheck
        :return: dict of info, warning and error messages
        """

        release = self._release_basic_checks(deployment)
        if region_name is None:
            region_name = utils.get_local_region_name()

        release_version = release.sw_release
        patch = not utils.is_upgrade_deploy(SW_VERSION, release_version)
        return self._deploy_precheck(release_version, force, region_name, patch)

    def _deploy_upgrade_start(self, to_release, commit_id):
        LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION))
        deploy_script_name = constants.DEPLOY_START_SCRIPT
        cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name)
        if not os.path.isfile(cmd_path):
            msg = f"{deploy_script_name} was not found"
            LOG.error(msg)
            raise SoftwareServiceError(f"{deploy_script_name} was not found. "
                                       "The uploaded software could have been damaged. "
                                       "Please delete the software and re-upload it")
        major_to_release = utils.get_major_release_version(to_release)
        k8s_ver = get_k8s_ver()
        postgresql_port = str(cfg.alt_postgresql_port)
        feed = os.path.join(constants.FEED_DIR,
                            "rel-%s/ostree_repo" % major_to_release)

        LOG.info("k8s version %s" % k8s_ver)
        upgrade_start_cmd = [cmd_path, SW_VERSION, major_to_release, k8s_ver, postgresql_port,
                             feed]
        if commit_id is not None:
            upgrade_start_cmd.append(commit_id)
        # pass in keystone auth through environment variables
        # OS_AUTH_URL, OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_USER_DOMAIN_NAME,
        # OS_PROJECT_DOMAIN_NAME, OS_REGION_NAME are in env variables.
        keystone_auth = CONF.get('keystone_authtoken')
        env = {}
        env["OS_AUTH_URL"] = keystone_auth["auth_url"] + '/v3'
        env["OS_USERNAME"] = keystone_auth["username"]
        env["OS_PASSWORD"] = keystone_auth["password"]
        env["OS_PROJECT_NAME"] = keystone_auth["project_name"]
        env["OS_USER_DOMAIN_NAME"] = keystone_auth["user_domain_name"]
        env["OS_PROJECT_DOMAIN_NAME"] = keystone_auth["project_domain_name"]
        env["OS_REGION_NAME"] = keystone_auth["region_name"]

        try:
            LOG.info("starting subprocess %s" % ' '.join(upgrade_start_cmd))
            subprocess.Popen(' '.join(upgrade_start_cmd), start_new_session=True, shell=True, env=env)
            LOG.info("subprocess started")
            return True
        except subprocess.SubprocessError as e:
            LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_start_cmd), e))
            return False

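    # Illustrative sketch (hypothetical values, not part of the original source):
    # the launched deploy-start command resembles
    #     <cmd_path> 22.12.0 24.09 v1.24.4 6666 <FEED_DIR>/rel-24.09/ostree_repo <commit_id>
    # i.e. from-release, major to-release, k8s version, alternate postgresql
    # port and feed repo path, with keystone credentials passed via the env.
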
    def deploy_state_changed(self, new_state):
        '''Handle 'deploy state change' event, invoked when operations complete. '''

        deploy_state = DeployState.get_instance()
        state_event = {
            DEPLOY_STATES.START_DONE: deploy_state.start_done,
            DEPLOY_STATES.START_FAILED: deploy_state.start_failed,
            DEPLOY_STATES.ACTIVATE_DONE: deploy_state.activate_completed,
            DEPLOY_STATES.ACTIVATE_FAILED: deploy_state.activate_failed
        }
        if new_state in state_event:
            state_event[new_state]()
        else:
            msg = f"Received invalid deploy state update {new_state}"
            LOG.error(msg)

    def host_deploy_state_changed(self, hostname, host_deploy_state):
        '''Handle 'host deploy state change' event. '''
        self.db_api_instance.update_deploy_host(hostname, host_deploy_state)

    def add_text_tag_to_xml(self, parent, name, text):
        tag = ET.SubElement(parent, name)
        tag.text = text
        return tag

    @require_deploy_state([None],
                          "There is already a deployment in progress ({state}). "
                          "Please complete the current deployment.")
    def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict:
        """
        Start deploy of a specified release.
        The operation implies deploying all undeployed dependency releases of
        the specified release. i.e., deploying release 24.09.1 implies
        deploying 24.09.0 as well when 24.09.0 has not been deployed.
        The operation includes steps:
        1. find all undeployed dependency releases
        2. ensure all releases (dependency and specified release) are ready to be deployed
        3. precheck
        4. transform all involved releases to deploying state
        5. start the deploy subprocess
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""
        deploy_release = self._release_basic_checks(deployment)

        running_release = self.release_collection.running_release
        deploy_sw_version = deploy_release.sw_version  # MM.mm

        feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, deploy_sw_version)
        commit_id = deploy_release.commit_id
        patch_release = True
        if utils.is_upgrade_deploy(SW_VERSION, deploy_release.sw_release):
            # TODO(bqian) remove default latest commit when a commit-id is built into GA metadata
            if commit_id is None:
                commit_id = ostree_utils.get_feed_latest_commit(deploy_sw_version)

            patch_release = False
            to_release = deploy_release.sw_release
            ret = self._deploy_precheck(to_release, force, patch=patch_release)
            if ret["system_healthy"] is None:
                ret["error"] = "Failed to perform deploy precheck. An internal error has occurred.\n" + \
                               ret["error"]
                return ret
            elif not ret["system_healthy"]:
                ret["info"] = "The following issues have been detected, which prevent " \
                              "deploying %s\n" % deployment + ret["info"] + \
                              "Please fix the above issues, then retry the deploy.\n"
                return ret

            if self._deploy_upgrade_start(to_release, commit_id):
                collect_current_load_for_hosts()
                create_deploy_hosts()

                release_state = ReleaseState(release_ids=[deploy_release.id])
                release_state.start_deploy()
                deploy_state = DeployState.get_instance()
                deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required)
                self._update_state_to_peer()

                msg_info = "Deployment for %s started" % deployment
            else:
                msg_error = "Deployment for %s failed to start" % deployment

            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # todo(chuck) Remove once we determine how we are associating a patch
        # with a release.
        # release in release metadata.xml file represents the latest commit
        # for release_id in sorted(list(self.release_data.metadata)):
        #     if SW_VERSION == self.release_data.contents[release_id]["release"]:
        #         running_sw_version = self.release_data.metadata[release_id]["sw_version"]
        #         LOG.info("Running software version: %s", running_sw_version)

        # TODO(bqian) update references of sw_release (string) to SWRelease object

        if deploy_release > running_release:
            operation = "apply"
        elif running_release > deploy_release:
            operation = "remove"
        else:
            # NOTE(bqian) The error message doesn't seem right. Software version format
            # or any metadata semantic check should be done during upload. If invalid
            # data is found subsequently, the data is considered damaged, and deleting
            # and re-uploading should be recommended.
            msg_error += "The software version format for this release is not correct.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # NOTE(bqian) shouldn't patch release deploy and remove be doing the same thing
        # in terms of ostree commit, i.e. deploy to the commit specified by the commit-id
        # associated with the release from the deploy start command?
        # If releases are such that:
        # R2 requires R1, R3 requires R2, R4 requires R3
        # If current running release is R2 and command issued is "software deploy start R4"
        # operation is "apply" with order [R3, R4]
        # If current running release is R4 and command issued is "software deploy start R2"
        # operation is "remove" with order [R4, R3]
        if operation == "apply":

            collect_current_load_for_hosts()
            create_deploy_hosts()

            # reverse = True is used for apply operation
            deployment_list = self.release_apply_remove_order(deployment, running_release.sw_release, reverse=True)

            msg = "Deploy start order for apply operation: %s" % ",".join(deployment_list)
            LOG.info(msg)
            audit_log_info(msg)

            # todo(jcasteli) Do we need this block below?
            # Check for patches that can't be applied during an upgrade
            upgrade_check = True
            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                if release.sw_version != SW_VERSION and release.apply_active_release_only == "Y":
                    msg = "%s cannot be created during an upgrade" % release_id
                    LOG.error(msg)
                    msg_error += msg + "\n"
                    upgrade_check = False

            if not upgrade_check:
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skip-semantic") != "yes":
                self.run_semantic_check(constants.SEMANTIC_PREAPPLY, deployment_list)

            # Start applying the releases
            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                msg = "Starting deployment for: %s" % release_id
                LOG.info(msg)
                audit_log_info(msg)

                packages = [pkg.split("_")[0] for pkg in release.packages]
                if not packages:
                    msg = "Unable to determine packages to install"
                    LOG.error(msg)
                    raise MetadataFail(msg)

                if release.state not in (states.AVAILABLE, states.COMMITTED):
                    msg = "%s is already being deployed" % release_id
                    LOG.info(msg)
                    msg_info += msg + "\n"
                    continue

                latest_commit = ""
                try:
                    latest_commit = ostree_utils.get_feed_latest_commit(running_release.sw_version)
                    LOG.info("Latest commit: %s" % latest_commit)
                except OSTreeCommandFail:
                    LOG.exception("Failure during commit consistency check for %s.", release_id)

                try:
                    apt_utils.run_install(feed_repo, packages)
                except APTOSTreeCommandFail:
                    msg = "Failed to install Debian package."
                    LOG.exception(msg)
                    raise APTOSTreeCommandFail(msg)

                # Update the feed ostree summary
                ostree_utils.update_repo_summary_file(feed_repo)

                # Get the latest commit after performing "apt-ostree install".
                self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION)

                try:
                    # Move the release metadata to deploying dir
                    deploystate = release.state
                    metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate]

                    metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release_id)
                    tree = ET.parse(metadata_file)
                    root = tree.getroot()

                    # ostree = ET.SubElement(root, "ostree")
                    self.add_text_tag_to_xml(root, "number_of_commits", "1")
                    self.add_text_tag_to_xml(root, "previous_commit", latest_commit)
                    self.add_text_tag_to_xml(root, "commit", self.latest_feed_commit)

                    ET.indent(tree, '  ')
                    with open(metadata_file, "wb") as outfile:
                        tree = ET.tostring(root)
                        outfile.write(tree)

                    LOG.info("Latest feed commit: %s added to metadata file" % self.latest_feed_commit)
                    msg_info += "%s is now in the repo\n" % release_id
                except shutil.Error:
                    msg = "Failed to move the metadata for %s" % release_id
                    LOG.exception(msg)
                    raise MetadataFail(msg)

                reload_release_data()
                # NOTE(bqian) The check and exception raise below should be revisited;
                # if applicable, it should be applied at the beginning of all requests.
                if len(self.hosts) == 0:
                    msg = "service is running in incorrect state. No registered host"
                    raise InternalError(msg)

                # TODO(bqian) get the list of undeployed required release ids
                # i.e., when deploying 24.03.3, which requires 24.03.2 and 24.03.1, all
                # 3 release ids should be passed in to create the new ReleaseState
                collect_current_load_for_hosts()
                create_deploy_hosts()
                release_state = ReleaseState(release_ids=[release.id])
                release_state.start_deploy()
                deploy_state = DeployState.get_instance()
                to_release = deploy_release.sw_release
                deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required)
                self._update_state_to_peer()

                with self.hosts_lock:
                    self.interim_state[release_id] = list(self.hosts)

            # There is no defined behavior for deploy start for patching releases, so
            # move the deploy state to start-done
            deploy_state = DeployState.get_instance()
            deploy_state.start_done()
            self._update_state_to_peer()

        elif operation == "remove":
            collect_current_load_for_hosts()
            create_deploy_hosts()
            deployment_list = self.release_apply_remove_order(deployment, running_release.sw_version)
            msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list)
            LOG.info(msg)
            audit_log_info(msg)

            remove_unremovable = False

            if kwargs.get("removeunremovable") == "yes":
                remove_unremovable = True

            # See if any of the patches are marked as unremovable
            unremovable_verification = True
            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                if release.unremovable:
                    if remove_unremovable:
                        msg = "Unremovable release %s being removed" % release_id
                        LOG.warning(msg)
                        msg_warning += msg + "\n"
                    else:
                        msg = "Release %s is not removable" % release_id
                        LOG.error(msg)
                        msg_error += msg + "\n"
                        unremovable_verification = False
                elif release.state == states.COMMITTED:
                    msg = "Release %s is committed and cannot be removed" % release_id
                    LOG.error(msg)
                    msg_error += msg + "\n"
                    unremovable_verification = False

            if not unremovable_verification:
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skipappcheck") != "yes":
                # Check application dependencies before removing
                required_releases = {}
                for release in deployment_list:
                    for appname, iter_release_list in self.app_dependencies.items():
                        if release in iter_release_list:
                            if release not in required_releases:
                                required_releases[release] = []
                            required_releases[release].append(appname)

                if len(required_releases) > 0:
                    for req_release, app_list in required_releases.items():
                        msg = "%s is required by application(s): %s" % (req_release, ", ".join(sorted(app_list)))
                        msg_error += msg + "\n"
                        LOG.info(msg)

                    return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skip-semantic") != "yes":
                self.run_semantic_check(constants.SEMANTIC_PREREMOVE, deployment_list)

            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                msg = "Removing release: %s" % release_id
                LOG.info(msg)
                audit_log_info(msg)

                if release.state == states.AVAILABLE:
                    msg = "The deployment for %s has not been created" % release_id
                    LOG.info(msg)
                    msg_info += msg + "\n"
                    continue

                major_release_sw_version = release.sw_version
                # this is an ostree patch
                # Base commit is fetched from the patch metadata.
                base_commit = release.base_commit_id
                feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_release_sw_version)
                try:
                    # Reset the ostree HEAD
                    ostree_utils.reset_ostree_repo_head(base_commit, feed_repo)

                    # Delete all commits that belong to this release
                    # NOTE(bqian) there should be just one commit per release.
                    commit_to_delete = release.commit_id
                    ostree_utils.delete_ostree_repo_commit(commit_to_delete, feed_repo)

                    # Update the feed ostree summary
                    ostree_utils.update_repo_summary_file(feed_repo)

                except OSTreeCommandFail:
                    LOG.exception("Failure while removing release %s.", release_id)
                try:
                    # Move the metadata to the deleted dir
                    self.release_collection.update_state([release_id], states.REMOVING_DIR)
                    msg_info += "%s has been removed from the repo\n" % release_id
                except shutil.Error:
                    msg = "Failed to move the metadata for %s" % release_id
                    LOG.error(msg)
                    raise MetadataFail(msg)

                if len(self.hosts) == 0:
                    msg = "service is running in incorrect state. No registered host"
                    raise InternalError(msg)

                # TODO(bqian) get the list of undeployed required release ids
                # i.e., when deploying 24.03.3, which requires 24.03.2 and 24.03.1, all
                # 3 release ids should be passed in to create the new ReleaseState
                collect_current_load_for_hosts()
                create_deploy_hosts()
                release_state = ReleaseState(release_ids=[release.id])
                release_state.start_remove()
                deploy_state = DeployState.get_instance()
                to_release = deploy_release.sw_release
                deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required)
                self._update_state_to_peer()

                # only update latest_feed_commit if it is an ostree patch
                if release.base_commit_id is not None:
                    # Base Commit in this release's metadata.xml file represents the latest commit
                    # after this release has been removed from the feed repo
                    self.latest_feed_commit = release.base_commit_id

                with self.hosts_lock:
                    self.interim_state[release_id] = list(self.hosts)

            # There is no defined behavior for deploy start for patching releases, so
            # move the deploy state to start-done
            deploy_state = DeployState.get_instance()
            deploy_state.start_done()
            self._update_state_to_peer()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _deploy_complete(self):
        # TODO(bqian) complete the deploy
        # as the deployment has already been activated, there is no return;
        # deploy complete can only succeed.
        # the task for completion of a deploy is to delete leftover data from
        # the previous release. If some data could not be deleted, we need to
        # automatically reattempt to delete it at a later stage (outside
        # a deployment)
        return True

    @require_deploy_state([DEPLOY_STATES.ACTIVATE_DONE],
                          "Must complete deploy activate before completing the deployment")
    def software_deploy_complete_api(self) -> dict:
        """
        Completes a deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy_state = DeployState.get_instance()

        if self._deploy_complete():
            deploy_state.completed()
            msg_info += "Deployment has been completed\n"

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _activate(self):
        deploy = self.db_api_instance.get_deploy_all()
        if deploy:
            deploy = deploy[0]
        else:
            msg = "Deployment is missing unexpectedly"
            raise InvalidOperation(msg)

        deploying = ReleaseState(release_state=states.DEPLOYING)
        if deploying.is_major_release_deployment():
            return self._activate_major_release(deploy)
        else:
            return self.activate_patching_release()

    def activate_patching_release(self):
        deploy_state = DeployState.get_instance()
        deploy_state.activate()
        # patching release activate operations go here
        deploy_state.activate_completed()
        return True

    def _activate_major_release(self, deploy):
        cmd_path = "/usr/bin/software-deploy-activate"
        from_release = utils.get_major_release_version(deploy.get("from_release"))
        to_release = utils.get_major_release_version(deploy.get("to_release"))

        upgrade_activate_cmd = [cmd_path, from_release, to_release]

        try:
            LOG.info("starting subprocess %s" % ' '.join(upgrade_activate_cmd))
            subprocess.Popen(' '.join(upgrade_activate_cmd), start_new_session=True, shell=True)
            LOG.info("subprocess started")
        except subprocess.SubprocessError as e:
            LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_activate_cmd), e))
            return False

        return True

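    # Illustrative sketch (hypothetical versions, not part of the original source):
    # for a deployment from 22.12.3 to 24.09.0 the launched command is
    #     /usr/bin/software-deploy-activate 22.12 24.09
    # since get_major_release_version() reduces each release to its MM.mm part.
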
    def _check_pre_activate(self):
        # check current deployment; deploy to all hosts must have completed,
        # i.e. the deploy state is host-done, or
        # 'activate-failed' when reattempting a previously failed activate
        deploy_state = DeployState.get_deploy_state()
        if deploy_state not in [DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED]:
            msg = "Must complete deploying all hosts before activating the deployment"
            raise InvalidOperation(msg)

        deploy_hosts = self.db_api_instance.get_deploy_host()
        invalid_hosts = []
        for deploy_host in deploy_hosts:
            if deploy_host['state'] not in [states.DEPLOYED]:
                invalid_hosts.append(deploy_host)

        if len(invalid_hosts) > 0:
            msg = "All hosts must have completed deployment before activating the deployment:\n"
            for invalid_host in invalid_hosts:
                msg += "%s: %s\n" % (invalid_host["hostname"], invalid_host["state"])
            raise InvalidOperation(msg)

    @require_deploy_state([DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED],
                          "Activate deployment only when current deployment state is {require_states}")
    def software_deploy_activate_api(self) -> dict:
        """
        Activates the deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        self._check_pre_activate()

        deploy_state = DeployState.get_instance()
        deploy_state.activate()

        try:
            self._activate()
            msg_info = "Deploy activate has started"
        except Exception:
            deploy_state.activate_failed()
            raise

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def software_deploy_show_api(self, from_release=None, to_release=None):
        # Retrieve deploy state from db
        if from_release and to_release:
            return self.db_api_instance.get_deploy(from_release, to_release)
        else:
            # Retrieve deploy state from db in list format
            return self.db_api_instance.get_deploy_all()

    @require_deploy_state([DEPLOY_STATES.START_DONE, DEPLOY_STATES.HOST, DEPLOY_STATES.HOST_FAILED],
                          "Current deployment ({state}) is not ready to deploy host")
    def software_deploy_host_api(self, hostname, force, async_req=False):
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy_host = self.db_api_instance.get_deploy_host_by_hostname(hostname)
        if deploy_host is None:
            raise HostNotFound(hostname)
        deploy = self.db_api_instance.get_deploy_all()[0]
        to_release = deploy.get("to_release")
        release_id = None
        for release in self.release_collection.iterate_releases():
            if to_release == release.sw_release:
                release_id = release.id
        deploy_host_validations(hostname, self.release_collection.get_release_by_id(release_id).is_ga_release)
        deploy_state = DeployState.get_instance()
        deploy_host_state = DeployHostState(hostname)
        deploy_state.deploy_host()
        deploy_host_state.deploy_started()

        # if in a 'deploy host' reentrant scenario, i.e. retrying after
        # a failure, then clear the failure alarm before retrying
        entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname)
        self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
                                   fm_constants.FM_ALARM_STATE_CLEAR,
                                   entity_instance_id)

        # NOTE(bqian) Get IP address to fulfill the need of patching structure.
        # need to review the design
        ip = socket.getaddrinfo(hostname, 0)[0][4][0]
        msg = "Running software deploy host for %s (%s), force=%s, async_req=%s" % (hostname, ip, force, async_req)
        LOG.info(msg)
        audit_log_info(msg)

        if self.allow_insvc_patching:
            LOG.info("Allowing in-service patching")
            force = True
            self.copy_restart_scripts()

        # Check if there is a major release deployment in progress
        # and set agent request parameters accordingly
        major_release = None
        commit_id = None
        if self.check_upgrade_in_progress():
            upgrade_release = self.get_software_upgrade()
            major_release = upgrade_release["to_release"]
            commit_id = ostree_utils.get_feed_latest_commit(major_release)
            force = False
            async_req = False
            msg = "Running major release deployment, major_release=%s, force=%s, async_req=%s, commit_id=%s" % (
                major_release, force, async_req, commit_id)
            msg_info += msg + "\n"
            LOG.info(msg)
            set_host_target_load(hostname, major_release)

        self.hosts_lock.acquire()
        self.hosts[ip].install_pending = True
        self.hosts[ip].install_status = False
        self.hosts[ip].install_reject_reason = None
        self.hosts_lock.release()

        installreq = PatchMessageAgentInstallReq()
        installreq.ip = ip
        installreq.force = force
        installreq.major_release = major_release
        installreq.commit_id = commit_id
        installreq.encode()
        self.socket_lock.acquire()
        installreq.send(self.sock_out)
        self.socket_lock.release()

        if async_req:
            # async_req install requested, so return now
            msg = "Host installation request sent to %s." % self.hosts[ip].hostname
            msg_info += msg + "\n"
            LOG.info("host-install async_req: %s", msg)
            # TODO(bqian) update deploy state to deploy-host
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # Now we wait, up to ten minutes. Future enhancement: wait on a condition
        resp_rx = False
        max_time = time.time() + 600
        # NOTE(bqian) loop below blocks REST API service (slow thread)
        # Consider removing.
        while time.time() < max_time:
            self.hosts_lock.acquire()
            if ip not in self.hosts:
                # The host aged out while we were waiting
                self.hosts_lock.release()
                msg = "Agent expired while waiting: %s" % ip
                msg_error += msg + "\n"
                LOG.error("Error in host-install: %s", msg)
                break

            if not self.hosts[ip].install_pending:
                # We got a response
                resp_rx = True
                if self.hosts[ip].install_status:
                    msg = "Host installation was successful on %s." % self.hosts[ip].hostname
                    msg_info += msg + "\n"
                    LOG.info("host-install: %s", msg)
                elif self.hosts[ip].install_reject_reason:
                    msg = "Host installation rejected by %s. %s" % (
                        self.hosts[ip].hostname,
                        self.hosts[ip].install_reject_reason)
                    msg_error += msg + "\n"
                    LOG.error("Error in host-install: %s", msg)
                else:
                    msg = "Host installation failed on %s." % self.hosts[ip].hostname
                    msg_error += msg + "\n"
                    LOG.error("Error in host-install: %s", msg)

                self.hosts_lock.release()
                break

            self.hosts_lock.release()

            time.sleep(0.5)

        if not resp_rx:
            msg = "Timeout occurred while waiting response from %s." % ip
            msg_error += msg + "\n"
            LOG.error("Error in host-install: %s", msg)

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def drop_host(self, host_ip, sync_nbr=True):
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        ip = host_ip

        self.hosts_lock.acquire()
        # If not in hosts table, maybe a hostname was used instead
        if host_ip not in self.hosts:
            try:
                # Because the host may be getting dropped due to deletion,
                # we may be unable to do a hostname lookup. Instead, we'll
                # iterate through the table here.
                for host in list(self.hosts):
                    if host_ip == self.hosts[host].hostname:
                        ip = host
                        break

                if ip not in self.hosts:
                    # Translated successfully, but IP isn't in the table.
                    # Raise an exception to drop out to the failure handling
                    raise SoftwareError("Host IP (%s) not in table" % ip)
            except Exception:
                self.hosts_lock.release()
                msg = "Unknown host specified: %s" % host_ip
                msg_error += msg + "\n"
                LOG.error("Error in drop-host: %s", msg)
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

        msg = "Running drop-host for %s (%s)" % (host_ip, ip)
        LOG.info(msg)
        audit_log_info(msg)

        del self.hosts[ip]
        for patch_id in list(self.interim_state):
            if ip in self.interim_state[patch_id]:
                self.interim_state[patch_id].remove(ip)

        self.hosts_lock.release()

        if sync_nbr:
            sync_msg = PatchMessageDropHostReq()
            sync_msg.ip = ip
            self.socket_lock.acquire()
            sync_msg.send(self.sock_out)
            self.socket_lock.release()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def check_releases_state(self, release_ids, state):
        """check all releases to be in the specified state"""
        all_matched = True

        for release_id in release_ids:
            release = self.release_collection.get_release_by_id(release_id)
            if release is None:
                all_matched = False
                break

            if release.state != state:
                all_matched = False
                break
        return all_matched

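    # Illustrative sketch (hypothetical ids, not part of the original source):
    #     check_releases_state(["REL_1", "REL_2"], states.DEPLOYED)
    # returns True only if every listed release exists and is in the given
    # state; one missing or mismatched release short-circuits to False.
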
    def is_available(self, release_ids):
        return self.check_releases_state(release_ids, states.AVAILABLE)

    def is_deployed(self, release_ids):
        return self.check_releases_state(release_ids, states.DEPLOYED)

    def is_committed(self, release_ids):
        return self.check_releases_state(release_ids, states.COMMITTED)

    # NOTE(bqian) report_app_dependencies function not being called?
    # which means self.app_dependencies will always be empty and file
    # app_dependency_filename will never exist?
    def report_app_dependencies(self, patch_ids, **kwargs):
        """
        Handle report of application dependencies
        """
        if "app" not in kwargs:
            raise ReleaseInvalidRequest

        appname = kwargs.get("app")

        LOG.info("Handling app dependencies report: app=%s, patch_ids=%s",
                 appname, ','.join(patch_ids))

        if len(patch_ids) == 0:
            if appname in self.app_dependencies:
                del self.app_dependencies[appname]
        else:
            self.app_dependencies[appname] = patch_ids

        try:
            tmpfile, tmpfname = tempfile.mkstemp(
                prefix=app_dependency_basename,
                dir=constants.SOFTWARE_STORAGE_DIR)

            os.write(tmpfile, json.dumps(self.app_dependencies).encode())
            os.close(tmpfile)

            os.rename(tmpfname, app_dependency_filename)
        except Exception:
            LOG.exception("Failed in report_app_dependencies")
            raise SoftwareFail("Internal failure")

        return True

    # NOTE(bqian) unused function query_app_dependencies
    def query_app_dependencies(self):
        """
        Query application dependencies
        """
        data = self.app_dependencies

        return dict(data)

    def deploy_host_list(self):
        deploy_hosts = self.db_api_instance.get_deploy_host()
        deploy = self.db_api_instance.get_deploy_all()
        if not deploy:
            return []
        deploy = deploy[0]

        deploy_host_list = []
        for host in deploy_hosts:
            state = host.get("state")
            deploy_host = {
                "hostname": host.get("hostname"),
                "software_release": deploy.get("from_release"),
                "target_release": deploy.get("to_release") if state else None,
                "reboot_required": deploy.get("reboot_required") if state else None,
                "host_state": state
            }
            deploy_host_list.append(deploy_host)
        return deploy_host_list

    def update_and_sync_deploy_state(self, func, *args, **kwargs):
        """
        :param func: SoftwareApi method
        :param args: arguments passed related to func
        :param kwargs: keyword arguments passed related to func

        Example:
        -------

        Usage of *args:
        update_and_sync_deploy_state(self.db_api_instance.create_deploy,
                                     release_version, to_release, bool)
        Usage of **kwargs:
        update_and_sync_deploy_state(self.db_api_instance.update_deploy_host,
                                     hostname=hostname, state=state)
        """

        func(*args, **kwargs)
        self._update_state_to_peer()

    def manage_software_alarm(self, alarm_id, alarm_state, entity_instance_id):
        try:
            if alarm_id not in constants.SOFTWARE_ALARMS:
                raise Exception("Unknown software alarm '%s'." % alarm_id)

            # deal with the alarm clear scenario
            if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
                LOG.info("Clearing alarm: %s for %s" % (alarm_id, entity_instance_id))
                self.fm_api.clear_fault(alarm_id, entity_instance_id)
                return

            # if not clear alarm scenario, create the alarm
            alarm_data = constants.SOFTWARE_ALARMS.get(alarm_id)
            alarm = fm_api.Fault(
                alarm_id=alarm_id,
                alarm_state=alarm_state,
                entity_type_id=alarm_data.get("entity_type_id"),
                entity_instance_id=entity_instance_id,
                severity=alarm_data.get("severity"),
                reason_text=alarm_data.get("reason_text"),
                alarm_type=alarm_data.get("alarm_type"),
                probable_cause=alarm_data.get("probable_cause"),
                proposed_repair_action=alarm_data.get("proposed_repair_action"),
                service_affecting=alarm_data.get("service_affecting"),
            )
            LOG.info("Raising alarm: %s for %s" % (alarm_id, entity_instance_id))
            self.fm_api.set_fault(alarm)
        except Exception as e:
            LOG.exception("Failed to manage alarm %s with action %s: %s" % (
                alarm_id, alarm_state, str(e)
            ))

    def handle_deploy_state_sync(self, alarm_instance_id):
        """
        Handle the deploy state sync.
        If deploy state is in sync, clear the alarm.
        If not, raise the alarm.
        """
        is_in_sync = is_deploy_state_in_sync()

        # Deploy in sync state is not changed, no need to update the alarm
        if is_in_sync == self.usm_alarm.get(constants.LAST_IN_SYNC):
            return

        try:
            out_of_sync_alarm_fault = sc.fm_api.get_fault(
                fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC, alarm_instance_id)

            LOG.info("software.json in sync: %s", is_in_sync)

            if out_of_sync_alarm_fault and is_in_sync:
                # There was an out of sync alarm raised, but local software.json is in sync,
                # so we clear the alarm
                LOG.info("Clearing alarm: %s", out_of_sync_alarm_fault.alarm_id)
                self.fm_api.clear_fault(
                    fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                    alarm_instance_id)

                # Deploy in sync state is changed, update the cache
                self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync

            elif (not out_of_sync_alarm_fault) and (not is_in_sync):
                # There was no out of sync alarm raised, but local software.json is not in sync,
                # so we raise the alarm
                LOG.info("Raising alarm: %s",
                         fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC)
                out_of_sync_fault = fm_api.Fault(
                    alarm_id=fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                    alarm_state=fm_constants.FM_ALARM_STATE_SET,
                    entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
                    entity_instance_id=alarm_instance_id,
                    severity=fm_constants.FM_ALARM_SEVERITY_MAJOR,
                    reason_text="Software deployment in progress",
                    alarm_type=fm_constants.FM_ALARM_TYPE_11,
                    probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_65,
                    proposed_repair_action="Wait for deployment to complete",
                    service_affecting=False
                )

                self.fm_api.set_fault(out_of_sync_fault)

                # Deploy in sync state is changed, update the cache
                self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync

            else:
                # We shouldn't get here
                LOG.error("Unexpected case in handling deploy state sync.")

        except Exception as ex:
            LOG.exception("Failed in handling deploy state sync. Error: %s" % str(ex))

    def _get_software_upgrade(self):
        """
        Get the current software upgrade from/to versions and state
        :return: dict of from_release, to_release and state
        """

        all_deploy = self.db_api_instance.get_deploy_all()

        if not all_deploy:
            return None

        deploy = all_deploy[0]
        from_maj_min_release = utils.get_major_release_version(deploy.get("from_release"))
        to_maj_min_release = utils.get_major_release_version(deploy.get("to_release"))
        state = deploy.get("state")

        return {
            "from_release": from_maj_min_release,
            "to_release": to_maj_min_release,
            "state": state
        }

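    # Illustrative sketch (hypothetical values, not part of the original source):
    # with a deploy record from "22.12.3" to "24.09.0" this returns
    #     {"from_release": "22.12", "to_release": "24.09", "state": "host-done"}
    # i.e. releases reduced to their MM.mm component.
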
    def check_upgrade_in_progress(self):
        """
        Check if major release upgrade is in progress
        """
        _upgrade_in_progress = False
        upgrade_release = self._get_software_upgrade()
        if not upgrade_release:
            return _upgrade_in_progress
        from_release = version.Version(upgrade_release["from_release"])
        to_release = version.Version(upgrade_release["to_release"])
        if (from_release.major != to_release.major) or (from_release.minor != to_release.minor):
            _upgrade_in_progress = True
        return _upgrade_in_progress

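    # Illustrative sketch (not part of the original source): packaging.version
    # exposes major/minor components, so a 22.12 -> 24.09 deploy is an upgrade
    # while a 24.09 -> 24.09 deploy is patch-only:
    #     version.Version("22.12").major != version.Version("24.09").major  # True
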
    def get_software_upgrade(self):
        return self._get_software_upgrade()

    def get_all_software_host_upgrade(self):
        """
        Get all software host upgrade from/to versions and state
        :return: list of dict of hostname, current_sw_version, target_sw_version and host_state
        """
        deploy = self._get_software_upgrade()
        deploy_hosts = self.db_api_instance.get_deploy_host()

        if deploy is None or deploy_hosts is None:
            return None

        from_maj_min_release = deploy.get("from_release")
        to_maj_min_release = deploy.get("to_release")

        all_host_upgrades = []
        for deploy_host in deploy_hosts:
            all_host_upgrades.append({
                "hostname": deploy_host.get("hostname"),
                "current_sw_version": to_maj_min_release if deploy_host.get(
                    "state") == states.DEPLOYED else from_maj_min_release,
                "target_sw_version": to_maj_min_release,
                "host_state": deploy_host.get("state")
            })

        return all_host_upgrades

    def get_one_software_host_upgrade(self, hostname):
        """
        Get the given software host upgrade from/to versions and state
        :param hostname: hostname
        :return: array of dict of hostname, current_sw_version, target_sw_version and host_state
        """

        all_host_upgrades = self.get_all_software_host_upgrade()

        if not all_host_upgrades:
            return None

        for host_upgrade in all_host_upgrades:
            if host_upgrade.get("hostname") == hostname:
                return [host_upgrade]

        return None


class PatchControllerApiThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.wsgi = None

    def run(self):
        host = "127.0.0.1"
        port = cfg.api_port

        try:
            # In order to support IPv6, server_class.address_family must be
            # set to the correct address family. Because the unauthenticated
            # API always uses IPv4 for the loopback address, the address_family
            # variable cannot be set directly in the WSGIServer class, so a
            # local subclass needs to be created for the call to make_server,
            # where the correct address_family can be specified.
            class server_class(simple_server.WSGIServer):
                pass

            server_class.address_family = socket.AF_INET
            self.wsgi = simple_server.make_server(
                host, port,
                app.VersionSelectorApplication(),
                server_class=server_class)

            self.wsgi.socket.settimeout(api_socket_timeout)
            global keep_running
            while keep_running:
                self.wsgi.handle_request()

                # Call garbage collect after wsgi request is handled,
                # to ensure any open file handles are closed in the case
                # of an upload.
                gc.collect()
        except Exception:
            # Log all exceptions
            LOG.exception("Error occurred during request processing")

        global thread_death
        thread_death.set()

    def kill(self):
        # Must run from other thread
        if self.wsgi is not None:
            self.wsgi.shutdown()


class PatchControllerAuthApiThread(threading.Thread):
    def __init__(self, port):
        threading.Thread.__init__(self)
        # LOG.info ("Initializing Authenticated API thread")
        self.wsgi = None
        self.port = port

    def run(self):
        host = CONF.auth_api_bind_ip
        if host is None:
            host = utils.get_versioned_address_all()
        try:
            # Can only launch authenticated server post-config
            while not os.path.exists('/etc/platform/.initial_config_complete'):
                time.sleep(5)
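
            # The flag file is created once initial platform configuration
            # completes; presumably authentication is not usable before then,
            # so startup is deferred rather than serving failing requests.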
            # In order to support IPv6, server_class.address_family must be
            # set to the correct address family. Because the address family
            # of the management network is only known at runtime, it cannot
            # be set directly in the WSGIServer class, so a local subclass
            # needs to be created for the call to make_server, where the
            # correct address_family can be specified.
            class server_class(simple_server.WSGIServer):
                pass

            server_class.address_family = utils.get_management_family()
            self.wsgi = simple_server.make_server(
                host, self.port,
                auth_app.VersionSelectorApplication(),
                server_class=server_class)

            # self.wsgi.serve_forever()
            self.wsgi.socket.settimeout(api_socket_timeout)

            global keep_running
            while keep_running:
                self.wsgi.handle_request()

                # Call garbage collect after wsgi request is handled,
                # to ensure any open file handles are closed in the case
                # of an upload.
                gc.collect()
        except Exception:
            # Log all exceptions
            LOG.exception("Authorized API failure: Error occurred during request processing")

    def kill(self):
        # Must be called from another thread
        if self.wsgi is not None:
            self.wsgi.shutdown()


class PatchControllerMainThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        # LOG.info ("Initializing Main thread")

    def run(self):
        global sc
        global thread_death

        # Send periodic messages to the agents.
        # Only one interval can be used, shared by all periodic messages.
        SEND_MSG_INTERVAL_IN_SECONDS = 30.0

        alarm_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
                                       sc.standby_controller)
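
        # The out-of-sync alarm is raised against the standby controller
        # host entity, so the alarm instance id above is keyed by its
        # hostname.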
        try:
            # Update the out of sync alarm cache when the thread starts
            out_of_sync_alarm_fault = sc.fm_api.get_fault(
                fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC, alarm_instance_id)
            sc.usm_alarm[constants.LAST_IN_SYNC] = not out_of_sync_alarm_fault

            sock_in = sc.setup_socket()

            while sock_in is None:
                # Check every thirty seconds?
                # Once we've got a conf file, tied into packstack,
                # we'll get restarted when the file is updated,
                # and this should be unnecessary.
                time.sleep(30)
                sock_in = sc.setup_socket()

            # Ok, now we've got our socket. Let's start with a hello!
            sc.socket_lock.acquire()

            hello = PatchMessageHello()
            hello.send(sc.sock_out)

            hello_agent = PatchMessageHelloAgent()
            hello_agent.send(sc.sock_out)

            sc.socket_lock.release()

            # Send hello every thirty seconds
            hello_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
            # Send deploy state update every thirty seconds
            deploy_state_update_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
            remaining = int(SEND_MSG_INTERVAL_IN_SECONDS)

            agent_query_conns = []
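
            # Main event loop: multiplex the UDP messaging socket and any
            # open agent TCP connections with select(), waking at least once
            # per interval so the periodic timers below still run when the
            # sockets are idle.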
            while True:
                # Check to see if any other thread has died
                if thread_death.is_set():
                    LOG.info("Detected thread death. Terminating")
                    return

                # Check for in-service patch restart flag
                if os.path.exists(insvc_patch_restart_controller):
                    LOG.info("In-service patch restart flag detected. Exiting.")
                    global keep_running
                    keep_running = False
                    os.remove(insvc_patch_restart_controller)
                    return

                inputs = [sc.sock_in] + agent_query_conns
                outputs = []

                rlist, wlist, xlist = select.select(
                    inputs, outputs, inputs, SEND_MSG_INTERVAL_IN_SECONDS)

                if (len(rlist) == 0 and
                        len(wlist) == 0 and
                        len(xlist) == 0):
                    # Timeout hit
                    sc.audit_socket()

                for s in rlist:
                    data = ''
                    addr = None
                    msg = None

                    if s == sc.sock_in:
                        # Receive from UDP
                        sc.socket_lock.acquire()
                        data, addr = s.recvfrom(1024)
                        sc.socket_lock.release()
                    else:
                        # Receive from TCP
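                        # A logical message can span multiple recv() calls,
                        # so 1024-byte chunks are appended until the buffer
                        # parses as complete JSON.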
                        while True:
                            try:
                                packet = s.recv(1024)
                            except socket.error:
                                LOG.exception("Socket error on recv")
                                data = ''
                                break

                            if packet:
                                data += packet.decode()

                                if data == '':
                                    break
                                try:
                                    json.loads(data)
                                    break
                                except ValueError:
                                    # Message is incomplete
                                    continue
                            else:
                                LOG.info('End of TCP message received')
                                break

                        if data == '':
                            # Connection dropped
                            agent_query_conns.remove(s)
                            s.close()
                            continue

                        # Get the TCP endpoint address
                        addr = s.getpeername()

                    msgdata = json.loads(data)

                    # For now, discard any messages that are not msgversion==1
                    if 'msgversion' in msgdata and msgdata['msgversion'] != 1:
                        continue
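
                    # Dispatch on msgtype: instantiate the matching message
                    # class, then let its handle() produce any response.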
                    if 'msgtype' in msgdata:
                        if msgdata['msgtype'] == messages.PATCHMSG_HELLO:
                            msg = PatchMessageHello()
                        elif msgdata['msgtype'] == messages.PATCHMSG_HELLO_ACK:
                            msg = PatchMessageHelloAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_SYNC_REQ:
                            msg = PatchMessageSyncReq()
                        elif msgdata['msgtype'] == messages.PATCHMSG_SYNC_COMPLETE:
                            msg = PatchMessageSyncComplete()
                        elif msgdata['msgtype'] == messages.PATCHMSG_HELLO_AGENT_ACK:
                            msg = PatchMessageHelloAgentAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_QUERY_DETAILED_RESP:
                            msg = PatchMessageQueryDetailedResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_AGENT_INSTALL_RESP:
                            msg = PatchMessageAgentInstallResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DROP_HOST_REQ:
                            msg = PatchMessageDropHostReq()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_UPDATE_ACK:
                            msg = SoftwareMessageDeployStateUpdateAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_CHANGED:
                            msg = SWMessageDeployStateChanged()

                    if msg is None:
                        msg = messages.PatchMessage()

                    msg.decode(msgdata)
                    if s == sc.sock_in:
                        msg.handle(sc.sock_out, addr)
                    else:
                        msg.handle(s, addr)

                    # We can drop the connection after a query response
                    if msg.msgtype == messages.PATCHMSG_QUERY_DETAILED_RESP and s != sc.sock_in:
                        agent_query_conns.remove(s)
                        s.shutdown(socket.SHUT_RDWR)
                        s.close()
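
                # Re-query stale hosts over TCP, keeping at most six agent
                # connections open at once; hosts that cannot be reached are
                # pushed back onto the list for a later pass.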
                while len(stale_hosts) > 0 and len(agent_query_conns) <= 5:
                    ip = stale_hosts.pop()
                    try:
                        agent_sock = socket.create_connection((ip, cfg.agent_port))
                        query = PatchMessageQueryDetailed()
                        query.send(agent_sock)
                        agent_query_conns.append(agent_sock)
                    except Exception:
                        # Put it back on the list
                        stale_hosts.append(ip)
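
                # Periodic hello timer. The "remaining > interval" test also
                # re-arms the deadline if the system clock ever jumps
                # backwards.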
                remaining = int(hello_timeout - time.time())
                if remaining <= 0 or remaining > int(SEND_MSG_INTERVAL_IN_SECONDS):
                    hello_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
                    remaining = int(SEND_MSG_INTERVAL_IN_SECONDS)

                    sc.socket_lock.acquire()

                    hello = PatchMessageHello()
                    hello.send(sc.sock_out)

                    hello_agent = PatchMessageHelloAgent()
                    hello_agent.send(sc.sock_out)

                    sc.socket_lock.release()

                    # Age out neighbours
                    sc.controller_neighbours_lock.acquire()
                    nbrs = list(sc.controller_neighbours)
                    for n in nbrs:
                        # Age out controllers after 2 minutes
                        if sc.controller_neighbours[n].get_age() >= 120:
                            LOG.info("Aging out controller %s from table", n)
                            del sc.controller_neighbours[n]
                    sc.controller_neighbours_lock.release()

                    sc.hosts_lock.acquire()
                    nbrs = list(sc.hosts)
                    for n in nbrs:
                        # Age out hosts after 1 hour
                        if sc.hosts[n].get_age() >= 3600:
                            LOG.info("Aging out host %s from table", n)
                            del sc.hosts[n]
                            for patch_id in list(sc.interim_state):
                                if n in sc.interim_state[patch_id]:
                                    sc.interim_state[patch_id].remove(n)

                    sc.hosts_lock.release()

                deploy_state_update_remaining = int(deploy_state_update_timeout - time.time())
                if deploy_state_update_remaining <= 0 or deploy_state_update_remaining > int(
                        SEND_MSG_INTERVAL_IN_SECONDS):
                    deploy_state_update_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
                    deploy_state_update_remaining = int(
                        SEND_MSG_INTERVAL_IN_SECONDS)

                    # Only send the deploy state update from the active controller
                    if is_deployment_in_progress() and utils.is_active_controller():
                        # Acquire the lock outside the try block so the
                        # finally clause never releases an unheld lock
                        sc.socket_lock.acquire()
                        try:
                            deploy_state_update = SoftwareMessageDeployStateUpdate()
                            deploy_state_update.send(sc.sock_out)
                            sc.handle_deploy_state_sync(alarm_instance_id)
                        except Exception as e:
                            LOG.exception("Failed to send deploy state update. Error: %s", str(e))
                        finally:
                            sc.socket_lock.release()
        except Exception:
            # Log all exceptions
            LOG.exception("Error occurred during request processing")
            thread_death.set()


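# Process entry point: load the oslo config from /etc/software/software.conf,
# start the unauthenticated API, the authenticated API (primary and alternate
# ports) and the main messaging thread, then block until any thread signals
# thread_death and unwind.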
def main():
    # The following call to CONF is to ensure the oslo config
    # has been called to specify a valid config dir.
    # Otherwise oslo_policy will fail when it looks for its files.
    CONF(
        (),  # Required to load an anonymous configuration
        default_config_files=['/etc/software/software.conf', ]
    )

    configure_logging()

    cfg.read_config()

    # daemon.pidlockfile.write_pid_to_pidfile(pidfile_path)

    global thread_death
    thread_death = threading.Event()

    # Set the TMPDIR environment variable to /scratch so that any modules
    # that create directories with tempfile will not use /tmp
    os.environ['TMPDIR'] = '/scratch'

    global sc
    sc = PatchController()

    LOG.info("launching")
    api_thread = PatchControllerApiThread()
    auth_api_thread = PatchControllerAuthApiThread(CONF.auth_api_port)
    auth_api_alt_thread = PatchControllerAuthApiThread(CONF.auth_api_alt_port)
    main_thread = PatchControllerMainThread()

    api_thread.start()
    auth_api_thread.start()
    auth_api_alt_thread.start()
    main_thread.start()

    thread_death.wait()
    global keep_running
    keep_running = False

    api_thread.join()
    auth_api_thread.join()
    auth_api_alt_thread.join()
    main_thread.join()