
It has been noted under heavy-load test conditions that lsof can hang for a considerable time and cause timeouts on the RabbitMQ stop path triggered from Service Manager in a swact scenario. To avoid that, either the netstat or the ss command could be used to check for a process listening on the AMQP port (5672). The ss command has been chosen since the netstat man page marks netstat as obsolete and points to ss as the replacement for most of its functionality. Also, note that ss uses Netlink, which uses the socket API. Closes-Bug: 2018346 Test Plan: PASS: Verify, using ss, the listening amqp socket PASS: Verify AIO-DX is properly deployed PASS: Restart RabbitMQ service successfully using sm-restart PASS: Swact successfully on DX system PASS: Lock/unlock successfully Change-Id: I929b2a1b7a61eb70154c00177aa0b7f2fc46890a Signed-off-by: Adriano Oliveira <adriano.oliveira@windriver.com>
443 lines
13 KiB
Bash
443 lines
13 KiB
Bash
#!/bin/sh
|
|
## The contents of this file are subject to the Mozilla Public License
|
|
## Version 1.1 (the "License"); you may not use this file except in
|
|
## compliance with the License. You may obtain a copy of the License
|
|
## at http://www.mozilla.org/MPL/
|
|
##
|
|
## Software distributed under the License is distributed on an "AS IS"
|
|
## basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
|
|
## the License for the specific language governing rights and
|
|
## limitations under the License.
|
|
##
|
|
## The Original Code is RabbitMQ.
|
|
##
|
|
## The Initial Developer of the Original Code is VMware, Inc.
|
|
## Copyright (c) 2007-2013 VMware, Inc. All rights reserved.
|
|
##
|
|
|
|
##
|
|
## OCF Resource Agent compliant rabbitmq-server resource script.
|
|
##
|
|
|
|
## OCF instance parameters
|
|
## OCF_RESKEY_server
|
|
## OCF_RESKEY_ctl
|
|
## OCF_RESKEY_nodename
|
|
## OCF_RESKEY_ip
|
|
## OCF_RESKEY_port
|
|
## OCF_RESKEY_config_file
|
|
## OCF_RESKEY_log_base
|
|
## OCF_RESKEY_mnesia_base
|
|
## OCF_RESKEY_server_start_args
|
|
## OCF_RESKEY_pid_file
|
|
## WRS
|
|
# OCF_RESKEY_env_config_file
|
|
# OCF_RESKEY_dist_port
|
|
|
|
#######################################################################
|
|
# Initialization:
|
|
|
|
# Load the OCF shell function library. OCF_FUNCTIONS_DIR is only defaulted
# (relative to OCF_ROOT) when the caller has not already set it.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs"

#######################################################################

# Platform configuration.
# NOTE(review): presumably this is where system_type (read by rabbit_start)
# comes from -- confirm against platform.conf.
. /etc/platform/platform.conf

# Built-in defaults for the optional instance parameters. They are also
# advertised through the meta-data action.
OCF_RESKEY_server_default="/usr/sbin/rabbitmq-server"
OCF_RESKEY_ctl_default="/usr/sbin/rabbitmqctl"
OCF_RESKEY_nodename_default="rabbit@localhost"
OCF_RESKEY_log_base_default="/var/log/rabbitmq"
OCF_RESKEY_pid_file_default="/var/run/rabbitmq/pid"

# Apply a default only when the parameter is unset ("=" rather than ":="), so
# a parameter that was explicitly set to the empty string stays empty.
: "${OCF_RESKEY_server=${OCF_RESKEY_server_default}}"
: "${OCF_RESKEY_ctl=${OCF_RESKEY_ctl_default}}"
: "${OCF_RESKEY_nodename=${OCF_RESKEY_nodename_default}}"
: "${OCF_RESKEY_log_base=${OCF_RESKEY_log_base_default}}"
: "${OCF_RESKEY_pid_file=${OCF_RESKEY_pid_file_default}}"
|
|
|
|
# Print the OCF resource-agent meta-data XML on stdout.
#
# Globals (read): OCF_RESKEY_server_default, OCF_RESKEY_ctl_default,
#                 OCF_RESKEY_nodename_default, OCF_RESKEY_log_base_default,
#                 OCF_RESKEY_pid_file_default
#
# The here-document delimiter is unquoted on purpose so that the default
# values are expanded into the generated XML.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="rabbitmq-server">
<version>1.0</version>

<longdesc lang="en">
Resource agent for RabbitMQ-server
</longdesc>

<shortdesc lang="en">Resource agent for RabbitMQ-server</shortdesc>

<parameters>
<parameter name="server" unique="0" required="0">
<longdesc lang="en">
The path to the rabbitmq-server script
</longdesc>
<shortdesc lang="en">Path to rabbitmq-server</shortdesc>
<content type="string" default="${OCF_RESKEY_server_default}" />
</parameter>

<parameter name="ctl" unique="0" required="0">
<longdesc lang="en">
The path to the rabbitmqctl script
</longdesc>
<shortdesc lang="en">Path to rabbitmqctl</shortdesc>
<content type="string" default="${OCF_RESKEY_ctl_default}" />
</parameter>

<parameter name="nodename" unique="0" required="0">
<longdesc lang="en">
The node name for rabbitmq-server
</longdesc>
<shortdesc lang="en">Node name</shortdesc>
<content type="string" default="${OCF_RESKEY_nodename_default}" />
</parameter>

<parameter name="ip" unique="0" required="0">
<longdesc lang="en">
The IP address for rabbitmq-server to listen on
</longdesc>
<shortdesc lang="en">IP Address</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="port" unique="0" required="0">
<longdesc lang="en">
The IP Port for rabbitmq-server to listen on
</longdesc>
<shortdesc lang="en">IP Port</shortdesc>
<content type="integer" default="" />
</parameter>

<parameter name="config_file" unique="0" required="0">
<longdesc lang="en">
Location of the config file (without the .config suffix)
</longdesc>
<shortdesc lang="en">Config file path (without the .config suffix)</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="log_base" unique="0" required="0">
<longdesc lang="en">
Location of the directory under which logs will be created
</longdesc>
<shortdesc lang="en">Log base path</shortdesc>
<content type="string" default="${OCF_RESKEY_log_base_default}" />
</parameter>

<parameter name="mnesia_base" unique="0" required="0">
<longdesc lang="en">
Location of the directory under which mnesia will store data
</longdesc>
<shortdesc lang="en">Mnesia base path</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="server_start_args" unique="0" required="0">
<longdesc lang="en">
Additional arguments provided to the server on startup
</longdesc>
<shortdesc lang="en">Server start arguments</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="pid_file" unique="0" required="0">
<longdesc lang="en">
Location of the file in which the pid will be stored
</longdesc>
<shortdesc lang="en">Pid file path</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_file_default}" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="600" />
<action name="stop" timeout="120" />
<action name="status" timeout="20" interval="10" />
<action name="monitor" timeout="20" interval="10" />
<action name="validate-all" timeout="30" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
|
|
|
|
# Print a short usage message listing the supported actions on stdout.
rabbit_usage() {
    cat <<END
usage: $0 {start|stop|status|monitor|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
}
|
|
|
|
# Map the OCF_RESKEY_* instance parameters onto the RABBITMQ_* variables used
# by the rest of this script.
RABBITMQ_SERVER=$OCF_RESKEY_server
RABBITMQ_CTL=$OCF_RESKEY_ctl
RABBITMQ_NODENAME=$OCF_RESKEY_nodename
RABBITMQ_NODE_IP_ADDRESS=$OCF_RESKEY_ip
RABBITMQ_NODE_PORT=$OCF_RESKEY_port
RABBITMQ_CONFIG_FILE=$OCF_RESKEY_config_file
RABBITMQ_CONF_ENV_FILE=$OCF_RESKEY_env_config_file
RABBITMQ_DIST_PORT=$OCF_RESKEY_dist_port
RABBITMQ_LOG_BASE=$OCF_RESKEY_log_base
RABBITMQ_MNESIA_BASE=$OCF_RESKEY_mnesia_base
RABBITMQ_SERVER_START_ARGS=$OCF_RESKEY_server_start_args
RABBITMQ_PID_FILE=$OCF_RESKEY_pid_file

# Only pass/export the node name when one is configured. NODENAME_ARG holds
# two words ("-n <node>") and is expanded unquoted where rabbitmqctl is run.
if [ -n "$RABBITMQ_NODENAME" ]; then
    NODENAME_ARG="-n $RABBITMQ_NODENAME"
    export RABBITMQ_NODENAME
fi

#
# Make sure a HOME directory is set and exported for rabbitmqctl
# to work, otherwise an error "erlexec: HOME must be set" will
# result. Erlang exec requires a HOME directory to be set.
# Rabbit-Server will source a different directory from the config
# file.
#
HOME=/tmp
export HOME
|
|
|
|
# Create the directory that will hold the pid file when it does not exist
# yet, hand it to rabbitmq:rabbitmq and make it mode 755.
#
# Globals: RABBITMQ_PID_FILE (read), PID_DIR (written)
# Returns: $OCF_SUCCESS, regardless of whether the directory was created
ensure_pid_dir () {
    PID_DIR=$(dirname -- "${RABBITMQ_PID_FILE}")
    # Quoted throughout so a pid path containing whitespace is handled.
    if [ ! -d "${PID_DIR}" ] ; then
        mkdir -p -- "${PID_DIR}"
        chown -R rabbitmq:rabbitmq -- "${PID_DIR}"
        chmod 755 -- "${PID_DIR}"
    fi
    return $OCF_SUCCESS
}
|
|
|
|
# Delete the pid file and then try to remove its parent directory.
#
# Globals: RABBITMQ_PID_FILE (read)
# The rmdir is best effort: a failure (for example a non-empty directory) is
# deliberately ignored so the function still succeeds.
remove_pid () {
    rm -f -- "${RABBITMQ_PID_FILE}"
    rmdir -- "$(dirname -- "${RABBITMQ_PID_FILE}")" || :
}
|
|
|
|
# Export every RABBITMQ_* setting that has a non-empty value so that the
# commands started by this script inherit it. Empty settings are left
# unexported.
#
# Each value is quoted in its test: unquoted, a multi-word value such as
# RABBITMQ_SERVER_START_ARGS="-a b" made the test misparse and the variable
# was silently never exported.
#
# The pid file is only exported after ensure_pid_dir has succeeded.
export_vars() {
    [ -n "$RABBITMQ_NODE_IP_ADDRESS" ] && export RABBITMQ_NODE_IP_ADDRESS
    [ -n "$RABBITMQ_NODE_PORT" ] && export RABBITMQ_NODE_PORT
    [ -n "$RABBITMQ_DIST_PORT" ] && export RABBITMQ_DIST_PORT
    [ -n "$RABBITMQ_CONFIG_FILE" ] && export RABBITMQ_CONFIG_FILE
    [ -n "$RABBITMQ_CONF_ENV_FILE" ] && export RABBITMQ_CONF_ENV_FILE
    [ -n "$RABBITMQ_LOG_BASE" ] && export RABBITMQ_LOG_BASE
    [ -n "$RABBITMQ_MNESIA_BASE" ] && export RABBITMQ_MNESIA_BASE
    [ -n "$RABBITMQ_SERVER_START_ARGS" ] && export RABBITMQ_SERVER_START_ARGS
    [ -n "$RABBITMQ_PID_FILE" ] && ensure_pid_dir && export RABBITMQ_PID_FILE
}
|
|
|
|
# Minimal validation: the server and ctl programs must exist and be
# executable.
#
# Globals: RABBITMQ_SERVER, RABBITMQ_CTL (read)
# Exits the script with $OCF_ERR_INSTALLED when either check fails.
#
# The paths are quoted so that an empty value fails the check; unquoted,
# "[ ! -x ]" evaluated to false and an empty path passed validation.
rabbit_validate_partial() {
    if [ ! -x "$RABBITMQ_SERVER" ]; then
        ocf_log err "rabbitmq-server server $RABBITMQ_SERVER does not exist or is not executable";
        exit $OCF_ERR_INSTALLED;
    fi

    if [ ! -x "$RABBITMQ_CTL" ]; then
        ocf_log err "rabbitmq-server ctl $RABBITMQ_CTL does not exist or is not executable";
        exit $OCF_ERR_INSTALLED;
    fi
}
|
|
|
|
# Full validation: check the optional config file, log directory and mnesia
# directory when they are configured, then run rabbit_validate_partial.
#
# Globals: RABBITMQ_CONFIG_FILE, RABBITMQ_LOG_BASE, RABBITMQ_MNESIA_BASE (read)
# Returns: $OCF_SUCCESS when every check passes
# Exits the script with $OCF_ERR_INSTALLED on the first failed check.
#
# Values are quoted so that a path containing whitespace is actually checked
# instead of making the test itself fail and the check being skipped.
rabbit_validate_full() {
    # The parameter is given without the ".config" suffix.
    if [ -n "$RABBITMQ_CONFIG_FILE" ] && [ ! -e "${RABBITMQ_CONFIG_FILE}.config" ]; then
        ocf_log err "rabbitmq-server config_file ${RABBITMQ_CONFIG_FILE}.config does not exist or is not a file";
        exit $OCF_ERR_INSTALLED;
    fi

    if [ -n "$RABBITMQ_LOG_BASE" ] && [ ! -d "$RABBITMQ_LOG_BASE" ]; then
        ocf_log err "rabbitmq-server log_base $RABBITMQ_LOG_BASE does not exist or is not a directory";
        exit $OCF_ERR_INSTALLED;
    fi

    if [ -n "$RABBITMQ_MNESIA_BASE" ] && [ ! -d "$RABBITMQ_MNESIA_BASE" ]; then
        ocf_log err "rabbitmq-server mnesia_base $RABBITMQ_MNESIA_BASE does not exist or is not a directory";
        exit $OCF_ERR_INSTALLED;
    fi

    rabbit_validate_partial

    return $OCF_SUCCESS
}
|
|
|
|
# Report whether the RabbitMQ server is running by delegating to
# 'rabbitmqctl status' through rabbitmqctl_action.
#
# Globals: RABBITMQ_MNESIA_BASE (read)
# Returns: whatever rabbitmqctl_action "status" returns
# Exits the script with $OCF_NOT_RUNNING when a mnesia base is configured but
# is not a directory.
rabbit_status() {
    # The rabbitmqctl command requires the erlang cookie to be available or it
    # crashes. If we are on the standby controller, the rabbit filesystem
    # (and the cookie) are not available, so lets fail gracefully here if the
    # rabbit database directory is not visible.
    if [ -n "$RABBITMQ_MNESIA_BASE" ] && [ ! -d "$RABBITMQ_MNESIA_BASE" ]; then
        ocf_log debug "Not checking status because rabbitmq-server mnesia_base $RABBITMQ_MNESIA_BASE does not exist or is not a directory";
        exit $OCF_NOT_RUNNING;
    fi

    rabbitmqctl_action "status"
}
|
|
|
|
# Run 'rabbitmqctl wait --timeout 85 <pid file>' through rabbitmqctl_action.
#
# Arguments: $1 - pid file to wait on (optional)
# Returns:   whatever rabbitmqctl_action returns
rabbit_wait() {
    # ${1:+"$1"} passes the pid file as exactly one word, even if it contains
    # whitespace, and passes nothing at all when it is empty or missing.
    rabbitmqctl_action "wait" "--timeout" "85" ${1:+"$1"}
}
|
|
|
|
# Run rabbitmqctl with the given arguments, discarding its output, and map
# its exit status to an OCF return code.
#
# Globals:   RABBITMQ_CTL, NODENAME_ARG (read)
# Arguments: the rabbitmqctl sub-command and its arguments, one word each
# Returns:   $OCF_SUCCESS     when rabbitmqctl exits 0
#            $OCF_NOT_RUNNING when rabbitmqctl exits 2 or 69
# Exits the script with $OCF_ERR_GENERIC on any other exit status.
rabbitmqctl_action() {
    local rc

    # "$@" keeps every argument intact (e.g. a pid path with whitespace).
    # NODENAME_ARG is intentionally unquoted: it holds two words, "-n <node>".
    "$RABBITMQ_CTL" $NODENAME_ARG "$@" > /dev/null 2>&1
    rc=$?
    case "$rc" in
        0)
            ocf_log debug "RabbitMQ server is running normally"
            return $OCF_SUCCESS
            ;;
        # Error code 69 is returned as failed to connect to node and added as a
        # not running case for proper handling on status command
        # TODO: this should be revisited at some point to try not to be specific
        # but generic for any non-zero return to be handled consistently
        2|69)
            ocf_log debug "RabbitMQ server is not running: $rc"
            return $OCF_NOT_RUNNING
            ;;
        *)
            ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG $*: $rc"
            exit $OCF_ERR_GENERIC
            ;;
    esac
}
|
|
|
|
# Start rabbitmq-server in its own session and wait until it reports ready.
#
# Globals: RABBITMQ_SERVER, RABBITMQ_LOG_BASE, RABBITMQ_PID_FILE, system_type
#          (read); PLATFORM_CPULIST, PLATFORM_CPUS,
#          RABBITMQ_IO_THREAD_POOL_SIZE (written on All-in-one systems)
# Returns: $OCF_SUCCESS when the server is already running or came up
# Exits the script with $OCF_ERR_GENERIC when waiting for the server fails.
rabbit_start() {
    local rc

    if rabbit_status; then
        ocf_log info "Resource already running."
        return $OCF_SUCCESS
    fi

    export_vars

    # Increase the maximum number of file descriptors that can be open at
    # once - required for large systems.
    ulimit -n 8192
    if [ "${system_type}" = "All-in-one" ]; then
        # Rabbit/beam related tasks should be on platform cores from the get go.
        # If they are affined to all cores during initialization sequence of AIO,
        # the system will end up with many extra beam threads that are not in use.
        #
        # "." instead of "source": this script runs under #!/bin/sh, and
        # "source" is not available in every sh implementation.
        # NOTE(review): platform_expanded_cpu_list and get_platform_cpus are
        # presumably defined by cpumap_functions.sh -- confirm.
        . /etc/init.d/cpumap_functions.sh
        PLATFORM_CPULIST=$(platform_expanded_cpu_list)
        PLATFORM_CPUS=$(get_platform_cpus)
        # Calculate thread pool size based on PLATFORM_CPUS
        # Refer to: https://github.com/rabbitmq/rabbitmq-common/commit/4f9ef33cf9ba52197ff210ffcdf6629c1b7a6e9e
        RABBITMQ_IO_THREAD_POOL_SIZE=$((${PLATFORM_CPUS} * 16))
        # Clamp the pool size to the range [64, 1024].
        if [ "${RABBITMQ_IO_THREAD_POOL_SIZE}" -lt 64 ]; then
            RABBITMQ_IO_THREAD_POOL_SIZE=64
        elif [ "${RABBITMQ_IO_THREAD_POOL_SIZE}" -gt 1024 ]; then
            RABBITMQ_IO_THREAD_POOL_SIZE=1024
        fi
        export RABBITMQ_IO_THREAD_POOL_SIZE
        setsid sh -c "exec taskset -c ${PLATFORM_CPULIST} $RABBITMQ_SERVER >> ${RABBITMQ_LOG_BASE}/startup_log 2>> ${RABBITMQ_LOG_BASE}/startup_err" &
    else
        setsid sh -c "$RABBITMQ_SERVER >> ${RABBITMQ_LOG_BASE}/startup_log 2>> ${RABBITMQ_LOG_BASE}/startup_err" &
    fi

    # Wait for the server to come up.
    # Let the CRM/LRM time us out if required
    rabbit_wait "$RABBITMQ_PID_FILE"
    rc=$?
    if [ "$rc" != "$OCF_SUCCESS" ]; then
        remove_pid
        # A failed start is fatal for this action, so log it as an error.
        ocf_log err "rabbitmq-server start failed: $rc"
        exit $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}
|
|
|
|
# Print the ss "Process" column of the first TCP listener whose local
# address ends in ":<port>"; prints nothing when no such listener exists.
#
# Arguments: $1 - TCP port number
#
# Only the local address column ($4) is matched. Matching the port number
# anywhere on the line would also hit unrelated text such as "pid=5672".
_rabbit_port_listener() {
    ss -ntlp | awk -v port="$1" '$4 ~ (":" port "$") { print $6; exit }'
}

# Stop rabbitmq-server and wait until it is reported as not running.
#
# Globals: RABBITMQ_CTL, RABBITMQ_PID_FILE, RABBITMQ_NODE_PORT (read)
# Returns: $OCF_SUCCESS once rabbit_status reports $OCF_NOT_RUNNING
# Exits the script with $OCF_ERR_GENERIC when the AMQP port is still being
# listened on after the stop command, or when the status check fails.
rabbit_stop() {
    local rc
    local amqp_port
    local process_info

    if ! rabbit_status; then
        ocf_log info "Resource not running."

        # On rare occasions, the status check could indicate rabbitmq is not running,
        # but there could be partial service. As such, ignore this case and just fall
        # through to continue with the stop
        #
        # return $OCF_SUCCESS
    fi

    if [ -f "$RABBITMQ_PID_FILE" ]; then
        "$RABBITMQ_CTL" stop "$RABBITMQ_PID_FILE"
    else
        "$RABBITMQ_CTL" stop
    fi
    rc=$?

    # A failed stop command is only logged; the checks below decide the result.
    if [ "$rc" != 0 ]; then
        ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_CTL stop, $rc"
    fi

    # Check the configured listen port, falling back to the AMQP default.
    amqp_port=${RABBITMQ_NODE_PORT:-5672}
    process_info=$(_rabbit_port_listener "$amqp_port")

    if [ -n "${process_info}" ]; then
        ocf_log err "rabbitmq-server stop command executed: '$RABBITMQ_CTL stop $RABBITMQ_PID_FILE', but port is still in use by ${process_info}."
        exit $OCF_ERR_GENERIC
    fi

    # Spin waiting for the server to shut down.
    # Let the CRM/LRM time us out if required
    while :; do
        rabbit_status
        rc=$?
        if [ "$rc" = "$OCF_NOT_RUNNING" ]; then
            remove_pid
            break
        elif [ "$rc" != "$OCF_SUCCESS" ]; then
            # A failed stop is fatal for this action, so log it as an error.
            ocf_log err "rabbitmq-server stop failed: $rc"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    return $OCF_SUCCESS
}
|
|
|
|
# Monitor action: returns the result of rabbit_status unchanged.
rabbit_monitor() {
    rabbit_status
}
|
|
|
|
# Actions that need no validation are answered first and exit immediately.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        rabbit_usage
        exit $OCF_SUCCESS
        ;;
esac

# A probe only runs the partial check of the server and ctl programs; every
# other invocation runs the full validation.
if ocf_is_probe; then
    rabbit_validate_partial
else
    rabbit_validate_full
fi

export_vars

# Dispatch the requested action. The status of the handler becomes the exit
# status of the script through the final "exit $?".
case "$__OCF_ACTION" in
    start)
        rabbit_start
        ;;
    stop)
        rabbit_stop
        ;;
    status|monitor)
        rabbit_monitor
        ;;
    validate-all)
        # Validation already ran above; reaching this point means it passed.
        exit $OCF_SUCCESS
        ;;
    *)
        rabbit_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

exit $?
|