
Update collect to save kubelet information including state files and
config data from /var/lib/kubelet, and query pods with Pod Disruption
Budget.

Test plan:
AIO-SX:
PASS: Verify collect has var/lib/kubelet contents
PASS: Verify collect containerization_kube.info contains PDB

Closes-Bug: #2091674
Implements: stx utilities tools
Change-Id: Ib025696f0832d557614b2d20f735fa102460bde8
Signed-off-by: wguy <wey-yi.guy@windriver.com>
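The Pod Disruption Budget query called out in the test plan is made by the containerization collector plugin rather than by this script. A minimal sketch of such a query, assuming kubectl access via the kubeadm admin config at /etc/kubernetes/admin.conf (an assumption here; illustrative only, not the plugin's exact command):

    # Illustrative: list all PodDisruptionBudgets across namespaces
    export KUBECONFIG=/etc/kubernetes/admin.conf
    kubectl get poddisruptionbudgets --all-namespaces -o wide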
526 lines
17 KiB
Bash
Executable File
#! /bin/bash
########################################################################
#
# Copyright (c) 2016-2022, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################

# make these platform.conf variables global.
# values are loaded in source_openrc_if_needed.
export nodetype=""
export subfunction=""
export system_type=""
export security_profile=""
export sdn_enabled=""
export region_config=""
export vswitch_type=""
export system_mode=""
export sw_version=""

# assume this is not the active controller until learned
export ACTIVE=false

#
# Import commands, variables and convenience functions available to
# all collectors ; common and user defined.
#
source /usr/local/sbin/collect_utils
source_openrc_if_needed

#
# parse input parameters
#
COLLECT_NAME="${1}"
DEBUG=${8}
INVENTORY=${9}
set_debug_mode ${DEBUG}
expect_debug=$([ "${DEBUG}" = true ] && echo "-d" || echo "")

# Calling parms
#
# 1 = collect name
# 2 = start date option
# 3 = start date
# 4 = "any" (ignored - no longer used ; kept to support upgrades/downgrades)
# 5 = end date option
# 6 = end date
# 7 = "any" (ignored - no longer used ; kept to support upgrades/downgrades)
# 8 = debug mode
# 9 = inventory
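#
# Illustrative invocation (assumed values ; parms 2 and 5 take the
# STARTDATE_OPTION and ENDDATE_OPTION strings defined in collect_utils):
#
#   collect_host <collect-name> ${STARTDATE_OPTION} 20240101 any \
#                ${ENDDATE_OPTION} 20240131 any true true
#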
logger -t ${COLLECT_TAG} "${0} ${1} ${2} ${3} ${4} ${5} ${6} ${7} ${8} ${9}"

# parse out the start date/time if it is present
STARTDATE_RANGE=false
STARTDATE="any"
if [ "${2}" == "${STARTDATE_OPTION}" ] ; then
    if [ "${3}" != "any" -a ${#3} -gt 7 ] ; then
        STARTDATE_RANGE=true
        STARTDATE="${3}"
    fi
fi

# parse out the end date/time if it is present
ENDDATE_RANGE=false
ENDDATE="any"
if [ "${5}" == "${ENDDATE_OPTION}" ] ; then
    if [ "${6}" != "any" -a ${#6} -gt 7 ] ; then
        ENDDATE_RANGE=true
        ENDDATE="${6}"
    fi
fi

COLLECT_BASE_DIR="/scratch"
EXTRA="var/extra"
COLLECT_NAME_DIR="${COLLECT_BASE_DIR}/${COLLECT_NAME}"
EXTRA_DIR="${COLLECT_NAME_DIR}/${EXTRA}"
TARBALL="${COLLECT_NAME_DIR}.tgz"
COLLECT_PATH="/etc/collect.d"
RUN_EXCLUDE="/etc/collect/run.exclude"
ETC_EXCLUDE="/etc/collect/etc.exclude"
VAR_LOG_EXCLUDE="/etc/collect/varlog.exclude"
COLLECT_INCLUDE="/var/run /etc /root"
FLIGHT_RECORDER_PATH="var/lib/sm/"
FLIGHT_RECORDER_FILE="sm.eru.v1"
VAR_LOG_INCLUDE_LIST="/tmp/${COLLECT_NAME}.lst"
COLLECT_DIR_USAGE_CMD="df -h ${COLLECT_BASE_DIR}"
COLLECT_DATE="/usr/local/sbin/collect_date"
COLLECT_SYSINV="${COLLECT_PATH}/collect_sysinv"

rm -f ${COLLECT_CMD_TIMING_LOG}

function log_space()
{
    local msg=${1}

    space="`${COLLECT_DIR_USAGE_CMD}`"
    space1=`echo "${space}" | grep -v Filesystem`
    ilog "${COLLECT_BASE_DIR} ${msg} ${space1}"
}

space_precheck ${HOSTNAME} ${COLLECT_BASE_DIR}

CURR_DIR=`pwd`
mkdir -p ${COLLECT_NAME_DIR}
cd ${COLLECT_NAME_DIR}

# create dump target extra-stuff directory
mkdir -p ${EXTRA_DIR}

RETVAL=0

# Remove any previous collect error log.
# Start this collect with an empty file.
#
# stderr is directed to this log during the collect process.
# By searching this log after collect_host is run we can find
# errors that occurred during collect.
# The only real error that we care about right now is the
#
# "No space left on device" error
#
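# Note: that post-collect scan is done by collect_errors (from
# collect_utils) near the end of this script ; an illustrative check of
# that kind, not the utility's actual implementation, would be:
#   grep -q "No space left on device" ${COLLECT_ERROR_LOG}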
rm -f ${COLLECT_ERROR_LOG}
touch ${COLLECT_ERROR_LOG}
chmod 644 ${COLLECT_ERROR_LOG}
echo "`date '+%F %T'` :${COLLECT_NAME_DIR}" > ${COLLECT_ERROR_LOG}

ilog "creating local collect tarball ${COLLECT_NAME_DIR}.tgz"

################################################################################
# Run collect scripts to check system status
################################################################################
function collect_parts()
{
    if [ -d ${COLLECT_PATH} ]; then
        for i in ${COLLECT_PATH}/*; do
            if [ -f $i ]; then
                local start=$(date ${DATE_FORMAT})
                if [ ${i} = ${COLLECT_SYSINV} ]; then
                    ${IONICE_CMD} ${NICE_CMD} $i ${COLLECT_NAME_DIR} ${EXTRA_DIR} ${hostname} ${INVENTORY}
                else
                    ${IONICE_CMD} ${NICE_CMD} $i ${COLLECT_NAME_DIR} ${EXTRA_DIR} ${hostname}
                fi

                local stop=$(date ${DATE_FORMAT})
                duration=$(get_time_delta "${start}" "${stop}")
                echo "${stop}: ${duration} Plugin $i" >> ${COLLECT_CMD_TIMING_LOG}

                # Add delay between parts ; i.e. collect plugins
                sleep ${COLLECT_RUNPARTS_DELAY}
            fi
        done
    fi
}


function collect_extra()
{
    # dump process lists
    LOGFILE="${EXTRA_DIR}/process.info"
    echo "${hostname}: Process Info ......: ${LOGFILE}"

    run_command "${IONICE_CMD} ${NICE_CMD} ${PROCESS_DETAIL_CMD}" "${LOGFILE}"

    sleep ${COLLECT_RUNEXTRA_DELAY}

    # Collect process and thread info (tree view)
    run_command "${IONICE_CMD} ${NICE_CMD} pstree --arguments --ascii --long --show-pids" "${LOGFILE}"
    sleep ${COLLECT_RUNEXTRA_DELAY}

    # Collect process, thread and scheduling info (worker subfunction only)
    # (also gets process 'affinity', which is useful on workers)
    which ps-sched.sh >/dev/null 2>&1
    if [ $? -eq 0 ]; then
        run_command "${IONICE_CMD} ${NICE_CMD} ps-sched.sh" "${LOGFILE}"
        sleep ${COLLECT_RUNEXTRA_DELAY}
    fi

    # Collect per kubernetes container name, QoS, and cpusets per numa node
    run_command "${IONICE_CMD} ${NICE_CMD} kube-cpusets" "${LOGFILE}"

    # For debugging pods and cgroups, etc
    run_command "sudo LANG=POSIX systemd-cgls cpu -k -l" "${LOGFILE}"

    # Various host attributes
    LOGFILE="${EXTRA_DIR}/host.info"
    echo "${hostname}: Host Info .........: ${LOGFILE}"

    # StarlingX build info
    run_command "${BUILD_INFO_CMD}" "${LOGFILE}"

    run_command "timedatectl" "${LOGFILE}"

    run_command "uptime" "${LOGFILE}"

    run_command "cat /proc/cmdline" "${LOGFILE}"

    run_command "cat /proc/version" "${LOGFILE}"

    run_command "lscpu" "${LOGFILE}"

    run_command "lscpu -e" "${LOGFILE}"

    run_command "cat /proc/cpuinfo" "${LOGFILE}"

    run_command "cat /sys/devices/system/cpu/isolated" "${LOGFILE}"

    run_command "ip addr show" "${LOGFILE}"

    run_command "lspci -nn" "${LOGFILE}"

    sleep ${COLLECT_RUNEXTRA_DELAY}

    run_command "find /sys/kernel/iommu_groups/ -type l" "${LOGFILE}"

    # networking totals
    run_command "cat /proc/net/dev" "${LOGFILE}"

    run_command "dmidecode" "${LOGFILE}"

    # summary of scheduler tunable settings
    run_command "cat /proc/sched_debug | head -15" "${LOGFILE}"

    if [ "${SKIP_MASK}" = "true" ]; then
        run_command "facter | grep -iv '^ssh'" "${LOGFILE}"
    else
        run_command "facter" "${LOGFILE}"
    fi

    if [[ "$nodetype" == "worker" || "$subfunction" == *"worker"* ]] ; then
        run_command "topology" "${LOGFILE}"
    fi

    # CPU C-state power info
    run_command "${IONICE_CMD} ${NICE_CMD} cpupower monitor" "${LOGFILE}"

    LOGFILE="${EXTRA_DIR}/memory.info"
    echo "${hostname}: Memory Info .......: ${LOGFILE}"

    run_command "cat /proc/meminfo" "${LOGFILE}"

    run_command "cat /sys/devices/system/node/node?/meminfo" "${LOGFILE}"

    run_command "log_slabinfo" "${LOGFILE}"

    run_command "${IONICE_CMD} ${NICE_CMD} ps -e -o ppid,pid,nlwp,rss:10,vsz:10,cmd --sort=-rss" "${LOGFILE}"

    # list open files
    run_command "timeout 60 ${IONICE_CMD} ${NICE_CMD} lsof -lwX" "${LOGFILE}"

    # hugepages numa mapping
    delimiter ${LOGFILE} "grep huge /proc/*/numa_maps"
    grep -e " huge " /proc/*/numa_maps >> ${LOGFILE} 2>>${COLLECT_ERROR_LOG}


    LOGFILE="${EXTRA_DIR}/filesystem.info"
    echo "${hostname}: Filesystem Info ...: ${LOGFILE}"

    # rootfs and tmpfs usage
    run_command "df -h -H -T --local -t rootfs -t tmpfs" "${LOGFILE}"

    # disk space usage
    run_command "df -h -H -T --local -t ext2 -t ext3 -t ext4 -t xfs --total" "${LOGFILE}"

    # disk inodes usage
    run_command "df -h -H -T --local -i -t ext2 -t ext3 -t ext4 -t xfs --total" "${LOGFILE}"

    sleep ${COLLECT_RUNEXTRA_DELAY}

    # disks by-path values
    run_command "ls -lR /dev/disk" "${LOGFILE}"

    # disk summary (requires sudo/root)
    run_command "fdisk -l" "${LOGFILE}"

    run_command "cat /proc/scsi/scsi" "${LOGFILE}"

    sleep ${COLLECT_RUNEXTRA_DELAY}

    # Controller specific stuff
    if [ "$nodetype" = "controller" ] ; then
        run_command "cat /proc/drbd" "${LOGFILE}"

        run_command "${IONICE_CMD} ${NICE_CMD} /sbin/drbdadm dump" "${LOGFILE}"
    fi

    # LVM summary
    run_command "/usr/sbin/vgs --version" "${LOGFILE}"
    run_command "/usr/sbin/pvs --version" "${LOGFILE}"
    run_command "/usr/sbin/lvs --version" "${LOGFILE}"

    run_command "${IONICE_CMD} ${NICE_CMD} /usr/sbin/vgs --all --options all" "${LOGFILE}"

    run_command "${IONICE_CMD} ${NICE_CMD} /usr/sbin/pvs --all --options all" "${LOGFILE}"

    run_command "${IONICE_CMD} ${NICE_CMD} /usr/sbin/lvs --all --options all" "${LOGFILE}"

    # iSCSI Information
    LOGFILE="${EXTRA_DIR}/iscsi.info"
    echo "${hostname}: iSCSI Info ........: ${LOGFILE}"

    if [ "$nodetype" = "controller" ] ; then
        # Controller - LIO exported initiators summary
        run_command "${IONICE_CMD} ${NICE_CMD} targetcli ls" "${LOGFILE}"

        # Controller - LIO sessions
        run_command "${IONICE_CMD} ${NICE_CMD} targetcli sessions detail" "${LOGFILE}"

    elif [[ "$nodetype" == "worker" || "$subfunction" == *"worker"* ]] ; then
        # Worker - iSCSI initiator information
        collect_dir=${EXTRA_DIR}/iscsi_initiator_info
        mkdir -p ${collect_dir}
        cp -rf /run/iscsi-cache/nodes/* ${collect_dir}
        find ${collect_dir} -type d -exec chmod 750 {} \;

        # Worker - iSCSI initiator active sessions
        run_command "iscsiadm -m session" "${LOGFILE}"

        # Worker - iSCSI udev created nodes
        delimiter ${LOGFILE} "ls -la /dev/disk/by-path | grep \"iqn\""
        ls -la /dev/disk/by-path | grep "iqn" >> ${LOGFILE} 2>>${COLLECT_ERROR_LOG}
    fi

    sleep ${COLLECT_RUNEXTRA_DELAY}

    LOGFILE="${EXTRA_DIR}/history.info"
    echo "${hostname}: Bash History ......: ${LOGFILE}"

    # history
    run_command "cat /home/sysadmin/.bash_history" "${LOGFILE}"

    LOGFILE="${EXTRA_DIR}/interrupt.info"
    echo "${hostname}: Interrupt Info ....: ${LOGFILE}"

    # interrupts
    run_command "${IONICE_CMD} ${NICE_CMD} cat /proc/interrupts" "${LOGFILE}"
    sleep ${COLLECT_RUNEXTRA_DELAY}

    run_command "${IONICE_CMD} ${NICE_CMD} cat /proc/softirqs" "${LOGFILE}"

    # Controller specific stuff
    if [ "$nodetype" = "controller" ] ; then
        LOGFILE="${EXTRA_DIR}/netstat.info"
        run_command "${IONICE_CMD} ${NICE_CMD} netstat -pan" "${LOGFILE}"
    fi

    LOGFILE="${EXTRA_DIR}/blockdev.info"
    echo "${hostname}: Block Devices Info : ${LOGFILE}"

    # Collect block devices - show all sda and cinder devices, and size
    run_command "lsblk" "${LOGFILE}"

    # Collect block device topology - show devices and which io-scheduler
    run_command "${IONICE_CMD} ${NICE_CMD} lsblk --topology" "${LOGFILE}"

    # Collect SCSI devices - show devices and cinder attaches, etc
    run_command "${IONICE_CMD} ${NICE_CMD} lsblk --scsi" "${LOGFILE}"
}

log_space "before collect ......:"

collect_extra_start=$(date ${DATE_FORMAT})
echo "${collect_extra_start}: collect_extra start" >> ${COLLECT_CMD_TIMING_LOG}
collect_extra
collect_extra_stop=$(date ${DATE_FORMAT})
duration=$(get_time_delta "${collect_extra_start}" "${collect_extra_stop}")
echo "${collect_extra_stop}: ${duration} collect_extra done" >> ${COLLECT_CMD_TIMING_LOG}

collect_parts_start=$(date ${DATE_FORMAT})
echo "$(date ${DATE_FORMAT}): collect_parts start" >> ${COLLECT_CMD_TIMING_LOG}
collect_parts
collect_parts_stop=$(date ${DATE_FORMAT})
duration=$(get_time_delta "${collect_parts_start}" "${collect_parts_stop}")
echo "$(date ${DATE_FORMAT}): ${duration} collect_parts done" >> ${COLLECT_CMD_TIMING_LOG}

#
# Handle the collect-after and collect-range cases first, then
# handle the collect-before case in the elif clause.
#
VAR_LOG="/var/log"
rm -f ${VAR_LOG_INCLUDE_LIST}

# Dated collect defaults to 1 month of logs.
# Consider adding a check for how long the system has been provisioned
# and avoid running dated collect, which causes a CPU spike that can
# take up to 10 seconds or more even on newly provisioned systems.
echo "$(date ${DATE_FORMAT}): Date range start" >> ${COLLECT_CMD_TIMING_LOG}
if [ "${STARTDATE_RANGE}" == true ] ; then
    if [ "${ENDDATE_RANGE}" == false ] ; then
        ilog "collecting $VAR_LOG files containing logs after ${STARTDATE}"
        ${IONICE_CMD} ${NICE_CMD} ${COLLECT_DATE} ${STARTDATE} ${ENDDATE} ${VAR_LOG_INCLUDE_LIST} ${DEBUG} ""
    else
        ilog "collecting $VAR_LOG files containing logs between ${STARTDATE} and ${ENDDATE}"
        ${IONICE_CMD} ${NICE_CMD} ${COLLECT_DATE} ${STARTDATE} ${ENDDATE} ${VAR_LOG_INCLUDE_LIST} ${DEBUG} ""
    fi
elif [ "${ENDDATE_RANGE}" == true ] ; then
    STARTDATE="20130101"
    ilog "collecting $VAR_LOG files containing logs before ${ENDDATE}"
    ${IONICE_CMD} ${NICE_CMD} ${COLLECT_DATE} ${STARTDATE} ${ENDDATE} ${VAR_LOG_INCLUDE_LIST} ${DEBUG} ""
else
    ilog "collecting all of $VAR_LOG"
    ${IONICE_CMD} ${NICE_CMD} find $VAR_LOG ! -empty > ${VAR_LOG_INCLUDE_LIST}
fi
echo "$(date ${DATE_FORMAT}): Date range stop" >> ${COLLECT_CMD_TIMING_LOG}

sleep ${COLLECT_RUNEXTRA_DELAY}

# collect the www lighttpd logs if they exist and are not empty.
# note: The lighttpd logs don't have the date in the right place.
# So there is no point in passing those potentially large
# files through the dated collect; it would just waste time.
# Add that path in raw form afterwards.
WWW_LOG="www/var/log"
if [ -e "/var/${WWW_LOG}" ] ; then
    find "/var/${WWW_LOG}" ! -empty >> ${VAR_LOG_INCLUDE_LIST}
elif [ -e "/${WWW_LOG}" ] ; then
    find "/${WWW_LOG}" ! -empty >> ${VAR_LOG_INCLUDE_LIST}
fi

# Filter any dirs or files in the exclude list from the include list
remove_common_paths "${VAR_LOG_INCLUDE_LIST}" "${VAR_LOG_EXCLUDE}"

# Add VM console.log
for i in /var/lib/nova/instances/*/console.log; do
    if [ -e "$i" ]; then
        tmp=`dirname $i`
        mkdir -p ${COLLECT_NAME_DIR}/$tmp
        cp $i ${COLLECT_NAME_DIR}/$tmp
    fi
done

# Add Rook-ceph logs
for i in /var/lib/ceph/data/rook-ceph/log/*; do
    if [ -e "$i" ]; then
        rook_ceph_log_dir="var/log/rook-ceph"
        mkdir -p ${COLLECT_NAME_DIR}/$rook_ceph_log_dir
        cp $i ${COLLECT_NAME_DIR}/$rook_ceph_log_dir
    fi
done

# Add kubelet config and state files
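# Note: 'cp' without -r copies only the regular files at the top of
# /var/lib/kubelet (typically the kubelet config and the cpu/memory
# manager state files) and skips subdirectories such as pods/ and
# plugins/, which may hold large or sensitive runtime data.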
for i in /var/lib/kubelet/*; do
    if [ -e "$i" ]; then
        kube_config_dir="var/lib/kubelet"
        mkdir -p ${COLLECT_NAME_DIR}/$kube_config_dir
        cp $i ${COLLECT_NAME_DIR}/$kube_config_dir
    fi
done
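# Illustrative check against the test plan above: once the collect
# completes, the kubelet capture can be confirmed with
#   tar -tzf ${COLLECT_NAME_DIR}.tgz | grep var/lib/kubelet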

sleep ${COLLECT_RUNEXTRA_DELAY}

echo "$(date +'%H:%M:%S.%3N'): Running host tars" >> ${COLLECT_CMD_TIMING_LOG}

log_space "before first tar ....:"

(cd ${COLLECT_NAME_DIR} ; ${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${COLLECT_NAME_DIR}/${COLLECT_NAME}.tar -T ${VAR_LOG_INCLUDE_LIST} -X ${RUN_EXCLUDE} -X ${ETC_EXCLUDE} -X ${VAR_LOG_EXCLUDE} ${COLLECT_INCLUDE} ${CHECKPOINT_CMD} 2>>${COLLECT_ERROR_LOG} 1>>${COLLECT_ERROR_LOG} )

log_space "after first tar .....:"

(cd ${COLLECT_NAME_DIR} ; ${IONICE_CMD} ${NICE_CMD} ${UNTAR_CMD} ${COLLECT_NAME_DIR}/${COLLECT_NAME}.tar ${CHECKPOINT_CMD} 2>>${COLLECT_ERROR_LOG} 1>>${COLLECT_ERROR_LOG} )

log_space "after first untar ...:"

rm -f ${COLLECT_NAME_DIR}/${COLLECT_NAME}.tar

log_space "after delete tar ....:"

if [ "${SKIP_MASK}" != "true" ]; then
    # Run password masking before final tar
    dlog "running /usr/local/sbin/collect_mask_passwords ${COLLECT_NAME_DIR} ${EXTRA_DIR}"
    /usr/local/sbin/collect_mask_passwords ${COLLECT_NAME_DIR} ${EXTRA_DIR}
    log_space "after passwd masking :"
fi

if [ "${OMIT_CERTS}" != "true" ]; then
    # Collect certificates from the host
    dlog "running /usr/local/sbin/collect_certificates ${EXTRA_DIR}"
    COLLECT_ERROR_LOG="$COLLECT_ERROR_LOG" \
        /usr/local/sbin/collect_certificates ${EXTRA_DIR}
    log_space "after certificates ..:"
fi

mkdir -p ${COLLECT_NAME_DIR}/${FLIGHT_RECORDER_PATH}

(cd /${FLIGHT_RECORDER_PATH} ; ${TAR_ZIP_CMD} ${COLLECT_NAME_DIR}/${FLIGHT_RECORDER_PATH}/${FLIGHT_RECORDER_FILE}.tgz ./${FLIGHT_RECORDER_FILE} ${CHECKPOINT_CMD} 2>>${COLLECT_ERROR_LOG} 1>>${COLLECT_ERROR_LOG})


# save the collect.log file to this host's tarball
cp -a ${COLLECT_ERROR_LOG} ${COLLECT_NAME_DIR}/${COLLECT_LOG}
cp -a ${COLLECT_CMD_TIMING_LOG} "${COLLECT_NAME_DIR}/${HOST_COLLECT_CMD_TIMING_LOG}"

log_space "with flight data ....:"

(cd ${COLLECT_BASE_DIR} ; ${IONICE_CMD} ${NICE_CMD} ${TAR_ZIP_CMD} ${COLLECT_NAME_DIR}.tgz ${COLLECT_NAME} ${CHECKPOINT_CMD} 2>>${COLLECT_ERROR_LOG} 1>>${COLLECT_ERROR_LOG} )

log_space "after collect .......:"

echo "$(date +'%H:%M:%S.%3N'): Finished host tars" >> ${COLLECT_CMD_TIMING_LOG}

rm -rf ${COLLECT_NAME_DIR}
rm -f ${VAR_LOG_INCLUDE_LIST}

log_space "after cleanup .......:"

# Check for collect errors
# Only an out-of-space error is enough to fail this host's collect
collect_errors "${HOSTNAME}"
RC=${?}

rm -f ${COLLECT_ERROR_LOG}

if [ ${RC} -ne 0 ] ; then
    rm -f ${COLLECT_NAME_DIR}.tgz
    if [ "${REMOTE_HOST}" = true ] ; then
        ilog "${FAIL_OUT_OF_SPACE_REMOTE_STR} ${COLLECT_BASE_DIR}"
    else
        ilog "${FAIL_OUT_OF_SPACE_STR} ${COLLECT_BASE_DIR}"
    fi
else
    ilog "collect of ${COLLECT_NAME_DIR}.tgz succeeded"
    echo "${collect_done}"
fi

dlog "collect_host exit code: ${RC}"
exit ${RC}