
Currently, all of the code for the plugin algorithms is in one file,
and all of the code for the correlator is in one file as well. As the
correlator begins to find more failures/events/state changes and more
plugin algorithms are added to the tool, these files will get very
long. To make the code more modular and easier to extend, this update:

- Splits the plugin algorithms into individual files.
- Adds a --report command line option to collect so that the report
  analysis can be run as part of the collect operation and the results
  included in the collect bundle itself.
- Supports running the tool manually on any system node.
- Organizes the tool files in git so the tool can be run directly
  from git.
- Removes the unnecessary report 'tool' directory in report.
- Moves the report_analysis output directory into the collect bundle
  itself, or redirects it to /tmp/report_analysis if the collect
  bundle directory is not writable by the current user.
- Adds support for developer plugins in /etc/collect/plugins.
- Removes ^M's from execution_engine.py.
- Adds debug logging to the report.log file.
- Removes some default-on logging, making report output less verbose.
- Adds a --file option so the tool can be pointed at a bundle.tar file.

Usage:

Run report as part of collect - analyze during collect and package
the results:

> collect all --report

Note: The analysis is stored as report_analysis.tgz inside the bundle.

Run on-system - point to a bundle directory or tar file in /scratch:

> /usr/local/bin/report/report.py --directory /scratch
> /usr/local/bin/report/report.py --file /scratch/<bundle.tar>

Run from the utilities git:

> stx/utilities/collector/debian-scripts/report/report.py -d /path

Note: The analysis is placed with the bundle in the 'report_analysis'
directory:

.
├── ALL_NODES_20221211.214001.tar
├── ALL_NODES_20221211.214001
│   ├── controller-0_20221211.214001
│   ├── controller-0_20221211.214001.tgz
│   ├── controller-1_20221211.214001
│   ├── controller-1_20221211.214001.tgz
│   └── report_analysis

Test Plan:

PASS: Verify tool is packaged in Debian
PASS: Verify tool and all files are inserted into 'collect' tarballs
PASS: Verify alarm plugin algorithm is working
PASS: Verify daemon failures plugin algorithm is working
PASS: Verify heartbeat loss plugin algorithm is working
PASS: Verify maintenance errors plugin algorithm is working
PASS: Verify process failures plugin algorithm is working
PASS: Verify puppet errors plugin algorithm is working
PASS: Verify substring plugin algorithm is working
PASS: Verify swact activity plugin algorithm is working
PASS: Verify state changes plugin algorithm is working
PASS: Verify system info plugin algorithm is working
PASS: Verify failures and correct root causes are found by correlator
PASS: Verify significant events are found by correlator
PASS: Verify state changes are found by correlator
PASS: Verify results from plugin algorithms are printed into files
PASS: Verify report tool is not double packaged for subcloud collects
PASS: Verify the new --report option is passed to subcloud collect
PASS: Verify creating a plugin list from both built-in and local host
      designer plugins
PASS: Verify the -d | --directory option handling; success and failure
PASS: Verify the -f | --file option handling; success and failure
PASS: Verify the new collect --report option; when used, the currently
      collected bundle gets auto analyzed and that analysis is included
      in the bundle
PASS: Verify report tool can be run on any host in the system
PASS: Verify report tool can be run directly from its git
PASS: Verify tool presents a list of collect bundles to analyze when
      pointed to a directory containing more than one bundle; the user
      can select one of the presented bundles to analyze
PASS: Verify tool logging to stdio and log files, including debug logs
PASS: Verify analysis is created in the bundle directory
PASS: Verify analysis output when run using the collect --report option
PASS: Verify pep8 static analysis

Story: 2010166
Task: 47033
Authored-By: Angela Mao <Angela.Mao@windriver.com>
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
Change-Id: Ifa17588de1297d7e199c667a05cb1d21e6ae0382
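
The file below is one of the split-out plugin algorithm modules. As a
hedged sketch of the layout this change enables (the plugin_algs
package and module names here are assumptions; the real dispatch lives
in execution_engine.py), the engine can import each algorithm from its
own file rather than from one monolithic module:

    import importlib

    # one module per algorithm; each module exposes a function named
    # after the algorithm (e.g. plugin_algs.alarm.alarm)
    for name in ("alarm", "daemon_failures", "maintenance_errors"):
        module = importlib.import_module(f"plugin_algs.{name}")
        run_algorithm = getattr(module, name)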
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the alarm plugin algorithm.
#
# The alarm plugin algorithm gathers and presents a list of all alarms
# and customer logs, except those specified.
#
########################################################################

import os
import re


def alarm(host_dir, start, end, alarm_exclude=None,
          entity_exclude=None):
    """Alarm algorithm

    Presents all alarms and customer logs, except those specified

    Parameters:
        host_dir (string): path to the host directory
        start (string): Start time for analysis
        end (string): End time for analysis

        alarm_exclude (string list): List of alarms to ignore
        entity_exclude (string list): List of entity ids to ignore
    """
    alarm_data = {}
    log_data = {}

    fm_database = os.path.join(
        host_dir, "var", "extra", "database", "fm.db.sql.txt")
    if not os.path.exists(fm_database):
        return None, None

    if alarm_exclude is None:
        alarm_exclude = []
    if entity_exclude is None:
        entity_exclude = []
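
    # fm.db.sql.txt is a plain-text SQL dump of the FM database. The
    # event_log rows appear in a PostgreSQL COPY block: a "COPY
    # event_log ..." header line, one tab-separated row per event, and
    # a terminating line containing only "\.".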
    with open(fm_database) as file:
        alarms_start = False
        for line in file:
            # start of event log
            if re.search(r"COPY (public\.)?event_log", line):
                alarms_start = True
            elif alarms_start and line == "\\.\n":
                break
            elif alarms_start:
                entry = re.split(r"\t", line)

                INDEX_ALARM_ID = 5
                INDEX_ACTION = 6
                INDEX_ENTITY_ID = 8
                INDEX_ALARM_DATE = 9
                INDEX_SEVERITY = 10

                alarm_id = entry[INDEX_ALARM_ID]
                entity_id = entry[INDEX_ENTITY_ID]
                action = entry[INDEX_ACTION]
                severity = entry[INDEX_SEVERITY]
                alarm_date = entry[INDEX_ALARM_DATE]

                # make the alarm's time format match start/end
                entry_date = alarm_date.replace(" ", "T")
                if start <= entry_date <= end:
                    cont = True
                    # Check if the alarm is in the user specified list
                    # of alarm or entity ids to exclude
                    for id in alarm_exclude:
                        if id in alarm_id:
                            cont = False
                            break

                    for entity in entity_exclude:
                        if entity in entity_id:
                            cont = False
                            break

                    if not cont:
                        continue
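
                    # Events are aggregated under the composite key
                    # "<alarm id> <entity id> <severity>"; a KeyError
                    # below means this is the first event for that key.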
                    try:
                        if action == "log":
                            log_info = log_data[
                                f"{alarm_id} {entity_id} {severity}"
                            ]
                            log_info["count"] += 1
                            log_info["dates"].append(alarm_date)
                        else:
                            alarm_info = alarm_data[
                                f"{alarm_id} {entity_id} {severity}"
                            ]
                            alarm_info["dates"].append(
                                f"{alarm_date} {action}")
                    except KeyError:
                        if action != "log":
                            alarm_data[
                                f"{alarm_id} {entity_id} {severity}"
                            ] = {
                                "dates": [f"{alarm_date} {action}"],
                            }
                        else:
                            log_data[
                                f"{alarm_id} {entity_id} {severity}"
                            ] = {
                                "count": 1,
                                "dates": [alarm_date],
                            }
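
    # An alarm's "dates" list alternates between actions (e.g. set and
    # clear). After sorting chronologically, keep only entries whose
    # action differs from the previous entry, dropping repeated reports
    # of the same action.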
    for _, v in alarm_data.items():
        v["dates"] = sorted(v["dates"])
        temp = []
        temp.append(v["dates"][0])
        for i in range(1, len(v["dates"])):
            if v["dates"][i].split()[2] != v["dates"][i - 1].split()[2]:
                temp.append(v["dates"][i])
        v["dates"] = temp

    for _, v in log_data.items():
        v["dates"] = sorted(v["dates"])

    return alarm_data, log_data
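
Example usage (a hedged sketch: the import path, bundle path, time
window, and exclude lists below are illustrative; in practice the
report tool's execution engine invokes this function):

    from alarm import alarm  # assumes this module is on the import path

    host_dir = ("/scratch/ALL_NODES_20221211.214001/"
                "controller-0_20221211.214001")
    alarm_data, log_data = alarm(
        host_dir,
        start="2022-12-11T00:00:00",
        end="2022-12-12T00:00:00",
        alarm_exclude=["400."],           # drop matching alarm ids
        entity_exclude=["subsystem=vim"]  # drop matching entity ids
    )

    # Results are keyed by "<alarm id> <entity id> <severity>".
    # alarm_data values hold a list of "<date> <action>" strings;
    # log_data values hold a count and a list of dates.
    if alarm_data is not None:
        for key, info in alarm_data.items():
            print(key, info["dates"])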