Check namespace label for platform
This commit checks for the label "app.starlingx.io/component=platform" in the namespace labels. Previously, we checked only the pod labels for the platform component, or whether the namespace was in a hardcoded list of namespaces. With this commit, in addition to the previous checks, we also check whether the platform label is present in the k8s namespace labels; if so, the pods in that namespace are considered platform resources in collectd. It also de-duplicates code by combining the common pod-refresh code of the cpu and memory plugins. Test Plan: Pass: Applied code changes on AIO-SX and collectd was restarted successfully Pass: Verify that /var/log/collectd.log contains cpu and memory consumption info for platform pods Pass: Verify namespace platform label is taken into account by collectd Closes-Bug: #2105964 Change-Id: Ia038d0aa9b59a6904b0dcb756c38dc80693457b1 Signed-off-by: sshathee <shunmugam.shatheesh@windriver.com>
This commit is contained in:
parent
044cd5c525
commit
5b258533ed
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2018-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2018-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -504,7 +504,7 @@ def calculate_occupancy(
|
||||
continue
|
||||
|
||||
# K8S platform system usage, i.e., essential: kube-system
|
||||
# check for component label app.starlingx.io/component=platform
|
||||
# check for component label app.starlingx.io/component=platform
|
||||
if pod.is_platform_resource():
|
||||
cpuacct[pc.GROUP_OVERALL][pc.GROUP_K8S_SYSTEM] += acct
|
||||
cpuwait[pc.GROUP_OVERALL][pc.GROUP_K8S_SYSTEM] += wait
|
||||
@ -757,6 +757,8 @@ def update_cpu_data(init=False):
|
||||
since this routine was last run.
|
||||
"""
|
||||
|
||||
global obj
|
||||
|
||||
# Get epoch time in floating seconds
|
||||
now = time.time()
|
||||
|
||||
@ -795,51 +797,7 @@ def update_cpu_data(init=False):
|
||||
|
||||
# Refresh the k8s pod information if we have discovered new cgroups
|
||||
cg_pods = set(t1_cpuacct[pc.GROUP_PODS].keys())
|
||||
if not cg_pods.issubset(obj.k8s_pods):
|
||||
if obj.debug:
|
||||
collectd.info('%s Refresh k8s pod information.' % (PLUGIN_DEBUG))
|
||||
obj.k8s_pods = set()
|
||||
try:
|
||||
pods = obj._k8s_client.kube_get_local_pods()
|
||||
for i in pods:
|
||||
# NOTE: parent pod cgroup name contains annotation config.hash as
|
||||
# part of its name, otherwise it contains the pod uid.
|
||||
uid = i.metadata.uid
|
||||
if ((i.metadata.annotations) and
|
||||
(pc.POD_ANNOTATION_KEY in i.metadata.annotations)):
|
||||
hash_uid = i.metadata.annotations.get(pc.POD_ANNOTATION_KEY,
|
||||
None)
|
||||
if hash_uid:
|
||||
if obj.debug:
|
||||
collectd.info('%s POD_ANNOTATION_KEY: '
|
||||
'hash=%s, uid=%s, '
|
||||
'name=%s, namespace=%s, qos_class=%s,'
|
||||
'is_platform_label=%s'
|
||||
% (PLUGIN_DEBUG,
|
||||
hash_uid,
|
||||
i.metadata.uid,
|
||||
i.metadata.name,
|
||||
i.metadata.namespace,
|
||||
i.status.qos_class,
|
||||
i.metadata.labels.get(pc.PLATFORM_LABEL_KEY) ==
|
||||
pc.GROUP_PLATFORM))
|
||||
uid = hash_uid
|
||||
|
||||
obj.k8s_pods.add(uid)
|
||||
if uid not in obj._cache:
|
||||
obj._cache[uid] = pc.POD_object(i.metadata.uid,
|
||||
i.metadata.name,
|
||||
i.metadata.namespace,
|
||||
i.status.qos_class,
|
||||
i.metadata.labels)
|
||||
# Remove stale _cache entries
|
||||
remove_uids = set(obj._cache.keys()) - obj.k8s_pods
|
||||
for uid in remove_uids:
|
||||
del obj._cache[uid]
|
||||
except ApiException:
|
||||
# continue with remainder of calculations, keeping cache
|
||||
collectd.warning('%s encountered kube ApiException' % (PLUGIN))
|
||||
pass
|
||||
obj = pc.pods_monitoring(cg_pods, obj, PLUGIN_DEBUG)
|
||||
|
||||
# Save initial state information
|
||||
if init:
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2018-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2018-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -686,6 +686,8 @@ def init_func():
|
||||
def read_func():
|
||||
"""collectd memory monitor plugin read function"""
|
||||
|
||||
global obj
|
||||
|
||||
if obj.init_complete is False:
|
||||
init_func()
|
||||
return 0
|
||||
@ -710,52 +712,7 @@ def read_func():
|
||||
|
||||
# Refresh the k8s pod information if we have discovered new cgroups
|
||||
cg_pods = set(memory[pc.GROUP_PODS].keys())
|
||||
if not cg_pods.issubset(obj.k8s_pods):
|
||||
if obj.debug:
|
||||
collectd.info('%s: Refresh k8s pod information.' % (PLUGIN_DEBUG))
|
||||
obj.k8s_pods = set()
|
||||
try:
|
||||
pods = obj._k8s_client.kube_get_local_pods()
|
||||
for i in pods:
|
||||
# NOTE: parent pod cgroup name contains annotation config.hash as
|
||||
# part of its name, otherwise it contains the pod uid.
|
||||
uid = i.metadata.uid
|
||||
if ((i.metadata.annotations) and
|
||||
(pc.POD_ANNOTATION_KEY in i.metadata.annotations)):
|
||||
hash_uid = i.metadata.annotations.get(pc.POD_ANNOTATION_KEY,
|
||||
None)
|
||||
if hash_uid:
|
||||
if obj.debug:
|
||||
collectd.info('%s: POD_ANNOTATION_KEY: '
|
||||
'hash=%s, uid=%s, '
|
||||
'name=%s, namespace=%s, qos_class=%s, '
|
||||
'is_platform_label=%s'
|
||||
% (PLUGIN_DEBUG,
|
||||
hash_uid,
|
||||
i.metadata.uid,
|
||||
i.metadata.name,
|
||||
i.metadata.namespace,
|
||||
i.status.qos_class,
|
||||
i.metadata.labels.get(pc.PLATFORM_LABEL_KEY) ==
|
||||
pc.GROUP_PLATFORM))
|
||||
uid = hash_uid
|
||||
|
||||
obj.k8s_pods.add(uid)
|
||||
if uid not in obj._cache:
|
||||
obj._cache[uid] = pc.POD_object(i.metadata.uid,
|
||||
i.metadata.name,
|
||||
i.metadata.namespace,
|
||||
i.status.qos_class,
|
||||
i.metadata.labels)
|
||||
|
||||
# Remove stale _cache entries
|
||||
remove_uids = set(obj._cache.keys()) - obj.k8s_pods
|
||||
for uid in remove_uids:
|
||||
del obj._cache[uid]
|
||||
except ApiException:
|
||||
# continue with remainder of calculations, keeping cache
|
||||
collectd.warning("memory plugin encountered kube ApiException")
|
||||
pass
|
||||
obj = pc.pods_monitoring(cg_pods, obj, PLUGIN_DEBUG)
|
||||
|
||||
# Summarize memory usage for various groupings
|
||||
for g in pc.OVERALL_GROUPS:
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2019-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2019-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -27,6 +27,7 @@ from kubernetes import __version__ as K8S_MODULE_VERSION
|
||||
from kubernetes import client
|
||||
from kubernetes import config
|
||||
from kubernetes.client import Configuration
|
||||
from kubernetes.client.rest import ApiException
|
||||
import urllib3
|
||||
|
||||
|
||||
@ -106,10 +107,7 @@ BASE_GROUPS = [CGROUP_INIT, CGROUP_DOCKER, CGROUP_SYSTEM, CGROUP_USER,
|
||||
BASE_GROUPS_EXCLUDE = [CGROUP_K8S, CGROUP_MACHINE]
|
||||
|
||||
# Groupings of pods by kubernetes namespace
|
||||
K8S_NAMESPACE_SYSTEM = ['kube-system', 'armada', 'cert-manager', 'portieris',
|
||||
'vault', 'notification', 'platform-deployment-manager',
|
||||
'flux-helm', 'metrics-server', 'node-feature-discovery',
|
||||
'intel-power', 'power-metrics', 'sriov-fec-system']
|
||||
K8S_NAMESPACE_SYSTEM = ['kube-system']
|
||||
K8S_NAMESPACE_ADDON = ['monitor', 'openstack']
|
||||
PLATFORM_LABEL_KEY = "app.starlingx.io/component"
|
||||
|
||||
@ -701,6 +699,16 @@ class K8sClient(object):
|
||||
spec=pod.get('spec'),
|
||||
status=self._as_kube_status(pod.get('status')))
|
||||
|
||||
def _get_namespace_labels(self, namespace_list):
|
||||
# some namespaces might not have label, so
|
||||
# return empty dict for them
|
||||
namespace_labels = {}
|
||||
for n in namespace_list:
|
||||
labels = n["metadata"].get("labels", {})
|
||||
name = n["metadata"]["name"]
|
||||
namespace_labels.update({name: labels})
|
||||
return namespace_labels
|
||||
|
||||
def _as_kube_status(self, status):
|
||||
# status (json) dictionary has the following keys:
|
||||
# 'conditions', 'containerStatuses', 'hostIP', 'phase',
|
||||
@ -778,14 +786,36 @@ class K8sClient(object):
|
||||
collectd.error("kube_get_local_pods: error=%s" % (str(err)))
|
||||
raise
|
||||
|
||||
def get_namespace_labels(self):
    """Return a dict mapping namespace name to that namespace's labels.

    Runs 'kubectl get namespaces -o json' with the kubelet kubeconfig
    and hands the "items" list of the parsed output to
    _get_namespace_labels(). On timeout, a non-zero kubectl exit, or
    unparsable JSON, the error is logged and an empty dict is returned.
    """
    kubectl_cmd = ['kubectl', '--kubeconfig', KUBELET_CONF,
                   'get', 'namespaces',
                   '-o', 'json']
    try:
        raw_output = subprocess.check_output(kubectl_cmd,
                                             timeout=K8S_TIMEOUT)
        json_results = json.loads(raw_output.decode())
    except subprocess.TimeoutExpired:
        collectd.error('kube_get_namespaces: Timeout')
        return {}
    except subprocess.CalledProcessError as e:
        collectd.error('kube_get_namespaces: Could not get namespaces, error=%s' % (str(e)))
        return {}
    except json.JSONDecodeError as e:
        collectd.error('kube_get_namespaces: Could not parse json output, error=%s' % (str(e)))
        return {}

    namespace_items = json_results["items"]
    return self._get_namespace_labels(namespace_items)
|
||||
|
||||
|
||||
class POD_object:
|
||||
def __init__(self, uid, name, namespace, qos_class, labels=None):
|
||||
def __init__(self, uid, name, namespace, qos_class, platform_label=False):
|
||||
self.uid = uid
|
||||
self.name = name
|
||||
self.namespace = namespace
|
||||
self.qos_class = qos_class
|
||||
self.labels = labels
|
||||
self.platform_label = platform_label
|
||||
|
||||
def __str__(self):
|
||||
return str(self.__class__) + ": " + str(self.__dict__)
|
||||
@ -797,8 +827,7 @@ class POD_object:
|
||||
"""Check whether pod contains platform namespace or platform label"""
|
||||
|
||||
if (self.namespace in K8S_NAMESPACE_SYSTEM
|
||||
or (self.labels is not None and
|
||||
self.labels.get(PLATFORM_LABEL_KEY) == GROUP_PLATFORM)):
|
||||
or self.platform_label):
|
||||
return True
|
||||
return False
|
||||
|
||||
@ -909,3 +938,63 @@ def format_range_set(items):
|
||||
s = "%s-%s" % (rng[0][1], rng[-1][1])
|
||||
ranges.append(s)
|
||||
return ','.join(ranges)
|
||||
|
||||
|
||||
def pods_monitoring(cg_pods, obj, PLUGIN_DEBUG):
    """Refresh the cached k8s pod information when new pod cgroups appear.

    Shared by the cpu and memory plugins. When cg_pods contains an entry
    that is not in obj.k8s_pods, the local pods and the namespace labels
    are re-read. obj.k8s_pods is rebuilt, missing pods are added to
    obj._cache as POD_object instances, and cache entries for pods that
    no longer exist are dropped.

    A pod is flagged as platform when either its own labels or the labels
    of its namespace carry PLATFORM_LABEL_KEY == GROUP_PLATFORM.

    :param cg_pods: set of pod identifiers discovered from cgroups
    :param obj: plugin state object; uses its k8s_pods, _cache, debug and
                _k8s_client attributes
    :param PLUGIN_DEBUG: prefix used in log messages
    :returns: the same obj, updated in place
    """
    # Nothing to refresh when every discovered pod cgroup is already known.
    if cg_pods.issubset(obj.k8s_pods):
        return obj

    if obj.debug:
        collectd.info('%s: Refresh k8s pod information.' % (PLUGIN_DEBUG))
    obj.k8s_pods = set()
    try:
        namespace_labels = obj._k8s_client.get_namespace_labels()
        pods = obj._k8s_client.kube_get_local_pods()
        for i in pods:
            # NOTE: parent pod cgroup name contains annotation config.hash as
            # part of its name, otherwise it contains the pod uid.
            uid = i.metadata.uid

            namespace = i.metadata.namespace
            # get_namespace_labels() returns {} when kubectl fails, and a
            # pod may have no labels at all; treat both as "no labels"
            # instead of raising KeyError/AttributeError, which the
            # ApiException handler below would not catch. In that case the
            # decision falls back to whatever labels are available.
            ns_labels = namespace_labels.get(namespace) or {}
            pod_labels = i.metadata.labels or {}

            # Check if platform label is present in namespace label or pod label
            platform_label = (
                ns_labels.get(PLATFORM_LABEL_KEY) == GROUP_PLATFORM
                or pod_labels.get(PLATFORM_LABEL_KEY) == GROUP_PLATFORM)

            if ((i.metadata.annotations) and
                    (POD_ANNOTATION_KEY in i.metadata.annotations)):
                hash_uid = i.metadata.annotations.get(POD_ANNOTATION_KEY,
                                                      None)
                if hash_uid:
                    if obj.debug:
                        collectd.info('%s: POD_ANNOTATION_KEY: '
                                      'hash=%s, uid=%s, '
                                      'name=%s, namespace=%s, qos_class=%s, '
                                      'is_platform_label=%s'
                                      % (PLUGIN_DEBUG,
                                         hash_uid,
                                         i.metadata.uid,
                                         i.metadata.name,
                                         namespace,
                                         i.status.qos_class,
                                         platform_label))
                    uid = hash_uid

            obj.k8s_pods.add(uid)
            if uid not in obj._cache:
                obj._cache[uid] = POD_object(i.metadata.uid,
                                             i.metadata.name,
                                             namespace,
                                             i.status.qos_class,
                                             platform_label)

        # Remove stale _cache entries
        remove_uids = set(obj._cache.keys()) - obj.k8s_pods
        for uid in remove_uids:
            del obj._cache[uid]

    except ApiException:
        # continue with remainder of calculations, keeping cache
        collectd.warning('%s encountered kube ApiException' % (PLUGIN_DEBUG))

    return obj
|
||||
|
Loading…
x
Reference in New Issue
Block a user