Merge "Aggregate by fqdn label instead of instance in host cpu metrics"

This commit is contained in:
Zuul 2025-04-08 17:37:10 +00:00 committed by Gerrit Code Review
commit c9a1d06e7c
2 changed files with 9 additions and 9 deletions

View File

@ -264,8 +264,8 @@ class PrometheusHelper(base.DataSourceBase):
This function builds and returns the string query that will be sent
to the Prometheus server /query endpoint. For host cpu usage we use:
100 - (avg by (instance)(rate(node_cpu_seconds_total{mode='idle',
instance='some_host'}[300s])) * 100)
100 - (avg by (fqdn)(rate(node_cpu_seconds_total{mode='idle',
fqdn='some_host'}[300s])) * 100)
so using the Prometheus rate function over the specified period, we average
the idle time per host (all cpus) and then 'everything else' is cpu
@ -307,7 +307,7 @@ class PrometheusHelper(base.DataSourceBase):
if meter == 'node_cpu_seconds_total':
query_args = (
"100 - (%(agg)s by (instance)(rate(%(meter)s"
"100 - (%(agg)s by (%(label)s)(rate(%(meter)s"
"{mode='idle',%(label)s='%(label_value)s'}[%(period)ss])) "
"* 100)"
% {'label': self.prometheus_fqdn_label,
@ -464,8 +464,8 @@ class PrometheusHelper(base.DataSourceBase):
This calculates the host cpu usage and returns it as a percentage
The calculation is made by using the cpu 'idle' time, per
host (so all CPUs are included). For example the query looks like
(100 - (avg by (instance)(rate(node_cpu_seconds_total
{mode='idle',instance='localhost:9100'}[300s])) * 100))
(100 - (avg by (fqdn)(rate(node_cpu_seconds_total
{mode='idle',fqdn='compute1.example.com'}[300s])) * 100))
"""
aggregate = self._invert_max_min_aggregate(aggregate)
cpu_usage = self.statistic_aggregation(

View File

@ -146,7 +146,7 @@ class TestPrometheusHelper(base.BaseTestCase):
)
self.assertEqual(expected_cpu_usage, result)
mock_prometheus_query.assert_called_once_with(
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
"{mode='idle',fqdn='marios-env.controlplane.domain'}[300s]))"
" * 100)")
@ -575,7 +575,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_node_cpu_avg_agg(self):
expected_query = (
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
"{mode='idle',fqdn='a_host'}[111s])) * 100)")
result = self.helper._build_prometheus_query(
'avg', 'node_cpu_seconds_total', 'a_host', '111')
@ -583,7 +583,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_node_cpu_max_agg(self):
expected_query = (
"100 - (max by (instance)(rate(node_cpu_seconds_total"
"100 - (max by (fqdn)(rate(node_cpu_seconds_total"
"{mode='idle',fqdn='b_host'}[444s])) * 100)")
result = self.helper._build_prometheus_query(
'max', 'node_cpu_seconds_total', 'b_host', '444')
@ -610,7 +610,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_node_cpu_avg_agg_custom_label(self):
self.helper.prometheus_fqdn_label = 'custom_fqdn_label'
expected_query = (
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
"100 - (avg by (custom_fqdn_label)(rate(node_cpu_seconds_total"
"{mode='idle',custom_fqdn_label='a_host'}[111s])) * 100)")
result = self.helper._build_prometheus_query(
'avg', 'node_cpu_seconds_total', 'a_host', '111')