Merge "Aggregate by fqdn label instead of instance in host cpu metrics"
This commit is contained in:
commit
c9a1d06e7c
@ -264,8 +264,8 @@ class PrometheusHelper(base.DataSourceBase):
|
||||
This function builds and returns the string query that will be sent
|
||||
to the Prometheus server /query endpoint. For host cpu usage we use:
|
||||
|
||||
100 - (avg by (instance)(rate(node_cpu_seconds_total{mode='idle',
|
||||
instance='some_host'}[300s])) * 100)
|
||||
100 - (avg by (fqdn)(rate(node_cpu_seconds_total{mode='idle',
|
||||
fqdn='some_host'}[300s])) * 100)
|
||||
|
||||
so using prometheus rate function over the specified period, we average
|
||||
per instance (all cpus) idle time and then 'everything else' is cpu
|
||||
@ -307,7 +307,7 @@ class PrometheusHelper(base.DataSourceBase):
|
||||
|
||||
if meter == 'node_cpu_seconds_total':
|
||||
query_args = (
|
||||
"100 - (%(agg)s by (instance)(rate(%(meter)s"
|
||||
"100 - (%(agg)s by (%(label)s)(rate(%(meter)s"
|
||||
"{mode='idle',%(label)s='%(label_value)s'}[%(period)ss])) "
|
||||
"* 100)"
|
||||
% {'label': self.prometheus_fqdn_label,
|
||||
@ -464,8 +464,8 @@ class PrometheusHelper(base.DataSourceBase):
|
||||
This calculates the host cpu usage and returns it as a percentage
|
||||
The calculation is made by using the cpu 'idle' time, per
|
||||
instance (so all CPUs are included). For example the query looks like
|
||||
(100 - (avg by (instance)(rate(node_cpu_seconds_total
|
||||
{mode='idle',instance='localhost:9100'}[300s])) * 100))
|
||||
(100 - (avg by (fqdn)(rate(node_cpu_seconds_total
|
||||
{mode='idle',fqdn='compute1.example.com'}[300s])) * 100))
|
||||
"""
|
||||
aggregate = self._invert_max_min_aggregate(aggregate)
|
||||
cpu_usage = self.statistic_aggregation(
|
||||
|
@ -146,7 +146,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
||||
)
|
||||
self.assertEqual(expected_cpu_usage, result)
|
||||
mock_prometheus_query.assert_called_once_with(
|
||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
||||
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
|
||||
"{mode='idle',fqdn='marios-env.controlplane.domain'}[300s]))"
|
||||
" * 100)")
|
||||
|
||||
@ -575,7 +575,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
||||
|
||||
def test_build_prometheus_query_node_cpu_avg_agg(self):
|
||||
expected_query = (
|
||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
||||
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
|
||||
"{mode='idle',fqdn='a_host'}[111s])) * 100)")
|
||||
result = self.helper._build_prometheus_query(
|
||||
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
||||
@ -583,7 +583,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
||||
|
||||
def test_build_prometheus_query_node_cpu_max_agg(self):
|
||||
expected_query = (
|
||||
"100 - (max by (instance)(rate(node_cpu_seconds_total"
|
||||
"100 - (max by (fqdn)(rate(node_cpu_seconds_total"
|
||||
"{mode='idle',fqdn='b_host'}[444s])) * 100)")
|
||||
result = self.helper._build_prometheus_query(
|
||||
'max', 'node_cpu_seconds_total', 'b_host', '444')
|
||||
@ -610,7 +610,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
||||
def test_build_prometheus_query_node_cpu_avg_agg_custom_label(self):
|
||||
self.helper.prometheus_fqdn_label = 'custom_fqdn_label'
|
||||
expected_query = (
|
||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
||||
"100 - (avg by (custom_fqdn_label)(rate(node_cpu_seconds_total"
|
||||
"{mode='idle',custom_fqdn_label='a_host'}[111s])) * 100)")
|
||||
result = self.helper._build_prometheus_query(
|
||||
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
||||
|
Loading…
x
Reference in New Issue
Block a user