diff --git a/doc/source/user/guides/stats.rst b/doc/source/user/guides/stats.rst index d01ef8b4f..537a12707 100644 --- a/doc/source/user/guides/stats.rst +++ b/doc/source/user/guides/stats.rst @@ -57,3 +57,15 @@ Metrics will be reported only when corresponding client libraries ( `statsd` for 'statsd' reporting, `influxdb` for influxdb reporting correspondingly). When those libraries are not available reporting will be silently ignored. + +InfluxDB reporting allows setting additional tags into the metrics based on the +selected cloud. + +.. code-block:: yaml + + clouds: + my_cloud: + profile: some_profile + ... + additional_metric_tags: + environment: production diff --git a/openstack/config/loader.py b/openstack/config/loader.py index 50bc22adf..6e931d663 100644 --- a/openstack/config/loader.py +++ b/openstack/config/loader.py @@ -289,25 +289,28 @@ class OpenStackConfig: influxdb_cfg = metrics_config.get('influxdb', {}) # Parse InfluxDB configuration - if influxdb_config: - influxdb_cfg.update(influxdb_config) - if influxdb_cfg: - config = {} - if 'use_udp' in influxdb_cfg: - use_udp = influxdb_cfg['use_udp'] - if isinstance(use_udp, str): - use_udp = use_udp.lower() in ('true', 'yes', '1') - elif not isinstance(use_udp, bool): - use_udp = False - self.log.warning('InfluxDB.use_udp value type is not ' - 'supported. Use one of ' - '[true|false|yes|no|1|0]') - config['use_udp'] = use_udp - for key in ['host', 'port', 'username', 'password', 'database', - 'measurement', 'timeout']: - if key in influxdb_cfg: - config[key] = influxdb_cfg[key] - self._influxdb_config = config + if not influxdb_config: + influxdb_config = influxdb_cfg + else: + influxdb_config.update(influxdb_cfg) + + if influxdb_config: + config = {} + if 'use_udp' in influxdb_config: + use_udp = influxdb_config['use_udp'] + if isinstance(use_udp, str): + use_udp = use_udp.lower() in ('true', 'yes', '1') + elif not isinstance(use_udp, bool): + use_udp = False + self.log.warning('InfluxDB.use_udp value type is not ' + 'supported. Use one of ' + '[true|false|yes|no|1|0]') + config['use_udp'] = use_udp + for key in ['host', 'port', 'username', 'password', 'database', + 'measurement', 'timeout']: + if key in influxdb_config: + config[key] = influxdb_config[key] + self._influxdb_config = config if load_envvars: statsd_host = statsd_host or os.environ.get('STATSD_HOST') diff --git a/openstack/connection.py b/openstack/connection.py index 3d39440d4..eaade0c9c 100644 --- a/openstack/connection.py +++ b/openstack/connection.py @@ -435,6 +435,14 @@ class Connection( self.log.warning('Configured hook %s cannot be executed: %s', vendor_hook, e) + # Add additional metrics into the configuration according to the + # selected connection. We don't want to deal with overall config in the + # proxy, just pass required part. + if (self.config._influxdb_config + and 'additional_metric_tags' in self.config.config): + self.config._influxdb_config['additional_metric_tags'] = \ + self.config.config['additional_metric_tags'] + @property def session(self): if not self._session: diff --git a/openstack/proxy.py b/openstack/proxy.py index dbb246399..a8812de70 100644 --- a/openstack/proxy.py +++ b/openstack/proxy.py @@ -91,15 +91,23 @@ class Proxy(adapter.Adapter): if conn: # Per-request setting should take precedence global_request_id = conn._global_request_id - response = super(Proxy, self).request( - url, method, - connect_retries=connect_retries, raise_exc=raise_exc, - global_request_id=global_request_id, - **kwargs) - for h in response.history: - self._report_stats(h) - self._report_stats(response) - return response + try: + response = super(Proxy, self).request( + url, method, + connect_retries=connect_retries, raise_exc=raise_exc, + global_request_id=global_request_id, + **kwargs) + for h in response.history: + self._report_stats(h) + self._report_stats(response) + return response + except Exception as e: + # If we want metrics to be generated we also need to generate some + # in case of exceptions as well, so that timeouts and connection + # problems (especially when called from ansible) are being + # generated as well. + self._report_stats(None, url, method, e) + raise def _extract_name(self, url, service_type=None, project_id=None): '''Produce a key name to use in logging/metrics from the URL path. @@ -185,58 +193,91 @@ class Proxy(adapter.Adapter): return name_parts - def _report_stats(self, response): + def _report_stats(self, response, url=None, method=None, exc=None): if self._statsd_client: - self._report_stats_statsd(response) + self._report_stats_statsd(response, url, method, exc) if self._prometheus_counter and self._prometheus_histogram: - self._report_stats_prometheus(response) + self._report_stats_prometheus(response, url, method, exc) if self._influxdb_client: - self._report_stats_influxdb(response) + self._report_stats_influxdb(response, url, method, exc) - def _report_stats_statsd(self, response): - name_parts = self._extract_name(response.request.url, + def _report_stats_statsd(self, response, url=None, method=None, exc=None): + if response is not None and not url: + url = response.request.url + if response is not None and not method: + method = response.request.method + name_parts = self._extract_name(url, self.service_type, self.session.get_project_id()) key = '.'.join( - [self._statsd_prefix, self.service_type, response.request.method] + [self._statsd_prefix, self.service_type, method] + name_parts) - self._statsd_client.timing(key, int( - response.elapsed.microseconds / 1000)) - self._statsd_client.incr(key) + if response is not None: + duration = int(response.elapsed.microseconds / 1000) + self._statsd_client.timing(key, duration) + self._statsd_client.incr(key) + elif exc is not None: + self._statsd_client.incr('%s.failed' % key) - def _report_stats_prometheus(self, response): - labels = dict( - method=response.request.method, - endpoint=response.request.url, - service_type=self.service_type, - status_code=response.status_code, - ) - self._prometheus_counter.labels(**labels).inc() - self._prometheus_histogram.labels(**labels).observe( - response.elapsed.microseconds / 1000) + def _report_stats_prometheus(self, response, url=None, method=None, + exc=None): + if response is not None and not url: + url = response.request.url + if response is not None and not method: + method = response.request.method + if response is not None: + labels = dict( + method=method, + endpoint=url, + service_type=self.service_type, + status_code=response.status_code, + ) + self._prometheus_counter.labels(**labels).inc() + self._prometheus_histogram.labels(**labels).observe( + response.elapsed.microseconds / 1000) - def _report_stats_influxdb(self, response): + def _report_stats_influxdb(self, response, url=None, method=None, + exc=None): # NOTE(gtema): status_code is saved both as tag and field to give # ability showing it as a value and not only as a legend. # However Influx is not ok with having same name in tags and fields, # therefore use different names. + if response is not None and not url: + url = response.request.url + if response is not None and not method: + method = response.request.method + tags = dict( + method=method, + name='_'.join(self._extract_name( + url, self.service_type, + self.session.get_project_id())) + ) + fields = dict( + attempted=1 + ) + if response is not None: + fields['duration'] = int(response.elapsed.microseconds / 1000) + tags['status_code'] = str(response.status_code) + # Note(gtema): emit also status_code as a value (counter) + fields[str(response.status_code)] = 1 + fields['%s.%s' % (method, response.status_code)] = 1 + # Note(gtema): status_code field itself is also very helpful on the + # graphs to show what was the code, instead of counting its + # occurences + fields['status_code_val'] = response.status_code + elif exc: + fields['failed'] = 1 + if 'additional_metric_tags' in self._influxdb_config: + tags.update(self._influxdb_config['additional_metric_tags']) + measurement = self._influxdb_config.get( + 'measurement', 'openstack_api') \ + if self._influxdb_config else 'openstack_api' + # Note(gtema) append service name into the measurement name + measurement = '%s.%s' % (measurement, self.service_type) data = [dict( - measurement=(self._influxdb_config.get('measurement', - 'openstack_api') - if self._influxdb_config else 'openstack_api'), - tags=dict( - method=response.request.method, - service_type=self.service_type, - status_code=response.status_code, - name='_'.join(self._extract_name( - response.request.url, self.service_type, - self.session.get_project_id()) - ) - ), - fields=dict( - duration=int(response.elapsed.microseconds / 1000), - status_code_val=int(response.status_code) - ) + measurement=measurement, + tags=tags, + fields=fields )] try: self._influxdb_client.write_points(data)