WIP-Templatizing Per-Process Openstack Dashboards.

+ Tag OSP General performance Dashboards and Link all together
+ Display Per-Process CPU as 100% = one core utilized rather than 100% = all cores utilized
+ Better display of Process/Thread Counts
+ Add ceilometer-polling process to collectd-openstack configs

Change-Id: I066d298cb1394b581d993ea91154aa73604b5dde
This commit is contained in:
Alex Krzos 2016-03-31 22:54:41 -04:00
parent e658e7f815
commit 06427c343a
11 changed files with 4438 additions and 12206 deletions

View File

@ -8,5 +8,21 @@
vars:
ansible_connection: local
overwrite_existing: true
# Per-node-type dashboards to generate; the dashboard-openstack role loops over
# this list once per task.
#   template_name      - prefix of the Jinja2 template to render
#                        (<template_name>_general_system_performance.json.j2).
#   template_node_type - not referenced by the tasks in this change; presumably
#                        consumed inside the template to select nodes
#                        ("*" looks like a match-all wildcard) - TODO confirm.
#   process_list_name  - names the rendered JSON file and, lower-cased, the
#                        Grafana dashboard slug; the values also appear as keys
#                        of per_process_panels in the role vars.
dashboards:
- template_name: openstack
template_node_type: undercloud
process_list_name: Openstack-Undercloud
- template_name: openstack
template_node_type: controller
process_list_name: Openstack-Controller
- template_name: openstack
template_node_type: compute
process_list_name: Openstack-Compute
- template_name: openstack
template_node_type: ceph
process_list_name: Openstack-Ceph
# Quoted so the asterisk is read as a string rather than a YAML alias.
- template_name: openstack
template_node_type: "*"
process_list_name: Openstack
roles:
- dashboard-openstack

View File

@ -33,8 +33,8 @@ images:
# DNS Server to add
dns_server: 8.8.8.8
# epel Repository for collectd packages
epel_repo: https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
# epel7 rpm for collectd packages
epel7_rpm: https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
# Host where connmond will be running
connmon_host: 192.0.2.1

View File

@ -3,14 +3,24 @@
# Install/run collectd for browbeat (Generic)
#
- name: Import EPEL GPG Key
rpm_key: key=https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7
state=present
- name: Check for EPEL
shell: rpm -qa | grep -q epel-release
ignore_errors: true
register: epel_installed
- name: Check for EPEL repo
yum: name=https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
state=present
#
# (akrzos) Installing EPEL on RHEL 7 with the yum module has proven unreliable, so the rpm
# command is used to install it instead. Ansible displays a warning in its output for this task.
#
- name: Install EPEL rpm
command: rpm -ivh {{ epel7_rpm }}
become: true
when: epel_installed.rc != 0
#
# (akrzos) yum module works at this point due to the fact the EPEL repo now exists. EPEL rpm is
# installed at this point in time.
#
- name: Install collectd rpms
yum: name={{ item }} state=present
become: true
@ -119,8 +129,7 @@
service: name=collectd state=restarted enabled=true
become: true
- name: Disable EPEL Repo
ini_file: dest=/etc/yum.repos.d/epel.repo
section=epel
option=enabled
value=0
- name: Disable EPEL
shell: rpm -e epel-release
ignore_errors: true
become: true

View File

@ -3,14 +3,24 @@
# Install/run collectd for browbeat
#
- name: Import EPEL GPG Key
rpm_key: key=https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7
state=present
- name: Check for EPEL
shell: rpm -qa | grep -q epel-release
ignore_errors: true
register: epel_installed
- name: Check for EPEL repo
yum: name=https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
state=present
#
# (akrzos) Installing EPEL on RHEL 7 with the yum module has proven unreliable, so the rpm
# command is used to install it instead. Ansible displays a warning in its output for this task.
#
- name: Install EPEL rpm
command: rpm -ivh {{ epel7_rpm }}
become: true
when: epel_installed.rc != 0
#
# (akrzos) yum module works at this point due to the fact the EPEL repo now exists. EPEL rpm is
# installed at this point in time.
#
- name: Install collectd rpms
yum: name={{ item }} state=present
become: true
@ -83,8 +93,7 @@
service: name=collectd state=restarted enabled=true
become: true
- name: Disable EPEL Repo
ini_file: dest=/etc/yum.repos.d/epel.repo
section=epel
option=enabled
value=0
- name: Disable EPEL
shell: rpm -e epel-release
ignore_errors: true
become: true

View File

@ -89,6 +89,7 @@ PreCacheChain "PreCache"
ProcessMatch "ceilometer-alarm-notifier" "python.+ceilometer-alarm-notifier"
ProcessMatch "ceilometer-api" "python.+ceilometer-api"
ProcessMatch "ceilometer-collector" "python.+ceilometer-collector"
ProcessMatch "ceilometer-polling" "python.+ceilometer-polling"
# Ceph
ProcessMatch "ceph-mon" "^/usr/bin/ceph-mon"

View File

@ -102,6 +102,7 @@ PreCacheChain "PreCache"
ProcessMatch "ceilometer-alarm-notifier" "python.+ceilometer-alarm-notifier"
ProcessMatch "ceilometer-api" "python.+ceilometer-api"
ProcessMatch "ceilometer-collector" "python.+ceilometer-collector"
ProcessMatch "ceilometer-polling" "python.+ceilometer-polling"
# Ceph
ProcessMatch "ceph-mon" "^/usr/bin/ceph-mon"

View File

@ -98,6 +98,7 @@ PreCacheChain "PreCache"
ProcessMatch "ceilometer-alarm-notifier" "python.+ceilometer-alarm-notifier"
ProcessMatch "ceilometer-api" "python.+ceilometer-api"
ProcessMatch "ceilometer-collector" "python.+ceilometer-collector"
ProcessMatch "ceilometer-polling" "python.+ceilometer-polling"
# Ceph
ProcessMatch "ceph-mon" "^/usr/bin/ceph-mon"

View File

@ -3,6 +3,12 @@
# Generate Openstack collectd to graphite dashboards
#
# Render one dashboard JSON per entry in `dashboards`: the template is chosen
# by item.template_name and the output is written into this role's files/
# directory, named after item.process_list_name.
- name: Generate Individual Machine Dashboards
template:
src: "{{role_path}}/templates/{{item.template_name}}_general_system_performance.json.j2"
dest: "{{role_path}}/files/{{item.process_list_name}}_general_system_performance.json"
with_items: "{{dashboards}}"
- name: Generate All Openstack Nodes CPU/Memory/Disk/Network Dashboards
template:
src: "{{item}}.json.j2"
@ -13,7 +19,12 @@
- all_disk_graphs
- all_network_graphs
- name: Remove Existing Dashboards
# Issue an HTTP DELETE against Grafana for each per-node-type dashboard. The
# slug is the lower-cased process_list_name followed by
# "-general-system-performance". Runs only when overwrite_existing is true.
# NOTE(review): the Grafana username and password are embedded in the URL on
# the curl command line.
- name: Remove Existing Individual Machine Dashboards
command: "curl -X DELETE -H 'Content-Type: application/json' http://{{grafana_username}}:{{grafana_password}}@{{grafana_host}}:{{grafana_port}}/api/dashboards/db/{{item.process_list_name|lower}}-general-system-performance"
when: overwrite_existing
with_items: "{{dashboards}}"
- name: Remove Existing All Openstack Nodes CPU/Memory/Disk/Network Dashboards
command: "curl -X DELETE -H 'Content-Type: application/json' http://{{grafana_username}}:{{grafana_password}}@{{grafana_host}}:{{grafana_port}}/api/dashboards/db/{{item}}"
when: overwrite_existing
with_items:
@ -21,9 +32,12 @@
- "{{dashboard_cloud_name}}-all-nodes-memory"
- "{{dashboard_cloud_name}}-all-nodes-disk"
- "{{dashboard_cloud_name}}-all-nodes-network"
- openstack-general-system-performance
- cloud-system-performance-comparsion
- name: Upload dashboards to Grafana
command: "curl -X POST -H 'Content-Type: application/json' -d @{{role_path}}/files/{{item.process_list_name}}_general_system_performance.json http://{{grafana_username}}:{{grafana_password}}@{{grafana_host}}:{{grafana_port}}/api/dashboards/db"
with_items: "{{dashboards}}"
- name: Upload Dashboards to Grafana
command: "curl -X POST -H 'Content-Type: application/json' -d @{{item}} http://{{grafana_username}}:{{grafana_password}}@{{grafana_host}}:{{grafana_port}}/api/dashboards/db"
with_items:
@ -31,13 +45,16 @@
- "{{role_path}}/files/all_memory_graphs.json"
- "{{role_path}}/files/all_disk_graphs.json"
- "{{role_path}}/files/all_network_graphs.json"
- "{{role_path}}/files/openstack_general_system_performance.json"
- "{{role_path}}/files/cloud_system_performance_comparsion.json"
- name: Remove leftover json file(s)
file: path={{item}} state=absent
# Delete the rendered per-node-type dashboard JSON from this role's files/
# directory; the path is the same one the generate task writes to.
- name: Remove leftover json file(s) from Individual Machine Dashboards
file: path={{role_path}}/files/{{item.process_list_name}}_general_system_performance.json state=absent
with_items: "{{dashboards}}"
- name: Remove leftover json file(s) from All Openstack Nodes CPU/Memory/Disk/Network Dashboards
file: path={{role_path}}/files/{{item}} state=absent
with_items:
- "{{role_path}}/files/all_cpu_graphs.json"
- "{{role_path}}/files/all_memory_graphs.json"
- "{{role_path}}/files/all_disk_graphs.json"
- "{{role_path}}/files/all_network_graphs.json"
- all_cpu_graphs.json
- all_memory_graphs.json
- all_disk_graphs.json
- all_network_graphs.json

View File

@ -0,0 +1,343 @@
---
#
# Vars to generate Per-Process component of Openstack Dashboards
#
# Graph definitions shared by every per-process dashboard. Each entry describes
# one graph:
#   name          - graph title
#   y1units       - unit string for the left Y axis
#   nullPointMode - how gaps in the series are drawn
#   metrics       - series to plot; `query` is a metric-path suffix
#                   (presumably appended to the per-process collectd path by
#                   the dashboard template - verify against the template).
per_process_metrics:
  # Number of processes and threads.
  - name: "Process/Thread Counts"
    y1units: "short"
    nullPointMode: "connected"
    metrics:
      - name: "Processes"
        query: ".ps_count.processes"
      - name: "Threads"
        query: ".ps_count.threads"
  # System and user CPU time.
  - name: "Process CPU"
    y1units: "percent"
    nullPointMode: "connected"
    metrics:
      - name: "System"
        query: ".ps_cputime.syst"
      - name: "User"
        query: ".ps_cputime.user"
  # Resident and virtual memory.
  # NOTE(review): the axis unit is "bits"; confirm this is the intended unit
  # for the ps_rss / ps_vm series.
  - name: "Process Memory"
    y1units: "bits"
    nullPointMode: "connected"
    metrics:
      - name: "RSS"
        query: ".ps_rss"
      - name: "Virtual"
        query: ".ps_vm"
  # Major and minor page faults.
  - name: "Process Page Faults"
    y1units: "short"
    nullPointMode: "connected"
    metrics:
      - name: "Majflt"
        query: ".ps_pagefaults.majflt"
      - name: "Minflt"
        query: ".ps_pagefaults.minflt"
  # Read/write operation counts. nullPointMode is the quoted string "null",
  # not a YAML null.
  - name: "Process IOPs(Estimated via SYSCALLS)"
    y1units: "iops"
    nullPointMode: "null"
    metrics:
      - name: "Read"
        query: ".ps_disk_ops.read"
      - name: "Write"
        query: ".ps_disk_ops.write"
  # Read/write byte counts.
  - name: "Process IO Throughput(Estimated via SYSCALLS)"
    y1units: "bytes"
    nullPointMode: "null"
    metrics:
      - name: "Read"
        query: ".ps_disk_octets.read"
      - name: "Write"
        query: ".ps_disk_octets.write"
# Process lists for each generated dashboard. The top-level keys are the same
# strings used as `process_list_name` in the playbook's `dashboards` list. Each
# key holds a list of panels; a panel has a `name` and the `processes` shown in
# it.
#
# Process names are expected to match the names registered with collectd's
# ProcessMatch entries. "ceilometer-alarm-notifier" is spelled the way the
# collectd configs register it (it was previously misspelled
# "ceilometer-alaram-notifier" in three lists).
# NOTE(review): no ProcessMatch for "glance-agent" is visible in this change;
# this may have been meant as "glance-api" - verify against the collectd
# configs before changing it.
per_process_panels:
  #
  # This dashboard should only contain Openstack Undercloud Node processes
  #
  Openstack-Undercloud:
    - name: "Everything Else"
      processes:
        - dnsmasq
        - httpd
        - memcached
        - mysqld
        - ovs-vswitchd
        - ovsdb-server
        - rabbitmq
    - name: "Nova"
      processes:
        - nova-api
        - nova-cert
        - nova-conductor
        - nova-scheduler
    - name: "Neutron"
      processes:
        - neutron-server
        - neutron-openvswitch-agent
        - neutron-ns-metadata-proxy
        - neutron-metadata-proxy
        - neutron-l3-agent
        - neutron-dhcp-agent
    - name: "Keystone"
      processes:
        - keystone-all
    - name: "Heat"
      processes:
        - heat-api
        - heat-api-cfn
        - heat-api-cloudwatch
        - heat-engine
    - name: "Glance"
      processes:
        - glance-agent
        - glance-registry
    - name: "Ceilometer"
      processes:
        - ceilometer-api
        - ceilometer-agent-central
        - ceilometer-agent-notification
        - ceilometer-alarm-evaluator
        - ceilometer-alarm-notifier
        - ceilometer-collector
    - name: "Ironic"
      processes:
        - ironic-api
        - ironic-conductor
        - dnsmasq-ironic
    - name: "Collectd"
      processes:
        - collectd
  #
  # This dashboard should only contain Openstack Controller Node processes
  #
  Openstack-Controller:
    - name: "Everything Else"
      processes:
        - dnsmasq
        - haproxy
        - httpd
        - memcached
        - mongod
        - mysqld
        - ovs-vswitchd
        - ovsdb-server
        - rabbitmq
        - redis-server
    - name: "Nova"
      processes:
        - nova-api
        - nova-cert
        - nova-conductor
        - nova-scheduler
        - nova-consoleauth
        - nova-novncproxy
    - name: "Neutron"
      processes:
        - neutron-server
        - neutron-openvswitch-agent
        - neutron-ns-metadata-proxy
        - neutron-metadata-proxy
        - neutron-l3-agent
        - neutron-dhcp-agent
    - name: "Keystone"
      processes:
        - keystone-all
    - name: "Cinder"
      processes:
        - cinder-api
        - cinder-scheduler
        - cinder-volume
    - name: "Heat"
      processes:
        - heat-api
        - heat-api-cfn
        - heat-api-cloudwatch
        - heat-engine
    - name: "Glance"
      processes:
        - glance-agent
        - glance-registry
    - name: "Swift"
      processes:
        - swift-account-auditor
        - swift-account-reaper
        - swift-account-replicator
        - swift-account-server
        - swift-container-auditor
        - swift-container-replicator
        - swift-container-server
        - swift-container-updater
        - swift-object-auditor
        - swift-object-replicator
        - swift-object-server
        - swift-object-updater
        - swift-proxy-server
    - name: "Ceilometer"
      processes:
        - ceilometer-api
        - ceilometer-agent-central
        - ceilometer-agent-notification
        - ceilometer-alarm-evaluator
        - ceilometer-alarm-notifier
        - ceilometer-collector
    - name: "Corosync/Pacemaker"
      processes:
        - corosync
        - pacemakerd
        - cib
        - stonithd
        - attrd
        - pengine
        - crmd
        - lrmd
        - pcsd
    - name: "Collectd"
      processes:
        - collectd
  #
  # This dashboard should only contain Openstack Compute Node processes
  #
  Openstack-Compute:
    - name: "Everything Else"
      processes:
        - ovs-vswitchd
        - ovsdb-server
        - qemu-kvm
    - name: "Nova"
      processes:
        - nova-compute
    - name: "Neutron"
      processes:
        - neutron-openvswitch-agent
    - name: "Ceilometer"
      processes:
        - ceilometer-agent-compute
        - ceilometer-polling
    - name: "Collectd"
      processes:
        - collectd
  #
  # This dashboard should only contain Openstack Ceph Node processes
  #
  Openstack-Ceph:
    - name: "Ceph"
      processes:
        - ceph-osd
        - ceph-mon
        - salt-minion
        - diamond
    - name: "Collectd"
      processes:
        - collectd
  #
  # This dashboard "Openstack" aims to be comprehensive with all processes across:
  # Undercloud, Controller, Compute, Ceph, etc... Nodes
  #
  Openstack:
    - name: "Everything Else"
      processes:
        - dnsmasq
        - haproxy
        - httpd
        - memcached
        - mongod
        - mysqld
        - ovs-vswitchd
        - ovsdb-server
        - qemu-kvm
        - rabbitmq
        - redis-server
        - tuskar-api
    - name: "Nova"
      processes:
        - nova-api
        - nova-cert
        - nova-conductor
        - nova-scheduler
        - nova-compute
        - nova-consoleauth
        - nova-novncproxy
    - name: "Neutron"
      processes:
        - neutron-server
        - neutron-openvswitch-agent
        - neutron-ns-metadata-proxy
        - neutron-metadata-proxy
        - neutron-l3-agent
        - neutron-dhcp-agent
    - name: "Keystone"
      processes:
        - keystone-all
    - name: "Cinder"
      processes:
        - cinder-api
        - cinder-scheduler
        - cinder-volume
    - name: "Heat"
      processes:
        - heat-api
        - heat-api-cfn
        - heat-api-cloudwatch
        - heat-engine
    - name: "Glance"
      processes:
        - glance-agent
        - glance-registry
    - name: "Swift"
      processes:
        - swift-account-auditor
        - swift-account-reaper
        - swift-account-replicator
        - swift-account-server
        - swift-container-auditor
        - swift-container-replicator
        - swift-container-server
        - swift-container-updater
        - swift-object-auditor
        - swift-object-replicator
        - swift-object-server
        - swift-object-updater
        - swift-proxy-server
    - name: "Ceilometer"
      processes:
        - ceilometer-api
        - ceilometer-agent-central
        - ceilometer-agent-notification
        - ceilometer-alarm-evaluator
        - ceilometer-alarm-notifier
        - ceilometer-collector
        - ceilometer-agent-compute
        - ceilometer-polling
    - name: "Ironic"
      processes:
        - ironic-api
        - ironic-conductor
        - dnsmasq-ironic
    - name: "Ceph"
      processes:
        - ceph-osd
        - ceph-mon
        - salt-minion
        - diamond
    - name: "Corosync/Pacemaker"
      processes:
        - corosync
        - pacemakerd
        - cib
        - stonithd
        - attrd
        - pengine
        - crmd
        - lrmd
        - pcsd
    - name: "Collectd"
      processes:
        - collectd