From 185797a0e5e46fd0f68f7b423e79f732c8541d68 Mon Sep 17 00:00:00 2001
From: Ian Wienand <iwienand@redhat.com>
Date: Fri, 26 Jun 2020 10:23:16 +1000
Subject: [PATCH] Graphite container deployment

This deploys graphite from the upstream container.

We override the statsd configuration to have it listen on ipv6.
Similarly we override the nginx config to listen on ipv6, enable ssl,
redirect port 80 to 443, and block the /admin page (we don't use it).

For production we will just want to put some cinder storage in
/opt/graphite/storage on the production host and figure out how to
migrate the old stats.  There is also a bit of cleanup that will follow,
because we half-converted graphite01.opendev.org -- so everything can't
be in the same group till that is gone.

Testing has been added to push some stats and ensure they are seen.

Change-Id: Ie843b3d90a72564ef90805f820c8abc61a71017d
---
 .../service/group_vars/graphite_opendev.org   | 14 +++++
 inventory/service/groups.yaml                 | 13 ++++-
 .../host_vars/graphite02.opendev.org.yaml     |  4 ++
 playbooks/roles/graphite/README.rst           |  1 +
 playbooks/roles/graphite/handlers/main.yaml   |  4 ++
 playbooks/roles/graphite/tasks/main.yaml      | 50 ++++++++++++++++
 .../graphite/templates/docker-compose.yaml.j2 | 15 +++++
 .../templates/graphite-statsd.conf.j2         | 57 ++++++++++++++++++
 .../roles/graphite/templates/statsd.js.j2     |  9 +++
 .../test-fixtures/results.yaml                |  5 +-
 .../handlers/main.yaml                        |  3 +
 .../handlers/restart_graphite.yaml            | 12 ++++
 playbooks/service-graphite.yaml               |  6 ++
 testinfra/test_graphite.py                    | 58 +++++++++++++++++++
 zuul.d/infra-prod.yaml                        | 16 +++++
 zuul.d/project.yaml                           |  3 +
 zuul.d/system-config-run.yaml                 | 33 +++++++++++
 17 files changed, 299 insertions(+), 4 deletions(-)
 create mode 100644 inventory/service/group_vars/graphite_opendev.org
 create mode 100644 inventory/service/host_vars/graphite02.opendev.org.yaml
 create mode 100644 playbooks/roles/graphite/README.rst
 create mode 100644 playbooks/roles/graphite/handlers/main.yaml
 create mode 100644 playbooks/roles/graphite/tasks/main.yaml
 create mode 100644 playbooks/roles/graphite/templates/docker-compose.yaml.j2
 create mode 100644 playbooks/roles/graphite/templates/graphite-statsd.conf.j2
 create mode 100644 playbooks/roles/graphite/templates/statsd.js.j2
 create mode 100644 playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml
 create mode 100644 playbooks/service-graphite.yaml
 create mode 100644 testinfra/test_graphite.py

diff --git a/inventory/service/group_vars/graphite_opendev.yaml b/inventory/service/group_vars/graphite_opendev.yaml
new file mode 100644
index 0000000000..ccbd309b14
--- /dev/null
+++ b/inventory/service/group_vars/graphite_opendev.yaml
@@ -0,0 +1,14 @@
+iptables_extra_allowed_hosts:
+  - hostname: bridge.openstack.org
+    port: 8125
+    protocol: udp
+  - hostname: opendev.org
+    port: 8125
+    protocol: udp
+
+iptables_extra_allowed_groups:
+  - {'protocol': 'udp', 'port': '8125', 'group': 'firehose'}
+  - {'protocol': 'udp', 'port': '8125', 'group': 'mirror-update'}
+  - {'protocol': 'udp', 'port': '8125', 'group': 'logstash'}
+  - {'protocol': 'udp', 'port': '8125', 'group': 'nodepool'}
+  - {'protocol': 'udp', 'port': '8125', 'group': 'zuul'}
diff --git a/inventory/service/groups.yaml b/inventory/service/groups.yaml
index 9aa74a05cb..eb80ce26c8 100644
--- a/inventory/service/groups.yaml
+++ b/inventory/service/groups.yaml
@@ -65,6 +65,10 @@ groups:
     - grafana[0-9]*.opendev.org
   graphite:
     - graphite*.open*.org
+  # NOTE(ianw) : to be cleaned up once the half-puppet
+  # graphite01.opendev.org is gone.
+  graphite_opendev:
+    - graphite02.opendev.org
   health:
     - health[0-9]*.openstack.org
   jvb:
@@ -74,7 +78,7 @@ groups:
   letsencrypt:
     - etherpad[0-9]*.opendev.org
     - gitea[0-9]*.opendev.org
-    - graphite01.opendev.org
+    - graphite[0-9]*.opendev.org
     - grafana[0-9]*.opendev.org
     - insecure-ci-registry[0-9]*.opendev.org
     - meetpad[0-9]*.opendev.org
@@ -130,7 +134,10 @@ groups:
     - ethercalc[0-9]*.open*.org
     - firehose[0-9]*.open*.org
     - grafana[0-9]*.open*.org
-    - graphite*.open*.org
+    # TODO(ianw) : this is a weird one we half-converted and moved
+    # into opendev.org in the early days of opendev.  remove when
+    # graphite02 up.
+    - graphite01.opendev.org
     - health[0-9]*.openstack.org
     - kdc[0-9]*.open*.org
     - lists*.katacontainers.io
@@ -167,7 +174,7 @@ groups:
     - ethercalc[0-9]*.open*.org
     - firehose[0-9]*.open*.org
     - grafana[0-9]*.open*.org
-    - graphite[0-9]*.open*.org
+    - graphite01.opendev.org
     - health[0-9]*.openstack.org
     - kdc[0-9]*.open*.org
     - lists*.katacontainers.io
diff --git a/inventory/service/host_vars/graphite02.opendev.org.yaml b/inventory/service/host_vars/graphite02.opendev.org.yaml
new file mode 100644
index 0000000000..939d0ca663
--- /dev/null
+++ b/inventory/service/host_vars/graphite02.opendev.org.yaml
@@ -0,0 +1,4 @@
+letsencrypt_certs:
+  graphite02-main:
+    - graphite02.opendev.org
+    - graphite.opendev.org
diff --git a/playbooks/roles/graphite/README.rst b/playbooks/roles/graphite/README.rst
new file mode 100644
index 0000000000..e045c696eb
--- /dev/null
+++ b/playbooks/roles/graphite/README.rst
@@ -0,0 +1 @@
+Run Graphite
diff --git a/playbooks/roles/graphite/handlers/main.yaml b/playbooks/roles/graphite/handlers/main.yaml
new file mode 100644
index 0000000000..08b750b530
--- /dev/null
+++ b/playbooks/roles/graphite/handlers/main.yaml
@@ -0,0 +1,4 @@
+- name: graphite Reload apache2
+  service:
+    name: apache2
+    state: reloaded
diff --git a/playbooks/roles/graphite/tasks/main.yaml b/playbooks/roles/graphite/tasks/main.yaml
new file mode 100644
index 0000000000..852f9a9b00
--- /dev/null
+++ b/playbooks/roles/graphite/tasks/main.yaml
@@ -0,0 +1,50 @@
+- name: Ensure docker-compose directory exists
+  file:
+    state: directory
+    path: /etc/graphite-docker
+
+- name: Write settings file
+  template:
+    src: docker-compose.yaml.j2
+    dest: /etc/graphite-docker/docker-compose.yaml
+
+- name: Write nginx override config
+  template:
+    src: graphite-statsd.conf.j2
+    dest: /etc/graphite-docker/graphite-statsd.conf
+
+- name: Write statsd override config
+  template:
+    src: statsd.js.j2
+    dest: /etc/graphite-docker/statsd.js
+
+- name: Ensure storage directory exists
+  file:
+    state: directory
+    path: /opt/graphite/storage
+
+- name: Ensure log directory exists
+  file:
+    state: directory
+    path: /var/log/graphite
+
+- name: Run docker-compose pull
+  shell:
+    cmd: docker-compose pull
+    chdir: /etc/graphite-docker/
+
+- name: Run docker-compose up
+  shell:
+    cmd: docker-compose up -d
+    chdir: /etc/graphite-docker/
+
+- name: Run docker prune to cleanup unneeded images
+  shell:
+    cmd: docker image prune -f
+
+# This is handy to have on the host for checking stat ingestion
+- name: Install netcat
+  package:
+    name:
+      - netcat
+    state: present
diff --git a/playbooks/roles/graphite/templates/docker-compose.yaml.j2 b/playbooks/roles/graphite/templates/docker-compose.yaml.j2
new file mode 100644
index 0000000000..02154cdfe0
--- /dev/null
+++ b/playbooks/roles/graphite/templates/docker-compose.yaml.j2
@@ -0,0 +1,15 @@
+# Version 2 is the latest that is supported by docker-compose in
+# Ubuntu Xenial.
+version: '2'
+
+services:
+  graphite:
+    restart: always
+    image: docker.io/graphiteapp/graphite-statsd
+    network_mode: host
+    volumes:
+      - /etc/graphite-docker/graphite-statsd.conf:/etc/nginx/sites-enabled/graphite-statsd.conf
+      - /etc/graphite-docker/statsd.js:/opt/statsd/config/udp.js
+      - /etc/letsencrypt-certs:/etc/letsencrypt-certs
+      - /opt/graphite/storage:/opt/graphite/storage
+      - /var/log/graphite:/var/log/
diff --git a/playbooks/roles/graphite/templates/graphite-statsd.conf.j2 b/playbooks/roles/graphite/templates/graphite-statsd.conf.j2
new file mode 100644
index 0000000000..ec7e73c7d4
--- /dev/null
+++ b/playbooks/roles/graphite/templates/graphite-statsd.conf.j2
@@ -0,0 +1,57 @@
+server {
+  listen 80 default_server;
+  listen [::]:80 default_server;
+  server_name _;
+
+  return 301 https://$host$request_uri;
+}
+
+server {
+  listen 443 ssl;
+  listen [::]:443 ssl;
+  server_name {{ inventory_hostname }};
+
+  ssl_certificate /etc/letsencrypt-certs/{{ inventory_hostname }}/{{ inventory_hostname }}.cer;
+  ssl_certificate_key /etc/letsencrypt-certs/{{ inventory_hostname }}/{{ inventory_hostname }}.key;
+  root /opt/graphite/static;
+  index index.html;
+
+  location /nginx_status {
+    stub_status on;
+    access_log   off;
+    allow 127.0.0.1;
+    deny all;
+  }
+
+  # No remote login
+  location /admin {
+    allow 127.0.0.1;
+    deny all;
+  }
+
+  location /media {
+    # django admin static files
+    alias /usr/local/lib/python3.6/dist-packages/django/contrib/admin/media/;
+  }
+
+  location /admin/auth/admin {
+    alias /usr/local/lib/python3.6/dist-packages/django/contrib/admin/static/admin;
+  }
+
+  location /admin/auth/user/admin {
+    alias /usr/local/lib/python3.6/dist-packages/django/contrib/admin/static/admin;
+  }
+
+  location / {
+    proxy_pass http://127.0.0.1:8080;
+    proxy_set_header  Host      $http_host;
+    proxy_set_header  X-Real-IP $remote_addr;
+    proxy_set_header  X-Forwarded-For $proxy_add_x_forwarded_for;
+
+    add_header 'Access-Control-Allow-Origin' '*';
+    add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS';
+    add_header 'Access-Control-Allow-Headers' 'Authorization, Content-Type';
+    add_header 'Access-Control-Allow-Credentials' 'true';
+  }
+
+}
diff --git a/playbooks/roles/graphite/templates/statsd.js.j2 b/playbooks/roles/graphite/templates/statsd.js.j2
new file mode 100644
index 0000000000..de9d7e85c3
--- /dev/null
+++ b/playbooks/roles/graphite/templates/statsd.js.j2
@@ -0,0 +1,9 @@
+{
+    "graphiteHost": "127.0.0.1",
+    "graphitePort": 2003,
+    "port": 8125,
+    "flushInterval": 10000,
+    "servers": [
+	{ server: "./servers/udp", address: "::", port: 8125, address_ipv6: true }
+    ]
+}
diff --git a/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml b/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml
index 91571a98ab..ad47003481 100644
--- a/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml
+++ b/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml
@@ -18,8 +18,11 @@ results:
     - puppet
     - puppet4
 
-  graphite.opendev.org:
+  graphite01.opendev.org:
+    - puppet
+    - puppet4
     - graphite
+    - letsencrypt
     - puppet
     - webservers
 
diff --git a/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml b/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml
index 813bf47198..2de2d6302c 100644
--- a/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml
+++ b/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml
@@ -12,6 +12,9 @@
 # https://docs.ansible.com/ansible/latest/porting_guides/porting_guide_2.8.html#imports-as-handlers
 
 - name: letsencrypt updated graphite01-main
   include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
 
+- name: letsencrypt updated graphite02-main
+  include_tasks: roles/letsencrypt-create-certs/handlers/restart_graphite.yaml
+
 - name: letsencrypt updated tarballs-main
diff --git a/playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml b/playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml
new file mode 100644
index 0000000000..ae8c27402d
--- /dev/null
+++ b/playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml
@@ -0,0 +1,12 @@
+- name: Check for running nginx
+  command: pgrep -f nginx
+  # pgrep exits non-zero when no process matches; not an error here
+  failed_when: false
+  changed_when: false
+  register: nginx_pids
+
+- name: Restart graphite container
+  shell:
+    cmd: docker-compose restart graphite
+    chdir: /etc/graphite-docker
+  when: nginx_pids.rc == 0
diff --git a/playbooks/service-graphite.yaml b/playbooks/service-graphite.yaml
new file mode 100644
index 0000000000..e7c31e7c55
--- /dev/null
+++ b/playbooks/service-graphite.yaml
@@ -0,0 +1,6 @@
+- hosts: "graphite_opendev:!disabled"
+  name: "Base: configure graphite"
+  roles:
+    - iptables
+    - install-docker
+    - graphite
diff --git a/testinfra/test_graphite.py b/testinfra/test_graphite.py
new file mode 100644
index 0000000000..351187943b
--- /dev/null
+++ b/testinfra/test_graphite.py
@@ -0,0 +1,58 @@
+# Copyright 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import json
+import logging
+import ssl
+import urllib.request
+
+testinfra_hosts = ['graphite02.opendev.org']
+
+
+def test_graphite_container_web_listening(host):
+    graphite_http = host.socket("tcp://127.0.0.1:80")
+    assert graphite_http.is_listening
+
+    graphite_https = host.socket("tcp://127.0.0.1:443")
+    assert graphite_https.is_listening
+
+def test_graphite(host):
+    cmd = host.run('curl --insecure '
+                   '--resolve graphite.opendev.org:443:127.0.0.1 '
+                   'https://graphite.opendev.org')
+    assert '<title>Graphite Browser</title>' in cmd.stdout
+
+def test_graphite_data(host):
+    # Seed a counter for up to 20 seconds; send it over ipv6 so the
+    # statsd ipv6 listener is what receives the packets.
+    cmd = ('timeout 20 bash -c '
+           '\'while true; do echo -n "example:$((RANDOM % 100))|c" '
+           '| nc -6 -w 1 -u localhost 8125; done\'')
+    host.run(cmd)
+
+    url = 'render?from=-10mins&until=now&target=stats.example&format=json'
+
+    # multi-node-hosts-file has setup graphite02.opendev.org to
+    # resolve from hosts.  Certificate verification is deliberately
+    # skipped for this request.
+    with urllib.request.urlopen('https://graphite02.opendev.org/%s' % url,
+                                context=ssl._create_unverified_context()) \
+                                as req:
+        data = json.loads(req.read().decode())
+    logging.debug('got: %s', data)
+
+    # An empty reply means no series matched the target; fail with a
+    # clear message rather than an IndexError on data[0].
+    assert data, 'no series returned for stats.example'
+    # Assert we see some non-null values for this stat
+    assert any(p[0] is not None for p in data[0]['datapoints'])
diff --git a/zuul.d/infra-prod.yaml b/zuul.d/infra-prod.yaml
index 37538e6b23..d52bbe21c2 100644
--- a/zuul.d/infra-prod.yaml
+++ b/zuul.d/infra-prod.yaml
@@ -526,6 +526,22 @@
       - playbooks/roles/logrotate
       - playbooks/roles/iptables/
 
+- job:
+    name: infra-prod-service-graphite
+    parent: infra-prod-service-base
+    description: Run service-graphite.yaml playbook.
+    vars:
+      playbook_name: service-graphite.yaml
+    files:
+      - inventory/
+      - playbooks/service-graphite.yaml
+      - inventory/service/host_vars/graphite02.opendev.org.yaml
+      - inventory/service/group_vars/graphite
+      - playbooks/roles/install-docker/
+      - playbooks/roles/pip3/
+      - playbooks/roles/graphite/
+      - playbooks/roles/iptables/
+
 # Run AFS changes separately so we can make sure to only do one at a time
 # (turns out quorum is nice to have)
 - job:
diff --git a/zuul.d/project.yaml b/zuul.d/project.yaml
index f2e376a347..1ec9e233f8 100644
--- a/zuul.d/project.yaml
+++ b/zuul.d/project.yaml
@@ -48,6 +48,7 @@
               - name: opendev-buildset-registry
               - name: system-config-build-image-grafana
                 soft: true
+        - system-config-run-graphite
         - system-config-run-review:
             dependencies:
               - name: opendev-buildset-registry
@@ -132,6 +133,7 @@
               - name: opendev-buildset-registry
               - name: system-config-upload-image-grafana
                 soft: true
+        - system-config-run-graphite
         - system-config-run-review:
             dependencies:
               - name: opendev-buildset-registry
@@ -226,6 +228,7 @@
                 soft: true
               - name: system-config-promote-image-grafana
                 soft: true
+        - infra-prod-service-graphite
         - infra-prod-service-meetpad
         - infra-prod-service-mirror-update
         - infra-prod-service-mirror
diff --git a/zuul.d/system-config-run.yaml b/zuul.d/system-config-run.yaml
index af85b61dc5..98a25c9329 100644
--- a/zuul.d/system-config-run.yaml
+++ b/zuul.d/system-config-run.yaml
@@ -572,6 +572,39 @@
       - docker/grafana/
       - testinfra/test_grafana.py
 
+- job:
+    name: system-config-run-graphite
+    parent: system-config-run
+    description: |
+      Run the playbook for the graphite servers.
+    timeout: 3600
+    required-projects:
+      - opendev/system-config
+    nodeset:
+      nodes:
+        - name: bridge.openstack.org
+          label: ubuntu-bionic
+        # NOTE(ianw): 01 is a half-puppet opendev.org
+        # server
+        - name: graphite02.opendev.org
+          label: ubuntu-focal
+    vars:
+      run_playbooks:
+        - playbooks/letsencrypt.yaml
+        - playbooks/service-graphite.yaml
+    host-vars:
+      graphite02.opendev.org:
+        host_copy_output:
+          '/var/log/graphite': logs
+    files:
+      - playbooks/bridge.yaml
+      - playbooks/letsencrypt.yaml
+      - playbooks/service-graphite.yaml
+      - playbooks/roles/graphite
+      - playbooks/roles/install-docker/
+      - playbooks/roles/pip3/
+      - testinfra/test_graphite.py
+
 - job:
     name: system-config-run-meetpad
     parent: system-config-run-containers