From 74005bb29a9cae0dd272014cf986624b7e9490aa Mon Sep 17 00:00:00 2001
From: Ian Wienand <iwienand@redhat.com>
Date: Mon, 24 Feb 2020 11:56:43 +1100
Subject: [PATCH] static: add a periodic 404 checker

This is an alternative to Iccf24a72cf82592bae8c699f9f857aa54fc74f10,
which removes the 404 scraping tool.  Instead, this change creates a
zuul user on the static server, enables login for it via the
system-config per-project ssh key, and then periodically runs the 404
scraping script on that server.

Change-Id: I30467d791a7877b5469b173926216615eb57d035
---
 .zuul.yaml                             |  9 +++++++
 playbooks/periodic/404.yaml            | 37 ++++++++++++++++++++++++++
 playbooks/roles/static/files/zuul.sudo |  1 +
 playbooks/roles/static/tasks/main.yaml |  3 +++
 playbooks/roles/static/tasks/zuul.yaml | 20 ++++++++++++++
 testinfra/test_static.py               |  8 ++++++
 6 files changed, 78 insertions(+)
 create mode 100644 playbooks/periodic/404.yaml
 create mode 100644 playbooks/roles/static/files/zuul.sudo
 create mode 100644 playbooks/roles/static/tasks/zuul.yaml

diff --git a/.zuul.yaml b/.zuul.yaml
index 330919209c..e4f4bff22a 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -1145,6 +1145,12 @@
     vars:
       playbook_name: zuul_reconfigure.yaml
 
+- job:
+    name: system-config-static-404-checker
+    description: |
+      Run 404 scraping script on static.opendev.org
+    run: playbooks/periodic/404.yaml
+
 - project:
     templates:
       - system-config-zuul-role-integration
@@ -1312,3 +1318,6 @@
         - system-config-promote-image-haproxy-statsd
         - system-config-promote-image-python-base
         - system-config-promote-image-python-builder
+    periodic:
+      jobs:
+        - system-config-static-404-checker
diff --git a/playbooks/periodic/404.yaml b/playbooks/periodic/404.yaml
new file mode 100644
index 0000000000..438621acf8
--- /dev/null
+++ b/playbooks/periodic/404.yaml
@@ -0,0 +1,37 @@
+- hosts: localhost
+  tasks:
+    - name: Add static.opendev.org to inventory
+      add_host:
+        name: static.opendev.org
+        ansible_connection: ssh
+        ansible_host: static.opendev.org
+        ansible_port: 22
+        ansible_user: zuul
+
+# NOTE(ianw): 2020-02-25 just for initial testing run this for one log
+# in a dumb way.  We can scrape a few more sites.  Overall, we expect
+# this to be replaced with a better analysis tool, see
+#   https://review.opendev.org/709236
+- hosts: static.opendev.org
+  tasks:
+    - name: Run 404 scraping script
+      become: yes
+      shell: |
+        SOURCE_FILE=/var/log/docs.openstack.org_access.log
+        INTERMEDIATE_FILE=$(mktemp)
+
+        # Extract just the request paths of GET requests that returned 404
+        grep ' 404 ' "$SOURCE_FILE" | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p' > "$INTERMEDIATE_FILE"
+
+        if [ -f "$SOURCE_FILE.1" ] ; then
+          # We get roughly the last day's worth of logs by looking at the
+          # last two log files.
+          grep ' 404 ' "$SOURCE_FILE.1" | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p' >> "$INTERMEDIATE_FILE"
+        fi
+
+        # Count the 404 paths and print html/directory paths sorted by count
+        sort "$INTERMEDIATE_FILE" | uniq -c | sort -rn | grep '\(html\|\/$\)'
+
+        rm "${INTERMEDIATE_FILE}"
+      args:
+        executable: /bin/bash
diff --git a/playbooks/roles/static/files/zuul.sudo b/playbooks/roles/static/files/zuul.sudo
new file mode 100644
index 0000000000..e6ad93aa15
--- /dev/null
+++ b/playbooks/roles/static/files/zuul.sudo
@@ -0,0 +1 @@
+zuul ALL=(ALL) NOPASSWD: ALL
diff --git a/playbooks/roles/static/tasks/main.yaml b/playbooks/roles/static/tasks/main.yaml
index 5263d312b9..a57504f91b 100644
--- a/playbooks/roles/static/tasks/main.yaml
+++ b/playbooks/roles/static/tasks/main.yaml
@@ -71,3 +71,6 @@
     - 50-tarballs.opendev.org
     - 50-tarballs.openstack.org
     - 50-zuul-ci.org
+
+- name: Install zuul user
+  include_tasks: zuul.yaml
diff --git a/playbooks/roles/static/tasks/zuul.yaml b/playbooks/roles/static/tasks/zuul.yaml
new file mode 100644
index 0000000000..beb2d5b780
--- /dev/null
+++ b/playbooks/roles/static/tasks/zuul.yaml
@@ -0,0 +1,20 @@
+- name: Create zuul user
+  user:
+    name: zuul
+    comment: User for running remote zuul jobs
+    shell: /bin/bash
+
+- name: Install sudo permissions for zuul
+  copy:
+    src: zuul.sudo
+    dest: '/etc/sudoers.d/zuul'
+    owner: root
+    group: root
+    mode: 0440
+
+- name: Install system-config per-project key for zuul
+  authorized_key:
+    user: zuul
+    state: present
+    key: |
+      ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDcXd/QJDEprSLh6N6bULnhchf9M+uzYBEJ2b51Au67FON+5M6VEj5Ut+DlkEPhabOP+tSv9Cn1HpmpBjdEOXdmBj6JS7G/gBb4w28oZDyNjrPT2ebpRw/XnVEkGfikR2J+j3o7CV+ybhLDalXm2TUDReVXnONUq3YzZbjRzoYs0xxrxyss47vZP0xFpsAt9jCMAJW2k6H589VUY38k9LFyhZUZ72FB6eJ68B9GN0TimBYm2DqvupBGQrRhkP8OZ0WoBV8PulKXaHVFdmfBNHB7E7FLlZKuiM6nkV4bOWMGOB/TF++wXBK86t9po3pWCM7+kr72xGRTE+6LuZ2z1K+h
diff --git a/testinfra/test_static.py b/testinfra/test_static.py
index b3029bf59c..e4b896533f 100644
--- a/testinfra/test_static.py
+++ b/testinfra/test_static.py
@@ -21,6 +21,14 @@ def test_apache(host):
     apache = host.service('apache2')
     assert apache.is_running
 
+def test_zuul_user(host):
+    """Check the zuul user exists and has an authorized_keys file."""
+    zuul = host.user('zuul')
+    assert zuul.exists
+
+    keys_path = '%s/.ssh/authorized_keys' % zuul.home
+    assert host.file(keys_path).exists
+
 def test_static_opendev_org(host):
     cmd = host.run('curl --insecure '
                    '--resolve static.opendev.org:443:127.0.0.1 '