From 62801d8a931d649a8132346f0a92612bf548cf86 Mon Sep 17 00:00:00 2001
From: Ian Wienand <iwienand@redhat.com>
Date: Tue, 9 Feb 2021 10:39:23 +1100
Subject: [PATCH] borg-backup-server: volume space monitor

Due to backups running in append-only mode, we do not have a way to
safely and automatically prune backups.  To reduce the likelihood we
forget about backups and end up with failing jobs, add a cron job to
send an email to infra-root if the backup partition goes over 90%
usage.  At this point a manual prune should be run
(I9559bb8aeeef06b95fb9e172a2c5bfb5be5b480e).

Change-Id: I250d84c4a9f707e63fef6f70cfdcc1fb7807d3a7
---
 .../files/backup-volume-monitor.sh               | 12 ++++++++++++
 .../roles/borg-backup-server/tasks/main.yaml     | 16 ++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 playbooks/roles/borg-backup-server/files/backup-volume-monitor.sh

diff --git a/playbooks/roles/borg-backup-server/files/backup-volume-monitor.sh b/playbooks/roles/borg-backup-server/files/backup-volume-monitor.sh
new file mode 100644
index 0000000000..2c2e8fb767
--- /dev/null
+++ b/playbooks/roles/borg-backup-server/files/backup-volume-monitor.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Mail infra-root when a backup volume reaches THRESHOLD percent usage.
+THRESHOLD=90
+
+# df -P prints one line per filesystem: field 5 is Use%, field 6 the mount.
+df -P | awk '/\/opt\/backups/ { sub(/%/, "", $5); print $5, $6 }' |
+while read -r usep partition; do
+    if [ "$usep" -ge "$THRESHOLD" ]; then
+        echo "Backup volume \"$partition ($usep%)\" on $(hostname) at $(date)" |
+            mail -s "ACTION REQUIRED: Backup volume usage at $usep%" infra-root@openstack.org
+    fi
+done
diff --git a/playbooks/roles/borg-backup-server/tasks/main.yaml b/playbooks/roles/borg-backup-server/tasks/main.yaml
index cc8c4ca370..fa38c961ce 100644
--- a/playbooks/roles/borg-backup-server/tasks/main.yaml
+++ b/playbooks/roles/borg-backup-server/tasks/main.yaml
@@ -15,6 +15,22 @@
     group: root
     mode: '0755'
 
+- name: Install backup volume monitor
+  copy:
+    dest: '/usr/local/bin/backup-volume-monitor'
+    src: 'backup-volume-monitor.sh'  # resolved from this role's files/
+    mode: '0755'  # executable, so the cron job can invoke it directly
+    group: root
+    owner: root
+
+- name: Run backup volume monitor
+  cron:
+    name: backup-volume-monitor
+    job: '/usr/local/bin/backup-volume-monitor'
+    hour: '0'  # with minute '0': fires once a day at 00:00
+    minute: '0'
+    state: present
+
 - name: Build all borg users from backup hosts
   set_fact:
     borg_users: '{{ borg_users }} + [ {{ hostvars[item]["borg_user"] }} ]'