From e748c1d5911ea899ce2b97ffcb2755940e52fbfd Mon Sep 17 00:00:00 2001
From: Mark Goddard <mark@stackhpc.com>
Date: Mon, 20 Aug 2018 17:04:23 +0100
Subject: [PATCH] RabbitMQ upgrade fails due to stale /etc/hosts

This bug was previously fixed, but the fix did not cover all cases. The
issue is still present if /etc/hosts maps the hostname to an IP address
other than 127.0.0.1, the admin IP address, or the internal API IP
address (the last of which is correct). For example, in CI we often see
127.0.1.1.

This change removes all entries from /etc/hosts for the host's current
hostname, other than the internal API IP address. It also adds a call to
rabbitmqctl status to verify that the change has worked.

Related: We ought to improve the kolla ansible prechecks to catch the
case when there is more than one IP address mapping for the host's
current hostname.

Change-Id: I2cb9928e04005c6961f3de7c571c9a06361c4f23
Story: 2003496
Task: 24773
---
 ansible/overcloud-etc-hosts-fixup.yml | 73 ++++++++++++++-------------
 1 file changed, 39 insertions(+), 34 deletions(-)

diff --git a/ansible/overcloud-etc-hosts-fixup.yml b/ansible/overcloud-etc-hosts-fixup.yml
index 6c1546222..1d170383c 100644
--- a/ansible/overcloud-etc-hosts-fixup.yml
+++ b/ansible/overcloud-etc-hosts-fixup.yml
@@ -7,23 +7,23 @@
 # which it is listening. As a workaround, we remove the stale entries from
 # /etc/hosts.  See https://github.com/stackhpc/kayobe/issues/14.
 
-- name: Ensure overcloud hosts' /etc/hosts does not contain admin network IP
+- name: Ensure overcloud hosts' /etc/hosts does not contain incorrect IPs
   hosts: overcloud
   tags:
     - etc-hosts-fixup
   tasks:
-    - name: Ensure overcloud hosts' /etc/hosts does not contain admin network or loopback IPs
+    # Remove /etc/hosts entries that map the current hostname to any IP other than
+    # the internal API IP. The lookahead ends in [ \t] so 10.0.0.1 won't spare 10.0.0.10.
+    - name: Ensure overcloud hosts' /etc/hosts does not contain incorrect IPs
       lineinfile:
         dest: /etc/hosts
-        regexp: "^{{ item }}[ \t]*{{ inventory_hostname }}"
+        regexp: "^(?!{{ internal_net_name | net_ip | regex_escape }}[ \t])[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+[ \t]*{{ ansible_hostname }}"
         state: absent
-      with_items:
-        - "127.0.0.1"
-        - "{{ admin_oc_net_name | net_ip }}"
-      when: admin_oc_net_name | net_ip != None
+        # Ensure the correct entry is present (IP must be followed by whitespace).
+        validate: "grep -E '^{{ internal_net_name | net_ip | regex_escape }}[[:space:]].*{{ ansible_hostname }}' %s"
       become: True
 
-- name: Ensure rabbitmq containers' /etc/hosts does not contain admin network or loopback IPs
+- name: Ensure rabbitmq containers' /etc/hosts does not contain incorrect IPs
   hosts: overcloud
   tags:
     - etc-hosts-fixup
@@ -32,30 +32,35 @@
       - rabbitmq
       - outward_rabbitmq
   tasks:
-    - block:
-        - name: Check whether rabbitmq container is running
-          command: docker inspect -f {{ '{{.Id}}' }} {{ item }}
-          changed_when: False
-          failed_when: False
-          with_items: "{{ rabbitmq_containers }}"
-          register: ps_result
+    - name: Check whether rabbitmq container is running
+      command: docker inspect -f {{ '{{.Id}}' }} {{ item }}
+      with_items: "{{ rabbitmq_containers }}"
+      register: ps_result
+      failed_when: false  # never fail here; later tasks filter on item.rc == 0
+      changed_when: false  # inspection only, nothing is modified
 
-        - name: Ensure rabbitmq containers' /etc/hosts does not contain admin network or loopback IPs
-          command: >
-            docker exec -u root {{ item.0.item }}
-            bash -c
-            'cp /etc/hosts /tmp/hosts &&
-             sed -i -e "/^{{ item.1 }}[ \t]*{{ inventory_hostname }}/d" /tmp/hosts &&
-             if ! diff -q /tmp/hosts /etc/hosts >/dev/null; then
-               cp /tmp/hosts /etc/hosts &&
-               echo changed
-             fi &&
-             rm /tmp/hosts'
-          changed_when: "'changed' in sed_result.stdout"
-          with_nested:
-            - "{{ ps_result.results }}"
-            - - "127.0.0.1"
-              - "{{ admin_oc_net_name | net_ip }}"
-          when: item.0.rc == 0
-          register: sed_result
-      when: admin_oc_net_name | net_ip != None
+    - name: Copy /etc/hosts into rabbitmq containers
+      command: docker cp /etc/hosts {{ item.item }}:/tmp/hosts
+      changed_when: false  # staging copy to /tmp/hosts; never reported as a change
+      when: item.rc == 0  # skip containers for which docker inspect failed
+      with_items: "{{ ps_result.results }}"
+
+    - name: Ensure rabbitmq containers' /etc/hosts does not contain incorrect IPs
+      command: >
+        docker exec -u root {{ item.item }}
+        bash -c
+         'if ! diff -q /tmp/hosts /etc/hosts >/dev/null; then
+           cp /tmp/hosts /etc/hosts &&
+           echo changed
+         fi &&
+         rm /tmp/hosts'
+      changed_when: "'changed' in sed_result.stdout"  # script echoes "changed" only after overwriting /etc/hosts
+      with_items: "{{ ps_result.results }}"  # one result per container checked by docker inspect
+      when: item.rc == 0  # only containers for which docker inspect succeeded
+      register: sed_result  # name carried over from the previous sed-based task
+
+    - name: Check that RabbitMQ client works
+      command: docker exec {{ item.item }} rabbitmqctl status
+      changed_when: false  # status query only
+      when: item.rc == 0  # only containers for which docker inspect succeeded
+      with_items: "{{ ps_result.results }}"