From 8060628beed36259bb9df53405035540b6daad4f Mon Sep 17 00:00:00 2001 From: Ben Cooper Date: Fri, 21 Jul 2017 06:50:25 +0100 Subject: [PATCH] Change wait_for_boot method for node availability The current wait_for_boot method uses a socket connection to port 22 to determine whether commands can be run via SSH on a given node. However this is not definitive as even if a connection to port 22 can be made, SSH may not be immediately available. A more robust approach is to repeatedly attempt a basic command until it executes successfully. Change-Id: Ib80e29f947a47e1b5349236a7e9db42b59c09b10 --- ramdisk_func_test/exception.py | 6 +++--- ramdisk_func_test/node.py | 16 +++++++++++++++- ramdisk_func_test/utils.py | 29 ----------------------------- 3 files changed, 18 insertions(+), 33 deletions(-) diff --git a/ramdisk_func_test/exception.py b/ramdisk_func_test/exception.py index a536773..2d4d48e 100644 --- a/ramdisk_func_test/exception.py +++ b/ramdisk_func_test/exception.py @@ -63,9 +63,9 @@ class TimeoutException(RamDiskTestException): _msg = "Timeout expired." -class NetServiceStartTimeout(TimeoutException): - _msg = ("Timeout %(timeout)ss for waiting for IP %(ip)s port %(port)s " - "to start expired.") +class NodeSSHTimeout(TimeoutException): + _msg = ("Waiting for ssh to become available on node %(node_name)s " + "exceeded timeout %(timeout)ss.") class NodeCallbackTimeout(TimeoutException): diff --git a/ramdisk_func_test/node.py b/ramdisk_func_test/node.py index 3fd9e26..8131ac3 100644 --- a/ramdisk_func_test/node.py +++ b/ramdisk_func_test/node.py @@ -136,7 +136,21 @@ class Node(base.LibvirtBase): def wait_for_boot(self): LOG.info("Waiting {0} node to boot".format( self.name)) - utils.wait_net_service(self.ip, 22, timeout=CONF.node_boot_timeout) + timeout = CONF.node_boot_timeout + end = time() + timeout + + while time() < end: + try: + self.run_cmd('ls') # dummy cmd to check connection + return + except(paramiko.ssh_exception.NoValidConnectionsError, + paramiko.ssh_exception.SSHException): + pass + + sleep(1) + + raise exception.NodeSSHTimeout(timeout=timeout, + node_name=self.name) def wait_for_callback(self): diff --git a/ramdisk_func_test/utils.py b/ramdisk_func_test/utils.py index b4f6b48..86d7d2b 100644 --- a/ramdisk_func_test/utils.py +++ b/ramdisk_func_test/utils.py @@ -18,15 +18,11 @@ import os import logging import shutil import random -import socket from subprocess import check_output -from time import time -from time import sleep from oslo_config import cfg from ramdisk_func_test import conf -from ramdisk_func_test import exception CONF = conf.CONF @@ -77,31 +73,6 @@ def get_random_mac(): return "52:54:00:%02x:%02x:%02x" % (rnd(), rnd(), rnd()) -def wait_net_service(ip, port, timeout, try_interval=2): - """Wait for network service to appear""" - LOG.info("Waiting for IP {0} port {1} to start".format(ip, port)) - s = socket.socket() - s.settimeout(try_interval) - end = time() + timeout - while time() < end: - try: - s.connect((ip, port)) - except socket.timeout: - # cannot connect after timeout - continue - except socket.error: - # cannot connect immediately (e.g. no route) - # wait timeout before next try - sleep(try_interval) - continue - else: - # success! - s.close() - return - - raise exception.NetServiceStartTimeout(timeout=timeout, ip=ip, port=port) - - class FakeGlobalSectionHead(object): def __init__(self, fp): self.fp = fp