Add the ability to ignore offline targets
If a Jenkins master is in shutdown mode or is offline, ignore it for the purposes of launching nodes. Node updates (used/complete) for that Jenkins will still be processed. This should allow another Jenkins master to gracefully accept the increased load if one goes offline. Also, log the IP address when spinning up a node. Change-Id: I3a8720dd5aaf154ca91cdc36136decad52eb6afa
This commit is contained in:
parent
d9ad4e91eb
commit
b2367d88a6
@ -18,6 +18,7 @@ import uuid
|
||||
import time
|
||||
import threading
|
||||
import novaclient
|
||||
from jenkins import JenkinsException
|
||||
|
||||
|
||||
class Dummy(object):
|
||||
@ -108,8 +109,14 @@ class FakeSSHClient(object):
|
||||
|
||||
|
||||
class FakeJenkins(object):
    """Test double for the python-jenkins client.

    The ``user`` argument selects a canned behavior so tests can simulate
    masters in different states:

      * ``'quiet'`` -- the master reports it is quieting down (shutdown mode);
      * ``'down'``  -- ``get_info()`` raises JenkinsException, simulating an
        unreachable master;
      * anything else -- a normal, healthy master.
    """

    def __init__(self, user):
        # name -> node registration; populated by create_node (not shown here).
        self._nodes = {}
        self.quiet = False
        self.down = False
        if user == 'quiet':
            self.quiet = True
        if user == 'down':
            self.down = True

    def node_exists(self, name):
        # True if a node with this name has been registered.
        return name in self._nodes

    def delete_node(self, name):
        # Raises KeyError if the node was never registered, mirroring a
        # failed delete against a real master.
        del self._nodes[name]

    def get_info(self):
        """Return a dict shaped like the real Jenkins JSON API response.

        Raises JenkinsException when the fake is configured as 'down',
        mimicking a master that cannot be reached.  ``quietingDown``
        reflects the 'quiet' configuration so callers can detect
        shutdown mode.
        """
        if self.down:
            raise JenkinsException("Jenkins is down")
        d = {u'assignedLabels': [{}],
             u'description': None,
             u'jobs': [{u'color': u'red',
                        u'name': u'test-job',
                        u'url': u'https://jenkins.example.com/job/test-job/'}],
             u'mode': u'NORMAL',
             u'nodeDescription': u'the master Jenkins node',
             u'nodeName': u'',
             u'numExecutors': 1,
             u'overallLoad': {},
             u'primaryView': {u'name': u'Overview',
                              u'url': u'https://jenkins.example.com/'},
             u'quietingDown': self.quiet,
             u'slaveAgentPort': 8090,
             u'unlabeledLoad': {},
             u'useCrumbs': False,
             u'useSecurity': True,
             u'views': [
                 {u'name': u'test-view',
                  u'url': u'https://jenkins.example.com/view/test-view/'}]}
        return d
|
||||
|
||||
FAKE_CLIENT = FakeClient()
|
||||
|
@ -80,6 +80,11 @@ class StartBuildTask(Task):
|
||||
parameters=self.args['params'])
|
||||
|
||||
|
||||
class GetInfoTask(Task):
    """Task that fetches the Jenkins master's status dictionary.

    Submitted by JenkinsManager.getInfo(); the returned dict is the raw
    result of jenkins.get_info(), which callers inspect (e.g. the
    'quietingDown' key) to decide whether a target is accepting new nodes.
    """

    def main(self, jenkins):
        # Delegates straight to the client; JenkinsException propagates to
        # the caller so an offline master is visible as a failure.
        return jenkins.get_info()
|
||||
|
||||
|
||||
class JenkinsManager(TaskManager):
|
||||
log = logging.getLogger("nodepool.JenkinsManager")
|
||||
|
||||
@ -90,7 +95,7 @@ class JenkinsManager(TaskManager):
|
||||
|
||||
def _getClient(self):
    """Build the Jenkins client for this manager's target.

    Method of JenkinsManager.  The magic apikey value 'fake' selects the
    FakeJenkins test double; the fake's behavior is keyed off the
    configured user (see fakeprovider.FakeJenkins).  Otherwise a real
    myjenkins.Jenkins client is returned.
    """
    if self.target.jenkins_apikey == 'fake':
        return fakeprovider.FakeJenkins(self.target.jenkins_user)
    return myjenkins.Jenkins(self.target.jenkins_url,
                             self.target.jenkins_user,
                             self.target.jenkins_apikey)
|
||||
@ -127,3 +132,6 @@ class JenkinsManager(TaskManager):
|
||||
|
||||
def startBuild(self, name, params):
|
||||
self.submitTask(StartBuildTask(name=name, params=params))
|
||||
|
||||
def getInfo(self):
    """Synchronously fetch the Jenkins master's info dict.

    Method of JenkinsManager.  Runs GetInfoTask through the task queue and
    returns its result; raises whatever the underlying get_info() call
    raised (e.g. JenkinsException when the master is unreachable).
    """
    return self.submitTask(GetInfoTask())
|
||||
|
@ -293,7 +293,8 @@ class NodeLauncher(threading.Thread):
|
||||
raise Exception("Unable to find public IP of server")
|
||||
|
||||
self.node.ip = ip
|
||||
self.log.debug("Node id: %s is running, testing ssh" % self.node.id)
|
||||
self.log.debug("Node id: %s is running, ip: %s, testing ssh" %
|
||||
(ip, self.node.id))
|
||||
connect_kwargs = dict(key_filename=self.image.private_key)
|
||||
if not utils.ssh_connect(ip, self.image.username,
|
||||
connect_kwargs=connect_kwargs,
|
||||
@ -630,6 +631,7 @@ class NodePool(threading.Thread):
|
||||
t.name = target['name']
|
||||
newconfig.targets[t.name] = t
|
||||
jenkins = target.get('jenkins')
|
||||
t.online = True
|
||||
if jenkins:
|
||||
t.jenkins_url = jenkins['url']
|
||||
t.jenkins_user = jenkins['user']
|
||||
@ -711,6 +713,18 @@ class NodePool(threading.Thread):
|
||||
for oldmanager in stop_managers:
|
||||
oldmanager.stop()
|
||||
|
||||
for t in config.targets.values():
|
||||
try:
|
||||
info = config.jenkins_managers[t.name].getInfo()
|
||||
if info['quietingDown']:
|
||||
self.log.info("Target %s is offline" % t.name)
|
||||
t.online = False
|
||||
else:
|
||||
t.online = True
|
||||
except Exception:
|
||||
self.log.exception("Unable to check status of %s" % t.name)
|
||||
t.online = False
|
||||
|
||||
def reconfigureCrons(self, config):
|
||||
cron_map = {
|
||||
'image-update': self._doUpdateImages,
|
||||
@ -809,22 +823,29 @@ class NodePool(threading.Thread):
|
||||
# Make sure that the current demand includes at least the
|
||||
# configured min_ready values
|
||||
total_image_min_ready = {}
|
||||
online_targets = set()
|
||||
for target in self.config.targets.values():
|
||||
if not target.online:
|
||||
continue
|
||||
online_targets.add(target.name)
|
||||
for image in target.images.values():
|
||||
min_ready = total_image_min_ready.get(image.name, 0)
|
||||
min_ready += image.min_ready
|
||||
total_image_min_ready[image.name] = min_ready
|
||||
|
||||
def count_nodes(image_name, state):
|
||||
nodes = session.getNodes(image_name=image_name,
|
||||
state=state)
|
||||
return len([n for n in nodes
|
||||
if n.target_name in online_targets])
|
||||
|
||||
# Actual need is demand - (ready + building)
|
||||
for image_name in total_image_min_ready:
|
||||
start_demand = image_demand.get(image_name, 0)
|
||||
min_demand = max(start_demand, total_image_min_ready[image_name])
|
||||
n_ready = len(session.getNodes(image_name=image_name,
|
||||
state=nodedb.READY))
|
||||
n_building = len(session.getNodes(image_name=image_name,
|
||||
state=nodedb.BUILDING))
|
||||
n_test = len(session.getNodes(image_name=image_name,
|
||||
state=nodedb.TEST))
|
||||
n_ready = count_nodes(image_name, nodedb.READY)
|
||||
n_building = count_nodes(image_name, nodedb.BUILDING)
|
||||
n_test = count_nodes(image_name, nodedb.TEST)
|
||||
ready = n_ready + n_building + n_test
|
||||
demand = max(min_demand - ready, 0)
|
||||
image_demand[image_name] = demand
|
||||
@ -851,6 +872,8 @@ class NodePool(threading.Thread):
|
||||
allocation_requests = {}
|
||||
# Set up the request values in the allocation system
|
||||
for target in self.config.targets.values():
|
||||
if not target.online:
|
||||
continue
|
||||
at = allocation.AllocationTarget(target.name)
|
||||
for image in target.images.values():
|
||||
ar = allocation_requests.get(image.name)
|
||||
@ -915,6 +938,8 @@ class NodePool(threading.Thread):
|
||||
self.checkForMissingImages(session)
|
||||
nodes_to_launch = self.getNeededNodes(session)
|
||||
for target in self.config.targets.values():
|
||||
if not target.online:
|
||||
continue
|
||||
self.log.debug("Examining target: %s" % target.name)
|
||||
for image in target.images.values():
|
||||
for provider in image.providers.values():
|
||||
|
@ -31,9 +31,9 @@ import gear
|
||||
|
||||
class MyGearmanServer(gear.Server):
    """Gearman server double that reports a fixed, scriptable job queue."""

    def handleStatus(self, request):
        """Answer the gearman 'status' administrative command.

        Emits one line per function in gearmand's status format
        (name\tqueued\trunning\tworkers) followed by the '.' terminator.
        self._count is set by the test to control the reported queue depth
        for the build:fake_job function.
        """
        request.connection.conn.send(("build:fake_job\t%s\t0\t0\n" %
                                      self._count).encode('utf8'))
        request.connection.conn.send(("build:fake_job:devstack-precise\t%s\t0\t0\n" %
                                      0).encode('utf8'))
        request.connection.conn.send(b'.\n')
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user