Add the ability to ignore offline targets

If a Jenkins is in shutdown mode or is offline, ignore that Jenkins
for the purposes of launching nodes.  Node updates (used/complete)
for that Jenkins will still be processed.

This should allow another Jenkins to gracefully accept the increased
load if one goes offline.

Also, log the IP address when spinning up a node.

Change-Id: I3a8720dd5aaf154ca91cdc36136decad52eb6afa
This commit is contained in:
James E. Blair 2013-10-10 12:44:33 -07:00
parent d9ad4e91eb
commit b2367d88a6
4 changed files with 75 additions and 11 deletions

View File

@ -18,6 +18,7 @@ import uuid
import time
import threading
import novaclient
from jenkins import JenkinsException
class Dummy(object):
@ -108,8 +109,14 @@ class FakeSSHClient(object):
class FakeJenkins(object):
def __init__(self):
def __init__(self, user):
self._nodes = {}
self.quiet = False
self.down = False
if user == 'quiet':
self.quiet = True
if user == 'down':
self.down = True
def node_exists(self, name):
    """Return True when a node called ``name`` is registered here."""
    registered = self._nodes
    return name in registered
@ -120,5 +127,29 @@ class FakeJenkins(object):
def delete_node(self, name):
    """Forget the node called ``name``.

    Raises KeyError when no such node is registered.
    """
    self._nodes.pop(name)
def get_info(self):
    """Return a canned Jenkins info dictionary.

    Raises JenkinsException when ``self.down`` is set.  Otherwise the
    returned dictionary is fixed except for 'quietingDown', which
    carries the value of ``self.quiet``.
    """
    if self.down:
        raise JenkinsException("Jenkins is down")

    jobs = [{u'color': u'red',
             u'name': u'test-job',
             u'url': u'https://jenkins.example.com/job/test-job/'}]
    primary_view = {u'name': u'Overview',
                    u'url': u'https://jenkins.example.com/'}
    views = [{u'name': u'test-view',
              u'url': u'https://jenkins.example.com/view/test-view/'}]

    return {
        u'assignedLabels': [{}],
        u'description': None,
        u'jobs': jobs,
        u'mode': u'NORMAL',
        u'nodeDescription': u'the master Jenkins node',
        u'nodeName': u'',
        u'numExecutors': 1,
        u'overallLoad': {},
        u'primaryView': primary_view,
        # The only field that varies between fakes.
        u'quietingDown': self.quiet,
        u'slaveAgentPort': 8090,
        u'unlabeledLoad': {},
        u'useCrumbs': False,
        u'useSecurity': True,
        u'views': views,
    }
# A FakeClient instance bound to the name FAKE_CLIENT.
# NOTE(review): presumably a module-level singleton shared by callers of
# the fake provider -- FakeClient itself is not visible here; confirm.
FAKE_CLIENT = FakeClient()

View File

@ -80,6 +80,11 @@ class StartBuildTask(Task):
parameters=self.args['params'])
class GetInfoTask(Task):
    """Task that fetches the info dictionary from a Jenkins client."""

    def main(self, jenkins):
        """Return the result of ``jenkins.get_info()`` unchanged."""
        info = jenkins.get_info()
        return info
class JenkinsManager(TaskManager):
log = logging.getLogger("nodepool.JenkinsManager")
@ -90,7 +95,7 @@ class JenkinsManager(TaskManager):
def _getClient(self):
    """Build the Jenkins client for this manager's target.

    When the target's API key is the literal 'fake', a
    fakeprovider.FakeJenkins is returned and is given the target's
    configured user name.  For any other key a myjenkins.Jenkins client
    is created from the target's URL, user and API key.
    """
    target = self.target
    if target.jenkins_apikey == 'fake':
        # FakeJenkins requires the user name; it selects the fake's
        # simulated state.
        return fakeprovider.FakeJenkins(target.jenkins_user)
    return myjenkins.Jenkins(target.jenkins_url,
                             target.jenkins_user,
                             target.jenkins_apikey)
@ -127,3 +132,6 @@ class JenkinsManager(TaskManager):
def startBuild(self, name, params):
    """Submit a StartBuildTask for ``name`` with ``params``.

    The result of ``submitTask`` is not returned.
    """
    task = StartBuildTask(name=name, params=params)
    self.submitTask(task)
def getInfo(self):
    """Submit a GetInfoTask and return whatever ``submitTask`` returns."""
    task = GetInfoTask()
    return self.submitTask(task)

View File

@ -293,7 +293,8 @@ class NodeLauncher(threading.Thread):
raise Exception("Unable to find public IP of server")
self.node.ip = ip
# The arguments must follow the order of the placeholders: node id
# first, then the IP address.  Passing them the other way round logs
# the IP as the node id and the node id as the IP.
self.log.debug("Node id: %s is running, ip: %s, testing ssh" %
               (self.node.id, ip))
connect_kwargs = dict(key_filename=self.image.private_key)
if not utils.ssh_connect(ip, self.image.username,
connect_kwargs=connect_kwargs,
@ -630,6 +631,7 @@ class NodePool(threading.Thread):
t.name = target['name']
newconfig.targets[t.name] = t
jenkins = target.get('jenkins')
t.online = True
if jenkins:
t.jenkins_url = jenkins['url']
t.jenkins_user = jenkins['user']
@ -711,6 +713,18 @@ class NodePool(threading.Thread):
for oldmanager in stop_managers:
oldmanager.stop()
# Ask each target's Jenkins manager for its status and record on the
# target whether it is online.  A target is marked offline when Jenkins
# reports 'quietingDown', or when the status check fails for any reason.
for t in config.targets.values():
    try:
        status = config.jenkins_managers[t.name].getInfo()
        quieting = status['quietingDown']
        if quieting:
            self.log.info("Target %s is offline" % t.name)
        t.online = not quieting
    except Exception:
        # Any failure here is logged with a traceback and the target
        # is marked offline.
        self.log.exception("Unable to check status of %s" % t.name)
        t.online = False
def reconfigureCrons(self, config):
cron_map = {
'image-update': self._doUpdateImages,
@ -809,22 +823,29 @@ class NodePool(threading.Thread):
# Make sure that the current demand includes at least the
# configured min_ready values
# Sum min_ready per image name across online targets only, and remember
# the names of those targets.
total_image_min_ready = {}
online_targets = set()
for target in self.config.targets.values():
    if target.online:
        online_targets.add(target.name)
        for image in target.images.values():
            min_ready = (total_image_min_ready.get(image.name, 0) +
                         image.min_ready)
            total_image_min_ready[image.name] = min_ready
def count_nodes(image_name, state):
    """Count nodes of ``image_name`` in ``state`` on online targets.

    Nodes whose ``target_name`` is not in ``online_targets`` are left
    out of the count.
    """
    matching = 0
    for node in session.getNodes(image_name=image_name, state=state):
        if node.target_name in online_targets:
            matching += 1
    return matching
# Actual need is demand - (ready + building)
for image_name in total_image_min_ready:
start_demand = image_demand.get(image_name, 0)
min_demand = max(start_demand, total_image_min_ready[image_name])
n_ready = len(session.getNodes(image_name=image_name,
state=nodedb.READY))
n_building = len(session.getNodes(image_name=image_name,
state=nodedb.BUILDING))
n_test = len(session.getNodes(image_name=image_name,
state=nodedb.TEST))
n_ready = count_nodes(image_name, nodedb.READY)
n_building = count_nodes(image_name, nodedb.BUILDING)
n_test = count_nodes(image_name, nodedb.TEST)
ready = n_ready + n_building + n_test
demand = max(min_demand - ready, 0)
image_demand[image_name] = demand
@ -851,6 +872,8 @@ class NodePool(threading.Thread):
allocation_requests = {}
# Set up the request values in the allocation system
for target in self.config.targets.values():
if not target.online:
continue
at = allocation.AllocationTarget(target.name)
for image in target.images.values():
ar = allocation_requests.get(image.name)
@ -915,6 +938,8 @@ class NodePool(threading.Thread):
self.checkForMissingImages(session)
nodes_to_launch = self.getNeededNodes(session)
for target in self.config.targets.values():
if not target.online:
continue
self.log.debug("Examining target: %s" % target.name)
for image in target.images.values():
for provider in image.providers.values():

View File

@ -31,9 +31,9 @@ import gear
class MyGearmanServer(gear.Server):
def handleStatus(self, request):
    """Send a canned status reply on the request's connection.

    Two tab-separated lines are written, each made of a job name
    followed by three numeric fields: 'build:fake_job' carries
    ``self._count`` in its first numeric field, and
    'build:fake_job:devstack-precise' carries 0.  The reply is
    terminated by a line holding a single '.'.
    """
    conn = request.connection.conn
    conn.send(("build:fake_job\t%s\t0\t0\n" %
               self._count).encode('utf8'))
    conn.send(("build:fake_job:devstack-precise\t%s\t0\t0\n" %
               0).encode('utf8'))
    conn.send(b'.\n')