Package buildbot :: Module buildslave
[frames] | no frames]

Source Code for Module buildbot.buildslave

   1  # This file is part of Buildbot.  Buildbot is free software: you can 
   2  # redistribute it and/or modify it under the terms of the GNU General Public 
   3  # License as published by the Free Software Foundation, version 2. 
   4  # 
   5  # This program is distributed in the hope that it will be useful, but WITHOUT 
   6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
   7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
   8  # details. 
   9  # 
  10  # You should have received a copy of the GNU General Public License along with 
  11  # this program; if not, write to the Free Software Foundation, Inc., 51 
  12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  13  # 
  14  # Portions Copyright Buildbot Team Members 
  15  # Portions Copyright Canonical Ltd. 2009 
  16   
  17  import time 
  18  from email.Message import Message 
  19  from email.Utils import formatdate 
  20  from zope.interface import implements 
  21  from twisted.python import log, failure 
  22  from twisted.internet import defer, reactor 
  23  from twisted.application import service 
  24  from twisted.spread import pb 
  25  from twisted.python.reflect import namedModule 
  26   
  27  from buildbot.status.slave import SlaveStatus 
  28  from buildbot.status.mail import MailNotifier 
  29  from buildbot.process import metrics, botmaster 
  30  from buildbot.interfaces import IBuildSlave, ILatentBuildSlave 
  31  from buildbot.process.properties import Properties 
  32  from buildbot.locks import LockAccess 
  33  from buildbot.util import subscription 
  34  from buildbot import config 
35 36 -class AbstractBuildSlave(config.ReconfigurableServiceMixin, pb.Avatar, 37 service.MultiService):
38 """This is the master-side representative for a remote buildbot slave. 39 There is exactly one for each slave described in the config file (the 40 c['slaves'] list). When buildbots connect in (.attach), they get a 41 reference to this instance. The BotMaster object is stashed as the 42 .botmaster attribute. The BotMaster is also our '.parent' Service. 43 44 I represent a build slave -- a remote machine capable of 45 running builds. I am instantiated by the configuration file, and can be 46 subclassed to add extra functionality.""" 47 48 implements(IBuildSlave) 49 keepalive_timer = None 50 keepalive_interval = None 51 52 # reconfig slaves after builders 53 reconfig_priority = 64 54
def __init__(self, name, password, max_builds=None,
             notify_on_missing=None, missing_timeout=3600,
             properties=None, locks=None, keepalive_interval=3600):
    """
    @param name: botname this machine will supply when it connects
    @param password: password this machine will supply when
                     it connects
    @param max_builds: maximum number of simultaneous builds that will
                       be run concurrently on this buildslave (the
                       default is None for no limit)
    @param notify_on_missing: an e-mail address, or a list of addresses,
                              used as the recipients of the "slave is
                              missing" message; None (the default) means
                              nobody is notified
    @type  notify_on_missing: string or list of strings
    @param missing_timeout: delay, in seconds, handed to the reactor when
                            the missing-slave timer is armed
    @param properties: properties that will be applied to builds run on
                       this slave
    @type  properties: dictionary
    @param locks: A list of locks that must be acquired before this slave
                  can be used
    @type  locks: list
    @param keepalive_interval: delay, in seconds, between two keepalive
                               calls to an attached slave
    """
    # None stands in for "empty" so that no mutable default object is
    # shared between every instance created without these arguments.
    if notify_on_missing is None:
        notify_on_missing = []
    if properties is None:
        properties = {}

    service.MultiService.__init__(self)
    self.slavename = name
    self.password = password

    # PB registration
    self.registration = None
    self.registered_port = None

    # these are set when the service is started, and unset when it is
    # stopped
    self.botmaster = None
    self.master = None

    self.slave_status = SlaveStatus(name)
    self.slave = None  # a RemoteReference to the Bot, when connected
    self.slave_commands = None
    self.slavebuilders = {}
    self.max_builds = max_builds
    self.access = []
    if locks:
        self.access = locks
    self.lock_subscriptions = []

    self.properties = Properties()
    self.properties.update(properties, "BuildSlave")
    self.properties.setProperty("slavename", name, "BuildSlave")

    self.lastMessageReceived = 0
    # a single address is accepted as shorthand for a one-element list
    if isinstance(notify_on_missing, str):
        notify_on_missing = [notify_on_missing]
    self.notify_on_missing = notify_on_missing
    for i in notify_on_missing:
        if not isinstance(i, str):
            config.error(
                'notify_on_missing arg %r is not a string' % (i,))
    self.missing_timeout = missing_timeout
    self.missing_timer = None
    self.keepalive_interval = keepalive_interval

    # created in attached(), cleared again after detached()
    self.detached_subs = None

    # last builder list successfully sent to the slave
    self._old_builder_list = None
114
def __repr__(self):
    """Return a short debugging representation, C{<ClassName 'slavename'>}."""
    class_name = self.__class__.__name__
    return "<%s %r>" % (class_name, self.slavename)
117
def updateLocks(self):
    """Resolve the entries of C{self.access} into real lock objects.

    The result is stored in C{self.locks} as a list of
    C{(lock, access)} pairs, and the subscriptions to lock releases are
    rebuilt to match."""
    # drop the subscriptions that belong to the previous set of locks
    for stale in self.lock_subscriptions:
        stale.unsubscribe()

    # first look every lock up through the botmaster ...
    looked_up = []
    for entry in self.access:
        # anything that is not already a LockAccess is asked for its
        # default access
        if not isinstance(entry, LockAccess):
            entry = entry.defaultAccess()
        looked_up.append((self.botmaster.getLockByID(entry.lockid), entry))

    # ... then obtain the lock object that applies to this slave
    self.locks = [(master_lock.getLock(self), entry)
                  for master_lock, entry in looked_up]

    # _lockReleased is invoked whenever one of these locks is released
    subscriptions = []
    for real_lock, _entry in self.locks:
        subscriptions.append(real_lock.subscribeToReleases(self._lockReleased))
    self.lock_subscriptions = subscriptions
135
def locksAvailable(self):
    """
    Report whether every lock this slave depends on is available.

    @return: True when C{self.locks} is empty or each lock reports itself
             available for the recorded access, False otherwise
    """
    return all(lock.isAvailable(self, access)
               for lock, access in (self.locks or ()))
147
def acquireLocks(self):
    """
    Claim every lock in C{self.locks} on behalf of a build that is
    preparing to run.

    @return: True if all locks were available and have been claimed;
             False if at least one was unavailable, in which case nothing
             is claimed and the caller should give up the build and try
             another slave
    """
    log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks))
    if self.locksAvailable():
        # all locks are available, claim them all
        for held, mode in self.locks:
            held.claim(self, mode)
        return True
    log.msg("slave %s can't lock, giving up" % (self, ))
    return False
163
def releaseLocks(self):
    """
    Release every lock in C{self.locks}; called after a build has
    finished.
    """
    held_locks = self.locks
    log.msg("releaseLocks(%s): %s" % (self, held_locks))
    for held, mode in held_locks:
        held.release(self, mode)
171
def _lockReleased(self):
    """Callback for lock releases: ask the botmaster to try scheduling
    builds for this slave."""
    botmaster = self.botmaster
    if botmaster:
        botmaster.maybeStartBuildsForSlave(self.slavename)
    # without a botmaster there is nothing to do
178
def setServiceParent(self, parent):
    """Attach this service to C{parent}, remembering it as the botmaster.

    C{self.botmaster} and C{self.master} must be in place first, because
    the base-class call below is what triggers startService.
    """
    self.botmaster, self.master = parent, parent.master
    service.MultiService.setServiceParent(self, parent)
184
def startService(self):
    """Resolve the configured locks, arm the missing-slave timer, then
    start the underlying MultiService."""
    self.updateLocks()
    self.startMissingTimer()
    started = service.MultiService.startService(self)
    return started
189
def reconfigService(self, new_config):
    """Adopt the settings of this slave's counterpart in C{new_config}.

    The existing instance is reconfigured in place rather than replaced:
    BuildSlave objects are remotely referenced, so swapping the object
    would force the slave to disconnect, and there is no reason to do that.

    @param new_config: the new master configuration; its C{slaves} list
                       and C{slavePortnum} are consulted
    @return: a Deferred that fires after the attached slave (if any) has
             been updated and the mixin's reconfigService has run
    """
    replacement = self.findNewSlaveInstance(new_config)
    assert self.slavename == replacement.slavename

    # register with the PB manager when there is no registration yet, or
    # when the password or the port changed
    must_register = (not self.registration
                     or self.password != replacement.password
                     or new_config.slavePortnum != self.registered_port)
    if must_register:
        if self.registration:
            self.registration.unregister()
        self.password = replacement.password
        self.registered_port = new_config.slavePortnum
        self.registration = self.master.pbmanager.register(
            self.registered_port, self.slavename,
            self.password, self.getPerspective)

    # take over the plain configuration values of the new instance
    self.max_builds = replacement.max_builds
    self.access = replacement.access
    self.notify_on_missing = replacement.notify_on_missing
    self.keepalive_interval = replacement.keepalive_interval

    if self.missing_timeout != replacement.missing_timeout:
        # restart the timer with the new delay only if one was running
        was_running = self.missing_timer
        self.stopMissingTimer()
        self.missing_timeout = replacement.missing_timeout
        if was_running:
            self.startMissingTimer()

    fresh_properties = Properties()
    fresh_properties.updateFromProperties(replacement.properties)
    self.properties = fresh_properties

    self.updateLocks()

    # Tell the attached slave which builders it now serves.  This relies
    # on the builders having been reconfigured already, which is why this
    # class sets a low reconfig_priority.
    d = self.updateSlave()

    def _chain_up(_):
        return config.ReconfigurableServiceMixin.reconfigService(
            self, new_config)
    d.addCallback(_chain_up)

    return d
240
def stopService(self):
    """Drop the PB registration (if any), cancel the missing-slave timer
    and stop the underlying MultiService."""
    registration = self.registration
    if registration:
        registration.unregister()
    self.stopMissingTimer()
    return service.MultiService.stopService(self)
246
def findNewSlaveInstance(self, new_config):
    """Return the entry of C{new_config.slaves} that carries our name.

    @param new_config: configuration object with a C{slaves} iterable
    @return: the first slave whose C{slavename} equals C{self.slavename}
    @raise AssertionError: if no such slave exists.  The exception is
        raised explicitly (instead of via C{assert 0}) so that running
        Python with -O cannot turn this into a silent C{None} return.
    """
    # TODO: called multiple times per reconfig; use 1-element cache?
    for sl in new_config.slaves:
        if sl.slavename == self.slavename:
            return sl
    raise AssertionError("no new slave named '%s'" % self.slavename)
253
def startMissingTimer(self):
    """(Re)arm the timer that fires C{_missing_timer_fired}.

    Nothing happens unless there are recipients to notify, a non-zero
    timeout, and this service has a parent.
    """
    wanted = (self.notify_on_missing and self.missing_timeout
              and self.parent)
    if not wanted:
        return
    # in case it's already running
    self.stopMissingTimer()
    self.missing_timer = reactor.callLater(self.missing_timeout,
                                           self._missing_timer_fired)
259
def stopMissingTimer(self):
    """Cancel the missing-slave timer, if one is set, and forget it."""
    pending = self.missing_timer
    if not pending:
        return
    pending.cancel()
    self.missing_timer = None
264
def getPerspective(self, mind, slavename):
    """Hand out the perspective for a connecting slave.

    This is the callable registered with the PB manager in
    reconfigService.

    @param mind: the remote reference for the connecting slave; its
                 C{broker.transport} is used for keepalives and logging
    @param slavename: the name the slave supplied; must equal ours
    @return: C{self}, or whatever the L{DuplicateSlaveArbitrator} returns
             when a slave of this name is already connected
    """
    assert slavename == self.slavename
    metrics.MetricCountEvent.log("attached_slaves", 1)

    # record when this connection attempt occurred
    if self.slave_status:
        self.slave_status.recordConnectTime()

    # try to use TCP keepalives
    try:
        mind.broker.transport.setTcpKeepAlive(1)
    except Exception:
        # Best effort only: a failure here must not prevent the slave
        # from attaching.  Catching Exception (rather than everything)
        # lets KeyboardInterrupt and SystemExit propagate.
        log.msg("could not enable TCP keepalive for slave '%s'"
                % (slavename,))

    if self.isConnected():
        # duplicate slave - send it to arbitration
        arb = botmaster.DuplicateSlaveArbitrator(self)
        return arb.getPerspective(mind, slavename)
    else:
        log.msg("slave '%s' attaching from %s" % (slavename, mind.broker.transport.getPeer()))
        return self
286
def doKeepalive(self):
    """Send one keepalive to the attached slave and schedule the next.

    The next call is scheduled first, so the timer keeps running even
    while no slave is attached.
    """
    self.keepalive_timer = reactor.callLater(self.keepalive_interval,
                                             self.doKeepalive)
    remote = self.slave
    if remote:
        failure_note = "Keepalive failed for '%s'" % (self.slavename, )
        d = remote.callRemote("print", "Received keepalive from master")
        d.addErrback(log.msg, failure_note)
294
def stopKeepaliveTimer(self):
    """Cancel the pending keepalive call, if any, and forget it.

    The reference is cleared after cancelling so that calling this method
    again does not try to cancel the same delayed call a second time.
    """
    if self.keepalive_timer:
        self.keepalive_timer.cancel()
        self.keepalive_timer = None
298
def startKeepaliveTimer(self):
    """Begin the keepalive cycle by running doKeepalive once; it
    reschedules itself from then on.  Requires a keepalive interval."""
    assert self.keepalive_interval
    announcement = ("Starting buildslave keepalive timer for '%s'"
                    % (self.slavename, ))
    log.msg(announcement)
    self.doKeepalive()
304
def isConnected(self):
    """Return C{self.slave}: the remote reference while a slave is
    attached, None otherwise.  Intended to be used as a truth value."""
    remote = self.slave
    return remote
307
def _missing_timer_fired(self):
    """Timer callback: mail the configured recipients that this slave
    went away.

    @return: None if this service no longer has a parent, otherwise the
             result of C{_mail_missing_message}
    """
    self.missing_timer = None
    # notify people, but only if we're still in the config
    if not self.parent:
        return

    status = self.botmaster.master.getStatus()
    # the timer ran for missing_timeout seconds, so this is only an
    # approximation of the disconnect time
    pieces = [
        "The Buildbot working for '%s'\n" % status.getTitle(),
        ("has noticed that the buildslave named %s went away\n" %
         self.slavename),
        "\n",
        ("It last disconnected at %s (buildmaster-local time)\n" %
         time.ctime(time.time() - self.missing_timeout)),
        "\n",
        "The admin on record (as reported by BUILDSLAVE:info/admin)\n",
        "was '%s'.\n" % self.slave_status.getAdmin(),
        "\n",
        "Sincerely,\n",
        " The Buildbot\n",
        " %s\n" % status.getTitleURL(),
    ]
    body = "".join(pieces)
    subject = "Buildbot: buildslave %s was lost" % self.slavename
    return self._mail_missing_message(subject, body)
331 332
def updateSlave(self):
    """Push the current builder list to the slave, if one is attached.

    @return: a Deferred; the one from C{sendBuilderList} when a slave is
             attached, otherwise one that has already fired with None."""
    if not self.slave:
        return defer.succeed(None)
    return self.sendBuilderList()
342
def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
    """Forward build start/finish notifications to the slave status.

    @param buildStarted: if true, passed to C{slave_status.buildStarted}
    @param buildFinished: if true, passed to C{slave_status.buildFinished}
    """
    status = self.slave_status
    if buildStarted:
        status.buildStarted(buildStarted)
    if buildFinished:
        status.buildFinished(buildFinished)
@metrics.countMethod('AbstractBuildSlave.attached()')
def attached(self, bot):
    """This is called when the slave connects.

    Information about the slave is gathered through a chain of remote
    calls (C{print}, C{getSlaveInfo}, C{getVersion}, C{getCommands}) and
    only applied to this object once the whole chain has succeeded.

    @param bot: the remote reference to the slave's bot; stored as
                C{self.slave} when the slave is accepted
    @return: a Deferred that fires when the attachment is complete
    """

    # the botmaster should ensure this.
    assert not self.isConnected()

    metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1)

    # set up the subscription point for eventual detachment
    self.detached_subs = subscription.SubscriptionPoint("detached")

    # now we go through a sequence of calls, gathering information, then
    # tell the Botmaster that it can finally give this slave to all the
    # Builders that care about it.

    # we accumulate slave information in this 'state' dictionary, then
    # set it atomically if we make it far enough through the process
    state = {}

    # Reset graceful shutdown status
    self.slave_status.setGraceful(False)
    # We want to know when the graceful shutdown flag changes
    self.slave_status.addGracefulWatcher(self._gracefulChanged)

    d = defer.succeed(None)
    def _log_attachment_on_slave(res):
        d1 = bot.callRemote("print", "attached")
        # a failure to print on the slave side is not important
        d1.addErrback(lambda why: None)
        return d1
    d.addCallback(_log_attachment_on_slave)

    def _get_info(res):
        d1 = bot.callRemote("getSlaveInfo")
        def _got_info(info):
            log.msg("Got slaveinfo from '%s'" % self.slavename)
            # TODO: info{} might have other keys
            state["admin"] = info.get("admin")
            state["host"] = info.get("host")
            state["access_uri"] = info.get("access_uri", None)
            state["slave_environ"] = info.get("environ", {})
            state["slave_basedir"] = info.get("basedir", None)
            state["slave_system"] = info.get("system", None)
        def _info_unavailable(why):
            # any failure other than NoSuchMethod is re-raised by trap()
            why.trap(pb.NoSuchMethod)
            # maybe an old slave, doesn't implement remote_getSlaveInfo
            log.msg("BuildSlave.info_unavailable")
            log.err(why)
        d1.addCallbacks(_got_info, _info_unavailable)
        return d1
    d.addCallback(_get_info)
    # NOTE: this runs synchronously, while the callbacks are still being
    # queued; it is not part of the Deferred chain
    self.startKeepaliveTimer()

    def _get_version(res):
        # this 'd' is local to _get_version and shadows the outer chain
        d = bot.callRemote("getVersion")
        def _got_version(version):
            state["version"] = version
        def _version_unavailable(why):
            why.trap(pb.NoSuchMethod)
            # probably an old slave
            state["version"] = '(unknown)'
        d.addCallbacks(_got_version, _version_unavailable)
        return d
    d.addCallback(_get_version)

    def _get_commands(res):
        d1 = bot.callRemote("getCommands")
        def _got_commands(commands):
            state["slave_commands"] = commands
        def _commands_unavailable(why):
            # probably an old slave
            log.msg("BuildSlave._commands_unavailable")
            # an AttributeError is swallowed silently; anything else is
            # logged (and then swallowed as well)
            if why.check(AttributeError):
                return
            log.err(why)
        d1.addCallbacks(_got_commands, _commands_unavailable)
        return d1
    d.addCallback(_get_commands)

    def _accept_slave(res):
        # everything was gathered: publish the accumulated state
        self.slave_status.setAdmin(state.get("admin"))
        self.slave_status.setHost(state.get("host"))
        self.slave_status.setAccessURI(state.get("access_uri"))
        self.slave_status.setVersion(state.get("version"))
        self.slave_status.setConnected(True)
        self.slave_commands = state.get("slave_commands")
        self.slave_environ = state.get("slave_environ")
        self.slave_basedir = state.get("slave_basedir")
        self.slave_system = state.get("slave_system")
        self.slave = bot
        if self.slave_system == "win32":
            self.path_module = namedModule("win32path")
        else:
            # most everything accepts / as separator, so posix should be a
            # reasonable fallback
            self.path_module = namedModule("posixpath")
        log.msg("bot attached")
        self.messageReceivedFromSlave()
        self.stopMissingTimer()
        self.botmaster.master.status.slaveConnected(self.slavename)

        return self.updateSlave()
    d.addCallback(_accept_slave)
    d.addCallback(lambda _:
                  self.botmaster.maybeStartBuildsForSlave(self.slavename))

    # Finally, the slave gets a reference to this BuildSlave. They
    # receive this later, after we've started using them.
    d.addCallback(lambda _: self)
    return d
def messageReceivedFromSlave(self):
    """Record the current time as the moment of the last message from
    the slave, both on this object and on its status object."""
    stamp = time.time()
    self.lastMessageReceived = stamp
    self.slave_status.setLastMessageReceived(stamp)
467
def detached(self, mind):
    """Tear down the state of a slave connection that has gone away.

    Marks the slave as disconnected, stops the keepalive timer, releases
    the locks, and schedules delivery of the "detached" subscription.

    @param mind: not used by this implementation
    """
    metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1)
    self.slave = None
    # force the builder list to be re-sent on the next attachment
    self._old_builder_list = []
    self.slave_status.removeGracefulWatcher(self._gracefulChanged)
    self.slave_status.setConnected(False)
    log.msg("BuildSlave.detached(%s)" % self.slavename)
    self.botmaster.master.status.slaveDisconnected(self.slavename)
    self.stopKeepaliveTimer()
    self.releaseLocks()

    # notify watchers, but do so in the next reactor iteration so that
    # any further detached() action by subclasses happens first
    def notif():
        subs = self.detached_subs
        self.detached_subs = None
        # detached_subs only exists between attached() and this point;
        # without the guard a second detached() (or one without a prior
        # attached()) would raise AttributeError inside the reactor
        if subs is not None:
            subs.deliver()
    reactor.callLater(0, notif)
def subscribeToDetach(self, callback):
    """
    Request that C{callback} be invoked with no arguments when the
    L{detached} method is invoked.  Only valid while attached.

    @returns: L{Subscription}
    """
    subscription_point = self.detached_subs
    assert subscription_point, "detached_subs is only set if attached"
    return subscription_point.subscribe(callback)
496
def disconnect(self):
    """Forcibly disconnect the slave.

    The TCP connection is severed; the returned Deferred fires (with
    None) when the connection is probably gone.  A slave that is still
    alive will probably try to reconnect a moment later.

    There are two callers.  One is the removal of a slave from the config
    file: its reconnection attempt is then rejected as coming from an
    unknown slave.  The other is the arrival of a second connection for
    the same slave, in which case the older connection is dropped.
    """
    remote = self.slave
    if remote:
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(remote)
    return defer.succeed(None)
518
def _disconnect(self, slave):
    """Close the connection behind C{slave}.

    @param slave: remote reference whose C{broker.transport} is closed
    @return: a Deferred that fires with None one reactor iteration after
             the disconnect notification for C{slave} has been delivered
    """
    # all kinds of teardown will happen as a result of
    # loseConnection(), but it happens after a reactor iteration or
    # two. Hook the actual disconnect so we can know when it is safe
    # to connect the new slave. We have to wait one additional
    # iteration (with callLater(0)) to make sure the *other*
    # notifyOnDisconnect handlers have had a chance to run.
    d = defer.Deferred()

    # notifyOnDisconnect runs the callback with one argument, the
    # RemoteReference being disconnected.
    def _disconnected(rref):
        reactor.callLater(0, d.callback, None)
    slave.notifyOnDisconnect(_disconnected)
    tport = slave.broker.transport
    # this is the polite way to request that a socket be closed
    tport.loseConnection()
    try:
        # but really we don't want to wait for the transmit queue to
        # drain. The remote end is unlikely to ACK the data, so we'd
        # probably have to wait for a (20-minute) TCP timeout.
        #tport._closeSocket()
        # however, doing _closeSocket (whether before or after
        # loseConnection) somehow prevents the notifyOnDisconnect
        # handlers from being run. Bummer.
        tport.offset = 0
        tport.dataBuffer = ""
    except Exception:
        # however, these hacks are pretty internal, so don't blow up if
        # they fail or are unavailable.  Only Exception is caught so that
        # KeyboardInterrupt and SystemExit are not swallowed here.
        log.msg("failed to accelerate the shutdown process")
    log.msg("waiting for slave to finish disconnecting")

    return d
def sendBuilderList(self):
    """Send the slave the list of builders it should serve.

    The list consists of C{(builder name, slavebuilddir)} pairs.  Nothing
    is sent if it equals the list that was last sent successfully.

    @return: a Deferred; already fired with None when nothing had to be
             sent, otherwise the result of the remote C{setBuilderList}
    """
    wanted = [(builder.name, builder.config.slavebuilddir)
              for builder in self.botmaster.getBuildersForSlave(self.slavename)]
    if wanted == self._old_builder_list:
        return defer.succeed(None)

    def _remember(result):
        # only remember the list once the slave has accepted it
        self._old_builder_list = wanted
        return result

    d = self.slave.callRemote("setBuilderList", wanted)
    d.addCallback(_remember)
    return d
def perspective_keepalive(self):
    """Remote keepalive call from the slave: note that a message was
    just received."""
    self.messageReceivedFromSlave()
569
def perspective_shutdown(self):
    """Remote call from the slave asking to be shut down: set the
    graceful-shutdown flag on the slave status."""
    announcement = "slave %s wants to shut down" % self.slavename
    log.msg(announcement)
    self.slave_status.setGraceful(True)
573
def addSlaveBuilder(self, sb):
    """Register C{sb} under its C{builder_name}, replacing any previous
    entry of that name."""
    key = sb.builder_name
    self.slavebuilders[key] = sb
576
def removeSlaveBuilder(self, sb):
    """Forget the slave builder registered under C{sb.builder_name};
    a name that is not registered is silently ignored."""
    self.slavebuilders.pop(sb.builder_name, None)
582
def buildFinished(self, sb):
    """Called when a build on this slave is finished: ask the botmaster
    to try starting further builds for this slave.

    @param sb: not used by this implementation
    """
    botmaster = self.botmaster
    botmaster.maybeStartBuildsForSlave(self.slavename)
586
def canStartBuild(self):
    """
    Decide whether this buildslave may start another build.  This is the
    hook for limiting overall concurrency on the buildslave.

    Note for subclassers: if a slave can become willing to start a build
    without any action on that slave (for example, by a resource in use on
    another slave becoming available), then you must arrange for
    L{maybeStartBuildsForSlave} to be called at that time, or builds on
    this slave will not start.

    @return: False while a graceful shutdown is pending, while
             C{max_builds} busy slave builders already exist, or while a
             lock is unavailable; True otherwise
    """
    # a slave that is waiting to shut down gracefully accepts no new jobs
    if self.slave_status.getGraceful():
        return False

    limit = self.max_builds
    if limit:
        busy_count = sum(1 for sb in self.slavebuilders.values()
                         if sb.isBusy())
        if busy_count >= limit:
            return False

    if self.locksAvailable():
        return True
    return False
614
def _mail_missing_message(self, subject, text):
    """Mail C{text} to the C{notify_on_missing} recipients.

    @param subject: value of the Subject header
    @param text: message payload
    @return: the Deferred from the notifier's C{sendMessage} (returned
             for testing purposes)
    """
    # Prefer a MailNotifier that is already configured on the master:
    # that gives us a fromaddr and a relayhost.
    notifier = None
    for candidate in self.botmaster.master.status:
        if isinstance(candidate, MailNotifier):
            notifier = candidate
            break
    if notifier is None:
        # if not, they get a default MailNotifier, which always uses SMTP
        # to localhost and uses a dummy fromaddr of "buildbot".
        log.msg("buildslave-missing msg using default MailNotifier")
        notifier = MailNotifier("buildbot")

    # now construct the mail
    recipients = self.notify_on_missing
    m = Message()
    m.set_payload(text)
    m['Date'] = formatdate(localtime=True)
    m['Subject'] = subject
    m['From'] = notifier.fromaddr
    m['To'] = ", ".join(recipients)
    return notifier.sendMessage(m, recipients)
639
def _gracefulChanged(self, graceful):
    """Watcher for the graceful-shutdown setting: whenever it changes,
    check whether the slave can be shut down now.

    @param graceful: the new setting; not inspected here, since
                     maybeShutdown reads it from the slave status
    """
    self.maybeShutdown()
@defer.inlineCallbacks
def shutdown(self):
    """Shutdown the slave

    The slave's own remote C{shutdown} method is tried first.  If the
    slave does not implement it, C{shutdown} is called on the first slave
    builder that has a remote reference instead.

    @return: a Deferred (via inlineCallbacks) that fires when the
             shutdown request has been handled
    """
    if not self.slave:
        log.msg("no remote; slave is already shut down")
        return

    # First, try the "new" way - calling our own remote's shutdown
    # method.  The method was only added in 0.8.3, so ignore NoSuchMethod
    # failures.
    def new_way():
        d = self.slave.callRemote('shutdown')
        d.addCallback(lambda _: True)  # successful shutdown request
        def check_nsm(f):
            f.trap(pb.NoSuchMethod)
            return False  # fall through to the old way
        d.addErrback(check_nsm)
        def check_connlost(f):
            f.trap(pb.PBConnectionLost)
            return True  # the slave is gone, so call it finished
        d.addErrback(check_connlost)
        return d

    if (yield new_way()):
        return  # done!

    # Now, the old way.  Look for a builder with a remote reference to the
    # client side slave.  If we can find one, then call "shutdown" on the
    # remote builder, which will cause the slave buildbot process to exit.
    def old_way():
        d = None
        for b in self.slavebuilders.values():
            if b.remote:
                d = b.remote.callRemote("shutdown")
                break

        if d:
            log.msg("Shutting down (old) slave: %s" % self.slavename)
            # The remote shutdown call will not complete successfully since
            # the buildbot process exits almost immediately after getting
            # the shutdown request.
            # Here we look at the reason why the remote call failed, and if
            # it's because the connection was lost, that means the slave
            # shutdown as expected.
            def _errback(why):
                if why.check(pb.PBConnectionLost):
                    log.msg("Lost connection to %s" % self.slavename)
                else:
                    # pass the failure itself, so that the traceback is
                    # logged and not just the description
                    log.err(why,
                            "Unexpected error when trying to shutdown %s"
                            % self.slavename)
            d.addErrback(_errback)
            return d
        log.err("Couldn't find remote builder to shut down slave")
        return defer.succeed(None)
    yield old_way()
def maybeShutdown(self):
    """Shut this slave down, provided a graceful shutdown was requested
    and none of its slave builders is busy."""
    if not self.slave_status.getGraceful():
        return
    busy_count = sum(1 for sb in self.slavebuilders.values()
                     if sb.isBusy())
    if busy_count:
        return
    shutting_down = self.shutdown()
    shutting_down.addErrback(log.err, 'error while shutting down slave')
710
class BuildSlave(AbstractBuildSlave):
    """Concrete build slave: attaches itself to the builders once the
    slave has accepted the builder list, and watches for the slave going
    missing after it detaches."""

    def sendBuilderList(self):
        """Send the builder list, then attach to every builder the slave
        reported back.

        @return: a Deferred; fires with a DeferredList result when
                 builders were attached, with None otherwise (also when
                 sending the list failed, which is only logged)
        """
        def _attach_builders(slist):
            # Nothing has changed, so don't need to re-attach to everything
            if not slist:
                return
            attaching = []
            for name, remote in slist.items():
                # use get() since we might have changed our mind since then
                builder = self.botmaster.builders.get(name)
                if not builder:
                    continue
                attaching.append(
                    builder.attached(self, remote, self.slave_commands))
            return defer.DeferredList(attaching)

        def _report_failure(why):
            log.msg("BuildSlave.sendBuilderList (%s) failed" % self)
            log.err(why)
            # TODO: hang up on them?, without setBuilderList we can't use
            # them

        d = AbstractBuildSlave.sendBuilderList(self)
        d.addCallbacks(_attach_builders, _report_failure)
        return d

    def detached(self, mind):
        """After the base-class teardown, tell the botmaster the slave is
        lost and start the missing-slave timer."""
        AbstractBuildSlave.detached(self, mind)
        self.botmaster.slaveLost(self)
        self.startMissingTimer()

    def buildFinished(self, sb):
        """This is called when a build on this slave is finished."""
        AbstractBuildSlave.buildFinished(self, sb)

        # When a graceful shutdown is pending and no builder is active
        # any more, it is now safe to disconnect.
        self.maybeShutdown()
747
748 -class AbstractLatentBuildSlave(AbstractBuildSlave):
749 """A build slave that will start up a slave instance when needed. 750 751 To use, subclass and implement start_instance and stop_instance. 752 753 See ec2buildslave.py for a concrete example. Also see the stub example in 754 test/test_slaves.py. 755 """ 756 757 implements(ILatentBuildSlave) 758 759 substantiated = False 760 substantiation_deferred = None 761 substantiation_build = None 762 insubstantiating = False 763 build_wait_timer = None 764 _shutdown_callback_handle = None 765
def __init__(self, name, password, max_builds=None,
             notify_on_missing=None, missing_timeout=60*20,
             build_wait_timeout=60*10,
             properties=None, locks=None):
    """
    The arguments other than C{build_wait_timeout} are passed through to
    L{AbstractBuildSlave.__init__}.

    @param build_wait_timeout: delay, in seconds, between the end of the
        last build and the soft disconnect of the slave; 0 insubstantiates
        as soon as no build is left, and a negative value disables the
        timer
    """
    # None stands in for "empty" so that no mutable default object is
    # shared between instances; normalise before calling the base class.
    if notify_on_missing is None:
        notify_on_missing = []
    if properties is None:
        properties = {}
    AbstractBuildSlave.__init__(
        self, name, password, max_builds, notify_on_missing,
        missing_timeout, properties, locks)
    # names of the builders that currently run a build on this slave
    self.building = set()
    self.build_wait_timeout = build_wait_timeout
775
def start_instance(self, build):
    """Start the instance that will try to connect with this master.

    Subclasses must override this.  The override should return a Deferred
    firing with True (instance started) or False (instance not started,
    so don't run a build here); problems should use an errback.

    @raise NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError
782
def stop_instance(self, fast=False):
    """Shut the instance down.  Subclasses must override this.

    @raise NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError
786
def substantiate(self, sb, build):
    """Make sure an instance is (being) started for C{build}.

    @param sb: not used by this implementation
    @param build: passed on to C{start_instance}
    @return: a Deferred; already fired with True if the slave is
             substantiated, otherwise the (shared) substantiation
             Deferred
    """
    if self.substantiated:
        # restart the idle countdown
        self._clearBuildWaitTimer()
        self._setBuildWaitTimer()
        return defer.succeed(True)

    if self.substantiation_deferred is not None:
        # a substantiation is already under way; share its Deferred
        return self.substantiation_deferred

    if self.parent and not self.missing_timer:
        # start timer. if timer times out, fail deferred
        self.missing_timer = reactor.callLater(
            self.missing_timeout,
            self._substantiation_failed, defer.TimeoutError())
    self.substantiation_deferred = defer.Deferred()
    self.substantiation_build = build
    if self.slave is None:
        starting = self._substantiate(build)  # start up instance
        starting.addErrback(log.err, "while substantiating")
    # else: we're waiting for an old one to detach. the _substantiate
    # will be done in ``detached`` below.
    return self.substantiation_deferred
806
def _substantiate(self, build):
    """Start the instance and register a shutdown trigger for it.

    @param build: passed on to C{start_instance}
    @return: the Deferred from C{start_instance}, with the result
             handlers below attached
    """
    # register event trigger
    d = self.start_instance(build)
    # make sure the slave is soft-disconnected before the reactor shuts
    # down
    self._shutdown_callback_handle = reactor.addSystemEventTrigger(
        'before', 'shutdown', self._soft_disconnect, fast=True)
    def start_instance_result(result):
        # If we don't report success, then preparation failed.
        if not result:
            log.msg("Slave '%s' doesn not want to substantiate at this time" % (self.slavename,))
            # this 'd' is local and shadows the outer Deferred: it is the
            # substantiation Deferred handed out by substantiate()
            d = self.substantiation_deferred
            self.substantiation_deferred = None
            d.callback(False)
        return result
    def clean_up(failure):
        if self.missing_timer is not None:
            self.missing_timer.cancel()
            self._substantiation_failed(failure)
        if self._shutdown_callback_handle is not None:
            handle = self._shutdown_callback_handle
            # deleting the instance attribute exposes the class-level
            # default (None) again
            del self._shutdown_callback_handle
            reactor.removeSystemEventTrigger(handle)
        # keep the failure flowing to the caller's errbacks
        return failure
    d.addCallbacks(start_instance_result, clean_up)
    return d
def attached(self, bot):
    """Accept a connecting slave only if it is expected.

    A connection arriving while no substantiation is pending is refused,
    unless C{build_wait_timeout} is negative.

    @return: the Deferred from L{AbstractBuildSlave.attached}, or a
             failed Deferred (RuntimeError) for a refused connection
    """
    unexpected = (self.substantiation_deferred is None
                  and self.build_wait_timeout >= 0)
    if not unexpected:
        return AbstractBuildSlave.attached(self, bot)

    msg = ('Slave %s received connection while not trying to '
           'substantiate. Disconnecting.' % (self.slavename,))
    log.msg(msg)
    self._disconnect(bot)
    return defer.fail(RuntimeError(msg))
840
def detached(self, mind):
    """After the base-class teardown, start the instance again if a
    substantiation is still pending."""
    AbstractBuildSlave.detached(self, mind)
    if self.substantiation_deferred is None:
        return
    restarting = self._substantiate(self.substantiation_build)
    restarting.addErrback(log.err, 'while re-substantiating')
846
def _substantiation_failed(self, failure):
    """Handle a substantiation that failed or timed out.

    The pending substantiation Deferred (if any) is errbacked with
    C{failure}, the slave is insubstantiated, and the
    C{notify_on_missing} recipients are mailed.

    @return: None when nobody is to be notified, otherwise the result of
             C{_mail_missing_message}
    """
    self.missing_timer = None
    pending = self.substantiation_deferred
    if pending:
        self.substantiation_deferred = None
        self.substantiation_build = None
        pending.errback(failure)
    self.insubstantiate()
    # notify people, but only if we're still in the config
    if not (self.parent and self.notify_on_missing):
        return

    status = self.botmaster.master.getStatus()
    # the request time is approximated from the timeout
    pieces = [
        "The Buildbot working for '%s'\n" % status.getTitle(),
        ("has noticed that the latent buildslave named %s \n" %
         self.slavename),
        "never substantiated after a request\n",
        "\n",
        ("The request was made at %s (buildmaster-local time)\n" %
         time.ctime(time.time() - self.missing_timeout)),
        "\n",
        "Sincerely,\n",
        " The Buildbot\n",
        " %s\n" % status.getTitleURL(),
    ]
    body = "".join(pieces)
    subject = "Buildbot: buildslave %s never substantiated" % self.slavename
    return self._mail_missing_message(subject, body)
874
def canStartBuild(self):
    """Refuse builds while the instance is being insubstantiated;
    otherwise defer to L{AbstractBuildSlave.canStartBuild}."""
    if not self.insubstantiating:
        return AbstractBuildSlave.canStartBuild(self)
    return False
879
def buildStarted(self, sb):
    """Note that a build for C{sb.builder_name} is running: stop the
    idle countdown and remember the builder name.  The slave must be
    substantiated."""
    assert self.substantiated
    self._clearBuildWaitTimer()
    builder_name = sb.builder_name
    self.building.add(builder_name)
884
def buildFinished(self, sb):
    """Called when a build on this slave is finished.

    Once no build is left, the instance is insubstantiated right away
    (C{build_wait_timeout == 0}) or the idle countdown is started.
    """
    AbstractBuildSlave.buildFinished(self, sb)

    # discard() rather than remove(): insubstantiate() empties
    # self.building, so the name may be gone already when a build that
    # was still running reports its end
    self.building.discard(sb.builder_name)
    if not self.building:
        if self.build_wait_timeout == 0:
            self.insubstantiate()
        else:
            self._setBuildWaitTimer()
894
def _clearBuildWaitTimer(self):
    """Forget the idle countdown, cancelling it first if it is still
    active."""
    countdown = self.build_wait_timer
    if countdown is None:
        return
    if countdown.active():
        countdown.cancel()
    self.build_wait_timer = None
900
def _setBuildWaitTimer(self):
    """(Re)arm the timer that soft-disconnects this slave.

    Any existing timer is cleared first.  With a negative
    C{build_wait_timeout} no new timer is scheduled.
    """
    self._clearBuildWaitTimer()
    delay = self.build_wait_timeout
    if delay < 0:
        # negative timeout: never schedule the soft disconnect
        return
    self.build_wait_timer = reactor.callLater(delay, self._soft_disconnect)
@defer.inlineCallbacks
def insubstantiate(self, fast=False):
    """Stop the slave's instance and mark the slave as not substantiated.

    While this is in progress C{self.insubstantiating} is True (which makes
    canStartBuild refuse new builds).

    @param fast: passed through to C{stop_instance}.
    @return: a Deferred (via inlineCallbacks) that fires once the Deferred
        returned by C{stop_instance} has fired; a failure of
        C{stop_instance} is propagated.
    """
    self.insubstantiating = True
    try:
        self._clearBuildWaitTimer()
        d = self.stop_instance(fast)
        if self._shutdown_callback_handle is not None:
            # the instance is being stopped now, so the shutdown trigger
            # registered with the reactor is no longer needed
            handle = self._shutdown_callback_handle
            del self._shutdown_callback_handle
            reactor.removeSystemEventTrigger(handle)
        self.substantiated = False
        self.building.clear()  # just to be sure
        yield d
    finally:
        # always reset the flag: if stop_instance fails and the flag stayed
        # True, canStartBuild would refuse builds on this slave forever
        self.insubstantiating = False
921
def _soft_disconnect(self, fast=False):
    """Disconnect the slave and, where appropriate, stop its instance.

    @param fast: passed through to insubstantiate() when a slave is
        currently attached.
    @return: a Deferred: the result of AbstractBuildSlave.disconnect, a
        DeferredList combining it with insubstantiate(), or the aborted
        substantiation Deferred (which has already been errbacked).
    """
    # A negative build_wait_timeout means the instance is never shut down
    # on our initiative (_setBuildWaitTimer arms no timer for it), so just
    # drop the connection.  The test used to be inverted, which left
    # instances with a timeout >= 0 running after the build-wait timer
    # fired and shut down the ones that were meant to stay up.
    if self.build_wait_timeout < 0:
        return AbstractBuildSlave.disconnect(self)

    d = AbstractBuildSlave.disconnect(self)
    if self.slave is not None:
        # this could be called when the slave needs to shut down, such as
        # in BotMaster.removeSlave, *or* when a new slave requests a
        # connection when we already have a slave. It's not clear what to
        # do in the second case: this shouldn't happen, and if it
        # does...if it's a latent slave, shutting down will probably kill
        # something we want...but we can't know what the status is. So,
        # here, we just do what should be appropriate for the first case,
        # and put our heads in the sand for the second, at least for now.
        # The best solution to the odd situation is removing it as a
        # possibility: make the master in charge of connecting to the
        # slave, rather than vice versa. TODO.
        d = defer.DeferredList([d, self.insubstantiate(fast)])
    else:
        if self.substantiation_deferred is not None:
            # unlike the previous block, we don't expect this situation when
            # ``attached`` calls ``disconnect``, only when we get a simple
            # request to "go away".
            d = self.substantiation_deferred
            self.substantiation_deferred = None
            self.substantiation_build = None
            d.errback(failure.Failure(
                RuntimeError("soft disconnect aborted substantiation")))
            if self.missing_timer:
                self.missing_timer.cancel()
                self.missing_timer = None
            self.stop_instance()
    return d
955
def disconnect(self):
    """Soft-disconnect this slave and report it as lost to the botmaster.

    Returns nothing; the Deferred produced by the soft disconnect is
    deliberately not used.
    """
    self._soft_disconnect()
    # this removes the slave from all builders. It won't come back
    # without a restart (or maybe a sighup)
    master = self.botmaster
    master.slaveLost(self)
962
def stopService(self):
    """Stop this service, soft-disconnecting an attached slave as well.

    @return: the Deferred for AbstractBuildSlave.stopService when no slave
        is attached; otherwise a DeferredList of that Deferred and the one
        from the soft disconnect.
    """
    stopped = defer.maybeDeferred(AbstractBuildSlave.stopService, self)
    if self.slave is None:
        return stopped
    # a slave is attached: wait for its soft disconnect too
    disconnected = self._soft_disconnect()
    return defer.DeferredList([stopped, disconnected])
969
def updateSlave(self):
    """Bring the slave's set of builders up to date.

    Every builder the botmaster lists for this slave that has no entry in
    C{self.slavebuilders} yet is told about this latent slave; the rest of
    the work is left to AbstractBuildSlave.updateSlave.

    @return: whatever AbstractBuildSlave.updateSlave returns (documented
        upstream as a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones).
    """
    for builder in self.botmaster.getBuildersForSlave(self.slavename):
        if builder.name in self.slavebuilders:
            # already known here
            continue
        builder.addLatentSlave(self)
    return AbstractBuildSlave.updateSlave(self)
981
def sendBuilderList(self):
    """Send the builder list and finish substantiation on success.

    Chains three steps onto the Deferred from
    AbstractBuildSlave.sendBuilderList: attach the returned remotes to the
    matching slavebuilders; on failure, errback any pending substantiation
    Deferred and cancel the missing timer; on success, mark the slave as
    substantiated and fire the pending substantiation Deferred.

    @return: the Deferred from AbstractBuildSlave.sendBuilderList with the
        callbacks described above added.
    """
    def _sent(slist):
        if not slist:
            return
        attach_deferreds = []
        for name, remote in slist.items():
            # use get() since we might have changed our mind since then.
            # we're checking on the builder in addition to the
            # slavebuilders out of a bit of paranoia.
            builder = self.botmaster.builders.get(name)
            slavebuilder = self.slavebuilders.get(name)
            if not (builder and slavebuilder):
                continue
            attach_deferreds.append(
                slavebuilder.attached(self, remote, self.slave_commands))
        return defer.DeferredList(attach_deferreds)

    def _set_failed(why):
        log.msg("BuildSlave.sendBuilderList (%s) failed" % self)
        log.err(why)
        # TODO: hang up on them?, without setBuilderList we can't use
        # them
        waiting = self.substantiation_deferred
        if waiting:
            self.substantiation_deferred = None
            self.substantiation_build = None
            waiting.errback(why)
        timer = self.missing_timer
        if timer:
            timer.cancel()
            self.missing_timer = None
        # TODO: maybe log? send an email?
        return why

    def _substantiated(res):
        log.msg("Slave %s substantiated \o/" % self.slavename)
        self.substantiated = True
        waiting = self.substantiation_deferred
        if waiting:
            log.msg("Firing %s substantiation deferred with success" % self.slavename)
            self.substantiation_deferred = None
            self.substantiation_build = None
            waiting.callback(True)
        else:
            log.msg("No substantiation deferred for %s" % self.slavename)
        # note that the missing_timer is already handled within
        # ``attached``
        if not self.building:
            self._setBuildWaitTimer()

    result = AbstractBuildSlave.sendBuilderList(self)
    result.addCallbacks(_sent, _set_failed)
    result.addCallback(_substantiated)
    return result