Package buildbot :: Module buildslave
[frames] | [no frames]

Source Code for Module buildbot.buildslave

   1  # This file is part of Buildbot.  Buildbot is free software: you can 
   2  # redistribute it and/or modify it under the terms of the GNU General Public 
   3  # License as published by the Free Software Foundation, version 2. 
   4  # 
   5  # This program is distributed in the hope that it will be useful, but WITHOUT 
   6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
   7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
   8  # details. 
   9  # 
  10  # You should have received a copy of the GNU General Public License along with 
  11  # this program; if not, write to the Free Software Foundation, Inc., 51 
  12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  13  # 
  14  # Portions Copyright Buildbot Team Members 
  15  # Portions Copyright Canonical Ltd. 2009 
  16   
  17  import time 
  18  from email.Message import Message 
  19  from email.Utils import formatdate 
  20  from zope.interface import implements 
  21  from twisted.python import log, failure 
  22  from twisted.internet import defer, reactor 
  23  from twisted.application import service 
  24  from twisted.spread import pb 
  25  from twisted.python.reflect import namedModule 
  26   
  27  from buildbot.status.slave import SlaveStatus 
  28  from buildbot.status.mail import MailNotifier 
  29  from buildbot.process import metrics, botmaster 
  30  from buildbot.interfaces import IBuildSlave, ILatentBuildSlave 
  31  from buildbot.process.properties import Properties 
  32  from buildbot.locks import LockAccess 
  33  from buildbot.util import subscription 
  34  from buildbot import config 
class AbstractBuildSlave(config.ReconfigurableServiceMixin, pb.Avatar,
                         service.MultiService):
    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list). When buildbots connect in (.attach), they get a
    reference to this instance. The BotMaster object is stashed as the
    .botmaster attribute. The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of
    running builds.  I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IBuildSlave)

    # the pending keepalive DelayedCall, or None when no timer is scheduled
    keepalive_timer = None
    # seconds between keepalives; overwritten per-instance in __init__
    keepalive_interval = None

    # reconfig slaves after builders
    reconfig_priority = 64

    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=None, missing_timeout=3600,
                 properties=None, locks=None, keepalive_interval=3600):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when
                         it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param notify_on_missing: an email address, or a list of email
                                  addresses, to notify when the missing
                                  timer fires (None means nobody)
        @type  notify_on_missing: string or list of strings
        @param missing_timeout: delay, in seconds, used for the missing
                                timer
        @param properties: properties that will be applied to builds run on
                           this slave (None means no extra properties)
        @type properties: dictionary
        @param locks: A list of locks that must be acquired before this slave
                      can be used
        @type locks: list
        @param keepalive_interval: delay, in seconds, between keepalive
                                   messages sent to an attached slave
        """
        service.MultiService.__init__(self)

        # None stands in for "empty" so that instances never share a single
        # mutable default object
        if notify_on_missing is None:
            notify_on_missing = []
        if properties is None:
            properties = {}

        self.slavename = name
        self.password = password

        # PB registration
        self.registration = None
        self.registered_port = None

        # these are set when the service is started, and unset when it is
        # stopped
        self.botmaster = None
        self.master = None

        self.slave_status = SlaveStatus(name)
        self.slave = None # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks
        # filled in by updateLocks(); start empty so that the lock helpers
        # can be called before the service has been started
        self.locks = []
        self.lock_subscriptions = []

        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            if not isinstance(i, str):
                config.error(
                    'notify_on_missing arg %r is not a string' % (i,))
        self.missing_timeout = missing_timeout
        self.missing_timer = None
        self.keepalive_interval = keepalive_interval

        self.detached_subs = None

        self._old_builder_list = None

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.slavename)

    def updateLocks(self):
        """Convert the L{LockAccess} objects in C{self.access} into real lock
        objects (stored in C{self.locks}), while also maintaining the
        subscriptions to lock releases."""
        # unsubscribe from any old locks
        for s in self.lock_subscriptions:
            s.unsubscribe()

        # convert locks into their real form
        locks = []
        for access in self.access:
            if not isinstance(access, LockAccess):
                access = access.defaultAccess()
            lock = self.botmaster.getLockByID(access.lockid)
            locks.append((lock, access))
        self.locks = [(l.getLock(self), la) for l, la in locks]
        self.lock_subscriptions = [ l.subscribeToReleases(self._lockReleased)
                                    for l, la in self.locks ]

    def locksAvailable(self):
        """
        I am called to see if all the locks I depend on are available,
        in which case I return True, otherwise I return False
        """
        if not self.locks:
            return True
        for lock, access in self.locks:
            if not lock.isAvailable(access):
                return False
        return True

    def acquireLocks(self):
        """
        I am called when a build is preparing to run. I try to claim all
        the locks that are needed for a build to happen. If I can't, then
        my caller should give up the build and try to get another slave
        to look at it.

        @return: True if every lock was claimed, False if none were
        """
        log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks))
        if not self.locksAvailable():
            log.msg("slave %s can't lock, giving up" % (self, ))
            return False
        # all locks are available, claim them all
        for lock, access in self.locks:
            lock.claim(self, access)
        return True

    def releaseLocks(self):
        """
        I am called to release any locks after a build has finished
        """
        log.msg("releaseLocks(%s): %s" % (self, self.locks))
        for lock, access in self.locks:
            lock.release(self, access)

    def _lockReleased(self):
        """One of the locks for this slave was released; try scheduling
        builds."""
        if not self.botmaster:
            return # oh well..
        self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def startService(self):
        """Resolve the configured locks, start the missing timer, and then
        start the service itself."""
        self.updateLocks()
        self.startMissingTimer()
        return service.MultiService.startService(self)

    def reconfigService(self, new_config):
        """Adopt the settings of the same-named slave in C{new_config}.

        @return: a Deferred that fires once the attached slave (if any) has
                 been updated and the reconfig has been chained up to
                 L{config.ReconfigurableServiceMixin}
        """
        # Given a new BuildSlave, configure this one identically.  Because
        # BuildSlave objects are remotely referenced, we can't replace them
        # without disconnecting the slave, yet there's no reason to do that.
        new = self.findNewSlaveInstance(new_config)

        assert self.slavename == new.slavename

        # do we need to re-register?
        if (not self.registration or
            self.password != new.password or
            new_config.slavePortnum != self.registered_port):
            if self.registration:
                self.registration.unregister()
            self.password = new.password
            self.registered_port = new_config.slavePortnum
            self.registration = self.master.pbmanager.register(
                    self.registered_port, self.slavename,
                    self.password, self.getPerspective)

        # adopt new instance's configuration parameters
        self.max_builds = new.max_builds
        self.access = new.access
        self.notify_on_missing = new.notify_on_missing
        self.keepalive_interval = new.keepalive_interval

        if self.missing_timeout != new.missing_timeout:
            # restart the timer with the new timeout, but only if one was
            # running in the first place
            running_missing_timer = self.missing_timer
            self.stopMissingTimer()
            self.missing_timeout = new.missing_timeout
            if running_missing_timer:
                self.startMissingTimer()

        properties = Properties()
        properties.updateFromProperties(new.properties)
        self.properties = properties

        self.updateLocks()

        # update the attached slave's notion of which builders are attached.
        # This assumes that the relevant builders have already been configured,
        # which is why the reconfig_priority is set low in this class.
        d = self.updateSlave()

        # and chain up
        d.addCallback(lambda _ :
                config.ReconfigurableServiceMixin.reconfigService(self,
                                                            new_config))

        return d

    def stopService(self):
        """Cancel the missing timer and stop the service."""
        self.stopMissingTimer()
        return service.MultiService.stopService(self)

    def findNewSlaveInstance(self, new_config):
        """Return the slave in C{new_config.slaves} that has our name.

        @raise AssertionError: if no slave of that name is configured
        """
        # TODO: called multiple times per reconfig; use 1-element cache?
        for sl in new_config.slaves:
            if sl.slavename == self.slavename:
                return sl
        # raised explicitly (rather than ``assert 0``) so that the check
        # survives ``python -O``
        raise AssertionError("no new slave named '%s'" % self.slavename)

    def startMissingTimer(self):
        """(Re)start the missing timer, provided there is somebody to
        notify, a timeout is set, and we have a parent service."""
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer() # in case it's already running
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                self._missing_timer_fired)

    def stopMissingTimer(self):
        """Cancel the missing timer, if one is pending."""
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def getPerspective(self, mind, slavename):
        """Registered with the PB manager as the perspective factory for
        this slave (see L{reconfigService}).

        @return: this object, or the result of duplicate-slave arbitration
                 if a slave is already connected
        """
        assert slavename == self.slavename
        metrics.MetricCountEvent.log("attached_slaves", 1)

        # record when this connection attempt occurred
        if self.slave_status:
            self.slave_status.recordConnectTime()

        if self.isConnected():
            # duplicate slave - send it to arbitration
            arb = botmaster.DuplicateSlaveArbitrator(self)
            return arb.getPerspective(mind, slavename)
        else:
            log.msg("slave '%s' attaching from %s" % (slavename, mind.broker.transport.getPeer()))
            return self

    def doKeepalive(self):
        """Schedule the next keepalive, then send one to the slave if it is
        currently attached."""
        self.keepalive_timer = reactor.callLater(self.keepalive_interval,
                                                self.doKeepalive)
        if not self.slave:
            return
        d = self.slave.callRemote("print", "Received keepalive from master")
        d.addErrback(log.msg, "Keepalive failed for '%s'" % (self.slavename, ))

    def stopKeepaliveTimer(self):
        """Cancel the keepalive timer, if one is pending."""
        if self.keepalive_timer:
            self.keepalive_timer.cancel()
            # forget the cancelled call so that stopping twice is harmless
            self.keepalive_timer = None

    def startKeepaliveTimer(self):
        """Begin the periodic keepalive cycle."""
        assert self.keepalive_interval
        log.msg("Starting buildslave keepalive timer for '%s'" % \
                                        (self.slavename, ))
        self.doKeepalive()

    def isConnected(self):
        """Return the attached slave reference, which is false when no slave
        is connected."""
        return self.slave

    def _missing_timer_fired(self):
        """The slave has been gone for C{missing_timeout} seconds; mail the
        C{notify_on_missing} recipients."""
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout)) # approx
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        subject = "Buildbot: buildslave %s was lost" % self.slavename
        return self._mail_missing_message(subject, text)

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.slave:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
        """Forward build start/finish events to our L{SlaveStatus}."""
        if buildStarted:
            self.slave_status.buildStarted(buildStarted)
        if buildFinished:
            self.slave_status.buildFinished(buildFinished)

    @metrics.countMethod('AbstractBuildSlave.attached()')
    def attached(self, bot):
        """This is called when the slave connects.

        @param bot: the remote reference to the newly connected slave
        @return: a Deferred that fires when the attachment is complete
        """

        # the botmaster should ensure this.
        assert not self.isConnected()

        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1)

        # set up the subscription point for eventual detachment
        self.detached_subs = subscription.SubscriptionPoint("detached")

        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # we accumulate slave information in this 'state' dictionary, then
        # set it atomically if we make it far enough through the process
        state = {}

        # Reset graceful shutdown status
        self.slave_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.slave_status.addGracefulWatcher(self._gracefulChanged)

        d = defer.succeed(None)
        def _log_attachment_on_slave(res):
            d1 = bot.callRemote("print", "attached")
            d1.addErrback(lambda why: None)
            return d1
        d.addCallback(_log_attachment_on_slave)

        def _get_info(res):
            d1 = bot.callRemote("getSlaveInfo")
            def _got_info(info):
                log.msg("Got slaveinfo from '%s'" % self.slavename)
                # TODO: info{} might have other keys
                state["admin"] = info.get("admin")
                state["host"] = info.get("host")
                state["access_uri"] = info.get("access_uri", None)
                state["slave_environ"] = info.get("environ", {})
                state["slave_basedir"] = info.get("basedir", None)
                state["slave_system"] = info.get("system", None)
            def _info_unavailable(why):
                why.trap(pb.NoSuchMethod)
                # maybe an old slave, doesn't implement remote_getSlaveInfo
                log.msg("BuildSlave.info_unavailable")
                log.err(why)
            d1.addCallbacks(_got_info, _info_unavailable)
            return d1
        d.addCallback(_get_info)
        # note: this runs right away, not as part of the callback chain
        self.startKeepaliveTimer()

        def _get_version(res):
            d1 = bot.callRemote("getVersion")
            def _got_version(version):
                state["version"] = version
            def _version_unavailable(why):
                why.trap(pb.NoSuchMethod)
                # probably an old slave
                state["version"] = '(unknown)'
            d1.addCallbacks(_got_version, _version_unavailable)
            return d1
        d.addCallback(_get_version)

        def _get_commands(res):
            d1 = bot.callRemote("getCommands")
            def _got_commands(commands):
                state["slave_commands"] = commands
            def _commands_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave._commands_unavailable")
                if why.check(AttributeError):
                    return
                log.err(why)
            d1.addCallbacks(_got_commands, _commands_unavailable)
            return d1
        d.addCallback(_get_commands)

        def _accept_slave(res):
            self.slave_status.setAdmin(state.get("admin"))
            self.slave_status.setHost(state.get("host"))
            self.slave_status.setAccessURI(state.get("access_uri"))
            self.slave_status.setVersion(state.get("version"))
            self.slave_status.setConnected(True)
            self.slave_commands = state.get("slave_commands")
            self.slave_environ = state.get("slave_environ")
            self.slave_basedir = state.get("slave_basedir")
            self.slave_system = state.get("slave_system")
            self.slave = bot
            if self.slave_system == "win32":
                self.path_module = namedModule("win32path")
            else:
                # most eveything accepts / as separator, so posix should be a
                # reasonable fallback
                self.path_module = namedModule("posixpath")
            log.msg("bot attached")
            self.messageReceivedFromSlave()
            self.stopMissingTimer()
            self.botmaster.master.status.slaveConnected(self.slavename)

            return self.updateSlave()
        d.addCallback(_accept_slave)
        d.addCallback(lambda _:
                self.botmaster.maybeStartBuildsForSlave(self.slavename))

        # Finally, the slave gets a reference to this BuildSlave. They
        # receive this later, after we've started using them.
        d.addCallback(lambda _: self)
        return d

    def messageReceivedFromSlave(self):
        """Record the current time as the time of the last message from the
        slave, both here and in the slave status."""
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    def detached(self, mind):
        """Called when the slave connection is lost: forget the remote
        reference, update status, stop the keepalive timer, release our
        locks, and notify the detach subscribers."""
        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1)
        self.slave = None
        self._old_builder_list = []
        self.slave_status.removeGracefulWatcher(self._gracefulChanged)
        self.slave_status.setConnected(False)
        log.msg("BuildSlave.detached(%s)" % self.slavename)
        self.botmaster.master.status.slaveDisconnected(self.slavename)
        self.stopKeepaliveTimer()
        self.releaseLocks()

        # notify watchers, but do so in the next reactor iteration so that
        # any further detached() action by subclasses happens first
        def notif():
            subs = self.detached_subs
            self.detached_subs = None
            # detached_subs is only set by attached(), so there may be
            # nothing to deliver to
            if subs is not None:
                subs.deliver()
        reactor.callLater(0, notif)

    def subscribeToDetach(self, callback):
        """
        Request that C{callback} be invoked with no arguments when the
        L{detached} method is invoked.

        @returns: L{Subscription}
        """
        assert self.detached_subs, "detached_subs is only set if attached"
        return self.detached_subs.subscribe(callback)

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the slave is still alive, they will probably try to reconnect
        again in a moment.

        This is called in two circumstances. The first is when a slave is
        removed from the config file. In this case, when they try to
        reconnect, they will be rejected as an unknown slave. The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """

        if not self.slave:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(self.slave)

    def _disconnect(self, slave):
        """Drop the connection behind the remote reference C{slave}.

        @return: a Deferred that fires with None one reactor iteration after
                 the disconnect notification has been received
        """
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new slave. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback with one argument, the
        # RemoteReference being disconnected.
        def _disconnected(rref):
            reactor.callLater(0, d.callback, None)
        slave.notifyOnDisconnect(_disconnected)
        tport = slave.broker.transport
        # this is the polite way to request that a socket be closed
        tport.loseConnection()
        try:
            # but really we don't want to wait for the transmit queue to
            # drain. The remote end is unlikely to ACK the data, so we'd
            # probably have to wait for a (20-minute) TCP timeout.
            #tport._closeSocket()
            # however, doing _closeSocket (whether before or after
            # loseConnection) somehow prevents the notifyOnDisconnect
            # handlers from being run. Bummer.
            tport.offset = 0
            tport.dataBuffer = ""
        except Exception:
            # however, these hacks are pretty internal, so don't blow up if
            # they fail or are unavailable
            log.msg("failed to accelerate the shutdown process")
        log.msg("waiting for slave to finish disconnecting")

        return d

    def sendBuilderList(self):
        """Tell the attached slave which builders it should have, unless the
        list is the same as the last one it accepted.

        @return: a Deferred that fires with the result of the remote
                 C{setBuilderList} call, or with None if nothing was sent
        """
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.config.slavebuilddir) for b in our_builders]
        if blist == self._old_builder_list:
            return defer.succeed(None)

        d = self.slave.callRemote("setBuilderList", blist)
        def sentBuilderList(ign):
            # only remember the list once the slave has accepted it
            self._old_builder_list = blist
            return ign
        d.addCallback(sentBuilderList)
        return d

    def perspective_keepalive(self):
        """Remote keepalive call from the slave: note that we heard from
        it."""
        self.messageReceivedFromSlave()

    def perspective_shutdown(self):
        """Remote request from the slave to be shut down gracefully."""
        log.msg("slave %s wants to shut down" % self.slavename)
        self.slave_status.setGraceful(True)

    def addSlaveBuilder(self, sb):
        """Register C{sb} under its builder name."""
        self.slavebuilders[sb.builder_name] = sb

    def removeSlaveBuilder(self, sb):
        """Forget C{sb}; it is not an error if it was never registered."""
        try:
            del self.slavebuilders[sb.builder_name]
        except KeyError:
            pass

    def buildFinished(self, sb):
        """This is called when a build on this slave is finished."""
        self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build.  This function can be used to limit overall
        concurrency on the buildslave.

        Note for subclassers: if a slave can become willing to start a build
        without any action on that slave (for example, by a resource in use on
        another slave becoming available), then you must arrange for
        L{maybeStartBuildsForSlave} to be called at that time, or builds on
        this slave will not start.
        """
        # If we're waiting to shutdown gracefully, then we shouldn't
        # accept any new jobs.
        if self.slave_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders.values()
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False

        if not self.locksAvailable():
            return False

        return True

    def _mail_missing_message(self, subject, text):
        """Mail C{text} with the given C{subject} to the
        C{notify_on_missing} recipients.

        @return: the Deferred from the notifier's C{sendMessage}
        """
        # first, see if we have a MailNotifier we can use. This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.master
        for st in buildmaster.statusTargets:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses SMTP
            # to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("buildslave-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")
        # now construct the mail

        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = subject
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        m['To'] = ", ".join(recipients)
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def _gracefulChanged(self, graceful):
        """This is called when our graceful shutdown setting changes"""
        self.maybeShutdown()

    @defer.deferredGenerator
    def shutdown(self):
        """Shutdown the slave"""
        if not self.slave:
            log.msg("no remote; slave is already shut down")
            return

        # First, try the "new" way - calling our own remote's shutdown
        # method.  The method was only added in 0.8.3, so ignore NoSuchMethod
        # failures.
        def new_way():
            d = self.slave.callRemote('shutdown')
            d.addCallback(lambda _ : True) # successful shutdown request
            def check_nsm(f):
                f.trap(pb.NoSuchMethod)
                return False # fall through to the old way
            d.addErrback(check_nsm)
            def check_connlost(f):
                f.trap(pb.PBConnectionLost)
                return True # the slave is gone, so call it finished
            d.addErrback(check_connlost)
            return d

        wfd = defer.waitForDeferred(new_way())
        yield wfd
        if wfd.getResult():
            return # done!

        # Now, the old way.  Look for a builder with a remote reference to the
        # client side slave.  If we can find one, then call "shutdown" on the
        # remote builder, which will cause the slave buildbot process to exit.
        def old_way():
            d = None
            for b in self.slavebuilders.values():
                if b.remote:
                    d = b.remote.callRemote("shutdown")
                    break

            if d:
                log.msg("Shutting down (old) slave: %s" % self.slavename)
                # The remote shutdown call will not complete successfully since the
                # buildbot process exits almost immediately after getting the
                # shutdown request.
                # Here we look at the reason why the remote call failed, and if
                # it's because the connection was lost, that means the slave
                # shutdown as expected.
                def _errback(why):
                    if why.check(pb.PBConnectionLost):
                        log.msg("Lost connection to %s" % self.slavename)
                    else:
                        # log the failure itself, not just a description,
                        # so that the cause is not lost
                        log.err(why,
                                "Unexpected error when trying to shutdown %s"
                                % self.slavename)
                d.addErrback(_errback)
                return d
            log.err("Couldn't find remote builder to shut down slave")
            return defer.succeed(None)
        wfd = defer.waitForDeferred(old_way())
        yield wfd
        wfd.getResult()

    def maybeShutdown(self):
        """Shut down this slave if it has been asked to shut down gracefully,
        and has no active builders."""
        if not self.slave_status.getGraceful():
            return
        active_builders = [sb for sb in self.slavebuilders.values()
                           if sb.isBusy()]
        if active_builders:
            return
        d = self.shutdown()
        d.addErrback(log.err, 'error while shutting down slave')
701
class BuildSlave(AbstractBuildSlave):
    """An L{AbstractBuildSlave} that attaches itself to its builders once the
    remote slave has accepted the builder list, and that restarts the
    missing timer whenever the slave detaches."""

    def sendBuilderList(self):
        """Send the builder list, then attach to every builder the slave
        reported back.

        @return: a Deferred; failures of the remote call are logged and
                 swallowed
        """
        def _attach_to_builders(slist):
            # Nothing has changed, so don't need to re-attach to everything
            if not slist:
                return
            pending = []
            for builder_name, remote in slist.items():
                # use get() since we might have changed our mind since then
                builder = self.botmaster.builders.get(builder_name)
                if not builder:
                    continue
                pending.append(
                    builder.attached(self, remote, self.slave_commands))
            return defer.DeferredList(pending)

        def _report_failure(why):
            log.msg("BuildSlave.sendBuilderList (%s) failed" % self)
            log.err(why)
            # TODO: hang up on them?, without setBuilderList we can't use
            # them

        d = AbstractBuildSlave.sendBuilderList(self)
        d.addCallbacks(_attach_to_builders, _report_failure)
        return d

    def detached(self, mind):
        """Do the base-class teardown, tell the botmaster the slave is gone,
        and start waiting for it to come back."""
        AbstractBuildSlave.detached(self, mind)
        self.botmaster.slaveLost(self)
        self.startMissingTimer()

    def buildFinished(self, sb):
        """This is called when a build on this slave is finished."""
        AbstractBuildSlave.buildFinished(self, sb)

        # A pending graceful shutdown can go ahead as soon as the last
        # active builder is done.
        self.maybeShutdown()
730
731 - def buildFinished(self, sb):
732 """This is called when a build on this slave is finished.""" 733 AbstractBuildSlave.buildFinished(self, sb) 734 735 # If we're gracefully shutting down, and we have no more active 736 # builders, then it's safe to disconnect 737 self.maybeShutdown()
738
739 -class AbstractLatentBuildSlave(AbstractBuildSlave):
740 """A build slave that will start up a slave instance when needed. 741 742 To use, subclass and implement start_instance and stop_instance. 743 744 See ec2buildslave.py for a concrete example. Also see the stub example in 745 test/test_slaves.py. 746 """ 747 748 implements(ILatentBuildSlave) 749 750 substantiated = False 751 substantiation_deferred = None 752 substantiation_build = None 753 build_wait_timer = None 754 _shutdown_callback_handle = None 755
756 - def __init__(self, name, password, max_builds=None, 757 notify_on_missing=[], missing_timeout=60*20, 758 build_wait_timeout=60*10, 759 properties={}, locks=None):
760 AbstractBuildSlave.__init__( 761 self, name, password, max_builds, notify_on_missing, 762 missing_timeout, properties, locks) 763 self.building = set() 764 self.build_wait_timeout = build_wait_timeout
765
766 - def start_instance(self, build):
767 # responsible for starting instance that will try to connect with this 768 # master. Should return deferred with either True (instance started) 769 # or False (instance not started, so don't run a build here). Problems 770 # should use an errback. 771 raise NotImplementedError
772
773 - def stop_instance(self, fast=False):
774 # responsible for shutting down instance. 775 raise NotImplementedError
776
777 - def substantiate(self, sb, build):
778 if self.substantiated: 779 self._clearBuildWaitTimer() 780 self._setBuildWaitTimer() 781 return defer.succeed(True) 782 if self.substantiation_deferred is None: 783 if self.parent and not self.missing_timer: 784 # start timer. if timer times out, fail deferred 785 self.missing_timer = reactor.callLater( 786 self.missing_timeout, 787 self._substantiation_failed, defer.TimeoutError()) 788 self.substantiation_deferred = defer.Deferred() 789 self.substantiation_build = build 790 if self.slave is None: 791 d = self._substantiate(build) # start up instance 792 d.addErrback(log.err, "while substantiating") 793 # else: we're waiting for an old one to detach. the _substantiate 794 # will be done in ``detached`` below. 795 return self.substantiation_deferred
796
797 - def _substantiate(self, build):
798 # register event trigger 799 d = self.start_instance(build) 800 self._shutdown_callback_handle = reactor.addSystemEventTrigger( 801 'before', 'shutdown', self._soft_disconnect, fast=True) 802 def start_instance_result(result): 803 # If we don't report success, then preparation failed. 804 if not result: 805 log.msg("Slave '%s' doesn not want to substantiate at this time" % (self.slavename,)) 806 d = self.substantiation_deferred 807 self.substantiation_deferred = None 808 d.callback(False) 809 return result
810 def clean_up(failure): 811 if self.missing_timer is not None: 812 self.missing_timer.cancel() 813 self._substantiation_failed(failure) 814 if self._shutdown_callback_handle is not None: 815 handle = self._shutdown_callback_handle 816 del self._shutdown_callback_handle 817 reactor.removeSystemEventTrigger(handle) 818 return failure
819 d.addCallbacks(start_instance_result, clean_up) 820 return d 821
822 - def attached(self, bot):
823 if self.substantiation_deferred is None: 824 msg = 'Slave %s received connection while not trying to ' \ 825 'substantiate. Disconnecting.' % (self.slavename,) 826 log.msg(msg) 827 self._disconnect(bot) 828 return defer.fail(RuntimeError(msg)) 829 return AbstractBuildSlave.attached(self, bot)
830
831 - def detached(self, mind):
832 AbstractBuildSlave.detached(self, mind) 833 if self.substantiation_deferred is not None: 834 d = self._substantiate(self.substantiation_build) 835 d.addErrback(log.err, 'while re-substantiating')
836
837 - def _substantiation_failed(self, failure):
838 self.missing_timer = None 839 if self.substantiation_deferred: 840 d = self.substantiation_deferred 841 self.substantiation_deferred = None 842 self.substantiation_build = None 843 d.errback(failure) 844 self.insubstantiate() 845 # notify people, but only if we're still in the config 846 if not self.parent or not self.notify_on_missing: 847 return 848 849 buildmaster = self.botmaster.master 850 status = buildmaster.getStatus() 851 text = "The Buildbot working for '%s'\n" % status.getTitle() 852 text += ("has noticed that the latent buildslave named %s \n" % 853 self.slavename) 854 text += "never substantiated after a request\n" 855 text += "\n" 856 text += ("The request was made at %s (buildmaster-local time)\n" % 857 time.ctime(time.time() - self.missing_timeout)) # approx 858 text += "\n" 859 text += "Sincerely,\n" 860 text += " The Buildbot\n" 861 text += " %s\n" % status.getTitleURL() 862 subject = "Buildbot: buildslave %s never substantiated" % self.slavename 863 return self._mail_missing_message(subject, text)
864
865 - def buildStarted(self, sb):
866 assert self.substantiated 867 self._clearBuildWaitTimer() 868 self.building.add(sb.builder_name)
869
870 - def buildFinished(self, sb):
871 AbstractBuildSlave.buildFinished(self, sb) 872 873 self.building.remove(sb.builder_name) 874 if not self.building: 875 self._setBuildWaitTimer()
876
877 - def _clearBuildWaitTimer(self):
878 if self.build_wait_timer is not None: 879 if self.build_wait_timer.active(): 880 self.build_wait_timer.cancel() 881 self.build_wait_timer = None
882
883 - def _setBuildWaitTimer(self):
884 self._clearBuildWaitTimer() 885 self.build_wait_timer = reactor.callLater( 886 self.build_wait_timeout, self._soft_disconnect)
887
888 - def insubstantiate(self, fast=False):
889 self._clearBuildWaitTimer() 890 d = self.stop_instance(fast) 891 if self._shutdown_callback_handle is not None: 892 handle = self._shutdown_callback_handle 893 del self._shutdown_callback_handle 894 reactor.removeSystemEventTrigger(handle) 895 self.substantiated = False 896 self.building.clear() # just to be sure 897 return d
898
def _soft_disconnect(self, fast=False):
    """Disconnect from the slave and shut the latent instance down.

    Two situations are handled:

      - a slave is attached (C{self.slave} is not None): the base-class
        disconnect is combined with C{insubstantiate(fast)};
      - no slave is attached but a substantiation is still pending: that
        pending Deferred is errbacked with a C{RuntimeError}, the missing
        timer is cancelled and C{stop_instance()} is called.

    @param fast: forwarded to C{insubstantiate} in the attached case.
    @return: a Deferred -- the C{DeferredList} in the attached case, the
        (already errbacked) substantiation Deferred when a pending
        substantiation was aborted, otherwise the result of
        C{AbstractBuildSlave.disconnect}.
    """
    # NOTE(review): this block was reconstructed from a flattened listing;
    # confirm that the missing_timer cancel and stop_instance() really
    # belong inside the "substantiation pending" branch.
    d = AbstractBuildSlave.disconnect(self)
    if self.slave is not None:
        # this could be called when the slave needs to shut down, such as
        # in BotMaster.removeSlave, *or* when a new slave requests a
        # connection when we already have a slave. It's not clear what to
        # do in the second case: this shouldn't happen, and if it
        # does...if it's a latent slave, shutting down will probably kill
        # something we want...but we can't know what the status is. So,
        # here, we just do what should be appropriate for the first case,
        # and put our heads in the sand for the second, at least for now.
        # The best solution to the odd situation is removing it as a
        # possibility: make the master in charge of connecting to the
        # slave, rather than vice versa. TODO.
        d = defer.DeferredList([d, self.insubstantiate(fast)])
    else:
        if self.substantiation_deferred is not None:
            # unlike the previous block, we don't expect this situation when
            # ``attached`` calls ``disconnect``, only when we get a simple
            # request to "go away".
            # Note that ``d`` is rebound here, so the Deferred from the
            # base-class disconnect is not the one returned on this path.
            d = self.substantiation_deferred
            self.substantiation_deferred = None
            self.substantiation_build = None
            d.errback(failure.Failure(
                RuntimeError("soft disconnect aborted substantiation")))
            if self.missing_timer:
                self.missing_timer.cancel()
                self.missing_timer = None
            # NOTE(review): ``fast`` is not forwarded to stop_instance()
            # on this path -- confirm that is intended.
            self.stop_instance()
    return d
929
def disconnect(self):
    """Soft-disconnect this slave and report it as lost to the botmaster.

    Nothing is returned: the Deferred produced by C{_soft_disconnect} is
    intentionally discarded.
    """
    # The Deferred coming back from here is deliberately not used.
    self._soft_disconnect()
    # Removing the slave from all builders: it won't come back without a
    # restart (or maybe a sighup).
    master = self.botmaster
    master.slaveLost(self)
936
def stopService(self):
    """Stop the service, soft-disconnecting an attached slave as well.

    @return: a Deferred for the base-class C{stopService}; when a slave
        is attached, a C{DeferredList} of that Deferred and the one from
        C{_soft_disconnect}.
    """
    stopped = defer.maybeDeferred(AbstractBuildSlave.stopService, self)
    if self.slave is None:
        # Nothing attached, so there is nothing further to disconnect.
        return stopped
    disconnected = self._soft_disconnect()
    return defer.DeferredList([stopped, disconnected])
943
def updateSlave(self):
    """Called to add or remove builders after the slave has connected.

    Also called after botmaster's builders are initially set.

    Every builder the botmaster lists for this slave that is not yet in
    C{self.slavebuilders} gets this slave registered through
    C{addLatentSlave} before the base-class update runs.

    @return: a Deferred that indicates when an attached slave has
    accepted the new builders and/or released the old ones."""
    for builder in self.botmaster.getBuildersForSlave(self.slavename):
        if builder.name in self.slavebuilders:
            # Already known to this slave.
            continue
        builder.addLatentSlave(self)
    return AbstractBuildSlave.updateSlave(self)
955
def sendBuilderList(self):
    """Send the builder list to the slave and complete substantiation.

    The base-class C{sendBuilderList} does the actual sending.  Three
    handlers are chained onto its Deferred:

      - C{_sent}: attaches each returned remote builder to the matching
        slave builder;
      - C{_set_failed}: on failure of the base call, logs it, errbacks
        any pending substantiation Deferred and cancels the missing
        timer, then passes the failure on;
      - C{_substantiated}: marks the slave substantiated, fires any
        pending substantiation Deferred with C{True} and starts the
        build-wait timer when nothing is building.

    @return: the Deferred from the base-class call, with the handlers
        above attached.
    """
    d = AbstractBuildSlave.sendBuilderList(self)

    def _sent(slist):
        # An empty/None list means there is nothing to attach.
        if not slist:
            return
        dl = []
        for name, remote in slist.items():
            # use get() since we might have changed our mind since then.
            # we're checking on the builder in addition to the
            # slavebuilders out of a bit of paranoia.
            b = self.botmaster.builders.get(name)
            sb = self.slavebuilders.get(name)
            if b and sb:
                d1 = sb.attached(self, remote, self.slave_commands)
                dl.append(d1)
        return defer.DeferredList(dl)

    def _set_failed(why):
        log.msg("BuildSlave.sendBuilderList (%s) failed" % self)
        log.err(why)
        # TODO: hang up on them?, without setBuilderList we can't use
        # them
        if self.substantiation_deferred:
            pending = self.substantiation_deferred
            self.substantiation_deferred = None
            self.substantiation_build = None
            pending.errback(why)
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None
        # TODO: maybe log? send an email?
        # Returning the failure keeps the chain in its error state.
        return why

    # _set_failed is registered alongside _sent, so it only sees failures
    # of the base-class call, not failures raised by _sent itself.
    d.addCallbacks(_sent, _set_failed)

    def _substantiated(res):
        # The backslash is escaped explicitly; the logged text is unchanged.
        log.msg("Slave %s substantiated \\o/" % self.slavename)
        self.substantiated = True
        if self.substantiation_deferred:
            log.msg("Firing %s substantiation deferred with success" % self.slavename)
            pending = self.substantiation_deferred
            # Clear our references before firing the Deferred.
            self.substantiation_deferred = None
            self.substantiation_build = None
            pending.callback(True)
        else:
            log.msg("No substantiation deferred for %s" % self.slavename)
        # note that the missing_timer is already handled within
        # ``attached``
        if not self.building:
            self._setBuildWaitTimer()

    d.addCallback(_substantiated)
    return d