Package buildslave :: Module bot
[frames] | no frames]

Source Code for Module buildslave.bot

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15   
 16  import os.path 
 17  import socket 
 18  import sys 
 19  import signal 
 20   
 21  from twisted.spread import pb 
 22  from twisted.python import log 
 23  from twisted.internet import error, reactor, task, defer 
 24  from twisted.application import service, internet 
 25  from twisted.cred import credentials 
 26   
 27  import buildslave 
 28  from buildslave.pbutil import ReconnectingPBClientFactory 
 29  from buildslave.commands import registry, base 
 30  from buildslave import monkeypatches 
class UnknownCommand(pb.Error):
    """Error raised by SlaveBuilder.remote_startCommand when the command
    registry has no factory for the requested command name."""
34
class SlaveBuilder(pb.Referenceable, service.Service):

    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.
    """

    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. We use it to implement the stopBuild method.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    def __init__(self, name):
        """Create a builder service named C{name}."""
        #service.Service.__init__(self) # Service has no __init__ method
        self.setName(name)

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to C{parent} and remember it as C{self.bot}."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Record C{builddir} and make sure the matching directory under the
        bot's basedir exists, creating it if necessary."""
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service and, if C{stopCommandOnShutdown} is set, halt any
        running command."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Forward an activity notification to the C{bf} attribute of the
        bot's parent, when both the bot and its parent are still attached."""
        bot = self.parent
        if bot:
            bslave = bot.parent
            if bslave:
                bf = bslave.bf
                bf.activity()

    def remote_setMaster(self, remote):
        """Store the master-side reference and ask to be told when it
        disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))

    def lostRemote(self, remote):
        """Disconnect callback for C{self.remote}: forget the reference."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback for C{self.remoteStep}: forget the reference
        and, if C{stopCommandOnShutdown} is set, halt the running command."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run.  It
        doesn't do much, but masters call it so it's still here."""
        pass

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        @raise UnknownCommand: if the registry has no factory for C{command}
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory = registry.getFactory(command)
        except KeyError:
            # call form of raise: same behavior as the old
            # "raise Class, value" statement, but also valid on Python 3
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result so that commandComplete sees None on
        # success and a Failure otherwise
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()


    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """This sends the status update to the master-side
        L{buildbot.process.step.RemoteCommand} object, giving it a sequence
        number in the process. It adds the update to a queue, and asks the
        master to acknowledge the update so it can be removed from that
        queue."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        """Callback for a successful "update" call."""
        self.activity() # update the "last activity" timer

    def ackComplete(self, dummy):
        """Callback for a successful "complete" call."""
        self.activity() # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback for remote calls: log where the failure happened and the
        failure itself, then carry on."""
        log.msg("SlaveBuilder._ackFailed:", where)
        log.err(why) # we don't really care


    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        C{failure} is None on success, or the Failure the command ended
        with. Nothing is sent if the service is no longer running or the
        remote step reference is gone."""
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None


    def remote_shutdown(self):
        """Stop the reactor immediately at the master's request (deprecated
        entry point, as the log message notes)."""
        log.msg("slave shutting down on command from master")
        log.msg("NOTE: master is using deprecated slavebuilder.shutdown method")
        reactor.stop()
231
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot."""
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, unicode_encoding=None):
        """Remember the base directory and PTY setting, and pick a unicode
        encoding: the given one, else the filesystem encoding, else
        'ascii'."""
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.unicode_encoding = unicode_encoding or sys.getfilesystemencoding() or 'ascii'
        self.builders = {}

    def startService(self):
        """Start the service; the base directory must already exist."""
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getCommands(self):
        """Return a dict mapping every registered command name to
        C{base.command_version}."""
        commands = dict([
            (n, base.command_version)
            for n in registry.getAllCommandNames()
        ])
        return commands

    @defer.deferredGenerator
    def remote_setBuilderList(self, wanted):
        """Bring C{self.builders} in line with C{wanted}, a sequence of
        (name, builddir) pairs.

        Existing builders are kept (with their builddir updated if it
        changed), missing ones are created, and builders no longer wanted
        are disowned and dropped. The result is a dict mapping each wanted
        name to its SlaveBuilder."""
        retval = {}
        wanted_names = set([ name for (name, builddir) in wanted ])
        wanted_dirs = set([ builddir for (name, builddir) in wanted ])
        # 'info' is read by remote_getSlaveInfo, so never report it as
        # leftover
        wanted_dirs.add('info')
        for (name, builddir) in wanted:
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s" \
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name)
                b.usePTY = self.usePTY
                b.unicode_encoding = self.unicode_encoding
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # disown any builders no longer desired
        to_remove = list(set(self.builders.keys()) - wanted_names)
        dl = defer.DeferredList([
            defer.maybeDeferred(self.builders[name].disownServiceParent)
            for name in to_remove ])
        wfd = defer.waitForDeferred(dl)
        yield wfd
        wfd.getResult()

        # and *then* remove them from the builder list
        for name in to_remove:
            del self.builders[name]

        # finally warn about any leftover dirs
        for dirname in os.listdir(self.basedir):
            if os.path.isdir(os.path.join(self.basedir, dirname)):
                if dirname not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % dirname)

        yield retval # return value

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        The returned dict also carries the 'environ', 'system' and 'basedir'
        entries.
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if os.path.isdir(basedir):
            for f in os.listdir(basedir):
                filename = os.path.join(basedir, f)
                if os.path.isfile(filename):
                    # close each file explicitly instead of relying on
                    # garbage collection to release the handle
                    infofile = open(filename, "r")
                    try:
                        files[f] = infofile.read()
                    finally:
                        infofile.close()
        files['environ'] = os.environ.copy()
        files['system'] = os.name
        files['basedir'] = self.basedir
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildslave.version

    def remote_shutdown(self):
        """Schedule a reactor stop shortly after replying to the master."""
        log.msg("slave shutting down on command from master")
        # there's no good way to learn that the PB response has been delivered,
        # so we'll just wait a bit, in hopes the master hears back.  Masters are
        # resilient to slaves dropping their connections, so there is no harm
        # if this timeout is too short.
        reactor.callLater(0.2, reactor.stop)
335
class BotFactory(ReconnectingPBClientFactory):
    """Reconnecting PB client factory that logs connection events and sends
    application-level keepalives while a perspective is attached."""

    # 'keepaliveInterval' serves two purposes. The first is to keep the
    # connection alive: it guarantees that there will be at least some
    # traffic once every 'keepaliveInterval' seconds, which may help keep an
    # interposed NAT gateway from dropping the address mapping because it
    # thinks the connection has been abandoned.  This also gives the operating
    # system a chance to notice that the master has gone away, and inform us
    # of such (although this could take several minutes).
    keepaliveInterval = None # None = do not use keepalives

    # 'maxDelay' determines the maximum amount of time the slave will wait
    # between connection retries
    maxDelay = 300

    keepaliveTimer = None
    unsafeTracebacks = 1
    perspective = None

    # for tests
    _reactor = reactor

    def __init__(self, buildmaster_host, port, keepaliveInterval, maxDelay):
        """Store the retry/keepalive settings and the master's address."""
        ReconnectingPBClientFactory.__init__(self)
        self.maxDelay = maxDelay
        self.keepaliveInterval = keepaliveInterval
        # NOTE: this class does not actually make the TCP connections - this information is
        # only here to print useful error messages
        self.buildmaster_host = buildmaster_host
        self.port = port

    def startedConnecting(self, connector):
        """Log the connection attempt and remember the connector."""
        log.msg("Connecting to %s:%s" % (self.buildmaster_host, self.port))
        ReconnectingPBClientFactory.startedConnecting(self, connector)
        self.connector = connector

    def gotPerspective(self, perspective):
        """Record the new perspective, try to enable TCP keepalive, and start
        the application-level keepalive timer if an interval is set."""
        log.msg("Connected to %s:%s; slave is ready" % (self.buildmaster_host, self.port))
        ReconnectingPBClientFactory.gotPerspective(self, perspective)
        self.perspective = perspective
        try:
            perspective.broker.transport.setTcpKeepAlive(1)
        except Exception:
            # best effort only; Exception (rather than a bare except) lets
            # KeyboardInterrupt/SystemExit propagate on modern Pythons
            log.msg("unable to set SO_KEEPALIVE")
            # without SO_KEEPALIVE, fall back to application-level
            # keepalives every ten minutes unless an interval was configured
            if not self.keepaliveInterval:
                self.keepaliveInterval = 10*60
        self.activity()
        if self.keepaliveInterval:
            log.msg("sending application-level keepalives every %d seconds" \
                    % self.keepaliveInterval)
            self.startTimers()

    def clientConnectionFailed(self, connector, reason):
        """Log the failed connection attempt, then defer to the base class."""
        self.connector = None
        why = reason
        if reason.check(error.ConnectionRefusedError):
            why = "Connection Refused"
        log.msg("Connection to %s:%s failed: %s" % (self.buildmaster_host, self.port, why))
        ReconnectingPBClientFactory.clientConnectionFailed(self,
                                                            connector, reason)

    def clientConnectionLost(self, connector, reason):
        """Log the lost connection, cancel the keepalive timer and drop the
        perspective, then defer to the base class."""
        log.msg("Lost connection to %s:%s" % (self.buildmaster_host, self.port))
        self.connector = None
        self.stopTimers()
        self.perspective = None
        ReconnectingPBClientFactory.clientConnectionLost(self,
                                                          connector, reason)

    def startTimers(self):
        """Schedule the next keepalive; each keepalive re-arms the timer
        before it sends its request."""
        assert self.keepaliveInterval
        assert not self.keepaliveTimer

        def doKeepalive():
            self.keepaliveTimer = None
            self.startTimers()

            # Send the keepalive request.  If an error occurs, the connection
            # was presumably already dropped, so just log and ignore.
            log.msg("sending app-level keepalive")
            d = self.perspective.callRemote("keepalive")
            d.addErrback(log.err, "error sending keepalive")
        self.keepaliveTimer = self._reactor.callLater(self.keepaliveInterval,
                                                      doKeepalive)

    def stopTimers(self):
        """Cancel the pending keepalive, if any."""
        if self.keepaliveTimer:
            self.keepaliveTimer.cancel()
            self.keepaliveTimer = None

    def activity(self, res=None):
        """Subclass or monkey-patch this method to be alerted whenever there is
        active communication between the master and slave."""
        pass

    def stopFactory(self):
        """Stop the factory and cancel any pending keepalive."""
        ReconnectingPBClientFactory.stopFactory(self)
        self.stopTimers()
433
class BuildSlave(service.MultiService):
    """Top-level slave service: owns the Bot, the BotFactory and the TCP
    client that connects them to the buildmaster."""

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=None, umask=None,
                 maxdelay=300, unicode_encoding=None, allow_shutdown=None):
        """Build the service tree.

        C{allow_shutdown} may be 'signal' (shutdown on SIGHUP), 'file'
        (shutdown when BASEDIR/shutdown.stamp is touched), or None.

        @raise ValueError: if 'signal' is requested but the platform has no
            SIGHUP
        """

        # note: keepaliveTimeout is ignored, but preserved here for
        # backward-compatibility

        service.MultiService.__init__(self)
        bot = Bot(basedir, usePTY, unicode_encoding=unicode_encoding)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask
        self.basedir = basedir

        self.shutdown_loop = None

        if allow_shutdown == 'signal':
            if not hasattr(signal, 'SIGHUP'):
                raise ValueError("Can't install signal handler")
        elif allow_shutdown == 'file':
            self.shutdown_file = os.path.join(basedir, 'shutdown.stamp')
            self.shutdown_mtime = 0

        self.allow_shutdown = allow_shutdown
        bf = self.bf = BotFactory(buildmaster_host, port, keepalive, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def startService(self):
        """Apply monkeypatches, record the hostname, set the umask if one was
        given, start child services, and arm the configured shutdown
        trigger."""
        # first, apply all monkeypatches
        monkeypatches.patch_all()

        log.msg("Starting BuildSlave -- version: %s" % buildslave.version)

        self.recordHostname(self.basedir)
        if self.umask is not None:
            os.umask(self.umask)

        service.MultiService.startService(self)

        if self.allow_shutdown == 'signal':
            log.msg("Setting up SIGHUP handler to initiate shutdown")
            signal.signal(signal.SIGHUP, self._handleSIGHUP)
        elif self.allow_shutdown == 'file':
            log.msg("Watching %s's mtime to initiate shutdown" % self.shutdown_file)
            # start from the file's current mtime so that a stamp left over
            # from an earlier run does not trigger a shutdown
            if os.path.exists(self.shutdown_file):
                self.shutdown_mtime = os.path.getmtime(self.shutdown_file)
            self.shutdown_loop = loop = task.LoopingCall(self._checkShutdownFile)
            loop.start(interval=10)

    def stopService(self):
        """Stop reconnecting, stop the shutdown-file poller if it is running,
        and stop child services."""
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        if self.shutdown_loop:
            self.shutdown_loop.stop()
            self.shutdown_loop = None
        return service.MultiService.stopService(self)

    def recordHostname(self, basedir):
        "Record my hostname in twistd.hostname, for user convenience"
        log.msg("recording hostname in twistd.hostname")
        filename = os.path.join(basedir, "twistd.hostname")

        try:
            hostname = os.uname()[1] # only on unix
        except AttributeError:
            # no os.uname here; fall back to getfqdn.  NOTE(review): the
            # original comment says this tends to fail on non-connected
            # hosts, e.g., laptops on planes
            hostname = socket.getfqdn()

        # best effort: a failure to write the file is logged and ignored,
        # but the file handle is always closed
        try:
            f = open(filename, "w")
            try:
                f.write("%s\n" % hostname)
            finally:
                f.close()
        except Exception:
            log.msg("failed - ignoring")

    def _handleSIGHUP(self, *args):
        """Signal handler: begin a graceful shutdown."""
        log.msg("Initiating shutdown because we got SIGHUP")
        return self.gracefulShutdown()

    def _checkShutdownFile(self):
        """Poll the shutdown file and begin a graceful shutdown when its
        mtime is newer than the last one recorded."""
        if os.path.exists(self.shutdown_file) and \
                os.path.getmtime(self.shutdown_file) > self.shutdown_mtime:
            log.msg("Initiating shutdown because %s was touched" % self.shutdown_file)
            self.gracefulShutdown()

            # In case the shutdown fails, update our mtime so we don't keep
            # trying to shutdown over and over again.
            # We do want to be able to try again later if the master is
            # restarted, so we'll keep monitoring the mtime.
            self.shutdown_mtime = os.path.getmtime(self.shutdown_file)

    def gracefulShutdown(self):
        """Start shutting down"""
        if not self.bf.perspective:
            log.msg("No active connection, shutting down NOW")
            reactor.stop()
            return

        log.msg("Telling the master we want to shutdown after any running builds are finished")
        d = self.bf.perspective.callRemote("shutdown")
        def _shutdownfailed(err):
            if err.check(AttributeError):
                log.msg("Master does not support slave initiated shutdown.  Upgrade master to 0.8.3 or later to use this feature.")
            else:
                log.msg('callRemote("shutdown") failed')
                log.err(err)

        d.addErrback(_shutdownfailed)
        return d
548