Package buildslave :: Module bot
[frames | no frames]

Source Code for Module buildslave.bot

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15   
 16  import os.path 
 17  import socket 
 18  import sys 
 19  import signal 
 20   
 21  from twisted.spread import pb 
 22  from twisted.python import log 
 23  from twisted.internet import error, reactor, task 
 24  from twisted.application import service, internet 
 25  from twisted.cred import credentials 
 26   
 27  import buildslave 
 28  from buildslave.util import now 
 29  from buildslave.pbutil import ReconnectingPBClientFactory 
 30  from buildslave.commands import registry, base 
 31   
class UnknownCommand(pb.Error):
    """Raised when the master requests a SlaveCommand that the command
    registry does not know about."""
34
class SlaveBuilder(pb.Referenceable, service.Service):
    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.
    """

    # when true, stopService() and lostRemoteStep() halt the running command
    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. We use it to implement the stopBuild method.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    def __init__(self, name):
        """Create a builder whose service name is `name`."""
        # service.Service has no __init__ method, so there is nothing to
        # chain up to here
        self.setName(name)

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to `parent` and remember it as self.bot."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Record `builddir` and make sure the matching directory exists
        underneath the bot's basedir. Must be called after
        setServiceParent()."""
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service, halting any running command when
        stopCommandOnShutdown is set."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Tell the BotFactory (reached through bot.parent.bf) that we have
        heard from the master. Does nothing once we have been detached from
        our parent chain."""
        bot = self.parent
        if bot:
            bslave = bot.parent
            if bslave:
                bf = bslave.bf
                bf.activity()

    def remote_setMaster(self, remote):
        """Remember the master-side reference and ask to be told when it
        disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))

    def lostRemote(self, remote):
        """Disconnect callback for the master-side Builder reference."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback for the master-side step reference; halts the
        running command when stopCommandOnShutdown is set."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run. It
        doesn't do much, but masters call it so it's still here."""
        pass

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        @raise UnknownCommand: if `command` is not in the command registry
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory = registry.getFactory(command)
        except KeyError:
            # call form of raise: valid on both Python 2 and Python 3
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result so commandComplete sees None on
        # success and a Failure otherwise
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step. `stepId` is accepted but not consulted."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()

    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """This sends the status update to the master-side
        L{buildbot.process.step.RemoteCommand} object. The update is sent as
        a one-element list holding a [data, 0] pair; nothing is sent when the
        service is not running or no remote step is attached."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        """Callback for a delivered "update" call."""
        self.activity()   # update the "last activity" timer

    def ackComplete(self, dummy):
        """Callback for a delivered "complete" call."""
        self.activity()   # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback for remote calls: log the failure `why`, tagged with the
        call site `where`, and carry on."""
        log.msg("SlaveBuilder._ackFailed:", where)
        log.err(why) # we don't really care

    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        `failure` is None on success, or a failure.Failure when the command
        failed."""
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None

    def remote_shutdown(self):
        """Stop the reactor at the master's request (deprecated entry
        point)."""
        log.msg("slave shutting down on command from master")
        log.msg("NOTE: master is using deprecated slavebuilder.shutdown method")
        reactor.stop()
231 232
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot."""
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, unicode_encoding=None):
        """Remember the slave's base directory and the settings that are
        handed down to each SlaveBuilder.

        `unicode_encoding` falls back to the filesystem encoding, then to
        'ascii'."""
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.unicode_encoding = unicode_encoding or sys.getfilesystemencoding() or 'ascii'
        # maps builder name -> SlaveBuilder
        self.builders = {}

    def startService(self):
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getCommands(self):
        """Return a dict mapping every registered command name to
        base.command_version."""
        commands = dict([
            (n, base.command_version)
            for n in registry.getAllCommandNames()
            ])
        return commands

    def remote_setBuilderList(self, wanted):
        """Bring self.builders in line with `wanted`, a sequence of
        (name, builddir) pairs.

        Missing builders are created, existing ones have their builddir
        updated, and builders that are no longer wanted are detached.
        Directories under basedir that no builder uses are only reported in
        the log, never deleted. Returns a dict of name -> SlaveBuilder for
        the wanted builders."""
        retval = {}
        wanted_names = set()
        wanted_dirs = ["info"]
        for (name, builddir) in wanted:
            wanted_names.add(name)
            wanted_dirs.append(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s" \
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name)
                b.usePTY = self.usePTY
                b.unicode_encoding = self.unicode_encoding
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # iterate over a snapshot of the keys: entries are deleted from
        # self.builders inside the loop
        for name in list(self.builders.keys()):
            if name not in wanted_names:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del self.builders[name]

        for d in os.listdir(self.basedir):
            if os.path.isdir(os.path.join(self.basedir, d)):
                if d not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % d)
        return retval

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        The returned dict also carries 'environ' (a copy of os.environ),
        'system' (os.name) and 'basedir'.
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if os.path.isdir(basedir):
            for f in os.listdir(basedir):
                filename = os.path.join(basedir, f)
                if os.path.isfile(filename):
                    # close each file explicitly instead of relying on the
                    # garbage collector to release the handle
                    info_file = open(filename, "r")
                    try:
                        files[f] = info_file.read()
                    finally:
                        info_file.close()
        files['environ'] = os.environ.copy()
        files['system'] = os.name
        files['basedir'] = self.basedir
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildslave.version

    def remote_shutdown(self):
        """Schedule a reactor stop at the master's request."""
        log.msg("slave shutting down on command from master")
        # there's no good way to learn that the PB response has been delivered,
        # so we'll just wait a bit, in hopes the master hears back. Masters are
        # resilient to slaves dropping their connections, so there is no harm
        # if this timeout is too short.
        reactor.callLater(0.2, reactor.stop)
323
class BotFactory(ReconnectingPBClientFactory):
    """PB client factory used by the slave to log in to the buildmaster. On
    top of the base factory it logs connection events and runs the
    keepalive / activity timers described below."""

    # 'keepaliveInterval' serves two purposes. The first is to keep the
    # connection alive: it guarantees that there will be at least some
    # traffic once every 'keepaliveInterval' seconds, which may help keep an
    # interposed NAT gateway from dropping the address mapping because it
    # thinks the connection has been abandoned. The second is to put an upper
    # limit on how long the buildmaster might have gone away before we notice
    # it. For this second purpose, we insist upon seeing *some* evidence of
    # the buildmaster at least once every 'keepaliveInterval' seconds.
    keepaliveInterval = None # None = do not use keepalives

    # 'keepaliveTimeout' seconds before the interval expires, we will send a
    # keepalive request, both to add some traffic to the connection, and to
    # prompt a response from the master in case all our builders are idle. We
    # don't insist upon receiving a timely response from this message: a slow
    # link might put the request at the wrong end of a large build message.
    keepaliveTimeout = 30 # how long we will go without a response

    # 'maxDelay' determines the maximum amount of time the slave will wait
    # between connection retries
    maxDelay = 300

    # pending reactor.callLater() handles created by startTimers(); None
    # while no timer is scheduled
    keepaliveTimer = None
    activityTimer = None
    # value of now() at the most recent call to activity()
    lastActivity = 0
    # NOTE(review): presumably consumed by the PB layer to let tracebacks
    # cross the wire -- confirm against twisted.spread.pb
    unsafeTracebacks = 1
    # the perspective handed to gotPerspective(); None while disconnected
    perspective = None
def __init__(self, buildmaster_host, port, keepaliveInterval, keepaliveTimeout, maxDelay):
    """Store the reconnect / keepalive tuning values plus the master's
    address."""
    ReconnectingPBClientFactory.__init__(self)
    # NOTE: this class does not actually make the TCP connections - the
    # host and port are only kept to print useful error messages
    self.buildmaster_host = buildmaster_host
    self.port = port
    self.keepaliveInterval = keepaliveInterval
    self.keepaliveTimeout = keepaliveTimeout
    self.maxDelay = maxDelay
361
def startedConnecting(self, connector):
    """Log the connection attempt, let the base factory do its work, then
    keep hold of the connector."""
    target = (self.buildmaster_host, self.port)
    log.msg("Connecting to %s:%s" % target)
    ReconnectingPBClientFactory.startedConnecting(self, connector)
    self.connector = connector
366
def gotPerspective(self, perspective):
    """Called once login has succeeded: remember the perspective, try to
    enable TCP keepalives, and start the application-level keepalive
    timers when an interval is configured."""
    log.msg("Connected to %s:%s; slave is ready" % (self.buildmaster_host, self.port))
    ReconnectingPBClientFactory.gotPerspective(self, perspective)
    self.perspective = perspective
    try:
        perspective.broker.transport.setTcpKeepAlive(1)
    except Exception:
        # best effort only; Exception (rather than a bare except) keeps
        # KeyboardInterrupt / SystemExit from being swallowed here
        log.msg("unable to set SO_KEEPALIVE")
        # without SO_KEEPALIVE, fall back to application-level keepalives
        # even if none were requested
        if not self.keepaliveInterval:
            self.keepaliveInterval = 10*60
    self.activity()
    if self.keepaliveInterval:
        log.msg("sending application-level keepalives every %d seconds" \
                % self.keepaliveInterval)
        self.startTimers()
382
def clientConnectionFailed(self, connector, reason):
    """Log why the connection attempt failed, then hand the event on to
    the base factory."""
    self.connector = None
    if reason.check(error.ConnectionRefusedError):
        # a refused connection gets a short message instead of the
        # full failure
        why = "Connection Refused"
    else:
        why = reason
    details = (self.buildmaster_host, self.port, why)
    log.msg("Connection to %s:%s failed: %s" % details)
    ReconnectingPBClientFactory.clientConnectionFailed(self,
                                                       connector, reason)
391
def clientConnectionLost(self, connector, reason):
    """Log the loss, cancel the timers, forget the connector and
    perspective, then hand the event on to the base factory."""
    target = (self.buildmaster_host, self.port)
    log.msg("Lost connection to %s:%s" % target)
    self.connector = None
    self.stopTimers()
    self.perspective = None
    ReconnectingPBClientFactory.clientConnectionLost(self,
                                                     connector, reason)
399
def startTimers(self):
    """Schedule one keepalive request and one activity check.

    Requires a keepaliveInterval, and that neither timer is already
    pending."""
    assert self.keepaliveInterval
    assert not self.keepaliveTimer
    assert not self.activityTimer
    # Insist that doKeepalive fires before checkActivity. Really, it
    # needs to happen at least one RTT beforehand.
    assert self.keepaliveInterval > self.keepaliveTimeout

    # the keepalive goes out a little while before our deadline ...
    keepalive_delay = self.keepaliveInterval - self.keepaliveTimeout
    self.keepaliveTimer = reactor.callLater(keepalive_delay,
                                            self.doKeepalive)
    # ... and the activity check happens at the deadline itself
    self.activityTimer = reactor.callLater(self.keepaliveInterval,
                                           self.checkActivity)
414
def stopTimers(self):
    """Cancel whichever of the keepalive and activity timers is pending
    and forget it."""
    for attr_name in ("keepaliveTimer", "activityTimer"):
        pending = getattr(self, attr_name)
        if pending:
            pending.cancel()
            setattr(self, attr_name, None)
422
def activity(self, res=None):
    """Record that we just heard from the master. `res` is ignored; it
    is accepted so this method can be used as a Deferred callback."""
    heard_at = now()
    self.lastActivity = heard_at
425
def doKeepalive(self):
    """Timer callback: send one "keepalive" request to the master."""
    # the timer that called us has fired, so drop our handle on it
    self.keepaliveTimer = None
    log.msg("sending app-level keepalive")
    # If the request fails outright, the connection was already dropped,
    # so keepaliveLost just logs and ignores it.
    keepalive_d = self.perspective.callRemote("keepalive")
    keepalive_d.addCallback(self.activity).addErrback(self.keepaliveLost)
434
def keepaliveLost(self, f):
    """Errback for the keepalive request: note the failure `f` in the
    log and otherwise ignore it."""
    log.msg("BotFactory.keepaliveLost")
437
def checkActivity(self):
    """Timer callback: drop the connection if nothing has been heard
    from the master for a full keepaliveInterval, otherwise re-arm the
    timers."""
    # the timer that called us has fired, so drop our handle on it
    self.activityTimer = None
    deadline = self.lastActivity + self.keepaliveInterval
    if not deadline < now():
        # the master was heard from recently enough
        self.startTimers()
        return
    log.msg("BotFactory.checkActivity: nothing from master for "
            "%d secs" % (now() - self.lastActivity))
    self.perspective.broker.transport.loseConnection()
446
447 - def stopFactory(self):
450 451
class BuildSlave(service.MultiService):
    """Top-level slave service: it owns the Bot, the BotFactory that logs in
    to the buildmaster, and the TCP client that carries the connection. It
    can also shut the slave down on SIGHUP or when a stamp file is touched,
    depending on `allow_shutdown`."""

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=30, umask=None,
                 maxdelay=300, unicode_encoding=None, allow_shutdown=None):
        """Build the Bot, BotFactory and TCP client services.

        `keepalive` of 0 is treated as None. `allow_shutdown` may be
        'signal' (shut down on SIGHUP), 'file' (shut down when
        basedir/shutdown.stamp is touched) or None.

        @raise ValueError: if allow_shutdown is 'signal' and this platform
            has no SIGHUP
        """
        log.msg("Creating BuildSlave -- version: %s" % buildslave.version)
        self.recordHostname(basedir)
        service.MultiService.__init__(self)
        bot = Bot(basedir, usePTY, unicode_encoding=unicode_encoding)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask

        if allow_shutdown == 'signal':
            if not hasattr(signal, 'SIGHUP'):
                raise ValueError("Can't install signal handler")
        elif allow_shutdown == 'file':
            self.shutdown_file = os.path.join(basedir, 'shutdown.stamp')
            self.shutdown_mtime = 0

        self.allow_shutdown = allow_shutdown
        bf = self.bf = BotFactory(buildmaster_host, port, keepalive, keepaliveTimeout, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def recordHostname(self, basedir):
        "Record my hostname in twistd.hostname, for user convenience"
        log.msg("recording hostname in twistd.hostname")
        filename = os.path.join(basedir, "twistd.hostname")
        try:
            hostname_file = open(filename, "w")
            try:
                hostname_file.write("%s\n" % socket.getfqdn())
            finally:
                # close explicitly so the handle is not left to the
                # garbage collector
                hostname_file.close()
        except Exception:
            # purely a convenience file: never let it stop the slave
            log.msg("failed - ignoring")

    def startService(self):
        """Apply the umask (if any), start the child services, and install
        whichever shutdown trigger `allow_shutdown` asked for."""
        if self.umask is not None:
            os.umask(self.umask)
        service.MultiService.startService(self)

        if self.allow_shutdown == 'signal':
            log.msg("Setting up SIGHUP handler to initiate shutdown")
            signal.signal(signal.SIGHUP, self._handleSIGHUP)
        elif self.allow_shutdown == 'file':
            log.msg("Watching %s's mtime to initiate shutdown" % self.shutdown_file)
            # start from the file's current mtime so that a stamp left over
            # from earlier does not trigger a shutdown
            if os.path.exists(self.shutdown_file):
                self.shutdown_mtime = os.path.getmtime(self.shutdown_file)
            shutdown_poller = task.LoopingCall(self._checkShutdownFile)
            shutdown_poller.start(interval=10)

    def stopService(self):
        """Stop reconnecting to the master, then stop the child services."""
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        service.MultiService.stopService(self)

    def _handleSIGHUP(self, *args):
        """Signal handler: begin a graceful shutdown."""
        log.msg("Initiating shutdown because we got SIGHUP")
        return self.gracefulShutdown()

    def _checkShutdownFile(self):
        """Begin a graceful shutdown if the shutdown file has been touched
        since we last looked."""
        if os.path.exists(self.shutdown_file) and \
                os.path.getmtime(self.shutdown_file) > self.shutdown_mtime:
            log.msg("Initiating shutdown because %s was touched" % self.shutdown_file)
            self.gracefulShutdown()

            # In case the shutdown fails, update our mtime so we don't keep
            # trying to shutdown over and over again.
            # We do want to be able to try again later if the master is
            # restarted, so we'll keep monitoring the mtime.
            self.shutdown_mtime = os.path.getmtime(self.shutdown_file)

    def gracefulShutdown(self):
        """Start shutting down.

        With no active connection the reactor is stopped immediately and
        None is returned. Otherwise the master is asked to shut us down, and
        the Deferred for that request is returned."""
        if not self.bf.perspective:
            log.msg("No active connection, shutting down NOW")
            reactor.stop()
            # there is no perspective to call; falling through would raise
            # AttributeError on None
            return

        log.msg("Telling the master we want to shutdown after any running builds are finished")
        d = self.bf.perspective.callRemote("shutdown")
        def _shutdownfailed(err):
            if err.check(AttributeError):
                log.msg("Master does not support slave initiated shutdown.  Upgrade master to 0.8.3 or later to use this feature.")
            else:
                log.msg('callRemote("shutdown") failed')
                log.err(err)

        d.addErrback(_shutdownfailed)
        return d
541