Package buildslave :: Module bot
[frames | no frames]

Source Code for Module buildslave.bot

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15   
 16  import os.path 
 17  import socket 
 18  import sys 
 19  import signal 
 20   
 21  from twisted.spread import pb 
 22  from twisted.python import log 
 23  from twisted.internet import error, reactor, task 
 24  from twisted.application import service, internet 
 25  from twisted.cred import credentials 
 26   
 27  import buildslave 
 28  from buildslave.pbutil import ReconnectingPBClientFactory 
 29  from buildslave.commands import registry, base 
 30  from buildslave import monkeypatches 
 31   
class UnknownCommand(pb.Error):
    """Raised when the master requests a SlaveCommand that the command
    registry does not know about."""
34
class SlaveBuilder(pb.Referenceable, service.Service):
    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.
    """

    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. We use it to implement the stopBuild method.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    def __init__(self, name):
        """Create the builder and register C{name} as its service name."""
        #service.Service.__init__(self) # Service has no __init__ method
        self.setName(name)

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to C{parent} and remember it as C{self.bot}."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Record C{builddir} and create the matching directory under the
        bot's basedir if it does not exist yet."""
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service, halting any running command when
        C{stopCommandOnShutdown} is set."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Forward an activity notification to the BotFactory, reached via
        parent (bot) -> parent (buildslave) -> C{bf}. Does nothing if either
        parent link is missing."""
        bot = self.parent
        if bot:
            bslave = bot.parent
            if bslave:
                bf = bslave.bf
                bf.activity()

    def remote_setMaster(self, remote):
        """Store the master-side reference and watch it for disconnection."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))

    def lostRemote(self, remote):
        """Disconnect callback for C{self.remote}: forget the reference."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback for C{self.remoteStep}: forget the reference
        and, if C{stopCommandOnShutdown} is set, halt the running command."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run.  It
        doesn't do much, but masters call it so it's still here."""
        pass

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        Raises L{UnknownCommand} if C{command} is not in the registry.
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory = registry.getFactory(command)
        except KeyError:
            # call form rather than the Python-2-only "raise Class, value"
            # statement; both produce the same exception instance
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result so that commandComplete receives None
        # on success and a Failure on error
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()


    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """This sends the status update to the master-side
        L{buildbot.process.step.RemoteCommand} object, giving it a sequence
        number in the process. It adds the update to a queue, and asks the
        master to acknowledge the update so it can be removed from that
        queue."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        self.activity() # update the "last activity" timer

    def ackComplete(self, dummy):
        self.activity() # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback for remote calls: log where it failed and the failure."""
        log.msg("SlaveBuilder._ackFailed:", where)
        log.err(why) # we don't really care


    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        C{failure} is None on success, or a failure.Failure on error.
        """
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None


    def remote_shutdown(self):
        """Stop the reactor immediately on request from the master."""
        log.msg("slave shutting down on command from master")
        log.msg("NOTE: master is using deprecated slavebuilder.shutdown method")
        reactor.stop()
231 232
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot."""
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, unicode_encoding=None):
        """Remember the configuration and start with no builders.

        C{unicode_encoding} falls back to the filesystem encoding, and then
        to 'ascii', when not given.
        """
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.unicode_encoding = unicode_encoding or sys.getfilesystemencoding() or 'ascii'
        self.builders = {}

    def startService(self):
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getCommands(self):
        """Return a dict mapping every registered command name to
        C{base.command_version}."""
        commands = dict([
            (n, base.command_version)
            for n in registry.getAllCommandNames()
        ])
        return commands

    def remote_setBuilderList(self, wanted):
        """Make the set of SlaveBuilders match C{wanted}.

        C{wanted} is a sequence of (name, builddir) pairs. Builders that
        already exist are kept (and moved if their builddir changed), missing
        ones are created, and builders no longer wanted are removed. Returns
        a dict mapping each wanted name to its SlaveBuilder.
        """
        retval = {}
        wanted_names = set()
        wanted_dirs = set(["info"])
        for (name, builddir) in wanted:
            wanted_names.add(name)
            wanted_dirs.add(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s" \
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name)
                b.usePTY = self.usePTY
                b.unicode_encoding = self.unicode_encoding
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # iterate over a snapshot of the keys: entries are deleted from
        # self.builders inside the loop
        for name in list(self.builders.keys()):
            if name not in wanted_names:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del self.builders[name]

        # point out directories that no builder (nor "info") accounts for
        for d in os.listdir(self.basedir):
            if os.path.isdir(os.path.join(self.basedir, d)):
                if d not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % d)
        return retval

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        The returned dict also carries 'environ', 'system' and 'basedir'
        entries.
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if os.path.isdir(basedir):
            for f in os.listdir(basedir):
                filename = os.path.join(basedir, f)
                if os.path.isfile(filename):
                    # close each file explicitly instead of relying on
                    # garbage collection to release the handle
                    infofile = open(filename, "r")
                    try:
                        files[f] = infofile.read()
                    finally:
                        infofile.close()
        files['environ'] = os.environ.copy()
        files['system'] = os.name
        files['basedir'] = self.basedir
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildslave.version

    def remote_shutdown(self):
        """Schedule a reactor stop shortly after replying to the master."""
        log.msg("slave shutting down on command from master")
        # there's no good way to learn that the PB response has been delivered,
        # so we'll just wait a bit, in hopes the master hears back.  Masters are
        # resilient to slaves dropping their connections, so there is no harm
        # if this timeout is too short.
        reactor.callLater(0.2, reactor.stop)
323
class BotFactory(ReconnectingPBClientFactory):
    # 'keepaliveInterval' serves two purposes. The first is to keep the
    # connection alive: it guarantees that there will be at least some
    # traffic once every 'keepaliveInterval' seconds, which may help keep an
    # interposed NAT gateway from dropping the address mapping because it
    # thinks the connection has been abandoned.  This also gives the operating
    # system a chance to notice that the master has gone away, and inform us
    # of such (although this could take several minutes).
    keepaliveInterval = None # None = do not use keepalives

    # 'maxDelay' determines the maximum amount of time the slave will wait
    # between connection retries
    maxDelay = 300

    keepaliveTimer = None
    unsafeTracebacks = 1
    perspective = None

    # for tests
    _reactor = reactor

    def __init__(self, buildmaster_host, port, keepaliveInterval, maxDelay):
        ReconnectingPBClientFactory.__init__(self)
        self.maxDelay = maxDelay
        self.keepaliveInterval = keepaliveInterval
        # NOTE: this class does not actually make the TCP connections - this information is
        # only here to print useful error messages
        self.buildmaster_host = buildmaster_host
        self.port = port

    def startedConnecting(self, connector):
        log.msg("Connecting to %s:%s" % (self.buildmaster_host, self.port))
        ReconnectingPBClientFactory.startedConnecting(self, connector)
        self.connector = connector

    def gotPerspective(self, perspective):
        """Record the new perspective and start keepalives.

        TCP-level keepalive is requested on the transport; if that fails and
        no application-level interval was configured, a 10 minute interval
        is used instead.
        """
        log.msg("Connected to %s:%s; slave is ready" % (self.buildmaster_host, self.port))
        ReconnectingPBClientFactory.gotPerspective(self, perspective)
        self.perspective = perspective
        try:
            perspective.broker.transport.setTcpKeepAlive(1)
        except Exception:
            # best-effort: any ordinary error just means no SO_KEEPALIVE.
            # "except Exception" (rather than a bare except) lets
            # SystemExit / KeyboardInterrupt propagate.
            log.msg("unable to set SO_KEEPALIVE")
            if not self.keepaliveInterval:
                self.keepaliveInterval = 10*60
        self.activity()
        if self.keepaliveInterval:
            log.msg("sending application-level keepalives every %d seconds" \
                    % self.keepaliveInterval)
            self.startTimers()

    def clientConnectionFailed(self, connector, reason):
        self.connector = None
        why = reason
        if reason.check(error.ConnectionRefusedError):
            why = "Connection Refused"
        log.msg("Connection to %s:%s failed: %s" % (self.buildmaster_host, self.port, why))
        ReconnectingPBClientFactory.clientConnectionFailed(self,
                                                            connector, reason)

    def clientConnectionLost(self, connector, reason):
        log.msg("Lost connection to %s:%s" % (self.buildmaster_host, self.port))
        self.connector = None
        self.stopTimers()
        self.perspective = None
        ReconnectingPBClientFactory.clientConnectionLost(self,
                                                          connector, reason)

    def startTimers(self):
        """Schedule the next keepalive C{keepaliveInterval} seconds from now.

        Must only be called with an interval set and no timer pending.
        """
        assert self.keepaliveInterval
        assert not self.keepaliveTimer

        def doKeepalive():
            # the timer has fired; clear it and schedule the next one before
            # sending, so the cycle continues whatever the call's outcome
            self.keepaliveTimer = None
            self.startTimers()

            # Send the keepalive request.  If an error occurs, the
            # connection was presumably already dropped, so just log and
            # ignore.
            log.msg("sending app-level keepalive")
            d = self.perspective.callRemote("keepalive")
            d.addErrback(log.err, "eror sending keepalive")
        self.keepaliveTimer = self._reactor.callLater(self.keepaliveInterval,
                                                      doKeepalive)

    def stopTimers(self):
        """Cancel the pending keepalive timer, if any."""
        if self.keepaliveTimer:
            self.keepaliveTimer.cancel()
            self.keepaliveTimer = None

    def activity(self, res=None):
        """Subclass or monkey-patch this method to be alerted whenever there is
        active communication between the master and slave."""
        pass

    def stopFactory(self):
        ReconnectingPBClientFactory.stopFactory(self)
        self.stopTimers()
421 422
class BuildSlave(service.MultiService):
    """Top-level service: owns the Bot, the BotFactory and the TCP client
    that connects them to the buildmaster."""

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=None, umask=None,
                 maxdelay=300, unicode_encoding=None, allow_shutdown=None):
        """Build the service hierarchy.

        C{allow_shutdown} may be 'signal' (shut down on SIGHUP), 'file'
        (shut down when basedir/shutdown.stamp is touched) or None.

        Raises ValueError if 'signal' is requested on a platform without
        SIGHUP.
        """

        # note: keepaliveTimeout is ignored, but preserved here for
        # backward-compatibility

        service.MultiService.__init__(self)
        bot = Bot(basedir, usePTY, unicode_encoding=unicode_encoding)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask
        self.basedir = basedir

        self.shutdown_loop = None

        if allow_shutdown == 'signal':
            if not hasattr(signal, 'SIGHUP'):
                raise ValueError("Can't install signal handler")
        elif allow_shutdown == 'file':
            self.shutdown_file = os.path.join(basedir, 'shutdown.stamp')
            self.shutdown_mtime = 0

        self.allow_shutdown = allow_shutdown
        bf = self.bf = BotFactory(buildmaster_host, port, keepalive, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def startService(self):
        """Apply monkeypatches, record the hostname, set the umask, start the
        child services and install the configured shutdown trigger."""
        # first, apply all monkeypatches
        monkeypatches.patch_all()

        log.msg("Starting BuildSlave -- version: %s" % buildslave.version)

        self.recordHostname(self.basedir)
        if self.umask is not None:
            os.umask(self.umask)

        service.MultiService.startService(self)

        if self.allow_shutdown == 'signal':
            log.msg("Setting up SIGHUP handler to initiate shutdown")
            signal.signal(signal.SIGHUP, self._handleSIGHUP)
        elif self.allow_shutdown == 'file':
            log.msg("Watching %s's mtime to initiate shutdown" % self.shutdown_file)
            if os.path.exists(self.shutdown_file):
                self.shutdown_mtime = os.path.getmtime(self.shutdown_file)
            self.shutdown_loop = loop = task.LoopingCall(self._checkShutdownFile)
            loop.start(interval=10)

    def stopService(self):
        """Stop reconnecting, stop the shutdown-file poller if it is running,
        then stop the child services."""
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        if self.shutdown_loop:
            self.shutdown_loop.stop()
            self.shutdown_loop = None
        return service.MultiService.stopService(self)

    def recordHostname(self, basedir):
        "Record my hostname in twistd.hostname, for user convenience"
        log.msg("recording hostname in twistd.hostname")
        filename = os.path.join(basedir, "twistd.hostname")

        try:
            hostname = os.uname()[1] # only on unix
        except AttributeError:
            # no os.uname() on this platform; note that getfqdn tends to
            # fail on non-connected hosts, e.g., laptops on planes
            hostname = socket.getfqdn()

        # best-effort: a failure to write the file is logged and ignored
        try:
            hostfile = open(filename, "w")
            try:
                hostfile.write("%s\n" % hostname)
            finally:
                hostfile.close()
        except Exception:
            log.msg("failed - ignoring")

    def _handleSIGHUP(self, *args):
        log.msg("Initiating shutdown because we got SIGHUP")
        return self.gracefulShutdown()

    def _checkShutdownFile(self):
        """Poll the shutdown stamp file and start a graceful shutdown when
        its mtime is newer than the last one seen."""
        # read the mtime in a single step: a separate exists() check leaves a
        # window in which the file can vanish, and the resulting OSError
        # would propagate into the LoopingCall and end the polling
        try:
            mtime = os.path.getmtime(self.shutdown_file)
        except OSError:
            return

        if mtime > self.shutdown_mtime:
            log.msg("Initiating shutdown because %s was touched" % self.shutdown_file)
            self.gracefulShutdown()

            # In case the shutdown fails, update our mtime so we don't keep
            # trying to shutdown over and over again.
            # We do want to be able to try again later if the master is
            # restarted, so we'll keep monitoring the mtime.
            self.shutdown_mtime = mtime

    def gracefulShutdown(self):
        """Start shutting down

        With no active connection the reactor is stopped at once and None
        is returned; otherwise the master is asked to shut us down and the
        Deferred for that remote call is returned.
        """
        if not self.bf.perspective:
            log.msg("No active connection, shutting down NOW")
            reactor.stop()
            return

        log.msg("Telling the master we want to shutdown after any running builds are finished")
        d = self.bf.perspective.callRemote("shutdown")
        def _shutdownfailed(err):
            if err.check(AttributeError):
                log.msg("Master does not support slave initiated shutdown.  Upgrade master to 0.8.3 or later to use this feature.")
            else:
                log.msg('callRemote("shutdown") failed')
                log.err(err)

        d.addErrback(_shutdownfailed)
        return d
536