Package buildslave :: Module bot
[frames | no frames]

Source Code for Module buildslave.bot

  1  import os.path 
  2  import sys 
  3   
  4  from twisted.spread import pb 
  5  from twisted.python import log 
  6  from twisted.internet import reactor, defer 
  7  from twisted.application import service, internet 
  8  from twisted.cred import credentials 
  9   
 10  import buildslave 
 11  from buildslave.util import now 
 12  from buildslave.pbutil import ReconnectingPBClientFactory 
 13  from buildslave.commands import registry 
 14   
 15  # make sure the standard commands get registered. This import is performed 
 16  # for its side-effects. 
 17  from buildslave.commands import base, transfer, vcs 
 18   
class NoCommandRunning(pb.Error):
    """pb.Error subclass named for the 'no command is running' condition.

    Not raised by any code visible in this listing.
    """
class WrongCommandRunning(pb.Error):
    """pb.Error subclass named for the 'a different command is running' case.

    Not raised by any code visible in this listing.
    """
class UnknownCommand(pb.Error):
    """Raised by SlaveBuilder.remote_startCommand when the requested command
    name is not present in registry.commandRegistry."""
25
class Master:
    """Plain record holding a master's host, port, username and password."""

    def __init__(self, host, port, username, password):
        # Store the four connection parameters unchanged.
        self.host, self.port = host, port
        self.username, self.password = username, password
32
class SlaveBuild:
    """Holder for state that must survive from one step to the next within
    a single build. Every SlaveCommand can reach it.
    """

    def __init__(self, builder):
        # Keep a reference to the SlaveBuilder that created this build.
        self.builder = builder
40
class SlaveBuilder(pb.Referenceable, service.Service):

    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.
    """

    # when true, a running command is interrupted on stopService() and when
    # the master-side step reference is lost
    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .build points to a SlaveBuild object, a new one for each build
    build = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. stopCommand() and remote_interruptCommand() use it to
    # interrupt the step.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    def __init__(self, name, not_really):
        """Record the builder name (as the service name) and the
        not_really flag."""
        # service.Service has no __init__ method, so there is nothing to
        # chain up to here.
        self.setName(name)
        self.not_really = not_really

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to the parent service and remember it as self.bot."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Set this builder's directory (relative to the bot's basedir),
        creating it on disk if it does not exist yet."""
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service and, if stopCommandOnShutdown is set, halt any
        running command."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Forward an 'activity seen' notification to the BotFactory found
        at self.parent.parent.bf, if both parents are present."""
        bot = self.parent
        if bot:
            # named 'slave' rather than 'buildslave' so the imported
            # buildslave module is not shadowed
            slave = bot.parent
            if slave:
                slave.bf.activity()

    def remote_setMaster(self, remote):
        """Store the master-side reference and ask to be told when it
        disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message from the master. The literal message "ping" is
        additionally handled as a ping, returning remote_ping()'s result."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))
        if message == "ping":
            return self.remote_ping()

    def remote_ping(self):
        """Respond to a ping from the master.

        Normally returns None. Two debug options (read from
        self.bot.parent.debugOpts) change that: "stallPings" returns a
        Deferred that fires after the configured timeout, and
        "failPingOnce" raises a pb.Error subclass exactly once.
        """
        log.msg("SlaveBuilder.remote_ping(%s)" % self)
        if self.bot and self.bot.parent:
            debugOpts = self.bot.parent.debugOpts
            if debugOpts.get("stallPings"):
                log.msg(" debug_stallPings")
                timeout, timers = debugOpts["stallPings"]
                d = defer.Deferred()
                t = reactor.callLater(timeout, d.callback, None)
                # stash the DelayedCall in the caller-provided list
                timers.append(t)
                return d
            if debugOpts.get("failPingOnce"):
                log.msg(" debug_failPingOnce")
                class FailPingError(pb.Error): pass
                # remove the option first so only this one ping fails
                del debugOpts['failPingOnce']
                raise FailPingError("debug_failPingOnce means we should fail")

    def lostRemote(self, remote):
        """Disconnect callback for the master-side Builder reference."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback for the master-side step reference; halts
        the running command if stopCommandOnShutdown is set."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run. It
        creates a new SlaveBuild object, which holds slave-side state from
        one step to the next."""
        self.build = SlaveBuild(self)
        log.msg("%s.startBuild" % self)

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        Raises UnknownCommand if 'command' is not in the command registry.
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory, version = registry.commandRegistry[command]
        except KeyError:
            # call form of raise: valid on both Python 2 and Python 3
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result so commandComplete sees None on
        # success and a Failure on error
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()

    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """Send one status update to the master-side step reference
        (self.remoteStep) via its "update" remote method. Nothing is sent
        when the service is not running or no step is attached."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        """Callback for a delivered update."""
        self.activity() # update the "last activity" timer

    def ackComplete(self, dummy):
        """Callback for a delivered "complete" notification."""
        self.activity() # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback for a failed remote call; 'where' names the call site."""
        # Only the call site is logged; the failure itself ('why') is
        # deliberately not logged because we don't really care.
        log.msg("SlaveBuilder._ackFailed:", where)

    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        'failure' is None on success, or a failure.Failure on error.
        """
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None

    def remote_shutdown(self):
        """Stop the reactor at the master's request."""
        # logged via log.msg like every other message in this class (the
        # original used a Python-2-only print statement)
        log.msg("slave shutting down on command from master")
        reactor.stop()
260 261
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot."""
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, not_really=0, unicode_encoding=None):
        """Store the configuration and start with no builders.

        unicode_encoding falls back to the filesystem encoding and then to
        'ascii' when not given.
        """
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.not_really = not_really
        self.unicode_encoding = (unicode_encoding
                                 or sys.getfilesystemencoding()
                                 or 'ascii')
        # maps builder name -> SlaveBuilder
        self.builders = {}

    def startService(self):
        """Start the service; basedir must already exist as a directory."""
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getDirs(self):
        """Return the names of the subdirectories of basedir as a list."""
        # os.listdir returns bare names, so each one is joined to basedir
        # before the isdir test; testing the bare name would resolve it
        # against the process's current directory instead.
        return [d for d in os.listdir(self.basedir)
                if os.path.isdir(os.path.join(self.basedir, d))]

    def remote_getCommands(self):
        """Return a dict mapping each registered command name to its
        version."""
        commands = {}
        for name, (factory, version) in registry.commandRegistry.items():
            commands[name] = version
        return commands

    def remote_setBuilderList(self, wanted):
        """Make self.builders match 'wanted', a sequence of
        (name, builddir) pairs.

        Existing builders are kept (and moved if their builddir changed),
        missing ones are created, and builders no longer wanted are
        removed. Leftover directories in basedir are only logged, never
        deleted. Returns a dict mapping builder name -> SlaveBuilder.
        """
        retval = {}
        wanted_names = set()
        wanted_dirs = set(["info"])
        for (name, builddir) in wanted:
            wanted_names.add(name)
            wanted_dirs.add(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s"
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name, self.not_really)
                b.usePTY = self.usePTY
                b.unicode_encoding = self.unicode_encoding
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # iterate over a snapshot of the keys: entries are deleted from
        # the dict inside the loop
        for name in list(self.builders.keys()):
            if name not in wanted_names:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del self.builders[name]

        for d in os.listdir(self.basedir):
            # join with basedir so the test does not depend on the
            # process's current directory
            if os.path.isdir(os.path.join(self.basedir, d)):
                if d not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % d)
        return retval

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        Returns a dict mapping file name -> file contents; the dict is empty
        when the info directory does not exist.
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if not os.path.isdir(basedir):
            return files
        for f in os.listdir(basedir):
            filename = os.path.join(basedir, f)
            if os.path.isfile(filename):
                # close the handle explicitly instead of leaving it to the
                # garbage collector
                fobj = open(filename, "r")
                try:
                    files[f] = fobj.read()
                finally:
                    fobj.close()
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildslave.version
345 346 347
348 -class BotFactory(ReconnectingPBClientFactory):
349 # 'keepaliveInterval' serves two purposes. The first is to keep the 350 # connection alive: it guarantees that there will be at least some 351 # traffic once every 'keepaliveInterval' seconds, which may help keep an 352 # interposed NAT gateway from dropping the address mapping because it 353 # thinks the connection has been abandoned. The second is to put an upper 354 # limit on how long the buildmaster might have gone away before we notice 355 # it. For this second purpose, we insist upon seeing *some* evidence of 356 # the buildmaster at least once every 'keepaliveInterval' seconds. 357 keepaliveInterval = None # None = do not use keepalives 358 359 # 'keepaliveTimeout' seconds before the interval expires, we will send a 360 # keepalive request, both to add some traffic to the connection, and to 361 # prompt a response from the master in case all our builders are idle. We 362 # don't insist upon receiving a timely response from this message: a slow 363 # link might put the request at the wrong end of a large build message. 364 keepaliveTimeout = 30 # how long we will go without a response 365 366 # 'maxDelay' determines the maximum amount of time the slave will wait 367 # between connection retries 368 maxDelay = 300 369 370 keepaliveTimer = None 371 activityTimer = None 372 lastActivity = 0 373 unsafeTracebacks = 1 374 perspective = None 375
376 - def __init__(self, keepaliveInterval, keepaliveTimeout, maxDelay):
381
382 - def startedConnecting(self, connector):
385
def gotPerspective(self, perspective):
    """Called once login succeeds: remember the perspective, try to enable
    TCP keepalives, and start the application-level keepalive timers when
    a keepalive interval is configured."""
    ReconnectingPBClientFactory.gotPerspective(self, perspective)
    self.perspective = perspective
    try:
        perspective.broker.transport.setTcpKeepAlive(1)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed here.
        log.msg("unable to set SO_KEEPALIVE")
        # NOTE(review): the listing this was recovered from lost its
        # indentation. The fallback is placed inside the except branch
        # because the class default keepaliveInterval = None means "do not
        # use keepalives", and the 'if self.keepaliveInterval' test below
        # would otherwise always be true -- confirm against upstream.
        if not self.keepaliveInterval:
            self.keepaliveInterval = 10*60
    self.activity()
    if self.keepaliveInterval:
        log.msg("sending application-level keepalives every %d seconds"
                % self.keepaliveInterval)
        self.startTimers()
400
401 - def clientConnectionFailed(self, connector, reason):
405
def clientConnectionLost(self, connector, reason):
    """Forget the connector and perspective, cancel the keepalive timers,
    then let ReconnectingPBClientFactory handle the lost connection."""
    self.connector = None
    self.stopTimers()
    self.perspective = None
    base = ReconnectingPBClientFactory
    base.clientConnectionLost(self, connector, reason)
412
def startTimers(self):
    """Schedule the next keepalive request and the next activity check."""
    interval = self.keepaliveInterval
    assert interval
    assert not self.keepaliveTimer
    assert not self.activityTimer
    # doKeepalive must fire before checkActivity. Really, it needs to
    # happen at least one RTT beforehand.
    assert interval > self.keepaliveTimeout

    # the keepalive goes out keepaliveTimeout seconds ahead of the deadline
    keepalive_delay = interval - self.keepaliveTimeout
    self.keepaliveTimer = reactor.callLater(keepalive_delay,
                                            self.doKeepalive)
    # the activity check runs at the deadline itself
    self.activityTimer = reactor.callLater(interval, self.checkActivity)
427
def stopTimers(self):
    """Cancel and forget whichever of the two timers is currently set."""
    for attr in ("keepaliveTimer", "activityTimer"):
        timer = getattr(self, attr)
        if timer:
            timer.cancel()
            setattr(self, attr, None)
435
def activity(self, res=None):
    """Record the current time as the last time activity was seen.

    'res' is ignored; accepting it lets this method be used directly as a
    Deferred callback.
    """
    timestamp = now()
    self.lastActivity = timestamp
438
def doKeepalive(self):
    """Timer callback: send one "keepalive" request over the perspective."""
    # The timer that invoked us has fired, so drop the reference to it.
    self.keepaliveTimer = None
    log.msg("sending app-level keepalive")
    # If the request fails outright, the connection was already dropped,
    # so the errback just logs and ignores it.
    pending = self.perspective.callRemote("keepalive")
    pending.addCallback(self.activity)
    pending.addErrback(self.keepaliveLost)
447
def keepaliveLost(self, f):
    """Errback for a failed keepalive request: log it and carry on.

    The failure 'f' itself is not logged.
    """
    log.msg("BotFactory.keepaliveLost")
450
def checkActivity(self):
    """Timer callback: drop the connection if nothing has been heard from
    the master for longer than keepaliveInterval, otherwise re-arm the
    timers."""
    # The timer that invoked us has fired, so drop the reference to it.
    self.activityTimer = None
    deadline = self.lastActivity + self.keepaliveInterval
    if deadline >= now():
        # activity was seen recently enough: schedule the next round
        self.startTimers()
        return
    log.msg("BotFactory.checkActivity: nothing from master for "
            "%d secs" % (now() - self.lastActivity))
    self.perspective.broker.transport.loseConnection()
459
460 - def stopFactory(self):
463 464
class BuildSlave(service.MultiService):
    """Top-level service: owns the Bot and the TCP client connection that
    logs in to the buildmaster through a BotFactory."""

    botClass = Bot

    # debugOpts is a dictionary used during unit tests.

    # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
    # calls to remote_ping will stall for 'timeout' seconds before
    # returning. The DelayedCalls used to implement this are stashed in the
    # list so they can be cancelled later.

    # debugOpts['failPingOnce'] can be set to True to make the slaveping fail
    # exactly once.

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=30, umask=None,
                 maxdelay=300, debugOpts={}, unicode_encoding=None):
        """Create the bot, the login factory and the TCP client service.

        A keepalive of 0 is treated as None. The mutable default for
        debugOpts is safe here because the dict is copied immediately and
        the default itself is never modified.
        """
        log.msg("Creating BuildSlave -- version: %s" % buildslave.version)
        service.MultiService.__init__(self)
        self.debugOpts = debugOpts.copy()
        bot = self.botClass(basedir, usePTY, unicode_encoding=unicode_encoding)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask
        bf = self.bf = BotFactory(keepalive, keepaliveTimeout, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def waitUntilDisconnected(self):
        """Utility method for testing. Returns a Deferred that will fire
        when we lose the connection to the master (immediately if there is
        no perspective at the moment)."""
        if not self.bf.perspective:
            return defer.succeed(None)
        d = defer.Deferred()
        self.bf.perspective.notifyOnDisconnect(lambda res: d.callback(None))
        return d

    def startService(self):
        """Apply the configured umask, if any, then start child services."""
        if self.umask is not None:
            os.umask(self.umask)
        service.MultiService.startService(self)

    def stopService(self):
        """Stop reconnecting and stop all child services.

        Returns the Deferred produced by MultiService.stopService so the
        caller can wait for the children to finish stopping.
        """
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        return service.MultiService.stopService(self)
513