Package buildbot :: Package slave :: Module bot
[frames | no frames]

Source Code for Module buildbot.slave.bot

  1   
  2  import os.path 
  3  import sys 
  4   
  5  import buildbot 
  6   
  7  from twisted.spread import pb 
  8  from twisted.python import log 
  9  from twisted.internet import reactor, defer 
 10  from twisted.application import service, internet 
 11  from twisted.cred import credentials 
 12   
 13  from buildbot.util import now 
 14  from buildbot.pbutil import ReconnectingPBClientFactory 
 15  from buildbot.slave.commands import registry 
 16   
 17  # make sure the standard commands get registered. This import is performed 
 18  # for its side-effects. 
 19  from buildbot.slave.commands import base, transfer, vcs 
 20   
class NoCommandRunning(pb.Error):
    """A pb.Error subclass with no behavior of its own.

    NOTE(review): nothing in the code visible in this module raises it;
    judging by the name it reports that no command is in progress --
    confirm against the callers before relying on that.
    """
class WrongCommandRunning(pb.Error):
    """A pb.Error subclass with no behavior of its own.

    NOTE(review): nothing in the code visible in this module raises it;
    judging by the name it reports that a different command than expected
    is in progress -- confirm against the callers.
    """
class UnknownCommand(pb.Error):
    """Raised by SlaveBuilder.remote_startCommand when the requested command
    name has no entry in registry.commandRegistry."""
27
class Master:
    """Plain holder for a host, a port, a username and a password."""

    def __init__(self, host, port, username, password):
        """Store the four values, unmodified, as same-named attributes."""
        self.host, self.port = host, port
        self.username, self.password = username, password
34
class SlaveBuild:

    """State container that lives for the duration of one build, so that
    information can be carried from one step to the next. Every SlaveCommand
    has access to it.
    """

    def __init__(self, builder):
        """Remember the builder object this build was created for."""
        owner = builder
        self.builder = owner
42
class SlaveBuilder(pb.Referenceable, service.Service):

    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.

    The remote_* methods are the entry points invoked by the master-side
    Builder; the remaining methods are slave-side plumbing used by the
    commands this object spawns and by the service machinery.
    """

    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .build points to a SlaveBuild object, a new one for each build
    build = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. We use it to implement the stopBuild method.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    # .bot is the parent Bot, assigned by setServiceParent. The class-level
    # default lets remote_ping's "if self.bot" guard work on a builder that
    # has not been given a parent yet instead of raising AttributeError.
    bot = None

    def __init__(self, name, not_really):
        """Name the service and remember the not_really flag.

        @param name: service name, passed to setName
        @param not_really: stored verbatim as self.not_really
        """
        #service.Service.__init__(self) # Service has no __init__ method
        self.setName(name)
        self.not_really = not_really

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to the parent service and keep a reference to it in .bot."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Record builddir and make sure the matching directory exists.

        The directory is builddir joined onto the bot's basedir; it is
        created (including intermediate directories) when missing. Requires
        that a parent has been set.
        """
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service and, if stopCommandOnShutdown is set, halt any
        command that is still running."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Forward an 'activity seen' notification to the BotFactory.

        The factory is reached as self.parent.parent.bf; if either parent
        link is missing the notification is silently dropped.
        """
        bot = self.parent
        if not bot:
            return
        buildslave = bot.parent
        if buildslave:
            buildslave.bf.activity()

    def remote_setMaster(self, remote):
        """Store the master-side reference and ask to be told (via
        lostRemote) when it disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message from the master.

        The special message "ping" is additionally routed to remote_ping,
        whose result is returned; any other message returns None.
        """
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))
        if message == "ping":
            return self.remote_ping()

    def remote_ping(self):
        """Log the ping and apply the debugging hooks, if any are set.

        The hooks live in the debugOpts dict of the bot's parent:
          - "stallPings": (timeout, timers) -- return a Deferred that fires
            after timeout seconds; the DelayedCall is appended to timers.
          - "failPingOnce": remove the key and raise a pb.Error subclass.
        With neither hook active (or no bot/parent) this returns None.
        """
        log.msg("SlaveBuilder.remote_ping(%s)" % self)
        if self.bot and self.bot.parent:
            debugOpts = self.bot.parent.debugOpts
            if debugOpts.get("stallPings"):
                log.msg(" debug_stallPings")
                timeout, timers = debugOpts["stallPings"]
                d = defer.Deferred()
                t = reactor.callLater(timeout, d.callback, None)
                timers.append(t)
                return d
            if debugOpts.get("failPingOnce"):
                log.msg(" debug_failPingOnce")
                class FailPingError(pb.Error): pass
                # one-shot: clear the flag before failing
                del debugOpts['failPingOnce']
                raise FailPingError("debug_failPingOnce means we should fail")

    def lostRemote(self, remote):
        """Disconnect callback for .remote: log it and drop the reference."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback for .remoteStep: drop the reference and, if
        stopCommandOnShutdown is set, halt the running command."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run. It
        creates a new SlaveBuild object, which holds slave-side state from
        one step to the next."""
        self.build = SlaveBuild(self)
        log.msg("%s.startBuild" % self)

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        @param stepref: master-side reference, stored as self.remoteStep
        @param stepId: passed through to the command factory
        @param command: key looked up in registry.commandRegistry
        @param args: passed through to the command factory
        @raise UnknownCommand: if command is not in the registry
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory, _version = registry.commandRegistry[command]
        except KeyError:
            # call-style raise: valid on both Python 2 and Python 3
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result so commandComplete sees None on
        # success and a Failure otherwise
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()

    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """This sends the status update to the master-side
        L{buildbot.process.step.RemoteCommand} object, giving it a sequence
        number in the process. It adds the update to a queue, and asks the
        master to acknowledge the update so it can be removed from that
        queue."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        self.activity() # update the "last activity" timer

    def ackComplete(self, dummy):
        self.activity() # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback shared by sendUpdate and commandComplete: log where the
        acknowledgement failed and swallow the failure."""
        log.msg("SlaveBuilder._ackFailed:", where)
        #log.err(why) # we don't really care


    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        @param failure: None on success, otherwise the failure.Failure the
                        command's Deferred errbacked with
        """
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None


    def remote_shutdown(self):
        """Stop the reactor at the master's request."""
        # parenthesized single-argument form prints the same text on
        # Python 2 and Python 3
        print("slave shutting down on command from master")
        reactor.stop()
262 263
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot.

    I own the SlaveBuilder services (kept in self.builders, keyed by builder
    name) and answer the master's remote_* queries about this slave.
    """
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, not_really=0, unicode_encoding=None):
        """
        @param basedir: directory under which builder directories and the
                        'info' directory are looked up
        @param usePTY: copied onto every SlaveBuilder that gets created
        @param not_really: passed to every SlaveBuilder that gets created
        @param unicode_encoding: encoding name; when false, falls back to
                                 sys.getfilesystemencoding() and then 'ascii'
        """
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.not_really = not_really
        self.unicode_encoding = (unicode_encoding
                                 or sys.getfilesystemencoding()
                                 or 'ascii')
        self.builders = {}

    def startService(self):
        """Start the child services; basedir must already exist."""
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getDirs(self):
        """Return the names of the directories directly inside basedir."""
        # os.listdir yields bare names, so each one must be joined back onto
        # basedir before testing it; otherwise the check is made against the
        # current working directory.
        return [d for d in os.listdir(self.basedir)
                if os.path.isdir(os.path.join(self.basedir, d))]

    def remote_getCommands(self):
        """Return a dict mapping each registered command name to its
        version."""
        commands = {}
        for name, (factory, version) in registry.commandRegistry.items():
            commands[name] = version
        return commands

    def remote_setBuilderList(self, wanted):
        """Make self.builders match the master's list of builders.

        Builders not seen before are created and attached, existing ones get
        their builddir updated when it changed, and builders that are no
        longer wanted are detached and forgotten. Directories in basedir that
        no wanted builder uses are only reported in the log, never deleted.

        @param wanted: sequence of (name, builddir) pairs
        @return: dict mapping each wanted name to its SlaveBuilder
        """
        retval = {}
        wanted_dirs = ["info"]
        for (name, builddir) in wanted:
            wanted_dirs.append(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s" \
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name, self.not_really)
                b.usePTY = self.usePTY
                b.unicode_encoding = self.unicode_encoding
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # retval now has exactly the wanted names as keys, so it doubles as
        # the membership test. Iterate over a snapshot of the keys because
        # entries are deleted inside the loop.
        for name in list(self.builders.keys()):
            if name not in retval:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del(self.builders[name])

        for d in os.listdir(self.basedir):
            # join onto basedir: listdir returns bare names
            if os.path.isdir(os.path.join(self.basedir, d)):
                if d not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % d)
        return retval

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        @return: dict mapping file name to file contents; empty when the
                 info directory does not exist
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if not os.path.isdir(basedir):
            return files
        for f in os.listdir(basedir):
            filename = os.path.join(basedir, f)
            if os.path.isfile(filename):
                # close each file explicitly rather than leaving the handle
                # for the garbage collector
                infofile = open(filename, "r")
                try:
                    files[f] = infofile.read()
                finally:
                    infofile.close()
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildbot.version
347 348 349
350 -class BotFactory(ReconnectingPBClientFactory):
351 # 'keepaliveInterval' serves two purposes. The first is to keep the 352 # connection alive: it guarantees that there will be at least some 353 # traffic once every 'keepaliveInterval' seconds, which may help keep an 354 # interposed NAT gateway from dropping the address mapping because it 355 # thinks the connection has been abandoned. The second is to put an upper 356 # limit on how long the buildmaster might have gone away before we notice 357 # it. For this second purpose, we insist upon seeing *some* evidence of 358 # the buildmaster at least once every 'keepaliveInterval' seconds. 359 keepaliveInterval = None # None = do not use keepalives 360 361 # 'keepaliveTimeout' seconds before the interval expires, we will send a 362 # keepalive request, both to add some traffic to the connection, and to 363 # prompt a response from the master in case all our builders are idle. We 364 # don't insist upon receiving a timely response from this message: a slow 365 # link might put the request at the wrong end of a large build message. 366 keepaliveTimeout = 30 # how long we will go without a response 367 368 # 'maxDelay' determines the maximum amount of time the slave will wait 369 # between connection retries 370 maxDelay = 300 371 372 keepaliveTimer = None 373 activityTimer = None 374 lastActivity = 0 375 unsafeTracebacks = 1 376 perspective = None 377
378 - def __init__(self, keepaliveInterval, keepaliveTimeout, maxDelay):
383
384 - def startedConnecting(self, connector):
387
def gotPerspective(self, perspective):
    """Record the freshly obtained perspective and start the keepalives.

    TCP-level keepalive is requested on the underlying transport. If that
    fails, and no application-level keepalive interval was configured, a
    10-minute interval is used instead. Application-level timers are
    started whenever an interval ends up being set.
    """
    ReconnectingPBClientFactory.gotPerspective(self, perspective)
    self.perspective = perspective
    try:
        perspective.broker.transport.setTcpKeepAlive(1)
    except Exception:
        # Best effort only. Catch Exception rather than everything so that
        # KeyboardInterrupt/SystemExit are not swallowed here.
        log.msg("unable to set SO_KEEPALIVE")
        if not self.keepaliveInterval:
            self.keepaliveInterval = 10*60
    self.activity()
    if self.keepaliveInterval:
        log.msg("sending application-level keepalives every %d seconds" \
                % self.keepaliveInterval)
        self.startTimers()
402
403 - def clientConnectionFailed(self, connector, reason):
407
def clientConnectionLost(self, connector, reason):
    """Forget the connector and perspective, cancel the keepalive timers,
    then hand the event on to ReconnectingPBClientFactory."""
    self.connector = None
    self.stopTimers()
    self.perspective = None
    base = ReconnectingPBClientFactory
    base.clientConnectionLost(self, connector, reason)
414
def startTimers(self):
    """Schedule one keepalive send and one activity check.

    doKeepalive is scheduled keepaliveTimeout seconds ahead of
    checkActivity, which itself runs keepaliveInterval seconds from now.
    Both timers must currently be unset.
    """
    assert self.keepaliveInterval
    assert not self.keepaliveTimer
    assert not self.activityTimer
    # Insist that doKeepalive fires before checkActivity. Really, it
    # needs to happen at least one RTT beforehand.
    assert self.keepaliveInterval > self.keepaliveTimeout

    schedule = reactor.callLater
    # the keepalive goes out a little while before our deadline ...
    keepalive_delay = self.keepaliveInterval - self.keepaliveTimeout
    self.keepaliveTimer = schedule(keepalive_delay, self.doKeepalive)
    # ... and the activity check happens at the deadline itself
    self.activityTimer = schedule(self.keepaliveInterval, self.checkActivity)
429
def stopTimers(self):
    """Cancel whichever of the keepalive and activity timers are pending
    and reset both attributes to None."""
    for attr in ("keepaliveTimer", "activityTimer"):
        pending = getattr(self, attr)
        if pending:
            pending.cancel()
            setattr(self, attr, None)
437
def activity(self, res=None):
    """Stamp lastActivity with the current time.

    res is ignored; it is accepted so this method can also be used as a
    Deferred callback (as doKeepalive does).
    """
    stamp = now()
    self.lastActivity = stamp
440
def doKeepalive(self):
    """Timer callback: send one "keepalive" request over the perspective.

    A reply counts as activity; a failure is merely logged by
    keepaliveLost.
    """
    # this timer has fired, so there is nothing left to cancel
    self.keepaliveTimer = None
    log.msg("sending app-level keepalive")
    # If the request fails outright, the connection was already dropped,
    # so the errback just logs and ignores it.
    request = self.perspective.callRemote("keepalive")
    request.addCallback(self.activity)
    request.addErrback(self.keepaliveLost)
449
def keepaliveLost(self, f):
    """Errback for the keepalive request: log the event and discard the
    failure f."""
    message = "BotFactory.keepaliveLost"
    log.msg(message)
452
def checkActivity(self):
    """Timer callback: drop the connection if the master has been silent
    for longer than keepaliveInterval, otherwise re-arm both timers."""
    # this timer has fired, so there is nothing left to cancel
    self.activityTimer = None
    deadline = self.lastActivity + self.keepaliveInterval
    if not (deadline < now()):
        # we heard from the master recently enough: go around again
        self.startTimers()
        return
    log.msg("BotFactory.checkActivity: nothing from master for "
            "%d secs" % (now() - self.lastActivity))
    self.perspective.broker.transport.loseConnection()
461
462 - def stopFactory(self):
465 466
class BuildSlave(service.MultiService):
    """Top-level service: owns the Bot, the BotFactory that logs in to the
    buildmaster, and the TCP client connection that carries it."""
    botClass = Bot

    # debugOpts is a dictionary used during unit tests.

    # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
    # calls to SlaveBuilder.remote_ping (which includes remote_print("ping"))
    # will stall for 'timeout' seconds before returning. The DelayedCalls
    # used to implement this are stashed in the list so they can be
    # cancelled later.

    # debugOpts['failPingOnce'] can be set to True to make the slaveping fail
    # exactly once.

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=30, umask=None,
                 maxdelay=300, debugOpts=None, unicode_encoding=None):
        """
        @param buildmaster_host: host for the TCP client connection
        @param port: port for the TCP client connection
        @param name: username of the login credentials
        @param passwd: password of the login credentials
        @param basedir: passed to the bot
        @param keepalive: keepalive interval handed to BotFactory; 0 is
                          turned into None
        @param usePTY: passed to the bot
        @param keepaliveTimeout: handed to BotFactory
        @param umask: if not None, applied with os.umask in startService
        @param maxdelay: handed to BotFactory
        @param debugOpts: optional dict of unit-test hooks (see the class
                          comments); a copy is kept, None means no hooks
        @param unicode_encoding: passed to the bot
        """
        log.msg("Creating BuildSlave -- buildbot.version: %s" % buildbot.version)
        service.MultiService.__init__(self)
        # None sentinel instead of a shared mutable default; either way we
        # keep our own copy so the caller's dict is never modified
        if debugOpts is None:
            self.debugOpts = {}
        else:
            self.debugOpts = debugOpts.copy()
        bot = self.botClass(basedir, usePTY, unicode_encoding=unicode_encoding)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask
        bf = self.bf = BotFactory(keepalive, keepaliveTimeout, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def waitUntilDisconnected(self):
        """Return a Deferred that fires with None once the connection to the
        master is gone (immediately, if there is no perspective now)."""
        # utility method for testing. Returns a Deferred that will fire when
        # we lose the connection to the master.
        if not self.bf.perspective:
            return defer.succeed(None)
        d = defer.Deferred()
        self.bf.perspective.notifyOnDisconnect(lambda res: d.callback(None))
        return d

    def startService(self):
        """Apply the configured umask, if any, then start the children."""
        if self.umask is not None:
            os.umask(self.umask)
        service.MultiService.startService(self)

    def stopService(self):
        """Stop reconnecting, stop the child services and make sure the TCP
        connection is torn down."""
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        service.MultiService.stopService(self)
        # now kill the TCP connection
        # twisted >2.0.1 does this for us, and leaves _connection=None
        if self.connection._connection:
            self.connection._connection.disconnect()
519