Package buildslave :: Module bot
[frames | no frames]

Source Code for Module buildslave.bot

  1  import os.path 
  2  import socket 
  3  import sys 
  4   
  5  from twisted.spread import pb 
  6  from twisted.python import log 
  7  from twisted.internet import reactor, defer, error 
  8  from twisted.application import service, internet 
  9  from twisted.cred import credentials 
 10   
 11  import buildslave 
 12  from buildslave.util import now 
 13  from buildslave.pbutil import ReconnectingPBClientFactory 
 14  from buildslave.commands import registry, base 
 15   
class UnknownCommand(pb.Error):
    """Raised when the master names a SlaveCommand with no registered factory."""
18
class SlaveBuilder(pb.Referenceable, service.Service):
    """This is the local representation of a single Builder: it handles a
    single kind of build (like an all-warnings build). It has a name and a
    home directory. The rest of its behavior is determined by the master.
    """

    # when true, stopService() and lostRemoteStep() halt the running command
    stopCommandOnShutdown = True

    # remote is a ref to the Builder object on the master side, and is set
    # when they attach. We use it to detect when the connection to the master
    # is severed.
    remote = None

    # .command points to a SlaveCommand instance, and is set while the step
    # is running. We use it to implement the stopBuild method.
    command = None

    # .remoteStep is a ref to the master-side BuildStep object, and is set
    # when the step is started
    remoteStep = None

    # useful for replacing the reactor in tests
    _reactor = reactor

    def __init__(self, name):
        """Create a builder known by C{name}."""
        # service.Service has no __init__ method, so there is nothing to
        # chain up to here.
        self.setName(name)

    def __repr__(self):
        return "<SlaveBuilder '%s' at %d>" % (self.name, id(self))

    def setServiceParent(self, parent):
        """Attach to C{parent} and remember it as C{self.bot}."""
        service.Service.setServiceParent(self, parent)
        self.bot = self.parent
        # note that self.parent will go away when the buildmaster's config
        # file changes and this Builder is removed (possibly because it has
        # been changed, so the Builder will be re-added again in a moment).
        # This may occur during a build, while a step is running.

    def setBuilddir(self, builddir):
        """Record C{builddir} and make sure the matching directory exists
        underneath the bot's basedir."""
        assert self.parent
        self.builddir = builddir
        self.basedir = os.path.join(self.bot.basedir, self.builddir)
        if not os.path.isdir(self.basedir):
            os.makedirs(self.basedir)

    def stopService(self):
        """Stop the service, halting any running command if
        C{stopCommandOnShutdown} is set."""
        service.Service.stopService(self)
        if self.stopCommandOnShutdown:
            self.stopCommand()

    def activity(self):
        """Forward an "activity" notification to the BotFactory, reached
        through our parent and its parent, if both are still present."""
        bot = self.parent
        if bot:
            # named 'slave' rather than 'buildslave' so that the imported
            # buildslave module is not shadowed inside this method
            slave = bot.parent
            if slave:
                bf = slave.bf
                bf.activity()

    def remote_setMaster(self, remote):
        """Remember the master-side reference and ask to be told when it
        disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))

    def lostRemote(self, remote):
        """Disconnect callback for the reference set by remote_setMaster."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback for the master-side step reference."""
        log.msg("lost remote step")
        self.remoteStep = None
        if self.stopCommandOnShutdown:
            self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """This is invoked before the first step of any new build is run. It
        doesn't do much, but masters call it so it's still here."""
        pass

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        This gets invoked by L{buildbot.process.step.RemoteCommand.start}, as
        part of various master-side BuildSteps, to start various commands
        that actually do the build. I return nothing. Eventually I will call
        .commandComplete() to notify the master-side RemoteCommand that I'm
        done.

        @raise UnknownCommand: if the registry has no factory for C{command}
        """

        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory = registry.getFactory(command)
        except KeyError:
            # call form of raise: same behavior as the old
            # "raise Class, value" statement, but not Python-2-only syntax
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command,stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
        d = self.command.doStart()
        # discard the command's result, so that commandComplete receives
        # None on success and a Failure otherwise
        d.addCallback(lambda res: None)
        d.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if not self.command:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")
            return
        self.command.doInterrupt()

    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. This is used when the buildslave is shutting down or the
        connection to the master has been lost. Interrupt the command,
        silence it, and then forget about it."""
        if not self.command:
            return
        log.msg("stopCommand: halting current command %s" % self.command)
        self.command.doInterrupt() # shut up! and die!
        self.command = None # forget you!

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """This sends the status update to the master-side
        L{buildbot.process.step.RemoteCommand} object, giving it a sequence
        number in the process. It adds the update to a queue, and asks the
        master to acknowledge the update so it can be removed from that
        queue."""

        if not self.running:
            # .running comes from service.Service, and says whether the
            # service is running or not. If we aren't running, don't send any
            # status messages.
            return
        # the update[1]=0 comes from the leftover 'updateNum', which the
        # master still expects to receive. Provide it to avoid significant
        # interoperability issues between new slaves and old masters.
        if self.remoteStep:
            update = [data, 0]
            updates = [update]
            d = self.remoteStep.callRemote("update", updates)
            d.addCallback(self.ackUpdate)
            d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        """Callback for a successful "update" call to the master."""
        self.activity() # update the "last activity" timer

    def ackComplete(self, dummy):
        """Callback for a successful "complete" call to the master."""
        self.activity() # update the "last activity" timer

    def _ackFailed(self, why, where):
        """Errback for remote calls; C{where} labels the failed call."""
        log.msg("SlaveBuilder._ackFailed:", where)
        log.err(why) # we don't really care

    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Report the end of the current command to the master-side step.

        @param failure: None on success, otherwise the failure that ended
            the command
        """
        if failure:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            # failure, if present, is a failure.Failure. To send it across
            # the wire, we must turn it into a pb.CopyableFailure.
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        else:
            # failure is None
            log.msg("SlaveBuilder.commandComplete", self.command)
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if self.remoteStep:
            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
            d = self.remoteStep.callRemote("complete", failure)
            d.addCallback(self.ackComplete)
            d.addErrback(self._ackFailed, "sendComplete")
            self.remoteStep = None

    def remote_shutdown(self):
        """Stop the reactor at the master's request."""
        log.msg("slave shutting down on command from master")
        self._reactor.stop()
217 218
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot."""
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, unicode_encoding=None):
        """
        @param basedir: directory under which builder directories live
        @param usePTY: value copied onto every SlaveBuilder created here
        @param unicode_encoding: encoding copied onto every SlaveBuilder;
            defaults to the filesystem encoding, or 'ascii' if that is unset
        """
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.unicode_encoding = unicode_encoding or sys.getfilesystemencoding() or 'ascii'
        self.builders = {}

    def startService(self):
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def remote_getCommands(self):
        """Return a dict mapping every registered command name to
        base.command_version."""
        commands = dict(
            (n, base.command_version)
            for n in registry.getAllCommandNames()
        )
        return commands

    def remote_setBuilderList(self, wanted):
        """Bring self.builders in line with the master's list.

        @param wanted: sequence of (name, builddir) pairs
        @return: dict mapping each wanted name to its SlaveBuilder
        """
        retval = {}
        wanted_names = set()
        wanted_dirs = ["info"]
        for (name, builddir) in wanted:
            wanted_names.add(name)
            wanted_dirs.append(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s"
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name)
                b.usePTY = self.usePTY
                b.unicode_encoding = self.unicode_encoding
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # iterate over a snapshot, since entries are deleted inside the loop
        for name in list(self.builders):
            if name not in wanted_names:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del self.builders[name]

        for d in os.listdir(self.basedir):
            if os.path.isdir(os.path.join(self.basedir, d)):
                if d not in wanted_dirs:
                    log.msg("I have a leftover directory '%s' that is not "
                            "being used by the buildmaster: you can delete "
                            "it now" % d)
        return retval

    def remote_print(self, message):
        """Log a message sent by the master."""
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.
        """

        files = {}
        basedir = os.path.join(self.basedir, "info")
        if not os.path.isdir(basedir):
            return files
        for f in os.listdir(basedir):
            filename = os.path.join(basedir, f)
            if os.path.isfile(filename):
                # close each file explicitly instead of leaving the handle
                # to the garbage collector
                infofile = open(filename, "r")
                try:
                    files[f] = infofile.read()
                finally:
                    infofile.close()
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildslave.version
299 300 301
class BotFactory(ReconnectingPBClientFactory):
    # 'keepaliveInterval' serves two purposes. The first is to keep the
    # connection alive: it guarantees that there will be at least some
    # traffic once every 'keepaliveInterval' seconds, which may help keep an
    # interposed NAT gateway from dropping the address mapping because it
    # thinks the connection has been abandoned. The second is to put an upper
    # limit on how long the buildmaster might have gone away before we notice
    # it. For this second purpose, we insist upon seeing *some* evidence of
    # the buildmaster at least once every 'keepaliveInterval' seconds.
    keepaliveInterval = None # None = do not use keepalives

    # 'keepaliveTimeout' seconds before the interval expires, we will send a
    # keepalive request, both to add some traffic to the connection, and to
    # prompt a response from the master in case all our builders are idle. We
    # don't insist upon receiving a timely response from this message: a slow
    # link might put the request at the wrong end of a large build message.
    keepaliveTimeout = 30 # how long we will go without a response

    # 'maxDelay' determines the maximum amount of time the slave will wait
    # between connection retries
    maxDelay = 300

    keepaliveTimer = None
    activityTimer = None
    lastActivity = 0
    unsafeTracebacks = 1
    perspective = None

    def __init__(self, buildmaster_host, port, keepaliveInterval, keepaliveTimeout, maxDelay):
        """Store the keepalive and retry settings described by the
        class-level comments, plus the master's address."""
        ReconnectingPBClientFactory.__init__(self)
        self.maxDelay = maxDelay
        self.keepaliveInterval = keepaliveInterval
        self.keepaliveTimeout = keepaliveTimeout
        # NOTE: this class does not actually make the TCP connections - this information is
        # only here to print useful error messages
        self.buildmaster_host = buildmaster_host
        self.port = port

    def startedConnecting(self, connector):
        log.msg("Connecting to %s:%s" % (self.buildmaster_host, self.port))
        ReconnectingPBClientFactory.startedConnecting(self, connector)
        self.connector = connector

    def gotPerspective(self, perspective):
        """Record the new perspective, try to enable TCP keepalives, and
        start the application-level keepalive timers if an interval is set."""
        log.msg("Connected to %s:%s; slave is ready" % (self.buildmaster_host, self.port))
        ReconnectingPBClientFactory.gotPerspective(self, perspective)
        self.perspective = perspective
        try:
            perspective.broker.transport.setTcpKeepAlive(1)
        except Exception:
            # best effort only; 'except Exception' rather than a bare
            # except, so SystemExit/KeyboardInterrupt are not swallowed
            log.msg("unable to set SO_KEEPALIVE")
            # without TCP keepalives, fall back to application-level ones
            if not self.keepaliveInterval:
                self.keepaliveInterval = 10*60
        self.activity()
        if self.keepaliveInterval:
            log.msg("sending application-level keepalives every %d seconds"
                    % self.keepaliveInterval)
            self.startTimers()

    def clientConnectionFailed(self, connector, reason):
        self.connector = None
        why = reason
        if reason.check(error.ConnectionRefusedError):
            why = "Connection Refused"
        log.msg("Connection to %s:%s failed: %s" % (self.buildmaster_host, self.port, why))
        ReconnectingPBClientFactory.clientConnectionFailed(self,
                                                           connector, reason)

    def clientConnectionLost(self, connector, reason):
        log.msg("Lost connection to %s:%s" % (self.buildmaster_host, self.port))
        self.connector = None
        self.stopTimers()
        self.perspective = None
        ReconnectingPBClientFactory.clientConnectionLost(self,
                                                         connector, reason)

    def startTimers(self):
        """Schedule the next keepalive request and the next activity check."""
        assert self.keepaliveInterval
        assert not self.keepaliveTimer
        assert not self.activityTimer
        # Insist that doKeepalive fires before checkActivity. Really, it
        # needs to happen at least one RTT beforehand.
        assert self.keepaliveInterval > self.keepaliveTimeout

        # arrange to send a keepalive a little while before our deadline
        when = self.keepaliveInterval - self.keepaliveTimeout
        self.keepaliveTimer = reactor.callLater(when, self.doKeepalive)
        # and check for activity too
        self.activityTimer = reactor.callLater(self.keepaliveInterval,
                                               self.checkActivity)

    def stopTimers(self):
        """Cancel and forget whichever timers are still pending."""
        if self.keepaliveTimer:
            self.keepaliveTimer.cancel()
            self.keepaliveTimer = None
        if self.activityTimer:
            self.activityTimer.cancel()
            self.activityTimer = None

    def activity(self, res=None):
        """Note that something was just heard from the master. C{res} is
        ignored; it lets this method serve as a Deferred callback."""
        self.lastActivity = now()

    def doKeepalive(self):
        # send the keepalive request. If it fails outright, the connection
        # was already dropped, so just log and ignore.
        self.keepaliveTimer = None
        log.msg("sending app-level keepalive")
        d = self.perspective.callRemote("keepalive")
        d.addCallback(self.activity)
        d.addErrback(self.keepaliveLost)

    def keepaliveLost(self, f):
        log.msg("BotFactory.keepaliveLost")

    def checkActivity(self):
        """Drop the connection if nothing has been heard from the master
        for longer than keepaliveInterval; otherwise restart the timers."""
        self.activityTimer = None
        # sample the clock once, so the logged idle time matches the value
        # that was compared against the deadline
        current = now()
        if self.lastActivity + self.keepaliveInterval < current:
            log.msg("BotFactory.checkActivity: nothing from master for "
                    "%d secs" % (current - self.lastActivity))
            self.perspective.broker.transport.loseConnection()
            return
        self.startTimers()
424
425 - def stopFactory(self):
428 429
class BuildSlave(service.MultiService):
    """Top-level service: owns the Bot, the BotFactory and the TCP client
    that connects them to the buildmaster."""

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=30, umask=None,
                 maxdelay=300, unicode_encoding=None):
        """
        @param buildmaster_host: host to connect to
        @param port: port to connect to
        @param name: username for the login credentials
        @param passwd: password for the login credentials
        @param basedir: passed through to the Bot
        @param keepalive: keepalive interval; 0 is treated as None
        @param usePTY: passed through to the Bot
        @param keepaliveTimeout: passed through to the BotFactory
        @param umask: if not None, applied with os.umask() in startService
        @param maxdelay: passed through to the BotFactory as maxDelay
        @param unicode_encoding: passed through to the Bot
        """
        log.msg("Creating BuildSlave -- version: %s" % buildslave.version)
        self.recordHostname()
        service.MultiService.__init__(self)
        bot = Bot(basedir, usePTY, unicode_encoding=unicode_encoding)
        bot.setServiceParent(self)
        self.bot = bot
        if keepalive == 0:
            keepalive = None
        self.umask = umask
        bf = self.bf = BotFactory(buildmaster_host, port, keepalive, keepaliveTimeout, maxdelay)
        bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
        self.connection = c = internet.TCPClient(buildmaster_host, port, bf)
        c.setServiceParent(self)

    def recordHostname(self):
        "Record my hostname in twistd.hostname, for user convenience"
        log.msg("recording hostname in twistd.hostname")
        try:
            f = open("twistd.hostname", "w")
            try:
                f.write("%s\n" % socket.getfqdn())
            finally:
                # close explicitly so the handle is not leaked
                f.close()
        except Exception:
            # still best-effort, but no longer swallows
            # SystemExit/KeyboardInterrupt the way a bare except does
            log.msg("failed - ignoring")

    def startService(self):
        if self.umask is not None:
            os.umask(self.umask)
        service.MultiService.startService(self)

    def stopService(self):
        """Stop reconnecting, then stop all child services.

        @return: whatever MultiService.stopService returns, so that callers
            can wait for the child services to finish stopping
        """
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        return service.MultiService.stopService(self)
465