Package buildbot :: Package changes :: Module svnpoller
[frames] | no frames]

Source Code for Module buildbot.changes.svnpoller

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15   
 16   
 17  # Based on the work of Dave Peticolas for the P4poll 
 18  # Changed to svn (using xml.dom.minidom) by Niklaus Giger 
 19  # Hacked beyond recognition by Brian Warner 
 20   
 21  from twisted.python import log 
 22  from twisted.internet import defer, reactor, utils 
 23  from twisted.internet.task import LoopingCall 
 24   
 25  from buildbot import util 
 26  from buildbot.changes import base 
 27  from buildbot.changes.changes import Change 
 28   
 29  import xml.dom.minidom 
 30  import os, urllib 
 31   
32 -def _assert(condition, msg):
33 if condition: 34 return True 35 raise AssertionError(msg)
# these split_file_* functions are available for use as values to the
# split_file= argument.
def split_file_alwaystrunk(path):
    """Treat every path as living on the trunk.

    Returns a (branch, filepath) tuple whose branch is always None and
    whose filepath is the path exactly as given.
    """
    branch = None
    return (branch, path)
41
def split_file_branches(path):
    """Split a repository-relative path into (branch, filepath).

    trunk/subdir/file.c becomes (None, "subdir/file.c"), and
    branches/1.5.x/subdir/file.c becomes ("branches/1.5.x", "subdir/file.c").
    Paths whose first component is neither 'trunk' nor 'branches' yield None.
    """
    segments = path.split('/')
    top = segments[0]
    if top == 'trunk':
        # everything after 'trunk' is the file path; trunk has no branch name
        return (None, '/'.join(segments[1:]))
    if top == 'branches':
        # the branch name is 'branches/<name>'; the remainder is the file path
        branch = '/'.join(segments[:2])
        filepath = '/'.join(segments[2:])
        return (branch, filepath)
    return None
52 53
class SVNPoller(base.ChangeSource, util.ComparableMixin):
    """This source will poll a Subversion repository for changes and submit
    them to the change master."""

    compare_attrs = ["svnurl", "split_file_function",
                     "svnuser", "svnpasswd",
                     "pollinterval", "histmax",
                     "svnbin", "category", "cachepath"]

    parent = None # filled in when we're added
    last_change = None
    loop = None
    working = False
    # the pending reactor.callLater() that will start the polling loop, kept
    # so stopService() can cancel it if we are stopped before it fires
    _start_call = None

    def __init__(self, svnurl, split_file=None,
                 svnuser=None, svnpasswd=None,
                 pollinterval=10*60, histmax=100,
                 svnbin='svn', revlinktmpl='', category=None,
                 project='', cachepath=None):
        """
        @type  svnurl: string
        @param svnurl: the SVN URL that describes the repository and
                       subdirectory to watch. If this ChangeSource should
                       only pay attention to a single branch, this should
                       point at the repository for that branch, like
                       svn://svn.twistedmatrix.com/svn/Twisted/trunk . If it
                       should follow multiple branches, point it at the
                       repository directory that contains all the branches
                       like svn://svn.twistedmatrix.com/svn/Twisted and also
                       provide a branch-determining function.

                       Each file in the repository has a SVN URL in the form
                       (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) could be
                       empty or not, depending upon your branch-determining
                       function. Only files that start with (SVNURL)/(BRANCH)
                       will be monitored. The Change objects that are sent to
                       the Schedulers will see (FILEPATH) for each modified
                       file.

        @type  split_file: callable or None
        @param split_file: a function that is called with a string of the
                           form (BRANCH)/(FILEPATH) and should return a tuple
                           (BRANCH, FILEPATH). This function should match
                           your repository's branch-naming policy. Each
                           changed file has a fully-qualified URL that can be
                           split into a prefix (which equals the value of the
                           'svnurl' argument) and a suffix; it is this suffix
                           which is passed to the split_file function.

                           If the function returns None, the file is ignored.
                           Use this to indicate that the file is not relevant
                           to this buildmaster.

                           For example, if your repository puts the trunk in
                           trunk/... and branches are in places like
                           branches/1.5/..., your split_file function could
                           look like the following (this function is
                           available as svnpoller.split_file_branches)::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                return (None, '/'.join(pieces[1:]))
                            elif pieces[0] == 'branches':
                                return ('/'.join(pieces[0:2]),
                                        '/'.join(pieces[2:]))
                            else:
                                return None

                           If instead your repository layout puts the trunk
                           for ProjectA in trunk/ProjectA/... and the 1.5
                           branch in branches/1.5/ProjectA/..., your
                           split_file function could look like::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                branch = None
                                pieces.pop(0) # remove 'trunk'
                            elif pieces[0] == 'branches':
                                pieces.pop(0) # remove 'branches'
                                # grab branch name
                                branch = 'branches/' + pieces.pop(0)
                            else:
                                return None # something weird
                            productname = pieces.pop(0)
                            if productname != 'ProjectA':
                                return None # wrong product
                            return (branch, '/'.join(pieces))

                           The default of split_file= is None, which
                           indicates that no splitting should be done. This
                           is equivalent to the following function::

                            return (None, path)

                           If you wish, you can override the split_file
                           method with the same sort of function instead of
                           passing in a split_file= argument.


        @type  svnuser:      string
        @param svnuser:      If set, the --username option will be added to
                             the 'svn log' command. You may need this to get
                             access to a private repository.
        @type  svnpasswd:    string
        @param svnpasswd:    If set, the --password option will be added.

        @type  pollinterval: int
        @param pollinterval: interval in seconds between polls. The default
                             is 600 seconds (10 minutes). Smaller values
                             decrease the latency between the time a change
                             is recorded and the time the buildbot notices
                             it, but it also increases the system load.

        @type  histmax:      int
        @param histmax:      maximum number of changes to look back through.
                             The default is 100. Smaller values decrease
                             system load, but if more than histmax changes
                             are recorded between polls, the extra ones will
                             be silently lost.

        @type  svnbin:       string
        @param svnbin:       path to svn binary, defaults to just 'svn'. Use
                             this if your subversion command lives in an
                             unusual location.

        @type  revlinktmpl:  string
        @param revlinktmpl:  A format string to use for hyperlinks to revision
                             information. For example, setting this to
                             "http://reposerver/websvn/revision.php?rev=%s"
                             would create suitable links on the build pages
                             to information in websvn on each revision.

        @type  category:     string
        @param category:     A single category associated with the changes
                             that could be used by schedulers to watch for
                             branches of a certain name AND category.

        @type  project:      string
        @param project:      A single project that the changes are associated
                             with the repository, added to the changes, for
                             the use in change filters

        @type  cachepath:    string
        @param cachepath:    A path to a file that can be used to store the
                             last rev that was processed, so we can grab
                             changes that happened while we were offline
        """

        if svnurl.endswith("/"):
            svnurl = svnurl[:-1] # strip the trailing slash
        self.svnurl = svnurl
        self.split_file_function = split_file or split_file_alwaystrunk
        self.svnuser = svnuser
        self.svnpasswd = svnpasswd

        self.revlinktmpl = revlinktmpl

        self.environ = os.environ.copy() # include environment variables
                                         # required for ssh-agent auth

        self.svnbin = svnbin
        self.pollinterval = pollinterval
        self.histmax = histmax
        self._prefix = None
        self.overrun_counter = 0
        self.loop = LoopingCall(self.checksvn)
        self.category = category
        self.project = project

        self.cachepath = cachepath
        if self.cachepath and os.path.exists(self.cachepath):
            try:
                f = open(self.cachepath, "r")
                try:
                    # close the file even when the contents are not an
                    # integer, so a corrupt cache does not leak the handle
                    self.last_change = int(f.read().strip())
                finally:
                    f.close()
                log.msg("SVNPoller(%s) setting last_change to %s" % (self.svnurl, self.last_change))
            except Exception:
                # unreadable or non-numeric cache: stop using it for the
                # lifetime of this poller rather than failing construction
                self.cachepath = None
                log.msg("SVNPoller(%s) cache file corrupt, skipping and not using" % self.svnurl)
                log.err()

    def split_file(self, path):
        """Split a (BRANCH)/(FILEPATH) string by calling the configured
        split_file function; returns whatever that function returns."""
        # use getattr() to avoid turning this function into a bound method,
        # which would require it to have an extra 'self' argument
        f = getattr(self, "split_file_function")
        return f(path)

    def startService(self):
        """Start the service and schedule the polling loop to begin."""
        log.msg("SVNPoller(%s) starting" % self.svnurl)
        base.ChangeSource.startService(self)
        # Don't start the loop just yet because the reactor isn't running.
        # Give it a chance to go and install our SIGCHLD handler before
        # spawning processes.
        self._start_call = reactor.callLater(0, self.loop.start,
                                             self.pollinterval)

    def stopService(self):
        """Stop polling and stop the service.

        Safe to call before the polling loop has actually started: the
        pending start is cancelled and the loop is only stopped if running.
        """
        log.msg("SVNPoller(%s) shutting down" % self.svnurl)
        pending = self._start_call
        self._start_call = None
        if pending is not None and pending.active():
            # the loop has not been started yet; make sure it never is
            pending.cancel()
        if self.loop.running:
            # LoopingCall.stop() asserts when the loop is not running
            self.loop.stop()
        return base.ChangeSource.stopService(self)

    def describe(self):
        """Return a one-line human-readable description of this source."""
        return "SVNPoller watching %s" % self.svnurl

    def checksvn(self):
        """Run one poll: fetch the svn log, turn new entries into Changes and
        submit them. Returns a Deferred (used only by the unit tests)."""
        # Our return value is only used for unit testing.

        # we need to figure out the repository root, so we can figure out
        # repository-relative pathnames later. Each SVNURL is in the form
        # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something
        # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a
        # physical repository at /svn/Twisted on that host), (PROJECT) is
        # something like Projects/Twisted (i.e. within the repository's
        # internal namespace, everything under Projects/Twisted/ has
        # something to do with Twisted, but these directory names do not
        # actually appear on the repository host), (BRANCH) is something like
        # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative
        # filename like "twisted/internet/defer.py".

        # our self.svnurl attribute contains (ROOT)/(PROJECT) combined
        # together in a way that we can't separate without svn's help. If the
        # user is not using the split_file= argument, then self.svnurl might
        # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will
        # get back from 'svn log' will be of the form
        # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove
        # that (PROJECT) prefix from them. To do this without requiring the
        # user to tell us how svnurl is split into ROOT and PROJECT, we do an
        # 'svn info --xml' command at startup. This command will include a
        # <root> element that tells us ROOT. We then strip this prefix from
        # self.svnurl to determine PROJECT, and then later we strip the
        # PROJECT prefix from the filenames reported by 'svn log --xml' to
        # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to
        # turn into separate BRANCH and FILEPATH values.

        # whew.

        if self.working:
            log.msg("SVNPoller(%s) overrun: timer fired but the previous "
                    "poll had not yet finished." % self.svnurl)
            self.overrun_counter += 1
            return defer.succeed(None)
        self.working = True

        if self.project:
            log.msg("SVNPoller polling " + self.project)
        else:
            log.msg("SVNPoller polling")
        # compare against None, not truthiness: an empty prefix ("") is a
        # valid, already-determined value when svnurl is the repository root,
        # and must not trigger another 'svn info' on every poll
        if self._prefix is None:
            # this sets self._prefix when it finishes. It fires with
            # self._prefix as well, because that makes the unit tests easier
            # to write.
            d = self.get_root()
            d.addCallback(self.determine_prefix)
        else:
            d = defer.succeed(self._prefix)

        d.addCallback(self.get_logs)
        d.addCallback(self.parse_logs)
        d.addCallback(self.get_new_logentries)
        d.addCallback(self.create_changes)
        d.addCallback(self.submit_changes)
        d.addCallbacks(self.finished_ok, self.finished_failure)
        return d

    def getProcessOutput(self, args):
        """Run the svn binary with args and our saved environment; returns
        the Deferred from twisted's utils.getProcessOutput."""
        # this exists so we can override it during the unit tests
        d = utils.getProcessOutput(self.svnbin, args, self.environ)
        return d

    def get_root(self):
        """Run 'svn info --xml' on svnurl; returns a Deferred for its output."""
        args = ["info", "--xml", "--non-interactive", self.svnurl]
        if self.svnuser:
            args.extend(["--username=%s" % self.svnuser])
        if self.svnpasswd:
            args.extend(["--password=%s" % self.svnpasswd])
        d = self.getProcessOutput(args)
        return d

    def determine_prefix(self, output):
        """Parse 'svn info --xml' output, set self._prefix to the part of
        svnurl that follows the repository <root>, and return it.

        Raises ExpatError if the output is not well-formed XML, and
        AssertionError if svnurl does not start with the reported root.
        """
        try:
            doc = xml.dom.minidom.parseString(output)
        except xml.parsers.expat.ExpatError:
            log.msg("SVNPoller._determine_prefix_2: ExpatError in '%s'"
                    % output)
            raise
        rootnodes = doc.getElementsByTagName("root")
        if not rootnodes:
            # this happens if the URL we gave was already the root. In this
            # case, our prefix is empty.
            self._prefix = ""
            return self._prefix
        rootnode = rootnodes[0]
        root = "".join([c.data for c in rootnode.childNodes])
        # root will be a unicode string
        _assert(self.svnurl.startswith(root),
                "svnurl='%s' doesn't start with <root>='%s'" %
                (self.svnurl, root))
        self._prefix = self.svnurl[len(root):]
        if self._prefix.startswith("/"):
            self._prefix = self._prefix[1:]
        log.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" %
                (self.svnurl, root, self._prefix))
        return self._prefix

    def get_logs(self, ignored_prefix=None):
        """Run 'svn log --xml --verbose' limited to histmax entries; returns
        a Deferred for its output. The argument is ignored."""
        args = []
        args.extend(["log", "--xml", "--verbose", "--non-interactive"])
        if self.svnuser:
            args.extend(["--username=%s" % self.svnuser])
        if self.svnpasswd:
            args.extend(["--password=%s" % self.svnpasswd])
        args.extend(["--limit=%d" % (self.histmax), self.svnurl])
        d = self.getProcessOutput(args)
        return d

    def parse_logs(self, output):
        """Parse 'svn log --xml' output and return its <logentry> nodes.

        Raises ExpatError if the output is not well-formed XML.
        """
        # parse the XML output, return a list of <logentry> nodes
        try:
            doc = xml.dom.minidom.parseString(output)
        except xml.parsers.expat.ExpatError:
            log.msg("SVNPoller.parse_logs: ExpatError in '%s'" % output)
            raise
        logentries = doc.getElementsByTagName("logentry")
        return logentries


    def _filter_new_logentries(self, logentries, last_change):
        """Pick out the log entries that are newer than last_change.

        logentries is expected newest-first (the first entry's revision is
        taken as the most recent). Returns (new_last_change, new_logentries)
        with new_logentries ordered oldest-first.
        """
        # given a list of logentries, return a tuple of (new_last_change,
        # new_logentries), where new_logentries contains only the ones after
        # last_change
        if not logentries:
            # no entries, so last_change must stay at None
            # NOTE: this returns None regardless of the last_change passed
            # in, and get_new_logentries() stores it in self.last_change.
            return (None, [])

        mostRecent = int(logentries[0].getAttribute("revision"))

        if last_change is None:
            # if this is the first time we've been run, ignore any changes
            # that occurred before now. This prevents a build at every
            # startup.
            log.msg('svnPoller: starting at change %s' % mostRecent)
            return (mostRecent, [])

        if last_change == mostRecent:
            # an unmodified repository will hit this case
            log.msg('svnPoller: _process_changes last %s mostRecent %s' % (
                      last_change, mostRecent))
            return (mostRecent, [])

        new_logentries = []
        for el in logentries:
            if last_change == int(el.getAttribute("revision")):
                break
            new_logentries.append(el)
        new_logentries.reverse() # return oldest first
        return (mostRecent, new_logentries)

    def get_new_logentries(self, logentries):
        """Update self.last_change from logentries and return only the
        entries newer than the previous last_change, oldest first."""
        last_change = self.last_change
        (new_last_change,
         new_logentries) = self._filter_new_logentries(logentries,
                                                       self.last_change)
        self.last_change = new_last_change
        log.msg('svnPoller: _process_changes %s .. %s' %
                (last_change, new_last_change))
        return new_logentries


    def _get_text(self, element, tag_name):
        """Return the text of the first tag_name child of element, or
        "<unknown>" when that tag is missing or holds non-text nodes."""
        try:
            child_nodes = element.getElementsByTagName(tag_name)[0].childNodes
            text = "".join([t.data for t in child_nodes])
        except (IndexError, AttributeError):
            # IndexError: no such tag; AttributeError: a child without .data
            text = "<unknown>"
        return text

    def _transform_path(self, path):
        """Strip self._prefix from path and hand the rest to split_file().

        Returns split_file()'s result: None or a (branch, final_path) tuple.
        Raises AssertionError if path does not start with the prefix.
        """
        _assert(path.startswith(self._prefix),
                "filepath '%s' should start with prefix '%s'" %
                (path, self._prefix))
        relative_path = path[len(self._prefix):]
        if relative_path.startswith("/"):
            relative_path = relative_path[1:]
        where = self.split_file(relative_path)
        # 'where' is either None or (branch, final_path)
        return where

    def create_changes(self, new_logentries):
        """Build Change objects from <logentry> nodes.

        Each log entry yields one Change per branch it touches (as decided by
        split_file). Entries without a <paths> element are skipped, as is the
        deletion of a whole branch. Returns the list of Changes.
        """
        changes = []

        for el in new_logentries:
            revision = str(el.getAttribute("revision"))

            revlink=''

            if self.revlinktmpl:
                if revision:
                    revlink = self.revlinktmpl % urllib.quote_plus(revision)

            log.msg("Adding change revision %s" % (revision,))
            # TODO: the rest of buildbot may not be ready for unicode 'who'
            # values
            author   = self._get_text(el, "author")
            comments = self._get_text(el, "msg")
            # there is a "date" field, but it provides localtime in the
            # repository's timezone, whereas we care about buildmaster's
            # localtime (since this will get used to position the boxes on
            # the Waterfall display, etc). So ignore the date field and use
            # our local clock instead.
            #when     = self._get_text(el, "date")
            #when     = time.mktime(time.strptime("%.19s" % when,
            #                                     "%Y-%m-%dT%H:%M:%S"))
            branches = {}
            try:
                pathlist = el.getElementsByTagName("paths")[0]
            except IndexError: # weird, we got an empty revision
                log.msg("ignoring commit with no paths")
                continue

            for p in pathlist.getElementsByTagName("path"):
                action = p.getAttribute("action")
                path = "".join([t.data for t in p.childNodes])
                # the rest of buildbot is certainly not yet ready to handle
                # unicode filenames, because they get put in RemoteCommands
                # which get sent via PB to the buildslave, and PB doesn't
                # handle unicode.
                path = path.encode("ascii")
                if path.startswith("/"):
                    path = path[1:]
                where = self._transform_path(path)

                # if 'where' is None, the file was outside any project that
                # we care about and we should ignore it
                if where:
                    branch, filename = where
                    if branch not in branches:
                        branches[branch] = { 'files': []}
                    branches[branch]['files'].append(filename)

                    # remember the action of the first path seen per branch
                    if 'action' not in branches[branch]:
                        branches[branch]['action'] = action

            for branch in branches:
                action = branches[branch]['action']
                files  = branches[branch]['files']
                number_of_files_changed = len(files)

                # a lone 'D' whose filename is empty is the branch directory
                # itself being removed
                if action == u'D' and number_of_files_changed == 1 and files[0] == '':
                    log.msg("Ignoring deletion of branch '%s'" % branch)
                else:
                    c = Change(who=author,
                               files=files,
                               comments=comments,
                               revision=revision,
                               branch=branch,
                               revlink=revlink,
                               category=self.category,
                               repository=self.svnurl,
                               project = self.project)
                    changes.append(c)

        return changes

    def submit_changes(self, changes):
        """Hand each Change to our parent via addChange()."""
        for c in changes:
            self.parent.addChange(c)

    def finished_ok(self, res):
        """Finish a successful poll: persist last_change to the cache file
        (if configured) and clear the 'working' flag. Returns res."""
        # Nothing is written while last_change is None: the string "None"
        # would be rejected as a corrupt cache on the next startup.
        if self.cachepath and self.last_change is not None:
            # A failure here must not escape: finished_failure is attached
            # alongside this callback, so it would not run and 'working'
            # would stay True, making every later poll report an overrun.
            try:
                f = open(self.cachepath, "w")
                try:
                    f.write(str(self.last_change))
                finally:
                    f.close()
            except (IOError, OSError):
                log.msg("SVNPoller(%s) could not write cache file %s" %
                        (self.svnurl, self.cachepath))
                log.err()

        log.msg("SVNPoller finished polling %s" % res)
        assert self.working
        self.working = False
        return res

    def finished_failure(self, f):
        """Finish a failed poll: log the failure, clear the 'working' flag
        and swallow the error so the polling loop keeps running."""
        log.msg("SVNPoller failed %s" % f)
        assert self.working
        self.working = False
        return None # eat the failure
536