Package buildbot :: Package changes :: Module svnpoller
[frames | no frames]

Source Code for Module buildbot.changes.svnpoller

  1  # -*- test-case-name: buildbot.test.test_svnpoller -*- 
  2   
  3  # Based on the work of Dave Peticolas for the P4poll 
  4  # Changed to svn (using xml.dom.minidom) by Niklaus Giger 
  5  # Hacked beyond recognition by Brian Warner 
  6   
  7  from twisted.python import log 
  8  from twisted.internet import defer, reactor, utils 
  9  from twisted.internet.task import LoopingCall 
 10   
 11  from buildbot import util 
 12  from buildbot.changes import base 
 13  from buildbot.changes.changes import Change 
 14   
 15  import xml.dom.minidom 
 16  import os, urllib 
 17   
18 -def _assert(condition, msg):
19 if condition: 20 return True 21 raise AssertionError(msg)
22 23 # these split_file_* functions are available for use as values to the 24 # split_file= argument.
def split_file_alwaystrunk(path):
    """Treat every path as belonging to the trunk.

    @param path: repository path relative to the watched SVN URL
    @return: a (branch, filepath) tuple whose branch is always None and
             whose filepath is *path* unchanged
    """
    branch = None
    return (branch, path)
27
def split_file_branches(path):
    """Split a path using the conventional trunk/ and branches/NAME/ layout.

    Turns trunk/subdir/file.c into (None, "subdir/file.c") and
    branches/1.5.x/subdir/file.c into ("branches/1.5.x", "subdir/file.c").

    @param path: repository path relative to the watched SVN URL
    @return: a (branch, filepath) tuple, or None when the path is outside
             both trunk/ and branches/NAME/ and should be ignored
    """
    pieces = path.split('/')
    top = pieces[0]
    if top == 'trunk':
        return (None, '/'.join(pieces[1:]))
    # Require an actual branch name after 'branches'. A change to the
    # 'branches' container directory itself is not a change on any branch,
    # and must not be reported as a branch called "branches".
    if top == 'branches' and len(pieces) >= 2 and pieces[1]:
        return ('/'.join(pieces[0:2]), '/'.join(pieces[2:]))
    return None
38 39
class SVNPoller(base.ChangeSource, util.ComparableMixin):
    """This source will poll a Subversion repository for changes and submit
    them to the change master."""

    # Names of the configuration attributes that identify a poller;
    # presumably consulted by util.ComparableMixin when two pollers are
    # compared (the mixin is not visible here -- confirm). Every constructor
    # argument that changes the poller's output is listed, including
    # 'project' and 'revlinktmpl', so that changing only one of those is
    # not mistaken for an unchanged configuration.
    compare_attrs = ["svnurl", "split_file_function",
                     "svnuser", "svnpasswd",
                     "pollinterval", "histmax",
                     "svnbin", "category", "cachepath",
                     "project", "revlinktmpl"]

    parent = None  # filled in when we're added
    last_change = None  # most recent revision number seen, or None
    loop = None  # LoopingCall driving checksvn, created in __init__
    working = False  # True while a poll is in progress

54 - def __init__(self, svnurl, split_file=None, 55 svnuser=None, svnpasswd=None, 56 pollinterval=10*60, histmax=100, 57 svnbin='svn', revlinktmpl='', category=None, 58 project=None, cachepath=None):
59 """ 60 @type svnurl: string 61 @param svnurl: the SVN URL that describes the repository and 62 subdirectory to watch. If this ChangeSource should 63 only pay attention to a single branch, this should 64 point at the repository for that branch, like 65 svn://svn.twistedmatrix.com/svn/Twisted/trunk . If it 66 should follow multiple branches, point it at the 67 repository directory that contains all the branches 68 like svn://svn.twistedmatrix.com/svn/Twisted and also 69 provide a branch-determining function. 70 71 Each file in the repository has a SVN URL in the form 72 (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) could be 73 empty or not, depending upon your branch-determining 74 function. Only files that start with (SVNURL)/(BRANCH) 75 will be monitored. The Change objects that are sent to 76 the Schedulers will see (FILEPATH) for each modified 77 file. 78 79 @type split_file: callable or None 80 @param split_file: a function that is called with a string of the 81 form (BRANCH)/(FILEPATH) and should return a tuple 82 (BRANCH, FILEPATH). This function should match 83 your repository's branch-naming policy. Each 84 changed file has a fully-qualified URL that can be 85 split into a prefix (which equals the value of the 86 'svnurl' argument) and a suffix; it is this suffix 87 which is passed to the split_file function. 88 89 If the function returns None, the file is ignored. 90 Use this to indicate that the file is not a part 91 of this project. 92 93 For example, if your repository puts the trunk in 94 trunk/... 
and branches are in places like 95 branches/1.5/..., your split_file function could 96 look like the following (this function is 97 available as svnpoller.split_file_branches):: 98 99 pieces = path.split('/') 100 if pieces[0] == 'trunk': 101 return (None, '/'.join(pieces[1:])) 102 elif pieces[0] == 'branches': 103 return ('/'.join(pieces[0:2]), 104 '/'.join(pieces[2:])) 105 else: 106 return None 107 108 If instead your repository layout puts the trunk 109 for ProjectA in trunk/ProjectA/... and the 1.5 110 branch in branches/1.5/ProjectA/..., your 111 split_file function could look like:: 112 113 pieces = path.split('/') 114 if pieces[0] == 'trunk': 115 branch = None 116 pieces.pop(0) # remove 'trunk' 117 elif pieces[0] == 'branches': 118 pieces.pop(0) # remove 'branches' 119 # grab branch name 120 branch = 'branches/' + pieces.pop(0) 121 else: 122 return None # something weird 123 projectname = pieces.pop(0) 124 if projectname != 'ProjectA': 125 return None # wrong project 126 return (branch, '/'.join(pieces)) 127 128 The default of split_file= is None, which 129 indicates that no splitting should be done. This 130 is equivalent to the following function:: 131 132 return (None, path) 133 134 If you wish, you can override the split_file 135 method with the same sort of function instead of 136 passing in a split_file= argument. 137 138 139 @type svnuser: string 140 @param svnuser: If set, the --username option will be added to 141 the 'svn log' command. You may need this to get 142 access to a private repository. 143 @type svnpasswd: string 144 @param svnpasswd: If set, the --password option will be added. 145 146 @type pollinterval: int 147 @param pollinterval: interval in seconds between polls. The default 148 is 600 seconds (10 minutes). Smaller values 149 decrease the latency between the time a change 150 is recorded and the time the buildbot notices 151 it, but it also increases the system load. 
152 153 @type histmax: int 154 @param histmax: maximum number of changes to look back through. 155 The default is 100. Smaller values decrease 156 system load, but if more than histmax changes 157 are recorded between polls, the extra ones will 158 be silently lost. 159 160 @type svnbin: string 161 @param svnbin: path to svn binary, defaults to just 'svn'. Use 162 this if your subversion command lives in an 163 unusual location. 164 165 @type revlinktmpl: string 166 @param revlinktmpl: A format string to use for hyperlinks to revision 167 information. For example, setting this to 168 "http://reposerver/websvn/revision.php?rev=%s" 169 would create suitable links on the build pages 170 to information in websvn on each revision. 171 172 @type category: string 173 @param category: A single category associated with the changes that 174 could be used by schedulers watch for branches of a 175 certain name AND category. 176 177 @type project string 178 @param project A single project that the changes are associated with 179 the repository, added to the changes, for the use in 180 change filters 181 182 @type cachepath string 183 @param cachepath A path to a file that can be used to store the last 184 rev that was processed, so we can grab changes that 185 happened while we were offline 186 """ 187 188 if svnurl.endswith("/"): 189 svnurl = svnurl[:-1] # strip the trailing slash 190 self.svnurl = svnurl 191 self.split_file_function = split_file or split_file_alwaystrunk 192 self.svnuser = svnuser 193 self.svnpasswd = svnpasswd 194 195 self.revlinktmpl = revlinktmpl 196 197 self.environ = os.environ.copy() # include environment variables 198 # required for ssh-agent auth 199 200 self.svnbin = svnbin 201 self.pollinterval = pollinterval 202 self.histmax = histmax 203 self._prefix = None 204 self.overrun_counter = 0 205 self.loop = LoopingCall(self.checksvn) 206 self.category = category 207 self.project = project 208 209 self.cachepath = cachepath 210 if self.cachepath and 
os.path.exists(self.cachepath): 211 try: 212 f = open(self.cachepath, "r") 213 self.last_change = int(f.read().strip()) 214 log.msg("SVNPoller(%s) setting last_change to %s" % (self.svnurl, self.last_change)) 215 f.close() 216 except: 217 self.cachepath = None 218 log.msg("SVNPoller(%s) cache file corrupt, skipping and not using" % self.svnurl) 219 log.err()
220
221 - def split_file(self, path):
222 # use getattr() to avoid turning this function into a bound method, 223 # which would require it to have an extra 'self' argument 224 f = getattr(self, "split_file_function") 225 return f(path)
226
227 - def startService(self):
228 log.msg("SVNPoller(%s) starting" % self.svnurl) 229 base.ChangeSource.startService(self) 230 # Don't start the loop just yet because the reactor isn't running. 231 # Give it a chance to go and install our SIGCHLD handler before 232 # spawning processes. 233 reactor.callLater(0, self.loop.start, self.pollinterval)
234
235 - def stopService(self):
236 log.msg("SVNPoller(%s) shutting down" % self.svnurl) 237 self.loop.stop() 238 return base.ChangeSource.stopService(self)
239
240 - def describe(self):
241 return "SVNPoller watching %s" % self.svnurl
242
243 - def checksvn(self):
244 # Our return value is only used for unit testing. 245 246 # we need to figure out the repository root, so we can figure out 247 # repository-relative pathnames later. Each SVNURL is in the form 248 # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something 249 # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a 250 # physical repository at /svn/Twisted on that host), (PROJECT) is 251 # something like Projects/Twisted (i.e. within the repository's 252 # internal namespace, everything under Projects/Twisted/ has 253 # something to do with Twisted, but these directory names do not 254 # actually appear on the repository host), (BRANCH) is something like 255 # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative 256 # filename like "twisted/internet/defer.py". 257 258 # our self.svnurl attribute contains (ROOT)/(PROJECT) combined 259 # together in a way that we can't separate without svn's help. If the 260 # user is not using the split_file= argument, then self.svnurl might 261 # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will 262 # get back from 'svn log' will be of the form 263 # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove 264 # that (PROJECT) prefix from them. To do this without requiring the 265 # user to tell us how svnurl is split into ROOT and PROJECT, we do an 266 # 'svn info --xml' command at startup. This command will include a 267 # <root> element that tells us ROOT. We then strip this prefix from 268 # self.svnurl to determine PROJECT, and then later we strip the 269 # PROJECT prefix from the filenames reported by 'svn log --xml' to 270 # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to 271 # turn into separate BRANCH and FILEPATH values. 272 273 # whew. 274 275 if self.working: 276 log.msg("SVNPoller(%s) overrun: timer fired but the previous " 277 "poll had not yet finished." 
% self.svnurl) 278 self.overrun_counter += 1 279 return defer.succeed(None) 280 self.working = True 281 282 if self.project: 283 log.msg("SVNPoller polling " + self.project) 284 else: 285 log.msg("SVNPoller polling") 286 if not self._prefix: 287 # this sets self._prefix when it finishes. It fires with 288 # self._prefix as well, because that makes the unit tests easier 289 # to write. 290 d = self.get_root() 291 d.addCallback(self.determine_prefix) 292 else: 293 d = defer.succeed(self._prefix) 294 295 d.addCallback(self.get_logs) 296 d.addCallback(self.parse_logs) 297 d.addCallback(self.get_new_logentries) 298 d.addCallback(self.create_changes) 299 d.addCallback(self.submit_changes) 300 d.addCallbacks(self.finished_ok, self.finished_failure) 301 return d
302
303 - def getProcessOutput(self, args):
304 # this exists so we can override it during the unit tests 305 d = utils.getProcessOutput(self.svnbin, args, self.environ) 306 return d
307
308 - def get_root(self):
309 args = ["info", "--xml", "--non-interactive", self.svnurl] 310 if self.svnuser: 311 args.extend(["--username=%s" % self.svnuser]) 312 if self.svnpasswd: 313 args.extend(["--password=%s" % self.svnpasswd]) 314 d = self.getProcessOutput(args) 315 return d
316
317 - def determine_prefix(self, output):
318 try: 319 doc = xml.dom.minidom.parseString(output) 320 except xml.parsers.expat.ExpatError: 321 log.msg("SVNPoller._determine_prefix_2: ExpatError in '%s'" 322 % output) 323 raise 324 rootnodes = doc.getElementsByTagName("root") 325 if not rootnodes: 326 # this happens if the URL we gave was already the root. In this 327 # case, our prefix is empty. 328 self._prefix = "" 329 return self._prefix 330 rootnode = rootnodes[0] 331 root = "".join([c.data for c in rootnode.childNodes]) 332 # root will be a unicode string 333 _assert(self.svnurl.startswith(root), 334 "svnurl='%s' doesn't start with <root>='%s'" % 335 (self.svnurl, root)) 336 self._prefix = self.svnurl[len(root):] 337 if self._prefix.startswith("/"): 338 self._prefix = self._prefix[1:] 339 log.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" % 340 (self.svnurl, root, self._prefix)) 341 return self._prefix
342
343 - def get_logs(self, ignored_prefix=None):
344 args = [] 345 args.extend(["log", "--xml", "--verbose", "--non-interactive"]) 346 if self.svnuser: 347 args.extend(["--username=%s" % self.svnuser]) 348 if self.svnpasswd: 349 args.extend(["--password=%s" % self.svnpasswd]) 350 args.extend(["--limit=%d" % (self.histmax), self.svnurl]) 351 d = self.getProcessOutput(args) 352 return d
353
354 - def parse_logs(self, output):
355 # parse the XML output, return a list of <logentry> nodes 356 try: 357 doc = xml.dom.minidom.parseString(output) 358 except xml.parsers.expat.ExpatError: 359 log.msg("SVNPoller.parse_logs: ExpatError in '%s'" % output) 360 raise 361 logentries = doc.getElementsByTagName("logentry") 362 return logentries
363 364
365 - def _filter_new_logentries(self, logentries, last_change):
366 # given a list of logentries, return a tuple of (new_last_change, 367 # new_logentries), where new_logentries contains only the ones after 368 # last_change 369 if not logentries: 370 # no entries, so last_change must stay at None 371 return (None, []) 372 373 mostRecent = int(logentries[0].getAttribute("revision")) 374 375 if last_change is None: 376 # if this is the first time we've been run, ignore any changes 377 # that occurred before now. This prevents a build at every 378 # startup. 379 log.msg('svnPoller: starting at change %s' % mostRecent) 380 return (mostRecent, []) 381 382 if last_change == mostRecent: 383 # an unmodified repository will hit this case 384 log.msg('svnPoller: _process_changes last %s mostRecent %s' % ( 385 last_change, mostRecent)) 386 return (mostRecent, []) 387 388 new_logentries = [] 389 for el in logentries: 390 if last_change == int(el.getAttribute("revision")): 391 break 392 new_logentries.append(el) 393 new_logentries.reverse() # return oldest first 394 return (mostRecent, new_logentries)
395
396 - def get_new_logentries(self, logentries):
397 last_change = self.last_change 398 (new_last_change, 399 new_logentries) = self._filter_new_logentries(logentries, 400 self.last_change) 401 self.last_change = new_last_change 402 log.msg('svnPoller: _process_changes %s .. %s' % 403 (last_change, new_last_change)) 404 return new_logentries
405 406
407 - def _get_text(self, element, tag_name):
408 try: 409 child_nodes = element.getElementsByTagName(tag_name)[0].childNodes 410 text = "".join([t.data for t in child_nodes]) 411 except: 412 text = "<unknown>" 413 return text
414
415 - def _transform_path(self, path):
416 _assert(path.startswith(self._prefix), 417 "filepath '%s' should start with prefix '%s'" % 418 (path, self._prefix)) 419 relative_path = path[len(self._prefix):] 420 if relative_path.startswith("/"): 421 relative_path = relative_path[1:] 422 where = self.split_file(relative_path) 423 # 'where' is either None or (branch, final_path) 424 return where
425
426 - def create_changes(self, new_logentries):
427 changes = [] 428 429 for el in new_logentries: 430 revision = str(el.getAttribute("revision")) 431 432 revlink='' 433 434 if self.revlinktmpl: 435 if revision: 436 revlink = self.revlinktmpl % urllib.quote_plus(revision) 437 438 log.msg("Adding change revision %s" % (revision,)) 439 # TODO: the rest of buildbot may not be ready for unicode 'who' 440 # values 441 author = self._get_text(el, "author") 442 comments = self._get_text(el, "msg") 443 # there is a "date" field, but it provides localtime in the 444 # repository's timezone, whereas we care about buildmaster's 445 # localtime (since this will get used to position the boxes on 446 # the Waterfall display, etc). So ignore the date field and use 447 # our local clock instead. 448 #when = self._get_text(el, "date") 449 #when = time.mktime(time.strptime("%.19s" % when, 450 # "%Y-%m-%dT%H:%M:%S")) 451 branches = {} 452 try: 453 pathlist = el.getElementsByTagName("paths")[0] 454 except IndexError: # weird, we got an empty revision 455 log.msg("ignoring commit with no paths") 456 continue 457 458 for p in pathlist.getElementsByTagName("path"): 459 action = p.getAttribute("action") 460 path = "".join([t.data for t in p.childNodes]) 461 # the rest of buildbot is certaily not yet ready to handle 462 # unicode filenames, because they get put in RemoteCommands 463 # which get sent via PB to the buildslave, and PB doesn't 464 # handle unicode. 
465 path = path.encode("ascii") 466 if path.startswith("/"): 467 path = path[1:] 468 where = self._transform_path(path) 469 470 # if 'where' is None, the file was outside any project that 471 # we care about and we should ignore it 472 if where: 473 branch, filename = where 474 if not branch in branches: 475 branches[branch] = { 'files': []} 476 branches[branch]['files'].append(filename) 477 478 if not branches[branch].has_key('action'): 479 branches[branch]['action'] = action 480 481 for branch in branches.keys(): 482 action = branches[branch]['action'] 483 files = branches[branch]['files'] 484 number_of_files_changed = len(files) 485 486 if action == u'D' and number_of_files_changed == 1 and files[0] == '': 487 log.msg("Ignoring deletion of branch '%s'" % branch) 488 else: 489 c = Change(who=author, 490 files=files, 491 comments=comments, 492 revision=revision, 493 branch=branch, 494 revlink=revlink, 495 category=self.category, 496 repository=self.svnurl, 497 project = self.project) 498 changes.append(c) 499 500 return changes
501
502 - def submit_changes(self, changes):
503 for c in changes: 504 self.parent.addChange(c)
505
506 - def finished_ok(self, res):
507 if self.cachepath: 508 f = open(self.cachepath, "w") 509 f.write(str(self.last_change)) 510 f.close() 511 512 log.msg("SVNPoller finished polling %s" % res) 513 assert self.working 514 self.working = False 515 return res
516
517 - def finished_failure(self, f):
518 log.msg("SVNPoller failed %s" % f) 519 assert self.working 520 self.working = False 521 return None # eat the failure
522