Package buildbot :: Package changes :: Module svnpoller
[frames] | no frames]

Source Code for Module buildbot.changes.svnpoller

  1  # -*- test-case-name: buildbot.test.test_svnpoller -*- 
  2   
  3  # Based on the work of Dave Peticolas for the P4poll 
  4  # Changed to svn (using xml.dom.minidom) by Niklaus Giger 
  5  # Hacked beyond recognition by Brian Warner 
  6   
  7  from twisted.python import log 
  8  from twisted.internet import defer, reactor, utils 
  9  from twisted.internet.task import LoopingCall 
 10   
 11  from buildbot import util 
 12  from buildbot.changes import base 
 13  from buildbot.changes.changes import Change 
 14   
 15  import xml.dom.minidom 
 16  import urllib 
 17   
18 -def _assert(condition, msg):
19 if condition: 20 return True 21 raise AssertionError(msg)
# These split_file_* functions are available for use as values of the
# split_file= argument.
def split_file_alwaystrunk(path):
    """Treat every path as belonging to the trunk.

    @param path: the (BRANCH)/(FILEPATH) string to split.
    @return: a (branch, filepath) tuple whose branch is always None and
             whose filepath is C{path}, unmodified.
    """
    branch = None
    return (branch, path)
27
def split_file_branches(path):
    """Split a path using the conventional trunk/branches layout.

    "trunk/subdir/file.c" becomes (None, "subdir/file.c") and
    "branches/1.5.x/subdir/file.c" becomes
    ("branches/1.5.x", "subdir/file.c").

    @param path: the (BRANCH)/(FILEPATH) string to split.
    @return: a (branch, filepath) tuple, or None when the first path
             component is neither 'trunk' nor 'branches'.
    """
    segments = path.split('/')
    top = segments[0]
    if top == 'trunk':
        # everything below trunk/ belongs to the default (None) branch
        return (None, '/'.join(segments[1:]))
    if top == 'branches':
        # the branch name is 'branches/<name>'; the rest is the file path
        return ('/'.join(segments[:2]), '/'.join(segments[2:]))
    return None
38 39
class SVNPoller(base.ChangeSource, util.ComparableMixin):
    """This source will poll a Subversion repository for changes and submit
    them to the change master.

    Each poll (see L{checksvn}) runs 'svn log --xml', keeps only the log
    entries newer than the last revision seen, turns them into Change
    objects (one per branch touched by a revision) and hands them to
    C{self.parent.addChange}.
    """

    compare_attrs = ["svnurl", "split_file_function",
                     "svnuser", "svnpasswd",
                     "pollinterval", "histmax",
                     "svnbin", "category"]

    parent = None # filled in when we're added
    last_change = None # revision number of the newest change seen so far
    loop = None # the LoopingCall that drives checksvn()
    working = False # True while a poll is in progress
    _start_call = None # pending reactor.callLater() that starts the loop

    def __init__(self, svnurl, split_file=None,
                 svnuser=None, svnpasswd=None,
                 pollinterval=10*60, histmax=100,
                 svnbin='svn', revlinktmpl='', category=None):
        """
        @type  svnurl: string
        @param svnurl: the SVN URL that describes the repository and
                       subdirectory to watch. If this ChangeSource should
                       only pay attention to a single branch, this should
                       point at the repository for that branch, like
                       svn://svn.twistedmatrix.com/svn/Twisted/trunk . If it
                       should follow multiple branches, point it at the
                       repository directory that contains all the branches
                       like svn://svn.twistedmatrix.com/svn/Twisted and also
                       provide a branch-determining function.

                       Each file in the repository has a SVN URL in the form
                       (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) could be
                       empty or not, depending upon your branch-determining
                       function. Only files that start with (SVNURL)/(BRANCH)
                       will be monitored. The Change objects that are sent to
                       the Schedulers will see (FILEPATH) for each modified
                       file.

        @type  split_file: callable or None
        @param split_file: a function that is called with a string of the
                           form (BRANCH)/(FILEPATH) and should return a tuple
                           (BRANCH, FILEPATH). This function should match
                           your repository's branch-naming policy. Each
                           changed file has a fully-qualified URL that can be
                           split into a prefix (which equals the value of the
                           'svnurl' argument) and a suffix; it is this suffix
                           which is passed to the split_file function.

                           If the function returns None, the file is ignored.
                           Use this to indicate that the file is not a part
                           of this project.

                           For example, if your repository puts the trunk in
                           trunk/... and branches are in places like
                           branches/1.5/..., your split_file function could
                           look like the following (this function is
                           available as svnpoller.split_file_branches)::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                return (None, '/'.join(pieces[1:]))
                            elif pieces[0] == 'branches':
                                return ('/'.join(pieces[0:2]),
                                        '/'.join(pieces[2:]))
                            else:
                                return None

                           If instead your repository layout puts the trunk
                           for ProjectA in trunk/ProjectA/... and the 1.5
                           branch in branches/1.5/ProjectA/..., your
                           split_file function could look like::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                branch = None
                                pieces.pop(0) # remove 'trunk'
                            elif pieces[0] == 'branches':
                                pieces.pop(0) # remove 'branches'
                                # grab branch name
                                branch = 'branches/' + pieces.pop(0)
                            else:
                                return None # something weird
                            projectname = pieces.pop(0)
                            if projectname != 'ProjectA':
                                return None # wrong project
                            return (branch, '/'.join(pieces))

                           The default of split_file= is None, which
                           indicates that no splitting should be done. This
                           is equivalent to the following function::

                            return (None, path)

                           If you wish, you can override the split_file
                           method with the same sort of function instead of
                           passing in a split_file= argument.


        @type  svnuser:      string
        @param svnuser:      If set, the --username option will be added to
                             the 'svn log' command. You may need this to get
                             access to a private repository.
        @type  svnpasswd:    string
        @param svnpasswd:    If set, the --password option will be added.

        @type  pollinterval: int
        @param pollinterval: interval in seconds between polls. The default
                             is 600 seconds (10 minutes). Smaller values
                             decrease the latency between the time a change
                             is recorded and the time the buildbot notices
                             it, but it also increases the system load.

        @type  histmax:      int
        @param histmax:      maximum number of changes to look back through.
                             The default is 100. Smaller values decrease
                             system load, but if more than histmax changes
                             are recorded between polls, the extra ones will
                             be silently lost.

        @type  svnbin:       string
        @param svnbin:       path to svn binary, defaults to just 'svn'. Use
                             this if your subversion command lives in an
                             unusual location.

        @type  revlinktmpl:  string
        @param revlinktmpl:  A format string to use for hyperlinks to revision
                             information. For example, setting this to
                             "http://reposerver/websvn/revision.php?rev=%s"
                             would create suitable links on the build pages
                             to information in websvn on each revision.

        @type  category:     string
        @param category:     A single category associated with the changes
                             that can be used by schedulers to watch for
                             branches of a certain name AND category.
        """

        if svnurl.endswith("/"):
            svnurl = svnurl[:-1] # strip the trailing slash
        self.svnurl = svnurl
        self.split_file_function = split_file or split_file_alwaystrunk
        self.svnuser = svnuser
        self.svnpasswd = svnpasswd

        self.revlinktmpl = revlinktmpl

        self.svnbin = svnbin
        self.pollinterval = pollinterval
        self.histmax = histmax
        # None means "not determined yet"; an empty string is a valid,
        # already-determined prefix (svnurl is the repository root).
        self._prefix = None
        self.overrun_counter = 0
        self.loop = LoopingCall(self.checksvn)
        self.category = category

    def split_file(self, path):
        """Split a (BRANCH)/(FILEPATH) string with the configured function.

        @return: whatever the split_file function returns: a
                 (branch, filepath) tuple, or None to ignore the file.
        """
        # use getattr() to avoid turning this function into a bound method,
        # which would require it to have an extra 'self' argument
        f = getattr(self, "split_file_function")
        return f(path)

    def startService(self):
        """Start the service and schedule the first poll."""
        log.msg("SVNPoller(%s) starting" % self.svnurl)
        base.ChangeSource.startService(self)
        # Don't start the loop just yet because the reactor isn't running.
        # Give it a chance to go and install our SIGCHLD handler before
        # spawning processes. Keep the delayed call so stopService() can
        # cancel it if we are shut down before it fires.
        self._start_call = reactor.callLater(0, self.loop.start,
                                             self.pollinterval)

    def stopService(self):
        """Stop polling and shut the service down."""
        log.msg("SVNPoller(%s) shutting down" % self.svnurl)
        pending = self._start_call
        self._start_call = None
        if pending is not None and pending.active():
            # the loop has not been started yet: make sure it never is
            pending.cancel()
        if self.loop.running:
            # LoopingCall.stop() may only be called on a running loop
            self.loop.stop()
        return base.ChangeSource.stopService(self)

    def describe(self):
        """Return a one-line human-readable description of this source."""
        return "SVNPoller watching %s" % self.svnurl

    def checksvn(self):
        """Run one poll of the repository.

        @return: a Deferred that fires when the poll is finished. Failures
                 are logged and swallowed by L{finished_failure}, so the
                 Deferred does not errback.
        """
        # Our return value is only used for unit testing.

        # we need to figure out the repository root, so we can figure out
        # repository-relative pathnames later. Each SVNURL is in the form
        # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something
        # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a
        # physical repository at /svn/Twisted on that host), (PROJECT) is
        # something like Projects/Twisted (i.e. within the repository's
        # internal namespace, everything under Projects/Twisted/ has
        # something to do with Twisted, but these directory names do not
        # actually appear on the repository host), (BRANCH) is something like
        # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative
        # filename like "twisted/internet/defer.py".

        # our self.svnurl attribute contains (ROOT)/(PROJECT) combined
        # together in a way that we can't separate without svn's help. If the
        # user is not using the split_file= argument, then self.svnurl might
        # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will
        # get back from 'svn log' will be of the form
        # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove
        # that (PROJECT) prefix from them. To do this without requiring the
        # user to tell us how svnurl is split into ROOT and PROJECT, we do an
        # 'svn info --xml' command at startup. This command will include a
        # <root> element that tells us ROOT. We then strip this prefix from
        # self.svnurl to determine PROJECT, and then later we strip the
        # PROJECT prefix from the filenames reported by 'svn log --xml' to
        # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to
        # turn into separate BRANCH and FILEPATH values.

        # whew.

        if self.working:
            log.msg("SVNPoller(%s) overrun: timer fired but the previous "
                    "poll had not yet finished." % self.svnurl)
            self.overrun_counter += 1
            return defer.succeed(None)
        self.working = True

        log.msg("SVNPoller polling")
        if self._prefix is None:
            # this sets self._prefix when it finishes. It fires with
            # self._prefix as well, because that makes the unit tests easier
            # to write. Compare against None rather than testing truthiness:
            # an empty prefix is a valid result and must not trigger another
            # 'svn info' on every poll.
            d = self.get_root()
            d.addCallback(self.determine_prefix)
        else:
            d = defer.succeed(self._prefix)

        d.addCallback(self.get_logs)
        d.addCallback(self.parse_logs)
        d.addCallback(self.get_new_logentries)
        d.addCallback(self.create_changes)
        d.addCallback(self.submit_changes)
        d.addCallbacks(self.finished_ok, self.finished_failure)
        return d

    def getProcessOutput(self, args):
        """Run the svn binary with C{args}; return a Deferred for its output."""
        # this exists so we can override it during the unit tests
        d = utils.getProcessOutput(self.svnbin, args, {})
        return d

    def _credential_args(self):
        """Return the --username/--password options, if configured."""
        # NOTE(review): the password is passed on the svn command line.
        args = []
        if self.svnuser:
            args.append("--username=%s" % self.svnuser)
        if self.svnpasswd:
            args.append("--password=%s" % self.svnpasswd)
        return args

    def get_root(self):
        """Run 'svn info --xml' on svnurl; return a Deferred for its output."""
        args = ["info", "--xml", "--non-interactive", self.svnurl]
        args.extend(self._credential_args())
        d = self.getProcessOutput(args)
        return d

    def determine_prefix(self, output):
        """Compute and store self._prefix from 'svn info --xml' output.

        The prefix is the part of svnurl that follows the repository root
        reported in the <root> element, without a leading slash.

        @param output: the XML text produced by 'svn info --xml'.
        @return: the new value of self._prefix.
        @raise xml.parsers.expat.ExpatError: if C{output} is not valid XML.
        @raise AssertionError: if svnurl does not start with the root.
        """
        try:
            doc = xml.dom.minidom.parseString(output)
        except xml.parsers.expat.ExpatError:
            log.msg("SVNPoller.determine_prefix: ExpatError in '%s'"
                    % output)
            raise
        rootnodes = doc.getElementsByTagName("root")
        if not rootnodes:
            # this happens if the URL we gave was already the root. In this
            # case, our prefix is empty.
            self._prefix = ""
            return self._prefix
        rootnode = rootnodes[0]
        root = "".join([c.data for c in rootnode.childNodes])
        # root will be a unicode string
        _assert(self.svnurl.startswith(root),
                "svnurl='%s' doesn't start with <root>='%s'" %
                (self.svnurl, root))
        self._prefix = self.svnurl[len(root):]
        if self._prefix.startswith("/"):
            self._prefix = self._prefix[1:]
        log.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" %
                (self.svnurl, root, self._prefix))
        return self._prefix

    def get_logs(self, ignored_prefix=None):
        """Run 'svn log --xml --verbose', limited to histmax entries.

        @param ignored_prefix: unused; accepts the result of the previous
                               callback in the chain.
        @return: a Deferred that fires with the command's output.
        """
        args = ["log", "--xml", "--verbose", "--non-interactive"]
        args.extend(self._credential_args())
        args.extend(["--limit=%d" % (self.histmax), self.svnurl])
        d = self.getProcessOutput(args)
        return d

    def parse_logs(self, output):
        """Parse 'svn log --xml' output.

        @return: the list of <logentry> DOM nodes, in document order.
        @raise xml.parsers.expat.ExpatError: if C{output} is not valid XML.
        """
        try:
            doc = xml.dom.minidom.parseString(output)
        except xml.parsers.expat.ExpatError:
            log.msg("SVNPoller.parse_logs: ExpatError in '%s'" % output)
            raise
        logentries = doc.getElementsByTagName("logentry")
        return logentries

    def _filter_new_logentries(self, logentries, last_change):
        """Select the log entries that are newer than C{last_change}.

        @param logentries: <logentry> nodes; the first one is treated as the
                           most recent revision.
        @param last_change: revision number seen by the previous poll, or
                            None if there was none.
        @return: a tuple of (new_last_change, new_logentries), where
                 new_logentries contains only the entries that precede
                 last_change in the list, oldest first.
        """
        if not logentries:
            # no entries, so last_change must stay at None
            return (None, [])

        mostRecent = int(logentries[0].getAttribute("revision"))

        if last_change is None:
            # if this is the first time we've been run, ignore any changes
            # that occurred before now. This prevents a build at every
            # startup.
            log.msg('svnPoller: starting at change %s' % mostRecent)
            return (mostRecent, [])

        if last_change == mostRecent:
            # an unmodified repository will hit this case
            log.msg('svnPoller: _process_changes last %s mostRecent %s' % (
                last_change, mostRecent))
            return (mostRecent, [])

        new_logentries = []
        for el in logentries:
            if last_change == int(el.getAttribute("revision")):
                break
            new_logentries.append(el)
        new_logentries.reverse() # return oldest first
        return (mostRecent, new_logentries)

    def get_new_logentries(self, logentries):
        """Update self.last_change and return the not-yet-seen log entries."""
        last_change = self.last_change
        (new_last_change,
         new_logentries) = self._filter_new_logentries(logentries,
                                                       self.last_change)
        self.last_change = new_last_change
        log.msg('svnPoller: _process_changes %s .. %s' %
                (last_change, new_last_change))
        return new_logentries

    def _get_text(self, element, tag_name):
        """Return the text of the first C{tag_name} element under C{element}.

        @return: the concatenated text, or "<unknown>" if there is no such
                 element or it has a child without text data.
        """
        try:
            child_nodes = element.getElementsByTagName(tag_name)[0].childNodes
            text = "".join([t.data for t in child_nodes])
        except (IndexError, AttributeError):
            # IndexError: no such element; AttributeError: a child node
            # that carries no .data
            text = "<unknown>"
        return text

    def _transform_path(self, path):
        """Strip self._prefix from C{path} and split the remainder.

        @return: the result of split_file(): either None or a
                 (branch, final_path) tuple.
        @raise AssertionError: if C{path} does not start with the prefix.
        """
        _assert(path.startswith(self._prefix),
                "filepath '%s' should start with prefix '%s'" %
                (path, self._prefix))
        relative_path = path[len(self._prefix):]
        if relative_path.startswith("/"):
            relative_path = relative_path[1:]
        where = self.split_file(relative_path)
        # 'where' is either None or (branch, final_path)
        return where

    def create_changes(self, new_logentries):
        """Turn <logentry> nodes into Change objects.

        One Change is created per branch touched by each revision. A
        revision whose only effect on a branch is deleting the branch itself
        produces no Change for that branch, and a log entry without a
        <paths> element is skipped.

        @param new_logentries: <logentry> DOM nodes, oldest first.
        @return: a list of Change objects.
        """
        changes = []

        for el in new_logentries:
            revision = str(el.getAttribute("revision"))

            revlink = ''
            if self.revlinktmpl and revision:
                revlink = self.revlinktmpl % urllib.quote_plus(revision)

            log.msg("Adding change revision %s" % (revision,))
            # TODO: the rest of buildbot may not be ready for unicode 'who'
            # values
            author = self._get_text(el, "author")
            comments = self._get_text(el, "msg")
            # there is a "date" field, but it provides localtime in the
            # repository's timezone, whereas we care about buildmaster's
            # localtime (since this will get used to position the boxes on
            # the Waterfall display, etc). So ignore the date field and use
            # our local clock instead.

            pathlists = el.getElementsByTagName("paths")
            if not pathlists:
                # Without this guard a single entry lacking <paths> would
                # raise IndexError and abort the whole poll, losing every
                # other change in this batch.
                log.msg("SVNPoller: ignoring revision %s, it has no paths"
                        % (revision,))
                continue

            # maps branch -> {'files': [...], 'action': action of the first
            # path seen on that branch}
            branches = {}
            for p in pathlists[0].getElementsByTagName("path"):
                action = p.getAttribute("action")
                path = "".join([t.data for t in p.childNodes])
                # the rest of buildbot is certainly not yet ready to handle
                # unicode filenames, because they get put in RemoteCommands
                # which get sent via PB to the buildslave, and PB doesn't
                # handle unicode.
                path = path.encode("ascii")
                if path.startswith("/"):
                    path = path[1:]
                where = self._transform_path(path)

                # if 'where' is None, the file was outside any project that
                # we care about and we should ignore it
                if where:
                    branch, filename = where
                    if branch not in branches:
                        branches[branch] = {'files': [], 'action': action}
                    branches[branch]['files'].append(filename)

            for branch, info in branches.items():
                action = info['action']
                files = info['files']

                # an empty filename means the path was the branch itself
                if action == u'D' and len(files) == 1 and files[0] == '':
                    log.msg("Ignoring deletion of branch '%s'" % branch)
                else:
                    c = Change(who=author,
                               files=files,
                               comments=comments,
                               revision=revision,
                               branch=branch,
                               revlink=revlink,
                               category=self.category,
                               repository=self.svnurl)
                    changes.append(c)

        return changes

    def submit_changes(self, changes):
        """Hand every Change to the parent via addChange()."""
        for c in changes:
            self.parent.addChange(c)

    def finished_ok(self, res):
        """Callback for a successful poll: clear the 'working' flag."""
        log.msg("SVNPoller finished polling %s" % res)
        assert self.working
        self.working = False
        return res

    def finished_failure(self, f):
        """Errback for a failed poll: log it and clear the 'working' flag."""
        log.msg("SVNPoller failed %s" % f)
        assert self.working
        self.working = False
        return None # eat the failure
481