Package buildbot :: Package process :: Module metrics
[frames] | no frames]

Source Code for Module buildbot.process.metrics

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15  """ 
 16  Buildbot metrics module 
 17   
 18  Keeps track of counts and timings of various internal buildbot 
 19  activities. 
 20   
 21  Basic architecture: 
 22   
 23      MetricEvent.log(...) 
 24            || 
 25            \/ 
 26      MetricLogObserver 
 27            || 
 28            \/ 
 29      MetricHandler 
 30            || 
 31            \/ 
 32      MetricWatcher 
 33  """ 
 34  from collections import deque 
 35   
 36  from twisted.python import log 
 37  from twisted.internet.task import LoopingCall 
 38  from twisted.internet import reactor 
 39  from twisted.application import service 
 40   
 41  from buildbot import util 
 42  from buildbot.util.bbcollections import defaultdict 
 43   
 44  import gc, os, sys 
 45  # Make use of the resource module if we can 
 46  try: 
 47      import resource 
 48      assert resource 
 49  except ImportError: 
 50      resource = None 
51 52 -class MetricEvent(object):
53 @classmethod
54 - def log(cls, *args, **kwargs):
55 log.msg(metric=cls(*args, **kwargs))
56
57 -class MetricCountEvent(MetricEvent):
58 - def __init__(self, counter, count=1, absolute=False):
59 self.counter = counter 60 self.count = count 61 self.absolute = absolute
62
63 -class MetricTimeEvent(MetricEvent):
64 - def __init__(self, timer, elapsed):
65 self.timer = timer 66 self.elapsed = elapsed
67 68 ALARM_OK, ALARM_WARN, ALARM_CRIT = range(3) 69 ALARM_TEXT = ["OK", "WARN", "CRIT"]
70 71 -class MetricAlarmEvent(MetricEvent):
72 - def __init__(self, alarm, msg=None, level=ALARM_OK):
73 self.alarm = alarm 74 self.level = level 75 self.msg = msg
76
77 -def countMethod(counter):
78 def decorator(func): 79 def wrapper(*args, **kwargs): 80 MetricCountEvent.log(counter=counter) 81 return func(*args, **kwargs)
82 return wrapper 83 return decorator 84
85 -class Timer(object):
86 # For testing 87 _reactor = None 88
89 - def __init__(self, name):
90 self.name = name 91 self.started = None
92
93 - def startTimer(self, func):
94 def wrapper(*args, **kwargs): 95 self.start() 96 return func(*args, **kwargs)
97 return wrapper
98
99 - def stopTimer(self, func):
100 def wrapper(*args, **kwargs): 101 try: 102 return func(*args, **kwargs) 103 finally: 104 self.stop()
105 return wrapper 106
107 - def start(self):
108 self.started = util.now(self._reactor)
109
110 - def stop(self):
111 if self.started is not None: 112 elapsed = util.now(self._reactor) - self.started 113 MetricTimeEvent.log(timer=self.name, elapsed=elapsed) 114 self.started = None
115
116 -def timeMethod(name, _reactor=None):
117 def decorator(func): 118 t = Timer(name) 119 t._reactor=_reactor 120 def wrapper(*args, **kwargs): 121 t.start() 122 try: 123 return func(*args, **kwargs) 124 finally: 125 t.stop()
126 return wrapper 127 return decorator 128
129 -class FiniteList(deque):
130 - def __init__(self, maxlen=10):
131 self._maxlen = maxlen 132 deque.__init__(self)
133
134 - def append(self, o):
135 deque.append(self, o) 136 if len(self) > self._maxlen: 137 self.popleft()
138
139 -class AveragingFiniteList(FiniteList):
140 - def __init__(self, maxlen=10):
141 FiniteList.__init__(self, maxlen) 142 self.average = 0
143
144 - def append(self, o):
145 FiniteList.append(self, o) 146 self._calc()
147
148 - def _calc(self):
149 if len(self) == 0: 150 self.average = 0 151 else: 152 self.average = float(sum(self)) / len(self) 153 154 return self.average
155
156 -class MetricHandler(object):
157 - def __init__(self, metrics):
158 self.metrics = metrics 159 self.watchers = [] 160 161 self.reset()
162
163 - def addWatcher(self, watcher):
164 self.watchers.append(watcher)
165
166 - def removeWatcher(self, watcher):
167 self.watchers.remove(watcher)
168 169 # For subclasses to define
170 - def reset(self):
171 raise NotImplementedError
172
173 - def handle(self, eventDict, metric):
174 raise NotImplementedError
175
176 - def get(self, metric):
177 raise NotImplementedError
178
179 - def keys(self):
180 raise NotImplementedError
181
182 - def report(self):
183 raise NotImplementedError
184
185 - def asDict(self):
186 raise NotImplementedError
187
188 -class MetricCountHandler(MetricHandler):
189 _counters = None
190 - def reset(self):
191 self._counters = defaultdict(int)
192
193 - def handle(self, eventDict, metric):
194 if metric.absolute: 195 self._counters[metric.counter] = metric.count 196 else: 197 self._counters[metric.counter] += metric.count
198
199 - def keys(self):
200 return self._counters.keys()
201
202 - def get(self, counter):
203 return self._counters[counter]
204
205 - def report(self):
206 retval = [] 207 for counter in sorted(self.keys()): 208 retval.append("Counter %s: %i" % (counter, self.get(counter))) 209 return "\n".join(retval)
210
211 - def asDict(self):
212 retval = {} 213 for counter in sorted(self.keys()): 214 retval[counter] = self.get(counter) 215 return dict(counters=retval)
216
217 -class MetricTimeHandler(MetricHandler):
218 _timers = None
219 - def reset(self):
220 self._timers = defaultdict(AveragingFiniteList)
221
222 - def handle(self, eventDict, metric):
223 self._timers[metric.timer].append(metric.elapsed)
224
225 - def keys(self):
226 return self._timers.keys()
227
228 - def get(self, timer):
229 return self._timers[timer].average
230
231 - def report(self):
232 retval = [] 233 for timer in sorted(self.keys()): 234 retval.append("Timer %s: %.3g" % (timer, self.get(timer))) 235 return "\n".join(retval)
236
237 - def asDict(self):
238 retval = {} 239 for timer in sorted(self.keys()): 240 retval[timer] = self.get(timer) 241 return dict(timers=retval)
242
243 -class MetricAlarmHandler(MetricHandler):
244 _alarms = None
245 - def reset(self):
246 self._alarms = defaultdict(lambda x: ALARM_OK)
247
248 - def handle(self, eventDict, metric):
249 self._alarms[metric.alarm] = (metric.level, metric.msg)
250
251 - def report(self):
252 retval = [] 253 for alarm, (level, msg) in sorted(self._alarms.items()): 254 if msg: 255 retval.append("%s %s: %s" % (ALARM_TEXT[level], alarm, msg)) 256 else: 257 retval.append("%s %s" % (ALARM_TEXT[level], alarm)) 258 return "\n".join(retval)
259
260 - def asDict(self):
261 retval = {} 262 for alarm, (level, msg) in sorted(self._alarms.items()): 263 retval[alarm] = (ALARM_TEXT[level], msg) 264 return dict(alarms=retval)
265
266 -class PollerWatcher(object):
267 - def __init__(self, metrics):
268 self.metrics = metrics
269
270 - def run(self):
271 # Check if 'BuildMaster.pollDatabaseChanges()' and 272 # 'BuildMaster.pollDatabaseBuildRequests()' are running fast enough 273 h = self.metrics.getHandler(MetricTimeEvent) 274 if not h: 275 log.msg("Couldn't get MetricTimeEvent handler") 276 MetricAlarmEvent.log('PollerWatcher', 277 msg="Coudln't get MetricTimeEvent handler", 278 level=ALARM_WARN) 279 return 280 281 for method in ('BuildMaster.pollDatabaseChanges()', 282 'BuildMaster.pollDatabaseBuildRequests()'): 283 t = h.get(method) 284 db_poll_interval = self.metrics.parent.db_poll_interval 285 286 if t < 0.8 * db_poll_interval: 287 level = ALARM_OK 288 elif t < db_poll_interval: 289 level = ALARM_WARN 290 else: 291 level = ALARM_CRIT 292 MetricAlarmEvent.log(method, level=level)
293
294 -class AttachedSlavesWatcher(object):
295 - def __init__(self, metrics):
296 self.metrics = metrics
297
298 - def run(self):
299 # Check if 'BotMaster.attached_slaves' equals 300 # 'AbstractBuildSlave.attached_slaves' 301 h = self.metrics.getHandler(MetricCountEvent) 302 if not h: 303 log.msg("Couldn't get MetricCountEvent handler") 304 MetricAlarmEvent.log('AttachedSlavesWatcher', 305 msg="Coudln't get MetricCountEvent handler", 306 level=ALARM_WARN) 307 return 308 botmaster_count = h.get('BotMaster.attached_slaves') 309 buildslave_count = h.get('AbstractBuildSlave.attached_slaves') 310 311 # We let these be off by one since they're counted at slightly 312 # different times 313 if abs(botmaster_count - buildslave_count) > 1: 314 level = ALARM_WARN 315 else: 316 level = ALARM_OK 317 318 MetricAlarmEvent.log('attached_slaves', 319 msg='%s %s' % (botmaster_count, buildslave_count), 320 level=level)
321
322 -def _get_rss():
323 if sys.platform == 'linux2': 324 try: 325 return int(open("/proc/%i/statm" % os.getpid()).read().split()[1]) 326 except: 327 return 0 328 return 0
329
330 -def periodicCheck(_reactor=reactor):
331 # Measure how much garbage we have 332 garbage_count = len(gc.garbage) 333 MetricCountEvent.log('gc.garbage', garbage_count, absolute=True) 334 if garbage_count == 0: 335 level = ALARM_OK 336 else: 337 level = ALARM_WARN 338 MetricAlarmEvent.log('gc.garbage', level=level) 339 340 if resource: 341 r = resource.getrusage(resource.RUSAGE_SELF) 342 attrs = ['ru_utime', 'ru_stime', 'ru_maxrss', 'ru_ixrss', 'ru_idrss', 343 'ru_isrss', 'ru_minflt', 'ru_majflt', 'ru_nswap', 344 'ru_inblock', 'ru_oublock', 'ru_msgsnd', 'ru_msgrcv', 345 'ru_nsignals', 'ru_nvcsw', 'ru_nivcsw'] 346 for i,a in enumerate(attrs): 347 # Linux versions prior to 2.6.32 didn't report this value, but we 348 # can calculate it from /proc/<pid>/statm 349 v = r[i] 350 if a == 'ru_maxrss' and v == 0: 351 v = _get_rss() * resource.getpagesize() / 1024 352 MetricCountEvent.log('resource.%s' % a, v, absolute=True) 353 MetricCountEvent.log('resource.pagesize', resource.getpagesize(), absolute=True) 354 # Measure the reactor delay 355 then = util.now(_reactor) 356 dt = 0.1 357 def cb(): 358 now = util.now(_reactor) 359 delay = (now - then) - dt 360 MetricTimeEvent.log("reactorDelay", delay)
361 _reactor.callLater(dt, cb) 362
363 -class MetricLogObserver(service.MultiService):
364 _reactor = reactor
365 - def __init__(self, config):
366 service.MultiService.__init__(self) 367 368 self.config = config 369 self.periodic_task = None 370 self.log_task = None 371 372 # Mapping of metric type to handlers for that type 373 self.handlers = {} 374 375 # Register our default handlers 376 self.registerHandler(MetricCountEvent, MetricCountHandler(self)) 377 self.registerHandler(MetricTimeEvent, MetricTimeHandler(self)) 378 self.registerHandler(MetricAlarmEvent, MetricAlarmHandler(self)) 379 380 # Make sure our changes poller is behaving 381 self.getHandler(MetricTimeEvent).addWatcher(PollerWatcher(self)) 382 self.getHandler(MetricCountEvent).addWatcher( 383 AttachedSlavesWatcher(self))
384
385 - def reloadConfig(self, config):
386 self.config = config 387 log_interval = self.config.get('log_interval', 60) 388 if self.log_task: 389 self.log_task.stop() 390 if log_interval: 391 # Start up periodic logging 392 self.log_task = LoopingCall(self.report) 393 self.log_task.clock = self._reactor 394 self.log_task.start(log_interval) 395 else: 396 self.log_task = None 397 398 periodic_interval = self.config.get('periodic_interval', 10) 399 if self.periodic_task: 400 self.periodic_task.stop() 401 if periodic_interval: 402 self.periodic_task = LoopingCall(periodicCheck, self._reactor) 403 self.periodic_task.clock = self._reactor 404 self.periodic_task.start(periodic_interval) 405 else: 406 self.periodic_task = None
407
408 - def startService(self):
409 log.msg("Starting %s" % self) 410 service.MultiService.startService(self) 411 log.addObserver(self.emit) 412 413 self.reloadConfig(self.config)
414
415 - def stopService(self):
416 log.msg("Stopping %s" % self) 417 service.MultiService.stopService(self) 418 419 if self.periodic_task: 420 self.periodic_task.stop() 421 self.periodic_task = None 422 423 if self.log_task: 424 self.log_task.stop() 425 self.log_task = None 426 427 log.removeObserver(self.emit)
428
429 - def registerHandler(self, interface, handler):
430 old = self.getHandler(interface) 431 self.handlers[interface] = handler 432 return old
433
434 - def getHandler(self, interface):
435 return self.handlers.get(interface)
436
437 - def emit(self, eventDict):
438 # Ignore non-statistic events 439 metric = eventDict.get('metric') 440 if not metric or not isinstance(metric, MetricEvent): 441 return 442 443 if metric.__class__ not in self.handlers: 444 return 445 446 h = self.handlers[metric.__class__] 447 h.handle(eventDict, metric) 448 for w in h.watchers: 449 w.run()
450
451 - def asDict(self):
452 retval = {} 453 for interface, handler in self.handlers.iteritems(): 454 retval.update(handler.asDict()) 455 return retval
456
457 - def report(self):
458 try: 459 for interface, handler in self.handlers.iteritems(): 460 report = handler.report() 461 if not report: 462 continue 463 for line in report.split("\n"): 464 log.msg(line) 465 except: 466 log.err()
467