Package buildbot :: Package process :: Module metrics
[frames] | no frames]

Source Code for Module buildbot.process.metrics

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15  """ 
 16  Buildbot metrics module 
 17   
 18  Keeps track of counts and timings of various internal buildbot 
 19  activities. 
 20   
 21  Basic architecture: 
 22   
 23      MetricEvent.log(...) 
 24            || 
 25            \/ 
 26      MetricLogObserver 
 27            || 
 28            \/ 
 29      MetricHandler 
 30            || 
 31            \/ 
 32      MetricWatcher 
 33  """ 
 34  from collections import deque 
 35   
 36  from twisted.python import log 
 37  from twisted.internet.task import LoopingCall 
 38  from twisted.internet import reactor 
 39  from twisted.application import service 
 40  from buildbot import util, config 
 41  from buildbot.util.bbcollections import defaultdict 
 42   
 43  import gc, os, sys 
 44  # Make use of the resource module if we can 
 45  try: 
 46      import resource 
 47      assert resource 
 48  except ImportError: 
 49      resource = None 
50 51 -class MetricEvent(object):
52 @classmethod
53 - def log(cls, *args, **kwargs):
54 log.msg(metric=cls(*args, **kwargs))
55
56 -class MetricCountEvent(MetricEvent):
57 - def __init__(self, counter, count=1, absolute=False):
58 self.counter = counter 59 self.count = count 60 self.absolute = absolute
61
62 -class MetricTimeEvent(MetricEvent):
63 - def __init__(self, timer, elapsed):
64 self.timer = timer 65 self.elapsed = elapsed
66 67 ALARM_OK, ALARM_WARN, ALARM_CRIT = range(3) 68 ALARM_TEXT = ["OK", "WARN", "CRIT"]
69 70 -class MetricAlarmEvent(MetricEvent):
71 - def __init__(self, alarm, msg=None, level=ALARM_OK):
72 self.alarm = alarm 73 self.level = level 74 self.msg = msg
75
76 -def countMethod(counter):
77 def decorator(func): 78 def wrapper(*args, **kwargs): 79 MetricCountEvent.log(counter=counter) 80 return func(*args, **kwargs)
81 return wrapper 82 return decorator 83
84 -class Timer(object):
85 # For testing 86 _reactor = None 87
88 - def __init__(self, name):
89 self.name = name 90 self.started = None
91
92 - def startTimer(self, func):
93 def wrapper(*args, **kwargs): 94 self.start() 95 return func(*args, **kwargs)
96 return wrapper
97
98 - def stopTimer(self, func):
99 def wrapper(*args, **kwargs): 100 try: 101 return func(*args, **kwargs) 102 finally: 103 self.stop()
104 return wrapper 105
106 - def start(self):
107 self.started = util.now(self._reactor)
108
109 - def stop(self):
110 if self.started is not None: 111 elapsed = util.now(self._reactor) - self.started 112 MetricTimeEvent.log(timer=self.name, elapsed=elapsed) 113 self.started = None
114
115 -def timeMethod(name, _reactor=None):
116 def decorator(func): 117 t = Timer(name) 118 t._reactor=_reactor 119 def wrapper(*args, **kwargs): 120 t.start() 121 try: 122 return func(*args, **kwargs) 123 finally: 124 t.stop()
125 return wrapper 126 return decorator 127
128 -class FiniteList(deque):
129 - def __init__(self, maxlen=10):
130 self._maxlen = maxlen 131 deque.__init__(self)
132
133 - def append(self, o):
134 deque.append(self, o) 135 if len(self) > self._maxlen: 136 self.popleft()
137
138 -class AveragingFiniteList(FiniteList):
139 - def __init__(self, maxlen=10):
140 FiniteList.__init__(self, maxlen) 141 self.average = 0
142
143 - def append(self, o):
144 FiniteList.append(self, o) 145 self._calc()
146
147 - def _calc(self):
148 if len(self) == 0: 149 self.average = 0 150 else: 151 self.average = float(sum(self)) / len(self) 152 153 return self.average
154
155 -class MetricHandler(object):
156 - def __init__(self, metrics):
157 self.metrics = metrics 158 self.watchers = [] 159 160 self.reset()
161
162 - def addWatcher(self, watcher):
163 self.watchers.append(watcher)
164
165 - def removeWatcher(self, watcher):
166 self.watchers.remove(watcher)
167 168 # For subclasses to define
169 - def reset(self):
170 raise NotImplementedError
171
172 - def handle(self, eventDict, metric):
173 raise NotImplementedError
174
175 - def get(self, metric):
176 raise NotImplementedError
177
178 - def keys(self):
179 raise NotImplementedError
180
181 - def report(self):
182 raise NotImplementedError
183
184 - def asDict(self):
185 raise NotImplementedError
186
187 -class MetricCountHandler(MetricHandler):
188 _counters = None
189 - def reset(self):
190 self._counters = defaultdict(int)
191
192 - def handle(self, eventDict, metric):
193 if metric.absolute: 194 self._counters[metric.counter] = metric.count 195 else: 196 self._counters[metric.counter] += metric.count
197
198 - def keys(self):
199 return self._counters.keys()
200
201 - def get(self, counter):
202 return self._counters[counter]
203
204 - def report(self):
205 retval = [] 206 for counter in sorted(self.keys()): 207 retval.append("Counter %s: %i" % (counter, self.get(counter))) 208 return "\n".join(retval)
209
210 - def asDict(self):
211 retval = {} 212 for counter in sorted(self.keys()): 213 retval[counter] = self.get(counter) 214 return dict(counters=retval)
215
216 -class MetricTimeHandler(MetricHandler):
217 _timers = None
218 - def reset(self):
219 self._timers = defaultdict(AveragingFiniteList)
220
221 - def handle(self, eventDict, metric):
222 self._timers[metric.timer].append(metric.elapsed)
223
224 - def keys(self):
225 return self._timers.keys()
226
227 - def get(self, timer):
228 return self._timers[timer].average
229
230 - def report(self):
231 retval = [] 232 for timer in sorted(self.keys()): 233 retval.append("Timer %s: %.3g" % (timer, self.get(timer))) 234 return "\n".join(retval)
235
236 - def asDict(self):
237 retval = {} 238 for timer in sorted(self.keys()): 239 retval[timer] = self.get(timer) 240 return dict(timers=retval)
241
242 -class MetricAlarmHandler(MetricHandler):
243 _alarms = None
244 - def reset(self):
245 self._alarms = defaultdict(lambda x: ALARM_OK)
246
247 - def handle(self, eventDict, metric):
248 self._alarms[metric.alarm] = (metric.level, metric.msg)
249
250 - def report(self):
251 retval = [] 252 for alarm, (level, msg) in sorted(self._alarms.items()): 253 if msg: 254 retval.append("%s %s: %s" % (ALARM_TEXT[level], alarm, msg)) 255 else: 256 retval.append("%s %s" % (ALARM_TEXT[level], alarm)) 257 return "\n".join(retval)
258
259 - def asDict(self):
260 retval = {} 261 for alarm, (level, msg) in sorted(self._alarms.items()): 262 retval[alarm] = (ALARM_TEXT[level], msg) 263 return dict(alarms=retval)
264
265 -class PollerWatcher(object):
266 - def __init__(self, metrics):
267 self.metrics = metrics
268
269 - def run(self):
270 # Check if 'BuildMaster.pollDatabaseChanges()' and 271 # 'BuildMaster.pollDatabaseBuildRequests()' are running fast enough 272 h = self.metrics.getHandler(MetricTimeEvent) 273 if not h: 274 log.msg("Couldn't get MetricTimeEvent handler") 275 MetricAlarmEvent.log('PollerWatcher', 276 msg="Coudln't get MetricTimeEvent handler", 277 level=ALARM_WARN) 278 return 279 280 for method in ('BuildMaster.pollDatabaseChanges()', 281 'BuildMaster.pollDatabaseBuildRequests()'): 282 t = h.get(method) 283 master = self.metrics.parent 284 db_poll_interval = master.config.db['db_poll_interval'] 285 286 if db_poll_interval: 287 if t < 0.8 * db_poll_interval: 288 level = ALARM_OK 289 elif t < db_poll_interval: 290 level = ALARM_WARN 291 else: 292 level = ALARM_CRIT 293 MetricAlarmEvent.log(method, level=level)
294
295 -class AttachedSlavesWatcher(object):
296 - def __init__(self, metrics):
297 self.metrics = metrics
298
299 - def run(self):
300 # Check if 'BotMaster.attached_slaves' equals 301 # 'AbstractBuildSlave.attached_slaves' 302 h = self.metrics.getHandler(MetricCountEvent) 303 if not h: 304 log.msg("Couldn't get MetricCountEvent handler") 305 MetricAlarmEvent.log('AttachedSlavesWatcher', 306 msg="Coudln't get MetricCountEvent handler", 307 level=ALARM_WARN) 308 return 309 botmaster_count = h.get('BotMaster.attached_slaves') 310 buildslave_count = h.get('AbstractBuildSlave.attached_slaves') 311 312 # We let these be off by one since they're counted at slightly 313 # different times 314 if abs(botmaster_count - buildslave_count) > 1: 315 level = ALARM_WARN 316 else: 317 level = ALARM_OK 318 319 MetricAlarmEvent.log('attached_slaves', 320 msg='%s %s' % (botmaster_count, buildslave_count), 321 level=level)
322
323 -def _get_rss():
324 if sys.platform == 'linux2': 325 try: 326 return int(open("/proc/%i/statm" % os.getpid()).read().split()[1]) 327 except: 328 return 0 329 return 0
330
331 -def periodicCheck(_reactor=reactor):
332 # Measure how much garbage we have 333 garbage_count = len(gc.garbage) 334 MetricCountEvent.log('gc.garbage', garbage_count, absolute=True) 335 if garbage_count == 0: 336 level = ALARM_OK 337 else: 338 level = ALARM_WARN 339 MetricAlarmEvent.log('gc.garbage', level=level) 340 341 if resource: 342 r = resource.getrusage(resource.RUSAGE_SELF) 343 attrs = ['ru_utime', 'ru_stime', 'ru_maxrss', 'ru_ixrss', 'ru_idrss', 344 'ru_isrss', 'ru_minflt', 'ru_majflt', 'ru_nswap', 345 'ru_inblock', 'ru_oublock', 'ru_msgsnd', 'ru_msgrcv', 346 'ru_nsignals', 'ru_nvcsw', 'ru_nivcsw'] 347 for i,a in enumerate(attrs): 348 # Linux versions prior to 2.6.32 didn't report this value, but we 349 # can calculate it from /proc/<pid>/statm 350 v = r[i] 351 if a == 'ru_maxrss' and v == 0: 352 v = _get_rss() * resource.getpagesize() / 1024 353 MetricCountEvent.log('resource.%s' % a, v, absolute=True) 354 MetricCountEvent.log('resource.pagesize', resource.getpagesize(), absolute=True) 355 # Measure the reactor delay 356 then = util.now(_reactor) 357 dt = 0.1 358 def cb(): 359 now = util.now(_reactor) 360 delay = (now - then) - dt 361 MetricTimeEvent.log("reactorDelay", delay)
362 _reactor.callLater(dt, cb) 363
364 -class MetricLogObserver(config.ReconfigurableServiceMixin, 365 service.MultiService):
366 _reactor = reactor
367 - def __init__(self):
368 service.MultiService.__init__(self) 369 self.setName('metrics') 370 371 self.enabled = False 372 self.periodic_task = None 373 self.periodic_interval = None 374 self.log_task = None 375 self.log_interval = None 376 377 # Mapping of metric type to handlers for that type 378 self.handlers = {} 379 380 # Register our default handlers 381 self.registerHandler(MetricCountEvent, MetricCountHandler(self)) 382 self.registerHandler(MetricTimeEvent, MetricTimeHandler(self)) 383 self.registerHandler(MetricAlarmEvent, MetricAlarmHandler(self)) 384 385 # Make sure our changes poller is behaving 386 self.getHandler(MetricTimeEvent).addWatcher(PollerWatcher(self)) 387 self.getHandler(MetricCountEvent).addWatcher( 388 AttachedSlavesWatcher(self))
389
390 - def reconfigService(self, new_config):
391 # first, enable or disable 392 if new_config.metrics is None: 393 self.disable() 394 else: 395 self.enable() 396 397 metrics_config = new_config.metrics 398 399 # Start up periodic logging 400 log_interval = metrics_config.get('log_interval', 60) 401 if log_interval != self.log_interval: 402 if self.log_task: 403 self.log_task.stop() 404 self.log_task = None 405 if log_interval: 406 self.log_task = LoopingCall(self.report) 407 self.log_task.clock = self._reactor 408 self.log_task.start(log_interval) 409 410 # same for the periodic task 411 periodic_interval = metrics_config.get('periodic_interval', 10) 412 if periodic_interval != self.periodic_interval: 413 if self.periodic_task: 414 self.periodic_task.stop() 415 self.periodic_task = None 416 if periodic_interval: 417 self.periodic_task = LoopingCall(periodicCheck, 418 self._reactor) 419 self.periodic_task.clock = self._reactor 420 self.periodic_task.start(periodic_interval) 421 422 # upcall 423 return config.ReconfigurableServiceMixin.reconfigService(self, 424 new_config)
425
426 - def stopService(self):
427 self.disable() 428 service.MultiService.stopService(self)
429
430 - def enable(self):
431 if self.enabled: 432 return 433 log.addObserver(self.emit) 434 self.enabled = True
435
436 - def disable(self):
437 if not self.enabled: 438 return 439 440 if self.periodic_task: 441 self.periodic_task.stop() 442 self.periodic_task = None 443 444 if self.log_task: 445 self.log_task.stop() 446 self.log_task = None 447 448 log.removeObserver(self.emit) 449 self.enabled = False
450
451 - def registerHandler(self, interface, handler):
452 old = self.getHandler(interface) 453 self.handlers[interface] = handler 454 return old
455
456 - def getHandler(self, interface):
457 return self.handlers.get(interface)
458
459 - def emit(self, eventDict):
460 # Ignore non-statistic events 461 metric = eventDict.get('metric') 462 if not metric or not isinstance(metric, MetricEvent): 463 return 464 465 if metric.__class__ not in self.handlers: 466 return 467 468 h = self.handlers[metric.__class__] 469 h.handle(eventDict, metric) 470 for w in h.watchers: 471 w.run()
472
473 - def asDict(self):
474 retval = {} 475 for interface, handler in self.handlers.iteritems(): 476 retval.update(handler.asDict()) 477 return retval
478
479 - def report(self):
480 try: 481 for interface, handler in self.handlers.iteritems(): 482 report = handler.report() 483 if not report: 484 continue 485 for line in report.split("\n"): 486 log.msg(line) 487 except: 488 log.err()
489