Package buildbot :: Package process :: Module metrics
[frames] | no frames]

Source Code for Module buildbot.process.metrics

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15   
 16  from __future__ import with_statement 
 17  """ 
 18  Buildbot metrics module 
 19   
 20  Keeps track of counts and timings of various internal buildbot 
 21  activities. 
 22   
 23  Basic architecture: 
 24   
 25      MetricEvent.log(...) 
 26            || 
 27            \/ 
 28      MetricLogObserver 
 29            || 
 30            \/ 
 31      MetricHandler 
 32            || 
 33            \/ 
 34      MetricWatcher 
 35  """ 
 36  from collections import deque 
 37   
 38  from twisted.python import log 
 39  from twisted.internet.task import LoopingCall 
 40  from twisted.internet import reactor 
 41  from twisted.application import service 
 42  from buildbot import util, config 
 43  from collections import defaultdict 
 44   
 45  import gc, os, sys 
 46  # Make use of the resource module if we can 
 47  try: 
 48      import resource 
 49      assert resource 
 50  except ImportError: 
 51      resource = None 
52 53 -class MetricEvent(object):
54 @classmethod
55 - def log(cls, *args, **kwargs):
56 log.msg(metric=cls(*args, **kwargs))
57
58 -class MetricCountEvent(MetricEvent):
59 - def __init__(self, counter, count=1, absolute=False):
60 self.counter = counter 61 self.count = count 62 self.absolute = absolute
63
64 -class MetricTimeEvent(MetricEvent):
65 - def __init__(self, timer, elapsed):
66 self.timer = timer 67 self.elapsed = elapsed
68 69 ALARM_OK, ALARM_WARN, ALARM_CRIT = range(3) 70 ALARM_TEXT = ["OK", "WARN", "CRIT"]
71 72 -class MetricAlarmEvent(MetricEvent):
73 - def __init__(self, alarm, msg=None, level=ALARM_OK):
74 self.alarm = alarm 75 self.level = level 76 self.msg = msg
77
78 -def countMethod(counter):
79 def decorator(func): 80 def wrapper(*args, **kwargs): 81 MetricCountEvent.log(counter=counter) 82 return func(*args, **kwargs)
83 return wrapper 84 return decorator 85
86 -class Timer(object):
87 # For testing 88 _reactor = None 89
90 - def __init__(self, name):
91 self.name = name 92 self.started = None
93
94 - def startTimer(self, func):
95 def wrapper(*args, **kwargs): 96 self.start() 97 return func(*args, **kwargs)
98 return wrapper
99
100 - def stopTimer(self, func):
101 def wrapper(*args, **kwargs): 102 try: 103 return func(*args, **kwargs) 104 finally: 105 self.stop()
106 return wrapper 107
108 - def start(self):
109 self.started = util.now(self._reactor)
110
111 - def stop(self):
112 if self.started is not None: 113 elapsed = util.now(self._reactor) - self.started 114 MetricTimeEvent.log(timer=self.name, elapsed=elapsed) 115 self.started = None
116
117 -def timeMethod(name, _reactor=None):
118 def decorator(func): 119 t = Timer(name) 120 t._reactor=_reactor 121 def wrapper(*args, **kwargs): 122 t.start() 123 try: 124 return func(*args, **kwargs) 125 finally: 126 t.stop()
127 return wrapper 128 return decorator 129
130 -class FiniteList(deque):
131 - def __init__(self, maxlen=10):
132 self._maxlen = maxlen 133 deque.__init__(self)
134
135 - def append(self, o):
136 deque.append(self, o) 137 if len(self) > self._maxlen: 138 self.popleft()
139
140 -class AveragingFiniteList(FiniteList):
141 - def __init__(self, maxlen=10):
142 FiniteList.__init__(self, maxlen) 143 self.average = 0
144
145 - def append(self, o):
146 FiniteList.append(self, o) 147 self._calc()
148
149 - def _calc(self):
150 if len(self) == 0: 151 self.average = 0 152 else: 153 self.average = float(sum(self)) / len(self) 154 155 return self.average
156
157 -class MetricHandler(object):
158 - def __init__(self, metrics):
159 self.metrics = metrics 160 self.watchers = [] 161 162 self.reset()
163
164 - def addWatcher(self, watcher):
165 self.watchers.append(watcher)
166
167 - def removeWatcher(self, watcher):
168 self.watchers.remove(watcher)
169 170 # For subclasses to define
171 - def reset(self):
172 raise NotImplementedError
173
174 - def handle(self, eventDict, metric):
175 raise NotImplementedError
176
177 - def get(self, metric):
178 raise NotImplementedError
179
180 - def keys(self):
181 raise NotImplementedError
182
183 - def report(self):
184 raise NotImplementedError
185
186 - def asDict(self):
187 raise NotImplementedError
188
189 -class MetricCountHandler(MetricHandler):
190 _counters = None
191 - def reset(self):
192 self._counters = defaultdict(int)
193
194 - def handle(self, eventDict, metric):
195 if metric.absolute: 196 self._counters[metric.counter] = metric.count 197 else: 198 self._counters[metric.counter] += metric.count
199
200 - def keys(self):
201 return self._counters.keys()
202
203 - def get(self, counter):
204 return self._counters[counter]
205
206 - def report(self):
207 retval = [] 208 for counter in sorted(self.keys()): 209 retval.append("Counter %s: %i" % (counter, self.get(counter))) 210 return "\n".join(retval)
211
212 - def asDict(self):
213 retval = {} 214 for counter in sorted(self.keys()): 215 retval[counter] = self.get(counter) 216 return dict(counters=retval)
217
218 -class MetricTimeHandler(MetricHandler):
219 _timers = None
220 - def reset(self):
221 self._timers = defaultdict(AveragingFiniteList)
222
223 - def handle(self, eventDict, metric):
224 self._timers[metric.timer].append(metric.elapsed)
225
226 - def keys(self):
227 return self._timers.keys()
228
229 - def get(self, timer):
230 return self._timers[timer].average
231
232 - def report(self):
233 retval = [] 234 for timer in sorted(self.keys()): 235 retval.append("Timer %s: %.3g" % (timer, self.get(timer))) 236 return "\n".join(retval)
237
238 - def asDict(self):
239 retval = {} 240 for timer in sorted(self.keys()): 241 retval[timer] = self.get(timer) 242 return dict(timers=retval)
243
244 -class MetricAlarmHandler(MetricHandler):
245 _alarms = None
246 - def reset(self):
247 self._alarms = defaultdict(lambda x: ALARM_OK)
248
249 - def handle(self, eventDict, metric):
250 self._alarms[metric.alarm] = (metric.level, metric.msg)
251
252 - def report(self):
253 retval = [] 254 for alarm, (level, msg) in sorted(self._alarms.items()): 255 if msg: 256 retval.append("%s %s: %s" % (ALARM_TEXT[level], alarm, msg)) 257 else: 258 retval.append("%s %s" % (ALARM_TEXT[level], alarm)) 259 return "\n".join(retval)
260
261 - def asDict(self):
262 retval = {} 263 for alarm, (level, msg) in sorted(self._alarms.items()): 264 retval[alarm] = (ALARM_TEXT[level], msg) 265 return dict(alarms=retval)
266
267 -class PollerWatcher(object):
268 - def __init__(self, metrics):
269 self.metrics = metrics
270
271 - def run(self):
272 # Check if 'BuildMaster.pollDatabaseChanges()' and 273 # 'BuildMaster.pollDatabaseBuildRequests()' are running fast enough 274 h = self.metrics.getHandler(MetricTimeEvent) 275 if not h: 276 log.msg("Couldn't get MetricTimeEvent handler") 277 MetricAlarmEvent.log('PollerWatcher', 278 msg="Coudln't get MetricTimeEvent handler", 279 level=ALARM_WARN) 280 return 281 282 for method in ('BuildMaster.pollDatabaseChanges()', 283 'BuildMaster.pollDatabaseBuildRequests()'): 284 t = h.get(method) 285 master = self.metrics.parent 286 db_poll_interval = master.config.db['db_poll_interval'] 287 288 if db_poll_interval: 289 if t < 0.8 * db_poll_interval: 290 level = ALARM_OK 291 elif t < db_poll_interval: 292 level = ALARM_WARN 293 else: 294 level = ALARM_CRIT 295 MetricAlarmEvent.log(method, level=level)
296
297 -class AttachedSlavesWatcher(object):
298 - def __init__(self, metrics):
299 self.metrics = metrics
300
301 - def run(self):
302 # Check if 'BotMaster.attached_slaves' equals 303 # 'AbstractBuildSlave.attached_slaves' 304 h = self.metrics.getHandler(MetricCountEvent) 305 if not h: 306 log.msg("Couldn't get MetricCountEvent handler") 307 MetricAlarmEvent.log('AttachedSlavesWatcher', 308 msg="Coudln't get MetricCountEvent handler", 309 level=ALARM_WARN) 310 return 311 botmaster_count = h.get('BotMaster.attached_slaves') 312 buildslave_count = h.get('AbstractBuildSlave.attached_slaves') 313 314 # We let these be off by one since they're counted at slightly 315 # different times 316 if abs(botmaster_count - buildslave_count) > 1: 317 level = ALARM_WARN 318 else: 319 level = ALARM_OK 320 321 MetricAlarmEvent.log('attached_slaves', 322 msg='%s %s' % (botmaster_count, buildslave_count), 323 level=level)
324
325 -def _get_rss():
326 if sys.platform == 'linux2': 327 try: 328 with open("/proc/%i/statm" % os.getpid()) as f: 329 return int(f.read().split()[1]) 330 except: 331 return 0 332 return 0
333
334 -def periodicCheck(_reactor=reactor):
335 # Measure how much garbage we have 336 garbage_count = len(gc.garbage) 337 MetricCountEvent.log('gc.garbage', garbage_count, absolute=True) 338 if garbage_count == 0: 339 level = ALARM_OK 340 else: 341 level = ALARM_WARN 342 MetricAlarmEvent.log('gc.garbage', level=level) 343 344 if resource: 345 r = resource.getrusage(resource.RUSAGE_SELF) 346 attrs = ['ru_utime', 'ru_stime', 'ru_maxrss', 'ru_ixrss', 'ru_idrss', 347 'ru_isrss', 'ru_minflt', 'ru_majflt', 'ru_nswap', 348 'ru_inblock', 'ru_oublock', 'ru_msgsnd', 'ru_msgrcv', 349 'ru_nsignals', 'ru_nvcsw', 'ru_nivcsw'] 350 for i,a in enumerate(attrs): 351 # Linux versions prior to 2.6.32 didn't report this value, but we 352 # can calculate it from /proc/<pid>/statm 353 v = r[i] 354 if a == 'ru_maxrss' and v == 0: 355 v = _get_rss() * resource.getpagesize() / 1024 356 MetricCountEvent.log('resource.%s' % a, v, absolute=True) 357 MetricCountEvent.log('resource.pagesize', resource.getpagesize(), absolute=True) 358 # Measure the reactor delay 359 then = util.now(_reactor) 360 dt = 0.1 361 def cb(): 362 now = util.now(_reactor) 363 delay = (now - then) - dt 364 MetricTimeEvent.log("reactorDelay", delay)
365 _reactor.callLater(dt, cb) 366
367 -class MetricLogObserver(config.ReconfigurableServiceMixin, 368 service.MultiService):
369 _reactor = reactor
370 - def __init__(self):
371 service.MultiService.__init__(self) 372 self.setName('metrics') 373 374 self.enabled = False 375 self.periodic_task = None 376 self.periodic_interval = None 377 self.log_task = None 378 self.log_interval = None 379 380 # Mapping of metric type to handlers for that type 381 self.handlers = {} 382 383 # Register our default handlers 384 self.registerHandler(MetricCountEvent, MetricCountHandler(self)) 385 self.registerHandler(MetricTimeEvent, MetricTimeHandler(self)) 386 self.registerHandler(MetricAlarmEvent, MetricAlarmHandler(self)) 387 388 # Make sure our changes poller is behaving 389 self.getHandler(MetricTimeEvent).addWatcher(PollerWatcher(self)) 390 self.getHandler(MetricCountEvent).addWatcher( 391 AttachedSlavesWatcher(self))
392
393 - def reconfigService(self, new_config):
394 # first, enable or disable 395 if new_config.metrics is None: 396 self.disable() 397 else: 398 self.enable() 399 400 metrics_config = new_config.metrics 401 402 # Start up periodic logging 403 log_interval = metrics_config.get('log_interval', 60) 404 if log_interval != self.log_interval: 405 if self.log_task: 406 self.log_task.stop() 407 self.log_task = None 408 if log_interval: 409 self.log_task = LoopingCall(self.report) 410 self.log_task.clock = self._reactor 411 self.log_task.start(log_interval) 412 413 # same for the periodic task 414 periodic_interval = metrics_config.get('periodic_interval', 10) 415 if periodic_interval != self.periodic_interval: 416 if self.periodic_task: 417 self.periodic_task.stop() 418 self.periodic_task = None 419 if periodic_interval: 420 self.periodic_task = LoopingCall(periodicCheck, 421 self._reactor) 422 self.periodic_task.clock = self._reactor 423 self.periodic_task.start(periodic_interval) 424 425 # upcall 426 return config.ReconfigurableServiceMixin.reconfigService(self, 427 new_config)
428
429 - def stopService(self):
430 self.disable() 431 service.MultiService.stopService(self)
432
433 - def enable(self):
434 if self.enabled: 435 return 436 log.addObserver(self.emit) 437 self.enabled = True
438
439 - def disable(self):
440 if not self.enabled: 441 return 442 443 if self.periodic_task: 444 self.periodic_task.stop() 445 self.periodic_task = None 446 447 if self.log_task: 448 self.log_task.stop() 449 self.log_task = None 450 451 log.removeObserver(self.emit) 452 self.enabled = False
453
454 - def registerHandler(self, interface, handler):
455 old = self.getHandler(interface) 456 self.handlers[interface] = handler 457 return old
458
459 - def getHandler(self, interface):
460 return self.handlers.get(interface)
461
462 - def emit(self, eventDict):
463 # Ignore non-statistic events 464 metric = eventDict.get('metric') 465 if not metric or not isinstance(metric, MetricEvent): 466 return 467 468 if metric.__class__ not in self.handlers: 469 return 470 471 h = self.handlers[metric.__class__] 472 h.handle(eventDict, metric) 473 for w in h.watchers: 474 w.run()
475
476 - def asDict(self):
477 retval = {} 478 for interface, handler in self.handlers.iteritems(): 479 retval.update(handler.asDict()) 480 return retval
481
482 - def report(self):
483 try: 484 for interface, handler in self.handlers.iteritems(): 485 report = handler.report() 486 if not report: 487 continue 488 for line in report.split("\n"): 489 log.msg(line) 490 except: 491 log.err()
492