Package buildbot :: Package process :: Module metrics
[frames] | no frames]

Source Code for Module buildbot.process.metrics

  1  # This file is part of Buildbot.  Buildbot is free software: you can 
  2  # redistribute it and/or modify it under the terms of the GNU General Public 
  3  # License as published by the Free Software Foundation, version 2. 
  4  # 
  5  # This program is distributed in the hope that it will be useful, but WITHOUT 
  6  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more 
  8  # details. 
  9  # 
 10  # You should have received a copy of the GNU General Public License along with 
 11  # this program; if not, write to the Free Software Foundation, Inc., 51 
 12  # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 13  # 
 14  # Copyright Buildbot Team Members 
 15   
 16  from __future__ import with_statement 
 17  """ 
 18  Buildbot metrics module 
 19   
 20  Keeps track of counts and timings of various internal buildbot 
 21  activities. 
 22   
 23  Basic architecture: 
 24   
 25      MetricEvent.log(...) 
 26            || 
 27            \/ 
 28      MetricLogObserver 
 29            || 
 30            \/ 
 31      MetricHandler 
 32            || 
 33            \/ 
 34      MetricWatcher 
 35  """ 
 36  from collections import deque 
 37   
 38  from twisted.python import log 
 39  from twisted.internet.task import LoopingCall 
 40  from twisted.internet import reactor 
 41  from twisted.application import service 
 42  from buildbot import util, config 
 43  from collections import defaultdict 
 44   
 45  import gc, os, sys 
 46  # Make use of the resource module if we can 
 47  try: 
 48      import resource 
 49      assert resource 
 50  except ImportError: 
 51      resource = None 
52 53 -class MetricEvent(object):
54 @classmethod
55 - def log(cls, *args, **kwargs):
56 log.msg(metric=cls(*args, **kwargs))
57
58 -class MetricCountEvent(MetricEvent):
59 - def __init__(self, counter, count=1, absolute=False):
60 self.counter = counter 61 self.count = count 62 self.absolute = absolute
63
64 -class MetricTimeEvent(MetricEvent):
65 - def __init__(self, timer, elapsed):
66 self.timer = timer 67 self.elapsed = elapsed
68 69 ALARM_OK, ALARM_WARN, ALARM_CRIT = range(3) 70 ALARM_TEXT = ["OK", "WARN", "CRIT"]
71 72 -class MetricAlarmEvent(MetricEvent):
73 - def __init__(self, alarm, msg=None, level=ALARM_OK):
74 self.alarm = alarm 75 self.level = level 76 self.msg = msg
77
78 -def countMethod(counter):
79 def decorator(func): 80 def wrapper(*args, **kwargs): 81 MetricCountEvent.log(counter=counter) 82 return func(*args, **kwargs)
83 return wrapper 84 return decorator 85
86 -class Timer(object):
87 # For testing 88 _reactor = None 89
90 - def __init__(self, name):
91 self.name = name 92 self.started = None
93
94 - def startTimer(self, func):
95 def wrapper(*args, **kwargs): 96 self.start() 97 return func(*args, **kwargs)
98 return wrapper
99
100 - def stopTimer(self, func):
101 def wrapper(*args, **kwargs): 102 try: 103 return func(*args, **kwargs) 104 finally: 105 self.stop()
106 return wrapper 107
108 - def start(self):
109 self.started = util.now(self._reactor)
110
111 - def stop(self):
112 if self.started is not None: 113 elapsed = util.now(self._reactor) - self.started 114 MetricTimeEvent.log(timer=self.name, elapsed=elapsed) 115 self.started = None
116
117 -def timeMethod(name, _reactor=None):
118 def decorator(func): 119 t = Timer(name) 120 t._reactor=_reactor 121 def wrapper(*args, **kwargs): 122 t.start() 123 try: 124 return func(*args, **kwargs) 125 finally: 126 t.stop()
127 return wrapper 128 return decorator 129
130 -class FiniteList(deque):
131 - def __init__(self, maxlen=10):
132 self._maxlen = maxlen 133 deque.__init__(self)
134
135 - def append(self, o):
136 deque.append(self, o) 137 if len(self) > self._maxlen: 138 self.popleft()
139
140 -class AveragingFiniteList(FiniteList):
141 - def __init__(self, maxlen=10):
142 FiniteList.__init__(self, maxlen) 143 self.average = 0
144
145 - def append(self, o):
146 FiniteList.append(self, o) 147 self._calc()
148
149 - def _calc(self):
150 if len(self) == 0: 151 self.average = 0 152 else: 153 self.average = float(sum(self)) / len(self) 154 155 return self.average
156
157 -class MetricHandler(object):
158 - def __init__(self, metrics):
159 self.metrics = metrics 160 self.watchers = [] 161 162 self.reset()
163
164 - def addWatcher(self, watcher):
165 self.watchers.append(watcher)
166
167 - def removeWatcher(self, watcher):
168 self.watchers.remove(watcher)
169 170 # For subclasses to define
171 - def reset(self):
172 raise NotImplementedError
173
174 - def handle(self, eventDict, metric):
175 raise NotImplementedError
176
177 - def get(self, metric):
178 raise NotImplementedError
179
180 - def keys(self):
181 raise NotImplementedError
182
183 - def report(self):
184 raise NotImplementedError
185
186 - def asDict(self):
187 raise NotImplementedError
188
189 -class MetricCountHandler(MetricHandler):
190 _counters = None
191 - def reset(self):
192 self._counters = defaultdict(int)
193
194 - def handle(self, eventDict, metric):
195 if metric.absolute: 196 self._counters[metric.counter] = metric.count 197 else: 198 self._counters[metric.counter] += metric.count
199
200 - def keys(self):
201 return self._counters.keys()
202
203 - def get(self, counter):
204 return self._counters[counter]
205
206 - def report(self):
207 retval = [] 208 for counter in sorted(self.keys()): 209 retval.append("Counter %s: %i" % (counter, self.get(counter))) 210 return "\n".join(retval)
211
212 - def asDict(self):
213 retval = {} 214 for counter in sorted(self.keys()): 215 retval[counter] = self.get(counter) 216 return dict(counters=retval)
217
218 -class MetricTimeHandler(MetricHandler):
219 _timers = None
220 - def reset(self):
221 self._timers = defaultdict(AveragingFiniteList)
222
223 - def handle(self, eventDict, metric):
224 self._timers[metric.timer].append(metric.elapsed)
225
226 - def keys(self):
227 return self._timers.keys()
228
229 - def get(self, timer):
230 return self._timers[timer].average
231
232 - def report(self):
233 retval = [] 234 for timer in sorted(self.keys()): 235 retval.append("Timer %s: %.3g" % (timer, self.get(timer))) 236 return "\n".join(retval)
237
238 - def asDict(self):
239 retval = {} 240 for timer in sorted(self.keys()): 241 retval[timer] = self.get(timer) 242 return dict(timers=retval)
243
244 -class MetricAlarmHandler(MetricHandler):
245 _alarms = None
246 - def reset(self):
247 self._alarms = defaultdict(lambda x: ALARM_OK)
248
249 - def handle(self, eventDict, metric):
250 self._alarms[metric.alarm] = (metric.level, metric.msg)
251
252 - def report(self):
253 retval = [] 254 for alarm, (level, msg) in sorted(self._alarms.items()): 255 if msg: 256 retval.append("%s %s: %s" % (ALARM_TEXT[level], alarm, msg)) 257 else: 258 retval.append("%s %s" % (ALARM_TEXT[level], alarm)) 259 return "\n".join(retval)
260
261 - def asDict(self):
262 retval = {} 263 for alarm, (level, msg) in sorted(self._alarms.items()): 264 retval[alarm] = (ALARM_TEXT[level], msg) 265 return dict(alarms=retval)
266
267 -class PollerWatcher(object):
268 - def __init__(self, metrics):
269 self.metrics = metrics
270
271 - def run(self):
272 # Check if 'BuildMaster.pollDatabaseChanges()' and 273 # 'BuildMaster.pollDatabaseBuildRequests()' are running fast enough 274 h = self.metrics.getHandler(MetricTimeEvent) 275 if not h: 276 log.msg("Couldn't get MetricTimeEvent handler") 277 MetricAlarmEvent.log('PollerWatcher', 278 msg="Coudln't get MetricTimeEvent handler", 279 level=ALARM_WARN) 280 return 281 282 for method in ('BuildMaster.pollDatabaseChanges()', 283 'BuildMaster.pollDatabaseBuildRequests()'): 284 t = h.get(method) 285 master = self.metrics.parent 286 db_poll_interval = master.config.db['db_poll_interval'] 287 288 if db_poll_interval: 289 if t < 0.8 * db_poll_interval: 290 level = ALARM_OK 291 elif t < db_poll_interval: 292 level = ALARM_WARN 293 else: 294 level = ALARM_CRIT 295 MetricAlarmEvent.log(method, level=level)
296
297 -class AttachedSlavesWatcher(object):
298 - def __init__(self, metrics):
299 self.metrics = metrics
300
301 - def run(self):
302 # Check if 'BotMaster.attached_slaves' equals 303 # 'AbstractBuildSlave.attached_slaves' 304 h = self.metrics.getHandler(MetricCountEvent) 305 if not h: 306 log.msg("Couldn't get MetricCountEvent handler") 307 MetricAlarmEvent.log('AttachedSlavesWatcher', 308 msg="Coudln't get MetricCountEvent handler", 309 level=ALARM_WARN) 310 return 311 botmaster_count = h.get('BotMaster.attached_slaves') 312 buildslave_count = h.get('AbstractBuildSlave.attached_slaves') 313 314 # We let these be off by one since they're counted at slightly 315 # different times 316 if abs(botmaster_count - buildslave_count) > 1: 317 level = ALARM_WARN 318 else: 319 level = ALARM_OK 320 321 MetricAlarmEvent.log('attached_slaves', 322 msg='%s %s' % (botmaster_count, buildslave_count), 323 level=level)
324
325 -def _get_rss():
326 if sys.platform == 'linux2': 327 try: 328 with open("/proc/%i/statm" % os.getpid()) as f: 329 return int(f.read().split()[1]) 330 except: 331 return 0 332 return 0
333
334 -def periodicCheck(_reactor=reactor):
335 try: 336 # Measure how much garbage we have 337 garbage_count = len(gc.garbage) 338 MetricCountEvent.log('gc.garbage', garbage_count, absolute=True) 339 if garbage_count == 0: 340 level = ALARM_OK 341 else: 342 level = ALARM_WARN 343 MetricAlarmEvent.log('gc.garbage', level=level) 344 345 if resource: 346 r = resource.getrusage(resource.RUSAGE_SELF) 347 attrs = ['ru_utime', 'ru_stime', 'ru_maxrss', 'ru_ixrss', 'ru_idrss', 348 'ru_isrss', 'ru_minflt', 'ru_majflt', 'ru_nswap', 349 'ru_inblock', 'ru_oublock', 'ru_msgsnd', 'ru_msgrcv', 350 'ru_nsignals', 'ru_nvcsw', 'ru_nivcsw'] 351 for i,a in enumerate(attrs): 352 # Linux versions prior to 2.6.32 didn't report this value, but we 353 # can calculate it from /proc/<pid>/statm 354 v = r[i] 355 if a == 'ru_maxrss' and v == 0: 356 v = _get_rss() * resource.getpagesize() / 1024 357 MetricCountEvent.log('resource.%s' % a, v, absolute=True) 358 MetricCountEvent.log('resource.pagesize', resource.getpagesize(), absolute=True) 359 # Measure the reactor delay 360 then = util.now(_reactor) 361 dt = 0.1 362 def cb(): 363 now = util.now(_reactor) 364 delay = (now - then) - dt 365 MetricTimeEvent.log("reactorDelay", delay)
366 _reactor.callLater(dt, cb) 367 except Exception: 368 log.err(None, "while collecting VM metrics") 369
370 -class MetricLogObserver(config.ReconfigurableServiceMixin, 371 service.MultiService):
372 _reactor = reactor
373 - def __init__(self):
374 service.MultiService.__init__(self) 375 self.setName('metrics') 376 377 self.enabled = False 378 self.periodic_task = None 379 self.periodic_interval = None 380 self.log_task = None 381 self.log_interval = None 382 383 # Mapping of metric type to handlers for that type 384 self.handlers = {} 385 386 # Register our default handlers 387 self.registerHandler(MetricCountEvent, MetricCountHandler(self)) 388 self.registerHandler(MetricTimeEvent, MetricTimeHandler(self)) 389 self.registerHandler(MetricAlarmEvent, MetricAlarmHandler(self)) 390 391 # Make sure our changes poller is behaving 392 self.getHandler(MetricTimeEvent).addWatcher(PollerWatcher(self)) 393 self.getHandler(MetricCountEvent).addWatcher( 394 AttachedSlavesWatcher(self))
395
396 - def reconfigService(self, new_config):
397 # first, enable or disable 398 if new_config.metrics is None: 399 self.disable() 400 else: 401 self.enable() 402 403 metrics_config = new_config.metrics 404 405 # Start up periodic logging 406 log_interval = metrics_config.get('log_interval', 60) 407 if log_interval != self.log_interval: 408 if self.log_task: 409 self.log_task.stop() 410 self.log_task = None 411 if log_interval: 412 self.log_task = LoopingCall(self.report) 413 self.log_task.clock = self._reactor 414 self.log_task.start(log_interval) 415 416 # same for the periodic task 417 periodic_interval = metrics_config.get('periodic_interval', 10) 418 if periodic_interval != self.periodic_interval: 419 if self.periodic_task: 420 self.periodic_task.stop() 421 self.periodic_task = None 422 if periodic_interval: 423 self.periodic_task = LoopingCall(periodicCheck, 424 self._reactor) 425 self.periodic_task.clock = self._reactor 426 self.periodic_task.start(periodic_interval) 427 428 # upcall 429 return config.ReconfigurableServiceMixin.reconfigService(self, 430 new_config)
431
432 - def stopService(self):
433 self.disable() 434 service.MultiService.stopService(self)
435
436 - def enable(self):
437 if self.enabled: 438 return 439 log.addObserver(self.emit) 440 self.enabled = True
441
442 - def disable(self):
443 if not self.enabled: 444 return 445 446 if self.periodic_task: 447 self.periodic_task.stop() 448 self.periodic_task = None 449 450 if self.log_task: 451 self.log_task.stop() 452 self.log_task = None 453 454 log.removeObserver(self.emit) 455 self.enabled = False
456
457 - def registerHandler(self, interface, handler):
458 old = self.getHandler(interface) 459 self.handlers[interface] = handler 460 return old
461
462 - def getHandler(self, interface):
463 return self.handlers.get(interface)
464
465 - def emit(self, eventDict):
466 # Ignore non-statistic events 467 metric = eventDict.get('metric') 468 if not metric or not isinstance(metric, MetricEvent): 469 return 470 471 if metric.__class__ not in self.handlers: 472 return 473 474 h = self.handlers[metric.__class__] 475 h.handle(eventDict, metric) 476 for w in h.watchers: 477 w.run()
478
479 - def asDict(self):
480 retval = {} 481 for interface, handler in self.handlers.iteritems(): 482 retval.update(handler.asDict()) 483 return retval
484
485 - def report(self):
486 try: 487 for interface, handler in self.handlers.iteritems(): 488 report = handler.report() 489 if not report: 490 continue 491 for line in report.split("\n"): 492 log.msg(line) 493 except: 494 log.err(None, "generating metric report")
495