同事有个需求:监控mongodb数据库中某一个collection的数据增长情况。原因是有定时任务不断往数据库里填充数据,一旦定时任务挂掉,该collection的数据就会停止增长;通过监控增长情况可以及时发现问题并重启任务,保证数据的准确性。其实也可以直接监控定时任务的执行情况,之后再研究。
    做法与博客文章《使用ganglia监控mongodb集群》中介绍的一样,使用python脚本来扩展ganglia,所以这里不再赘述,直接贴出脚本代码。
1. 创建mongodb_collection.pyconf配置文件(文件名中的collection根据实际情况换成表名)

点击(此处)折叠或打开

  1. # mongodb_collection.pyconf
  2. modules {
  3.     module {
  4.         name = "mongodb_EPGInfo_www"    # EPGInfo is the database name, www is the collection name
  5.         language = "python"
  6.         param total_count{
  7.             value = "/opt/mongodb/bin/mongo --host 10.9.201.190 --port 27017 EPGInfo --quiet --eval 'printjson(db.www.find().count())'"            # this command returns the number of records in the www collection
  8.         }
  9.     }
  10. }
  11. collection_group {
  12.     collect_every = 30
  13.     time_threshold = 90
  14.     metric {
  15.         name = "www_count_total"
  16.         title = "Total Count"
  17.     }
  18.     metric {
  19.         name = "www_count_insert"
  20.         title = "Insert per Sec"
  21.     }
  22. }
    具体说明看代码中的注释
2. 创建mongodb_collection.py脚本

点击(此处)折叠或打开

  1. #!/usr/bin/env python
  2. import os
  3. import json
  4. import string
  5. import time
  6. import copy
  7. import re
  8. PARAMS = {
  9.     "total_count" : "/opt/mongodb/bin/mongo --host 10.9.201.190 --port 27017 EPGInfo --quiet --eval 'printjson(db.www.find().count())'"
  10. }
  11. NAME_PREFIX = 'www_'
  12. METRICS = {
  13.     'time' : 0,
  14.     'data' : {}
  15. }
  16. LAST_METRICS = copy.deepcopy(METRICS)
  17. METRICS_CACHE_TTL = 3
  18. def get_metrics():
  19.     """Return all metrics"""
  20.     global METRICS, LAST_METRICS
  21.     if (time.time() - METRICS['time']) > METRICS_CACHE_TTL:
  22.         metrics = {}
  23.     # get raw metric data
  24.     o = os.popen(PARAMS['total_count'])
  25.     # clean up
  26.     metrics_str = ''.join(o.readlines()).strip() # convert to string
  27.     metrics_str = re.sub('\w+\((.*)\)', r"\1", metrics_str) # remove functions
  28.     metrics['count_total']=float(json.loads(metrics_str))     #增加监控字典
  29.     metrics['count_insert']=metrics['count_total']            #增加监控字典
  30.     # update cache
  31.     LAST_METRICS = copy.deepcopy(METRICS)
  32.     METRICS = {
  33.         'time': time.time(),
  34.         'data': metrics
  35.     }
  36.     print(METRICS)
  37.     return [METRICS, LAST_METRICS]
  38. def get_value(name):
  39.     """Return a value for the requested metric"""
  40.     # get metrics
  41.     metrics = get_metrics()[0]
  42.     print(metrics)
  43.     # get value
  44.     name = name[len(NAME_PREFIX):] # remove prefix from name
  45.     try:
  46.         result = metrics['data'][name]
  47.     except StandardError:
  48.         result = 0
  49.     return result
  50. def get_rate(name):
  51.     """Return change over time for the requested metric"""
  52.     # get metrics
  53.     [curr_metrics, last_metrics] = get_metrics()
  54.     # get rate
  55.     name = name[len(NAME_PREFIX):] # remove prefix from name
  56.     try:
  57.         rate = float(curr_metrics['data'][name] - last_metrics['data'][name]) / \
  58.         float(curr_metrics['time'] - last_metrics['time'])
  59.         if rate < 0:
  60.             rate = float(0)
  61.     except StandardError:
  62.         rate = float(0)
  63.     return rate
  64. def get_total_count(name):
  65.     total_count = get_value(name)
  66.     return total_count
  67. def get_count_insert(name):
  68.     count_insert = get_rate(name)
  69.     return count_insert
  70. def metric_init(lparams):
  71.     global PARAMS
  72.     # set parameters
  73.     for key in lparams:
  74.         PARAMS[key] = lparams[key]
  75.     # define descriptors
  76.     time_max = 60
  77.     groups = 'EPGInfo_www'
  78.     descriptors = [
  79.         {
  80.             'name': NAME_PREFIX + 'count_total',
  81.             'call_back': get_total_count,
  82.             'time_max': time_max,
  83.             'value_type': 'float',
  84.             'units': 'Total',
  85.             'slope': 'both',
  86.             'format': '%f',
  87.             'description': 'Total Count',
  88.             'groups': groups
  89.         },
  90.         {
  91.             'name': NAME_PREFIX + 'count_insert',
  92.             'call_back': get_count_insert,
  93.             'time_max': time_max,
  94.             'value_type': 'float',
  95.             'units': 'Inserts/Sec',
  96.             'slope': 'both',
  97.             'format': '%f',
  98.             'description': 'Insert per Sec',
  99.             'groups': groups
  100.         }
  101.     ]
  102.     return descriptors
  103. def metric_cleanup():
  104.     pass
  105. if __name__ == '__main__':
  106.     descriptors = metric_init(PARAMS)
  107.     while True:
  108.         for d in descriptors:
  109.             print (('%s = %s') % (d['name'], d['format'])) % (d['call_back'](d['name']))
  110.         print ''
  111.         time.sleep(METRICS_CACHE_TTL)

    具体说明看代码中的注释

    

10-28 17:28