启动日志

[root@bjyh-48-86 nagios]# cat nagios.log
[1433471109] Nagios 4.1.0rc1 starting... (PID=5136)
[1433471109] Local time is Fri Jun 05 10:25:09 CST 2015
[1433471109] LOG VERSION: 2.0
[1433471109] qh: Socket '/var/nagios/rw/nagios.qh' successfully initialized
[1433471109] qh: core query handler registered
[1433471109] nerd: Channel hostchecks registered successfully
[1433471109] nerd: Channel servicechecks registered successfully
[1433471109] nerd: Channel opathchecks registered successfully
[1433471109] nerd: Fully initialized and ready to rock!
[1433471109] wproc: Successfully registered manager as @wproc with query handler
[1433471109] wproc: Registry request: name=Core Worker 5141;pid=5141
[1433471109] wproc: Registry request: name=Core Worker 5140;pid=5140
[1433471109] wproc: Registry request: name=Core Worker 5139;pid=5139
[1433471109] wproc: Registry request: name=Core Worker 5138;pid=5138
[1433471109] Warning: Service 'icmp' on host 'bjyh-48-86' has no notification time period defined!

# 注意日志中的 qh、nerd 和 wproc

格式

@service command\0

@service 服务名,使用@做为前缀
command 命令参数
\0 结束符,表示命令结束

测试

# Send one NUL-terminated @echo request to the query handler socket.
# printf is used instead of `echo -e` so the \0 escape is handled portably
# and no extra newline is sent after the terminator.
printf '@echo Query handler is working properly!\0' | nc -U /var/nagios/rw/nagios.qh
#!/usr/bin/env python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# Minimal query handler check: send "@echo <message>" and print the reply.
# Written for Python 2 (plain str is sent over the socket).

import socket, sys, os

nagios_qh = '/var/nagios/rw/nagios.qh'
message = 'Test Message!'

s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
    s.connect(nagios_qh)
    # Every query handler request is "@service command" terminated by \0.
    s.sendall('@echo ' + message + '\0')
    # Read back at most as many bytes as the message that was sent.
    results = s.recv(len(message))
finally:
    # Close the socket even if connect/send/recv raised.
    s.close()

print('%s %s' % (type(results), results))
# python test.py
<type 'str'> Test Message!

@core

@core服务,获取和配置nagios process相关信息

@core squeuestats

@core squeuestats\0
查询queue of scheduled jobs的信息
#!/usr/bin/env python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# Connects to the Nagios socket, sends the @core squeuestats command, and
# reads the response until the \0 character is received.
# Written for Python 2 (plain str is sent over the socket).

import socket, sys, os

nagios_qh = '/var/nagios/rw/nagios.qh'

s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
    s.connect(nagios_qh)
    s.sendall('@core squeuestats\0')

    result = ''
    while True:
        b = s.recv(1)
        # chr() takes an integer in range(256) and returns the corresponding
        # character; unichr() does the same but returns a Unicode character.
        # ord() is the inverse of chr() (for 8-bit strings) or unichr() (for
        # Unicode objects): it takes a one-character string and returns its
        # ASCII or Unicode value.
        # An empty read means the peer closed the connection; ord('') would
        # raise TypeError, so stop in that case too.
        if not b or ord(b) == 0:
            break
        result += b
finally:
    # Close the socket even if connect/send/recv raised.
    s.close()

# The reply is a ';'-separated list of key=value pairs; print one per line.
result = sorted(result.split(";"))
print("\n".join(result))
# python test2.py
CHECK_PROGRAM_UPDATE=1
CHECK_REAPER=1
COMMAND_CHECK=0
EXPIRE_COMMENT=0
EXPIRE_DOWNTIME=0
HFRESHNESS_CHECK=0
HOST_CHECK=2
LOG_ROTATION=1
ORPHAN_CHECK=1
PROGRAM_RESTART=0
PROGRAM_SHUTDOWN=0
RESCHEDULE_CHECKS=0
RETENTION_SAVE=1
SCHEDULED_DOWNTIME=0
SERVICE_CHECK=2
SFRESHNESS_CHECK=1
SLEEP=0
SQUEUE_ENTRIES=11
STATUS_SAVE=1
USER_FUNCTION=0

@core loadctl

获取和配置load control settings的值

@core loadctl
@core loadctl setting=value
@core loadctl setting1=value1;setting2=value2;...

# The load control settings are Nagios internal settings; we do not recommend modifying them unless necessary.
#!/usr/bin/env python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# Sends the @core loadctl command to the Nagios query handler and prints the
# returned settings, one per line, in sorted order.
# Written for Python 2 (plain str is sent over the socket).

import socket, sys, os

nagios_qh = '/var/nagios/rw/nagios.qh'

s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
    s.connect(nagios_qh)
    s.sendall('@core loadctl\0')

    result = ''
    while True:
        b = s.recv(1)
        # Stop at the \0 terminator; an empty read means the peer closed the
        # connection, and ord('') would raise TypeError.
        if not b or ord(b) == 0:
            break
        result += b
finally:
    # Close the socket even if connect/send/recv raised.
    s.close()

# The reply is a ';'-separated list of key=value pairs; print one per line.
result = sorted(result.split(";"))
print("\n".join(result))
# python test3.py
backoff_change=4723
backoff_limit=2.50
changes=0
jobs_limit=15745
jobs_max=15745
jobs_min=20
jobs_running=0
load=0.00
nofile_limit=65535
nproc_limit=15845
options=0
rampup_change=1180
rampup_limit=0.80

@nerd

# NERD (Nagios Event Radio Dispatcher),允许订阅service or host check results
@nerd list\0
@nerd subscribe <channel>\0
@nerd unsubscribe <channel>\0

# echo -e '@nerd list\0' | nc -U /var/nagios/rw/nagios.qh
hostchecks Host check results
servicechecks Service check results
opathchecks Host and service checks in gource's log format

# hostchecks & servicechecks channels 接收主机及服务状态的更新信息,以行结束符\n分隔。
格式如下:
主机 <hostname> from <old_code> -> <new_code>: <description>
服务 <hostname>;<servicename> from <old_code> -> <new_code>: <description>

# host exit code:
0 UP
1 DOWN
2 UNREACHABLE

# service exit code:
0 OK
1 WARNING
2 CRITICAL
3 UNKNOWN
# output
bjyh-48-81;icmp from 0 -> 0: OK - 192.168.48.81: rta 0.511ms, lost 0%|rta=0.511ms;500.000;500.000;0; pl=0%;80;80;; rtmax=0.695ms;;;; rtmin=0.375ms;;;;
bjyh-48-81 from 0 -> 0: OK - 192.168.48.81: rta 0.493ms, lost 0%|rta=0.493ms;500.000;500.000;0; pl=0%;80;80;; rtmax=0.520ms;;;; rtmin=0.464ms;;;;
bjyh-48-87;icmp from 0 -> 2: CRITICAL - 192.168.48.87: rta nan, lost 100%|rta=0.000ms;500.000;500.000;0; pl=100%;80;80;; rtmax=0.000ms;;;; rtmin=0.000ms;;;;
bjyh-48-87 from 0 -> 1: CRITICAL - 192.168.48.87: rta nan, lost 100%|rta=0.000ms;500.000;500.000;0; pl=100%;80;80;; rtmax=0.000ms;;;; rtmin=0.000ms;;;;
#!/usr/bin/env python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# Subscribes to the NERD hostchecks and servicechecks channels and prints a
# one-line summary for every check result received.
# Written for Python 2 (plain str is sent over and read from the socket).

import socket
import re

host_status = {'0': 'UP', '1': 'DOWN', '2': 'UNREACHABLE'}
service_status = {'0': 'OK', '1': 'WARNING', '2': 'CRITICAL', '3': 'UNKNOWN'}

nagios_qh = '/var/nagios/rw/nagios.qh'

# Service events: "<hostname>;<servicename> from <old_code> -> <new_code>: <description>"
_service_re = re.compile('(.*?);(.*?) from ([0-9]+) -> ([0-9]+): (.*)$')
# Host events: "<hostname> from <old_code> -> <new_code>: <description>"
_host_re = re.compile('(.*?) from ([0-9]+) -> ([0-9]+): (.*)$')


def format_event(line):
    """Return the printable summary for one NERD event line.

    The service pattern is tried first, then the host pattern. Returns None
    when the line matches neither, so the caller can skip it instead of
    crashing. A status code missing from the lookup tables is shown as-is.
    """
    m = _service_re.match(line)
    if m:
        return '[ service ] host: %s, service: %s, check_status: %s, info: %s .' % (
            m.group(1), m.group(2),
            service_status.get(m.group(4), m.group(4)), m.group(5))
    m = _host_re.match(line)
    if m:
        return '[ host ] host: %s, check_status: %s, info: %s .' % (
            m.group(1), host_status.get(m.group(3), m.group(3)), m.group(4))
    return None


def _emit(line):
    """Print the summary for one event line; unrecognized lines are ignored."""
    text = format_event(line)
    if text is not None:
        print(text)


def main():
    """Subscribe to both channels and print events until the peer closes."""
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        s.connect(nagios_qh)
        s.sendall('@nerd subscribe hostchecks\0')
        s.sendall('@nerd subscribe servicechecks\0')

        # Events are separated by "\n"; one recv() may return several events
        # or only part of one, so keep the unfinished tail between reads.
        pending = ''
        while True:
            chunk = s.recv(1024)
            if not chunk:
                # Empty read: the peer closed the connection.
                break
            pending += chunk
            lines = pending.split('\n')
            pending = lines.pop()
            for line in lines:
                _emit(line)
        if pending:
            _emit(pending)
    finally:
        # Close the socket on every exit path (including Ctrl-C).
        s.close()


if __name__ == '__main__':
    main()
[ service ] host: bjyh-48-87, service: icmp, check_status: CRITICAL, info: CRITICAL - 192.168.48.87: rta nan, lost 100%|rta=0.000ms;500.000;500.000;0; pl=100%;80;80;; rtmax=0.000ms;;;; rtmin=0.000ms;;;;
[ host ] host: bjyh-48-87, check_status: DOWN, info: CRITICAL - 192.168.48.87: Host unreachable @ 192.168.48.86. rta nan, lost 100%|rta=0.000ms;500.000;500.000;0; pl=100%;80;80;; rtmax=0.000ms;;;; rtmin=0.000ms;;;;
[ host ] host: bjyh-48-85, check_status: UP, info: OK - 192.168.48.85: rta 0.501ms, lost 0%|rta=0.501ms;500.000;500.000;0; pl=0%;80;80;; rtmax=0.608ms;;;; rtmin=0.446ms;;;;
[ service ] host: bjyh-48-83, service: icmp, check_status: OK, info: OK - 192.168.48.83: rta 0.449ms, lost 0%|rta=0.449ms;500.000;500.000;0; pl=0%;80;80;; rtmax=0.505ms;;;; rtmin=0.364ms;;;;
05-22 23:35