1、前言
在某些特定的場景中,我們可能需要使用nagios來監控某些特定的進程的運行狀態,一旦出現異常就觸發報警郵件,以便運維人員及時排查解決問題。
參考文章:
編譯安裝nagios:http://www.reibang.com/p/22cb1ad26117
nrpe監控Linux主機:http://www.reibang.com/p/bc04a9980edc
2、配置被監控主機的監控腳本
通常來說在被監控主機上,我們都會安裝相應的nrpe插件來實現nagios監控。
因此首先我們需要在nrpe的配置文件中添加自定義的監控命令配置
#在文件末尾添加下述配置
[root@web ~]# vim /usr/local/nagios/etc/nrpe.cfg
command[check_nginx]=/usr/local/nagios/libexec/check_nginx #設置check_nginx監控命令所對應的監控腳本
然後創建相應的監控腳本文件:
[root@web ~]# vim /usr/local/nagios/libexec/check_nginx
#!/bin/sh
# Nagios/NRPE plugin: report whether an nginx master process is running.
#
# Output: a single status line on stdout.
# Exit status (Nagios plugin convention): 0 = OK, 2 = CRITICAL, 3 = UNKNOWN.
#
# nginx titles its master process "nginx: master process ...", so that exact
# text is what we look for. pgrep never reports itself, unlike
# `ps aux | grep ...`, where the grep process appears in its own results and
# skews the count.

# Without pgrep we cannot tell either way, so report UNKNOWN rather than
# raising a false CRITICAL.
if ! command -v pgrep >/dev/null 2>&1; then
    echo "UNKNOWN: pgrep not found, cannot check nginx."
    exit 3
fi

# pgrep exits 0 when at least one process matches the pattern.
if pgrep -f 'nginx: master process' >/dev/null 2>&1; then
    echo "Nginx process running ok."
    exit 0
else
    echo "Nginx process is down."
    exit 2
fi
[root@web ~]# chown nagios.nagios /usr/local/nagios/libexec/check_nginx
[root@web ~]# chmod +x /usr/local/nagios/libexec/check_nginx
最後重啟被監控主機的nrpe進程:
[root@web ~]# killall -9 nrpe
[root@web ~]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
[root@web ~]# ps aux | grep nrpe
nagios 26761 0.1 0.1 39272 1460 ? Ss 11:42 0:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
root 26763 0.0 0.0 103320 880 pts/0 S+ 11:43 0:00 grep nrpe
3、編輯監控主機中相應的主機配置文件
接著我們需要到監控主機上編輯配置相應的被監控主機配置文件:
[root@nagios objects]# vim /usr/local/nagios/etc/objects/newlinux.cfg #編輯相應的被監控主機的配置文件
# Remote host to be monitored.
# (Comment kept on its own line: Nagios only treats '#' as a comment at the
# start of a line; inline comments must use ';'.)
define host{
    use         linux-server
    host_name   newlinux
    alias       Linux Server
    address     10.10.10.8
}
# Host group: hosts that share the same role can be added to one group so
# that services can be attached to all of them at once.
# (Comment kept on its own line: Nagios only treats '#' as a comment at the
# start of a line; inline comments must use ';'.)
define hostgroup{
    hostgroup_name  nginx-servers
    alias           The servers who have nginx service
    members         newlinux
}
# nginx process check, applied to every host in the nginx-servers group.
# Inline comments use ';' because Nagios does not treat a mid-line '#' as a
# comment; the text would otherwise become part of the directive's value.
define service{
    use                     generic-service
    hostgroup_name          nginx-servers
    service_description     check_nginx
    check_command           check_nrpe!check_nginx  ; the command name defined earlier in nrpe.cfg; Nagios hands it to the NRPE daemon on the monitored host, which runs the matching script and returns its result
    check_interval          0.5
    retry_interval          0.1                     ; current name of the older retry_check_interval directive
    notification_interval   1
    contact_groups          OP-group                ; contact group that receives the alert e-mails
}
[root@nagios objects]# chown nagios:nagios /usr/local/nagios/etc/objects/newlinux.cfg
[root@nagios objects]# chmod 755 /usr/local/nagios/etc/objects/newlinux.cfg
定義郵件報警的相關聯繫人和組:
[root@nagios ~]# vim /usr/local/nagios/etc/objects/contacts.cfg
# Contact who receives the alert e-mails.
define contact{
    use             generic-contact
    contact_name    charles
    alias           ops-IT
    email           charles@xxxxxxxx.com.cn
}
# Contact group referenced by the service definition's contact_groups.
define contactgroup{
    contactgroup_name   OP-group
    alias               OP
    members             charles
}
最後重啟nagios服務即可:
[root@nagios objects]# service nagios restart