续:
推荐阅读:
Zabbix-(10)-Zabbix-Nginx状态监控
Zabbix-(11)-Zabbix-SNMP监控和WEB监控
Zabbix-(12)-Zabbix-MySQL监控Ⅰ
环境:
故障自治愈功能实际上是多种功能的集合,是 zabbix 内的多个模块之间交互提供的功能,当 zabbix 监控到指定的监控项异常的时候,通过指定的操作使故障自动恢复,通常是重启服务等一些简单的操作,也可以调用脚本执行比较复杂的操作。
具体的操作就是:
设置监控项和触发器 --> 新建动作 --> 在触发条件里面添加操作 --> 在远程主机通过 zabbix 客户端执行命令或脚本
大概的步骤如下:
1.开启zabbix sudo权限
2.配置允许特殊字符
3.配置远程命令
4.验证和测试
root@Mysql-slave:~# grep "^[a-zA-Z]" /etc/zabbix/zabbix_agentd.conf
PidFile=/var/run/zabbix/zabbix_agentd.pid
LogFile=/var/log/zabbix/zabbix_agentd.log
LogFileSize=0
EnableRemoteCommands=1
LogRemoteCommands=1
Server=172.20.32.102,172.20.32.101
ListenPort=10050
ListenIP=0.0.0.0
StartAgents=5
ServerActive=172.20.32.102
Hostname=172.20.32.105
AllowRoot=1
User=root
Include=/etc/zabbix/zabbix_agentd.d/*.conf
UnsafeUserParameters=1
root@Mysql-slave:~# systemctl restart zabbix-agent.service
root@Mysql-slave:~# vim /etc/sudoers
#
# This file MUST be edited with the 'visudo' command as root.
#
# Please consider adding local content in /etc/sudoers.d/ instead of
# directly modifying this file.
#
# See the man page for details on how to write a sudoers file.
#
Defaults env_reset
Defaults mail_badpass
Defaults secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin"
# Host alias specification
# User alias specification
# Cmnd alias specification
# User privilege specification
root ALL=(ALL:ALL) ALL
zabbix ALL = NOPASSWD: ALL #增加zabbix用户sudo权限
#zabbix ALL = NOPASSWD: ALL
# Members of the admin group may gain root privileges
%admin ALL=(ALL) ALL
# Allow members of group sudo to execute any command
%sudo ALL=(ALL:ALL) ALL
# See sudoers(5) for more information on "#include" directives:
#includedir /etc/sudoers.d
root@Mysql-slave:~# vim /lib/systemd/system/zabbix-agent.service
[Unit]
Description=Zabbix Agent
After=syslog.target
After=network.target
[Service]
Environment="CONFFILE=/etc/zabbix/zabbix_agentd.conf"
EnvironmentFile=-/etc/default/zabbix-agent
Type=forking
Restart=on-failure
PIDFile=/run/zabbix/zabbix_agentd.pid
KillMode=control-group
ExecStart=/usr/sbin/zabbix_agentd -c $CONFFILE
ExecStop=/bin/kill -SIGTERM $MAINPID
RestartSec=10s
User=root
Group=root
[Install]
WantedBy=multi-user.target
root@Mysql-slave:~# systemctl daemon-reload
root@Mysql-slave:~# systemctl restart zabbix-agent.service
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# pwd
/etc/zabbix/zabbix_agentd.d
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# vim nginx_status.sh
/usr/bin/curl "http://127.0.0.1:"$NGINX_PORT"/nginx_status/" 2>/dev/null| awk NR==3 | awk '{print $2}'
}
# Print the third whitespace-separated field of line 3 of the page served at
# /nginx_status/ on 127.0.0.1 (port read from NGINX_PORT). curl's stderr is
# discarded, so a failed request yields no output.
nginx_requests() {
  /usr/bin/curl "http://127.0.0.1:"$NGINX_PORT"/nginx_status/" 2>/dev/null \
    | awk 'NR==3 {print $3}'
}
# Run the helper that matches the metric name held in NGINX_COMMAND.
# A name that matches no arm produces no output.
case "$NGINX_COMMAND" in
  active)   nginx_active ;;
  reading)  nginx_reading ;;
  writing)  nginx_writing ;;
  waiting)  nginx_waiting ;;
  accepts)  nginx_accepts ;;
  handled)  nginx_handled ;;
  requests) nginx_requests ;;
esac
}
#######################################
# Entry point: dispatch on the first argument.
# Arguments:
#   $1 - sub-command; only "nginx_status" is recognised
#   $2 - forwarded as the first argument of nginx_status_fun (PORT in the usage text)
#   $3 - forwarded as the second argument of nginx_status_fun (key in the usage text)
# Outputs:
#   Whatever nginx_status_fun prints, or a usage line for any other sub-command.
#######################################
main() {
  case "$1" in
    nginx_status)
      # Quoted so the values reach nginx_status_fun exactly as given, without
      # word splitting or pathname expansion.
      nginx_status_fun "$2" "$3"
      ;;
    *)
      # Any other input prints the help text.
      echo "Usage: $0 {nginx_status + PORT + key}"
      ;;
  esac
}

main "$1" "$2" "$3"
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# chmod a+x nginx_status.sh
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# bash nginx_status.sh nginx_status 80 active
1
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# bash nginx_status.sh nginx_status 80 reading
0
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# bash nginx_status.sh nginx_status 80 requests
29952
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# vim nginx_status1.sh
#!/bin/bash
#
# Edited on 20200501 by likai.tech
# A metric name is required. Without one there is nothing to report, so print
# the usage text on stderr and stop here instead of going on to poll nginx.
if [[ $# -eq 0 ]]; then
  printf 'Usage: %s {active|accepts|handled|requests|reading|writing|waiting}\n' \
    "${0##*/}" >&2
  exit 1
fi
# Download http://172.20.32.105/nginx_status and overwrite
# /tmp/nginx_status.log with the response body; curl's stderr is discarded.
save_status() {
  /usr/bin/curl http://172.20.32.105/nginx_status \
    >/tmp/nginx_status.log 2>/dev/null
}
#######################################
# Print one counter from the status snapshot in /tmp/nginx_status.log.
# Arguments:
#   $1 - counter name: active | accepts | handled | requests |
#        reading | writing | waiting
# Outputs:
#   The selected field on stdout; an error message on stderr for any other name.
# Returns:
#   1 for an unknown counter name, otherwise the status of the extraction
#   pipeline.
#######################################
get_status() {
  local status_file=/tmp/nginx_status.log

  # Each arm prints its value directly. The earlier version stored it in a
  # variable whose name differed in letter case from the one it echoed, so the
  # output was always empty.
  case "$1" in
    active)
      # Third field of the line containing "active" (case-insensitive).
      grep -i "active" "$status_file" | awk '{print $3}'
      ;;
    accepts)
      # Fields 1-3 of the line that starts with a space followed by a digit.
      awk '/^ [0-9]/ {print $1}' "$status_file"
      ;;
    handled)
      awk '/^ [0-9]/ {print $2}' "$status_file"
      ;;
    requests)
      awk '/^ [0-9]/ {print $3}' "$status_file"
      ;;
    reading)
      # Fields 2, 4 and 6 of the last line of the snapshot.
      tail -n 1 "$status_file" | awk '{print $2}'
      ;;
    writing)
      tail -n 1 "$status_file" | awk '{print $4}'
      ;;
    waiting)
      tail -n 1 "$status_file" | awk '{print $6}'
      ;;
    *)
      printf 'get_status: unknown counter: %s\n' "$1" >&2
      return 1
      ;;
  esac
}
#######################################
# Entry point: refresh the saved status snapshot, then print one counter.
# Arguments:
#   $1 - counter name, forwarded to get_status
#######################################
main() {
  save_status
  # Quoted so the name reaches get_status as a single, unexpanded word.
  get_status "$1"
}

main "$1"
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# chmod a+x nginx_status1.sh
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# bash nginx_status1.sh active
1
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# bash nginx_status1.sh reading
0
root@Mysql-slave:/etc/zabbix/zabbix_agentd.d# bash nginx_status1.sh requests
30168
在配置文件中添加自定义监控项。
### Option: UserParameter
# User-defined parameter to monitor. There can be several user-defined parameters.
# Format: UserParameter=
# See 'zabbix_agentd' directory for examples.
#
# Mandatory: no
# Default:
# UserParameter=
UserParameter=nginx_status[*],/bin/bash /etc/zabbix/zabbix_agentd.d/nginx_status1.sh "$1"
UserParameter=nginx.status[*],/bin/bash /etc/zabbix/zabbix_agentd.d/nginx_status.sh "$1" "$2" "$3"
root@Mysql-slave:~# systemctl restart zabbix-agent.service
导入已经准备好的 nginx-template-nginx 模板
键值:net.tcp.listen[80]
root@Zabbix-server:~# /apps/zabbix_server/bin/zabbix_get -s 172.20.32.105 -p10050 -k "net.tcp.listen[80]"
1
将被测试的服务手动停止运行,验证能否自动启动或重启,更多操作可以远程执行脚本。
如下:手动将 Nginx、Tomcat 等 web 服务停止后,验证 zabbix 能否通过 agent 自动将其启动或重启。
root@Mysql-slave:~# /apps/nginx/sbin/nginx -s stop
在 zabbix server主机上测试:
root@Zabbix-server:~# /apps/zabbix_server/bin/zabbix_get -s 172.20.32.105 -p10050 -k "net.tcp.listen[80]"
0
root@Zabbix-server:~# /apps/zabbix_server/bin/zabbix_get -s 172.20.32.105 -p10050 -k "net.tcp.listen[80]"
0
root@Zabbix-server:~# /apps/zabbix_server/bin/zabbix_get -s 172.20.32.105 -p10050 -k "net.tcp.listen[80]"
1
root@Zabbix-server:~# /apps/zabbix_server/bin/zabbix_get -s 172.20.32.105 -p10050 -k "net.tcp.listen[80]"
1
#可以看到当nginx停止之后获取到的数值为0 ,过了一段时间之后变成了1 成功自己启动nginx服务
推荐阅读:
Zabbix-(10)-Zabbix-Nginx状态监控
Zabbix-(11)-Zabbix-SNMP监控和WEB监控
Zabbix-(12)-Zabbix-MySQL监控Ⅰ