#!/bin/bash
# Watchdog for nginx. When no nginx process is found it frees port 443,
# restarts nginx and monit, and mails a notice with /root/nginx_log3.txt
# attached. Always exits 0.

date=$(date +"%Y-%m-%d %H:%M:%S")

# Number of processes whose name matches "nginx"; 0 when nginx is down.
getpid=$(pgrep nginx | wc -l)

# Compare numerically with -gt. Inside [ ] a bare ">" is an output
# redirection: it creates a file named "0" and reduces the test to
# "is the string non-empty", which is always true for a count.
if [ "$getpid" -gt 0 ]
then
  echo 'Nginx is Fine, It is Running at' "$date"
else
  echo "Error on Nginx and stoped at" "$date"
  # Kill whatever still holds port 443 so nginx can bind to it again.
  sudo fuser -k 443/tcp
  sudo service nginx start
  sudo service monit start
  sudo monit monitor all
  echo "Error on Nginx and stoped at" "$date" | mailx -s "The Nginx Stop - But it is Fixed" -A /root/nginx_log3.txt mymail.com
fi
exit 0;
我看到几种检查服务的方法:
$(ps ax | grep myName | fgrep -v grep | awk '{ print $1 }')
或
if (( $(ps -ef | grep -v grep | grep $service | wc -l) > 0 ))
以及几种方法...
检查进程是否正在运行的方法数不胜数,但问题是:即使我已经终止了 nginx,
任何基于 ps 的命令仍然会返回许多带有 pid 的结果!相比之下,pgrep
在 nginx 服务停止后始终显示 0,而在服务运行时总是显示大于 0 的 pid 数量,
所以 pgrep 的结果似乎可以直接与 0 进行比较。
pgrep 看来是正确的方向,但问题是我有一个 crontab
每 3 分钟运行一次此脚本,即使进程正在运行、pid 数量大于零,它仍然会重新启动服务!
我找到了几个博客,它们都给出了一些示例脚本,但是没有一个管用!
我有monit
可以保持服务正常运行,但是有时会失败。
显然,我不确定该如何把取得的 pid 数量与 0 进行比较:
getpid=$(pgrep nginx | wc -l)
if [ "$getpid" > 0 ]
因为即使该服务正在运行(pid 数量大于 0),它仍然每 3 分钟被重新启动一次
非常感谢您的帮助!
答案 0 :(得分:2)
我可能会检查您尝试提供的www服务,而不是检查该服务是否正在运行。看看这些不同的例子:
# Port-level health checks. Any problem found is appended to http_issues;
# an empty http_issues afterwards means these checks passed.

# nc -z reports its result through the exit status and prints nothing on
# stdout, so capturing its output always yields an empty string. Record the
# exit status instead; https/http are non-empty only when the port answers.
https=""
http=""
if nc -z localhost 443 >/dev/null 2>&1; then
  https="open"
fi
if nc -z localhost 80 >/dev/null 2>&1; then
  http="open"
fi

# Count listening TCP sockets that netstat attributes to nginx.
# NOTE(review): netstat -p usually needs root to show program names -
# confirm this script runs with sufficient privileges.
netstt_cnt=$(netstat -ntlp 2>/dev/null | grep -c nginx)

http_issues=""
if [[ -z "$https" || -z "$http" ]]; then
  http_issues=" -Http/https ports not detected "
fi
# Expect at least the two listeners (80 and 443); extra IPv6 listeners
# must not be treated as a failure.
if [ "$netstt_cnt" -lt 2 ]; then
  http_issues="${http_issues} -Netstat not reporting httpd "
fi
# -- if http_host_check is set perform httpd checks
# Requests the URL in http_host_check and records a problem in http_issues
# when no HTTP response arrives or the status code is 400 or above.
# code_stat receives a printable ", (http_code=NNN) " summary.
# Plain assignment: `local` is only valid inside a function and would
# fail here, leaving code_stat undeclared.
code_stat=""
if [[ -n "${http_host_check:-}" ]]; then
  # curl prints only the numeric status; 000 means no HTTP response.
  http_code=$(curl --write-out '%{http_code}' --silent --output /dev/null "$http_host_check")
  # Treat an empty result (e.g. curl missing) as "no response" as well.
  http_code=${http_code:-0}
  if [ "$http_code" -lt 1 ]; then
    http_issues="${http_issues} -Apache NOT serving pages http_code=$http_code. "
  elif [ "$http_code" -gt 399 ]; then
    http_issues="${http_issues} -Apache Error http_code=$http_code on test page ${http_host_check}. "
  fi
  code_stat=", (http_code=${http_code}) "
fi
# -- php FPM sock, see readonly var $PHP_FPM_SOCK for use set to "" to disable this check.
# Pipes a short dummy payload (the literal text "/dev/null") into the
# php-fpm UNIX socket via socat; a non-zero socat exit status is recorded
# in http_issues.
if [[ -n "${PHP_FPM_SOCK:-}" ]]; then
  # Quote the address so a socket path containing spaces stays one argument.
  if ! echo /dev/null | socat "UNIX:${PHP_FPM_SOCK}" -; then
    http_issues="${http_issues} -php-fpm sock not communicating"
  fi
fi
# Recovery step: when an earlier check recorded a problem in http_issues,
# restart nginx and monit, probe the site again if http_host_check is set,
# and mail a status report. The collected issues are always printed last.
if [[ -n "${http_issues:-}" ]]; then
  # Reuse the timestamp in $date when one was set; otherwise take one now.
  now=${date:-$(date +"%Y-%m-%d %H:%M:%S")}
  echo "Error on Nginx and stoped at" "$now"
  sudo service nginx stop
  sudo service monit stop
  # Kill whatever still holds port 443 so nginx can bind to it again.
  sudo fuser -k 443/tcp
  sleep 10
  sudo service nginx start
  sudo service monit start
  sudo monit monitor all
  if [[ -n "${http_host_check:-}" ]]; then
    # Give the freshly started server a moment before probing it; sleeping
    # after the probe would only delay the mail.
    sleep 5
    # curl prints only the numeric status; 000 means no HTTP response.
    http_code=$(curl --write-out '%{http_code}' --silent --output /dev/null "$http_host_check")
    # Treat an empty result (e.g. curl missing) as "no response" as well.
    http_code=${http_code:-0}
    if [ "$http_code" -lt 1 ]; then
      http_issues="${http_issues} -Apache NOT serving pages http_code=$http_code. "
    elif [ "$http_code" -gt 399 ]; then
      http_issues="${http_issues} -Apache Error http_code=$http_code on test page ${http_host_check}. "
    fi
    code_stat=", (http_code=${http_code}) "
    echo "Webserver had a problem, current status is $code_stat" "$now" | mailx -s "The Nginx stop: $code_stat" -A /root/nginx_log3.txt mymail.com
  fi
fi
echo "Current Status : ${http_issues:-}"
更新:在此处添加了超级简单的示例:
# Minimal health check: request the site and react when no HTTP response
# comes back. curl prints only the numeric status; 000 means no response.
http_code=$(curl --write-out '%{http_code}' --silent --output /dev/null http://my_domain.com/)
# Treat an empty result (e.g. curl missing) as "no response" as well.
http_code=${http_code:-0}
if [ "$http_code" -lt 1 ]; then
  echo "${http_issues} -Apache NOT serving pages http_code=$http_code. "
  # ... do something here (restart web server)..
fi