Question

这是我的代码，这是一个持续运行的脚本，用于检查etcd集群的状态：

#!/usr/bin/python
# -*- coding: utf-8 -*-
import datetime
import time
from apscheduler.scheduler import Scheduler
import json
import yaml
import requests
import sys
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# Interval, in seconds, between two runs of each scheduled job.
# The explicit conversion keeps the value numeric on Python 3, where input()
# returns a string; on Python 2 input() already evaluates the typed text.
# float() (rather than int()) keeps fractional intervals working.
user_input = float(input("Time interval for script to re-run(in seconds): "))
# Start the scheduler
sched = Scheduler()
# NOTE(review): presumably keeps the process alive while the scheduler thread
# runs -- confirm against the APScheduler version in use.
sched.daemonic = False
sched.start()

#Function to get the health of the cluster
def getClusterHealth():
    """Check the health endpoint of every node listed in ``config1.yaml``.

    Reads the ``nodes`` list from ``config1.yaml`` and, for each entry,
    requests ``http://<node>:2379/health`` and prints the URL and the result.
    ``sendEmail()`` is called for every node whose ``health`` field is not
    the string ``"true"``. A connection error is printed and the node is
    skipped; no email is sent in that case.
    """
    with open('config1.yaml', 'r') as f:
        # safe_load is enough for plain configuration data and does not
        # require an explicit Loader argument.
        doc = yaml.safe_load(f)

    # Visit every configured node; an empty list simply does nothing.
    for node in doc["nodes"]:
        url = 'http://' + node + ':2379/health'
        print(url)
        try:
            response = requests.get(url)
            data = response.json()
        except requests.exceptions.ConnectionError as e:
            print(e)
            continue

        if data['health'] == "true":
            print("Cluster is healthy")
        else:
            print("Cluster is not healthy")
            sendEmail()

#Function to get the metrics of the cluster
def getClusterMetrics(url=None):
    """Download cluster metrics and store them in ``clusterMetrics.txt``.

    Args:
        url: Optional URL to fetch. When omitted, it is built from the first
            entry of ``nodes`` in ``config1.yaml``.
            NOTE(review): the ``/metrics`` path on port 2379 is an
            assumption based on etcd's documented metrics endpoint --
            confirm this is the endpoint that was intended.

    On a connection error, prints ``Cluster Down.`` and calls
    ``sendEmail()``; nothing is written to the file in that case.
    """
    if url is None:
        with open('config1.yaml', 'r') as f:
            doc = yaml.safe_load(f)
        nodes = doc["nodes"]
        if not nodes:
            print("No nodes configured; skipping metrics collection.")
            return
        url = 'http://' + nodes[0] + ':2379/metrics'

    try:
        response = requests.get(url)
    except requests.exceptions.ConnectionError:
        print("Cluster Down.")
        sendEmail()
        return

    # The with-statement closes the file; no explicit close() is needed.
    with open('clusterMetrics.txt', 'w') as f:
        f.write(response.text)
    print("Cluster Metrics saved in file: clusterMetrics.txt")

#Function to send emails in case of failures
def sendEmail():
    """Send a fixed alert message through the SMTP server on ``localhost``.

    The sender, recipient list, subject and body are hard-coded below.
    Any exception raised by ``smtplib`` propagates to the caller; the
    connection is closed in every case.
    """
    sender = "etcd Cluster - 10.3.3.3"
    recipients = ["sample@email.com"]

    msg = MIMEText("etcd Cluster Down Sample Mail")
    msg["Subject"] = "etcd Cluster Monitoring Test Multiple ID"
    msg['From'] = sender
    msg['To'] = ", ".join(recipients)

    s = smtplib.SMTP('localhost')
    try:
        s.sendmail(sender, recipients, msg.as_string())
        s.quit()
    finally:
        # Release the socket even when sendmail() or quit() raises;
        # close() is harmless after a successful quit().
        s.close()

if __name__ == "__main__":
    # Register both checks with the same user-supplied interval. The
    # functions are passed directly; wrapping them in a lambda adds nothing.
    sched.add_interval_job(getClusterHealth, seconds=user_input)
    sched.add_interval_job(getClusterMetrics, seconds=user_input)

运行状况检查必须每30秒执行一次，即脚本每30秒运行一次。但是，我还需要一个切换（toggle）变量，用来每两分钟检查一次健康状态是否发生了变化。如果初始健康状态和最终健康状态相同，那么状态为健康时不发送邮件，状态为不健康时发送邮件。即使最终状态与初始状态相同，只要在这两分钟内发生过从健康到不健康（或从不健康到健康）的切换，也必须发送电子邮件。哪怕只发生了一次切换，也必须发送邮件。

因此，当集群从健康状态变为不健康状态（或反之）时，我需要发送电子邮件。该脚本每30秒运行一次，但是我们必须每两分钟检查一次状态。如果初始值和最终值之间存在切换/更改，则发送电子邮件。如果没有更改，则仅在初始状态和最终状态都为不健康时发送电子邮件。

切换变量以检查状态变化

0 个答案: