Creating a configurable multi-threaded log file accumulator over SSH in Python (paramiko?)

Date: 2014-03-20 14:30:21

Tags: python multithreading ssh paramiko

I'm about to write this code and thought I'd post my idea here first to see whether anyone has comments, suggestions, etc.

I want to create a class in Python that monitors (and merges) multiple log files at the same time, to be used as part of automated testing. The plan is to run 'tail -f' on each file in its own thread (over SSH with paramiko, perhaps). Then, every few seconds, collect the stdout from each thread and merge it into one in-memory list, tagging every line with its source. That way I can write tests for a distributed system and monitor the logs of a dozen machines at once (many of which serve the same purpose and sit behind load balancers, etc.). Rough pseudocode below, followed by a small sketch of the synchronized list:

Startup:
    for machine, logfile in config_list:
        create thread running tail -f on logfile on machine
    create accumulator thread that:
        wakes up each second and 
        gets all config_list stdout and merges it into one in-memory list

Test_API:
    method to get/query data from the in memory accumulator.  
    in memory list would be the only data item needed to be synchronized
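For example, that synchronized in-memory list might look roughly like this (a minimal sketch; the Accumulator class and its drain() method are hypothetical names, just to illustrate the lock-protected list):

import threading


class Accumulator(object):
    def __init__(self):
        self._lock = threading.Lock()
        self._lines = []

    def append(self, host, logfile, line):
        # Called from each per-machine tail thread.
        with self._lock:
            self._lines.append((host, logfile, line))

    def drain(self):
        # Called from the test code every few seconds: return everything
        # collected so far and clear the list.
        with self._lock:
            lines, self._lines = self._lines, []
            return lines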

So I'm wondering: is paramiko the right choice? Any caveats about working with threads (I've never done anything with threads in Python)? Any other ideas that come to mind?

Thanks in advance!

Feel free to post code snippets. I'll update this post with a working solution once it's done. I expect it to be fairly small.

Just found: Creating multiple SSH connections at a time using Paramiko


EDIT:

Based on several other posts, here is what I have so far. It only does a plain tail, not tail -f, and doesn't have the polling I need.

from someplace import TestLogger
import threading
import paramiko


def start_watching():

    logger = TestLogger().get()
    logs_to_watch = [('somemachine1', '/var/log/foo'),
                     ('somemachine2', '/var/log/bar')]

    threads = []
    for machine, filename in logs_to_watch:
        logger.info(machine)
        logger.info(filename)
        t = threading.Thread(target=workon, args=(machine, filename))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    for merge_line in merged_log:
        logger.info(merge_line.dump())

outlock = threading.Lock()
merged_log = []

def workon(host, logfile):
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(host, username='yourusername', allow_agent=True, look_for_keys=True)
    stdin, stdout, stderr = ssh.exec_command('sudo tail ' + logfile)

    # Read stdout line by line, taking the lock only while appending so the
    # other tail threads are not blocked while this one waits for output.
    for line in iter(stdout.readline, ''):
        with outlock:
            merged_log.append(MergeLogLine(line, host, logfile))


class MergeLogLine():
    def __init__(self, line, host, logfile):
        self._line = line
        self._host = host
        self._logfile = logfile

    def line(self):
        return self._line

    def host(self):
        return self._host

    def logfile(self):
        return self._logfile

    def dump(self):
        return self._line + '(from host = ' + self._host + ', log = ' + self._logfile + ')'

1 Answer:

Answer 0 (score: 1)

This turned out to be fairly involved. Here is a working sample:

Sample 'client code':

import sys
import traceback
import tail_accumulate as ta
import time


def main(argv):

    user = 'cmead'
    logs_to_watch = [('somemachine1', '/var/log/bar/sample.log'),
                     ('somemachine2', '/var/log/foo')]

    tac = ta.TailAccumulateConfig(logs_to_watch, user)

    try:
        ta.start_watching(tac)

        time.sleep(10)

        for merge_line in ta.get_merged_log():
            print(merge_line.dump())

    except Exception:
        print(traceback.format_exc())

    ta.stop()


if __name__ == "__main__":
    main(sys.argv[1:])

The tail_accumulate package:

import threading
import paramiko
import select

threads = []
stopFlag = None


class TailAccumulateConfig():
    def __init__(self, log_list, user):
        self._log_list = log_list
        self._user = user

    def user(self):
        return self._user

    def log_list(self):
        return self._log_list


def start_watching(tail_accumulate_config):
    global stopFlag
    stopFlag = threading.Event()
    for machine, filename in tail_accumulate_config.log_list():
        t = LogListenWorker(stopFlag, machine, filename, tail_accumulate_config.user())
        t.start()
        global threads
        threads.append(t)


def stop():
    global stopFlag
    stopFlag.set()


def get_merged_log():
    with outlock:
        global merged_log
        temp = merged_log[:]
        del merged_log[:]
        return temp

outlock = threading.Lock()
merged_log = []


class LogListenWorker(threading.Thread):
    def __init__(self, event, host, logfile, username):
        threading.Thread.__init__(self)
        self.stopped = event
        self.host = host
        self.logfile = logfile
        self.username = username

    def run(self):
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(self.host, username=self.username)
        transport = ssh.get_transport()
        channel = transport.open_session()
        channel.exec_command('sudo tail -f ' + self.logfile)

        # Poll the channel with a timeout so the stop event is checked
        # regularly instead of blocking forever in recv().
        while not self.stopped.isSet():
            try:
                rl, wl, xl = select.select([channel], [], [], 3.0)
                if not rl:
                    continue
                # Must be stdout
                line = channel.recv(1024)
            except Exception:
                break
            if not line:
                # An empty read means the remote side closed the channel.
                break
            with outlock:
                sublines = line.split('\n')
                for subline in sublines:
                    merged_log.append(MergeLogLine(subline, self.host, self.logfile))

        ssh.close()


class MergeLogLine():
    def __init__(self, line, host, logfile):
        self._line = line
        self._host = host
        self._logfile = logfile

    def line(self):
        return self._line

    def host(self):
        return self._host

    def logfile(self):
        return self._logfile

    def dump(self):
        return self._line + ' ---> (from host = ' + self._host + ', log = ' + self._logfile + ')'
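One caveat with the worker above: channel.recv(1024) returns raw chunks, so a single log line can be split across two reads and show up as two MergeLogLine entries. A minimal sketch of a helper that could address this (split_complete_lines is a hypothetical name, not part of the sample, and it assumes Python 2 strings as above; on Python 3 the recv() result is bytes and would need decoding first):

def split_complete_lines(buf):
    # Return (complete_lines, remainder): everything up to the last newline,
    # split into lines, plus any trailing partial line to carry forward.
    complete, sep, rest = buf.rpartition('\n')
    if not sep:
        return [], buf
    return complete.split('\n'), rest

The run() loop would keep a buf string across iterations, call split_complete_lines(buf + line) after each recv(), append the complete lines to merged_log under outlock, and keep the remainder in buf for the next read.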