如何在Flask中并行化任务?

时间:2016-08-19 08:51:12

标签: python flask parallel-processing ping flask-restful

我正在向我的Flask服务器发送XHR请求,以便在网络上进行多次ping操作

资源

def get(self, site_id):
    """Ping every printer and return the results keyed by site and hostname.

    ``printers`` and ``response`` are not defined in the visible code;
    presumably they are prepared in the part the author elided (``…``).
    """
    …
    # The pings run strictly one after another: each call to
    # network_utils.ping() has to return before the next one starts, so the
    # request time grows with the number of printers.
    for printer in printers:
        hostname = printer['hostname']
        response[site_id][hostname] = network_utils.ping(hostname)

    return response

在shell.execute内部,我使用subprocess.check_output来运行原生ping:

def ping(hostname):
    """Ping ``hostname`` once and return the parsed statistics.

    The native ``ping`` binary is run through ``shell.execute`` with the
    flags ``-c 1 -W 1 -q``; the ``'results'`` entry of what it returns is
    handed to ``output_parser.ping`` and that parser's output is returned.
    """
    raw = shell.execute(['ping', '-c', '1', '-W', '1', '-q', hostname])
    return output_parser.ping(raw['results'])

输出

{
    "test-site": {
        "avg": 0.093, "max": 0.093, "mdev": 0.0, "min": 0.093,
        "1.1.1.1": { "avg": null, "max": null, "mdev": null, "min": null},
        "1.2.3.4": { "avg": null, "max": null, "mdev": null, "min": null},
        "127.0.0.1": { "avg": 0.061, "max": 0.061, "mdev": 0.0, "min": 0.061}
    }
}

问题

ping是按顺序运行的,这使得请求超级慢(需要几秒钟)。我怎样才能加快速度呢?

3 个答案:

答案 0 :(得分:3)

最好的选择是使用线程,因为您的问题是I/O密集型(I/O-bound)的。我使用Semaphore把同时运行的线程数限制为5个。

我把response字典传给ping函数;dict是线程安全的,但是如果你打算做更复杂的事情,应该先阅读this(原文中此处是一个链接)。

def get(self, site_id):
    …
    semaphore = threading.Semaphore(5)
    threads = []

    for printer in printers:
        hostname = printer['hostname']
        threads.append(threading.Thread(target=network_utils.ping,
                          args=(semaphore, response, site_id, hostname)))

    # Start and wait to all threads to finish
    map(lambda t: t.start(), threads)
    map(lambda t: t.join(), threads)

    return response

def ping(semaphore, response, site_id, hostname):
    """Ping ``hostname`` once and record the parsed result in ``response``.

    Intended to be used as a thread target: nothing is returned, the
    outcome is stored in ``response[site_id][hostname]``.

    Args:
        semaphore: Semaphore shared between the worker threads; it is held
            for the duration of the ping.
        response: Shared result dict; ``response[site_id]`` must already
            exist and support item assignment.
        site_id: Key of the site the printer belongs to.
        hostname: Host to ping.
    """
    command = ['ping', '-c', '1', '-W', '1', '-q', hostname]

    # ``with`` releases the semaphore even if the ping or the parser raises,
    # so a failing host cannot permanently use up one of the slots.
    with semaphore:
        # Use a separate name for the shell output: rebinding ``response``
        # here would hide the shared dict and the result would be lost.
        shell_result = shell.execute(command)
        ping_data = output_parser.ping(shell_result['results'])

        response[site_id][hostname] = ping_data

答案 1 :(得分:2)

例如,可以通过gevent让子进程异步运行。

from gevent import subprocess
import gevent

def ping(hostname):
    """Start a ``ping`` subprocess for ``hostname`` without waiting for it.

    Returns the ``Popen`` object; its standard output is captured through a
    pipe so the caller can read it once the process has finished.
    """
    return subprocess.Popen(
        ['ping', '-c', '1', '-W', '1', '-q', hostname],
        stdout=subprocess.PIPE,
    )

def get(self, site_id):
    …
    # Start all the pings in parallel, asynchronously
    # Use dict to reference host: ping subprocess
    # as results will come in at different times
    pings = {printer['hostname']: ping(printer['hostname']) 
             for printer in printers}
    # Wait for all of them to complete
    gevent.wait(pings.values())
    for hostname in pings:
        response[site_id][hostname] = output_parser.ping(pings[hostname].stdout.read())
    return response

答案 2 :(得分:1)

请给Or Duan的答案点赞,因为我的答案是基于他的答案:

资源

class Ping(Resource):
    """Resource that pings a site together with its printers."""

    def get(self, site_id):
        """Return the ping results for the site ``site_id`` and its printers.

        The site's hostname is taken from the first entry of
        ``mast_utils.list_sites(site_id)['results']`` and the printers from
        ``mast_utils.list_printers(site_id)['results']['channels']``; both
        are passed to ``network_utils.parellelize`` with
        ``network_utils.ping`` as the task.
        """
        first_site = mast_utils.list_sites(site_id)['results'][0]
        site_hostname = first_site['hostname']
        channels = mast_utils.list_printers(site_id)['results']['channels']

        return network_utils.parellelize(
            network_utils.ping, site_hostname, channels
        )


# Expose the resource under /ping/<site_id>/.
api.add_resource(Ping, '/ping/<string:site_id>/')

network_utils.py

def ping(hostname):
    """Ping ``hostname`` and return the parsed statistics.

    Runs ``ping -q <hostname> -w 1 -W 1 -i 0.2`` through ``shell.execute``
    and passes the ``'results'`` entry of its return value to
    ``output_parser.ping``.
    """
    options = ['-w', '1', '-W', '1', '-i', '0.2']
    raw = shell.execute(['ping', '-q', hostname] + options)
    return output_parser.ping(raw['results'])


def collect(task, response, **kwargs):
    """Run ``task(**kwargs)`` and store its result under the hostname.

    Args:
        task: Callable invoked with ``**kwargs``.
        response: Mutable mapping that receives the result under the key
            ``kwargs['hostname']``.
        **kwargs: Keyword arguments forwarded to ``task``; must include
            ``hostname``.

    Raises:
        KeyError: If ``hostname`` is not among the keyword arguments.
    """
    hostname = kwargs['hostname']

    response[hostname] = task(**kwargs)


def parellelize(task, site_id, printers, **kwargs):
    """Run ``task`` for a site and, concurrently, for each of its printers.

    ``task`` is first called synchronously for the site itself, then once
    per printer on a separate thread.  All printer threads are started
    before any of them is joined, so the printer tasks overlap instead of
    running one after another.

    Args:
        task: Callable accepting a ``hostname`` keyword argument plus any
            extra ``**kwargs``.  Its result for the site must be a dict,
            because the printer results are merged into it.
        site_id: Hostname of the site; also the single top-level key of the
            returned dict.
        printers: Iterable of mappings that each have a ``'hostname'`` key.
        **kwargs: Extra keyword arguments forwarded to every ``task`` call
            (a ``hostname`` entry is overridden).

    Returns:
        ``{site_id: site_result}`` where ``site_result`` has been updated
        with ``{printer_hostname: printer_result}`` for every printer.
    """
    response = {}
    collect(task, response, **dict(kwargs, hostname=site_id))

    printers_response = {}
    threads = [
        threading.Thread(
            target=collect,
            args=(task, printers_response),
            kwargs=dict(kwargs, hostname=printer['hostname']),
        )
        for printer in printers
    ]

    # Start everything first and only then wait: joining a thread right
    # after starting it would serialise the tasks.
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    response[site_id].update(printers_response)

    return response

test_network_utils.py

class NetwrokUtilsTestCase(unittest.TestCase):
    """Tests for ``network_utils.ping`` and ``network_utils.parellelize``.

    These tests call the real ``network_utils`` functions rather than
    stubs.
    """

    def test_ping_is_null_when_host_unreachable(self):
        """An unreachable host yields ``None`` for every statistic."""
        hostname = 'unreachable'

        response = network_utils.ping(hostname)

        self.assertDictEqual(response, {
            'avg': None,
            'max': None,
            'mdev': None,
            'min': None
        })

    def test_ping_reply_time_when_reachable(self):
        """A reachable host reports a positive average reply time."""
        hostname = '127.0.0.1'

        response = network_utils.ping(hostname)

        self.assertGreater(response['avg'], 0)

    def test_ping_with_only_a_site(self):
        """With no printers, only the site itself is pinged and reported."""
        site_hostname = 'localhost'
        printers = []

        response = network_utils.parellelize(network_utils.ping, site_hostname, printers)

        self.assertGreater(response[site_hostname]['avg'], 0)

    def test_ping_with_printers(self):
        """Printer results are nested under the site's entry by hostname."""
        site_hostname = 'localhost'
        printers = [
            {'hostname': '127.0.0.1', 'port': 22},
            {'hostname': '0.0.0.0', 'port': 22},
        ]

        response = network_utils.parellelize(network_utils.ping, site_hostname, printers)

        self.assertGreater(response[site_hostname]['avg'], 0)
        self.assertGreater(response[site_hostname]['127.0.0.1']['avg'], 0)