ProcessPoolExecutor 在使用 map 时挂起

时间:2019-05-03 11:38:25

标签: python python-3.x concurrency threadpool

我在使用python 3和 concurrent.futures ProcessPoolExecutor和map函数时遇到问题。

我的代码是这样的:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import psycopg2
import psycopg2.extensions
import psycopg2.extras
from asq import query
import select
import concurrent.futures
import asyncio

class UpdateElastic:
    """Load reference rows from PostgreSQL and hold an Elasticsearch client.

    Constructing an instance runs two queries over an asynchronous psycopg2
    connection, stores the fetched rows on the instance, closes the
    connection, and then creates the Elasticsearch client.

    Attributes set by ``__init__``:
        report_files: rows fetched by ``SELECT * FROM table``.
        payment_events: rows fetched by ``SELECT * FROM othertable``.
        esconn: the ``Elasticsearch`` client.

    NOTE(review): when a bound method of this class is handed to a
    ``ProcessPoolExecutor``, the instance is pickled together with the
    method, so everything stored on ``self`` has to pickle cleanly --
    confirm this for the fetched rows and for the Elasticsearch client.
    """

    def __init__(self):
        """Fetch both tables, then create the Elasticsearch client.

        Raises:
            psycopg2.OperationalError: if polling the connection reports an
                unexpected state (see ``wait``).
        """
        conn = psycopg2.connect(
            "dbname=db user=mad password=hat host=blah",
            async_=True
        )
        # Release the connection even if connecting or a query fails;
        # previously an exception here left the connection open.
        try:
            self.wait(conn)
            cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
            self.report_files = self._fetch_all(cur, "SELECT * FROM table")
            self.payment_events = self._fetch_all(cur, "SELECT * FROM othertable")
            cur.close()
        finally:
            conn.close()
        self.esconn = Elasticsearch([{'host':'elasticsearch.example.com','port':1234}])

    def _fetch_all(self, cur, sql):
        """Execute ``sql`` on ``cur``, wait for it to finish, return all rows."""
        cur.execute(sql)
        self.wait(cur.connection)
        return cur.fetchall()

    def wait(self, conn):
        """Block until ``conn.poll()`` reports ``POLL_OK``.

        Args:
            conn: object exposing ``poll()`` and ``fileno()`` (here a
                psycopg2 connection opened with ``async_=True``).

        Raises:
            psycopg2.OperationalError: if ``poll()`` returns a state other
                than ``POLL_OK``, ``POLL_WRITE`` or ``POLL_READ``.
        """
        while True:
            state = conn.poll()
            if state == psycopg2.extensions.POLL_OK:
                return
            if state == psycopg2.extensions.POLL_WRITE:
                # Sleep until the socket is writable, then poll again.
                select.select([], [conn.fileno()], [])
            elif state == psycopg2.extensions.POLL_READ:
                # Sleep until the socket is readable, then poll again.
                select.select([conn.fileno()], [], [])
            else:
                raise psycopg2.OperationalError("poll() returned %s" % state)

    def get_es_indices(self):
        """Return the index names behind the ``digital-sales-csv*`` alias pattern.

        Returns:
            list: the keys of the mapping returned by
            ``self.esconn.indices.get_alias``.
        """
        indices = self.esconn.indices.get_alias("digital-sales-csv*")
        return list(indices.keys())

    def update_documents(self, index, scroll_id=None):
        """Print ``index``; the real update logic is still disabled.

        Args:
            index: value to print (intended to be an index name).
            scroll_id: currently unused.
        """
        print(index)
        # Draft of the intended implementation, currently disabled:
        # return index
        # documents = _get_es_documents(conn, index)
        # print(documents['_scroll_id'])
        # scroll_id = documents['_scroll_id']
        # for document in documents['hits']['hits']:
        #     ids = {
        #         "report_id": document['_source']['report_id'],
        #         "payment_id": document['_source']['payment_id'],
        #         "document_id": document['_id']
        #     }
        #     asyncio.run(_update_es_document(conn, index, report_files, payment_events, ids))

        # update_documents(index, conn, report_files, payment_events, scroll_id)

def main():
    """Create an UpdateElastic and run update_documents over a process pool.

    The results of ``executor.map`` are iterated so that the ``timeout``
    takes effect and any exception raised while dispatching or running a
    task is re-raised here instead of being silently dropped.

    Raises:
        concurrent.futures.TimeoutError: if a result is not available
            within 10 seconds of the ``map`` call.
        Exception: whatever a worker task raised.
    """
    print('main called')

    print('instantiating UpdateElastic')
    us = UpdateElastic()
    print('UpdateElastic instantiated')

    print('setting up ProcessPoolExecutor')
    blah = ['abc', 'def', 'ghi']
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        print('calling executor.map')
        # Executor.map() only reports worker errors and enforces `timeout`
        # while its result iterator is being consumed, so drain it.
        # Note: leaving the `with` block still waits for running tasks.
        for _ in executor.map(us.update_documents, blah, timeout=10):
            pass

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

使用此代码,我所期望的只是打印出我传递的数组的值,所以:

  • 'abc'
  • 'def'
  • 'ghi'

但是,在打印出 'calling executor.map' 之后,它就挂起了。

当我将构造函数更改为:

class UpdateElastic:
    """Variant of UpdateElastic whose constructor does no work."""

    def __init__(self):
        """Create the instance without setting any attributes."""
        # Original constructor body, disabled for this experiment:
        # conn = psycopg2.connect(
        #     "dbname=db user=mad password=hat host=blah",
        #     async_=True
        # )
        # self.wait(conn)
        # cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        # cur.execute("SELECT * FROM table",)
        # self.wait(cur.connection)
        # self.report_files = cur.fetchall()
        # cur.execute("SELECT * FROM othertable",)
        # self.wait(cur.connection)
        # self.payment_events = cur.fetchall()
        # cur.close()
        # conn.close()
        # self.esconn = Elasticsearch([{'host':'elasticsearch.example.com','port':1234}])

(在构造函数中仅包含“pass”),它实际上会按预期方式打印出数组的值。

我在 OSX Mojave 10.14.2 上使用 python 3.7.3 运行它。

0 个答案:

没有答案