Python多线程/队列:生产者与消费者的竞态问题

时间:2016-11-16 06:11:41

标签: python multithreading concurrency queue

我有以下python代码,每个线程都在执行以下角色。

生产者(Producer)- 生成json文本
解析器(Parser)- 解析json并生成json对象列表
消费者(Consumer)- 获取列表并持久存储到数据存储

生产者发送json,消费者获取json列表;问题是从生产者到消费者的消息计数不匹配,有一些消息在中途丢失了。我看到生产者和解析器之间的消息计数是一致的。

已解决:在把列表/字典对象放入队列之后,我必须创建新的对象。在之前的代码中,我只是通过清空它们来重复使用同一个列表和dict对象(可能是我想太多了 :))。这导致仍在队列中的对象数据被清空,因为下游线程还没来得及消费它……

以下是工作代码

import threading
import sys, time
import logging
import random
import Queue, json
from datetime import datetime

# Root logger: emit everything from DEBUG upwards and prefix each record
# with the name of the thread that produced it.
logging.basicConfig(
    format='(%(threadName)-9s) %(message)s',
    level=logging.DEBUG,
)

# A maxsize <= 0 makes a Queue unbounded, so put() never blocks.
MAX_SIZE = -1
# Shared pipeline queues, used as the default queues of the worker classes.
producerq = Queue.Queue(MAX_SIZE)  # JSON strings: ProduceJson -> ParseJson
consumerq = Queue.Queue(MAX_SIZE)  # lists of dicts: ParseJson -> ConsumeJsonList


class ProduceJson(threading.Thread):
    """Producer thread: pushes an endless stream of JSON strings onto a queue.

    Every message is a JSON object holding a 1-based ``seq`` counter and a
    ``timestamp`` string.
    """

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, verbose=None, producerq=producerq):
        """Create (but do not start) the producer.

        ``group``, ``args``, ``kwargs`` and ``verbose`` only mirror the
        ``threading.Thread`` signature and are ignored. ``target`` is stored
        on the instance but never called, because ``run`` is overridden.

        :param name: thread name used in log output; ``None`` lets
            ``threading.Thread`` pick its default name.
        :param producerq: queue that receives the generated JSON strings.
        """
        # Pass the name to Thread so that name=None yields the automatic
        # default name instead of the literal string "None".
        super(ProduceJson, self).__init__(name=name)
        self.target = target
        self.producerq = producerq
        # Logged at construction time; the thread runs only after start().
        logging.debug(self.name + " Started")

    def run(self):
        """Generate messages forever; any exception is logged and ends the thread."""
        cnt = 1
        try:
            while True:
                # NOTE(review): the "6" before "Z" is emitted literally; it
                # looks like a typo (perhaps ".%fZ" was intended) - confirm
                # before changing the wire format.
                timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S6Z")
                item = json.dumps({
                    'seq': cnt,
                    'timestamp': timestamp,
                })
                logging.debug(self.name + " Posting " + str(item))
                # put() blocks while a bounded queue is full, so no separate
                # full() check (and no busy-wait loop) is needed.
                self.producerq.put(item)
                cnt += 1
        except Exception as e:
            # logging.exception records the traceback; formatting the
            # exception with %s avoids the deprecated, possibly non-string
            # ``e.message`` attribute.
            logging.exception("ERROR %s %s", self.name, e)


class ParseJson(threading.Thread):
    """Parser thread: turns JSON strings into dicts and forwards them in batches.

    Reads JSON strings from ``producerq``, converts each into a dict with
    the keys ``pseq`` and ``ptime``, and puts a list of ``batch_size`` such
    dicts onto ``consumerq`` every time a batch fills up.
    """

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, verbose=None, producerq=producerq, consumerq=consumerq,
                 batch_size=5):
        """Create (but do not start) the parser.

        ``group``, ``args``, ``kwargs`` and ``verbose`` only mirror the
        ``threading.Thread`` signature and are ignored. ``target`` is stored
        on the instance but never called, because ``run`` is overridden.

        :param name: thread name used in log output; ``None`` lets
            ``threading.Thread`` pick its default name.
        :param producerq: queue of JSON strings to parse.
        :param consumerq: queue that receives the parsed batches (lists).
        :param batch_size: number of parsed items per forwarded list.
        :raises ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be >= 1")
        # Pass the name to Thread so that name=None yields the automatic
        # default name instead of the literal string "None".
        super(ParseJson, self).__init__(name=name)

        self.target = target
        self.producerq = producerq
        self.consumerq = consumerq
        self.batch_size = batch_size
        # Logged at construction time; the thread runs only after start().
        logging.debug(self.name + " Started")

    def run(self):
        """Parse messages forever; any exception is logged and ends the thread."""
        batch = []
        try:
            while True:
                # get() blocks until a message arrives, so the thread sleeps
                # instead of spinning on empty().
                item = json.loads(self.producerq.get())
                parsed = dict()
                parsed['pseq'] = item['seq']
                parsed['ptime'] = item['timestamp']
                batch.append(parsed)

                if len(batch) >= self.batch_size:
                    self.consumerq.put(batch)
                    logging.debug(self.name + " Posting " + str(batch))
                    # Start a NEW list rather than clearing the old one: the
                    # queue holds a reference to the same object, so clearing
                    # it would wipe data the consumer has not read yet.
                    batch = []
        except Exception as e:
            # logging.exception records the traceback; formatting the
            # exception with %s avoids the deprecated, possibly non-string
            # ``e.message`` attribute.
            logging.exception("ERROR %s %s", self.name, e)


class ConsumeJsonList(threading.Thread):
    """Consumer thread: takes batches off a queue and logs them."""

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, verbose=None, consumerq=consumerq):
        """Create (but do not start) the consumer.

        ``group``, ``args``, ``kwargs`` and ``verbose`` only mirror the
        ``threading.Thread`` signature and are ignored. ``target`` is stored
        on the instance but never called, because ``run`` is overridden.

        :param name: thread name used in log output; ``None`` lets
            ``threading.Thread`` pick its default name.
        :param consumerq: queue from which batches are taken.
        """
        # Pass the name to Thread so that name=None yields the automatic
        # default name instead of the literal string "None".
        super(ConsumeJsonList, self).__init__(name=name)
        self.target = target
        self.consumerq = consumerq
        # Logged at construction time; the thread runs only after start().
        logging.debug(self.name + " Started")

    def run(self):
        """Consume batches forever; any exception is logged and ends the thread."""
        try:
            while True:
                # get() blocks until a batch arrives, so the thread sleeps
                # instead of spinning on empty().
                batch = self.consumerq.get()
                logging.info(" Consumed items  " + str(batch))
        except Exception as e:
            # logging.exception records the traceback; formatting the
            # exception with %s avoids the deprecated, possibly non-string
            # ``e.message`` attribute.
            logging.exception("ERROR %s %s", self.name, e)


if __name__ == '__main__':
    # Pipeline layout: one producer -> two parsers -> one consumer.
    # Construction order is kept because each constructor logs a line.
    producer1 = ProduceJson(producerq=producerq, name='PRODUCER1')
    parser1 = ParseJson(consumerq=consumerq, producerq=producerq,
                        name='parser1')
    parser2 = ParseJson(consumerq=consumerq, producerq=producerq,
                        name='parser2')
    consumer1 = ConsumeJsonList(consumerq=consumerq, name='CONSUMER1')

    # Start the stages in the same order they were created.
    for worker in (producer1, parser1, parser2, consumer1):
        worker.start()

0 个答案:

没有答案