如何在 Python 中并行化类方法,或在进程之间共享类对象?

时间:2018-05-25 14:28:59

标签: python python-multiprocessing data-sharing multiprocessing-manager

我创建了一个会保存其所有实例的类,现在需要把实例化过程并行化,但无法解决如何在进程之间共享这个类(类对象本身)的问题。在 Python 2.7 中能否用 multiprocessing 模块做到这一点?

# Column headings for the CSV export. MyContainer.__init__ appends every new
# "<var1>.<var2>" category name it encounters to this list (in place).
OUTPUT_HEADINGS = []


class MyContainer(object):
    """Summary of one XML data file, registered in a class-level list.

    Calling ``MyContainer(dat_file)`` does NOT return an instance. The
    overridden ``__new__`` parses the file, appends a successfully built
    instance to ``MyContainer.instances`` and returns ``True``. It returns
    ``False`` when the file cannot be parsed or contains no
    "parent_element/child_element" nodes.

    ``instances``, ``children`` and the module-level ``OUTPUT_HEADINGS`` are
    plain lists mutated in place, so every call in the same interpreter
    shares them.
    """
    # All instances created so far, in creation order.
    instances = []
    # Scratch slot: the child nodes of the file currently being constructed.
    # Filled by __new__, read by __init__, and empty outside a constructor call.
    children = []

    @classmethod
    def export_to_csv(cls):
        """Write one CSV row per registered instance to ``args.output``.

        The header row is taken from ``OUTPUT_HEADINGS``; each row is the
        dict returned by the instance's ``to_dict()``.
        """
        with open(args.output, "w") as output_file:
            f_csv = csv.DictWriter(output_file, fieldnames=OUTPUT_HEADINGS)
            f_csv.writeheader()
            for instance in cls.instances:
                f_csv.writerow(instance.to_dict())

    def __new__(cls, dat_file):
        """Parse *dat_file* and register an instance for it.

        Args:
            dat_file: Path of the XML file to parse.

        Returns:
            ``True`` if an instance was created and appended to
            ``cls.instances``; ``False`` if the file could not be parsed or
            has no "parent_element/child_element" nodes. Because the return
            value is not an instance of ``cls``, Python does not call
            ``__init__`` automatically; it is invoked explicitly below.
        """
        # Start from a clean slate so nodes left over from an earlier call
        # that raised can never be mistaken for this file's nodes.
        cls.children = []
        try:
            try:
                tree = ElementTree.parse(dat_file)
                cls.children = tree.findall("parent_element/child_element")
            except ElementTree.ParseError as err:
                logging.exception(err)

            if not cls.children:
                msg = ("{}: No \"parent_element/child_element\""
                       " element found".format(os.path.basename(dat_file)))
                logging.warning(msg)
                return False

            # object.__new__ accepts no extra arguments: forwarding dat_file
            # raises TypeError on Python 3 and a DeprecationWarning on 2.7.
            instance = super(MyContainer, cls).__new__(cls)
            instance.__init__(dat_file)
            cls.instances.append(instance)
            return True
        finally:
            # Always clear the scratch slot, even when __init__ raises.
            cls.children = []

    def __init__(self, dat_file):
        """Initialise the instance from the nodes stored by ``__new__``.

        Reads ``MyContainer.children[0]``, so it must only be called while
        ``__new__`` has populated ``children``.

        Args:
            dat_file: Path of the XML file; only its base name is stored.
        """
        self._name = os.path.basename(dat_file)
        self.attr_value_sum = defaultdict(list)

        # NOTE(review): find() returns an Element (or None), not its text, so
        # cat_name embeds the Element's repr. findtext("var1") may be what
        # was intended -- confirm before changing.
        var1 = MyContainer.children[0].find("var1")
        var2 = MyContainer.children[0].get("var2")
        cat_name = "{}.{}".format(var1, var2)

        if cat_name not in OUTPUT_HEADINGS:
            OUTPUT_HEADINGS.append(cat_name)
        # processing and summarizing of xml data

    def to_dict(self):
        """Return the row dict written by ``export_to_csv``."""
        # NOTE(review): output_dict is not defined in the visible code; unless
        # it is defined elsewhere this raises NameError when called.
        return output_dict

def main():
    """Build a MyContainer for every path in FILE_LIST, then export to CSV.

    Prints a progress line per file followed by "...DONE" or "...SKIPPED",
    depending on the truthiness of ``MyContainer(f)``. Any exception stops
    the loop and is logged; the CSV export runs in every case.
    """
    try:
        total = len(FILE_LIST)
        for i, f in enumerate(FILE_LIST, 1):
            # Single-argument print(...) produces the same output as the
            # Python 2 print statement and is also valid on Python 3.
            print("{}/{}: {} in progress...".format(i, total, f))
            print("...DONE" if MyContainer(f) else "...SKIPPED")
    except Exception as err:
        logging.exception(err)
    finally:
        # Export whatever was collected, even after a failure.
        MyContainer.export_to_csv()

if __name__ == '__main__':
    # Flatten the matches from every requested directory into a single list,
    # keeping the order of args.dirs, then hand over to main().
    FILE_LIST = [
        path
        for directory in args.dirs
        for path in get_name_defined_files(dir_path=directory,
                                           pattern=args.filename,
                                           recursive=args.recursive)
    ]
    main()

我尝试过用 multiprocessing.managers.BaseManager 为 MyContainer 类创建代理,但这种方式只能创建实例对象。我真正想做的是并行执行 MyContainer(dat_file) 这个调用。

0 个答案:

没有答案