Python multiprocessing 程序在 pool.map 处挂起

时间:2018-09-19 09:08:27

标签: python multiprocessing

我正在尝试应用python多处理模块来创建多个spss(.sav)文件。

但是,不幸的是,程序挂起了好几个小时,我最终不得不强制终止该进程。

请在下面找到代码段,

import csv
import os
import copy
import sys
import savReaderWriter
from functools import partial

def write_to_savfile(metadata_dict, exported_file):
    """Pivot a long-format export file into an SPSS ``.sav`` file.

    Every input line is split on ``"~!#"`` and must yield at least four
    fields: field 0 is stored in column 0 of the output row, field 1 is the
    case identifier (a change in its value starts a new output row), field 2
    is looked up in ``metadata_dict["varPostion"]`` to find the target
    column, and field 3 is the value stored in that column.

    Args:
        metadata_dict: Mapping providing the keys read below: ``var_names``
            (sequence supporting ``.index``), ``var_types`` (mapping of
            variable name to a number), ``var_labs``, ``miss_vals``,
            ``value_labels``, ``measure``, ``col_width``, ``allFmt`` and
            ``varPostion`` (mapping of field-2 values to column indexes).
        exported_file: Path of the input file. The output path is the same
            path with its extension replaced by ``.sav``.

    Raises:
        Exception: Any error raised while reading the input or writing rows
            is printed and re-raised as a plain ``Exception`` whose message
            starts with ``'Failed to create .sav File'``.
    """
    # splitext instead of [:-4] so extensions that are not exactly three
    # characters long do not mangle the output name.
    sav_file_name = os.path.splitext(exported_file)[0] + ".sav"
    with savReaderWriter.SavWriter(
            savFileName=sav_file_name,
            varNames=metadata_dict['var_names'],
            varTypes=metadata_dict['var_types'],
            varLabels=metadata_dict['var_labs'],
            missingValues=metadata_dict['miss_vals'],
            valueLabels=metadata_dict['value_labels'],
            measureLevels=metadata_dict['measure'],
            columnWidths=metadata_dict['col_width'],
            formats=metadata_dict['allFmt'],
            ioLocale='en_US.UTF-8',
            ioUtf8=True) as writer:

        try:
            variable_position = metadata_dict["varPostion"]
            var_names = metadata_dict['var_names']

            # Default row: one numeric placeholder per column.
            # NOTE(review): presumably this constant is meant as a
            # "missing" marker -- confirm the intended value.
            template = [-1.7976931345812589e+208] * len(variable_position)
            # Columns whose var_types value is > 0 default to an empty
            # string instead of the numeric placeholder.
            for name, var_type in metadata_dict['var_types'].items():
                if var_type > 0:
                    template[var_names.index(name)] = u""

            rows = []
            current_row = None
            last_case = None
            # 'with' guarantees the input handle is closed, even on errors.
            with open(exported_file, "r") as file_in:
                for line in file_in:
                    # Strip only the newline, so a final line without a
                    # trailing newline keeps its last character.
                    fields = line.rstrip("\n").split("~!#")
                    # current_row is None only before the first line; using
                    # None (not a magic string) means no real case id can
                    # ever be mistaken for "no previous case".
                    if current_row is None or fields[1] != last_case:
                        current_row = list(template)
                        current_row[0] = fields[0]
                        rows.append(current_row)
                    last_case = fields[1]
                    current_row[variable_position[fields[2]]] = fields[3]

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Writing data to .zsav file for survey " + str(exported_file))
            for record in rows:
                writer.writerow(record)
            print("done:  " + str(exported_file))

        except Exception as e:
            print(e)
            raise Exception('Failed to create .sav File' + str(e))
if __name__ == "__main__":
    sorted_file_names = ['1.csv','2.csv','3.csv']
    metadata_dict = {some dictionary used in the above function}
    pool = mp.Pool(processes=mp.cpu_count())
    multi_sav_func = partial(write_to_savfile, copy.deepcopy(metadata_dict))
    pool.map(multi_sav_func, sorted_file_names)
    pool.close()
    pool.join()

有人可以给一个解决方法吗?

更新:当我只处理2个文件时

sorted_file_names = ['1.csv','2.csv']

一切正常,我可以得到spss文件(1.sav、2.sav),但是尝试处理2个以上的文件时,程序就会卡住。

1 个答案:

答案 0 :(得分:0)

您可能需要将使用 multiprocessing 的代码放入单独的函数中。这样,当 multiprocessing 在子进程中重新导入该模块时,就不会递归地启动新的进程池: