我正在开发一个用于创建数据列表的应用程序。实际的数据应该是大数据,这就是为什么我尝试通过使用多处理库在多个进程上分离python进程的原因。但是当我尝试使用多进程调用方法时,进程本身会停止而不显示任何错误。
我在我的环境中使用python 3.6,并且我已经尝试使用itertools和functool的partial来确保正确发送我的参数。但目前没有任何效果。
导入系统 导入操作系统 将numpy导入为np 进口烧瓶 导入matplotlib 导入matplotlib.pyplot作为plt 从mpl_finance导入Candlestick_ohlc 将matplotlib.dates导入为mdates 将熊猫作为pd导入 从io import BytesIO 从matplotlib.font_manager导入FontProperties 从matplotlib.dates导入date2num 导入日期时间 导入日历 导入多处理 从functools导入部分 从contextlib导入contextmanager 从itertools导入产品
def count_defect_by_defName_and_part(dataFrame,target,target2,row,defectName,partName,tempCount):
count = pd.pivot_table(dataFrame,columns=[target,target2],index=row["groupName"],values=row["itemName"],aggfunc="count",dropna=False)[defectName][partName]
count = pd.DataFrame(count)
count["subGroup"]=count.index
count["groupName"]=row["groupName"]
count.rename(columns={partName:"count"}, inplace=True)
tempCount.append(count)
return tempCount
#parameter for testing
filterParam=pd.DataFrame(columns=["filterName","param1","param2"])
filterParam = filterParam.append({"filterName":"成形日","param1":"2018-12-28","param2":"2019-02-12"},ignore_index=True)
dataFrame = csv_file
groupList = ["型番区分","一分値区分","比重区分","揺変性区分","流れ区分","泥温区分","比重区分","肉厚区分","珪曹添加量区分"]
defectName = "キレ"
partName = "トラップ"
target = "欠点"
target2 = "部位名称"
#データ区分マスターから情報を取得
dataGroup = pd.read_csv('../../Dataframe_Creator/config/data_group_master.csv',encoding="ms932")
countList = pd.merge(pd.DataFrame(groupList,columns=["groupName"]),dataGroup, on=["groupName"], how="left", indicator="exist")
rankingList=pd.DataFrame(columns=["groupName","min","max","rift"])
loopFrame = countList.drop_duplicates(subset=["groupName"])
tempCount = []
with multiprocessing.Pool(processes=1) as pool:
results = pool.map(partial(count_defect_by_defName_and_part, dataFrame=dataFrame,target=target,target2=target2,
defectName=defectName,partName=partName,tempCount=tempCount), product(loopFrame, repeat=2))
pool.close() # no more tasks
pool.join() # wrap up current tasks
当我尝试直接运行该过程而不进行多处理时,它运行得很好,并且过程在大约5分钟内完成。