我在pandas中比较两个数据框。我已按列拆分了这些数据框,所以每个数据框都有n个pandas系列,我想两个比较这些数据框之间的每个pandas系列。我在python中使用多进程,每个进程比较两个pandas系列和i当一个进程发现这些熊猫系列之间的差异太大时(too_many = 20),我想终止进程池。我使用starmap_async的回调方法,我想获得一个元素来检查该回调方法,但找到了列表。请帮我
def kill_process(result):
if result['too_many_rows'] == true
p.terminate()
def worker(r, ctx):
df_preprod = r['data_frame_pprod']
df_prod = r['data_frame_prod']
series_pprod = [df_preprod[col_name] for col_name in df_preprod.columns]
series_prod = [df_prod[col_name] for col_name in df_prod.columns]
nb_worker = cpu_count()
if (len(series_pprod)) > cpu_count():
if len(series_pprod) > 61:
nb_worker = 60
else:
nb_worker = len(series_pprod)
pool = ctx.Pool(nb_worker)
results = pool.starmap_async(compare_series, zip(series_pprod, series_prod),callback=kill_process).get()
pool.close()
pool.join()
def compare_series(serie_source, serie_dest):
res_diff = dict()
data_frame_diff = pd.DataFrame(columns=['Ligne', 'Cellule', 'Report PreProd', 'Report Prod'])
res_diff['too_many_result_diff'] = False
if serie_source.isnull().all() and serie_dest.isnull().all():
res_diff['df_diff_res'] = data_frame_diff
return res_diff
else:
i = 0
for index, value in enumerate(serie_source):
if i == 20:
res_diff['too_many_result_diff'] = True
print('too many diff')
#print(data_frame_diff)
#raise TooManyDiffBetweenSeries
break
try:
if isinstance(value, numbers.Number):
val_preprod = str(round(value, 2))
else:
val_preprod = value
except:
val_preprod = np.nan
try:
if isinstance(serie_dest[index], numbers.Number):
val_prod = str(round(serie_dest[index], 2))
else:
val_prod = serie_dest[index]
except:
val_prod = np.nan
if str(val_preprod) != str(val_prod):
cell = xl_rowcol_to_cell(index, 0)
row_excel = index + 1
df_temp = pd.DataFrame([[row_excel, cell, val_preprod, val_prod]],
columns=['Ligne', 'Cellule', 'Report PreProd', 'Report Prod'])
data_frame_diff = data_frame_diff.append(df_temp)
i = i + 1
res_diff['df_diff_res'] = data_frame_diff
return res_diff
if __name__ == '__main__':
for r in list_rpt_nb_lignes_sm_than_2000:
logger.info('doc name {}'.format(r['doc_name']))
logger.info('report name {}'.format(r['name']))
logger.info('number {}/Total number {}'.format(i, nb_rpt_nb_lignes_sm_than_2000))
diff_res = worker(r, ctx)
print(len(diff_res['result']))
</pre>`enter code here`
But in the callback i not get one element but a list, i want to check each result return by each process and terminate the pool when the result is expected