我需要定期将约 30 GB 的 .xls 文件批量转换为 CSV。目前完成这 30 GB 的转换需要 3 个小时。为了缩短转换时间,我应该如何在一台 i5 处理器的 Windows 电脑上使用并行处理?我是新手。
import glob
import pandas as pd
# Sequential baseline: convert every .xls workbook matched by the pattern to a
# CSV file written next to its source.
excel_files = glob.glob(r'filepath\*.xls')
for excel in excel_files:
    # Strip only the final extension. Splitting on the first dot would cut the
    # path short whenever a folder or file name contains another dot
    # (e.g. "C:\my.data\report.v2.xls").
    out = excel.rsplit('.', 1)[0] + '.csv'
    df = pd.read_excel(excel)  # if only the first sheet is needed.
    # NOTE: to_csv's default also writes the DataFrame index as the first
    # column; kept as-is so the output format does not change.
    df.to_csv(out)
import multiprocessing
import glob
import pandas as pd
# Module-level counter, starts at zero (not referenced by the code visible
# here).
counter = 0

# All .xls workbooks located directly inside the input folder; point the
# pattern at the folder that holds your files.
excel_files = glob.glob(r'C:\Test\*.xls')
def multi(excel=None):
    """Convert Excel workbook(s) to CSV, writing each CSV next to its source.

    Args:
        excel: Path of a single workbook to convert. This is the form a
            worker pool needs: ``pool.map(multi, paths)`` calls the function
            once per path, so each call must handle exactly one file. When
            omitted, every path in the module-level ``excel_files`` list is
            converted sequentially (the original zero-argument behaviour).

    Returns:
        None. The result is the ``.csv`` file written for each workbook.
    """
    targets = excel_files if excel is None else [excel]
    for path in targets:
        # Strip only the final extension; splitting on the first dot would
        # truncate paths whose folder or file name contains another dot.
        out = path.rsplit('.', 1)[0] + '.csv'
        df = pd.read_excel(path)  # if only the first sheet is needed.
        df.to_csv(out)
    return
# The __main__ guard is required for multiprocessing on Windows: worker
# processes are started by re-importing this module, and without the guard
# each worker would try to create its own pool.
if __name__ == '__main__':
    # Pool() starts one worker per CPU reported by os.cpu_count(). The
    # context manager shuts the workers down once map() has returned, so no
    # processes are left behind.
    with multiprocessing.Pool() as pool:
        # chunksize=1 hands files out one at a time. Each task is a whole
        # workbook, so dispatch overhead is negligible, and a large chunk
        # would leave cores idle when there are only a few files.
        result = pool.map(multi, excel_files, 1)