我创建了一个循环,用于遍历主目录,打开并合并其各个子文件夹中包含的大量txt文件。
该循环需要打开332个文件夹中的124,050个文件。使用强大的Alienware笔记本电脑大约需要30分钟才能完成此工作。有什么办法可以加快这个循环?
def merge_txt_files(start_directory, pattern='*.txt', *, verbose=True):
    """Read every matching file under *start_directory* into one DataFrame.

    Each file is parsed with ``pd.read_csv(..., header=None)`` and tagged
    with three extra columns:

    * ``date``        - name of the folder that contains the file
    * ``file``        - the file name
    * ``line_number`` - 0-based row position inside that file

    Args:
        start_directory: Root folder that is walked recursively.
        pattern: ``fnmatch`` pattern selecting the files to read.
        verbose: When true, print the full path of each file after it is read.

    Returns:
        The concatenated DataFrame with a fresh 0..n-1 index, or ``None``
        when no file matches.
    """
    frames = []
    for path, _dirs, files in os.walk(start_directory):
        # The folder name is the same for every file in this directory,
        # so compute it once per directory rather than once per file.
        date = os.path.basename(path)
        for file in fnmatch.filter(files, pattern):
            full_name = os.path.join(path, file)
            df_tmp = pd.read_csv(full_name, header=None)
            df_tmp['date'] = date
            df_tmp['file'] = file
            df_tmp['line_number'] = range(len(df_tmp))
            # Only collect here. Concatenating inside the loop re-copies all
            # rows gathered so far on every file, which is quadratic overall.
            frames.append(df_tmp)
            if verbose:
                print(full_name)

    if not frames:
        return None
    # A single concat copies each row exactly once.
    return pd.concat(frames, axis='index', ignore_index=True)


start_dirctory = 'D:/RPi Noise/'  # change this
df_result = merge_txt_files(start_dirctory)