这是处理一个文件的代码。
import numpy as np
import pandas as pd

# Read the semi-cleaned CSV and label its four columns explicitly.
# NOTE: because `names=` is passed without `header=`, pandas treats the first
# line of the file as data rather than as a header row.
df = pd.read_csv(
    "cleaning/semicleaned1.csv",
    sep=",",
    names=["press", "gph", "temp", "wspd"],
)

# Discard the "gph" column and write the remaining columns, without the
# DataFrame index, to the final output file.
df = df.drop(['gph'], axis=1)
df.to_csv("cleaning/FINAL.csv", sep=',', index=False)
答案 0 :(得分:1)
要处理 `cleaning` 文件夹中的所有 CSV 文件,您可以使用 `glob` 来获取文件列表。我建议根据输入文件名生成输出文件名,例如在文件名前加上 `cleaned_` 前缀:
import pandas as pd
import glob
import os
# Clean every CSV in the "cleaning" folder, writing each result next to its
# input as "cleaned_<original name>".
for csv_filename in glob.glob('cleaning/*.csv'):
    directory, basename = os.path.split(csv_filename)

    # Outputs are written into the same folder and match the same pattern,
    # so skip files produced by an earlier run; re-processing them would
    # create "cleaned_cleaned_..." files with mislabelled columns.
    if basename.startswith('cleaned_'):
        continue

    # Create a suitable output filename based on the input filename
    name, ext = os.path.splitext(basename)
    cleaned_filename = os.path.join(directory, 'cleaned_{}{}'.format(name, ext))
    print('{} -> {}'.format(csv_filename, cleaned_filename))

    # Process CSV: label the four columns, drop "gph", and save the rest
    # without the DataFrame index.
    df = pd.read_csv(csv_filename, names=["press", "gph", "temp", "wspd"])
    df = df.drop(['gph'], axis=1)
    df.to_csv(cleaned_filename, sep=',', index=False)
请注意,如果您的文件带有标题行(表头),则需要小心:由于传入了 `names=`,pandas 会把第一行当作数据读入;此时应同时指定 `header=0`,以便用新的列名替换原有的标题行。
您还可以使用 `multiprocessing.Pool()` 并行处理这些文件:
from multiprocessing import Pool
import pandas as pd
import glob
import os
def clean_csv(csv_filename: str) -> None:
    """Write a copy of *csv_filename* with the "gph" column removed.

    The input is read as a header-less CSV whose four columns are labelled
    "press", "gph", "temp" and "wspd". The output is written to the same
    directory as the input, with "cleaned_" prepended to the file name, and
    contains a header row followed by the remaining three columns.

    Args:
        csv_filename: Path of the CSV file to clean.
    """
    # Create a suitable output filename based on the input filename
    directory, basename = os.path.split(csv_filename)
    name, ext = os.path.splitext(basename)
    cleaned_filename = os.path.join(directory, 'cleaned_{}{}'.format(name, ext))
    print('{} -> {}'.format(csv_filename, cleaned_filename))

    # Process CSV: `names=` without `header=` makes pandas treat every line,
    # including the first, as data.
    df = pd.read_csv(csv_filename, names=["press", "gph", "temp", "wspd"])
    df = df.drop(['gph'], axis=1)
    df.to_csv(cleaned_filename, sep=',', index=False)
if __name__ == '__main__':
    # Cleaned outputs are written into the same folder and match the same
    # pattern, so leave out files produced by an earlier run; re-processing
    # them would create "cleaned_cleaned_..." files with mislabelled columns.
    csv_filenames = [
        path for path in glob.glob('cleaning/*.csv')
        if not os.path.basename(path).startswith('cleaned_')
    ]

    # Fan the files out over 10 worker processes and print the list of
    # per-file return values once every file has been processed.
    with Pool(10) as pool:
        print(pool.map(clean_csv, csv_filenames))