我有多个 .csv 文件，想把它们合并成一个文件。基本上，我想从每个文件中选取某些列，并将它们并排（按列）拼接在一起。
我下面的代码不起作用：完全没有错误消息，它什么也没做。
有人知道如何解决吗？
import pandas as pd
import datetime
import numpy as np
import glob
import csv
import os
def concatenate(indir='/My Documents/Python/Test/in',
                outfile='/My Documents/Python/Test/out/Forecast.csv'):
    """Stack every CSV found in ``indir`` and write the result to ``outfile``.

    The working directory is switched to ``indir`` (and is not switched
    back), each ``*.csv`` file found there is read into a DataFrame, the
    frames are concatenated along ``axis=0`` (i.e. row-wise), the column
    labels are replaced by a fixed list of eleven names, and the result is
    written to ``outfile`` without the index.
    """
    column_labels = [
        "DateTime", "WindSpeed", "Capacity",
        "p0.025", "p0.05", "p0.1", "p0.5",
        "p0.9", "p0.95", "p0.975", "suffix",
    ]

    os.chdir(indir)
    csv_names = glob.glob('*.csv')
    print(csv_names)

    frames = []
    for csv_name in csv_names:
        print(csv_name)
        frames.append(
            pd.read_csv(
                csv_name,
                delimiter=',',
                engine='python',
                encoding='latin-1',
                index_col=False,
            )
        )

    stacked = pd.concat(frames, axis=0)
    stacked.columns = column_labels
    stacked.to_csv(outfile, index=None)
答案 0 :(得分:0)
我运行此代码以在我的文件系统上设置文件
**设置**
import pandas as pd
import numpy as np
def setup_test_files(indir='in'):
    """Write three small random CSV fixtures into ``indir``.

    The files are named ``fn_0.csv``, ``fn_1.csv`` and ``fn_2.csv``. Each one
    holds three rows indexed by month-end dates starting at 2016-03-31
    (index name ``DateTime``) and ten columns of uniform random values
    rounded to two decimals. The path of every file is printed as it is
    written.

    Args:
        indir: Directory that receives the files. It is created if it does
            not exist yet.

    Returns:
        The list of file paths that were written, in creation order.
    """
    # Local import: the surrounding snippet only imports pandas and numpy.
    import os

    colnames = [
        "WindSpeed", "Capacity",
        "p0.025", "p0.05", "p0.1", "p0.5",
        "p0.9", "p0.95", "p0.975", "suffix",
    ]
    # An explicit MonthEnd offset is used instead of the 'M' string alias,
    # which recent pandas releases deprecate in favour of 'ME'.
    tidx = pd.date_range(
        '2016-03-31', periods=3, freq=pd.offsets.MonthEnd(), name='DateTime'
    )

    # Honour the ``indir`` argument and make sure the directory exists;
    # otherwise ``to_csv`` fails on a fresh checkout.
    if indir:
        os.makedirs(indir, exist_ok=True)

    written = []
    for i in range(3):
        filename = os.path.join(indir, 'fn_{}.csv'.format(i))
        frame = pd.DataFrame(
            np.random.rand(len(tidx), len(colnames)),
            index=tidx,
            columns=colnames,
        )
        frame.round(2).to_csv(filename)
        print(filename)
        written.append(filename)
    return written
setup_test_files()
这会创建 3 个文件，分别名为 ['fn_0.csv', 'fn_1.csv', 'fn_2.csv']。
它们的内容如下：
# Dump the first generated fixture to stdout so its layout can be inspected.
with open('in/fn_0.csv', 'r') as fixture:
    print(fixture.read())
DateTime,WindSpeed,Capacity,p0.025,p0.05,p0.1,p0.5,p0.9,p0.95,p0.975,suffix
2016-03-31,0.03,0.76,0.62,0.21,0.76,0.36,0.44,0.61,0.23,0.04
2016-04-30,0.39,0.12,0.31,0.99,0.86,0.35,0.15,0.61,0.55,0.03
2016-05-31,0.72,1.0,0.71,0.86,0.41,0.79,0.22,0.76,0.92,0.79
我将定义一个解析器函数和一个单独执行串联的函数。为什么?因为我认为这样更容易。
import pandas as pd
import glob
import os
def read_csv(fn):
    """Load one forecast CSV and relabel its columns.

    ``fn`` is handed to ``pandas.read_csv`` with ``latin-1`` encoding. The
    header row read from the file is then overwritten with eleven fixed
    column names, so the file is expected to have exactly eleven columns.

    Returns the resulting DataFrame (default integer index).
    """
    frame = pd.read_csv(fn, encoding='latin-1')

    percentile_cols = [
        "p0.025", "p0.05", "p0.1", "p0.5",
        "p0.9", "p0.95", "p0.975",
    ]
    frame.columns = (
        ["DateTime", "WindSpeed", "Capacity"] + percentile_cols + ["suffix"]
    )
    return frame
def concatenate(indir='in', outfile='out/Forecast.csv'):
    """Join every CSV in ``indir`` side by side and write one combined CSV.

    Each ``*.csv`` file in ``indir`` is parsed with ``read_csv`` and the
    resulting frames are concatenated along ``axis=1``. The file name
    without its extension is used as the outer level of a two-level column
    header, so the columns of each source file stay grouped together. The
    row index is not written to ``outfile``.

    Errors (unreadable input, unwritable output, ...) propagate to the
    caller instead of being silently discarded.

    Args:
        indir: Directory to scan for ``.csv`` files.
        outfile: Path of the combined CSV. Its parent directory is created
            if necessary.

    Raises:
        FileNotFoundError: If ``indir`` contains no ``.csv`` files.
    """
    # Globbing with the directory prefix means the process working directory
    # is never changed, so a relative ``outfile`` always resolves against
    # the caller's cwd. ``glob.escape`` keeps special characters in
    # ``indir`` from being treated as wildcards.
    pattern = os.path.join(glob.escape(indir), '*.csv')
    # glob returns files in arbitrary order; sort for a reproducible layout.
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(
            "no .csv files found in {!r}".format(indir)
        )

    # splitext strips only the trailing extension, unlike str.replace which
    # would also hit '.csv' in the middle of a name.
    df_names = [os.path.splitext(os.path.basename(p))[0] for p in paths]
    concat_df = pd.concat(
        [read_csv(p) for p in paths],
        axis=1, keys=df_names)

    out_dir = os.path.dirname(outfile)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    concat_df.to_csv(outfile, index=False)
然后运行连接
concatenate()
您可以像这样阅读结果
print(pd.read_csv('out/Forecast.csv', header=[0, 1]))
fn_0 \
DateTime WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9 p0.95 p0.975
0 2016-03-31 0.03 0.76 0.62 0.21 0.76 0.36 0.44 0.61 0.23
1 2016-04-30 0.39 0.12 0.31 0.99 0.86 0.35 0.15 0.61 0.55
2 2016-05-31 0.72 1.00 0.71 0.86 0.41 0.79 0.22 0.76 0.92
... fn_2
... WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9 p0.95 p0.975 suffix
0 ... 0.80 0.79 0.38 0.94 0.91 0.18 0.27 0.14 0.39 0.91
1 ... 0.60 0.97 0.04 0.69 0.04 0.65 0.94 0.81 0.37 0.22
2 ... 0.78 0.53 0.83 0.93 0.92 0.12 0.15 0.65 0.06 0.11
[3 rows x 33 columns]
注意：
您没有把 DateTime 设为索引，而我猜这可能才是您想要的。如果是这样，请将 read_csv 和 concatenate 函数改成下面这样：
import pandas as pd
import glob
import os
def read_csv(fn):
    """Load one forecast CSV with its first column as a date index.

    ``fn`` is handed to ``pandas.read_csv`` with ``latin-1`` encoding; the
    first column is used as the index and parsed as dates. The index is
    then named ``DateTime`` and the remaining columns are relabelled with
    ten fixed names, so the file is expected to have exactly ten data
    columns after the date column.

    Returns the resulting DataFrame.
    """
    frame = pd.read_csv(
        fn,
        encoding='latin-1',
        index_col=0,      # first column becomes the index ...
        parse_dates=[0],  # ... and is parsed as dates
    )
    frame.index.name = "DateTime"

    percentile_cols = [
        "p0.025", "p0.05", "p0.1", "p0.5",
        "p0.9", "p0.95", "p0.975",
    ]
    frame.columns = ["WindSpeed", "Capacity"] + percentile_cols + ["suffix"]
    return frame
def concatenate(indir='in', outfile='out/Forecast.csv'):
    """Join every CSV in ``indir`` side by side, aligned on the row index.

    Each ``*.csv`` file in ``indir`` is parsed with ``read_csv`` and the
    resulting frames are concatenated along ``axis=1``, which lines rows up
    by their index labels. The file name without its extension is used as
    the outer level of a two-level column header. The index is written to
    ``outfile`` as the first column.

    Errors (unreadable input, unwritable output, ...) propagate to the
    caller instead of being silently discarded.

    Args:
        indir: Directory to scan for ``.csv`` files.
        outfile: Path of the combined CSV. Its parent directory is created
            if necessary.

    Raises:
        FileNotFoundError: If ``indir`` contains no ``.csv`` files.
    """
    # Globbing with the directory prefix means the process working directory
    # is never changed, so a relative ``outfile`` always resolves against
    # the caller's cwd. ``glob.escape`` keeps special characters in
    # ``indir`` from being treated as wildcards.
    pattern = os.path.join(glob.escape(indir), '*.csv')
    # glob returns files in arbitrary order; sort for a reproducible layout.
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(
            "no .csv files found in {!r}".format(indir)
        )

    # splitext strips only the trailing extension, unlike str.replace which
    # would also hit '.csv' in the middle of a name.
    df_names = [os.path.splitext(os.path.basename(p))[0] for p in paths]
    concat_df = pd.concat(
        [read_csv(p) for p in paths],
        axis=1, keys=df_names)

    out_dir = os.path.dirname(outfile)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    concat_df.to_csv(outfile)
这是此更改的最终结果,请注意日期将以这种方式对齐
fn_0 \
WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9 p0.95 p0.975
DateTime
2016-03-31 0.03 0.76 0.62 0.21 0.76 0.36 0.44 0.61 0.23
2016-04-30 0.39 0.12 0.31 0.99 0.86 0.35 0.15 0.61 0.55
2016-05-31 0.72 1.00 0.71 0.86 0.41 0.79 0.22 0.76 0.92
... fn_2 \
suffix ... WindSpeed Capacity p0.025 p0.05 p0.1 p0.5 p0.9
DateTime ...
2016-03-31 0.04 ... 0.80 0.79 0.38 0.94 0.91 0.18 0.27
2016-04-30 0.03 ... 0.60 0.97 0.04 0.69 0.04 0.65 0.94
2016-05-31 0.79 ... 0.78 0.53 0.83 0.93 0.92 0.12 0.15
p0.95 p0.975 suffix
DateTime
2016-03-31 0.14 0.39 0.91
2016-04-30 0.81 0.37 0.22
2016-05-31 0.65 0.06 0.11
[3 rows x 30 columns]