我正在读取多个文件（标题、列等都相同），并对每个文件执行一些预处理。如何将 for 循环每次迭代的输出合并到一个单独的 DataFrame 中？我尝试过使用 data.append(df)，但结果中也包含了标题列，而且得到的是一个列表，而不是保持 DataFrame 格式。
# Load several identically-structured whitespace-delimited files, preprocess
# each one, and merge the per-file results into one DataFrame.
import pandas as pd  # harmless if pandas is already imported elsewhere in the file

# Directory containing the input files (must end with '/' because the file
# name is appended by plain string concatenation below).
# NOTE: `dir` shadows the builtin of the same name; the name is kept so that
# any later code referring to it keeps working.
dir = 'C:/PythonProjects/PAMAP2_Dataset/Protocol/'
filelist = [
    'subject101.dat',
    'subject102.dat',
    'subject103.dat',
    'subject104.dat',
    'subject105.dat',
    'subject106.dat',
    'subject107.dat',
]
# Integer column labels to keep. The files are read with header=None, so
# pandas labels the columns 0, 1, 2, ... by position.
columns = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
]
# Only rows whose value in column 1 is one of these are kept.
ID_rows = [3, 4, 12, 13]

# Per-file preprocessed frames, in the same order as `filelist`.
data = []
for filename in filelist:
    path = dir + filename
    # sep=r'\s+' splits on runs of whitespace; it replaces the deprecated
    # delim_whitespace=True and parses the file the same way.
    df = pd.read_csv(path, header=None, sep=r'\s+')
    print('Done reading data file ', path)
    df = df[columns]
    df = df[df[1].isin(ID_rows)]
    df = df.fillna(0)
    # Standardize every remaining column using this file's own mean and std.
    # NOTE(review): this also rescales column 1 (the column used for the row
    # filter above), so its original values are lost. If that column is meant
    # to be a label, exclude it from the normalization - confirm intent.
    df = (df - df.mean()) / df.std()
    data.append(df)

# Appending DataFrames to a list only collects them; pd.concat stacks them
# row-wise into a single DataFrame. ignore_index=True renumbers the rows
# 0..n-1 instead of repeating each file's original row numbers.
combined = pd.concat(data, ignore_index=True)