[图片] [图片] [图片]
我有一些 csv 文件,文件名分别为 filename1.in.csv 和 filename1.out.csv、filename2.in.csv 和 filename2.out.csv,依此类推。这些文件位于文件夹及其子文件夹中。我想分别对 .in.csv 文件和 .out.csv 文件计算一些统计值,但最终所有结果都需要逐行写入同一个 csv 文件(在我的代码中称为 OutputFile)。每一行包含输入文件的名称(作为 className)以及计算得到的各项数值。我附上了一张 csv 文件的截图,它是用来计算统计值的输入文件(inFile)。我没有得到想要的输出,而是出现了 NameError: maxTimeIn, minTimeIn, stdTimeIn, qual1TimeIn, qual2TimeIn, maxLenIn, minLenIn, stdLenIn, qual1LenIn, qua12LenIn, maxTimeOut, minTimeOut, stdTimeOut, qual1TimeOut, qual2TimeOut, maxLenOut, minLenOut, stdLenOut, qual1LenOut, qua12LenOut, className not defined。
我是Python的新手,所以不确定我的代码是否会按要求提供输出,非常感谢您的帮助。谢谢
import os
import pandas as pd
import csv
# Collect statistics for every ``<name>.in.csv`` / ``<name>.out.csv`` pair
# found under ``startdir`` and write them to a single CSV file, one row per
# pair.  Rows are accumulated first and written once after the walk: a row
# needs the values of both files of a pair, and reopening the output in 'w'
# mode for each input file would truncate the rows written before.
startdir = '.'
suffix = '.csv'
outputFile = '/root/Desktop/OutputFile.csv'

# (suffix used in the output column names, ending of the matching input files)
endings = (('In', '.in' + suffix), ('Out', '.out' + suffix))
# (label used in the output column names, column read from the input file)
measures = (('Time', 'frame.time_delta_displayed'), ('Len', 'frame.len'))

# maxTimeIn, minTimeIn, ..., q2LenIn, maxTimeOut, ..., q2LenOut, activity
header = [stat + label + direction
          for direction, _ in endings
          for label, _ in measures
          for stat in ('max', 'min', 'std', 'q1', 'q2')]
header.append('activity')

# One row per (directory, base name), so the .in and .out files of the same
# pair fill the same row while equally named files in other folders do not.
rows = {}
for root, dirs, files in os.walk(startdir):
    for name in files:
        for direction, ending in endings:
            if name.endswith(ending):
                break
        else:
            # Neither an .in.csv nor an .out.csv file: do not even parse it.
            continue

        # Strip ".in.csv" / ".out.csv" so both files of a pair share a key.
        className = name[:-len(ending)]
        data = pd.read_csv(os.path.join(root, name))
        row = rows.setdefault((root, className), {'activity': className})

        for label, column in measures:
            values = data[column]
            row['max' + label + direction] = values.max()
            row['min' + label + direction] = values.min()
            row['std' + label + direction] = values.std()
            row['q1' + label + direction] = values.quantile(0.25)
            row['q2' + label + direction] = values.quantile(0.5)

# newline='' is what the csv module expects for files it writes; the ``with``
# block closes the file, so no explicit close() is needed.  Columns of a
# missing partner file are left empty (restval).
with open(outputFile, 'w', newline='') as csvFile:
    writer = csv.DictWriter(csvFile, fieldnames=header, restval='')
    writer.writeheader()
    writer.writerows(rows.values())
答案 0 :(得分:0)
请尝试以下代码:它使用 pathlib 代替 os.path,并将统计值的计算重构为一个使用 Pandas 方法的函数:
from pathlib import Path
import pandas as pd
def prepare_values(df, columns=('frame.time_delta_displayed', 'frame.len')):
    """Return summary statistics for the given columns of *df* as a flat list.

    For each column, in the order given, five values are appended:
    maximum, minimum, standard deviation, 0.25 quantile and 0.5 quantile.

    Args:
        df: DataFrame containing every column named in *columns*.
        columns: Column names to summarise.  Defaults to the two columns
            the surrounding script works with, so existing one-argument
            calls keep returning the same ten values.

    Returns:
        A list with ``5 * len(columns)`` values.

    Raises:
        KeyError: If a requested column is missing from *df*.
    """
    df_values = []
    for col in columns:
        series = df[col]
        df_values.extend([
            series.max(),
            series.min(),
            series.std(),
            series.quantile(0.25),
            series.quantile(0.5),
        ])
    return df_values
# Build one row per activity holding the statistics of its ``.in.csv`` and
# ``.out.csv`` files, then write everything to ``all_data.csv``.
source_dir = Path('stat')

in_cols = ['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn',
           'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn', 'q2LenIn']
out_cols = ['maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut',
            'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut']

frames = []
for pattern, cols in (('**/*.in.csv', in_cols), ('**/*.out.csv', out_cols)):
    records = []
    for file in source_dir.glob(pattern):
        # "name.in.csv" -> stem "name.in" -> activity "name"
        record = {'activity': file.stem.split('.')[0]}
        record.update(zip(cols, prepare_values(pd.read_csv(file))))
        records.append(record)
    # Passing the columns explicitly keeps 'activity' present even when no
    # file matched, so the join below does not fail with a KeyError.
    frames.append(pd.DataFrame(records, columns=['activity'] + cols))

in_df, out_df = frames

all_df = in_df.join(out_df.set_index('activity'), on='activity', how='outer')
# Drop rows that carry no statistics at all (every column except 'activity'
# is empty); the original referenced an undefined name ``df_all`` here.
all_df = all_df.dropna(subset=all_df.columns.tolist()[1:], how='all')
# A missing partner file shows up as NaN after the outer join; report it as 0.
all_df = all_df.fillna(0)
all_df.to_csv('all_data.csv', index=False)