我正在创建数据帧 df_stats,并希望针对 t_list 中的每个 t(for t in t_list),用计算得到的变量填充一行数据。
运行下面的代码后,df_stats 中不会填入任何值;但是如果我单独运行 df_stats.append({...}) 这一行,
则会用当前 t 对应的值填充一行数据。
要让 df_stats 针对 t_list 中的每个 t 都填入一行数据,我遗漏了什么?
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import glob
# Script overview: read every CSV found under `path`, concatenate them, keep
# the rows whose parsed 'TimeStamp' falls on 2017-11-09 (00:00:00 inclusive to
# 23:50:00 exclusive), then loop over the distinct datetimes computing wind
# speed / power statistics that are meant to end up as rows of `df_stats`.
# NOTE(review): the indentation of the two `for` bodies is missing in this
# listing; the statements following each `for` presumably form its body.
# add all data files into one large df so all dates are accessible
# NOTE(review): in Python 3 a non-raw literal containing `\U` is parsed as a
# unicode escape; a raw string or forward slashes is presumably intended.
path = 'C:\Users\data' # use your path
all_files = glob.glob(path + "/*.csv")
# One DataFrame per CSV file; they are concatenated after the loop.
li = []
for filename in all_files:
df_data = pd.read_csv(filename, index_col=None, header=0)
li.append(df_data)
df_data = pd.concat(li, axis=0, ignore_index=True)
# Parse the 'TimeStamp' column into a new 'datetime' column used for filtering.
df_data['datetime'] = pd.to_datetime(df_data['TimeStamp'] )
# Keep only the rows inside the half-open window [2017-11-09 00:00, 23:50).
df = df_data[(df_data['datetime']>= datetime(2017, 11,9, 00,00, 00)) &
(df_data['datetime']< datetime(2017, 11, 9, 23,50, 00))]
##want a time array for all of the datetimes in the df
# The index of the grouped result has one entry per distinct 'datetime' value.
t_list = df.groupby("datetime").all().index
# Empty result frame: column names only, no rows yet.
df_stats = pd.DataFrame(columns = ['t', 'min_ws', 'max_ws', 'mean_ws','stdev_ws',
'TI_var_ws', 'min_power', 'max_power', 'mean_power', 'stdev_pwr', 'TI_var_pwr'])
for t in t_list:
# NOTE(review): `t_end` is not defined anywhere in this listing; presumably it
# should be derived from `t` (e.g. with the imported `timedelta`) - confirm.
df_t = df[(df['datetime']>=t) & (df['datetime']<t_end)]
#calc min/max for setting scale on images
# No-op self-assignment.
t = t
# NOTE(review): the wind-speed statistics and the power min/max below read
# from `df` (the whole filtered day), while the power mean/std read from
# `df_t` (the per-t slice) - confirm which frame is intended.
min_ws = df['wtc_AcWindSp_mean'].min()
max_ws = df['wtc_AcWindSp_mean'].max()
mean_ws = df['wtc_AcWindSp_mean'].mean()
stdev_ws = df['wtc_AcWindSp_mean'].std()
# Ratio of standard deviation to mean of the wind speed.
TI_var_ws = stdev_ws/mean_ws
min_power = df['wtc_ActPower_mean'].min()
max_power = df['wtc_ActPower_mean'].max()
mean_power = df_t['wtc_ActPower_mean'].mean()
stdev_pwr = df_t['wtc_ActPower_mean'].std()
# Ratio of standard deviation to mean of the power.
TI_var_pwr = stdev_pwr/mean_power
# DataFrame.append returns a new DataFrame instead of modifying `df_stats` in
# place; the return value is not assigned here, so `df_stats` stays empty.
# NOTE(review): the `...` after `'min_power':min_power,` looks like a pasted
# continuation marker; it is not valid inside the dict literal as written.
df_stats.append({'t':t, 'min_ws':min_ws, 'max_ws':max_ws, 'mean_ws':mean_ws,'stdev_ws':stdev_ws,
'TI_var_ws':TI_var_ws, 'min_power':min_power,...'max_power':max_power, 'mean_power': mean_power,
'stdev_pwr':stdev_pwr, 'TI_var_pwr':TI_var_pwr}, ignore_index=True)
答案 0 :(得分:1)
您需要把结果重新赋值给 DataFrame,因为 append 不会就地修改原对象,而是总是返回一个新对象:
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.append.html
# Rebind df_stats to the result: DataFrame.append does not modify the frame in
# place, it returns a new DataFrame that contains the extra row.
# NOTE: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; on
# newer versions wrap the row in a one-row DataFrame and use pd.concat instead.
df_stats = df_stats.append(
    {
        't': t,
        'min_ws': min_ws,
        'max_ws': max_ws,
        'mean_ws': mean_ws,
        'stdev_ws': stdev_ws,
        'TI_var_ws': TI_var_ws,
        'min_power': min_power,
        'max_power': max_power,
        'mean_power': mean_power,
        'stdev_pwr': stdev_pwr,
        'TI_var_pwr': TI_var_pwr,
    },
    ignore_index=True,
)
话虽如此,更好的做法是在创建 DataFrame 时就预先指定索引,然后按位置逐行写入,例如:
# Pass an index argument so that one (initially empty) row per entry of
# t_list exists up front; the loop then fills rows by position instead of
# growing the frame on every iteration.
df_stats = pd.DataFrame(
    index=range(len(t_list)),
    columns=[
        't', 'min_ws', 'max_ws', 'mean_ws', 'stdev_ws', 'TI_var_ws',
        'min_power', 'max_power', 'mean_power', 'stdev_pwr', 'TI_var_pwr',
    ],
)
# ...
for i, t in enumerate(t_list):
    # ... (compute the per-t statistics here, as in the question's loop)
    row = {
        't': t,
        'min_ws': min_ws,
        'max_ws': max_ws,
        'mean_ws': mean_ws,
        'stdev_ws': stdev_ws,
        'TI_var_ws': TI_var_ws,
        'min_power': min_power,
        'max_power': max_power,
        'mean_power': mean_power,
        'stdev_pwr': stdev_pwr,
        'TI_var_pwr': TI_var_pwr,
    }
    # Write into df_stats (the result frame), not df (the source data).
    # Values are passed as a list in column order so the assignment does not
    # depend on how pandas interprets a dict on the right-hand side.
    df_stats.iloc[i] = [row[col] for col in df_stats.columns]