我想在数据框中添加一列,其值由另一列计算得出。
import pandas as pd
import pandasql as pdsql
import csv
def filter_by_regular(filename):
    """Load turnstile data from a CSV file and keep only the 'REGULAR' rows.

    An earlier version built a SQL string but never executed it, so the
    unfiltered data was returned (the query also compared a quoted literal
    instead of the DESCn column). The filter is now applied with pandas.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame holding only the rows whose DESCn value is 'REGULAR'
        (surrounding whitespace ignored), re-indexed from 0. Column labels
        are returned exactly as they appear in the file header.

    Raises:
        KeyError: If the file has no DESCn column.
    """
    turnstile_data = pd.read_csv(filename)

    # Header labels may carry padding (e.g. ' DESCn'), so locate the column
    # by its stripped name rather than by an exact match.
    desc_column = next(
        (col for col in turnstile_data.columns if str(col).strip() == 'DESCn'),
        None,
    )
    if desc_column is None:
        raise KeyError("no 'DESCn' column found in %r" % (filename,))

    is_regular = (
        turnstile_data[desc_column].astype(str).str.strip() == 'REGULAR'
    )
    return turnstile_data[is_regular].reset_index(drop=True)
# Load 'master_file.txt' through filter_by_regular and keep the result.
turnstile_regular = filter_by_regular('master_file.txt')
# Preview the first rows of the loaded frame.
turnstile_regular.head()
# Inspect the column labels exactly as pandas stored them.
turnstile_regular.columns
Index([u'C/A', u' UNIT', u' SCP', u' DATEn', u' TIMEn', u' DESCn',
u' ENTRIESn', u' EXITSn'],
dtype='object')
然后,当我尝试访问 ENTRIESn 列并用它的值添加另一列时,Python 报错说找不到该列。
import pandas
def get_hourly_entries(df):
    """Add an 'ENTRIESn_hourly' column of row-to-row ENTRIESn differences.

    Each value is the current row's ENTRIESn minus the previous row's.
    Missing differences (the first row has no predecessor) are filled
    with 1.

    The fill is applied to the computed Series before it is stored. Calling
    fillna(inplace=True) on a column fetched back from the frame is chained
    assignment, which pandas warns about and which does not update the
    frame when Copy-on-Write is enabled.

    Args:
        df: DataFrame with a column labelled exactly 'ENTRIESn'.

    Returns:
        The same DataFrame object, modified in place with the new column.

    Raises:
        AttributeError: If df has no column named exactly 'ENTRIESn'
            (for example when the label carries a leading space).
    """
    hourly = df.ENTRIESn.diff(1)
    df['ENTRIESn_hourly'] = hourly.fillna(1)
    return df
# Add the hourly-entries column to the frame loaded earlier.
turnstile_hourly = get_hourly_entries(turnstile_regular)
# Preview the first rows of the result.
turnstile_hourly.head()
AttributeError Traceback (most recent call last)
<ipython-input-70-890cc0bc29bd> in <module>()
6 return df
7
----> 8 turnstile_hourly = get_hourly_entries(turnstile_regular)
9 turnstile_hourly.head()
<ipython-input-70-890cc0bc29bd> in get_hourly_entries(df)
2
3 def get_hourly_entries(df):
----> 4 df['ENTRIESn_hourly'] = df.ENTRIESn.diff(1)
5 df.ENTRIESn_hourly.fillna(1, inplace = True)
6 return df
/Users/flmlopes/anaconda3/envs/py2/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
3079 if name in self._info_axis:
3080 return self[name]
-> 3081 return object.__getattribute__(self, name)
3082
3083 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'ENTRIESn'
有人知道如何解决这个问题吗?
答案 0 :(得分:2)
这是你的索引:
Index([u'C/A', u' UNIT', u' SCP', u' DATEn', u' TIMEn', u' DESCn',
u' ENTRIESn', u' EXITSn'],
dtype='object')
请注意前导空格:
u' ENTRIESn'
因此,改变:
df['ENTRIESn_hourly'] = df.ENTRIESn.diff(1)
为:
df['ENTRIESn_hourly'] = df[u' ENTRIESn'].diff(1)
或者,先修复列名(去掉首尾空格):
# Strip leading/trailing whitespace from every column label, so that
# u' ENTRIESn' becomes u'ENTRIESn'.
turnstile_regular.columns = [x.strip() for x in turnstile_regular.columns]