我正在尝试创建代码,以按小时绘制MTA中的条目数量。我有一个条目的csv数据集及其发生的时间。我已将数据集简化为仅包含条目的Pandas数据框,并为“小时”添加了一列以显示条目发生的时间。
但是,在绘制时,我不断收到“ ValueError:系列的真值不明确。请使用a.empty,a.bool(),a.item(),a.any()或a.all ()。”我不清楚这是什么意思,为什么会这样。
我尝试将“小时”列调整为日期时间;我尝试使用索引而不是数据框中的“小时”列。
from ggplot import *
import pandas as pd
turnstile_weather = pd.read_csv('/home/pi/Documents/Data analysis/turnstile_data_master_with_weather.csv')
def plot_weather_data(turnstile_weather):
entries_by_hour = pd.DataFrame(turnstile_weather['ENTRIESn_hourly'] \
.groupby(turnstile_weather['Hour']).sum())
entries_by_hour['Hour'] = set(turnstile_weather['Hour'])
plot = ggplot(entries_by_hour, \
aes(entries_by_hour['Hour'], entries_by_hour['ENTRIESn_hourly'])) \
+ geom_line()
print(plot)
plot_weather_data(turnstile_weather)
我希望得到一个折线图,小时为X轴,按小时数为Y轴,但是我得到一个错误:
ValueError Traceback (most recent call last)
<ipython-input-9-3cf39740bb64> in <module>
10 print(plot)
11
---> 12 plot_weather_data(turnstile_weather)
<ipython-input-9-3cf39740bb64> in plot_weather_data(turnstile_weather)
7 entries_by_hour = pd.DataFrame(turnstile_weather['ENTRIESn_hourly'].groupby(turnstile_weather['Hour']).sum())
8 entries_by_hour['Hour'] = set(turnstile_weather['Hour'])
----> 9 plot = ggplot(entries_by_hour, aes(entries_by_hour['Hour'], entries_by_hour['ENTRIESn_hourly'])) + geom_line()
10 print(plot)
11
/usr/local/lib/python3.5/dist-packages/ggplot/ggplot.py in __init__(self, aesthetics, data)
53 self._aes = aesthetics
54 self.data = data.copy()
---> 55 self._handle_index()
56 self.data = self._aes._evaluate_expressions(self.data)
57 self.data = self._aes.handle_identity_values(self.data)
/usr/local/lib/python3.5/dist-packages/ggplot/ggplot.py in _handle_index(self)
132
133 def _handle_index(self):
--> 134 if '__index__' in self._aes.values():
135 self.data['__index__'] = self.data.index
136
/usr/lib/python3.5/_collections_abc.py in __contains__(self, value)
688 def __contains__(self, value):
689 for key in self._mapping:
--> 690 if value == self._mapping[key]:
691 return True
692 return False
~/.local/lib/python3.5/site-packages/pandas/core/generic.py in __nonzero__(self)
1476 raise ValueError("The truth value of a {0} is ambiguous. "
1477 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
-> 1478 .format(self.__class__.__name__))
1479
1480 __bool__ = __nonzero__
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().