我想用类似下面这样的数据框绘制一系列折线图:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Synthetic sample data: 10000 rows of random city, weekday, time-of-day
# (in seconds) and count values.
n_rows = 10000
city_pool = ['PHOENIX', 'ATLANTA', 'CHICAGO', 'MIAMI', 'DENVER']
weekday_pool = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df = pd.DataFrame({
    'CITY': np.random.choice(city_pool, n_rows),
    'DAY': np.random.choice(weekday_pool, n_rows),
    'TIME_BIN': np.random.randint(1, 86400, size=n_rows),
    'COUNT': np.random.randint(1, 700, size=n_rows),
})

# Interpret the integers as seconds, round to 10-minute bins, and keep the
# result as an 'HH:MM:SS' string.
rounded_times = pd.to_datetime(df['TIME_BIN'], unit='s').dt.round('10min')
df['TIME_BIN'] = rounded_times.dt.strftime('%H:%M:%S')
print(df)
CITY COUNT DAY TIME_BIN
0 ATLANTA 270 Wednesday 10:50:00
1 CHICAGO 375 Wednesday 12:20:00
2 MIAMI 490 Thursday 11:30:00
3 MIAMI 571 Sunday 23:30:00
4 DENVER 379 Saturday 07:30:00
... ... ... ... ...
9995 ATLANTA 107 Saturday 21:10:00
9996 DENVER 127 Tuesday 15:00:00
9997 DENVER 330 Friday 06:20:00
9998 PHOENIX 379 Saturday 19:50:00
9999 CHICAGO 628 Saturday 01:30:00
这是我目前的代码:
# The asker's attempt: pivot the frame on DAY and plot it against TIME_BIN
# with subplots=True. Note that `piv` receives the return value of .plot(),
# not the pivoted frame.
# NOTE(review): kind is spelled "Line" here; the pandas docs list the
# lowercase 'line' -- confirm whether the installed version accepts this.
piv = df.pivot(columns="DAY").plot(x='TIME_BIN', kind="Line", subplots=True)
plt.show()
但是 x 轴的格式很混乱,而且我需要每个城市各自对应一条线。我该如何解决这个问题?我认为我需要遍历一周中的每一天,而不是试图用一行代码创建整个子图数组。我也尝试过 seaborn,但没有成功。总而言之,这就是我想要实现的目标:
答案 0 :(得分:2)
我不确定透视(pivot)在这里能有多大帮助,因为最终你需要对数据做两次划分:一次按星期几划分,每一天对应一个子图;另一次按城市划分,每个城市对应一条单独着色的线。到了这一步,就已经达到 pandas 绘图封装所能做到的极限了。
使用 matplotlib 时,可以遍历这两个类别(星期几和城市),然后直接绘制对应的数据。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates
# Synthetic sample data: 10000 rows of random city, weekday, time-of-day
# (in seconds) and count values.
sample_size = 10000
city_choices = ['PHOENIX', 'ATLANTA', 'CHICAGO', 'MIAMI', 'DENVER']
weekday_choices = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                   'Friday', 'Saturday', 'Sunday']
df = pd.DataFrame({
    'CITY': np.random.choice(city_choices, sample_size),
    'DAY': np.random.choice(weekday_choices, sample_size),
    'TIME_BIN': np.random.randint(1, 86400, size=sample_size),
    'COUNT': np.random.randint(1, 700, size=sample_size),
})

# Keep TIME_BIN as real datetimes (rounded to 10-minute bins) so the x-axis
# can later be formatted with a date formatter.
seconds_as_datetime = pd.to_datetime(df['TIME_BIN'], unit='s')
df['TIME_BIN'] = seconds_as_datetime.dt.round('10min')
# Explicit weekday order, so the subplots run Monday through Sunday.
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
cities = np.unique(df["CITY"])

# One subplot row per weekday, all sharing the same x-axis.
fig, axes = plt.subplots(nrows=len(days), figsize=(13, 8), sharex=True)

# loop over days (one could use groupby here, but that would lead to days unsorted)
for ax, day in zip(axes, days):
    # Sort by time so each line is drawn in chronological order.
    day_df = df[df["DAY"] == day].sort_values("TIME_BIN")
    # loop over cities: one labelled line per city on this day's axes
    for city in cities:
        city_df = day_df[day_df["CITY"] == city]
        ax.plot(city_df["TIME_BIN"], city_df["COUNT"], label=city)
    ax.margins(x=0)
    ax.set_title(day)

# Show x tick labels as HH:MM; the formatter is set on the bottom axes.
fmt = matplotlib.dates.DateFormatter("%H:%M")
axes[-1].xaxis.set_major_formatter(fmt)
# Single legend, anchored just outside the right edge of the top subplot.
axes[0].legend(bbox_to_anchor=(1.02, 1))
# right=0.85 leaves room for that legend; hspace keeps the titles clear.
fig.subplots_adjust(left=0.05, bottom=0.05, top=0.95, right=0.85, hspace=0.8)
plt.show()
使用Seaborn FacetGrid可以获得大致相同的效果。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates
import seaborn as sns
# Synthetic sample data: 10000 rows of random city, weekday, time-of-day
# (in seconds) and count values. TIME_BIN is then replaced by datetimes
# rounded to 10-minute bins.
row_count = 10000
df = pd.DataFrame({
    'CITY': np.random.choice(
        ['PHOENIX', 'ATLANTA', 'CHICAGO', 'MIAMI', 'DENVER'], row_count),
    'DAY': np.random.choice(
        ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
         'Friday', 'Saturday', 'Sunday'], row_count),
    'TIME_BIN': np.random.randint(1, 86400, size=row_count),
    'COUNT': np.random.randint(1, 700, size=row_count),
}).assign(
    TIME_BIN=lambda frame: pd.to_datetime(frame['TIME_BIN'], unit='s').dt.round('10min')
)
# Explicit weekday order for the facet rows, and the distinct city names
# used for the hue levels.
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
cities = np.unique(df["CITY"])

# Sort by time first so plt.plot connects the points in chronological order.
time_sorted = df.sort_values('TIME_BIN')
grid = sns.FacetGrid(
    data=time_sorted,
    row="DAY",
    row_order=days,
    hue="CITY",
    hue_order=cities,
    sharex=True,
    aspect=5,
)
grid.map(plt.plot, "TIME_BIN", "COUNT")
grid.add_legend()
grid.fig.subplots_adjust(left=0.05, bottom=0.05, top=0.95, hspace=0.8)

# Show x tick labels as HH:MM; the formatter is set on the last facet.
grid.axes[-1, -1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%H:%M"))
plt.show()