我有一系列简单的用户事件,其中包括登录和注销,以及在两者之间执行的若干活动。我尝试了各种类型的图表,但都没有成功。
我想用某种颜色标出登录和注销之间的时间段,以表示用户处于活动状态。在Y轴上,每个活动应该用一个星形标记表示。
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from collections import namedtuple
# A single user event: when it happened and which kind of event it was.
Event = namedtuple('Event', ['event_time', 'event_type'])

# event_type codes: 1 -> Login, 2 -> watch, 3 -> buy, 4 -> like, 5 -> Logout
_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
_raw_events = (
    ('2018-04-20 10:00:00', 1),
    ('2018-04-20 12:00:00', 2),
    ('2018-04-20 14:00:00', 3),
    ('2018-04-20 16:00:00', 4),
    ('2018-04-20 19:00:00', 5),
    ('2018-04-21 07:00:00', 1),
    ('2018-04-21 10:00:00', 2),
    ('2018-04-21 14:00:00', 5),
)
user_events = [
    Event(event_time=datetime.strptime(stamp, _TIME_FORMAT), event_type=code)
    for stamp, code in _raw_events
]

# One row per event, indexed by the event timestamp.
df = pd.DataFrame(user_events, columns=['event_time', 'event_type']).set_index('event_time')
# Default pandas plot of event_type against the event_time index; with the
# default plot kind this joins consecutive events with a line.
df['event_type'].plot()
我想去掉图中连接各点的这条线。如有任何建议,不胜感激。
下面是基于 ALollz 的解答整理出的完整可运行方案:
import pandas as pd
# Used for the legend
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from datetime import datetime
from collections import namedtuple
# A single user event: its timestamp and its numeric event type.
Event = namedtuple('Event', 'event_time event_type')


def _at(stamp):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into a datetime."""
    return datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")


# event_type codes: 1 -> Login, 2 -> watch, 3 -> buy, 4 -> like, 5 -> Logout
user_events = [
    Event(event_time=_at('2018-04-20 10:00:00'), event_type=1),
    Event(event_time=_at('2018-04-20 12:00:00'), event_type=2),
    Event(event_time=_at('2018-04-20 14:00:00'), event_type=3),
    Event(event_time=_at('2018-04-20 16:00:00'), event_type=4),
    Event(event_time=_at('2018-04-20 19:00:00'), event_type=5),
    Event(event_time=_at('2018-04-21 07:00:00'), event_type=1),
    Event(event_time=_at('2018-04-21 10:00:00'), event_type=2),
    Event(event_time=_at('2018-04-21 14:00:00'), event_type=5),
]

# Build the frame, then move the timestamps into the index.
events_table = pd.DataFrame(user_events, columns=['event_time', 'event_type'])
df = events_table.set_index('event_time')
# Draw the user events as coloured stars on a timeline whose background is
# shaded by session state (pink = inactive, grey = between login and logout).

# Set a margin for the time on the plot
tmargin = pd.Timedelta(hours=2)
# The visible time window: the data range padded by tmargin on both sides.
x_min = df.index.min() - tmargin
x_max = df.index.max() + tmargin

# Set up the figure and the plotting region
_ = plt.figure(figsize=(12, 8))
_ = plt.xlabel(df.index.name)
_ = plt.xlim(x_min, x_max)

# Plot the points with scatter, using the colors you can define for each event type
cdict = {1: 'green', 2: 'maroon', 3: 'yellow', 4: 'cyan', 5: 'red'}
ldict = {1: '1 - Login', 2: '2 - Watch', 3: '3 - Buy', 4: '4 - Like', 5: '5 - Logout'}
_ = plt.scatter(df.index, df.event_type, marker='*', zorder=2, s=100,
                c=[cdict[value] for value in df.event_type.values])
_ = plt.xticks(rotation=90)

# Make the default background pink for inactive regions
_ = plt.axvspan(x_min, x_max, color='pink', alpha=0.5)

# Shade the active regions: the i-th login (type 1) is paired with the i-th
# logout (type 5). A white span first hides the pink background, then a
# translucent black span on top of it gives the grey "active" shade.
login_times = df[df.event_type == 1].index
logout_times = df[df.event_type == 5].index
for session_start, session_end in zip(login_times, logout_times):
    _ = plt.axvspan(session_start, session_end, color='white', alpha=1, lw=0)
    _ = plt.axvspan(session_start, session_end, color='black', alpha=0.2, lw=0)

# Set a margin for the event type
_ = plt.margins(.02)

# Create the legend for the points: one star handle per event type present
labels = []
for item in df.event_type.unique():
    labels.append(mlines.Line2D([], [], color=cdict[item], marker='*',
                                markersize=15, label=ldict[item], lw=0))

# Add the session information to the legend. The patches use the same colour
# and alpha as the spans drawn above so the swatches match the plot.
labels.append(mpatches.Patch(color='pink', alpha=0.5, label='Inactive Session'))
labels.append(mpatches.Patch(color='black', alpha=0.2, label='Active Session'))
_ = plt.legend(bbox_to_anchor=(1, 1), handles=labels, fontsize=20)

# Display the plot
_ = plt.show()
答案 0 :(得分:2)
我发现像这样复杂的图,最终总是要回到直接使用 matplotlib。
下面是我加了注释的代码,这样您可以看到每个部分的功能。
import pandas as pd
# Used for the legend
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
# Timeline of user events: one star per event, on a background that is red
# outside sessions and blue between each login and its logout.

# Padding added on both sides of the plotted time range
tmargin = pd.Timedelta(hours=2)
window_start = df.index.min() - tmargin
window_end = df.index.max() + tmargin

# Figure, x label and visible time window
_ = plt.figure(figsize=(12, 8))
_ = plt.xlabel(df.index.name)
_ = plt.xlim(window_start, window_end)

# Colour and legend text for every event type
cdict = {1: 'green', 2: 'maroon', 3: 'yellow', 4: 'cyan', 5: 'red'}
ldict = {1: '1 - Login', 2: '2 - Watch', 3: '3 - Buy', 4: '4 - Like', 5: '5 - Logout'}

# One star per event, coloured by its type and drawn above the shaded spans
star_colors = [cdict[value] for value in df.event_type.values]
_ = plt.scatter(df.index, df.event_type, marker='*', zorder=2, s=400, c=star_colors)
_ = plt.xticks(rotation=30)

# Whole window red by default (inactive)
_ = plt.axvspan(window_start, window_end, color='r', alpha=0.5)

# Each login (type 1) is paired in order with a logout (type 5); the white
# span hides the red background before the translucent blue goes on top.
is_login = df.event_type == 1
is_logout = df.event_type == 5
for login_time, logout_time in zip(df.loc[is_login].index, df.loc[is_logout].index):
    _ = plt.axvspan(login_time, logout_time, color='white', alpha=1, lw=0)
    _ = plt.axvspan(login_time, logout_time, color='b', alpha=0.2, lw=0)

# Margin for the event type axis
_ = plt.margins(.02)

# Legend handles: a star for every event type present, then the two
# session-state patches
labels = [
    mlines.Line2D([], [], color=cdict[event_code], marker='*',
                  markersize=15, label=ldict[event_code], lw=0)
    for event_code in df.event_type.unique()
]
labels += [
    mpatches.Patch(color='red', alpha=0.5, label='Inactive Session'),
    mpatches.Patch(color='blue', alpha=0.2, label='Active Session'),
]
_ = plt.legend(bbox_to_anchor=(1, 1), handles=labels, fontsize=20)

# Render the figure
_ = plt.show()
如果你想在星形标记之前加上一些虚线,可以使用下面的循环。(只要 df
的行数不太多,逐行迭代就没有问题。)
# Draw one dashed horizontal line per event, at the height of its event type
# and in that type's colour, running from the event time to event time plus
# its duration.
# NOTE(review): relies on df having an `event_duration` column, which the
# snippets above do not create -- presumably timedelta values; confirm.
# Columns are read by name, so extra columns or a different column order in
# df do not break the loop.
for start_time, event_type, event_duration in zip(df.index, df.event_type, df.event_duration):
    _ = plt.hlines(y=event_type, xmin=start_time, xmax=start_time + event_duration,
                   linestyle='--', color=cdict[event_type], lw=3)
由于你希望星形标记出现在该时间段的结束处,还需要修改代码中的其他部分,把持续时间计算进去:
# Put each star at the end of its event: event time shifted by the duration.
# NOTE(review): relies on df having an `event_duration` column -- confirm.
event_end_times = df.index + df.event_duration.values
star_colors = [cdict[value] for value in df.event_type.values]
_ = plt.scatter(event_end_times, df.event_type, marker='*', zorder=2, s=100,
                c=star_colors)