我有以下 DataFrame,想在对数据进行分组之后绘制数值及其误差线:
import pandas as pd
from pprint import pprint
def classify_data():
    """Build and return the sample race DataFrame.

    The frame has one row per measurement with the columns ``race``,
    ``qualifier``, ``participant``, ``avgsteps``, ``measureerror`` and
    ``id``.

    Returns:
        pandas.DataFrame: the eight sample rows.
    """
    race = ['race1', 'race1', 'race1', 'race1', 'race2', 'race2', 'race2', 'race2']
    qualifier = ['last', 'first', 'first', 'first', 'last', 'last', 'first', 'first']
    participant = ['rat', 'rat', 'cat', 'cat', 'rat', 'dog', 'dog', 'dog']
    # Stored as numbers rather than strings so they can be plotted and used
    # as error-bar magnitudes without a later conversion step.
    avgsteps = [223, 321, 445, 165, 216, 127, 122, 214]
    measureerror = [0.02, 0.01, 0.01, 0.05, 0.02, 0.01, 0.01, 0.05]
    # Named ``ids`` so the builtin ``id`` is not shadowed.
    ids = ['a', ' b', 'c', 'd', 'e', 'f', 'g', 'h']
    df = pd.DataFrame(
        {
            'race': race,
            'qualifier': qualifier,
            'participant': participant,
            'avgsteps': avgsteps,
            'measureerror': measureerror,
            # The id column is needed later as the x-axis of each plot.
            'id': ids,
        }
    )
    return df
我想将数据分组:
race qualifier participant avgsteps measureerror id
0 race1 first rat 321 0.01 b
1 race1 first cat 445 0.01 c
2 race1 first cat 165 0.05 d
3 race2 first dog 122 0.01 g
4 race2 first dog 214 0.05 h
我打算为每个 race(比赛)分别创建一张图。
例如,为 race1 绘制一张图:x 轴为 id,y 轴为 avgsteps,并以 measureerror 作为误差线。我还想根据 participant 的类型为数据点着色。
首先,我尝试了
# groupby() does not return a DataFrame: it yields a lazy DataFrameGroupBy
# that has to be iterated over or aggregated to obtain per-group data.
df2 = df.groupby(['race', 'qualifier'])
这会返回一个 DataFrameGroupBy 对象,我不确定接下来该如何处理。
编辑:
# Keep only the rows whose qualifier text contains 'first'.
is_first = df['qualifier'].str.contains('first')
df = df[is_first]
得到:
1 race1 first rat 321 0.01 b
2 race1 first cat 445 0.01 c
3 race1 first cat 165 0.05 d
6 race2 first dog 122 0.01 g
7 race2 first dog 214 0.05 h
但是,我不确定如何直接使用 groupby 来实现。
希望能得到一些关于如何继续的建议。
答案 0 :(得分:0)
我找到了一种不使用 groupby 的替代方法:
import pandas as pd
from pprint import pprint
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
def classify_data():
    """Plot avgsteps with measurement-error bars, one figure per race.

    Builds the sample frame, keeps only the rows whose qualifier is
    ``'first'`` and, for every race, writes a standalone HTML scatter plot
    named ``error-bar-<race>.html`` with ids on the x-axis and avgsteps on
    the y-axis. Each participant type is drawn as its own trace so that its
    markers and its error bars share one colour.
    """
    df = pd.DataFrame(
        {
            'race': ['race1', 'race1', 'race1', 'race1', 'race2', 'race2', 'race2', 'race2'],
            'qualifier': ['last', 'first', 'first', 'first', 'last', 'last', 'first', 'first'],
            'participant': ['rat', 'rat', 'cat', 'cat', 'rat', 'dog', 'dog', 'dog'],
            # Numbers, not strings: plotly treats string values as categories,
            # which breaks both the y-axis scale and the error-bar lengths.
            'avgsteps': [223, 321, 445, 165, 216, 127, 122, 214],
            'measureerror': [20, 30, 121, 55, 21, 121, 100, 5],
            'id': ['a', ' b', 'c', 'd', 'e', 'f', 'g', 'h'],
        }
    )
    # Exact comparison instead of a substring/regex match.
    df = df[df['qualifier'] == 'first']
    pprint(df['race'].unique())

    colors_idx = {'rat': 'rgb(215,48,39)', 'cat': 'rgb(215,148,39)', 'dog': 'rgb(215,148,20)'}

    # sort=False keeps the races in order of first appearance.
    for race_name, race_df in df.groupby('race', sort=False):
        pprint(race_df)
        data = []
        # error_y takes a single colour per trace, so a separate trace per
        # participant is what lets the bars match the markers.
        for participant, part_df in race_df.groupby('participant', sort=False):
            # Unknown participants fall back to plotly's default trace colour.
            color = colors_idx.get(participant)
            data.append(
                go.Scatter(
                    x=part_df['id'],
                    y=part_df['avgsteps'],
                    name=participant,
                    error_y=dict(
                        type='data',
                        array=part_df['measureerror'],
                        visible=True,
                        color=color,
                    ),
                    mode='markers',
                    marker=dict(size=15, color=color),
                )
            )
        # One file per race; a shared filename would overwrite earlier plots.
        plot(data, filename=f'error-bar-{race_name}.html')
# Run the example end to end.
classify_data()
唯一的问题是误差线的颜色与数据点的颜色不一致。关于如何让数据点和误差线使用相同的颜色,有什么建议吗?
如果有其他实现方法,也欢迎告知。