我是Python新手。我有一份按医院汇总的医疗数据,想为每家医院绘制数据的收集时间段和患者数量;同时还想在图表中体现每家医院的缺失数据数量,这样就能知道各医院在哪些月份没有录入数据。下面是我的数据摘要:
# Sample records: one row per (hospital, period) pair. The string 'NAN'
# stands in for a patient count that was never entered.
hospital_names = ['Hos1', 'Hos1', 'Hos2', 'Hos2', 'Hos3', 'Hos3']
periods = ['20-Apr', '21-Apr', '20-Apr', '21-Aug', '20-Apr', '21-Apr']
patient_counts = [30, 'NAN', 45, 56, 'NAN', 67]

data = {
    'Hosp_name': hospital_names,
    'Period': periods,
    'Num_of_patients': patient_counts,
}
df = pd.DataFrame(data)
如何转换这些数据,才能正确地绘图并在图中体现缺失的数据?这是我尝试过的代码:
# The asker's original attempt: a plotly express bar chart with one group per
# hospital, passing two columns as the y values.
# NOTE(review): 'period' is lowercase here, while the sample data defines the
# column as 'Period' -- confirm whether this mismatch is part of the problem.
import plotly.express as px
fig = px.bar(df, x='Hosp_name', y=['period','Num_of_patients'])
fig.show()
预期输出是每家医院的条形图,显示数据收集的期间、缺失值数量和患者数量。
答案 0 :(得分:0)
这是我根据自己的理解给出的版本,使用matplotlib来实现:
import pandas as pd

data = {
    'Hosp_name': ['Hos1', 'Hos1', 'Hos2', 'Hos2', 'Hos3', 'Hos3'],
    'Period': ['20-Apr', '21-Apr', '20-Apr', '21-Aug', '20-Apr', '21-Apr'],
    'Num_of_patients': [30, 'NAN', 45, 56, 'NAN', 67],
}

# Convert the counts to numbers BEFORE aggregating: the raw column mixes ints
# with the placeholder string 'NAN', and summing such an object column breaks
# as soon as a (Period, Hosp_name) group holds more than one row.
records = pd.DataFrame(data)
records['Num_of_patients'] = pd.to_numeric(records['Num_of_patients'], errors='coerce')

# min_count=1 keeps a group whose values are all missing as NaN instead of
# letting it collapse to 0, so missing entries stay distinguishable.
df = records.groupby(['Period', 'Hosp_name'])[['Num_of_patients']].sum(min_count=1)
Out[1]:
Num_of_patients
Period Hosp_name
20-Apr Hos1 30.0
Hos2 45.0
Hos3 NaN
21-Apr Hos1 NaN
Hos3 67.0
21-Aug Hos2 56.0
# Reshape to one row per period and one column per hospital, then replace
# the cells that have no value with 0.
period_by_hospital = df.pivot_table(
    values='Num_of_patients',
    index='Period',
    columns='Hosp_name',
)
df1 = period_by_hospital.fillna(0)
df1
Out[2]:
Hosp_name Hos1 Hos2 Hos3
Period
20-Apr 30.0 45.0 0.0
21-Apr 0.0 0.0 67.0
21-Aug 0.0 56.0 0.0
df1 = df1.reset_index()
import matplotlib.pyplot as plt
%matplotlib inline
for i,col in enumerate(df1.columns[1:]):
bars= plt.bar([x+i*0.3 for x in list(df1.index)], df1[col], width = 0.3, label = col)
for bar in bars:
plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() - 5, str(int(bar.get_height())),ha='center', color='w', fontsize=11)
plt.xticks(list(df1.index), df1['Period'])
plt.legend()
data = {
    'Hosp_name': ['Hos1', 'Hos1', 'Hos2', 'Hos2', 'Hos3', 'Hos3'],
    'Period': ['20-Apr', '21-Apr', '20-Apr', '21-Aug', '20-Apr', '21-Apr'],
    'Num_of_patients': [30, 'NAN', 45, 56, 'NAN', 67],
    'age': [11, 21, 31, 36, 26, 16],
    'patient_visits': [20, 30, 40, 50, 60, 10],
}

# Convert the counts to numbers BEFORE aggregating: the raw column mixes ints
# with the placeholder string 'NAN', and summing such an object column breaks
# as soon as a (Period, Hosp_name) group holds more than one row.
records = pd.DataFrame(data)
records['Num_of_patients'] = pd.to_numeric(records['Num_of_patients'], errors='coerce')

# Summing ages is not meaningful, so age is averaged per group while the two
# count columns are summed. min_count=1 keeps an all-missing group as NaN
# rather than turning it into 0.
df = records.groupby(['Period', 'Hosp_name'])[['Num_of_patients', 'age', 'patient_visits']].agg({
    'Num_of_patients': lambda counts: counts.sum(min_count=1),
    'patient_visits': 'sum',
    'age': 'mean',
})
df
Out[10]:
Num_of_patients patient_visits age
Period Hosp_name
20-Apr Hos1 30.0 20 11
Hos2 45.0 40 31
Hos3 NaN 60 26
21-Apr Hos1 NaN 30 21
Hos3 67.0 10 16
21-Aug Hos2 56.0 50 36
# Pivot all three metrics at once: rows are periods, and the columns form a
# two-level (metric, hospital) index. Cells with no value become 0.
# The `values` argument must be a list; the opening bracket was missing.
df1 = df.pivot_table(
    index='Period',
    values=['Num_of_patients', 'patient_visits', 'age'],
    columns='Hosp_name',
).fillna(0)
# Move 'Period' back into an ordinary column so the integer index can be used
# for bar positions when plotting.
df1 = df1.reset_index()
df1
Out[11]:
Period Num_of_patients age patient_visits
Hosp_name Hos1 Hos2 Hos3 Hos1 Hos2 Hos3 Hos1 Hos2 Hos3
0 20-Apr 30.0 45.0 0.0 11.0 31.0 26.0 20.0 40.0 60.0
1 21-Apr 0.0 0.0 67.0 21.0 0.0 16.0 30.0 0.0 10.0
2 21-Aug 0.0 56.0 0.0 0.0 36.0 0.0 0.0 50.0 0.0
您可以根据需要进行可视化。
import matplotlib.pyplot as plt
%matplotlib inline
level_1 = ['Num_of_patients', 'age', 'patient_visits']
level_2 = ['Hos1', 'Hos2', 'Hos3']
color = ['r','g','b']
fig, ax = plt.subplots(3,1, sharex = True, gridspec_kw={'hspace': 0.3}, figsize=(10,5))
for i, metric in enumerate(level_1):
for j, hosp in enumerate(level_2):
bars = ax[i].bar([x+j*0.2 for x in list(df1.index)], df1[(metric, hosp)], width = 0.2, color = color[j], label = hosp)
for bar in bars:
ax[i].text(bar.get_x() + bar.get_width()/2, bar.get_height() - 8, str(int(bar.get_height())),
ha='center', color='w', fontsize=8)
ax[i].set_title(f'{metric}', loc = 'right')
ax[i].legend()
plt.xticks(list(df1.index), df1['Period'])
import matplotlib.pyplot as plt
%matplotlib inline
level_1 = ['Hos1', 'Hos2', 'Hos3']
level_2 = ['Num_of_patients', 'age', 'patient_visits']
color = ['r','g','b']
fig, ax = plt.subplots(3,1, sharex = True, gridspec_kw={'hspace': 0.3}, figsize=(5,5))
for i, hosp in enumerate(level_1):
for j, metric in enumerate(level_2):
bars = ax[i].bar([x+j*0.2 for x in list(df1.index)], df1[(metric, hosp)], width = 0.2, color = color[j], label = metric)
for bar in bars:
ax[i].text(bar.get_x() + bar.get_width()/2, bar.get_height() - 8, str(int(bar.get_height())),
ha='center', color='w', fontsize=8)
ax[i].set_title(f'{metric}', loc = 'right')
ax[i].legend()
_ = plt.xticks(list(df1.index), df1['Period'])