我正在寻找一种计算不同因素水平之间得分变化的方法(例如,治疗前后的问卷得分)。我想弄清楚有多少百分比的参与者在治疗前后得分有所提高,又有多少百分比的参与者没有改善。
我看过一些 dplyr 解决方案,但我觉得自己漏掉了其中的某一行代码,不过并不确定。
# Mock data: seven participant IDs, each listed twice (14 rows in total).
ID<-c("aaa","bbb","ccc","ddd","eee","fff", "ggg","aaa","bbb","ccc","ddd","eee","fff", "ggg")
# 14 random integers drawn from 1..40; no seed is set, so values differ per run.
Score<-sample(40,14)
# Measurement occasion for each row: first seven rows are 1, last seven are 2.
Pre_Post<-c(1,1,1,1,1,1,1,2,2,2,2,2,2,2)
# NOTE(review): cbind() on a character vector plus numeric vectors returns a
# character matrix, not a data frame, so every column is stored as text here.
df<-cbind(ID, Pre_Post, Score)
# NOTE(review): `df` is still a matrix at this point, and `$` extraction is not
# defined for a matrix, so this line appears to fail as written. The numeric
# conversion would have to run after as.data.frame() below -- confirm.
df$Score<-as.numeric(df$Score)
df<-as.data.frame(df)
#what I have tried
# NOTE(review): the grouping column is created above as `Pre_Post`; the name
# `Pre_post` used below differs in case, so it does not refer to that column.
# NOTE(review): there is no `%>%` after the group_by() line, so the mutate()
# call below is not part of the pipeline; it also nests mutate() inside mutate().
df2<-df%>%
group_by(ID, Pre_post)
mutate(Pct_change=mutate(Score/lead(Score)*100))
但是我收到了错误消息。而且,我也不确定这段代码的思路从一开始是否正确。
预期结果:我想得到得分有所改善的 ID 所占的百分比。例如,在我提供的模拟示例中,只有 42.86% 的 ID 从 Pre 到 Post 有所改善,而 57.14% 的 ID 在 Pre 和 Post 之间实际上变差了。
任何建议都将受到欢迎:)
答案 0 :(得分:2)
您的代码中有几处拼写错误(typo),这就是您收到错误消息的原因。
您可以执行以下操作来同时获得新旧分数:
#!/usr/bin/env python3
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
from mpl_toolkits.axes_grid1 import make_axes_locatable, axes_size
# border limits for plots
# (main() passes these as the imshow extent and as the x/y axis limits)
lowlim = 0.43
uplim = 0.52
# side length of each square data array that main() builds and plots
nx = 10
# one subplot column per entry; the name is also used in the column title
kernels = ['cubic_spline', 'quintic_spline',
'wendland_C2', 'wendland_C4', 'wendland_C6']
#========================
def main():
    """Plot a grid of imshow panels: one row per eta factor, one column per kernel.

    Every panel shows an ``nx`` x ``nx`` array of placeholder data.  Panels in
    the same row share one colour scale, and a single colorbar is attached to
    the last panel of each row.  The figure is saved as
    ``for_stackexchange.png`` and then closed.

    Reads the module-level names ``lowlim``, ``uplim``, ``nx`` and ``kernels``.
    Returns ``None``.
    """
    eta_facts = [0, 1, 2, 3, 4, 5]
    nrows = len(eta_facts)
    ncols = len(kernels)

    # --------------------------------
    # Loop and compute As
    # --------------------------------
    Ay_list = [[None for c in range(ncols)] for r in range(nrows)]
    for row in range(nrows):
        for col in range(ncols):
            # The builtin ``float`` replaces the ``np.float`` alias, which was
            # removed in NumPy 1.24 and now raises AttributeError.
            A = np.zeros((nx, nx), dtype=float)
            for i in range(nx):
                for j in range(nx):
                    # not a typo: need A[j,i] for imshow
                    A[j, i] = row + np.random.random() / 10
            Ay_list[row][col] = A

    # ------------------------------------
    # Now plot it
    # ------------------------------------
    fig = plt.figure(figsize=(3.5*ncols, 3.5*nrows))

    axrows = []
    i = 1  # add_subplot uses 1-based subplot indices
    for r in range(nrows):
        axcols = []
        for c in range(ncols):
            # Share x with the panel directly above and y with the panel to
            # the left; the first row / first column have nothing to share.
            share = {}
            if r > 0:
                share['sharex'] = axrows[r-1][c]
            if c > 0:
                share['sharey'] = axcols[c-1]
            axcols.append(fig.add_subplot(nrows, ncols, i,
                                          aspect='equal', **share))
            i += 1
        axrows.append(axcols)

    cmap = 'YlGnBu_r'

    for row in range(nrows):
        axcols = axrows[row]

        # One common colour range per row, so the row's colorbar is valid
        # for every panel in it.
        minval = min([np.min(Ay_list[row][c]) for c in range(ncols)])
        maxval = max([np.max(Ay_list[row][c]) for c in range(ncols)])

        for col, ax in enumerate(axcols):
            im = ax.imshow(Ay_list[row][col], origin='lower',
                           vmin=minval, vmax=maxval, cmap=cmap,
                           extent=(lowlim, uplim, lowlim, uplim),
                           # norm=matplotlib.colors.SymLogNorm(1e-3),
                           zorder=1)

            # only plot colorbar for last column
            if col == len(kernels)-1:
                divider = make_axes_locatable(ax)
                cax = divider.append_axes("right", size="5%", pad=0.15)
                fig.colorbar(im, cax=cax)

            ax.set_xlim((lowlim, uplim))
            ax.set_ylim((lowlim, uplim))

            # cosmetics: ticks and tick labels only on the outer left column
            # and the bottom row
            left = (col == 0)
            bottom = (row == len(eta_facts)-1)

            ax.tick_params(
                axis='both',         # apply to both the x- and the y-axis
                which='both',        # both major and minor ticks are affected
                bottom=bottom,       # bottom ticks only on the last row
                top=False,           # ticks along the top edge are off
                left=left,           # left ticks only on the first column
                right=False,         # ticks along the right edge are off
                labelbottom=bottom,  # bottom labels only on the last row
                labeltop=False,      # labels along the top edge are off
                labelleft=left,      # left labels only on the first column
                labelright=False)    # labels along the right edge are off

            if row == 0:
                ax.set_title(kernels[col] + ' kernel', fontsize=14)
            if col == 0:
                ax.set_ylabel(r"$\eta = $ "+str(eta_facts[row])+r"$\eta_0$")

    fig.suptitle(r"Some title", fontsize=18)
    plt.tight_layout(rect=(0, 0, 1, 0.97))
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.savefig('for_stackexchange.png', dpi=150)
    plt.close()

    print('finished.')
# Run the plotting routine only when this file is executed as a script.
if __name__ == '__main__':
    main()
要获取改进数量,您必须先将 `Score` 列转换为数字。用于并排显示前后得分的代码如下:
library(tidyverse)
# Reshape to wide form: one row per ID, with the scores for Pre_Post == 1 and
# Pre_Post == 2 in separate columns, then give those columns readable names.
df %>%
  spread(key = Pre_Post, value = Score) %>%
  rename(Score_pre = `1`, Score_post = `2`)
ID Score_pre Score_post
1 aaa 19 24
2 bbb 39 35
3 ccc 2 29
4 ddd 38 15
5 eee 36 9
6 fff 23 10
7 ggg 21 27
转换为数字:
Score
答案 1 :(得分:1)
另一个使用 dplyr 的方案:假设每个 ID 总是有两个值,其中 Pre 为 1、Post 为 2,则可以按 `ID` 进行 `group_by`,用第二个值减去第一个值,然后计算正值和负值所占的比例。
library(dplyr)
# Step 1: order rows so that, within each ID, the Pre_Post == 1 row comes
#         before the Pre_Post == 2 row.
# Step 2: per ID, take the second score minus the first.
# Step 3: collapse to one row holding the share of IDs whose score went up
#         (total_pos) and the share whose score went down (total_neg).
df %>%
  arrange(ID, Pre_Post) %>%
  group_by(ID) %>%
  summarise(change = Score[2] - Score[1]) %>%
  summarise(
    total_pos = sum(change > 0) / n(),
    total_neg = sum(change < 0) / n()
  )
# A tibble: 1 x 2
# total_pos total_neg
# <dbl> <dbl>
#1 0.429 0.571
数据
# Mock data: seven IDs, each measured once at occasion 1 and once at occasion 2.
ID <- rep(c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg"), times = 2)
Pre_Post <- rep(c(1, 2), each = 7)
# 14 random integers from 1..40; no seed is set, so values differ per run.
Score <- sample(40, 14)
df <- data.frame(ID, Pre_Post, Score)