我正在对一个面板数据框做分组排名,已经非常接近解决方案了(我也看过围绕这个问题的类似提问,例如:Specific group rankings in R)。数据大致如下:
Category ID Score.08.2007 Score.09.2007 Rank.08.2007 Rank.09.2007 ...
Orange FSGBR070N3 0.16 ... 5 ...
Orange FSGBR070N3 0.05 ... 7 ...
Orange FSGBR070N3 0.11 6
Orange FS00008L4G 0.28 1
Orange FS00008VLD 0.27 2
Orange FS00008VLD 0.27 3
Orange FS00008VLD 0.27 4
Orange FS00009SQX -2.03 8
Orange FS00009SQX NA
Orange FSUSA0A1KW NA
Orange FSUSA0A1KW NA
Orange FSUSA0A1KX NA
Orange FSUSA0A1KY NA
Orange FS0000B389 NA
Banana FS000092GP 96.25 1
Banana FS000092GP 96.25 2
Banana FS000092GP 96.25 3
Banana FS000092GP 52.33 4
Banana FS0000ATLN 31.73 5
Banana FSUSA0AVMF 1.38 7
Banana FSGBR058O8 1.37 8
Banana FSGBR05845 2.24 6
代码:
# Attempt: for every column from the 4th onward, rank the values within each
# Morningstar category, leaving NA scores as NA ranks.
for (i in 4:ncol(MRAR)){
# NOTE(review): lapply() returns a list with one rank vector per category, and
# each vector is only as long as that category's subset of rows. Assigning
# that list to a single column of eq_ranks looks like the cause of the
# "replacement element 1 has 3159 rows, need 3530" error quoted after this
# snippet -- confirm against the real data.
eq_ranks[i] <- lapply(unique(MRAR$Morningstar.Category),function(x)
{
# Rank column i over the rows of category x; na.last="keep" gives NA inputs
# an NA rank.
a <- rank(MRAR[MRAR$Morningstar.Category == x, i], na.last="keep")
return(a)
})
}
错误信息:
Error in `[<-.data.frame`(`*tmp*`, i, value = list(c(NA, 1047, NA, NA, :
replacement element 1 has 3159 rows, need 3530
我也看过 ave 方法,但 ave 的语法似乎无法满足 na.last = "keep" 的需求。此外,我还尝试了一种 dplyr 方法:
# Column names of ER as a plain character vector (cbind() builds a one-column
# matrix, as.vector() flattens it again).
aux <- as.vector(cbind(names(ER)))
# Attempted dplyr helper, apparently meant to rank the columns named in aux
# within each Morningstar category.
# NOTE(review): the pipeline starts from the `group_by` argument rather than
# from MRAR, and rank() is called without na.last = "keep", so rank()'s
# default (na.last = TRUE) places NA values last -- confirm this is the
# snippet that was actually run.
eq_ranks <- function(MRAR,group_by){
group_by %>%
group_by(!!Morningstar.Category) %>%
mutate_at(MRAR,quo(eq_rank=rank(MRAR)), vars(aux))
}
同样,这会把 NA 值当作 "last" 处理(而不是 "keep"),尽管语义逻辑看起来是正确的。
真的很感谢你的帮助, Wilhelm Fantastisch。
答案 0 :(得分:0)
不确定这是否正是您想要的,但对于下面的 data.frame,这段代码对我有效。希望对您有所帮助。
# Example data: 10 "Orange" rows followed by 10 "Banana" rows, with two score
# columns that contain runs of NA so the NA handling of the ranking can be
# checked. Scores come from runif(), so the values differ between runs.
df <- data.frame(
  Category = c(rep("Orange", 10), rep("Banana", 10)),
  Score.08.2007 = c(runif(6), rep(NA, 4), runif(4), rep(NA, 2), runif(4)),
  Score.09.2017 = c(
    runif(5), rep(NA, 3), runif(2), runif(4), rep(NA, 4), runif(2)
  ),
  # Spell out FALSE: the shorthand F is an ordinary variable that can be
  # reassigned, which would silently turn Category into a factor.
  stringsAsFactors = FALSE
)
library(dplyr)
# Rank the selected score columns within each Category and append the ranks
# to the input data frame.
#
# Args:
#   theseCols: columns to rank. Captured with enquo(), so a character vector
#     of column names or a tidyselect expression can be supplied.
#   newCols: character vector of names for the rank columns, one per ranked
#     column and in the same order as theseCols.
#   df: data frame with a `Category` column plus the columns to rank.
#
# Returns:
#   df with the rank columns bound on the right. NA scores get an NA rank
#   (na.last = "keep"); the remaining rows are ranked among themselves.
eq_ranks <- function(theseCols, newCols, df){
  theseCols <- enquo(theseCols)
  ranks <- df %>%
    group_by(Category) %>%
    # Hand rank() and its argument straight to mutate_at(); funs() is
    # deprecated in dplyr.
    mutate_at(vars(!!theseCols), rank, na.last = "keep") %>%
    ungroup() %>%
    # Keep only the ranked columns. Dropping just Category would also carry
    # over any column that was not ranked (e.g. an ID column) and misalign
    # the renaming that follows.
    select(!!theseCols)
  if (length(newCols) != ncol(ranks)) {
    stop("newCols must supply exactly one name per ranked column",
         call. = FALSE)
  }
  names(ranks) <- newCols
  cbind(df, ranks)
}
# Every column after the first (Category) holds scores to be ranked.
aux <- names(df)[-1]
# Output names: the same columns with "Score" swapped for "Rank".
newCols <- sub(pattern = "Score", replacement = "Rank", x = aux)
eq_ranks(theseCols = aux, newCols = newCols, df = df)
structure(list(Category = c("Orange", "Orange", "Orange", "Orange",
"Orange", "Orange", "Orange", "Orange", "Orange", "Orange", "Banana",
"Banana", "Banana", "Banana", "Banana", "Banana", "Banana", "Banana",
"Banana", "Banana"), Score.08.2007 = c(0.757087148027495, 0.202692255144939,
0.711121222469956, 0.121691921027377, 0.245488513959572, 0.14330437942408,
NA, NA, NA, NA, 0.239629415096715, 0.0589343772735447, 0.642288258532062,
0.876269212691113, NA, NA, 0.778914677444845, 0.79730882588774,
0.455274453619495, 0.410084082046524), Score.09.2017 = c(0.810870242770761,
0.604933290276676, 0.654723928077146, 0.353197271935642, 0.270260145887733,
NA, NA, NA, 0.99268406117335, 0.633493264438584, 0.213208135217428,
0.129372348077595, 0.478118034312502, 0.924074469832703, NA,
NA, NA, NA, 0.59876096714288, 0.976170694921166), Rank.08.2007 = c(6,
3, 5, 1, 4, 2, NA, NA, NA, NA, 2, 1, 5, 8, NA, NA, 6, 7, 4, 3
), Rank.09.2017 = c(6, 3, 5, 2, 1, NA, NA, NA, 7, 4, 2, 1, 3,
5, NA, NA, NA, NA, 4, 6)), .Names = c("Category", "Score.08.2007",
"Score.09.2017", "Rank.08.2007", "Rank.09.2017"), row.names = c(NA,
-20L), class = "data.frame")