我想按规则修改级别名称,但遇到了下面的问题:
我的数据如下;df 变量最初是矩阵(matrix)类,我已把它转换成了 data.frame:
# Reproducible example data: `x` holds the identifiers to look up and `y` is
# random filler, so its values differ between runs.
df <- data.frame(
  x = c("P27C", "P31B", "P12E", "P3E", "P7A", "P7D", "P2A", "P7D",
        "P34", "P10C"),
  y = rnorm(10),
  stringsAsFactors = FALSE
)

# Comparison labels that the identifiers are searched in.
s <- c("P27CvsP31B", "P27CvsP3C", "P27CvsP3E", "P27CvsP6B", "P27CvsP7A",
       "P27CvsP7C", "P27DvsP27E", "P27DvsP2B", "P27DvsP31A", "P27DvsP31B",
       "P27DvsP3D", "P27DvsP7D", "P27EvsP2A", "P27EvsP2B", "P27EvsP2E",
       "P27EvsP2F", "P27EvsP2G", "P27EvsP34", "P7AvsP7H", "P7BvsP7D",
       "P7CvsP7G", "P7DvsP7E", "P7DvsP7F", "P7DvsP7G", "P7DvsP7H")
df

# For every identifier, collect all labels in `s` that contain it. grep() uses
# the identifier as a regular expression, so this is a substring match.
# lapply() returns a list, which makes `z` a list column.
df$z <- lapply(df$x, grep, s, value = TRUE)
# gives you the matches but empty slots for a missing value like "P12E"
df

# Identifiers without any match hold character(0); put the identifier itself
# back in those slots. A logical mask replaces the row-by-row loop and is also
# safe when `df` has zero rows.
no_match <- lengths(df$z) == 0L
df$z[no_match] <- as.list(df$x[no_match])
# restores the original name of unmatched values
df$z
# Renamed, but `z` is still in list format (not one string per row)!!!
我想要的输出是:
x y z
1 P27C 2.22354499 "P27CvsP31B, P27CvsP3C, P27CvsP3E, P27CvsP6B, P27CvsP7A, P27CvsP7C"
2 P31B 0.89197064 "P27CvsP31B, P27DvsP31B"
3 P12E -0.02313754 "P12E"
4 P3E 0.69916446 "P27CvsP3E"
5 P7A -0.44895512 "P27CvsP7A, P7AvsP7H"
6 P7D 1.77619979 "P27DvsP7D, P7BvsP7D, P7DvsP7E, P7DvsP7F, P7DvsP7G, P7DvsP7H"
7 P2A -0.18261732 "P27EvsP2A"
8 P7D 0.12025524 "P27DvsP7D, P7BvsP7D, P7DvsP7E, P7DvsP7F, P7DvsP7G, P7DvsP7H"
9 P34 -0.13434265 "P27EvsP34"
10 P10C 0.19971201 "P10C"
谢谢
答案 0 :(得分:1)
嵌套的 `sapply` 看起来有点难看。它遍历 `df` 的 `x` 列,并将所有条目与您的向量 `s` 进行匹配,创建一个匹配结果的列表。第二个 `sapply` 循环遍历该列表并把每组条目粘贴成一个字符串。如果没有匹配,则返回一个空单元格,我们通过在其位置替换为对应的 `df$x` 条目来处理它。
**修改**
# NOTE(review): in the source, the prose fragment "Based on @akrun's
# suggestion," is fused onto the first line of this snippet; that sentence
# continues after the printed output below.

# Build one comma-separated string of matching labels per identifier in df$x.
# This was originally written as two nested sapply() calls. The inner call is
# now lapply(): sapply() collapses its result into a matrix when every
# identifier has the same number (>= 2) of matches, and the outer call would
# then paste individual cells instead of one set of matches per identifier.
# The outer call is vapply() so the result is always a character vector with
# exactly one string per row, including when `df` has zero rows.
matches <- lapply(df$x, function(i) s[grepl(i, s)])
df$z <- vapply(matches, paste, character(1), collapse = ",")

# An identifier without any match collapses to "", so fall back to the
# identifier itself for those rows.
unmatched <- !nzchar(df$z)
df$z[unmatched] <- df$x[unmatched]
df
# x y z
#1 P27C -0.95290496 P27CvsP31B,P27CvsP3C,P27CvsP3E,P27CvsP6B,P27CvsP7A,P27CvsP7C
#2 P31B 1.62237939 P27CvsP31B,P27DvsP31B
#3 P12E 2.60014202 P12E
#4 P3E 0.13964851 P27CvsP3E
#5 P7A -1.35071967 P27CvsP7A,P7AvsP7H
#6 P7D 0.79893102 P27DvsP7D,P7BvsP7D,P7DvsP7E,P7DvsP7F,P7DvsP7G,P7DvsP7H
#7 P2A -1.55499584 P27EvsP2A
#8 P7D 0.46372006 P27DvsP7D,P7BvsP7D,P7DvsP7E,P7DvsP7F,P7DvsP7G,P7DvsP7H
#9 P34 0.05242956 P27EvsP34
#10 P10C -0.20203180 P10C
`data.table` 的选项将是: