我有一个如下所示的数据框:
LOS RFG TMV SHN QRE TES KLO
1 0 0 3 0 0 4 28
2 1 0 0 9 0 0 0
3 0 0 39 98 0 0 0
4 2 0 0 10 0 0 0
5 0 0 7 5 0 0 0
6 0 0 0 0 0 2 6
7 0 2 3 9 0 3 0
我想弄清楚:当某一列的值大于0时,同一行中还有哪些列的值也大于0,但我一直没能解决这个问题。我试着用
library(dplyr)
# Asker's attempt, reported below as not working.
df %>%
# Group rows that are identical across all seven value columns.
group_by(LOS,RFG,TMV,SHN,QRE,TES,KLO) %>%
# Store the size of each group in a helper column `n`.
mutate(n = n()) %>%
# NOTE(review): the data frame shown above has no column called `row`;
# presumably this regrouping step is where the attempt breaks - confirm.
group_by(row) %>%
# Within each group keep the first row that has the largest `n`.
slice(which.max(n)) %>%
# Drop the helper column again.
select(-n)
但它无法正常工作。也许我应该使用聚合?基本上,我想返回在同一行中经常同时具有大于0的值的那些列的名称。
理想情况下,我想弄清楚如何让R返回类似这样的东西
LOS: SHN
RFG: TMV, SHN, TES
SHN: LOS, TMV, RFG, TES
etc.
我刚接触R,所以不确定这是否可行,或者是否有更好的方法来得到类似的结果,但我非常感谢任何见解。
提前感谢任何建议!
更新: 答案很好 - 但有没有办法让返回的列名按其填充值从最大到最小排序,而不是按字母顺序排列?
答案 0 :(得分:2)
library(data.table)
# Parse the sample table from an inline multi-line string; the first line of
# the string holds the seven column names, the remaining lines the seven rows.
dat <- fread('LOS RFG TMV SHN QRE TES KLO
0 0 3 0 0 4 28
1 0 0 9 0 0 0
0 0 39 98 0 0 0
2 0 0 10 0 0 0
0 0 7 5 0 0 0
0 0 0 0 0 2 6
0 2 3 9 0 3 0')
# Column names of the parsed table; each one is passed to funky() below.
dat_names <- names(dat)
# Names of the other columns of `dat` that hold a value > 0 in at least one
# of the rows where column `x` is itself > 0.
#
# x: a single column name of `dat`, given as a character string.
# Returns a character vector in the column order of `dat`; it is empty when
# no other column is populated in those rows.
funky <- function(x) {
  # Rows in which the conditioning column is populated.
  active <- dat[get(x) > 0]
  # Count the positive entries column by column. vapply() walks the columns
  # directly, so the table is never coerced to a matrix as apply() would do.
  positive_counts <- vapply(
    active,
    function(col) length(col[col > 0]),
    integer(1)
  )
  populated <- names(positive_counts)[positive_counts > 0]
  # Leave out the conditioning column itself.
  populated[populated != x]
}
# simplify = FALSE always yields a list named after the columns; without it
# sapply() collapses the result into a vector or matrix whenever every column
# happens to have the same number of companions.
sapply(dat_names, funky, simplify = FALSE)
结果:
# $LOS
# [1] "SHN"
#
# $RFG
# [1] "TMV" "SHN" "TES"
#
# $TMV
# [1] "RFG" "SHN" "TES" "KLO"
#
# $SHN
# [1] "LOS" "RFG" "TMV" "TES"
#
# $QRE
# character(0)
#
# $TES
# [1] "RFG" "TMV" "SHN" "KLO"
#
# $KLO
# [1] "TMV" "TES"
答案 1 :(得分:2)
# For every column of `df`, list the other columns that hold a value > 0 in
# at least one of the rows where that column is itself > 0. The result is a
# list named after the columns of `df`.
setNames(
  # seq_len() yields an empty sequence for a zero-column input, whereas
  # 1:NCOL(df) would iterate over c(1, 0).
  object = lapply(seq_len(NCOL(df)), function(i) {
    # Rows in which the i-th column is populated; which() skips NA.
    active_rows <- which(df[, i] > 0)
    # drop = FALSE keeps the selection two-dimensional even when only one row
    # or one column remains, so the column indices below stay meaningful.
    others <- as.matrix(df[active_rows, -i, drop = FALSE])
    # Column position (within `others`) of every cell > 0, in column order.
    hit_cols <- which(others > 0, arr.ind = TRUE)[, 2]
    unique(colnames(df)[-i][hit_cols])
  }),
  nm = colnames(df)
)
#$LOS
#[1] "SHN"
#$RFG
#[1] "TMV" "SHN" "TES"
#$TMV
#[1] "RFG" "SHN" "TES" "KLO"
#$SHN
#[1] "LOS" "RFG" "TMV" "TES"
#$QRE
#character(0)
#$TES
#[1] "RFG" "TMV" "SHN" "KLO"
#$KLO
#[1] "TMV" "TES"
答案 2 :(得分:0)
首先,我使用相关内容构建一个未命名的列表,但包括零,然后我删除零值并命名列表
# Step 1: build an unnamed list holding, for each column of df1, the number
# of rows in which every other column is > 0, restricted to the rows where
# the column itself is > 0. Zero counts are still included here.
# Counting positive cells, rather than summing the values, keeps negative
# entries from cancelling out positive ones.
res <- lapply(seq_along(df1), function(i) {
  # which() skips NA, so rows with a missing value are not selected.
  active_rows <- which(df1[[i]] > 0)
  # Positional indexing with drop = FALSE removes column i unambiguously and
  # keeps a two-dimensional object; as.matrix() preserves the column names
  # even when no row is selected.
  others <- as.matrix(df1[active_rows, -i, drop = FALSE])
  colSums(others > 0, na.rm = TRUE)
}) %>%
  # Step 2: keep only the columns with a non-zero count, then name the list.
  lapply(function(counts) names(counts)[counts > 0]) %>%
  setNames(names(df1))
# $LOS
# [1] "SHN"
#
# $RFG
# [1] "TMV" "SHN" "TES"
#
# $TMV
# [1] "RFG" "SHN" "TES" "KLO"
#
# $SHN
# [1] "LOS" "RFG" "TMV" "TES"
#
# $QRE
# character(0)
#
# $TES
# [1] "RFG" "TMV" "SHN" "KLO"
#
# $KLO
# [1] "TMV" "TES"
答案 3 :(得分:-1)
# Sample data from the question: seven rows of seven numeric columns.
# QRE is zero in every row.
df <- data.frame(
  LOS = c(0, 1, 0, 2, 0, 0, 0),
  RFG = c(0, 0, 0, 0, 0, 0, 2),
  TMV = c(3, 0, 39, 0, 7, 0, 3),
  SHN = c(0, 9, 98, 10, 5, 0, 9),
  QRE = numeric(7),
  TES = c(4, 0, 0, 0, 0, 2, 3),
  KLO = c(28, 0, 0, 0, 0, 6, 0)
)
从 df 中删除全部为0的列。
library(dplyr)
library(iterators)
# Keep only the numeric columns that contain at least one value > 0.
# any(col > 0) states the condition directly: unlike mean(col) > 0 it keeps a
# populated column whose negative entries pull the mean to zero or below, and
# na.rm = TRUE stops a missing value from turning the test into NA.
df1 <- df %>%
  select_if(function(col) is.numeric(col) && any(col > 0, na.rm = TRUE))
使用 icount 作为迭代器,并借助 dplyr 的谓词函数,将数据框缩减到相关的行和列。
# For each column of df1 (visited by position through icount), collect the
# names of the other columns that hold a value > 0 in at least one of the
# rows where that column is itself > 0.
l1 <- lapply(icount(ncol(df1)), function(x) {
  # Name of the column whose populated rows are examined in this iteration.
  target <- colnames(df1)[x]
  df1 %>%
    # Rows where the target column is populated.
    filter(df1[[x]] > 0) %>%
    # Columns that are populated somewhere in those rows.
    select_if(function(col) any(col > 0, na.rm = TRUE)) %>%
    colnames() %>%
    # Remove the target column by name. Its position in the reduced frame
    # shifts whenever an earlier column has been dropped, so removing the
    # first column by position would discard the wrong one.
    setdiff(target)
})
输出:
[[1]]
[1] "SHN"
[[2]]
[1] "TMV" "SHN" "TES"
[[3]]
[1] "TMV" "SHN" "TES" "KLO"
[[4]]
[1] "RFG" "TMV" "SHN" "TES"
[[5]]
[1] "TMV" "SHN" "TES" "KLO"
[[6]]
[1] "TES" "KLO"