我的数据看起来像这样
# Sample data: nine labelled rows (A-J, no E) with four integer columns V2..V5.
df <- data.frame(
  V2 = c(15L, 20L, 0L, 0L, 0L, 40L, 0L, 1538100000L, 0L),
  V3 = c(142480000L, 20L, 0L, 0L, 100L, 0L, 0L, 1444000000L, 100L),
  V4 = c(
    170130000L, 0L, 0L, 1577400000L, 0L, 0L, 1577400000L, 1577400000L, 20L
  ),
  V5 = c(176030000L, 0L, 0L, 176030000L, 0L, 20431000L, 0L, 1449700000L, 0L),
  row.names = c("A", "B", "C", "D", "F", "G", "H", "I", "J")
)
# V2 V3 V4 V5
#A 15 142480000 170130000 176030000
#B 20 20 0 0
#C 0 0 0 0
#D 0 0 1577400000 176030000
#F 0 100 0 0
#G 40 0 0 20431000
#H 0 0 1577400000 0
#I 1538100000 1444000000 1577400000 1449700000
#J 0 100 20 0
我首先想删除所有值都为零的行,这可以像下面这样完成:
# Keep only the rows whose row sum is strictly positive.
df[rowSums(df) > 0, ]
现在我还想删除那些只有一个非零值、其余全为零的行;然后再删除那些左半部分的列和右半部分的列都没有全部非零的行(也就是说,只保留左半或右半的值全部非零的行)。
预期输出应该如下所示
# V2 V3 V4 V5
#A 15 142480000 170130000 176030000
#B 20 20 0 0
#D 0 0 1577400000 176030000
#I 1538100000 1444000000 1577400000 1449700000
名为C的行被删除,因为所有成员都为零
F已删除,因为只有一个值,其余为零
G被删除,因为(假设有4列,则前2列或后2列应当全部有值)G虽然有两个值,但它们并不相邻
H被删除,因为只有一个值
J被删除,因为它的两个值既不同时位于前两列,也不同时位于后两列
答案 0 :(得分:2)
I. 第一次尝试
# Keep a row only when it
#   (1) does not sum to zero,
#   (2) has more than one non-zero value, and
#   (3) is fully non-zero in the left half (columns 1-2) or in the right half
#       (columns 3-4).
# `&` binds tighter than `|` in R, so the half test (3) is parenthesised
# explicitly; without the parentheses, conditions (1) and (2) would guard only
# the left-half test and the right-half test would bypass them.
# NOTE(review): column positions 1-4 are hard-coded, so this assumes exactly
# four columns as in the sample data.
df[
  rowSums(df) > 0 &
    rowSums(df == 0) != ncol(df) - 1 &
    ((df[, 1] != 0 & df[, 2] != 0) | (df[, 3] != 0 & df[, 4] != 0)),
]
# V2 V3 V4 V5
# A 15 142480000 170130000 176030000
# B 20 20 0 0
# D 0 0 1577400000 176030000
# I 1538100000 1444000000 1577400000 1449700000
II. 要知道被删除的行的索引,请将相反的条件放入 which() 函数中
# Rows rejected by the first condition: the row sum is not positive.
which(!(rowSums(df) > 0))
# C
# 3
# Rows rejected by the second condition: all but one column is zero.
which(rowSums(df == 0) == ncol(df) - 1)
# F H
# 5 7
答案 1 :(得分:1)
使用临时变量
## non-zero flags reshaped to: rows x (columns per half) x 2 halves
nz <- array(df != 0, dim = c(nrow(df), ncol(df) / 2, 2))
rowSums(nz) >= 2 # conditions 1 & 2: at least two non-zero values in the row
#[1] TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE
## rows whose left half (first slice) contains no zeros
apply(nz[, , 1], MARGIN = 1, FUN = all)
#[1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## rows whose right half (second slice) contains no zeros
apply(nz[, , 2], MARGIN = 1, FUN = all)
#[1] TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE
## condition 3: at least one of the two halves contains no zeros
rowSums(apply(nz, MARGIN = c(1, 3), FUN = all)) >= 1
#[1] TRUE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE
我认为前两个条件是多余的。
## a row is kept when one half is zero-free and it has 2+ non-zero values
ind <- rowSums(apply(nz, MARGIN = c(1, 3), FUN = all)) >= 1 & rowSums(nz) >= 2
which(ind)
#[1] 1 2 4 8
df[ind, ]
# V2 V3 V4 V5
#A 15 142480000 170130000 176030000
#B 20 20 0 0
#D 0 0 1577400000 176030000
#I 1538100000 1444000000 1577400000 1449700000