Question

在大型数据框中，我想删除第6列中有1的行。此外，还应删除该行之后的行。有答案的人吗？

Partitions

示例数据如下：

    1        1      neutral            3  450    0
    2        1          con            1  538    0
    3        1      neutral            3  609    0
    4        1          inc            0  451    0
    5        1          inc            0  413    0
    6        1      neutral            3  425    1
    7        1          inc            0  514    0
    8        1          con            1  569    0

我试过一些写法，但显然都不对。结果应该是这样的（第6行及其后面的第7行被删除）：

1        1      neutral            3  450    0
2        1          con            1  538    0
3        1      neutral            3  609    0
4        1          inc            0  451    0
5        1          inc            0  413    0
8        1          con            1  569    0

Answer 1

这是一个解决方案，首先提取要删除的行：

# Row numbers whose 6th column equals 1 (which() skips NA comparisons).
rem <- which(Pb[, 6L] == 1L)

然后可以通过以下操作删除这些行以及它们各自的下一行：

# Keep every row that is neither flagged nor directly after a flagged row.
# Selecting the rows to keep (instead of Pb[-idx, ]) stays correct when `rem`
# is empty: Pb[-integer(0), ] would return zero rows rather than all of them.
# Indices past the last row (rem + 1 for a flagged final row) simply never
# match anything in seq_len(nrow(Pb)).
Pb <- Pb[setdiff(seq_len(nrow(Pb)), c(rem, rem + 1)), ]

如果您担心最后一行中可能有1（此时 rem+1 会超出数据框的行数），并且希望去掉重复的行号：

# Rows flagged with a 1 in column 6.
rem <- which(Pb[, 6] == 1)
# Add the row that follows each flagged row.
rem <- c(rem, rem + 1)
# Drop duplicates and any index past the last row. The bound must be `<=`:
# with `<`, a 1 in the last row is filtered out of `rem` and never removed.
rem <- unique(rem[rem <= nrow(Pb)])
# Negative indexing with an empty vector selects zero rows, so only subset
# when there is actually something to remove.
if (length(rem) > 0) {
  Pb <- Pb[-rem, ]
}

Answer 2

您可以使用dplyr中的filter和lag函数：

df
#   V1 V2      V3 V4  V5 V6
# 1  1  1 neutral  3 450  0
# 2  2  1     con  1 538  0
# 3  3  1 neutral  3 609  0
# 4  4  1     inc  0 451  0
# 5  5  1     inc  0 413  0
# 6  6  1 neutral  3 425  1
# 7  7  1     inc  0 514  0
# 8  8  1     con  1 569  0

library(dplyr)
# Keep a row only if V6 is not 1 in that row and not 1 in the row before it.
# default = 0 gives the first row a non-flagged "previous" value.
df %>%
  filter(V6 != 1 & lag(V6, default = 0) != 1)

#   V1 V2      V3 V4  V5 V6
# 1  1  1 neutral  3 450  0
# 2  2  1     con  1 538  0
# 3  3  1 neutral  3 609  0
# 4  4  1     inc  0 451  0
# 5  5  1     inc  0 413  0
# 6  8  1     con  1 569  0

数据：

# Example data: eight rows, with the only flag (V6 == 1) in row 6.
# V3 is a factor with levels in alphabetical order.
df <- data.frame(
  V1 = 1:8,
  V2 = rep(1L, 8),
  V3 = factor(
    c("neutral", "con", "neutral", "inc", "inc", "neutral", "inc", "con"),
    levels = c("con", "inc", "neutral")
  ),
  V4 = c(3L, 1L, 3L, 0L, 0L, 3L, 0L, 1L),
  V5 = c(450L, 538L, 609L, 451L, 413L, 425L, 514L, 569L),
  V6 = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L)
)

Answer 3

我们可以使用data.table

library(data.table)
# Inner call: .I[!!V6] gives the row numbers where V6 is non-zero (!! coerces
# V6 to logical); c(i1, i1+1) adds the row after each of them. The unnamed
# result column is auto-named V1, so `$V1` here is that index vector, not the
# V1 column of df.
# Outer call: the leading `!` in i keeps the rows NOT listed in the index vector.
# NOTE(review): behaviour when i1+1 exceeds nrow(df) (a 1 in the last row) is
# not shown here -- confirm against the data.table documentation.
setDT(df)[!df[, {i1 <- .I[!!V6]; .(c(i1,i1+1))}]$V1]
#   V1 V2      V3 V4  V5 V6
#1:  1  1 neutral  3 450  0
#2:  2  1     con  1 538  0
#3:  3  1 neutral  3 609  0
#4:  4  1     inc  0 451  0
#5:  5  1     inc  0 413  0
#6:  8  1     con  1 569  0

或使用shift

# Keep a row only when V6 is zero in that row (!V6) and in the previous row
# (shift(!V6)); fill = TRUE supplies the missing "previous" value for row 1.
setDT(df)[!V6 & shift(!V6, fill = TRUE)]    
#   V1 V2      V3 V4  V5 V6
#1:  1  1 neutral  3 450  0
#2:  2  1     con  1 538  0
#3:  3  1 neutral  3 609  0
#4:  4  1     inc  0 451  0
#5:  5  1     inc  0 413  0
#6:  8  1     con  1 569  0

Answer 4

我正在组建一个数据框：

  # Random example data: a is standard normal, b is a 0/1 draw with P(1) = 0.2.
  # No seed is set, so every run produces different values.
  dfm <- data.frame(a = rnorm(8), b = rbinom(8, 1, 0.2))

# Fixed copy of one such draw, kept commented out. Every continuation line
# needs its own `#`; otherwise the fragment is parsed as code and fails.
# Note: these values are not the same draw as the printed output below.
#dfm <- structure(list(a = c(-1.06507365382823, 0.64103474967184, 0.0525921813159307, 
#0.0465414259158497, -0.65811357438377, -0.466590684535497, -0.246266056446691, 
#-0.397561700830275), b = c(1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L)), .Names = c("a", 
#"b"), row.names = c(NA, -8L), class = "data.frame")
  dfm
           a b
1 -1.6687530 0
2 -1.1303493 1
3  0.1415896 0
4  1.8102502 1
5 -1.5421675 0
6  0.1462142 0
7  0.0821345 0
8  0.4063876 0

which（）函数可以很好地解决这些问题：

  # Row numbers where b is 1 (which() ignores NA comparisons).
  rows_with_ones <- which(dfm$b == 1)
  # The row directly after each of those. A 1 in the last row would point one
  # past the end of the data frame, so such indices are dropped.
  subsequent_rows <- rows_with_ones + 1
  subsequent_rows <- subsequent_rows[subsequent_rows <= nrow(dfm)]
  # unique(): consecutive 1s would otherwise list the same row twice.
  rows_to_remove <- unique(c(rows_with_ones, subsequent_rows))

然后使用base R的子集操作来删除这些行：

  # Select the rows to keep rather than negating rows_to_remove: when b has no
  # 1s, dfm[-integer(0), ] would return zero rows instead of the whole frame.
  dfm2 <- dfm[setdiff(seq_len(nrow(dfm)), rows_to_remove), ]
  dfm2
           a b
1 -1.6687530 0
6  0.1462142 0
7  0.0821345 0
8  0.4063876 0

或者，您可以使用简单的R子集来更快地完成此操作并减少击键次数（但可能不容易为人类阅读）：

# Logical-mask version. A row is kept only if b is not 1 in that row AND not 1
# in the row before it; filtering on dfm$b != 1 alone would leave the rows that
# follow a 1 in place. The leading TRUE covers the first row, which has no
# predecessor.
dfm2 <- dfm[dfm$b != 1 & c(TRUE, head(dfm$b, -1) != 1), ]

这行代码可以读作：“取dfm中“b”列不等于1的行作为子集，然后把结果赋给dfm2”。

在R中有条件地删除一行和另一行

4 个答案: