我有这样的数据
# Example input: 11 labelled rows (Number) with eight numeric value columns.
# A 0 marks a cell that has no value.
df <- data.frame(
  Number = factor(
    1:11,
    labels = c("A", "AA", "AAA", "B", "BB", "BBB", "BBBB", "C", "CC", "CCC",
               "CCCC")
  ),
  Col1 = c(31.22099237, 0, 17.16411573, 0, 0.705259568, 0, 2.66371587, 0,
           2.720864088, 3.50268492, 0),
  Col2 = c(2180.612724, 0, 1175.574713, 0, 42.97845333, 0, 199.3804311, 0,
           190.6518212, 247.7824952, 0),
  Col3 = c(3227.401883, 0, 1671.762522, 0, 72.9133296, 0, 344.3196473, 0,
           333.6736573, 466.1626644, 502.3171147),
  Col4 = c(2735.221156, 2022.47486, 1387.524359, 0, 53.75158295, 0,
           212.122076, 0, 191.9276388, 274.0036734, 0),
  Col5 = c(2988.544146, 2407.748537, 1627.935679, 1627.935679, 56.13075824,
           0, 276.8770486, 0, 210.470166, 385.88476, 498.6120134),
  Col6 = c(3371.951649, 0, 1627.659283, 0, 49.4177718, 58.1108116, 0, 0,
           251.2365107, 431.2948353, 529.9698816),
  Col7 = c(0, 2325.388968, 1355.368616, 0, 48.20993462, 35.00690048, 0, 0,
           219.8790867, 327.8801311, 0),
  Col8 = c(0, 0, 1502.048187, 1502.048187, 62.54871626, 338.4898404, 0,
           483.841343, 261.3874571, 348.3883709, 0)
)
我想要这样的输出
# Expected result: the four rows of df that should remain after filtering.
# Col8 is stored as integer zeros.
output <- data.frame(
  Number = factor(1:4, labels = c("A", "AA", "BBBB", "CCCC")),
  Col1 = c(31.22099237, 0, 2.66371587, 0),
  Col2 = c(2180.612724, 0, 199.3804311, 0),
  Col3 = c(3227.401883, 0, 344.3196473, 502.3171147),
  Col4 = c(2735.221156, 2022.47486, 212.122076, 0),
  Col5 = c(2988.544146, 2407.748537, 276.8770486, 498.6120134),
  Col6 = c(3371.951649, 0, 0, 529.9698816),
  Col7 = c(0, 2325.388968, 0, 0),
  Col8 = c(0L, 0L, 0L, 0L)
)
我要执行以下操作
# Drop every row that contains an NA, then display the rows that have at
# least one non-zero value outside the first (Number) column.
df <- na.omit(df)
df[rowSums(df[, -1] != 0) > 0, ]
我的目的是删除在所有列中都有值的行。我想到可以用一个逻辑矩阵来实现,但没能弄清楚具体该怎么做。
请保留这样的行:前5列中至少有2个(非零)值,并且后3列中的(非零)值少于2个。
第二个示例
# Second example input: 12 labelled rows (Number) with eight numeric value
# columns. A 0 marks a cell that has no value.
df2 <- data.frame(
  Number = factor(
    1:12,
    labels = c("A", "AA", "AAA", "B", "BB", "BBB", "C", "CC", "CCC", "D",
               "DD", "DDD")
  ),
  COL1 = c(406173224.8, 96923176.09, 3447270.25, 37489836.02, 3324543.438,
           432762367.5, 667314.875, 30974699.53, 20989067.38, 15745820.75,
           6574354.484, 11424108.27),
  COL2 = c(242584392.6, 101980486.3, 579871.7188, 0, 2308453.438,
           397535765.9, 0, 3746376.563, 31095794.56, 0, 1030556.969, 0),
  COL3 = c(402236010.2, 115299055.9, 0, 5080776.688, 72611542.24,
           728695912, 0, 0, 37845525.63, 1037861.25, 413324.7813,
           1191412.063),
  COL4 = c(302854623.1, 267007.3438, 2396334.5, 4207015.484, 1102826.25,
           117023982.5, 190532.1563, 1418596.625, 29904788.16, 4053600.563,
           745227.2773, 4503530.609),
  COL5 = c(444770100.6, 94153154.77, 872500.375, 0, 0, 624809234, 0,
           17644014.5, 0, 0, 495620.8125, 581600.9375),
  COL6 = c(0, 79994610.52, 0, 0, 885095.0625, 627937144.5, 0, 9275362.5, 0,
           0, 0, 0),
  COL7 = c(291344235.3, 0, 1508724.25, 9924209.969, 1403628.125,
           103400297.8, 0, 9286129.25, 0, 8254331.422, 753859.4063,
           3172866.969),
  COL8 = c(424723458.1, 124013613.4, 1154568.5, 11550648.31, 0, 0, 0, 0, 0,
           4820875.156, 395120.2813, 3858119.125)
)
答案 0 :(得分:2)
# Logical matrix over the value columns (every column except the first,
# Number): TRUE where a cell is non-zero. NA cells are treated as 0.
m <- replace(df[-1], is.na(df[-1]), 0) != 0

# Per-row counts of non-zero cells. drop = FALSE keeps each column slice a
# matrix, so rowSums() still works when df has only one row.
n_nonzero <- rowSums(m)
n_first5 <- rowSums(m[, 1:5, drop = FALSE])
n_last3 <- rowSums(m[, (ncol(m) - 2):ncol(m), drop = FALSE])

# Keep rows that are neither all-zero nor fully populated, that have at least
# two non-zero values in the first five value columns, and fewer than two
# non-zero values in the last three.
df[n_nonzero != 0 & n_nonzero != ncol(m) & n_first5 >= 2 & n_last3 < 2, ]
# Number Col1 Col2 Col3 Col4 Col5 Col6 Col7 Col8
#1 A 31.220992 2180.6127 3227.4019 2735.2212 2988.544 3371.9516 0.000 0
#2 AA 0.000000 0.0000 0.0000 2022.4749 2407.749 0.0000 2325.389 0
#7 BBBB 2.663716 199.3804 344.3196 212.1221 276.877 0.0000 0.000 0
#11 CCCC 0.000000 0.0000 502.3171 0.0000 498.612 529.9699 0.000 0