我有一个类似的数据框:
dput(tbl_core_abu[,-1])
structure(list(`10` = c(0, 0, 0, 58664.77, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `59` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `84` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`110` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), `134` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `199` = c(0,
104958.6967, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), `234` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 124035.0275, 0, 0, 0, 0, 0), `362` = c(0,
77721.19, 0, 152536.2825, 0, 0, 0, 166587.3025, 0, 102277.7225,
0, 0, 272194.79, 0, 276369.14, 138263.835, 187644.165, 0,
197116.2625, 0, 0), `433` = c(55386.35333, 120237.6333, 0,
105352.27, 0, 0, 0, 322688.3333, 97829.95667, 290855.53,
0, 0, 472599.1433, 0, 95569.16667, 227565.1033, 364478.0967,
0, 770653.39, 0, 0), `506` = c(0, 0, 0, 25778.4925, 289966.155,
0, 0, 0, 0, 20935.3925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`581` = c(131897.8625, 0, 100404.635, 0, 883894.2775, 0,
73022.6425, 105393.055, 0, 142834.03, 0, 0, 79358.81, 1192346.16,
0, 160301.1775, 0, 0, 0, 0, 0), `652` = c(1057886.688, 1982200.798,
321253.675, 601117.7, 4472375.41, 59737.5275, 797205.7125,
2382608.513, 449364.3925, 3917538.72, 51331.7675, 206527.6425,
1465000.365, 3024429.003, 232467.6875, 2783451.168, 2141222.723,
82442.1325, 1813534.675, 40380.1675, 559932.305), `733` = c(0,
0, 0, 35943.15, 159816.4767, 0, 1588.723333, 70380.19333,
0, 109879.3467, 0, 49431.19333, 73450.01667, 196120.7467,
0, 92769.24, 93007.26333, 0, 272181.6933, 0, 0), `818` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 13581.89333, 0, 0, 12132.77333, 0,
0, 0, 0, 0, 0, 0, 0), `896` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
21898.0425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `972` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 15417.325, 0, 0, 0, 19955.4325, 0,
0, 0, 0, 0, 0, 0), `1039` = c(0, 0, 0, 0, 12918.05333, 0,
0, 7435.02, 0, 10715.63667, 0, 0, 9717.78, 0, 0, 0, 0, 0,
0, 0, 0)), .Names = c("10", "34", "59", "84", "110", "134",
"165", "199", "234", "257", "362", "433", "506", "581", "652",
"733", "818", "896", "972", "1039"), class = "data.frame", row.names = c(NA,
-21L))
第一列保存的是名称(字符串),我不想改动它。
让我展示几行来解释我想要实现的目标:
> head(tbl_core_abu[,-1])
10 34 59 84 110 134 165 199 234 257 362 433 506 581 652 733 818 896 972 1039
1 0.00 0 0 0 0 0 0 0.0 0 0 0.00 55386.35 0.00 **131897.9** 1057886.69 0.00 0 0 0 0.00
2 0.00 0 0 0 0 0 0 104958.7 0 0 77721.19 120237.63 0.00 0.0 1982200.80 0.00 0 0 0 0.00
3 0.00 0 0 0 0 0 0 0.0 0 0 0.00 0.00 0.00 **100404.6** 321253.67 0.00 0 0 0 0.00
4 58664.77 0 0 0 0 0 0 0.0 0 0 152536.28 105352.27 25778.49 0.0 601117.70 **35943.15** 0 0 0 0.00
5 0.00 0 0 0 0 0 0 0.0 0 0 0.00 0.00 **289966.16** **883894.3** 4472375.41 **159816.48** 0 0 0 12918.05
6 0.00 0 0 0 0 0 0 0.0 0 0 0.00 0.00 0.00 0.0 59737.53 0.00 0 0 0 0.00
所以对我来说重要的一列是652。它是此数据框中的第16列。我想把此数据框中的数字都替换为0,但有两类数字例外:列652本身必须保持原样;在同一行中,从左侧或右侧与该列“相连”的数字也要保留。“相连”是什么意思?就是从列652出发向相邻的列延伸时,中间没有被中断的一串数字(在这里,值0就算一次中断)。为了直观,我把这些数字加粗了。其余的数字都应替换为0。
这就是我的期望:
> dput(tbl_core_abu[,-1])
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `59` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `84` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`110` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), `134` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `199` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), `234` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `362` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 102277.7225, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`433` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 290855.53, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), `506` = c(0, 0, 0, 0, 289966.155,
0, 0, 0, 0, 20935.3925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`581` = c(131897.8625, 0, 100404.635, 0, 883894.2775, 0,
73022.6425, 105393.055, 0, 142834.03, 0, 0, 79358.81, 1192346.16,
0, 160301.1775, 0, 0, 0, 0, 0), `652` = c(1057886.688, 1982200.798,
321253.675, 601117.7, 4472375.41, 59737.5275, 797205.7125,
2382608.513, 449364.3925, 3917538.72, 51331.7675, 206527.6425,
1465000.365, 3024429.003, 232467.6875, 2783451.168, 2141222.723,
82442.1325, 1813534.675, 40380.1675, 559932.305), `733` = c(0,
0, 0, 35943.15, 159816.4767, 0, 1588.723333, 70380.19333,
0, 109879.3467, 0, 49431.19333, 73450.01667, 196120.7467,
0, 92769.24, 93007.26333, 0, 272181.6933, 0, 0), `818` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 13581.89333, 0, 0, 12132.77333, 0,
0, 0, 0, 0, 0, 0, 0), `896` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
21898.0425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `972` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 15417.325, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), `1039` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 10715.63667,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("10", "34",
"59", "84", "110", "134", "165", "199", "234", "257", "362",
"433", "506", "581", "652", "733", "818", "896", "972", "1039"
), class = "data.frame", row.names = c(NA, -21L))
但我只是手动完成:
# Manual approach: each statement overwrites a hard-coded block of cells with 0.
# The column indices are positions in the full data frame, i.e. they count the
# leading name column that the dput() output above drops via [, -1].
# All rows, columns 2-11.
tbl_core_abu[,2:11] <- 0
# Rows 1-4, 6-9 and 11-21 (rows 5 and 10 are skipped), columns 2-14.
tbl_core_abu[1:4,2:14] <- 0
tbl_core_abu[6:9,2:14] <- 0
tbl_core_abu[11:21,2:14] <- 0
# Rows 2-9, columns 18-21.
tbl_core_abu[2:9,18:21] <- 0
# Rows 11-21, columns 19-21.
tbl_core_abu[11:21,19:21] <- 0
如果我有更大的数据集,那将会非常痛苦......
答案 0 :(得分:2)
tbl_core_abu <- structure(list(`10`=c(0,0,0,58664.77,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`34`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`59`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`84`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`110`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`134`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`165`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`199`=c(0,104958.6967,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`234`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`257`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,124035.0275,0,0,0,0,0),`362`=c(0,77721.19,0,152536.2825,0,0,0,166587.3025,0,102277.7225,0,0,272194.79,0,276369.14,138263.835,187644.165,0,197116.2625,0,0),`433`=c(55386.35333,120237.6333,0,105352.27,0,0,0,322688.3333,97829.95667,290855.53,0,0,472599.1433,0,95569.16667,227565.1033,364478.0967,0,770653.39,0,0),`506`=c(0,0,0,25778.4925,289966.155,0,0,0,0,20935.3925,0,0,0,0,0,0,0,0,0,0,0),`581`=c(131897.8625,0,100404.635,0,883894.2775,0,73022.6425,105393.055,0,142834.03,0,0,79358.81,1192346.16,0,160301.1775,0,0,0,0,0),`652`=c(1057886.688,1982200.798,321253.675,601117.7,4472375.41,59737.5275,797205.7125,2382608.513,449364.3925,3917538.72,51331.7675,206527.6425,1465000.365,3024429.003,232467.6875,2783451.168,2141222.723,82442.1325,1813534.675,40380.1675,559932.305),`733`=c(0,0,0,35943.15,159816.4767,0,1588.723333,70380.19333,0,109879.3467,0,49431.19333,73450.01667,196120.7467,0,92769.24,93007.26333,0,272181.6933,0,0),`818`=c(0,0,0,0,0,0,0,0,0,13581.89333,0,0,12132.77333,0,0,0,0,0,0,0,0),`896`=c(0,0,0,0,0,0,0,0,0,21898.0425,0,0,0,0,0,0,0,0,0,0,0),`972`=c(0,0,0,0,0,0,0,0,0,15417.325,0,0,0,19955.4325,0,0,0,0,0,0,0),`1039`=c(0,0,0,0,12918.05333,0,0,7435.02,0,10715.63667,0,0,9717.78,0,0,0,0,0,0,0,0)),.Names=c('10','34','59','84','110','134','165','199','234','257','362','433','506','581','652','733','818','896','972','1039'),class='data.frame',row.names=c(NA,-21L));
# Zero out every cell that is not linked to the trunk column ("652") by an
# unbroken run of non-zero values within its own row.
trunkci <- match("652", names(tbl_core_abu))
if (is.na(trunkci)) {
  stop("trunk column \"652\" not found in tbl_core_abu", call. = FALSE)
}

# Given one row of the "is zero" matrix, return a logical vector that is TRUE
# for every cell to overwrite with 0: the cells that are already zero plus all
# cells lying beyond the nearest zero on either side of the trunk column.
#
# is_zero: logical vector, TRUE where the row holds a zero.
# trunk:   1-based index of the trunk column.
flag_disconnected <- function(is_zero, trunk) {
  cols <- seq_along(is_zero)
  zero_at <- which(is_zero)
  # Nearest zero on each side of the trunk. The -Inf / Inf fallbacks mean
  # "no gap on this side", so nothing gets flagged there; they also keep the
  # logic NA-free when the trunk is the first or last column.
  left_gap <- max(zero_at[zero_at < trunk], -Inf)
  right_gap <- min(zero_at[zero_at > trunk], Inf)
  is_zero | cols < left_gap | cols > right_gap
}

# apply() over rows stores each returned vector as a column of its result, so
# t() is needed to get back to the row/column layout of the data frame.
wipe <- t(apply(tbl_core_abu == 0, 1, flag_disconnected, trunk = trunkci))
tbl_core_abu[wipe] <- 0
tbl_core_abu
## 10 34 59 84 110 134 165 199 234 257 362 433 506 581 652 733 818 896 972 1039
## 1 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 131897.86 1057886.69 0.000 0.00 0.00 0.00 0.00
## 2 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 1982200.80 0.000 0.00 0.00 0.00 0.00
## 3 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 100404.63 321253.67 0.000 0.00 0.00 0.00 0.00
## 4 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 601117.70 35943.150 0.00 0.00 0.00 0.00
## 5 0 0 0 0 0 0 0 0 0 0 0.0 0.0 289966.16 883894.28 4472375.41 159816.477 0.00 0.00 0.00 0.00
## 6 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 59737.53 0.000 0.00 0.00 0.00 0.00
## 7 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 73022.64 797205.71 1588.723 0.00 0.00 0.00 0.00
## 8 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 105393.05 2382608.51 70380.193 0.00 0.00 0.00 0.00
## 9 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 449364.39 0.000 0.00 0.00 0.00 0.00
## 10 0 0 0 0 0 0 0 0 0 0 102277.7 290855.5 20935.39 142834.03 3917538.72 109879.347 13581.89 21898.04 15417.33 10715.64
## 11 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 51331.77 0.000 0.00 0.00 0.00 0.00
## 12 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 206527.64 49431.193 0.00 0.00 0.00 0.00
## 13 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 79358.81 1465000.36 73450.017 12132.77 0.00 0.00 0.00
## 14 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 1192346.16 3024429.00 196120.747 0.00 0.00 0.00 0.00
## 15 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 232467.69 0.000 0.00 0.00 0.00 0.00
## 16 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 160301.18 2783451.17 92769.240 0.00 0.00 0.00 0.00
## 17 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 2141222.72 93007.263 0.00 0.00 0.00 0.00
## 18 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 82442.13 0.000 0.00 0.00 0.00 0.00
## 19 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 1813534.68 272181.693 0.00 0.00 0.00 0.00
## 20 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 40380.17 0.000 0.00 0.00 0.00 0.00
## 21 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.00 0.00 559932.31 0.000 0.00 0.00 0.00 0.00
该解决方案首先计算一个逻辑矩阵,表示输入 data.frame 中哪些单元格为零。然后用 apply() 逐行处理这个逻辑矩阵。对于每一行,它在“主干列索引”(trunkci)的左侧找到离主干列最近的一个为 TRUE 的索引,再在主干列的右侧找到离它最近的一个为 TRUE 的索引,然后把这两个索引以外(更靠外侧)的所有位置都设为 TRUE。这样就把该行中与主干列之间至少隔着一个零单元格的所有单元格都标记为待清零。apply() 返回之后,必须用 t() 把结果转置回来(因为按行调用 apply() 时,每次调用返回的向量会成为结果矩阵的一列,得到的是转置后的矩阵)。最后,用得到的逻辑矩阵对 tbl_core_abu 进行索引,并把所有标记为 TRUE 的单元格赋值为零。
这是另一种解决方案,使用Rcpp:
library("Rcpp")

# trunkify(input, trunkci): given a logical "is zero" matrix and the 1-based
# index of the trunk column (as returned by match()), return a copy in which,
# for every row, all cells beyond the nearest TRUE on each side of the trunk
# column are also set to TRUE. The returned matrix marks the cells to zero out.
cppFunction('
LogicalMatrix trunkify(LogicalMatrix input, int trunkci) {
  const int nrow = input.nrow();
  const int ncol = input.ncol();
  if (trunkci < 1 || trunkci > ncol) stop("trunkci is out of range");
  // R passes a 1-based column index; C++ matrix access is 0-based.
  const int trunk = trunkci - 1;
  // Work on a copy so the matrix passed in from R is not modified in place.
  LogicalMatrix out = clone(input);
  for (int r = 0; r < nrow; ++r) {
    int c;
    // Walk left from the column next to the trunk until the first zero cell,
    // then flag everything further left.
    for (c = trunk - 1; c >= 0; --c) if (out(r, c)) break;
    for (--c; c >= 0; --c) out(r, c) = TRUE;
    // Same on the right-hand side of the trunk.
    for (c = trunk + 1; c < ncol; ++c) if (out(r, c)) break;
    for (++c; c < ncol; ++c) out(r, c) = TRUE;
  }
  return out;
}
')

tbl_core_abu[trunkify(tbl_core_abu == 0, match("652", names(tbl_core_abu)))] <- 0