如何有效地替换数据框中的值?

时间:2015-08-07 10:24:49

标签: r

我有一个类似的数据框:

dput(tbl_core_abu[,-1])
structure(list(`10` = c(0, 0, 0, 58664.77, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `59` = c(0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `84` = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    `110` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0), `134` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `199` = c(0, 
    104958.6967, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), `234` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 124035.0275, 0, 0, 0, 0, 0), `362` = c(0, 
    77721.19, 0, 152536.2825, 0, 0, 0, 166587.3025, 0, 102277.7225, 
    0, 0, 272194.79, 0, 276369.14, 138263.835, 187644.165, 0, 
    197116.2625, 0, 0), `433` = c(55386.35333, 120237.6333, 0, 
    105352.27, 0, 0, 0, 322688.3333, 97829.95667, 290855.53, 
    0, 0, 472599.1433, 0, 95569.16667, 227565.1033, 364478.0967, 
    0, 770653.39, 0, 0), `506` = c(0, 0, 0, 25778.4925, 289966.155, 
    0, 0, 0, 0, 20935.3925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    `581` = c(131897.8625, 0, 100404.635, 0, 883894.2775, 0, 
    73022.6425, 105393.055, 0, 142834.03, 0, 0, 79358.81, 1192346.16, 
    0, 160301.1775, 0, 0, 0, 0, 0), `652` = c(1057886.688, 1982200.798, 
    321253.675, 601117.7, 4472375.41, 59737.5275, 797205.7125, 
    2382608.513, 449364.3925, 3917538.72, 51331.7675, 206527.6425, 
    1465000.365, 3024429.003, 232467.6875, 2783451.168, 2141222.723, 
    82442.1325, 1813534.675, 40380.1675, 559932.305), `733` = c(0, 
    0, 0, 35943.15, 159816.4767, 0, 1588.723333, 70380.19333, 
    0, 109879.3467, 0, 49431.19333, 73450.01667, 196120.7467, 
    0, 92769.24, 93007.26333, 0, 272181.6933, 0, 0), `818` = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 13581.89333, 0, 0, 12132.77333, 0, 
    0, 0, 0, 0, 0, 0, 0), `896` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
    21898.0425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `972` = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 15417.325, 0, 0, 0, 19955.4325, 0, 
    0, 0, 0, 0, 0, 0), `1039` = c(0, 0, 0, 0, 12918.05333, 0, 
    0, 7435.02, 0, 10715.63667, 0, 0, 9717.78, 0, 0, 0, 0, 0, 
    0, 0, 0)), .Names = c("10", "34", "59", "84", "110", "134", 
"165", "199", "234", "257", "362", "433", "506", "581", "652", 
"733", "818", "896", "972", "1039"), class = "data.frame", row.names = c(NA, 
-21L))

第一列包含名称(按字符串处理),我不想改动它。

让我展示几行来解释我想要实现的目标:

> head(tbl_core_abu[,-1])
        10 34 59 84 110 134 165      199 234 257       362       433       506      581        652       733 818 896 972     1039
1     0.00  0  0  0   0   0   0      0.0   0   0      0.00  55386.35      0.00 **131897.9** 1057886.69      0.00   0   0   0     0.00
2     0.00  0  0  0   0   0   0 104958.7   0   0  77721.19 120237.63      0.00      0.0 1982200.80      0.00   0   0   0     0.00
3     0.00  0  0  0   0   0   0      0.0   0   0      0.00      0.00      0.00 **100404.6**  321253.67      0.00   0   0   0     0.00
4 58664.77  0  0  0   0   0   0      0.0   0   0 152536.28 105352.27  25778.49      0.0  601117.70  **35943.15**   0   0   0     0.00
5     0.00  0  0  0   0   0   0      0.0   0   0      0.00      0.00 **289966.16** **883894.3** 4472375.41 **159816.48**   0   0   0 12918.05
6     0.00  0  0  0   0   0   0      0.0   0   0      0.00      0.00      0.00      0.0   59737.53      0.00   0   0   0     0.00

所以对我来说最重要的一列是652,它是此数据框中的第16列。我想把此数据框中的数字替换为0,但有两个例外:列652本身必须保持不变,并且在左右两侧与该列“连接”的数字也要保留。“连接”是什么意思?指的是从列652出发,向左或向右的相邻列中连续出现、中间没有中断的非零数字(值0即为中断)。为了形象化,我把这些数字加粗了。其他所有数字都应替换为0。

这就是我的期望:

> dput(tbl_core_abu[,-1])
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `59` = c(0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `84` = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    `110` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0), `134` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `199` = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0), `234` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `362` = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 102277.7225, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    `433` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 290855.53, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), `506` = c(0, 0, 0, 0, 289966.155, 
    0, 0, 0, 0, 20935.3925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    `581` = c(131897.8625, 0, 100404.635, 0, 883894.2775, 0, 
    73022.6425, 105393.055, 0, 142834.03, 0, 0, 79358.81, 1192346.16, 
    0, 160301.1775, 0, 0, 0, 0, 0), `652` = c(1057886.688, 1982200.798, 
    321253.675, 601117.7, 4472375.41, 59737.5275, 797205.7125, 
    2382608.513, 449364.3925, 3917538.72, 51331.7675, 206527.6425, 
    1465000.365, 3024429.003, 232467.6875, 2783451.168, 2141222.723, 
    82442.1325, 1813534.675, 40380.1675, 559932.305), `733` = c(0, 
    0, 0, 35943.15, 159816.4767, 0, 1588.723333, 70380.19333, 
    0, 109879.3467, 0, 49431.19333, 73450.01667, 196120.7467, 
    0, 92769.24, 93007.26333, 0, 272181.6933, 0, 0), `818` = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 13581.89333, 0, 0, 12132.77333, 0, 
    0, 0, 0, 0, 0, 0, 0), `896` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
    21898.0425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `972` = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 15417.325, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0), `1039` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 10715.63667, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("10", "34", 
"59", "84", "110", "134", "165", "199", "234", "257", "362", 
"433", "506", "581", "652", "733", "818", "896", "972", "1039"
), class = "data.frame", row.names = c(NA, -21L))

但我只是手动完成:

# Manual clean-up; column 1 holds the names and column 16 is '652'.
# Columns 2-11 are cleared in every row.
tbl_core_abu[, 2:11] <- 0
# Columns 2-14 are cleared in every row except rows 5 and 10.
tbl_core_abu[c(1:4, 6:9, 11:21), 2:14] <- 0
# To the right of '652': columns 18-21 are cleared in rows 2-9 ...
tbl_core_abu[2:9, 18:21] <- 0
# ... and columns 19-21 in rows 11-21 (column 18 is left as it is there).
tbl_core_abu[11:21, 19:21] <- 0
如果我有更大的数据集,那将会非常痛苦......

1 个答案:

答案 0 :(得分:2)

tbl_core_abu <- structure(list(`10`=c(0,0,0,58664.77,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`34`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`59`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`84`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`110`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`134`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`165`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`199`=c(0,104958.6967,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`234`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),`257`=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,124035.0275,0,0,0,0,0),`362`=c(0,77721.19,0,152536.2825,0,0,0,166587.3025,0,102277.7225,0,0,272194.79,0,276369.14,138263.835,187644.165,0,197116.2625,0,0),`433`=c(55386.35333,120237.6333,0,105352.27,0,0,0,322688.3333,97829.95667,290855.53,0,0,472599.1433,0,95569.16667,227565.1033,364478.0967,0,770653.39,0,0),`506`=c(0,0,0,25778.4925,289966.155,0,0,0,0,20935.3925,0,0,0,0,0,0,0,0,0,0,0),`581`=c(131897.8625,0,100404.635,0,883894.2775,0,73022.6425,105393.055,0,142834.03,0,0,79358.81,1192346.16,0,160301.1775,0,0,0,0,0),`652`=c(1057886.688,1982200.798,321253.675,601117.7,4472375.41,59737.5275,797205.7125,2382608.513,449364.3925,3917538.72,51331.7675,206527.6425,1465000.365,3024429.003,232467.6875,2783451.168,2141222.723,82442.1325,1813534.675,40380.1675,559932.305),`733`=c(0,0,0,35943.15,159816.4767,0,1588.723333,70380.19333,0,109879.3467,0,49431.19333,73450.01667,196120.7467,0,92769.24,93007.26333,0,272181.6933,0,0),`818`=c(0,0,0,0,0,0,0,0,0,13581.89333,0,0,12132.77333,0,0,0,0,0,0,0,0),`896`=c(0,0,0,0,0,0,0,0,0,21898.0425,0,0,0,0,0,0,0,0,0,0,0),`972`=c(0,0,0,0,0,0,0,0,0,15417.325,0,0,0,19955.4325,0,0,0,0,0,0,0),`1039`=c(0,0,0,0,12918.05333,0,0,7435.02,0,10715.63667,0,0,9717.78,0,0,0,0,0,0,0,0)),.Names=c('10','34','59','84','110','134','165','199','234','257','362','433','506','581','652','733','818','896','972','1039'),class='data.frame',row.names=c(NA,-21L));
# Index of the "trunk" column: the column that is always kept and whose
# uninterrupted non-zero neighbours (to the left and to the right) are kept too.
trunkci <- match('652',names(tbl_core_abu));
# Column positions, compared against the per-row cut-off positions below.
cis <- seq_len(ncol(tbl_core_abu));
# Start from a logical matrix marking the cells that are already zero, then,
# row by row, additionally mark every cell lying beyond the zero nearest to the
# trunk on either side. apply() returns one column per processed row, so the
# result is transposed back with t() before it is used as an index.
tbl_core_abu[t(apply(tbl_core_abu==0,1,function(x) {
    # Zero flags strictly left / right of the trunk. seq_len() yields empty
    # selections when the trunk is the first or last column, where 1:0 and
    # (n+1):n would count backwards and pick unintended elements.
    left <- x[seq_len(trunkci-1L)];
    right <- x[trunkci+seq_len(length(x)-trunkci)];
    # Position of the nearest zero on each side; NA when that side has none.
    leftcut <- trunkci-match(TRUE,rev(left));
    rightcut <- trunkci+match(TRUE,right);
    # which() drops the NA comparisons, so a side without a zero is left alone.
    x[which(cis<leftcut | cis>rightcut)] <- TRUE;
    x;
}))] <- 0;
tbl_core_abu;
##    10 34 59 84 110 134 165 199 234 257      362      433       506        581        652        733      818      896      972     1039
## 1   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00  131897.86 1057886.69      0.000     0.00     0.00     0.00     0.00
## 2   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00 1982200.80      0.000     0.00     0.00     0.00     0.00
## 3   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00  100404.63  321253.67      0.000     0.00     0.00     0.00     0.00
## 4   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00  601117.70  35943.150     0.00     0.00     0.00     0.00
## 5   0  0  0  0   0   0   0   0   0   0      0.0      0.0 289966.16  883894.28 4472375.41 159816.477     0.00     0.00     0.00     0.00
## 6   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00   59737.53      0.000     0.00     0.00     0.00     0.00
## 7   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00   73022.64  797205.71   1588.723     0.00     0.00     0.00     0.00
## 8   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00  105393.05 2382608.51  70380.193     0.00     0.00     0.00     0.00
## 9   0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00  449364.39      0.000     0.00     0.00     0.00     0.00
## 10  0  0  0  0   0   0   0   0   0   0 102277.7 290855.5  20935.39  142834.03 3917538.72 109879.347 13581.89 21898.04 15417.33 10715.64
## 11  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00   51331.77      0.000     0.00     0.00     0.00     0.00
## 12  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00  206527.64  49431.193     0.00     0.00     0.00     0.00
## 13  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00   79358.81 1465000.36  73450.017 12132.77     0.00     0.00     0.00
## 14  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00 1192346.16 3024429.00 196120.747     0.00     0.00     0.00     0.00
## 15  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00  232467.69      0.000     0.00     0.00     0.00     0.00
## 16  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00  160301.18 2783451.17  92769.240     0.00     0.00     0.00     0.00
## 17  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00 2141222.72  93007.263     0.00     0.00     0.00     0.00
## 18  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00   82442.13      0.000     0.00     0.00     0.00     0.00
## 19  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00 1813534.68 272181.693     0.00     0.00     0.00     0.00
## 20  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00   40380.17      0.000     0.00     0.00     0.00     0.00
## 21  0  0  0  0   0   0   0   0   0   0      0.0      0.0      0.00       0.00  559932.31      0.000     0.00     0.00     0.00     0.00

该解决方案首先计算一个逻辑矩阵,表示输入data.frame中哪些单元格为零。然后使用apply()遍历该逻辑矩阵的每一行。对于每一行,它找出“主干列索引”(trunkci)左侧离主干列最近的为true的索引,以及右侧离主干列最近的为true的索引,然后把这两个索引之外的所有位置都设为true。这样,行中与主干列之间隔着至少一个零单元格的每个单元格都会被标记为待清零。apply()返回后,必须用t()把结果转置回来(因为apply()按行处理时会把每一行的结果作为一列返回,所以得到的矩阵是转置的),最后用得到的逻辑矩阵对tbl_core_abu进行索引,把所有标记为true的单元格赋值为零。

这是另一种解决方案,使用Rcpp

library('Rcpp');
# trunkify(input, trunkci): for every row of the logical matrix `input` (TRUE
# marks a zero cell), additionally set to TRUE every cell that lies beyond the
# TRUE nearest to column `trunkci` on either side, and return the matrix.
# `trunkci` is a ZERO-BASED column index, because the scan uses C++ indexing.
# Rcpp matrices wrap the R object without copying it, so the matrix that is
# passed in is modified in place.
cppFunction('
    LogicalMatrix trunkify(LogicalMatrix input, int trunkci ) {
        const int nrows = input.nrow();
        const int ncols = input.ncol();
        // Reject an index outside the matrix (this also catches NA, which arrives as a negative int).
        if (trunkci < 0 || trunkci >= ncols) stop("trunkci must be a zero-based column index within the matrix");
        for (int r = 0; r < nrows; ++r) {
            int c;
            // Walk left from the trunk to the nearest TRUE, then flag everything further left.
            for (c = trunkci-1; c >= 0; --c) if (input(r,c)) break;
            for (--c; c >= 0; --c) input(r,c) = TRUE;
            // Same procedure to the right of the trunk.
            for (c = trunkci+1; c < ncols; ++c) if (input(r,c)) break;
            for (++c; c < ncols; ++c) input(r,c) = TRUE;
        }
        return input;
    }
');
# match() returns a 1-based position; subtract 1 to get the zero-based index
# trunkify() expects. Passing the 1-based value makes the right-hand scan skip
# the column directly to the right of the trunk.
tbl_core_abu[trunkify(tbl_core_abu==0,match('652',names(tbl_core_abu))-1L)] <- 0;