如何从一个数据框的单元格中减去另一个数据框中对应的值

时间:2018-12-28 06:18:02

标签: r

我有一个看起来像这样的数据框:

print(Evaluation_statistics)
           Dataframe   Correct    Incorrect      Missing Taxlevel
1  SSU132_DIV_FC_FL6 0.9988139 0.0004367687 0.0007493188        1
2  SSU132_DIV_FC_FL6 0.9966982 0.0009376503 0.0023641609        2
3  SSU132_DIV_FC_FL6 0.9766509 0.0037626222 0.0195864722        3
4  SSU132_DIV_FC_FL6 0.9618368 0.0044237859 0.0337393813        4
5  SSU132_DIV_FC_FL6 0.9326855 0.0085350216 0.0587794518        5
6  SSU132_DIV_FC_FL6 0.8627184 0.0189132874 0.1183683283        6
7  SSU132_DIV_FC_FL6 0.3125260 0.0935726879 0.5939012662        7
8         SSU132_DIV 0.9995454 0.0001640075 0.0002906098        1
9         SSU132_DIV 0.9966292 0.0012156699 0.0021551166        2
10        SSU132_DIV 0.9813463 0.0040929952 0.0145607044        3
11        SSU132_DIV 0.9585493 0.0074407631 0.0340099843        4
12        SSU132_DIV 0.9243350 0.0114445611 0.0642204607        5
13        SSU132_DIV 0.8491361 0.0228517170 0.1280121999        6
14        SSU132_DIV 0.3572847 0.0851227899 0.5575925420        7
15     SSU132_DIV_FC 0.9995267 0.0001640075 0.0003093125        1
16     SSU132_DIV_FC 0.9965745 0.0012501978 0.0021752579        2
17     SSU132_DIV_FC 0.9811147 0.0042167201 0.0146686041        3
18     SSU132_DIV_FC 0.9587305 0.0073055288 0.0339639471        4
19     SSU132_DIV_FC 0.9241537 0.0116071300 0.0642391633        5
20     SSU132_DIV_FC 0.8488699 0.0230229179 0.1281071516        6
21     SSU132_DIV_FC 0.3583032 0.0850882620 0.5566084967        7

我还有每个 Taxlevel 的平均值,如下所示:

print(agg)

  Dataframe   Correct    Incorrect     Missing Taxlevel
1        NA 0.9992953 0.0002549279 0.000449747        1
2        NA 0.9966340 0.0011345060 0.002231512        2
3        NA 0.9797040 0.0040241125 0.016271927        3
4        NA 0.9597055 0.0063900259 0.033904438        4
5        NA 0.9270581 0.0105289043 0.062413025        5
6        NA 0.8535748 0.0215959741 0.124829227        6
7        NA 0.3427047 0.0879279132 0.569367435        7

如何根据 Taxlevel,从数据框“Evaluation_statistics”的“Correct”、“Incorrect”和“Missing”列的每个单元格中减去数据框“agg”中对应的值,同时让结果保持与数据框“Evaluation_statistics”相同的结构?

2 个答案:

答案 0 :(得分:2)

在 base R 中,我们可以使用 match 找出 agg 中与 Evaluation_statistics 每一行的 Taxlevel 相对应的行号,再按这些行号取出要减去的值

# Columns to be expressed as deviations from the per-Taxlevel averages.
cols <- c("Correct", "Incorrect", "Missing")

# For every row of Evaluation_statistics, the position of the row in agg
# that carries the same Taxlevel (NA where agg has no such level).
inds <- match(Evaluation_statistics[["Taxlevel"]], agg[["Taxlevel"]])

# Expand agg to one row per row of Evaluation_statistics and subtract
# element-wise; the result is printed here, not stored.
Evaluation_statistics[cols] - agg[inds, cols]


#      Correct     Incorrect       Missing
#1  -0.0004814  0.0001818408  0.0002995718
#2   0.0000642 -0.0001968557  0.0001326489
#3  -0.0030531 -0.0002614903  0.0033145452
#4   0.0021313 -0.0019662400 -0.0001650567
#5   0.0056274 -0.0019938827 -0.0036335732
#6   0.0091436 -0.0026826867 -0.0064608987
#7  -0.0301787  0.0056447747  0.0245338312
#......

如果要替换原始Evaluation_statistics数据框中的这些值,请执行

# Overwrite the selected columns, one column at a time, with the difference
# between each value and the matching row of agg (uses the cols and inds
# objects defined in the previous snippet).
Evaluation_statistics[cols] <- Map(
  `-`,
  Evaluation_statistics[cols],
  agg[inds, cols]
)

答案 1 :(得分:0)

我们可以按 'Taxlevel' 进行联接,减去 'agg' 中相应列('nm1')的值,并将结果赋值(:=)回 'Evaluation_statistics' 中的同名列

library(data.table)

# Columns that are updated by the join.
nm1 <- c("Correct", "Incorrect", "Missing")

# Convert the data.frame to a data.table by reference.
setDT(Evaluation_statistics)

# Update join on Taxlevel: inside j, the plain names refer to the columns of
# Evaluation_statistics and the "i."-prefixed names to the matching columns
# of agg; `:=` writes the differences back into the same columns.
Evaluation_statistics[
  agg,
  (nm1) := Map(`-`, mget(nm1), mget(paste0("i.", nm1))),
  on = "Taxlevel"
]

Evaluation_statistics
#             Dataframe    Correct     Incorrect       Missing Taxlevel
# 1: SSU132_DIV_FC_FL6 -0.0004814  0.0001818408  0.0002995718        1
# 2: SSU132_DIV_FC_FL6  0.0000642 -0.0001968557  0.0001326489        2
# 3: SSU132_DIV_FC_FL6 -0.0030531 -0.0002614903  0.0033145452        3
# 4: SSU132_DIV_FC_FL6  0.0021313 -0.0019662400 -0.0001650567        4
# 5: SSU132_DIV_FC_FL6  0.0056274 -0.0019938827 -0.0036335732        5
# 6: SSU132_DIV_FC_FL6  0.0091436 -0.0026826867 -0.0064608987        6
# 7: SSU132_DIV_FC_FL6 -0.0301787  0.0056447747  0.0245338312        7
# 8:        SSU132_DIV  0.0002501 -0.0000909204 -0.0001591372        1
# 9:        SSU132_DIV -0.0000048  0.0000811639 -0.0000763954        2
#10:        SSU132_DIV  0.0016423  0.0000688827 -0.0017112226        3
#11:        SSU132_DIV -0.0011562  0.0010507372  0.0001055463        4
#12:        SSU132_DIV -0.0027231  0.0009156568  0.0018074357        5
#13:        SSU132_DIV -0.0044387  0.0012557429  0.0031829729        6
#14:        SSU132_DIV  0.0145800 -0.0028051233 -0.0117748930        7
#15:     SSU132_DIV_FC  0.0002314 -0.0000909204 -0.0001404345        1
#16:     SSU132_DIV_FC -0.0000595  0.0001156918 -0.0000562541        2
#17:     SSU132_DIV_FC  0.0014107  0.0001926076 -0.0016033229        3
#18:     SSU132_DIV_FC -0.0009750  0.0009155029  0.0000595091        4
#19:     SSU132_DIV_FC -0.0029044  0.0010782257  0.0018261383        5
#20:     SSU132_DIV_FC -0.0047049  0.0014269438  0.0032779246        6
#21:     SSU132_DIV_FC  0.0155985 -0.0028396512 -0.0127589383        7

注意:这里是通过 := 就地(按引用)更新 Evaluation_statistics,因此速度应该很快


在 base R 中,如果 Evaluation_statistics 中每组行的 Taxlevel 顺序与“agg”的行顺序相同,那么更简单的做法是重复“agg”的行,使两个数据集的行数相同,然后直接相减

# Subtract agg from Evaluation_statistics by position rather than by key.
# The row indices of agg are repeated until they cover every row of
# Evaluation_statistics, so the number of repeats is derived from the data
# instead of being hard-coded. This is only valid when Evaluation_statistics
# consists of complete groups whose rows follow the same order as agg.
# NOTE(review): expects Evaluation_statistics to be a plain data.frame; if
# setDT() was applied earlier, `[nm1]` may not select columns -- confirm.
stopifnot(nrow(Evaluation_statistics) %% nrow(agg) == 0)
ind <- rep(seq_len(nrow(agg)), length.out = nrow(Evaluation_statistics))
Evaluation_statistics[nm1] <- Evaluation_statistics[nm1] - agg[ind, nm1]

数据

# Reproducible copy of the example data: three result sets with seven
# Taxlevel rows each, holding the Correct / Incorrect / Missing values.
Evaluation_statistics <- structure(
  list(
    Dataframe = rep(
      c("SSU132_DIV_FC_FL6", "SSU132_DIV", "SSU132_DIV_FC"),
      each = 7L
    ),
    Correct = c(
      0.9988139, 0.9966982, 0.9766509, 0.9618368, 0.9326855, 0.8627184,
      0.312526,
      0.9995454, 0.9966292, 0.9813463, 0.9585493, 0.924335, 0.8491361,
      0.3572847,
      0.9995267, 0.9965745, 0.9811147, 0.9587305, 0.9241537, 0.8488699,
      0.3583032
    ),
    Incorrect = c(
      0.0004367687, 0.0009376503, 0.0037626222, 0.0044237859, 0.0085350216,
      0.0189132874, 0.0935726879,
      0.0001640075, 0.0012156699, 0.0040929952, 0.0074407631, 0.0114445611,
      0.022851717, 0.0851227899,
      0.0001640075, 0.0012501978, 0.0042167201, 0.0073055288, 0.01160713,
      0.0230229179, 0.085088262
    ),
    Missing = c(
      0.0007493188, 0.0023641609, 0.0195864722, 0.0337393813, 0.0587794518,
      0.1183683283, 0.5939012662,
      0.0002906098, 0.0021551166, 0.0145607044, 0.0340099843, 0.0642204607,
      0.1280121999, 0.557592542,
      0.0003093125, 0.0021752579, 0.0146686041, 0.0339639471, 0.0642391633,
      0.1281071516, 0.5566084967
    ),
    Taxlevel = rep(1:7, times = 3L)
  ),
  class = "data.frame",
  row.names = as.character(1:21)
)

# Per-Taxlevel averages used as the values to subtract; the Dataframe
# column is present but entirely NA.
agg <- structure(
  list(
    Dataframe = rep(NA, 7L),
    Correct = c(
      0.9992953, 0.996634, 0.979704, 0.9597055, 0.9270581, 0.8535748,
      0.3427047
    ),
    Incorrect = c(
      0.0002549279, 0.001134506, 0.0040241125, 0.0063900259, 0.0105289043,
      0.0215959741, 0.0879279132
    ),
    Missing = c(
      0.000449747, 0.002231512, 0.016271927, 0.033904438, 0.062413025,
      0.124829227, 0.569367435
    ),
    Taxlevel = 1:7
  ),
  class = "data.frame",
  row.names = as.character(1:7)
)