我有一个看起来像这样的数据框:
print(Evaluation_statistics)
Dataframe Correct Incorrect Missing Taxlevel
1 SSU132_DIV_FC_FL6 0.9988139 0.0004367687 0.0007493188 1
2 SSU132_DIV_FC_FL6 0.9966982 0.0009376503 0.0023641609 2
3 SSU132_DIV_FC_FL6 0.9766509 0.0037626222 0.0195864722 3
4 SSU132_DIV_FC_FL6 0.9618368 0.0044237859 0.0337393813 4
5 SSU132_DIV_FC_FL6 0.9326855 0.0085350216 0.0587794518 5
6 SSU132_DIV_FC_FL6 0.8627184 0.0189132874 0.1183683283 6
7 SSU132_DIV_FC_FL6 0.3125260 0.0935726879 0.5939012662 7
8 SSU132_DIV 0.9995454 0.0001640075 0.0002906098 1
9 SSU132_DIV 0.9966292 0.0012156699 0.0021551166 2
10 SSU132_DIV 0.9813463 0.0040929952 0.0145607044 3
11 SSU132_DIV 0.9585493 0.0074407631 0.0340099843 4
12 SSU132_DIV 0.9243350 0.0114445611 0.0642204607 5
13 SSU132_DIV 0.8491361 0.0228517170 0.1280121999 6
14 SSU132_DIV 0.3572847 0.0851227899 0.5575925420 7
15 SSU132_DIV_FC 0.9995267 0.0001640075 0.0003093125 1
16 SSU132_DIV_FC 0.9965745 0.0012501978 0.0021752579 2
17 SSU132_DIV_FC 0.9811147 0.0042167201 0.0146686041 3
18 SSU132_DIV_FC 0.9587305 0.0073055288 0.0339639471 4
19 SSU132_DIV_FC 0.9241537 0.0116071300 0.0642391633 5
20 SSU132_DIV_FC 0.8488699 0.0230229179 0.1281071516 6
21 SSU132_DIV_FC 0.3583032 0.0850882620 0.5566084967 7
我有每个分类级别(Taxlevel)的平均值,如下所示:
print(agg)
Dataframe Correct Incorrect Missing Taxlevel
1 NA 0.9992953 0.0002549279 0.000449747 1
2 NA 0.9966340 0.0011345060 0.002231512 2
3 NA 0.9797040 0.0040241125 0.016271927 3
4 NA 0.9597055 0.0063900259 0.033904438 4
5 NA 0.9270581 0.0105289043 0.062413025 5
6 NA 0.8535748 0.0215959741 0.124829227 6
7 NA 0.3427047 0.0879279132 0.569367435 7
如何根据 Taxlevel,将数据框“Evaluation_statistics”中“Correct”、“Incorrect”和“Missing”列的每个单元格的值减去数据框“agg”中对应 Taxlevel 的值,同时保持结果的结构与数据框“Evaluation_statistics”类似?
答案 0 :(得分:2)
在 base R 中,我们可以使用 match
获取“agg”中待减去的值所在的行号
# Columns whose values are compared against the per-Taxlevel means.
cols <- c("Correct", "Incorrect", "Missing")
# For each row of Evaluation_statistics, the position of the row in agg
# that has the same Taxlevel.
inds <- match(Evaluation_statistics[["Taxlevel"]], agg[["Taxlevel"]])
# Row-aligned difference: each value minus the agg value for its Taxlevel.
Evaluation_statistics[, cols] - agg[inds, cols]
# Correct Incorrect Missing
#1 -0.0004814 0.0001818408 0.0002995718
#2 0.0000642 -0.0001968557 0.0001326489
#3 -0.0030531 -0.0002614903 0.0033145452
#4 0.0021313 -0.0019662400 -0.0001650567
#5 0.0056274 -0.0019938827 -0.0036335732
#6 0.0091436 -0.0026826867 -0.0064608987
#7 -0.0301787 0.0056447747 0.0245338312
#......
如果要替换原始Evaluation_statistics
数据框中的这些值,请执行
# Overwrite the selected columns with their differences from the matching
# agg rows (cols and inds are the vectors built in the snippet above).
Evaluation_statistics[, cols] <-
  Evaluation_statistics[, cols] - agg[inds, cols]
答案 1 :(得分:0)
我们可以在 'Taxlevel' 上进行联接,然后减去相应列('nm1')的值,并通过 `:=` 将结果赋值回 'Evaluation_statistics' 中的相同列
library(data.table)

# Columns to be replaced by their difference from the matching agg row.
nm1 <- c("Correct", "Incorrect", "Missing")

# Convert Evaluation_statistics to a data.table, then join it to agg on
# Taxlevel and update the nm1 columns. Inside the join, the "i."-prefixed
# names refer to the columns coming from agg.
setDT(Evaluation_statistics)
Evaluation_statistics[
  agg,
  (nm1) := Map(`-`, mget(nm1), mget(paste0("i.", nm1))),
  on = "Taxlevel"
]
Evaluation_statistics
# Dataframe Correct Incorrect Missing Taxlevel
# 1: SSU132_DIV_FC_FL6 -0.0004814 0.0001818408 0.0002995718 1
# 2: SSU132_DIV_FC_FL6 0.0000642 -0.0001968557 0.0001326489 2
# 3: SSU132_DIV_FC_FL6 -0.0030531 -0.0002614903 0.0033145452 3
# 4: SSU132_DIV_FC_FL6 0.0021313 -0.0019662400 -0.0001650567 4
# 5: SSU132_DIV_FC_FL6 0.0056274 -0.0019938827 -0.0036335732 5
# 6: SSU132_DIV_FC_FL6 0.0091436 -0.0026826867 -0.0064608987 6
# 7: SSU132_DIV_FC_FL6 -0.0301787 0.0056447747 0.0245338312 7
# 8: SSU132_DIV 0.0002501 -0.0000909204 -0.0001591372 1
# 9: SSU132_DIV -0.0000048 0.0000811639 -0.0000763954 2
#10: SSU132_DIV 0.0016423 0.0000688827 -0.0017112226 3
#11: SSU132_DIV -0.0011562 0.0010507372 0.0001055463 4
#12: SSU132_DIV -0.0027231 0.0009156568 0.0018074357 5
#13: SSU132_DIV -0.0044387 0.0012557429 0.0031829729 6
#14: SSU132_DIV 0.0145800 -0.0028051233 -0.0117748930 7
#15: SSU132_DIV_FC 0.0002314 -0.0000909204 -0.0001404345 1
#16: SSU132_DIV_FC -0.0000595 0.0001156918 -0.0000562541 2
#17: SSU132_DIV_FC 0.0014107 0.0001926076 -0.0016033229 3
#18: SSU132_DIV_FC -0.0009750 0.0009155029 0.0000595091 4
#19: SSU132_DIV_FC -0.0029044 0.0010782257 0.0018261383 5
#20: SSU132_DIV_FC -0.0047049 0.0014269438 0.0032779246 6
#21: SSU132_DIV_FC 0.0155985 -0.0028396512 -0.0127589383 7
注意:这里是按引用就地更新原数据,因此速度应该很快。
在 base R 中,如果两个数据框中 Taxlevel 的顺序相同,则更简单的做法是重复“agg”的行,使两者行数相同,然后直接将两个大小相等的数据集相减
# Base R shortcut: only valid when Evaluation_statistics is made of
# consecutive blocks that list the Taxlevels in the same order as agg.
# nm1 is the vector of column names defined earlier
# (presumably c("Correct", "Incorrect", "Missing") -- confirm it is in scope).
#
# Recycle the row indices of agg to the full length of
# Evaluation_statistics instead of hard-coding the number of blocks.
ind <- rep(seq_len(nrow(agg)), length.out = nrow(Evaluation_statistics))
# Fail loudly if the "same order" assumption does not hold; otherwise the
# means of the wrong Taxlevel would be subtracted without any error.
stopifnot(all(Evaluation_statistics$Taxlevel == agg$Taxlevel[ind]))
Evaluation_statistics[nm1] <- Evaluation_statistics[nm1] - agg[ind, nm1]
# Reproducible copy of the question's input: 3 datasets x 7 Taxlevels,
# one row per (dataset, Taxlevel) pair, with character row names "1".."21".
Evaluation_statistics <- data.frame(
  Dataframe = rep(
    c("SSU132_DIV_FC_FL6", "SSU132_DIV", "SSU132_DIV_FC"),
    each = 7L
  ),
  Correct = c(
    0.9988139, 0.9966982, 0.9766509, 0.9618368, 0.9326855, 0.8627184,
    0.312526,
    0.9995454, 0.9966292, 0.9813463, 0.9585493, 0.924335, 0.8491361,
    0.3572847,
    0.9995267, 0.9965745, 0.9811147, 0.9587305, 0.9241537, 0.8488699,
    0.3583032
  ),
  Incorrect = c(
    0.0004367687, 0.0009376503, 0.0037626222, 0.0044237859, 0.0085350216,
    0.0189132874, 0.0935726879,
    0.0001640075, 0.0012156699, 0.0040929952, 0.0074407631, 0.0114445611,
    0.022851717, 0.0851227899,
    0.0001640075, 0.0012501978, 0.0042167201, 0.0073055288, 0.01160713,
    0.0230229179, 0.085088262
  ),
  Missing = c(
    0.0007493188, 0.0023641609, 0.0195864722, 0.0337393813, 0.0587794518,
    0.1183683283, 0.5939012662,
    0.0002906098, 0.0021551166, 0.0145607044, 0.0340099843, 0.0642204607,
    0.1280121999, 0.557592542,
    0.0003093125, 0.0021752579, 0.0146686041, 0.0339639471, 0.0642391633,
    0.1281071516, 0.5566084967
  ),
  Taxlevel = rep(1:7, times = 3L),
  row.names = as.character(1:21),
  stringsAsFactors = FALSE
)
# Reproducible copy of the per-Taxlevel means shown in the question: one row
# per Taxlevel, an all-NA Dataframe column, character row names "1".."7".
agg <- data.frame(
  Dataframe = rep(NA, 7L),
  Correct = c(
    0.9992953, 0.996634, 0.979704, 0.9597055, 0.9270581, 0.8535748,
    0.3427047
  ),
  Incorrect = c(
    0.0002549279, 0.001134506, 0.0040241125, 0.0063900259, 0.0105289043,
    0.0215959741, 0.0879279132
  ),
  Missing = c(
    0.000449747, 0.002231512, 0.016271927, 0.033904438, 0.062413025,
    0.124829227, 0.569367435
  ),
  Taxlevel = seq_len(7L),
  row.names = as.character(1:7),
  stringsAsFactors = FALSE
)