我有一个包含多个列的数据
# Example data (dput output): a data.frame with 50 rows and 5 columns.
#   X1     - integer key; 1 for the first 24 rows, 2 for the remaining 26.
#   X2     - factor with 24 levels ("B02" ... "G07").
#   X3-X5  - numeric columns holding the values to be corrected.
# Note: within X1 == 2 the labels C02 and C03 (codes 7L, 8L) occur twice,
# so X2 is not a unique row identifier inside that group.
df<- structure(list(X1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), X2 = structure(c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 7L, 8L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L), .Label = c("B02", "B03", "B04",
"B05", "B06", "B07", "C02", "C03", "C04", "C05", "C06", "C07",
"D02", "D03", "D04", "D05", "D06", "D07", "G02", "G03", "G04",
"G05", "G06", "G07"), class = "factor"), X3 = c(0.005648642,
0.005876389, 0.00592532, 0.006244456, 0.005987075, 0.006075874,
0.006198667, 0.006003758, 0.006041885, 0.006186987, 0.006041323,
0.006071594, 0.005902391, 0.005976096, 0.00593805, 0.005866524,
0.0059831, 0.005902586, 0.005914309, 0.005887304, 0.006054509,
0.005931266, 0.005936195, 0.005895191, 0.005840959, 0.005849247,
0.005808851, 0.005833586, 0.005825153, 0.00584873, 0.005983976,
0.00598669, 0.006011548, 0.005997747, 0.005851022, 0.005919044,
0.005854566, 0.0058226, 0.00578052, 0.005784874, 0.005933198,
0.005996407, 0.005898848, 0.00595775, 0.005918857, 0.005882898,
0.005877808, 0.005803604, 0.006235161, 0.005808725), X4 = c(0.024054157,
0.025850824, 0.023122678, 0.042337945, 0.030468744, 0.026481616,
0.017430149, 0.024019931, 0.025572552, 0.024869532, 0.024148692,
0.025228634, 0.030078166, 0.025860944, 0.023530696, 0.029244585,
0.026599876, 0.023578245, 0.024014744, 0.023963795, 0.025466639,
0.02969377, 0.02307532, 0.022739164, 0.02860112, 0.022800416,
0.022287748, 0.022491258, 0.023340693, 0.024576665, 0.024378624,
0.030037462, 0.024904579, 0.025901291, 0.02912765, 0.024926085,
0.024044815, 0.023799791, 0.023147189, 0.021253484, 0.023979501,
0.029659496, 0.029393487, 0.02470603, 0.024562731, 0.023819856,
0.025065129, 0.023228642, 0.023919905, 0.024781896), X5 = c(0.00535592,
0.00555428, 0.00555428, 0.00572213, 0.00573739, 0.00575265, 0.00576791,
0.00572213, 0.00573739, 0.00572213, 0.00575265, 0.00576791, 0.0056611,
0.0056611, 0.00567636, 0.00563058, 0.0056611, 0.00564584, 0.00563058,
0.00561532, 0.00575265, 0.00569162, 0.00567636, 0.00564584, 0.00561532,
0.00560006, 0.00556954, 0.0055848, 0.00555428, 0.00556954, 0.00569162,
0.00573739, 0.00572213, 0.00567636, 0.00561532, 0.00561532, 0.0055848,
0.00553903, 0.00552377, 0.00549325, 0.0056611, 0.00572213, 0.0056611,
0.0056611, 0.00563058, 0.00561532, 0.0055848, 0.00553903, 0.00553903,
0.00550851)), .Names = c("X1", "X2", "X3", "X4", "X5"), class = "data.frame", row.names = c(NA,
-50L))
基本上,我想根据几个参考值的平均值来修正每一个值。
对于 X1 的每个取值(1、2 或其他数字),我想取 X2 为 G02、G03、G04 和 G05 的那些行的平均值,
然后从该列中属于同一 X1 的每个值里减去这个平均值。例如,对于 X3 列,当 X1 为 1 时,参与取平均的值是:
0.005914309
0.005887304
0.006054509
0.005931266
它们的平均值为 0.005946847。然后我从 X1 为 1 的第一个值中减去这个平均值,得到 0.005648642 - 0.005946847 = -0.000298205。
接着,对该列中 X1 为 1 的其余每个值也减去同一个平均值。
当 X1 为 2 时,同样先取 X1 为 2 的行中 G02、G03、G04、G05 的平均值,再从该列中 X1 为 2 的每个值里减去它;X1 的其他取值依此类推。
答案 0 :(得分:2)
以下是使用 data.table 联接(join)的解决方案:
library(data.table)

# nm1: value columns to correct; nm2: X2 labels that supply the reference mean.
nm1 <- paste0("X", 3:5)
nm2 <- paste0("G0", 2:5)

# Work on a data.table copy so the original `df` is left untouched by `:=`.
dfN <- copy(df)
setDT(dfN)

# One row per X1: the mean of each value column over the reference rows only.
ref_means <- dfN[
  X2 %in% nm2,
  lapply(.SD, mean),
  by = X1,
  .SDcols = nm1
]

# Update join on X1: inside the join, columns coming from `ref_means` are
# visible with the "i." prefix, so each value column has its group's
# reference mean subtracted in place.
dfN[
  ref_means,
  (nm1) := Map(`-`, mget(nm1), mget(paste0("i.", nm1))),
  on = "X1"
]

head(dfN, 10)
# X1 X2 X3 X4 X5
# 1: 1 B02 -0.000298205 -0.001730580 -0.0003166225
# 2: 1 B03 -0.000070458 0.000066087 -0.0001182625
# 3: 1 B04 -0.000021527 -0.002662059 -0.0001182625
# 4: 1 B05 0.000297609 0.016553208 0.0000495875
# 5: 1 B06 0.000040228 0.004684007 0.0000648475
# 6: 1 B07 0.000129027 0.000696879 0.0000801075
# 7: 1 C02 0.000251820 -0.008354588 0.0000953675
# 8: 1 C03 0.000056911 -0.001764806 0.0000495875
# 9: 1 C04 0.000095038 -0.000212185 0.0000648475
#10: 1 C05 0.000240140 -0.000915205 0.0000495875
答案 1 :(得分:1)
使用 dplyr 和 tidyr 的解决方案。df2 是每个 X1 分组中 G02 到 G05 各行的平均值,df3 是最终输出。
library(dplyr)
library(tidyr)  # gather() and spread() are tidyr functions, not dplyr ones

# df2: per-X1 mean of every value column over the reference rows
# (X2 in G02..G05), reshaped to long form (X1, Col, Value) so that it can be
# joined back column by column. `mean` is passed directly instead of the
# deprecated funs() wrapper; the grouping column X1 is excluded automatically.
df2 <- df %>%
  filter(X2 %in% paste0("G0", 2:5)) %>%
  group_by(X1) %>%
  summarise_at(vars(-X2), mean) %>%
  gather(Col, Value, -X1)

# df3: the original data with the matching reference mean subtracted from
# every value, returned in the original wide layout and row order.
df3 <- df %>%
  group_by(X1) %>%
  # Row index within each X1 group: X2 labels can repeat inside a group, so
  # spread() needs this to identify rows uniquely; it also restores the order.
  mutate(ID = row_number()) %>%
  gather(Col, Value, -ID, -X1, -X2) %>%
  # Both tables have a `Value` column, so the join yields Value.x (original
  # value) and Value.y (reference mean for that X1 and column).
  left_join(df2, by = c("X1", "Col")) %>%
  mutate(Value = Value.x - Value.y) %>%
  select(ID, X1, X2, Col, Value) %>%
  spread(Col, Value) %>%
  arrange(X1, ID) %>%
  select(-ID) %>%
  ungroup()
df3
# A tibble: 50 x 5
X1 X2 X3 X4 X5
<int> <fctr> <dbl> <dbl> <dbl>
1 1 B02 -0.000298205 -0.001730580 -0.0003166225
2 1 B03 -0.000070458 0.000066087 -0.0001182625
3 1 B04 -0.000021527 -0.002662059 -0.0001182625
4 1 B05 0.000297609 0.016553208 0.0000495875
5 1 B06 0.000040228 0.004684007 0.0000648475
6 1 B07 0.000129027 0.000696879 0.0000801075
7 1 C02 0.000251820 -0.008354588 0.0000953675
8 1 C03 0.000056911 -0.001764806 0.0000495875
9 1 C04 0.000095038 -0.000212185 0.0000648475
10 1 C05 0.000240140 -0.000915205 0.0000495875
# ... with 40 more rows