从数据框的值中减去用 dplyr 的 summarise 创建的平均值

时间:2021-07-23 15:10:39

标签: r dplyr

我有两个数据框,一个是原始数据:

df  <-  structure(list(day = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2), Subject = c(1, 1, 1, 1, 1, 1, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 
12, 12, 13, 13, 13, 13, 13, 13, 17, 17, 17, 17, 17, 17), TimePoint = c(1, 
5, 9, 13, 17, 21, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 
1, 5, 9, 15, 17, 21, 1, 5, 9, 13, 17, 21, 1, 5, 9, 13, 17, 21, 
1, 5, 9, 13, 17, 21), C4b = c(489.1, 3757.5, 4013.7, 963.4, 668.8, 
2135.4, 1793.5, 2485.9, 1337.2, 1200.1, 1854.5, 1163.8, 1050.4, 
1514.7, 1134, 1283.9, 1883.2, 1160, 441.4, 318.4, 261.8, 392, 
380.9, 312, 1731.1, 1482.9, 1635.6, 1144.3, 1455.3, 1427.1, 1202.6, 
1212.7, 1294.8, 1406.6, 2153.3, 1135, 1104.5, 1129.3, 737.4, 
1010.9, 934.3, 704.2), `Coagulation Factor XI` = c(2149.4, 2055.8, 
2256.3, 2179, 1859.5, 2158.4, 1877.4, 1800.7, 1838.7, 1899.9, 
1867, 1827.9, 1818.7, 1847, 1807.7, 1737, 2394.9, 1726.2, 1756.3, 
1746.3, 1744.5, 1573.8, 1765.5, 1954.1, 1522.4, 1387.7, 1374.4, 
1357.3, 1433.3, 1386.5, 1575.7, 1531.8, 1560.9, 1810.4, 1657.2, 
1509.4, 2110.4, 1974.5, 1856.1, 1939.6, 1832.3, 1932.5), CTACK = c(798.9, 
857.5, 702.9, 707.6, 903.7, 749.9, 689.2, 734.1, 716.8, 645.2, 
641.2, 663.1, 733.3, 667.3, 774.3, 782.2, 1766.7, 679.2, 1950.6, 
2084.9, 2021.5, 1930.8, 1952.3, 1917.2, 723.9, 709.2, 670.7, 
716.4, 944.9, 655.7, 706.8, 734.2, 680.5, 720.4, 778.3, 790.2, 
962.5, 726.4, 761.9, 849.2, 701.1, 664.3), Endostatin = c(30563, 
30006.5, 25972.2, 28048.9, 24503.9, 30371.2, 49733.9, 54933.8, 
54293.3, 60007.4, 60403.9, 58870.4, 56801, 59752.6, 54336.3, 
47746, 21270.4, 67312.9, 61931.5, 66395.3, 65680.8, 64419.4, 
63415, 67230.7, 49444.2, 55122.2, 52333, 62328.7, 47513.9, 61530, 
52549.8, 53633, 53983.7, 49023.8, 47305.6, 50569.8, 58754.1, 
65727.9, 78382, 68290.8, 69386.8, 59982.4), `TIMP-1` = c(279.4, 
262.4, 295.9, 273.3, 294.9, 278.3, 279.4, 238.7, 224.2, 239.3, 
235.7, 251.7, 228.2, 237.3, 258.4, 262.4, 389.4, 237.8, 303.1, 
296.4, 295.4, 297, 317, 351.6, 310.4, 244.2, 250.8, 261.1, 283.1, 
251.4, 242.2, 223.3, 234.6, 291.2, 262.3, 249.7, 293.9, 259.4, 
238.9, 257.9, 248.2, 329.3), tPA = c(377.9, 326.9, 346.3, 318.3, 
314, 348.2, 291.2, 427.8, 433.1, 428.8, 370.9, 390, 354, 354.4, 
368.8, 364.7, 595.2, 400.1, 354, 356.8, 368.7, 363.5, 394.3, 
412.3, 319, 406.9, 415.4, 292.3, 320.9, 348.2, 532, 457.4, 417.3, 
404, 401.5, 533.5, 213.6, 346.7, 310.7, 292.6, 280.2, 287.1), 
    `EG-VEGF` = c(106.8, 102.2, 107, 108.1, 138.8, 107.5, 88.1, 
    85.7, 82.7, 91.7, 98.8, 91.1, 82.3, 90.3, 89.2, 94.9, 128.3, 
    94.6, 529.1, 679.3, 599.4, 644.5, 674.1, 584.5, 98.2, 101.1, 
    97.3, 95.3, 86.8, 92.5, 101.9, 94.9, 109, 94.1, 103.8, 98.2, 
    92, 98.8, 91.6, 92.9, 98.8, 105.8), `TIMP-2` = c(466.5, 420.7, 
    479.8, 423, 462.7, 393.1, 400.5, 380.3, 355.9, 393.7, 423.6, 
    401.6, 364.2, 372.8, 391.3, 480.6, 1171.4, 357.8, 533.7, 
    522.6, 523.1, 511.5, 511, 644.8, 405.3, 398.2, 393, 369.7, 
    427.8, 380, 396.3, 394.5, 407, 490.9, 435.6, 402.7, 424.3, 
    436, 391.6, 405.6, 414.2, 536.2), `TGF-b1` = c(1451.2, 1194.6, 
    1521.3, 1705.8, 1945.4, 1900.2, 1839.4, 1701.9, 1752.9, 1714.6, 
    1597.6, 1660.5, 1786.9, 1694, 1519.6, 1906.1, 1654.2, 1636, 
    1566.8, 1730.9, 1496.2, 1850.9, 1715.1, 1648.8, 1834.9, 1686, 
    1769.4, 1750.8, 1833, 1862.6, 1511, 1505.9, 1524.9, 1514, 
    1391.2, 1369.1, 2064.6, 1790.9, 1799.1, 1548.2, 1839, 1667
    ), `VEGF sR3` = c(3094.3, 3235, 3116.3, 3027.1, 2536, 3061.8, 
    3215.7, 3189.6, 3139.1, 3618.8, 3585.6, 3556, 3326.7, 3167.5, 
    3403.3, 2583.1, 1621.6, 3415.2, 2293.9, 2508.4, 2662.6, 2486.2, 
    2538.8, 2452.5, 3012.5, 2920.1, 2914.1, 2870, 2455.2, 2809.3, 
    3534.8, 3501.9, 3691.4, 3517.6, 3414, 3405.6, 1943.3, 2334.2, 
    2137.4, 2185.7, 2273.2, 2304.9), C5 = c(5566.9, 5466.7, 5591.6, 
    5552.7, 5348.3, 5388.4, 5499.1, 5834.1, 5556.7, 5737.5, 5632.2, 
    5886.4, 5543.2, 6134.6, 5597.3, 5557.9, 5446.3, 5863.7, 5699.8, 
    5263.7, 5773.3, 5313.7, 6014.2, 5453.3, 6284.5, 5658.5, 5491.2, 
    5855.1, 5477.2, 5815.2, 5938.1, 5660.6, 6190.9, 5626.5, 6248.6, 
    5673, 6071.4, 6262.5, 5649.1, 6052, 6000.4, 5649), `Apo E` = c(3351.8, 
    936.5, 928.1, 3096.5, 2267.6, 2217.1, 2143.2, 2547.1, 2368.6, 
    2531.5, 1922.4, 2134.3, 2283.7, 2131, 2260.3, 2249.8, 2001.2, 
    2271.8, 3635.3, 2338.7, 2301.2, 2705, 2604.3, 2738.5, 5091.4, 
    3638.9, 2710.4, 3605, 3683, 3016.1, 3698.2, 3050.5, 2162.1, 
    2086.9, 2500.5, 2449.1, 4416, 2628.9, 2902.4, 3100.8, 2433.7, 
    3083.3), BDNF = c(325, 324, 337.9, 436.6, 557.3, 379.5, 556.2, 
    458.3, 400.3, 447.4, 448.1, 450.3, 425.1, 456.4, 482.1, 585, 
    663.2, 424.9, 343.8, 337.2, 328.1, 417.9, 343.2, 386.1, 590.6, 
    359.9, 340.3, 360.5, 394.1, 374.6, 367.9, 294.1, 319.4, 320, 
    366.2, 372.1, 502.1, 403.1, 432.9, 397.3, 389, 364.5), `bFGF-R` = c(12689.3, 
    13327.1, 12159.8, 11959.7, 11430.1, 11628.7, 11430.7, 12808.9, 
    11439, 12836.8, 13735.2, 12351.3, 11754, 12071.3, 11841.7, 
    10368.6, 12122.4, 12371.7, 11184.5, 11499.1, 11687.9, 10997.4, 
    11006.7, 10709.3, 11615, 12553.3, 11459.9, 12403, 10952, 
    12060.3, 13330.8, 12688.8, 13717.2, 11868.7, 11919.1, 11584.5, 
    10987.7, 12370.3, 11619.4, 11737.2, 12695.7, 12403.2), C8 = c(1868.7, 
    1678.4, 1916.9, 1966.6, 1724.9, 2028, 1840.3, 1797.4, 1871.4, 
    1967.9, 1926.4, 1860.2, 1860.4, 1901.4, 1951.1, 1675.4, 1026, 
    2024.3, 1950.6, 2174.5, 2266.5, 2251.5, 2176.4, 2067.2, 1968, 
    2187.1, 2360.5, 2057.5, 1971.1, 2202.2, 2099.5, 2337.2, 2320.8, 
    2232.9, 2358.2, 2374.8, 2071.3, 2773.3, 2695.9, 2641.2, 2738.7, 
    2607), `Cathepsin G` = c(1434.9, 1347, 1410.5, 1421.1, 2318.2, 
    1675, 1025.9, 1217.4, 1021.9, 1088.4, 1034.2, 1099.8, 1080.1, 
    1051.2, 967.8, 3874.4, 1875.6, 1058.9, 1093.7, 1083.2, 922.1, 
    1204.4, 1126.1, 1157.9, 1642.2, 1528.4, 1627.3, 1811.6, 1498.4, 
    1759.8, 926.3, 1022.4, 1106.7, 1058.7, 999.1, 995.3, 1020.5, 
    1085.4, 1088.9, 961.9, 1135.7, 1080.7), `CXCL16, soluble` = c(6878.8, 
    7479.3, 7088.7, 6739.4, 6143.9, 7477.5, 6742, 6822.4, 6164.9, 
    7320.9, 7459.1, 7150.5, 6407.1, 6504, 6901.8, 5929.6, 3924.1, 
    7827.3, 7358.7, 7758.3, 8305.5, 7586.9, 7829.1, 8268.3, 6860.7, 
    7470.7, 6857, 7003.7, 6364.7, 7738.7, 5441.6, 5609.3, 5732.5, 
    5128.2, 5642.7, 5402.4, 5170.2, 6272, 5963, 5996.4, 6461, 
    6721.3), `FGF-10` = c(255.2, 210, 262.6, 303.6, 219.4, 301.6, 
    241.2, 283.5, 248.6, 260.1, 265.7, 299.5, 268.9, 265.3, 201.3, 
    190.3, 296.6, 239.5, 242.7, 287.3, 216.2, 288.8, 283.8, 246.7, 
    255.1, 268.6, 236.3, 258.4, 206.8, 259.4, 264.1, 233.2, 277.1, 
    205.7, 200, 193.1, 251, 228, 229.9, 213.6, 247.2, 235.2), 
    `FGF-8B` = c(712.8, 648.4, 704.5, 763.9, 837.4, 795.9, 836.1, 
    841.7, 872.4, 860.1, 921.1, 845.8, 843.6, 875.2, 850, 847.4, 
    934.7, 938.6, 715.6, 823.9, 733.3, 801.1, 800.3, 819.9, 791.1, 
    930.8, 938.3, 869.6, 794.8, 939.9, 925.9, 971.2, 1015.9, 
    928.8, 851.9, 827, 833.6, 837.7, 858, 828.2, 935.7, 900.5
    ), GIIE = c(270, 254.6, 268.7, 279, 224.4, 293.2, 239.8, 
    238, 222.9, 260.2, 282.3, 262.3, 239, 241, 238.7, 200.6, 
    207.3, 252, 270.8, 281.1, 354.2, 289.2, 293, 295.2, 274.5, 
    291.4, 270.6, 275.8, 232, 272.7, 267, 245.3, 278.3, 260.6, 
    264, 250.6, 232.9, 274.4, 256.2, 254.1, 271.2, 282), GV = c(433.7, 
    441.1, 438.9, 468, 425.6, 459.2, 317.6, 332.2, 326, 306.4, 
    307.4, 310.6, 347.9, 317, 273.1, 325.9, 798.4, 299.1, 327.8, 
    307.9, 258.6, 308.7, 306.6, 298.6, 319.2, 326.2, 299.8, 329, 
    436.5, 297.7, 320.9, 306.6, 314.1, 312.1, 298.1, 300.1, 417, 
    306.6, 314.7, 321.4, 304.8, 305), `IL-12` = c(725.7, 667, 
    734.8, 772.8, 1045.1, 829.4, 659.4, 695.3, 653.8, 672.8, 
    701.1, 658.1, 683.5, 670.8, 678.8, 1002.5, 991.4, 703.8, 
    667.9, 714.1, 630.6, 720, 689.8, 781.4, 671.2, 715.2, 748.9, 
    693.2, 723.5, 724.7, 868.6, 891.2, 917.9, 858.8, 868.9, 828.6, 
    744.7, 711.9, 715.1, 683.2, 740.2, 724.3)), row.names = c(NA, 
-42L), class = c("tbl_df", "tbl", "data.frame"))

我的原始数据包含大量变量(1,130 个),我使用 dplyr 包(library(dplyr))的 summarise 函数按时间点为每一列计算平均值。

# Per-TimePoint mean of every column in the C4b:GV range, ignoring missing values.
# NOTE(review): the C4b:GV range stops before the last column of df (`IL-12`),
# so no mean is computed for that column - widen the range if that is unintended.
df_mean <- df %>%
                        group_by(TimePoint) %>%
                        # TRUE rather than T: T is an ordinary variable that can be reassigned.
                        summarise_at(vars(C4b:GV), mean, na.rm = TRUE)

现在我想创建一个新的数据框,其中每个值等于 df 中的原始值减去该变量在对应时间点的平均值,本质上就是 df - df_mean = 新数据框;但变量“day”、“Subject”和“TimePoint”除外。

谢谢!

2 个答案:

答案 0 :(得分:1)

  1. 基础 R 中的 scale() 函数;设置 scale = FALSE 表示只做中心化(减去均值),不做缩放
  2. mutate_at() 一次应用于多个变量
  3. vars() 选择变量列表
  4. ! 用于选择变量的补集——这里表示不对 day 和 Subject 应用该操作
library(tidyverse)
# Centre every measurement column on its per-TimePoint mean.
# The grouping column (TimePoint) is left untouched by the scoped verb, and
# day / Subject are excluded explicitly through the negated selection.
df_centered <- df %>%
    group_by(TimePoint) %>%
    # scale() returns a one-column matrix carrying a "scaled:center" attribute;
    # as.numeric() flattens it so each result is a plain numeric column.
    # scale() has no na.rm argument: it already omits NAs when computing the
    # column mean, so no extra argument is passed.
    mutate_at(vars(!c("day", "Subject")), ~as.numeric(scale(., scale = FALSE))) %>%
    # Drop the grouping so later operations are not silently done per TimePoint.
    ungroup()

答案 1 :(得分:1)

我试过这样的东西,希望它有帮助。

# Build df_ss: the identifier columns followed by, for every variable that has
# a mean in df_mean, the original value minus the mean of its TimePoint.
# Columns are matched by name rather than by position, so the same code works
# regardless of how many variables df and df_mean contain.

# Identifier columns that are carried over unchanged.
id_cols <- c("day", "Subject", "TimePoint")

# Variables to centre: everything df_mean holds apart from the grouping key.
value_cols <- setdiff(colnames(df_mean), "TimePoint")

# For each row of df, the row of df_mean that holds the matching TimePoint.
mean_row <- match(df$TimePoint, df_mean$TimePoint)

# Plain data frames keep the element-wise subtraction in base R semantics.
raw_values  <- as.data.frame(df)[, value_cols, drop = FALSE]
mean_values <- as.data.frame(df_mean)[mean_row, value_cols, drop = FALSE]

# Both operands have one row per row of df and identical column order,
# so the subtraction lines up cell by cell.
df_ss <- cbind(as.data.frame(df)[, id_cols, drop = FALSE],
               raw_values - mean_values)
rownames(df_ss) <- NULL

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) View(df_ss)
相关问题