我有一个数据框x
,其中每组(n = 100组)在每个时间点(n = 100个时间点)各有一行数据:
# group time Qfr
#1.1 1 1 0.00475
#1.2 2 1 0.00543
#1.3 3 1 0.00586
...
#10.1 1 100 0.00599
#10.2 2 100 0.00641
#10.3 3 100 0.00639
我使用概率密度函数计算了一组中从time=0
到每个时间点i
的数值积分:
probability density for one group
在把数据框复制为100组之前,我计算了100个时间点中每个时间点的概率密度,如下所示:
# Running numerical integral of Qfr over time for a single group:
# element i is trapz() over rows 1..i, so 100 prefixes are evaluated
# (the count of 100 is hard-coded). trapz() is not defined in this
# snippet -- presumably caTools::trapz; confirm which package is loaded.
x$Pfr <- sapply(1:100, function(i) trapz(x$time[1:i],x$Qfr[1:i]))
在复制后的数据框中,我[故意!]让每个组的Qfr
值各不相同:
probability density for 100 groups
我一直在尝试计算每组中每个时间点的概率。到目前为止,我得到了一些结果,但它们对每一组给出的都只是与第一组相同的值。我也转而使用plyr
,因为它似乎更容易。下面是我最接近成功的一次尝试,但仍未能算出每组中每个时间点的AUC:
# Attempt at a per-group running integral: ddply() splits x by "group" and
# calls the anonymous function on each piece. The function's parameter is
# also named x, so inside it x refers to the piece, not the full data frame.
x$Pfr.a <- ddply(x, "group", function(x) {
# For i = 1..100, apply trapz() to rows 1..i of the piece; the ldply()
# result is the value of this assignment and hence the function's return value.
Pfr.a <- ldply(1:100, function(i) trapz(x$time[1:i],x$Qfr[1:i]))})
# NOTE(review): the entire ddply() result is assigned to the single column
# x$Pfr.a -- presumably why this does not yield one value per row; verify.
我还尝试了lapply(1:seq_along(x$group)
的一些变体,但收到错误:numerical expression has 10000 elements: only the first used
。
感谢您的帮助。
下面是数据框的一个子集(不确定是否有用,'group'中的因子水平看起来有点奇怪?!):
dput(head(dx, 200))
structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("1", "10", "100", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "2", "20", "21", "22", "23", "24", "25",
"26", "27", "28", "29", "3", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "4", "40", "41", "42", "43", "44", "45",
"46", "47", "48", "49", "5", "50", "51", "52", "53", "54", "55",
"56", "57", "58", "59", "6", "60", "61", "62", "63", "64", "65",
"66", "67", "68", "69", "7", "70", "71", "72", "73", "74", "75",
"76", "77", "78", "79", "8", "80", "81", "82", "83", "84", "85",
"86", "87", "88", "89", "9", "90", "91", "92", "93", "94", "95",
"96", "97", "98", "99"), class = "factor"), time1 = c(1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
99, 100), Qfr = c(0.0047483156533351, 0.00463590951803145, 0.00464211399782159,
0.00464503937974946, 0.00416004347704241, 0.00438197709286947,
0.00431759459957717, 0.00398730140493329, 0.00397706671079854,
0.00262493451933476, 0.00335399522396657, 0.00295667176648455,
0.00339798255045183, 0.00286198435791452, 0.00151335514001775,
0.00309227850666867, 0.00123886729097462, 0.00199250455202349,
0.00245754728547869, 0.00248380670396705, 0.000191936980024834,
0.00144436545780792, 0.00159482527163271, 0.00143249280691113,
0.000484372476157479, 0.000380442251991916, 2.18212361823795e-05,
0.000993709932506735, 0.000258154218317037, 0.000343103930118674,
9.90473630970747e-05, 0.000268793478538768, 0.000113110005984463,
3.70177657941174e-06, 0.000583526736386266, 0.000212063283055276,
0.000353818440596741, 6.0628579693711e-05, 4.49484396692589e-06,
1.3884356044914e-08, 1.49981290319997e-05, 0.000235896784127904,
9.92119974972607e-06, 1.51610269536329e-05, 1.85284651831128e-05,
5.96934962865593e-05, 7.78636269791861e-05, 1.47034930193804e-08,
5.47829801645376e-08, 3.11818699737472e-11, 9.20136676872913e-09,
5.30659379752264e-05, 4.74680461307365e-06, 9.73018297065787e-06,
4.66149280895574e-09, 2.82380262693817e-07, 2.06323844185656e-10,
8.49243682505304e-11, 6.17181366962855e-13, 5.75141855935289e-08,
1.89886638408073e-07, 1.57066746036808e-05, 2.15874613645258e-06,
1.27708681080894e-09, 6.67373587258097e-15, 1.18043395056545e-11,
3.19101038270704e-14, 1.89303135643209e-14, 2.54797586210295e-10,
2.13476349357714e-07, 1.34298640394892e-08, 2.26033813126483e-11,
2.16190422086398e-09, 6.47479253518485e-07, 4.35097460390434e-09,
6.55021523564957e-07, 0, 0, 3.84296329606427e-07, 1.09089405342197e-15,
1.4918132022553e-11, 9.66033421059101e-11, 3.16966795183138e-18,
7.17069226240885e-13, 2.03406644459584e-09, 0, 6.82986109849662e-09,
6.86761389563466e-17, 1.54878117946893e-10, 0, 0, 2.4001726289784e-11,
1.97333213109784e-10, 5.73033703451756e-14, 3.16966795183138e-17,
3.20176929229154e-12, 5.79052268958693e-11, 1.75321080525751e-10,
4.01491273898641e-16, 1.7142620839488e-15, 0.00709757914338659,
0.00706109716460003, 0.00665150008861973, 0.00686079941928745,
0.00682148691114509, 0.00543463618079141, 0.00656917473704193,
0.00516529035434138, 0.00582140269841734, 0.00500819911322036,
0.00577973602471336, 0.00226084428133863, 0.00508154665239833,
0.00183388797366075, 0.00244601975659835, 0.00419557683437959,
0.00278749187328208, 0.00409478497882702, 0.000364315724417681,
0.00149110061243606, 0.00121575627536572, 0.000462369015351559,
0.00253282581588482, 0.00189911970413857, 0.0012376528556745,
0.0020950095059074, 0.000689082866541792, 0.000854210316998352,
0.00105160571435329, 5.19929007887153e-05, 8.38955291047843e-05,
0.000411217583621136, 1.4199088966465e-05, 0.000179094881856513,
3.03675453964957e-06, 2.71370058754111e-05, 0.000151740491334497,
1.35118845984708e-05, 4.05998219507724e-05, 3.14016538352446e-07,
0.000256470226523964, 0.000263069174314681, 0.000402435776150782,
0.000244490268648669, 8.76325094234176e-05, 1.16471745957847e-05,
1.04777654042319e-05, 0.000128010867031077, 5.37084270348237e-05,
2.71131833361889e-08, 1.85296893139987e-06, 1.95480398901419e-08,
2.79980204848407e-08, 6.82242719264267e-08, 3.0747459003458e-11,
1.44794617496174e-06, 1.01646192427787e-11, 1.64589968127969e-06,
7.95250469160936e-07, 4.95979015924176e-08, 9.38624136717167e-06,
3.30158916225698e-10, 1.03056284825371e-08, 2.00488681957195e-11,
6.89757869076632e-06, 2.01876633469813e-09, 2.99539942021403e-10,
2.08739168256085e-07, 5.03678841806743e-07, 6.06527132057803e-09,
3.59877081003646e-16, 1.66720353967075e-10, 2.54660549523141e-08,
1.43559878592863e-09, 1.56893780051589e-15, 1.1374538535091e-08,
0, 5.52499836475257e-11, 0, 3.27112403629401e-12, 0, 1.23115843501247e-14,
1.57840825001599e-18, 3.13372991176312e-12, 0, 0, 1.26662941171199e-09,
1.24163053449599e-10, 1.20982387990113e-11, 0, 2.77586766889062e-14,
5.18268389772431e-09, 1.59138827447591e-10, 9.44263400071004e-12,
0, 0, 9.47044950009594e-18, 1.51307793254783e-13, 0, 0)), .Names = c("group",
"time1", "Qfr"), row.names = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
"40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50",
"51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61",
"62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72",
"73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83",
"84", "85", "86", "87", "88", "89", "90", "91", "92", "93", "94",
"95", "96", "97", "98", "99", "100", "1.9", "2.9", "3.9", "4.9",
"5.9", "6.9", "7.9", "8.9", "9.9", "10.9", "11.9", "12.9", "13.9",
"14.9", "15.9", "16.9", "17.9", "18.9", "19.9", "20.9", "21.9",
"22.9", "23.9", "24.9", "25.9", "26.9", "27.9", "28.9", "29.9",
"30.9", "31.9", "32.9", "33.9", "34.9", "35.9", "36.9", "37.9",
"38.9", "39.9", "40.9", "41.9", "42.9", "43.9", "44.9", "45.9",
"46.9", "47.9", "48.9", "49.9", "50.9", "51.9", "52.9", "53.9",
"54.9", "55.9", "56.9", "57.9", "58.9", "59.9", "60.9", "61.9",
"62.9", "63.9", "64.9", "65.9", "66.9", "67.9", "68.9", "69.9",
"70.9", "71.9", "72.9", "73.9", "74.9", "75.9", "76.9", "77.9",
"78.9", "79.9", "80.9", "81.9", "82.9", "83.9", "84.9", "85.9",
"86.9", "87.9", "88.9", "89.9", "90.9", "91.9", "92.9", "93.9",
"94.9", "95.9", "96.9", "97.9", "98.9", "99.9", "100.9"), class = "data.frame")
答案 0 :(得分:0)
在您的情况下调用ddply
的正确方法是:
# plyr supplies ddply() and mutate(); trapz() presumably comes from caTools.
library(caTools)
library(plyr)
# Split dx by group and run mutate() on each piece, adding a column Pfr.
# Pfr[i] is trapz() over rows 1..i of that group's time1/Qfr, i.e. the
# running integral up to each time point within the group.
# NOTE(review): the printed results show NA in the first row of each group
# (a single-point integral); presumably trapz() needs at least two points --
# confirm, and substitute 0 there if that is the intended value.
d <- ddply(dx, "group", .fun=mutate,
Pfr = sapply(1:length(time1), function(i) trapz(time1[1:i],Qfr[1:i])))
要应用于每个组的函数是plyr::mutate
,最后一个参数则是传递给plyr::mutate
的参数。该参数由下面的表达式给出:
sapply(1:length(time1), function(i) trapz(time1[1:i],Qfr[1:i]))
plyr::mutate
用它来创建名为Pfr
的新列。请注意,在ddply
的这个调用中,所用的列名直接取自dx
,并且不加引号。
使用您提供的数据:
head(d[d$group==1,])
## group time1 Qfr Pfr
##1 1 1 0.004748316 NA
##2 1 2 0.004635910 0.004692113
##3 1 3 0.004642114 0.009331124
##4 1 4 0.004645039 0.013974701
##5 1 5 0.004160043 0.018377242
##6 1 6 0.004381977 0.022648253
head(d[d$group==10,])
## group time1 Qfr Pfr
##101 10 1 0.007097579 NA
##102 10 2 0.007061097 0.007079338
##103 10 3 0.006651500 0.013935637
##104 10 4 0.006860799 0.020691787
##105 10 5 0.006821487 0.027532930
##106 10 6 0.005434636 0.033660991