在 R 中用向量为数据框设置组名

时间:2014-09-30 12:44:05

标签: r

我想为我的数据创建一个新列,其中的组名来自一个向量。我已经知道如何将函数应用于每个组(例如 tapply;我选择了 ddply(df, .(datetime), function(x) …))。我现在需要的是一种使用 specimen 向量中各个名称的方法。这可能很简单,但我找不到解决方案。这是我的示例数据:

# Example data frame (looks like dput() output): 90 rows with the columns
# time, datetime and slide.
# - time runs 0.5, 1.5, ..., 29.5 three times in a row.
# - datetime is a factor whose integer codes are 1, 2 and 3 for those three
#   runs of 30 rows each; its level set lists many more timestamps than the
#   three that are actually used.
# - slide holds one numeric measurement per row.
df <- structure(list(time = c(0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 
8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 
19.5, 20.5, 21.5, 22.5, 23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 
0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 
12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 
23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 0.5, 1.5, 2.5, 3.5, 
4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 
16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5, 24.5, 25.5, 26.5, 
27.5, 28.5, 29.5), datetime = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L), .Label = c("2014-09-29-16-53-45", "2014-09-29-16-54-18", 
"2014-09-29-16-54-51", "2014-09-29-16-55-24", "2014-09-29-16-55-57", 
"2014-09-29-16-56-30", "2014-09-29-16-57-03", "2014-09-29-16-57-36", 
"2014-09-29-16-58-08", "2014-09-29-16-58-41", "2014-09-29-16-59-14", 
"2014-09-29-16-59-47", "2014-09-29-17-00-20", "2014-09-29-17-00-53", 
"2014-09-29-17-01-26", "2014-09-29-15-19-52", "2014-09-29-15-20-25", 
"2014-09-29-15-20-58", "2014-09-29-15-21-31", "2014-09-29-15-22-04", 
"2014-09-29-15-22-37", "2014-09-29-15-23-10", "2014-09-29-15-23-43", 
"2014-09-29-15-24-15", "2014-09-29-15-24-48", "2014-09-29-15-25-21", 
"2014-09-29-15-25-54", "2014-09-29-15-26-27", "2014-09-29-15-27-00", 
"2014-09-29-15-27-33", "2014-09-29-17-35-05", "2014-09-29-17-35-37", 
"2014-09-29-17-36-10", "2014-09-29-17-36-43", "2014-09-29-17-37-16", 
"2014-09-29-17-37-49", "2014-09-29-17-38-22", "2014-09-29-17-38-55", 
"2014-09-29-17-39-28", "2014-09-29-17-40-01", "2014-09-29-17-40-33", 
"2014-09-29-17-41-06", "2014-09-29-17-41-39", "2014-09-29-17-42-12", 
"2014-09-29-17-42-45", "2014-09-29-16-43-43", "2014-09-29-16-44-16", 
"2014-09-29-16-44-49", "2014-09-29-16-45-22", "2014-09-29-16-45-55", 
"2014-09-29-16-46-28", "2014-09-29-16-47-00", "2014-09-29-16-47-33", 
"2014-09-29-16-48-06", "2014-09-29-16-48-39", "2014-09-29-16-49-12", 
"2014-09-29-16-49-45", "2014-09-29-16-50-18", "2014-09-29-16-50-51", 
"2014-09-29-16-51-24", "2014-09-29-17-15-11", "2014-09-29-17-15-44", 
"2014-09-29-17-16-17", "2014-09-29-17-16-50", "2014-09-29-17-17-23", 
"2014-09-29-17-17-55", "2014-09-29-17-18-28", "2014-09-29-17-19-01", 
"2014-09-29-17-19-34", "2014-09-29-17-20-07", "2014-09-29-17-20-40", 
"2014-09-29-17-21-13", "2014-09-29-17-21-46", "2014-09-29-17-22-19", 
"2014-09-29-17-22-51", "2014-09-29-15-33-19", "2014-09-29-15-33-51", 
"2014-09-29-15-34-24", "2014-09-29-15-34-57", "2014-09-29-15-35-30", 
"2014-09-29-15-36-03", "2014-09-29-15-36-36", "2014-09-29-15-37-09", 
"2014-09-29-15-37-42", "2014-09-29-15-38-15", "2014-09-29-15-38-47", 
"2014-09-29-15-39-20", "2014-09-29-15-39-53", "2014-09-29-15-40-26", 
"2014-09-29-15-40-59", "2014-09-29-14-57-46", "2014-09-29-14-58-19", 
"2014-09-29-14-58-52", "2014-09-29-14-59-25", "2014-09-29-14-59-58", 
"2014-09-29-15-00-30", "2014-09-29-15-01-03", "2014-09-29-15-01-36", 
"2014-09-29-15-02-09", "2014-09-29-15-02-42", "2014-09-29-15-03-15", 
"2014-09-29-15-03-48", "2014-09-29-15-04-21", "2014-09-29-15-04-54", 
"2014-09-29-15-05-26", "2014-09-29-16-01-23", "2014-09-29-16-01-56", 
"2014-09-29-16-02-29", "2014-09-29-16-03-02", "2014-09-29-16-03-34", 
"2014-09-29-16-04-07", "2014-09-29-16-04-40", "2014-09-29-16-05-13", 
"2014-09-29-16-05-46", "2014-09-29-16-06-19", "2014-09-29-16-06-52", 
"2014-09-29-16-07-25", "2014-09-29-16-07-58", "2014-09-29-16-08-31", 
"2014-09-29-16-09-03", "2014-09-29-15-08-44", "2014-09-29-15-09-17", 
"2014-09-29-15-09-50", "2014-09-29-15-10-23", "2014-09-29-15-10-56", 
"2014-09-29-15-11-29", "2014-09-29-15-12-02", "2014-09-29-15-12-34", 
"2014-09-29-15-13-07", "2014-09-29-15-13-40", "2014-09-29-15-14-13", 
"2014-09-29-15-14-46", "2014-09-29-15-15-19", "2014-09-29-15-15-52", 
"2014-09-29-15-16-25", "2014-09-29-14-47-22", "2014-09-29-14-47-55", 
"2014-09-29-14-48-28", "2014-09-29-14-49-01", "2014-09-29-14-49-33", 
"2014-09-29-14-50-06", "2014-09-29-14-50-39", "2014-09-29-14-51-12", 
"2014-09-29-14-51-45", "2014-09-29-14-52-18", "2014-09-29-14-52-51", 
"2014-09-29-14-53-24", "2014-09-29-14-53-57", "2014-09-29-14-54-30", 
"2014-09-29-14-55-02", "2014-09-29-16-28-48", "2014-09-29-16-29-21", 
"2014-09-29-16-29-54", "2014-09-29-16-30-27", "2014-09-29-16-31-00", 
"2014-09-29-16-31-33", "2014-09-29-16-32-06", "2014-09-29-16-32-39", 
"2014-09-29-16-33-11", "2014-09-29-16-33-44", "2014-09-29-16-34-17", 
"2014-09-29-16-34-50", "2014-09-29-16-35-23", "2014-09-29-16-35-56", 
"2014-09-29-16-36-29", "2014-09-29-17-25-14", "2014-09-29-17-25-47", 
"2014-09-29-17-26-19", "2014-09-29-17-26-52", "2014-09-29-17-27-25", 
"2014-09-29-17-27-58", "2014-09-29-17-28-31", "2014-09-29-17-29-04", 
"2014-09-29-17-29-37", "2014-09-29-17-30-10", "2014-09-29-17-30-43", 
"2014-09-29-17-31-15", "2014-09-29-17-31-48", "2014-09-29-17-32-21", 
"2014-09-29-17-32-54", "2014-09-29-16-17-27", "2014-09-29-16-18-00", 
"2014-09-29-16-18-33", "2014-09-29-16-19-06", "2014-09-29-16-19-39", 
"2014-09-29-16-20-12", "2014-09-29-16-20-44", "2014-09-29-16-21-17", 
"2014-09-29-16-21-50", "2014-09-29-16-22-23", "2014-09-29-16-22-56", 
"2014-09-29-16-23-29", "2014-09-29-16-24-02", "2014-09-29-16-24-35", 
"2014-09-29-16-25-08", "2014-09-29-15-47-58", "2014-09-29-15-48-31", 
"2014-09-29-15-49-04", "2014-09-29-15-49-37", "2014-09-29-15-50-10", 
"2014-09-29-15-50-43", "2014-09-29-15-51-16", "2014-09-29-15-51-48", 
"2014-09-29-15-52-21", "2014-09-29-15-52-54", "2014-09-29-15-53-27", 
"2014-09-29-15-54-00", "2014-09-29-15-54-33", "2014-09-29-15-55-06", 
"2014-09-29-15-55-39"), class = "factor"), slide = c(-0.539061527692515, 
5.66195389722871, 7.21700493760735, 8.57297342546652, 7.7773526671123, 
6.62474973733262, 6.560914, 7.31276540449108, 7.71350773385997, 
12.1338937897869, 11.9905159284941, 11.2351229860888, 11.171288, 
10.9577765834208, 10.1960091495425, 13.2991171422465, 11.880578, 
11.880578, 11.880578, 11.880578, 16.0933144270847, 16.9519788350095, 
16.845588, 19.2232432102836, 21.9205470146831, 22.6298362060887, 
22.519898, 22.2042838079135, 21.342480835212, 21.633288, -0.602895153275783, 
-0.244705202434696, 0.17732, 0.126656910412154, 1.86187998841909, 
3.54644, 3.55707925822747, 5.71051118116113, 6.22400266751609, 
5.51471224822015, 5.7249723129145, 6.56092, 6.44388637539319, 
7.9157386165015, 8.32690228832434, 9.53277141810982, 19.3110144969466, 
22.9135505458856, 22.3037317117908, 20.3740194194864, 20.924, 
22.3709640603964, 23.58383, 23.58383, 23.9987605376059, 24.644213635217, 
25.0024, 26.0663404636184, 26.3429555676271, 25.8074456968971, 
-0.0177319184791367, 0.719933225870554, 1.13841511129536, 0.600762904193025, 
0.741246909057598, 1.41858, 1.32811039115315, 3.04283881261579, 
5.10332993034282, 5.14234, 6.2983164198475, 10.4679694192137, 
15.4994167583022, 16.3774653773901, 15.2852127504721, 16.8568308869227, 
18.6826487817362, 17.2925140151485, 15.2497, 15.5511441936136, 
15.9660731899984, 16.2462459767841, 12.3834508635123, 11.34861, 
13.203433674866, 14.2105908360768, 13.3570493746889, 11.3947136984443, 
11.34861, 11.030156846851)), .Names = c("time", "datetime", "slide"
), row.names = c(NA, 90L), class = "data.frame")

看起来像这样:

   time            datetime       slide
1   0.5 2014-09-29-16-53-45 -0.53906153
2   1.5 2014-09-29-16-53-45  5.66195390
3   2.5 2014-09-29-16-53-45  7.21700494
4   3.5 2014-09-29-16-53-45  8.57297343
5   4.5 2014-09-29-16-53-45  7.77735267
6   5.5 2014-09-29-16-53-45  6.62474974
7   6.5 2014-09-29-16-53-45  6.56091400
8   7.5 2014-09-29-16-53-45  7.31276540
9   8.5 2014-09-29-16-53-45  7.71350773
10  9.5 2014-09-29-16-53-45 12.13389379
…

每组包含 30 行。我的 specimen 向量如下所示:

# One specimen name per group, in group order (three groups, all named "083").
specimen <- rep("083", times = 3L)

那么,我怎样才能把 specimen 的第一个元素应用到第一组、第二个元素应用到第二组,以此类推,从而得到一个新的列(或者一个之后可以用 cbind 合并的向量),其中包含每一行对应的标本名称?

1 个答案:

答案 0 :(得分:0)

# One specimen name per group, in the order the groups appear in df.
specimen <- c("083", "084", "085")

# Derive a running group index from the time column: a new group starts at the
# first row and at every row whose time is lower than the previous row's time.
# (The datetime column might serve as the grouping key instead.)
df$group <- cumsum(c(TRUE, diff(df$time) < 0))

# Fail loudly when the number of names does not match the number of groups.
# factor(x, labels = ...) would otherwise either stop with an obscure
# "invalid 'labels'" error or, given a single label, silently append sequence
# numbers to it ("0831", "0832", ...).
if (length(specimen) != max(df$group)) {
  stop("specimen must supply exactly one name per group", call. = FALSE)
}

# Label the groups by position. Indexing specimen with the group index also
# works when several groups share the same name, which
# factor(..., labels = specimen) rejects in R versions before 3.4.
df$group <- factor(specimen[df$group], levels = unique(specimen))