按月和年分组

时间:2014-07-27 05:20:19

标签: r sorting

我需要按月份和年份对数据分组,并按时间顺序对结果进行排序。我正在使用以下数据和代码:

# Reproducible example data in dput() form: a 50-row data frame with a
# numeric value (vnum1), the month as an integer (vmonthnum), the month as an
# abbreviated English name (vmonth), and the year as an integer (vyear).
mydf = structure(list(vnum1 = c(0.213462416929903, 0.988030047419118, 
-1.18652469981587, -0.869178623205718, 0.912875335795115, -1.98798388768447, 
-0.304573289627417, 0.559868758619623, -0.663557878516269, -0.558487562052716, 
0.437910610434683, 0.294626820421212, 1.22382550331396, 1.33307181022467, 
-0.111632843418732, 0.012593612409791, 0.202491597986104, -0.0926340952847484, 
0.838878748813974, 0.397235027161488, -0.24188970321148, 0.941276507145062, 
0.209022985751647, 1.12583170538807, 1.32872138538229, 0.490518883526501, 
-1.5848233402832, 0.21465692222817, 0.32862179851896, 1.25692197516853, 
-0.101168594652985, 0.151940891762939, -1.56082855559097, 0.81784767965823, 
0.400190430382005, -1.53216256468244, -1.28940381159733, -0.795006205948021, 
1.06739871977495, 0.529556847460609, 0.39886466332703, 0.392956914201864, 
-1.87574207207718, 0.394469467803633, 1.78815629799651, 1.64468036754424, 
-1.5042078341332, 0.963769152123962, -0.22245472921696, 0.0439610905616637
), vmonthnum = c(12L, 7L, 3L, 9L, 3L, 9L, 9L, 5L, 7L, 12L, 5L, 
8L, 12L, 6L, 3L, 1L, 3L, 8L, 7L, 3L, 6L, 8L, 7L, 4L, 4L, 8L, 
10L, 1L, 11L, 9L, 7L, 6L, 10L, 8L, 9L, 8L, 3L, 9L, 1L, 6L, 12L, 
6L, 2L, 2L, 7L, 1L, 6L, 8L, 3L, 12L), vmonth = c("Dec", "Jul", 
"Mar", "Sep", "Mar", "Sep", "Sep", "May", "Jul", "Dec", "May", 
"Aug", "Dec", "Jun", "Mar", "Jan", "Mar", "Aug", "Jul", "Mar", 
"Jun", "Aug", "Jul", "Apr", "Apr", "Aug", "Oct", "Jan", "Nov", 
"Sep", "Jul", "Jun", "Oct", "Aug", "Sep", "Aug", "Mar", "Sep", 
"Jan", "Jun", "Dec", "Jun", "Feb", "Feb", "Jul", "Jan", "Jun", 
"Aug", "Mar", "Dec"), vyear = c(2013L, 2014L, 2014L, 2010L, 2011L, 
2012L, 2012L, 2011L, 2014L, 2011L, 2011L, 2010L, 2011L, 2014L, 
2010L, 2009L, 2010L, 2012L, 2010L, 2009L, 2010L, 2011L, 2013L, 
2013L, 2011L, 2013L, 2012L, 2011L, 2010L, 2010L, 2011L, 2014L, 
2010L, 2014L, 2013L, 2009L, 2012L, 2011L, 2014L, 2013L, 2013L, 
2009L, 2009L, 2010L, 2014L, 2011L, 2014L, 2010L, 2012L, 2014L
)), .Names = c("vnum1", "vmonthnum", "vmonth", "vyear"), row.names = c(NA, 
-50L), class = "data.frame")

> 
head(mydf)
              vnum1 vmonthnum vmonth vyear
1  0.2134624        12    Dec  2013
2  0.9880300         7    Jul  2014
3 -1.1865247         3    Mar  2014
4 -0.8691786         9    Sep  2010
5  0.9128753         3    Mar  2011
6 -1.9879839         9    Sep  2012

# Label every row with a "month_year" string, then average vnum1 within each
# label. The label is plain text, so the groups come back in text order
# (e.g. "10_2010" before "1_2009"), not in chronological order.
month_year <- paste(mydf$vmonthnum, mydf$vyear, sep = "_")
outdf <- aggregate(mydf$vnum1, by = list(month_year), FUN = mean)
names(outdf) <- c("grp", "grp_mean")
head(outdf)
      grp    grp_mean
1 10_2010 -1.56082856
2 10_2012 -1.58482334
3 11_2010  0.32862180
4  1_2009  0.01259361
5  1_2011  0.92966864
6  1_2014  1.06739872
> 

outdf
       grp     grp_mean
1  10_2010 -1.560828556
2  10_2012 -1.584823340
3  11_2010  0.328621799
4   1_2009  0.012593612
5   1_2011  0.929668645
6   1_2014  1.067398720
7  12_2011  0.332668971
8  12_2013  0.306163540
9  12_2014  0.043961091
10  2_2009 -1.875742072
11  2_2010  0.394469468
12  3_2009  0.397235027
13  3_2010  0.045429377
14  3_2011  0.912875336
15  3_2012 -0.755929270
16  3_2014 -1.186524700
17  4_2011  1.328721385
18  4_2013  1.125831705
19  5_2011  0.498889685
20  6_2009  0.392956914
21  6_2010 -0.241889703
22  6_2013  0.529556847
23  6_2014 -0.006398377
24  7_2010  0.838878749
25  7_2011 -0.101168595
26  7_2013  0.209022986
27  7_2014  0.704209489
28  8_2009 -1.532162565
29  8_2010  0.629197986
30  8_2011  0.941276507
31  8_2012 -0.092634095
32  8_2013  0.490518884
33  8_2014  0.817847680
34  9_2010  0.193871676
35  9_2011 -0.795006206
36  9_2012 -1.146278589
37  9_2013  0.400190430
>

如何按 grp 列对 outdf 进行排序,使其符合时间顺序?如果需要,我也可以改用 'Jan'、'Feb' 等月份名称(vmonth 列)。之所以需要这样排序,是为了绘制以平均值为 y 轴、时间为 x 轴的图。我查看过下面这个页面上的解决方案,但那里的数据有确切的日期可用:Sorting an data frame based on month-year time format。谢谢你的帮助。

3 个答案:

答案 0 :(得分:2)

您还可以使用 zoo 包中的 as.yearmon:

library(zoo)
# Build the year-month key from the numeric columns instead of parsing the
# month abbreviations in vmonth: parsing text such as "Dec 2013" depends on
# the session locale and yields NA where month names are not English.
# A yearmon value is year + (month - 1) / 12, so January maps to year + 0.
mydf$grp <- with(mydf, as.yearmon(vyear + (vmonthnum - 1) / 12))
# One row per distinct year-month, averaged over vnum1.
outdf <- with(mydf, aggregate(vnum1 ~ grp, FUN = mean))

head(outdf)
#        grp       vnum1
# 1 Jan 2009  0.01259361
# 2 Feb 2009 -1.87574207
# 3 Mar 2009  0.39723503
# 4 Jun 2009  0.39295691
# 5 Aug 2009 -1.53216256
# 6 Feb 2010  0.39446947

答案 1 :(得分:2)

如果需要一个能扩展到更大数据量的解决方案,可以使用 data.table:data.table 是 data.frame 的增强版本,分组运算的速度更快。

library(data.table)

# Work on a data.table copy of the data frame.
mydt <- as.data.table(mydf)

# Key on year first and month second; the key order decides the sort order.
setkeyv(mydt, c("vyear", "vmonthnum"))

# Mean of vnum1 for every (year, month) combination.
mydt[, mean(vnum1), by = c("vyear", "vmonthnum")]

希望这有帮助。

答案 2 :(得分:1)

可以使用以下方法从这些数据中得到可排序的 year_month 键:

> sprintf("%4d_%02d",mydf$vyear,mydf$vmonthnum)  # year first, month zero-padded to 2 digits, so text order is chronological
 [1] "2013_12" "2014_07" "2014_03" "2010_09" "2011_03" "2012_09" "2012_09"
 [8] "2011_05" "2014_07" "2011_12" "2011_05" "2010_08" "2011_12" "2014_06"
[15] "2010_03" "2009_01" "2010_03" "2012_08" "2010_07" "2009_03" "2010_06"
[22] "2011_08" "2013_07" "2013_04" "2011_04" "2013_08" "2012_10" "2011_01"
[29] "2010_11" "2010_09" "2011_07" "2014_06" "2010_10" "2014_08" "2013_09"
[36] "2009_08" "2012_03" "2011_09" "2014_01" "2013_06" "2013_12" "2009_06"
[43] "2009_02" "2010_02" "2014_07" "2011_01" "2014_06" "2010_08" "2012_03"
[50] "2014_12"
> 

......或:

> # Subtract 1 from the month so that January maps to year + 0 and December
> # to year + 11/12; with vmonthnum/12, December would land on the next year.
> yrVal <- mydf$vyear + (mydf$vmonthnum - 1)/12
> yrVal
 [1] 2013.917 2014.500 2014.167 2010.667 2011.167 2012.667 2012.667 2011.333
 [9] 2014.500 2011.917 2011.333 2010.583 2011.917 2014.417 2010.167 2009.000
[17] 2010.167 2012.583 2010.500 2009.167 2010.417 2011.583 2013.500 2013.250
[25] 2011.250 2013.583 2012.750 2011.000 2010.833 2010.667 2011.500 2014.417
[33] 2010.750 2014.583 2013.667 2009.583 2012.167 2011.667 2014.000 2013.417
[41] 2013.917 2009.417 2009.083 2010.083 2014.500 2011.000 2014.417 2010.583
[49] 2012.167 2014.917

第二个版本的优点是它可以用作图表x轴上的值,甚至不用先排序数据。