我需要按月份和年份对数据分组,并按时间顺序对结果进行排序。我正在使用以下数据和代码:
# Example data (dput-style literal): a 50-row data.frame with four columns --
#   vnum1     numeric values to be averaged
#   vmonthnum month as an integer
#   vmonth    month as a three-letter abbreviation ("Jan", "Feb", ...)
#   vyear     year as an integer
mydf = structure(list(vnum1 = c(0.213462416929903, 0.988030047419118,
-1.18652469981587, -0.869178623205718, 0.912875335795115, -1.98798388768447,
-0.304573289627417, 0.559868758619623, -0.663557878516269, -0.558487562052716,
0.437910610434683, 0.294626820421212, 1.22382550331396, 1.33307181022467,
-0.111632843418732, 0.012593612409791, 0.202491597986104, -0.0926340952847484,
0.838878748813974, 0.397235027161488, -0.24188970321148, 0.941276507145062,
0.209022985751647, 1.12583170538807, 1.32872138538229, 0.490518883526501,
-1.5848233402832, 0.21465692222817, 0.32862179851896, 1.25692197516853,
-0.101168594652985, 0.151940891762939, -1.56082855559097, 0.81784767965823,
0.400190430382005, -1.53216256468244, -1.28940381159733, -0.795006205948021,
1.06739871977495, 0.529556847460609, 0.39886466332703, 0.392956914201864,
-1.87574207207718, 0.394469467803633, 1.78815629799651, 1.64468036754424,
-1.5042078341332, 0.963769152123962, -0.22245472921696, 0.0439610905616637
), vmonthnum = c(12L, 7L, 3L, 9L, 3L, 9L, 9L, 5L, 7L, 12L, 5L,
8L, 12L, 6L, 3L, 1L, 3L, 8L, 7L, 3L, 6L, 8L, 7L, 4L, 4L, 8L,
10L, 1L, 11L, 9L, 7L, 6L, 10L, 8L, 9L, 8L, 3L, 9L, 1L, 6L, 12L,
6L, 2L, 2L, 7L, 1L, 6L, 8L, 3L, 12L), vmonth = c("Dec", "Jul",
"Mar", "Sep", "Mar", "Sep", "Sep", "May", "Jul", "Dec", "May",
"Aug", "Dec", "Jun", "Mar", "Jan", "Mar", "Aug", "Jul", "Mar",
"Jun", "Aug", "Jul", "Apr", "Apr", "Aug", "Oct", "Jan", "Nov",
"Sep", "Jul", "Jun", "Oct", "Aug", "Sep", "Aug", "Mar", "Sep",
"Jan", "Jun", "Dec", "Jun", "Feb", "Feb", "Jul", "Jan", "Jun",
"Aug", "Mar", "Dec"), vyear = c(2013L, 2014L, 2014L, 2010L, 2011L,
2012L, 2012L, 2011L, 2014L, 2011L, 2011L, 2010L, 2011L, 2014L,
2010L, 2009L, 2010L, 2012L, 2010L, 2009L, 2010L, 2011L, 2013L,
2013L, 2011L, 2013L, 2012L, 2011L, 2010L, 2010L, 2011L, 2014L,
2010L, 2014L, 2013L, 2009L, 2012L, 2011L, 2014L, 2013L, 2013L,
2009L, 2009L, 2010L, 2014L, 2011L, 2014L, 2010L, 2012L, 2014L
)), .Names = c("vnum1", "vmonthnum", "vmonth", "vyear"), row.names = c(NA,
-50L), class = "data.frame")
>
head(mydf)
vnum1 vmonthnum vmonth vyear
1 0.2134624 12 Dec 2013
2 0.9880300 7 Jul 2014
3 -1.1865247 3 Mar 2014
4 -0.8691786 9 Sep 2010
5 0.9128753 3 Mar 2011
6 -1.9879839 9 Sep 2012
# Mean of vnum1 for every month/year combination. The grouping key is the
# character string "<month number>_<year>", so the result rows come out in
# lexicographic order of that string (e.g. "10_2010" before "1_2009"),
# not in chronological order.
outdf <- aggregate(
  vnum1 ~ paste(vmonthnum, vyear, sep = "_"),
  data = mydf,
  FUN = mean
)
# Replace the auto-generated column names with short ones.
names(outdf) <- c("grp", "grp_mean")
head(outdf)
grp grp_mean
1 10_2010 -1.56082856
2 10_2012 -1.58482334
3 11_2010 0.32862180
4 1_2009 0.01259361
5 1_2011 0.92966864
6 1_2014 1.06739872
>
outdf
grp grp_mean
1 10_2010 -1.560828556
2 10_2012 -1.584823340
3 11_2010 0.328621799
4 1_2009 0.012593612
5 1_2011 0.929668645
6 1_2014 1.067398720
7 12_2011 0.332668971
8 12_2013 0.306163540
9 12_2014 0.043961091
10 2_2009 -1.875742072
11 2_2010 0.394469468
12 3_2009 0.397235027
13 3_2010 0.045429377
14 3_2011 0.912875336
15 3_2012 -0.755929270
16 3_2014 -1.186524700
17 4_2011 1.328721385
18 4_2013 1.125831705
19 5_2011 0.498889685
20 6_2009 0.392956914
21 6_2010 -0.241889703
22 6_2013 0.529556847
23 6_2014 -0.006398377
24 7_2010 0.838878749
25 7_2011 -0.101168595
26 7_2013 0.209022986
27 7_2014 0.704209489
28 8_2009 -1.532162565
29 8_2010 0.629197986
30 8_2011 0.941276507
31 8_2012 -0.092634095
32 8_2013 0.490518884
33 8_2014 0.817847680
34 9_2010 0.193871676
35 9_2011 -0.795006206
36 9_2012 -1.146278589
37 9_2013 0.400190430
>
如何按 grp 列对 outdf 进行排序,使其符合时间顺序?为此我也可以使用 'Jan'、'Feb' 等月份缩写(即 vmonth 列)。之所以需要这样排序,是为了以时间为 x 轴、平均值为 y 轴进行绘图。我查看了此页面上的解决方案,但那个问题中有确切的日期可用,而我的数据只有月份和年份:Sorting an data frame based on month-year time format 谢谢你的帮助。
答案 0 :(得分:2)
您还可以使用 zoo 包中的 as.yearmon:
library(zoo)

# Build a year-month key from the month abbreviation and the year
# (e.g. "Dec 2013"). The output shown below is ordered chronologically
# when grouped on this key.
# NOTE(review): parsing month abbreviations presumably depends on the current
# locale -- confirm when running under a non-English locale.
mydf$grp <- as.yearmon(paste(mydf$vmonth, mydf$vyear))

# Average vnum1 within each year-month.
outdf <- aggregate(vnum1 ~ grp, data = mydf, FUN = mean)
head(outdf)
# grp vnum1
# 1 Jan 2009 0.01259361
# 2 Feb 2009 -1.87574207
# 3 Mar 2009 0.39723503
# 4 Jun 2009 0.39295691
# 5 Aug 2009 -1.53216256
# 6 Feb 2010 0.39446947
答案 1 :(得分:2)
如果需要一个可扩展(适用于大数据量)的解决方案,请使用 data.table:data.table 是 data.frame 的增强版本,分组运算的速度更快。
# Load the package.
library(data.table)

# Convert the data.frame into a data.table.
mydt <- as.data.table(mydf)

# Key on year first, then month number; the order of the key columns matters.
setkeyv(mydt, c("vyear", "vmonthnum"))

# Mean of vnum1 for each (vyear, vmonthnum) group, grouped by the key columns.
mydt[, mean(vnum1), by = key(mydt)]
希望这有帮助。
答案 2 :(得分:1)
可以使用以下方法从这些数据中获取可排序的 year_month 键:
> sprintf("%4d_%02d",mydf$vyear,mydf$vmonthnum)
[1] "2013_12" "2014_07" "2014_03" "2010_09" "2011_03" "2012_09" "2012_09"
[8] "2011_05" "2014_07" "2011_12" "2011_05" "2010_08" "2011_12" "2014_06"
[15] "2010_03" "2009_01" "2010_03" "2012_08" "2010_07" "2009_03" "2010_06"
[22] "2011_08" "2013_07" "2013_04" "2011_04" "2013_08" "2012_10" "2011_01"
[29] "2010_11" "2010_09" "2011_07" "2014_06" "2010_10" "2014_08" "2013_09"
[36] "2009_08" "2012_03" "2011_09" "2014_01" "2013_06" "2013_12" "2009_06"
[43] "2009_02" "2010_02" "2014_07" "2011_01" "2014_06" "2010_08" "2012_03"
[50] "2014_12"
>
......或:
> yrVal <- mydf$vyear + mydf$vmonthnum/12
> yrVal
[1] 2014.000 2014.583 2014.250 2010.750 2011.250 2012.750 2012.750 2011.417
[9] 2014.583 2012.000 2011.417 2010.667 2012.000 2014.500 2010.250 2009.083
[17] 2010.250 2012.667 2010.583 2009.250 2010.500 2011.667 2013.583 2013.333
[25] 2011.333 2013.667 2012.833 2011.083 2010.917 2010.750 2011.583 2014.500
[33] 2010.833 2014.667 2013.750 2009.667 2012.250 2011.750 2014.083 2013.500
[41] 2014.000 2009.500 2009.167 2010.167 2014.583 2011.083 2014.500 2010.667
[49] 2012.250 2015.000
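第二个版本的优点是它可以直接用作图表 x 轴上的值,甚至不用先对数据排序。注意:按上面的公式,12 月会得到下一年的整数值(例如 2013 年 12 月 → 2014.000),也就是所有点整体向后偏移了一个月;时间顺序不受影响,但如果希望 1 月对应 year.000,可改用 mydf$vyear + (mydf$vmonthnum - 1)/12。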