我是R的新人。
我有如下的数据框(data frame),它是一份市镇议会问题的清单:
SNO Dept FeedbackDate ClosedDate2 SubCategory1 SubZone
1 BTA 23/11/2012 4/12/2012 Permission-to-Park TOWNSVILL EAST
2 RTA 23/12/2012 4/13/2012 Rodent TOWNSVILL SOUTH
3 MTA 23/12/2012 4/16/2012 ConductVL TOWNSVILL SOUTH
我想获得一些见解
1)对于不同子区域中的同一子类别,哪个部门的解决时间(ClosedDate2 - FeedbackDate)更长。
2)不同子区域中重复出现的子类别的解决时间。
答案 0 :(得分:1)
可以使用 dplyr 和 ggplot2:
我已经编辑/制作了一个更大的数据集来展示它是如何工作的:
# Simulated example data: 100 feedback records, each with a randomly drawn
# department, sub-category and sub-zone. All records share one feedback date
# and are closed 1 to 20 days later (ceiling of a uniform draw scaled by 20).
# No seed is set, so the values differ from run to run.
dat <- data.frame(
  SNO = 1:100,
  # `replace = TRUE` is spelled out: `T` is an ordinary variable that can be
  # reassigned, and sampling 100 values from 3 requires replacement.
  Dept = sample(c("BTA", "RTA", "MTA"), 100, replace = TRUE),
  FeedbackDate = as.Date("2012/12/23"),
  ClosedDate2 = as.Date("2012/12/23") + ceiling(runif(100) * 20),
  SubCategory1 = sample(
    c("Permission-to-Park", "Rodent", "ConductVL"), 100, replace = TRUE
  ),
  SubZone = sample(
    c("TOWNSVILL EAST", "TOWNSVILL SOUTH"), 100, replace = TRUE
  )
)
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(ggplot2)
library(dplyr) # for aggregation

# Mean resolution time (ClosedDate2 - FeedbackDate) for every
# SubCategory1 / SubZone / Dept combination.
# `%>%` replaces the old `%.%` operator, which is no longer part of dplyr.
dat.sum <- dat %>%
  group_by(SubCategory1, SubZone, Dept) %>%
  summarise(AvgResTime = mean(ClosedDate2 - FeedbackDate)) %>%
  ungroup() # drop leftover grouping so later verbs are not applied per group

ggplot(dat.sum) + # use the aggregated data
  geom_point(
    aes(
      x = SubCategory1,
      # AvgResTime is a difftime; plot it as a plain number of days
      y = as.numeric(AvgResTime, units = "days"),
      color = SubZone # color points by zone
    ),
    size = 10
  ) +
  facet_wrap(~ Dept) + # one facet per department
  labs(y = "AvgResTime (days)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) # rotate x axis text
以下是之前使用 plyr 的版本(已更新):
# library() stops with an error when plyr is missing; require() would only
# return FALSE. NOTE: attaching plyr after dplyr masks several dplyr verbs.
library(plyr) # for aggregation

# Mean resolution time (ClosedDate2 - FeedbackDate) for every
# SubCategory1 / SubZone / Dept combination.
# plyr::summarise is named explicitly so the call does not depend on whether
# plyr or dplyr was attached last.
dat.sum <- ddply(
  dat,
  .(SubCategory1, SubZone, Dept),
  plyr::summarise,
  AvgResTime = mean(ClosedDate2 - FeedbackDate)
)

ggplot(dat.sum) + # use the aggregated data
  geom_point(
    aes(
      x = SubCategory1,
      # as.numeric() keeps fractional days; as.integer() would truncate the
      # averages (e.g. 10.7 days plotted as 10)
      y = as.numeric(AvgResTime, units = "days"),
      color = SubZone # color points by zone
    ),
    size = 10
  ) +
  facet_wrap(~ Dept) + # one facet per department
  labs(y = "AvgResTime (days)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) # rotate x axis text