覆盖ggplot数据层

时间:2017-11-28 14:43:27

标签: r date dataframe ggplot2 time-series

我试图在ggplot中覆盖两个不同长度的数据集。

数据集1:数据框r,其中m是日期,V2是介于-1到+1之间的值:

> r
             m         V2
19991221    1
19910703    -0.396825397
19850326    0.916666667
19890328    -0.473053892
19610912    -0.75
20021106    -0.991525424
19940324    -1
19840522    -0.502145923
19780718    1
19811222    -0.447154472
19781017    0
19761108    -0.971014493
19791006    1
19891219    0.818181818
19851217    0.970149254
19980818    0.808219178
19940816    -0.985185185
19790814    -0.966666667
19990203    -0.882352941
19831220    1
19830114    -1
19980204    -0.991489362
19941115    -0.966101695
19860520    -0.986206897
19761019    -0.666666667
19900207    -0.983870968
19731010    0
19821221    -0.833333333
19770517    1
19800205    0.662337662
19760329    -0.545454545
19810224    -0.957446809
20000628    -0.989473684
19911105    -0.988571429
19960924    -0.483870968
19880816    1
19860923    1
20030506    -1
20031209    -1
19950201    -0.974025974
19790206    1
19811117    -0.989304813
19950822    -1
19860212    0.808219178
19730821    -0.463203463
19991221    1
19910703    -0.396825397
19850326    0.916666667
19890328    -0.473053892
19610912    -0.75
20021106    -0.991525424
19940324    -1
19840522    -0.502145923
19780718    1
19811222    -0.447154472
19781017    0
19761108    -0.971014493
19791006    1
19891219    0.818181818
19851217    0.970149254
19980818    0.808219178
19940816    -0.985185185
19790814    -0.966666667
19990203    -0.882352941
19831220    1
19830114    -1
19980204    -0.991489362
19941115    -0.966101695
19860520    -0.986206897
19761019    -0.666666667
19900207    -0.983870968
19731010    0
19821221    -0.833333333
19770517    1
19800205    0.662337662
19760329    -0.545454545
19810224    -0.957446809
20000628    -0.989473684
19911105    -0.988571429
19960924    -0.483870968
19880816    1
19860923    1
20030506    -1
20031209    -1
19950201    -0.974025974
19790206    1
19811117    -0.989304813
19950822    -1
19860212    0.808219178
19730821    -0.463203463
19991221    1
19910703    -0.396825397
19850326    0.916666667
19890328    -0.473053892
19610912    -0.75
20021106    -0.991525424
19940324    -1
19840522    -0.502145923
19780718    1
19811222    -0.447154472
19781017    0
19761108    -0.971014493
19791006    1
19891219    0.818181818
19851217    0.970149254
19980818    0.808219178
19940816    -0.985185185
19790814    -0.966666667
19990203    -0.882352941
19831220    1
19830114    -1
19980204    -0.991489362
19941115    -0.966101695
19860520    -0.986206897
19761019    -0.666666667
19900207    -0.983870968
19731010    0
19821221    -0.833333333
19770517    1
19800205    0.662337662
19760329    -0.545454545
19810224    -0.957446809
20000628    -0.989473684
19911105    -0.988571429
19960924    -0.483870968
19880816    1
19860923    1
20030506    -1
20031209    -1
19950201    -0.974025974
19790206    1
19811117    -0.989304813
19950822    -1
19860212    0.808219178
19730821    -0.463203463

r是用以下几行代码生成的:

# Parse the yyyymmdd values in fileloc$V1 into real Date objects.
# `m` is kept as a Date instead of being reformatted through gsub(): gsub()
# coerces its input to character, and a character column cannot be placed on
# a continuous date scale next to other Date-indexed layers in ggplot2.
m <- as.Date(as.character(fileloc$V1), format = "%Y%m%d")
# Pair the parsed dates with the second column of `fileloc`
# (presumably the V2 scores -- confirm against the input file).
r <- cbind(m, fileloc[2])
# Print the column names and the resulting data frame for inspection.
colnames(r)
r

数据集2:以下数据集定义了美国的经济衰退期:

library(quantmod)

# Download the recession indicator and the unemployment rate from FRED;
# getSymbols() creates the objects USREC and UNRATE in the workspace.
getSymbols("USREC", src = "FRED")
getSymbols("UNRATE", src = "FRED")

# Unemployment series as a plain data frame with an explicit date column.
unrate.df <- data.frame(date = index(UNRATE), UNRATE$UNRATE)

# Period-over-period change of the recession indicator, computed once.
rec_step <- diff(USREC$USREC)

# Rows where the indicator steps up by 1 are taken as recession starts;
# for steps of -1 the row just before the step is taken as the recession end.
start <- index(USREC[which(rec_step == 1)])
end <- index(USREC[which(rec_step == -1) - 1])

# The first end date is discarded before pairing starts with ends
# (presumably because the series opens inside a recession -- confirm).
reccesion.df <- data.frame(start = start, end = tail(end, -1))

# Keep only the recessions that begin on or after the first UNRATE date.
recession.df <- subset(reccesion.df, start >= min(unrate.df$date))

结果recession.df

> recession.df
        start        end
1 1948-12-01 1949-10-01
2 1953-08-01 1954-05-01
3 1957-09-01 1958-04-01
.....
11 2008-01-01 2009-06-01

绘图

我可以使用以下内容生成单独的散点图:

# Scatter plot of tone score (x) against date (y), coloured by tone score.
# Columns are referenced by bare name inside aes() so ggplot2 resolves them
# in the `data` argument; writing `r$m` there bypasses that lookup and is
# discouraged by ggplot2.
ggplot(r, aes(x = V2, y = m, colour = V2)) +
  geom_point() +
  xlab(label = "Tone Score") +
  ylab(label = "Dates")

以及带有经济衰退期阴影区域的时间序列图:

# Unemployment rate over time, drawn as a line.
unemployment_line <- geom_line(
  data = unrate.df,
  aes(x = date, y = UNRATE)
)

# One translucent red band per recession, spanning from 0 up to the highest
# unemployment rate in unrate.df.
recession_bands <- geom_rect(
  data = recession.df,
  aes(
    xmin = start,
    xmax = end,
    ymin = 0,
    ymax = max(unrate.df$UNRATE)
  ),
  fill = "red",
  alpha = 0.2
)

# Line first, bands second, so the bands are drawn over the line.
ggplot() + unemployment_line + recession_bands

如何合并这些图,以便查看叠加在时间序列上的散点图?

1 个答案:

答案 0 :(得分:2)

由于您没有提供问题的完整数据集,我为1973/08/21和1999/12/21之间的日期生成了一些随机数据:

# Reproducible stand-in for the real tone-score data: 135 evenly spaced
# dates running from 1973/08/21 to 1999/12/21 in ascending order, each
# paired with a normally distributed random score (mean 0, sd 0.5).
set.seed(123)
r <- data.frame(
  m = seq.Date(
    from = as.Date("1973/08/21"),
    to = as.Date("1999/12/21"),
    length.out = 135
  ),
  V2 = rnorm(n = 135, mean = 0, sd = 0.5)
)

您可以为所调用的每个geom_项分别指定不同的data和aes参数,从而在ggplot中叠加多个图层。

# Overlay the three layers on one plot with a shared date x-axis. Each geom
# is given its own data frame and its own aesthetic mapping.
ggplot() +
  geom_point(data = r, aes(x = m, y = V2, colour = V2)) +
  geom_line(data = unrate.df, aes(x = date, y = UNRATE)) +
  geom_rect(
    data = recession.df,
    aes(xmin = start, xmax = end, ymin = 0, ymax = max(unrate.df$UNRATE)),
    fill = "red", alpha = 0.2
  ) +
  # Dates are mapped to x and tone scores to y in this plot, so the axis
  # titles follow that mapping.
  xlab(label = "Dates") +
  ylab(label = "Tone Score")

enter image description here