如何从季度数据中获取月度数据

时间:2018-03-07 22:07:44

标签: r function ggplot2 analytics interpolation

问题:

我有季度数据,需要做逐月的分析。是否有函数或 ggplot 功能可以对季度数据进行插值,从而补全月度数据?

参考数据:

dput(HPF[1:25, ])
structure(list(region = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), path = c(1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
), date = structure(c(16116, 16205, 16297, 16389, 16481, 16570, 
16662, 16754, 16846, 16936, 17028, 17120, 17212, 17301, 17393, 
17485, 17577, 17666, 17758, 17850, 17942, 18031, 18123, 18215, 
18307), class = "Date"), index_value = c(1, 1.033852765, 1.041697122, 
1.038876363, 1.041043093, 1.060900982, 1.073728928, 1.075879441, 
1.080898915, 1.10368893, 1.119240863, 1.122827602, 1.128639801, 
1.15275796, 1.169021733, 1.172707492, 1.178666441, 1.203634882, 
1.220348482, 1.223890323, 1.229770019, 1.255791539, 1.273560554, 
1.278236959, 1.285508086), index = c(0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 
24), counter = 1:25, BaseQoQ = c(0, 0.033852765, 0.00758749917354051, 
-0.00270784947028013, 0.00208564760655761, 0.0190749923163842, 
0.0120915582298895, 0.00200284535874973, 0.00466546139717505, 
0.0210843166587877, 0.0140908661646175, 0.00320461762840418, 
0.00517639483536669, 0.0213692260175751, 0.0141085757499344, 
0.00315285755256367, 0.00508136004984272, 0.0211836361259394, 
0.0138859385432799, 0.00290231933930496, 0.00480410367620832, 
0.021159663675294, 0.0141496533844698, 0.00367191413499146, 0.00568840303732765
), fdate = structure(c(16116, 16205, 16297, 16389, 16481, 16570, 
16662, 16754, 16846, 16936, 17028, 17120, 17212, 17301, 17393, 
17485, 17577, 17666, 17758, 17850, 17942, 18031, 18123, 18215, 
18307), class = "Date"), StressC = c(0.99749, 1.031342765, 1.039187122, 
1.036366363, 1.038533093, 1.058390982, 1.071218928, 1.073369441, 
1.078388915, 1.10117893, 1.116730863, 1.120317602, 1.126129801, 
1.15024796, 1.166511733, 1.170197492, 1.176156441, 1.201124882, 
1.217838482, 1.221380323, 1.229770019, 1.255791539, 1.273560554, 
1.278236959, 1.285508086), StressQoQ = c(0, 0.0339379492526242, 
0.00760596502560418, -0.0027143898728953, 0.00209069888540969, 
0.0191210941026796, 0.0121202336548254, 0.00200753827606026, 
0.00467637125510434, 0.0211333913794913, 0.0141229845362187, 
0.00321182042946733, 0.00518799221722843, 0.021416855302633, 
0.0141393626118667, 0.00315964160130755, 0.00509225924746737, 
0.021228843485116, 0.0139149560969629, 0.00290830110260876, 0.0068690282969297, 
0.021159663675294, 0.0141496533844698, 0.00367191413499146, 0.00568840303732765
)), .Names = c("region", "path", "date", "index_value", "index", 
"counter", "BaseQoQ", "fdate", "StressC", "StressQoQ"), row.names = c(NA, 
-25L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = "region", drop = TRUE, indices = list(
    0:24), group_sizes = 25L, biggest_group_size = 25L, labels = structure(list(
    region = 1), row.names = c(NA, -1L), class = "data.frame", vars = "region", drop = TRUE, .Names = "region"))

非常感谢任何见解!我希望我提供的数据足以提供建议。

目标:

我只需要把月度信息绘制成图,不需要在 data.frame 中引用它。

1 个答案:

答案 0 :(得分:2)

所以这就是你能做的:

1)创建一个插值日期向量

# For every quarterly date, generate three dates spaced one month apart
# (the date itself plus the following two months), then flatten the
# per-quarter pieces into a single-column data.frame.
months <- data.frame(
  date = do.call(
    c,
    lapply(data$date, function(d) seq(d, by = "month", length.out = 3))
  )
)

2)将原始 data.frame 连接到 months data.frame,在没有观测值的月份产生 NA,供后续插值使用

library(dplyr)
# Keep every monthly date; the quarterly columns are attached only where the
# dates match, so the in-between months are left as NA.
monthly_data <- months %>%
  left_join(data, by = "date")

3)使用 na.locf()、na.approx()、na.spline() 之一进行插值,例如对 StressC:

library(zoo)
# Replace the NA gaps in StressC with spline-interpolated values.
# This overwrites the column in place, so run any "before interpolation"
# comparison plots prior to this step.
monthly_data[["StressC"]] <- na.spline(monthly_data[["StressC"]])

注意:上述各函数的插值方式如下:

  • na.locf() - 沿用前一个观测值(last observation carried forward)
  • na.approx() - 线性插值
  • na.spline() - 样条插值(通常用于作图)

运行以下代码(在插值之前)以查看各方法的差异:

# Compare the three fill methods on the raw (still NA-containing) series:
# points = observed values, red = locf, green = linear, blue = spline.
with(monthly_data, {
  plot(x = StressC, ylab = "StressC", xlab = "", xaxt = "n")
  lines(x = na.locf(StressC), col = "red")
  lines(x = na.approx(StressC), col = "green")
  lines(x = na.spline(StressC), col = "blue")
})

也可以用下面的方式得到 ggplot 图:

library(ggplot2)
# Black points: observed StressC values (months without an observation are NA).
# Red line: last observation carried forward; blue line: spline interpolation.
# The two lines use different colours so they can be told apart, matching the
# red/blue convention of the base-graphics comparison.
# Run this while StressC still contains the NA gaps; once the column has been
# overwritten with interpolated values both lines coincide.
ggplot(monthly_data, aes(x = date)) +
  geom_point(aes(y = StressC), colour = "black") +
  geom_line(aes(y = na.locf(StressC)), colour = "red") +
  geom_line(aes(y = na.spline(StressC)), colour = "blue")